193 files changed, 9190 insertions, 3198 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3c248f772ae6..e60be91d8036 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -115,12 +115,17 @@ will access the virtual machine's physical address space; offset zero | |||
115 | corresponds to guest physical address zero. Use of mmap() on a VM fd | 115 | corresponds to guest physical address zero. Use of mmap() on a VM fd |
116 | is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is | 116 | is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is |
117 | available. | 117 | available. |
118 | You most certainly want to use 0 as machine type. | 118 | You most certainly want to use 0 as machine type. |
119 | 119 | ||
120 | In order to create user controlled virtual machines on S390, check | 120 | In order to create user controlled virtual machines on S390, check |
121 | KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as | 121 | KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as |
122 | privileged user (CAP_SYS_ADMIN). | 122 | privileged user (CAP_SYS_ADMIN). |
123 | 123 | ||
124 | To use hardware assisted virtualization on MIPS (VZ ASE) rather than | ||
125 | the default trap & emulate implementation (which changes the virtual | ||
126 | memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the | ||
127 | flag KVM_VM_MIPS_VZ. | ||
128 | |||
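Editorially added illustration (not part of this patch): a minimal userspace sketch of selecting the machine type at VM creation on MIPS, assuming <fcntl.h>, <sys/ioctl.h> and <linux/kvm.h> are available; error handling is omitted.

    int create_mips_vm(void)
    {
            int kvm_fd = open("/dev/kvm", O_RDWR);
            int type = KVM_VM_MIPS_TE;              /* 0: trap & emulate */

            /* Prefer full hardware assisted virtualization if available. */
            if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ) == 1)
                    type = KVM_VM_MIPS_VZ;

            return ioctl(kvm_fd, KVM_CREATE_VM, type);
    }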
124 | 129 | ||
125 | 4.3 KVM_GET_MSR_INDEX_LIST | 130 | 4.3 KVM_GET_MSR_INDEX_LIST |
126 | 131 | ||
@@ -1321,130 +1326,6 @@ The flags bitmap is defined as: | |||
1321 | /* the host supports the ePAPR idle hcall | 1326 | /* the host supports the ePAPR idle hcall |
1322 | #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) | 1327 | #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) |
1323 | 1328 | ||
1324 | 4.48 KVM_ASSIGN_PCI_DEVICE (deprecated) | ||
1325 | |||
1326 | Capability: none | ||
1327 | Architectures: x86 | ||
1328 | Type: vm ioctl | ||
1329 | Parameters: struct kvm_assigned_pci_dev (in) | ||
1330 | Returns: 0 on success, -1 on error | ||
1331 | |||
1332 | Assigns a host PCI device to the VM. | ||
1333 | |||
1334 | struct kvm_assigned_pci_dev { | ||
1335 | __u32 assigned_dev_id; | ||
1336 | __u32 busnr; | ||
1337 | __u32 devfn; | ||
1338 | __u32 flags; | ||
1339 | __u32 segnr; | ||
1340 | union { | ||
1341 | __u32 reserved[11]; | ||
1342 | }; | ||
1343 | }; | ||
1344 | |||
1345 | The PCI device is specified by the triple segnr, busnr, and devfn. | ||
1346 | Identification in succeeding service requests is done via assigned_dev_id. The | ||
1347 | following flags are specified: | ||
1348 | |||
1349 | /* Depends on KVM_CAP_IOMMU */ | ||
1350 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | ||
1351 | /* The following two depend on KVM_CAP_PCI_2_3 */ | ||
1352 | #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) | ||
1353 | #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) | ||
1354 | |||
1355 | If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts | ||
1356 | via the PCI-2.3-compliant device-level mask, thus enable IRQ sharing with other | ||
1357 | assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the | ||
1358 | guest's view on the INTx mask, see KVM_ASSIGN_SET_INTX_MASK for details. | ||
1359 | |||
1360 | The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure | ||
1361 | isolation of the device. Usages not specifying this flag are deprecated. | ||
1362 | |||
1363 | Only PCI header type 0 devices with PCI BAR resources are supported by | ||
1364 | device assignment. The user requesting this ioctl must have read/write | ||
1365 | access to the PCI sysfs resource files associated with the device. | ||
1366 | |||
1367 | Errors: | ||
1368 | ENOTTY: kernel does not support this ioctl | ||
1369 | |||
1370 | Other error conditions may be defined by individual device types or | ||
1371 | have their standard meanings. | ||
1372 | |||
1373 | |||
1374 | 4.49 KVM_DEASSIGN_PCI_DEVICE (deprecated) | ||
1375 | |||
1376 | Capability: none | ||
1377 | Architectures: x86 | ||
1378 | Type: vm ioctl | ||
1379 | Parameters: struct kvm_assigned_pci_dev (in) | ||
1380 | Returns: 0 on success, -1 on error | ||
1381 | |||
1382 | Ends PCI device assignment, releasing all associated resources. | ||
1383 | |||
1384 | See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is | ||
1385 | used in kvm_assigned_pci_dev to identify the device. | ||
1386 | |||
1387 | Errors: | ||
1388 | ENOTTY: kernel does not support this ioctl | ||
1389 | |||
1390 | Other error conditions may be defined by individual device types or | ||
1391 | have their standard meanings. | ||
1392 | |||
1393 | 4.50 KVM_ASSIGN_DEV_IRQ (deprecated) | ||
1394 | |||
1395 | Capability: KVM_CAP_ASSIGN_DEV_IRQ | ||
1396 | Architectures: x86 | ||
1397 | Type: vm ioctl | ||
1398 | Parameters: struct kvm_assigned_irq (in) | ||
1399 | Returns: 0 on success, -1 on error | ||
1400 | |||
1401 | Assigns an IRQ to a passed-through device. | ||
1402 | |||
1403 | struct kvm_assigned_irq { | ||
1404 | __u32 assigned_dev_id; | ||
1405 | __u32 host_irq; /* ignored (legacy field) */ | ||
1406 | __u32 guest_irq; | ||
1407 | __u32 flags; | ||
1408 | union { | ||
1409 | __u32 reserved[12]; | ||
1410 | }; | ||
1411 | }; | ||
1412 | |||
1413 | The following flags are defined: | ||
1414 | |||
1415 | #define KVM_DEV_IRQ_HOST_INTX (1 << 0) | ||
1416 | #define KVM_DEV_IRQ_HOST_MSI (1 << 1) | ||
1417 | #define KVM_DEV_IRQ_HOST_MSIX (1 << 2) | ||
1418 | |||
1419 | #define KVM_DEV_IRQ_GUEST_INTX (1 << 8) | ||
1420 | #define KVM_DEV_IRQ_GUEST_MSI (1 << 9) | ||
1421 | #define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) | ||
1422 | |||
1423 | It is not valid to specify multiple types per host or guest IRQ. However, the | ||
1424 | IRQ type of host and guest can differ or can even be null. | ||
1425 | |||
1426 | Errors: | ||
1427 | ENOTTY: kernel does not support this ioctl | ||
1428 | |||
1429 | Other error conditions may be defined by individual device types or | ||
1430 | have their standard meanings. | ||
1431 | |||
1432 | |||
1433 | 4.51 KVM_DEASSIGN_DEV_IRQ (deprecated) | ||
1434 | |||
1435 | Capability: KVM_CAP_ASSIGN_DEV_IRQ | ||
1436 | Architectures: x86 | ||
1437 | Type: vm ioctl | ||
1438 | Parameters: struct kvm_assigned_irq (in) | ||
1439 | Returns: 0 on success, -1 on error | ||
1440 | |||
1441 | Ends an IRQ assignment to a passed-through device. | ||
1442 | |||
1443 | See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified | ||
1444 | by assigned_dev_id, flags must correspond to the IRQ type specified on | ||
1445 | KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. | ||
1446 | |||
1447 | |||
1448 | 4.52 KVM_SET_GSI_ROUTING | 1329 | 4.52 KVM_SET_GSI_ROUTING |
1449 | 1330 | ||
1450 | Capability: KVM_CAP_IRQ_ROUTING | 1331 | Capability: KVM_CAP_IRQ_ROUTING |
@@ -1531,52 +1412,6 @@ struct kvm_irq_routing_hv_sint { | |||
1531 | __u32 sint; | 1412 | __u32 sint; |
1532 | }; | 1413 | }; |
1533 | 1414 | ||
1534 | 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) | ||
1535 | |||
1536 | Capability: none | ||
1537 | Architectures: x86 | ||
1538 | Type: vm ioctl | ||
1539 | Parameters: struct kvm_assigned_msix_nr (in) | ||
1540 | Returns: 0 on success, -1 on error | ||
1541 | |||
1542 | Set the number of MSI-X interrupts for an assigned device. The number is | ||
1543 | reset again by terminating the MSI-X assignment of the device via | ||
1544 | KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier | ||
1545 | point will fail. | ||
1546 | |||
1547 | struct kvm_assigned_msix_nr { | ||
1548 | __u32 assigned_dev_id; | ||
1549 | __u16 entry_nr; | ||
1550 | __u16 padding; | ||
1551 | }; | ||
1552 | |||
1553 | #define KVM_MAX_MSIX_PER_DEV 256 | ||
1554 | |||
1555 | |||
1556 | 4.54 KVM_ASSIGN_SET_MSIX_ENTRY (deprecated) | ||
1557 | |||
1558 | Capability: none | ||
1559 | Architectures: x86 | ||
1560 | Type: vm ioctl | ||
1561 | Parameters: struct kvm_assigned_msix_entry (in) | ||
1562 | Returns: 0 on success, -1 on error | ||
1563 | |||
1564 | Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting | ||
1565 | the GSI vector to zero means disabling the interrupt. | ||
1566 | |||
1567 | struct kvm_assigned_msix_entry { | ||
1568 | __u32 assigned_dev_id; | ||
1569 | __u32 gsi; | ||
1570 | __u16 entry; /* The index of entry in the MSI-X table */ | ||
1571 | __u16 padding[3]; | ||
1572 | }; | ||
1573 | |||
1574 | Errors: | ||
1575 | ENOTTY: kernel does not support this ioctl | ||
1576 | |||
1577 | Other error conditions may be defined by individual device types or | ||
1578 | have their standard meanings. | ||
1579 | |||
1580 | 1415 | ||
1581 | 4.55 KVM_SET_TSC_KHZ | 1416 | 4.55 KVM_SET_TSC_KHZ |
1582 | 1417 | ||
@@ -1728,40 +1563,6 @@ should skip processing the bitmap and just invalidate everything. It must | |||
1728 | be set to the number of set bits in the bitmap. | 1563 | be set to the number of set bits in the bitmap. |
1729 | 1564 | ||
1730 | 1565 | ||
1731 | 4.61 KVM_ASSIGN_SET_INTX_MASK (deprecated) | ||
1732 | |||
1733 | Capability: KVM_CAP_PCI_2_3 | ||
1734 | Architectures: x86 | ||
1735 | Type: vm ioctl | ||
1736 | Parameters: struct kvm_assigned_pci_dev (in) | ||
1737 | Returns: 0 on success, -1 on error | ||
1738 | |||
1739 | Allows userspace to mask PCI INTx interrupts from the assigned device. The | ||
1740 | kernel will not deliver INTx interrupts to the guest between setting and | ||
1741 | clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use of | ||
1742 | and emulation of PCI 2.3 INTx disable command register behavior. | ||
1743 | |||
1744 | This may be used for both PCI 2.3 devices supporting INTx disable natively and | ||
1745 | older devices lacking this support. Userspace is responsible for emulating the | ||
1746 | read value of the INTx disable bit in the guest visible PCI command register. | ||
1747 | When modifying the INTx disable state, userspace should precede updating the | ||
1748 | physical device command register by calling this ioctl to inform the kernel of | ||
1749 | the new intended INTx mask state. | ||
1750 | |||
1751 | Note that the kernel uses the device INTx disable bit to internally manage the | ||
1752 | device interrupt state for PCI 2.3 devices. Reads of this register may | ||
1753 | therefore not match the expected value. Writes should always use the guest | ||
1754 | intended INTx disable value rather than attempting to read-copy-update the | ||
1755 | current physical device state. Races between user and kernel updates to the | ||
1756 | INTx disable bit are handled lazily in the kernel. It's possible the device | ||
1757 | may generate unintended interrupts, but they will not be injected into the | ||
1758 | guest. | ||
1759 | |||
1760 | See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified | ||
1761 | by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is | ||
1762 | evaluated. | ||
1763 | |||
1764 | |||
1765 | 4.62 KVM_CREATE_SPAPR_TCE | 1566 | 4.62 KVM_CREATE_SPAPR_TCE |
1766 | 1567 | ||
1767 | Capability: KVM_CAP_SPAPR_TCE | 1568 | Capability: KVM_CAP_SPAPR_TCE |
@@ -2068,11 +1869,23 @@ registers, find a list below: | |||
2068 | MIPS | KVM_REG_MIPS_CP0_ENTRYLO0 | 64 | 1869 | MIPS | KVM_REG_MIPS_CP0_ENTRYLO0 | 64 |
2069 | MIPS | KVM_REG_MIPS_CP0_ENTRYLO1 | 64 | 1870 | MIPS | KVM_REG_MIPS_CP0_ENTRYLO1 | 64 |
2070 | MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64 | 1871 | MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64 |
1872 | MIPS | KVM_REG_MIPS_CP0_CONTEXTCONFIG| 32 | ||
2071 | MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64 | 1873 | MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64 |
1874 | MIPS | KVM_REG_MIPS_CP0_XCONTEXTCONFIG| 64 | ||
2072 | MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32 | 1875 | MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32 |
1876 | MIPS | KVM_REG_MIPS_CP0_PAGEGRAIN | 32 | ||
1877 | MIPS | KVM_REG_MIPS_CP0_SEGCTL0 | 64 | ||
1878 | MIPS | KVM_REG_MIPS_CP0_SEGCTL1 | 64 | ||
1879 | MIPS | KVM_REG_MIPS_CP0_SEGCTL2 | 64 | ||
1880 | MIPS | KVM_REG_MIPS_CP0_PWBASE | 64 | ||
1881 | MIPS | KVM_REG_MIPS_CP0_PWFIELD | 64 | ||
1882 | MIPS | KVM_REG_MIPS_CP0_PWSIZE | 64 | ||
2073 | MIPS | KVM_REG_MIPS_CP0_WIRED | 32 | 1883 | MIPS | KVM_REG_MIPS_CP0_WIRED | 32 |
1884 | MIPS | KVM_REG_MIPS_CP0_PWCTL | 32 | ||
2074 | MIPS | KVM_REG_MIPS_CP0_HWRENA | 32 | 1885 | MIPS | KVM_REG_MIPS_CP0_HWRENA | 32 |
2075 | MIPS | KVM_REG_MIPS_CP0_BADVADDR | 64 | 1886 | MIPS | KVM_REG_MIPS_CP0_BADVADDR | 64 |
1887 | MIPS | KVM_REG_MIPS_CP0_BADINSTR | 32 | ||
1888 | MIPS | KVM_REG_MIPS_CP0_BADINSTRP | 32 | ||
2076 | MIPS | KVM_REG_MIPS_CP0_COUNT | 32 | 1889 | MIPS | KVM_REG_MIPS_CP0_COUNT | 32 |
2077 | MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64 | 1890 | MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64 |
2078 | MIPS | KVM_REG_MIPS_CP0_COMPARE | 32 | 1891 | MIPS | KVM_REG_MIPS_CP0_COMPARE | 32 |
@@ -2089,6 +1902,7 @@ registers, find a list below: | |||
2089 | MIPS | KVM_REG_MIPS_CP0_CONFIG4 | 32 | 1902 | MIPS | KVM_REG_MIPS_CP0_CONFIG4 | 32 |
2090 | MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32 | 1903 | MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32 |
2091 | MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32 | 1904 | MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32 |
1905 | MIPS | KVM_REG_MIPS_CP0_XCONTEXT | 64 | ||
2092 | MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64 | 1906 | MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64 |
2093 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH1 | 64 | 1907 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH1 | 64 |
2094 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH2 | 64 | 1908 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH2 | 64 |
@@ -2096,6 +1910,7 @@ registers, find a list below: | |||
2096 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH4 | 64 | 1910 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH4 | 64 |
2097 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH5 | 64 | 1911 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH5 | 64 |
2098 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH6 | 64 | 1912 | MIPS | KVM_REG_MIPS_CP0_KSCRATCH6 | 64 |
1913 | MIPS | KVM_REG_MIPS_CP0_MAAR(0..63) | 64 | ||
2099 | MIPS | KVM_REG_MIPS_COUNT_CTL | 64 | 1914 | MIPS | KVM_REG_MIPS_COUNT_CTL | 64 |
2100 | MIPS | KVM_REG_MIPS_COUNT_RESUME | 64 | 1915 | MIPS | KVM_REG_MIPS_COUNT_RESUME | 64 |
2101 | MIPS | KVM_REG_MIPS_COUNT_HZ | 64 | 1916 | MIPS | KVM_REG_MIPS_COUNT_HZ | 64 |
@@ -2162,6 +1977,10 @@ hardware, host kernel, guest, and whether XPA is present in the guest, i.e. | |||
2162 | with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and | 1977 | with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and |
2163 | the PFNX field starting at bit 30. | 1978 | the PFNX field starting at bit 30. |
2164 | 1979 | ||
1980 | MIPS MAARs (see KVM_REG_MIPS_CP0_MAAR(*) above) have the following id bit | ||
1981 | patterns: | ||
1982 | 0x7030 0000 0001 01 <reg:8> | ||
1983 | |||
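For illustration (sketch only, not from this patch): userspace could address one of these registers through KVM_GET_ONE_REG, with the id built from the bit pattern above. vcpu_fd is an assumed VCPU file descriptor and <linux/kvm.h> / <sys/ioctl.h> are assumed included.

    struct kvm_one_reg reg;
    __u64 val;

    reg.id   = 0x7030000000010100ULL | 2;   /* KVM_REG_MIPS_CP0_MAAR(2) */
    reg.addr = (__u64)(unsigned long)&val;
    ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);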
2165 | MIPS KVM control registers (see above) have the following id bit patterns: | 1984 | MIPS KVM control registers (see above) have the following id bit patterns: |
2166 | 0x7030 0000 0002 <reg:16> | 1985 | 0x7030 0000 0002 <reg:16> |
2167 | 1986 | ||
@@ -3377,6 +3196,69 @@ struct kvm_ppc_resize_hpt { | |||
3377 | __u32 pad; | 3196 | __u32 pad; |
3378 | }; | 3197 | }; |
3379 | 3198 | ||
3199 | 4.104 KVM_X86_GET_MCE_CAP_SUPPORTED | ||
3200 | |||
3201 | Capability: KVM_CAP_MCE | ||
3202 | Architectures: x86 | ||
3203 | Type: system ioctl | ||
3204 | Parameters: u64 mce_cap (out) | ||
3205 | Returns: 0 on success, -1 on error | ||
3206 | |||
3207 | Returns supported MCE capabilities. The u64 mce_cap parameter | ||
3208 | has the same format as the MSR_IA32_MCG_CAP register. Supported | ||
3209 | capabilities will have the corresponding bits set. | ||
3210 | |||
3211 | 4.105 KVM_X86_SETUP_MCE | ||
3212 | |||
3213 | Capability: KVM_CAP_MCE | ||
3214 | Architectures: x86 | ||
3215 | Type: vcpu ioctl | ||
3216 | Parameters: u64 mcg_cap (in) | ||
3217 | Returns: 0 on success, | ||
3218 | -EFAULT if u64 mcg_cap cannot be read, | ||
3219 | -EINVAL if the requested number of banks is invalid, | ||
3220 | -EINVAL if requested MCE capability is not supported. | ||
3221 | |||
3222 | Initializes MCE support for use. The u64 mcg_cap parameter | ||
3223 | has the same format as the MSR_IA32_MCG_CAP register and | ||
3224 | specifies which capabilities should be enabled. The maximum | ||
3225 | supported number of error-reporting banks can be retrieved when | ||
3226 | checking for KVM_CAP_MCE. The supported capabilities can be | ||
3227 | retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED. | ||
3228 | |||
3229 | 4.106 KVM_X86_SET_MCE | ||
3230 | |||
3231 | Capability: KVM_CAP_MCE | ||
3232 | Architectures: x86 | ||
3233 | Type: vcpu ioctl | ||
3234 | Parameters: struct kvm_x86_mce (in) | ||
3235 | Returns: 0 on success, | ||
3236 | -EFAULT if struct kvm_x86_mce cannot be read, | ||
3237 | -EINVAL if the bank number is invalid, | ||
3238 | -EINVAL if VAL bit is not set in status field. | ||
3239 | |||
3240 | Inject a machine check error (MCE) into the guest. The input | ||
3241 | parameter is: | ||
3242 | |||
3243 | struct kvm_x86_mce { | ||
3244 | __u64 status; | ||
3245 | __u64 addr; | ||
3246 | __u64 misc; | ||
3247 | __u64 mcg_status; | ||
3248 | __u8 bank; | ||
3249 | __u8 pad1[7]; | ||
3250 | __u64 pad2[3]; | ||
3251 | }; | ||
3252 | |||
3253 | If the MCE being reported is an uncorrected error, KVM will | ||
3254 | inject it as an MCE exception into the guest. If the guest | ||
3255 | MCG_STATUS register reports that an MCE is in progress, KVM | ||
3256 | causes a KVM_EXIT_SHUTDOWN vmexit. | ||
3257 | |||
3258 | Otherwise, if the MCE is a corrected error, KVM will just | ||
3259 | store it in the corresponding bank (provided this bank is | ||
3260 | not holding a previously reported uncorrected error). | ||
3261 | |||
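Taken together, a hedged sketch of the intended flow for these three ioctls (illustrative only, not part of this patch; kvm_fd and vcpu_fd are assumed open descriptors, error handling and capability masking are omitted):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void setup_and_inject_mce(int kvm_fd, int vcpu_fd)
    {
            __u64 mcg_cap;
            struct kvm_x86_mce mce;

            /* 1. Query which MCE capabilities the host kernel supports. */
            ioctl(kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, &mcg_cap);

            /* 2. Enable MCE support on the VCPU using that mask. */
            ioctl(vcpu_fd, KVM_X86_SETUP_MCE, &mcg_cap);

            /* 3. Inject an error into bank 0; the VAL bit (bit 63) must be
             *    set in status or the ioctl fails with -EINVAL. */
            memset(&mce, 0, sizeof(mce));
            mce.bank   = 0;
            mce.status = 1ULL << 63;
            ioctl(vcpu_fd, KVM_X86_SET_MCE, &mce);
    }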
3380 | 5. The kvm_run structure | 3262 | 5. The kvm_run structure |
3381 | ------------------------ | 3263 | ------------------------ |
3382 | 3264 | ||
@@ -4101,6 +3983,23 @@ to take care of that. | |||
4101 | This capability can be enabled dynamically even if VCPUs were already | 3983 | This capability can be enabled dynamically even if VCPUs were already |
4102 | created and are running. | 3984 | created and are running. |
4103 | 3985 | ||
3986 | 7.9 KVM_CAP_S390_GS | ||
3987 | |||
3988 | Architectures: s390 | ||
3989 | Parameters: none | ||
3990 | Returns: 0 on success; -EINVAL if the machine does not support | ||
3991 | guarded storage; -EBUSY if a VCPU has already been created. | ||
3992 | |||
3993 | Allows use of guarded storage for the KVM guest. | ||
3994 | |||
3995 | 7.10 KVM_CAP_S390_AIS | ||
3996 | |||
3997 | Architectures: s390 | ||
3998 | Parameters: none | ||
3999 | |||
4000 | Allow use of adapter-interruption suppression. | ||
4001 | Returns: 0 on success; -EBUSY if a VCPU has already been created. | ||
4002 | |||
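As with the other capabilities in this section, these are enabled through KVM_ENABLE_CAP on the VM file descriptor. A minimal sketch (illustrative only; vm_fd is an assumed VM descriptor, <linux/kvm.h> and <sys/ioctl.h> assumed included):

    struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_GS };

    /* Must be issued before the first VCPU is created, otherwise -EBUSY. */
    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
            perror("KVM_ENABLE_CAP(KVM_CAP_S390_GS)");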
4104 | 8. Other capabilities. | 4003 | 8. Other capabilities. |
4105 | ---------------------- | 4004 | ---------------------- |
4106 | 4005 | ||
@@ -4147,3 +4046,68 @@ This capability, if KVM_CHECK_EXTENSION indicates that it is | |||
4147 | available, means that the kernel can support guests using the | 4046 | available, means that the kernel can support guests using the |
4148 | hashed page table MMU defined in Power ISA V3.00 (as implemented in | 4047 | hashed page table MMU defined in Power ISA V3.00 (as implemented in |
4149 | the POWER9 processor), including in-memory segment tables. | 4048 | the POWER9 processor), including in-memory segment tables. |
4049 | |||
4050 | 8.5 KVM_CAP_MIPS_VZ | ||
4051 | |||
4052 | Architectures: mips | ||
4053 | |||
4054 | This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that | ||
4055 | it is available, means that full hardware assisted virtualization capabilities | ||
4056 | of the hardware are available for use through KVM. An appropriate | ||
4057 | KVM_VM_MIPS_* type must be passed to KVM_CREATE_VM to create a VM which | ||
4058 | utilises it. | ||
4059 | |||
4060 | If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is | ||
4061 | available, it means that the VM is using full hardware assisted virtualization | ||
4062 | capabilities of the hardware. This is useful to check after creating a VM with | ||
4063 | KVM_VM_MIPS_DEFAULT. | ||
4064 | |||
4065 | The value returned by KVM_CHECK_EXTENSION should be compared against known | ||
4066 | values (see below). All other values are reserved. This is to allow for the | ||
4067 | possibility of other hardware assisted virtualization implementations which | ||
4068 | may be incompatible with the MIPS VZ ASE. | ||
4069 | |||
4070 | 0: The trap & emulate implementation is in use to run guest code in user | ||
4071 | mode. Guest virtual memory segments are rearranged to fit the guest in the | ||
4072 | user mode address space. | ||
4073 | |||
4074 | 1: The MIPS VZ ASE is in use, providing full hardware assisted | ||
4075 | virtualization, including standard guest virtual memory segments. | ||
4076 | |||
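For example, a VMM that created its VM with KVM_VM_MIPS_DEFAULT could determine the implementation in use as sketched below (vm_fd is an assumed VM file descriptor; illustrative only):

    switch (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ)) {
    case 0:
            /* trap & emulate: guest segments remapped into user mode */
            break;
    case 1:
            /* MIPS VZ ASE: standard guest virtual memory segments */
            break;
    default:
            /* reserved for other hardware assisted implementations */
            break;
    }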
4077 | 8.6 KVM_CAP_MIPS_TE | ||
4078 | |||
4079 | Architectures: mips | ||
4080 | |||
4081 | This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that | ||
4082 | it is available, means that the trap & emulate implementation is available to | ||
4083 | run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware | ||
4084 | assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed | ||
4085 | to KVM_CREATE_VM to create a VM which utilises it. | ||
4086 | |||
4087 | If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is | ||
4088 | available, it means that the VM is using trap & emulate. | ||
4089 | |||
4090 | 8.7 KVM_CAP_MIPS_64BIT | ||
4091 | |||
4092 | Architectures: mips | ||
4093 | |||
4094 | This capability indicates the supported architecture type of the guest, i.e. the | ||
4095 | supported register and address width. | ||
4096 | |||
4097 | The values returned when this capability is checked by KVM_CHECK_EXTENSION on a | ||
4098 | kvm VM handle correspond roughly to the CP0_Config.AT register field, and should | ||
4099 | be checked specifically against known values (see below). All other values are | ||
4100 | reserved. | ||
4101 | |||
4102 | 0: MIPS32 or microMIPS32. | ||
4103 | Both registers and addresses are 32-bits wide. | ||
4104 | It will only be possible to run 32-bit guest code. | ||
4105 | |||
4106 | 1: MIPS64 or microMIPS64 with access only to 32-bit compatibility segments. | ||
4107 | Registers are 64-bits wide, but addresses are 32-bits wide. | ||
4108 | 64-bit guest code may run but cannot access MIPS64 memory segments. | ||
4109 | It will also be possible to run 32-bit guest code. | ||
4110 | |||
4111 | 2: MIPS64 or microMIPS64 with access to all address segments. | ||
4112 | Both registers and addresses are 64-bits wide. | ||
4113 | It will be possible to run 64-bit or 32-bit guest code. | ||
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 6b0e115301c8..c2518cea8ab4 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -14,6 +14,8 @@ FLIC provides support to | |||
14 | - purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ) | 14 | - purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ) |
15 | - enable/disable for the guest transparent async page faults | 15 | - enable/disable for the guest transparent async page faults |
16 | - register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*) | 16 | - register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*) |
17 | - modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM) | ||
18 | - inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT) | ||
17 | 19 | ||
18 | Groups: | 20 | Groups: |
19 | KVM_DEV_FLIC_ENQUEUE | 21 | KVM_DEV_FLIC_ENQUEUE |
@@ -64,12 +66,18 @@ struct kvm_s390_io_adapter { | |||
64 | __u8 isc; | 66 | __u8 isc; |
65 | __u8 maskable; | 67 | __u8 maskable; |
66 | __u8 swap; | 68 | __u8 swap; |
67 | __u8 pad; | 69 | __u8 flags; |
68 | }; | 70 | }; |
69 | 71 | ||
70 | id contains the unique id for the adapter, isc the I/O interruption subclass | 72 | id contains the unique id for the adapter, isc the I/O interruption subclass |
71 | to use, maskable whether this adapter may be masked (interrupts turned off) | 73 | to use, maskable whether this adapter may be masked (interrupts turned off), |
72 | and swap whether the indicators need to be byte swapped. | 74 | swap whether the indicators need to be byte swapped, and flags contains |
75 | further characteristics of the adapter. | ||
76 | Currently defined values for 'flags' are: | ||
77 | - KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS | ||
78 | (adapter-interruption-suppression) facility. This flag only has an effect if | ||
79 | the AIS capability is enabled. | ||
80 | Unknown flag values are ignored. | ||
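A hedged sketch of registering such an adapter through the KVM_DEV_FLIC_ADAPTER_REGISTER group (illustrative only; flic_fd is an assumed file descriptor of the created FLIC device, and the id and isc values are arbitrary):

    struct kvm_s390_io_adapter adapter = {
            .id    = 42,
            .isc   = 3,
            .flags = KVM_S390_ADAPTER_SUPPRESSIBLE,
    };
    struct kvm_device_attr attr = {
            .group = KVM_DEV_FLIC_ADAPTER_REGISTER,
            .addr  = (__u64)(unsigned long)&adapter,
    };
    ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);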
73 | 81 | ||
74 | 82 | ||
75 | KVM_DEV_FLIC_ADAPTER_MODIFY | 83 | KVM_DEV_FLIC_ADAPTER_MODIFY |
@@ -101,6 +109,33 @@ struct kvm_s390_io_adapter_req { | |||
101 | release a userspace page for the translated address specified in addr | 109 | release a userspace page for the translated address specified in addr |
102 | from the list of mappings | 110 | from the list of mappings |
103 | 111 | ||
112 | KVM_DEV_FLIC_AISM | ||
113 | modify the adapter-interruption-suppression mode for a given isc if the | ||
114 | AIS capability is enabled. Takes a kvm_s390_ais_req describing: | ||
115 | |||
116 | struct kvm_s390_ais_req { | ||
117 | __u8 isc; | ||
118 | __u16 mode; | ||
119 | }; | ||
120 | |||
121 | isc contains the target I/O interruption subclass, mode the target | ||
122 | adapter-interruption-suppression mode. The following modes are | ||
123 | currently supported: | ||
124 | - KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection | ||
125 | is always allowed; | ||
126 | - KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq | ||
127 | injection is only allowed once and the following adapter interrupts | ||
128 | will be suppressed until the mode is set again to ALL-Interruptions | ||
129 | or SINGLE-Interruption mode. | ||
130 | |||
131 | KVM_DEV_FLIC_AIRQ_INJECT | ||
132 | Inject adapter interrupts on a specified adapter. | ||
133 | attr->attr contains the unique id for the adapter, which allows for | ||
134 | adapter-specific checks and actions. | ||
135 | For adapters subject to AIS, airq injection is suppressed or allowed | ||
136 | for an isc according to the adapter-interruption-suppression mode, | ||
137 | provided the AIS capability is enabled. | ||
138 | |||
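A hedged usage sketch for the two new groups (illustrative only; flic_fd is an assumed FLIC device descriptor, and the isc and adapter id are arbitrary example values):

    /* Switch ISC 3 to SINGLE-Interruption mode. */
    struct kvm_s390_ais_req req = {
            .isc  = 3,
            .mode = KVM_S390_AIS_MODE_SINGLE,
    };
    struct kvm_device_attr aism = {
            .group = KVM_DEV_FLIC_AISM,
            .addr  = (__u64)(unsigned long)&req,
    };
    ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &aism);

    /* Inject an adapter interrupt on the adapter registered with id 42. */
    struct kvm_device_attr inject = {
            .group = KVM_DEV_FLIC_AIRQ_INJECT,
            .attr  = 42,
    };
    ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &inject);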
104 | Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on | 139 | Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on |
105 | FLIC with an unknown group or attribute gives the error code EINVAL (instead of | 140 | FLIC with an unknown group or attribute gives the error code EINVAL (instead of |
106 | ENXIO, as specified in the API documentation). It is not possible to conclude | 141 | ENXIO, as specified in the API documentation). It is not possible to conclude |
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
index ef51740c67ca..528c77c8022c 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -16,7 +16,21 @@ Groups: | |||
16 | 16 | ||
17 | KVM_DEV_VFIO_GROUP attributes: | 17 | KVM_DEV_VFIO_GROUP attributes: |
18 | KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking | 18 | KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking |
19 | kvm_device_attr.addr points to an int32_t file descriptor | ||
20 | for the VFIO group. | ||
19 | KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking | 21 | KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking |
22 | kvm_device_attr.addr points to an int32_t file descriptor | ||
23 | for the VFIO group. | ||
24 | KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table | ||
25 | allocated by sPAPR KVM. | ||
26 | kvm_device_attr.addr points to a struct: | ||
20 | 27 | ||
21 | For each, kvm_device_attr.addr points to an int32_t file descriptor | 28 | struct kvm_vfio_spapr_tce { |
22 | for the VFIO group. | 29 | __s32 groupfd; |
30 | __s32 tablefd; | ||
31 | }; | ||
32 | |||
33 | where | ||
34 | @groupfd is a file descriptor for a VFIO group; | ||
35 | @tablefd is a file descriptor for a TCE table allocated via | ||
36 | KVM_CREATE_SPAPR_TCE. | ||
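A hedged sketch of wiring the two descriptors together on the KVM-VFIO pseudo device (illustrative only; vfio_dev_fd, group_fd and table_fd are assumed to come from KVM_CREATE_DEVICE, the opened VFIO group and KVM_CREATE_SPAPR_TCE respectively):

    struct kvm_vfio_spapr_tce param = {
            .groupfd = group_fd,
            .tablefd = table_fd,
    };
    struct kvm_device_attr attr = {
            .group = KVM_DEV_VFIO_GROUP,
            .attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
            .addr  = (__u64)(unsigned long)&param,
    };
    ioctl(vfio_dev_fd, KVM_SET_DEVICE_ATTR, &attr);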
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index feaaa634f154..a890529c63ed 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -28,6 +28,11 @@ S390: | |||
28 | property inside the device tree's /hypervisor node. | 28 | property inside the device tree's /hypervisor node. |
29 | For more information refer to Documentation/virtual/kvm/ppc-pv.txt | 29 | For more information refer to Documentation/virtual/kvm/ppc-pv.txt |
30 | 30 | ||
31 | MIPS: | ||
32 | KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall | ||
33 | number in $2 (v0). Up to four arguments may be placed in $4-$7 (a0-a3) and | ||
34 | the return value is placed in $2 (v0). | ||
35 | |||
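A guest-side sketch of this calling convention (illustrative only, not from this patch; the .word value is the assumed HYPCALL encoding with code 0):

    static inline unsigned long mips_kvm_hypercall1(unsigned long num,
                                                    unsigned long arg0)
    {
            register unsigned long v0 asm("$2") = num;   /* hypercall number */
            register unsigned long a0 asm("$4") = arg0;  /* first argument */

            asm volatile(".word 0x42000028"              /* hypcall, code 0 */
                         : "+r" (v0)
                         : "r" (a0)
                         : "memory");
            return v0;                                   /* result in v0 */
    }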
31 | KVM Hypercalls Documentation | 36 | KVM Hypercalls Documentation |
32 | =========================== | 37 | =========================== |
33 | The template for each hypercall is: | 38 | The template for each hypercall is: |
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 31ee468ce667..de67ce647501 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,7 +30,6 @@ | |||
30 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED | 30 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED |
31 | 31 | ||
32 | #define KVM_USER_MEM_SLOTS 32 | 32 | #define KVM_USER_MEM_SLOTS 32 |
33 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
34 | #define KVM_HAVE_ONE_REG | 33 | #define KVM_HAVE_ONE_REG |
35 | #define KVM_HALT_POLL_NS_DEFAULT 500000 | 34 | #define KVM_HALT_POLL_NS_DEFAULT 500000 |
36 | 35 | ||
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 6ebd3e6a1fd1..254a38cace2a 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,8 @@ | |||
27 | #define __KVM_HAVE_IRQ_LINE | 27 | #define __KVM_HAVE_IRQ_LINE |
28 | #define __KVM_HAVE_READONLY_MEM | 28 | #define __KVM_HAVE_READONLY_MEM |
29 | 29 | ||
30 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
31 | |||
30 | #define KVM_REG_SIZE(id) \ | 32 | #define KVM_REG_SIZE(id) \ |
31 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) | 33 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) |
32 | 34 | ||
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 96dba7cd8be7..e3c8105ada65 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -209,9 +209,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
209 | case KVM_CAP_IMMEDIATE_EXIT: | 209 | case KVM_CAP_IMMEDIATE_EXIT: |
210 | r = 1; | 210 | r = 1; |
211 | break; | 211 | break; |
212 | case KVM_CAP_COALESCED_MMIO: | ||
213 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | ||
214 | break; | ||
215 | case KVM_CAP_ARM_SET_DEVICE_ADDR: | 212 | case KVM_CAP_ARM_SET_DEVICE_ADDR: |
216 | r = 1; | 213 | r = 1; |
217 | break; | 214 | break; |
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e7705e7bb07b..522e4f60976e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -31,7 +31,6 @@ | |||
31 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED | 31 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED |
32 | 32 | ||
33 | #define KVM_USER_MEM_SLOTS 512 | 33 | #define KVM_USER_MEM_SLOTS 512 |
34 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
35 | #define KVM_HALT_POLL_NS_DEFAULT 500000 | 34 | #define KVM_HALT_POLL_NS_DEFAULT 500000 |
36 | 35 | ||
37 | #include <kvm/arm_vgic.h> | 36 | #include <kvm/arm_vgic.h> |
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index c2860358ae3e..aa5ab69c1312 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,8 @@ | |||
39 | #define __KVM_HAVE_IRQ_LINE | 39 | #define __KVM_HAVE_IRQ_LINE |
40 | #define __KVM_HAVE_READONLY_MEM | 40 | #define __KVM_HAVE_READONLY_MEM |
41 | 41 | ||
42 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
43 | |||
42 | #define KVM_REG_SIZE(id) \ | 44 | #define KVM_REG_SIZE(id) \ |
43 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) | 45 | (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) |
44 | 46 | ||
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a008a9f03072..0a4adbc326e6 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1687,6 +1687,7 @@ config CPU_CAVIUM_OCTEON | |||
1687 | select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN | 1687 | select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN |
1688 | select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN | 1688 | select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN |
1689 | select MIPS_L1_CACHE_SHIFT_7 | 1689 | select MIPS_L1_CACHE_SHIFT_7 |
1690 | select HAVE_KVM | ||
1690 | help | 1691 | help |
1691 | The Cavium Octeon processor is a highly integrated chip containing | 1692 | The Cavium Octeon processor is a highly integrated chip containing |
1692 | many ethernet hardware widgets for networking tasks. The processor | 1693 | many ethernet hardware widgets for networking tasks. The processor |
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index e961c8a7ea66..494d38274142 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -444,6 +444,10 @@ | |||
444 | # define cpu_has_msa 0 | 444 | # define cpu_has_msa 0 |
445 | #endif | 445 | #endif |
446 | 446 | ||
447 | #ifndef cpu_has_ufr | ||
448 | # define cpu_has_ufr (cpu_data[0].options & MIPS_CPU_UFR) | ||
449 | #endif | ||
450 | |||
447 | #ifndef cpu_has_fre | 451 | #ifndef cpu_has_fre |
448 | # define cpu_has_fre (cpu_data[0].options & MIPS_CPU_FRE) | 452 | # define cpu_has_fre (cpu_data[0].options & MIPS_CPU_FRE) |
449 | #endif | 453 | #endif |
@@ -528,6 +532,9 @@ | |||
528 | #ifndef cpu_guest_has_htw | 532 | #ifndef cpu_guest_has_htw |
529 | #define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW) | 533 | #define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW) |
530 | #endif | 534 | #endif |
535 | #ifndef cpu_guest_has_mvh | ||
536 | #define cpu_guest_has_mvh (cpu_data[0].guest.options & MIPS_CPU_MVH) | ||
537 | #endif | ||
531 | #ifndef cpu_guest_has_msa | 538 | #ifndef cpu_guest_has_msa |
532 | #define cpu_guest_has_msa (cpu_data[0].guest.ases & MIPS_ASE_MSA) | 539 | #define cpu_guest_has_msa (cpu_data[0].guest.ases & MIPS_ASE_MSA) |
533 | #endif | 540 | #endif |
@@ -543,6 +550,9 @@ | |||
543 | #ifndef cpu_guest_has_maar | 550 | #ifndef cpu_guest_has_maar |
544 | #define cpu_guest_has_maar (cpu_data[0].guest.options & MIPS_CPU_MAAR) | 551 | #define cpu_guest_has_maar (cpu_data[0].guest.options & MIPS_CPU_MAAR) |
545 | #endif | 552 | #endif |
553 | #ifndef cpu_guest_has_userlocal | ||
554 | #define cpu_guest_has_userlocal (cpu_data[0].guest.options & MIPS_CPU_ULRI) | ||
555 | #endif | ||
546 | 556 | ||
547 | /* | 557 | /* |
548 | * Guest dynamic capabilities | 558 | * Guest dynamic capabilities |
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index edbe2734a1bf..be3b4c25f335 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -33,6 +33,7 @@ struct guest_info { | |||
33 | unsigned long ases_dyn; | 33 | unsigned long ases_dyn; |
34 | unsigned long long options; | 34 | unsigned long long options; |
35 | unsigned long long options_dyn; | 35 | unsigned long long options_dyn; |
36 | int tlbsize; | ||
36 | u8 conf; | 37 | u8 conf; |
37 | u8 kscratch_mask; | 38 | u8 kscratch_mask; |
38 | }; | 39 | }; |
@@ -109,6 +110,7 @@ struct cpuinfo_mips { | |||
109 | struct guest_info guest; | 110 | struct guest_info guest; |
110 | unsigned int gtoffset_mask; | 111 | unsigned int gtoffset_mask; |
111 | unsigned int guestid_mask; | 112 | unsigned int guestid_mask; |
113 | unsigned int guestid_cache; | ||
112 | } __attribute__((aligned(SMP_CACHE_BYTES))); | 114 | } __attribute__((aligned(SMP_CACHE_BYTES))); |
113 | 115 | ||
114 | extern struct cpuinfo_mips cpu_data[]; | 116 | extern struct cpuinfo_mips cpu_data[]; |
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 9a8372484edc..98f59307e6a3 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -415,6 +415,7 @@ enum cpu_type_enum { | |||
415 | #define MIPS_CPU_GUESTCTL2 MBIT_ULL(50) /* CPU has VZ GuestCtl2 register */ | 415 | #define MIPS_CPU_GUESTCTL2 MBIT_ULL(50) /* CPU has VZ GuestCtl2 register */ |
416 | #define MIPS_CPU_GUESTID MBIT_ULL(51) /* CPU uses VZ ASE GuestID feature */ | 416 | #define MIPS_CPU_GUESTID MBIT_ULL(51) /* CPU uses VZ ASE GuestID feature */ |
417 | #define MIPS_CPU_DRG MBIT_ULL(52) /* CPU has VZ Direct Root to Guest (DRG) */ | 417 | #define MIPS_CPU_DRG MBIT_ULL(52) /* CPU has VZ Direct Root to Guest (DRG) */ |
418 | #define MIPS_CPU_UFR MBIT_ULL(53) /* CPU supports User mode FR switching */ | ||
418 | 419 | ||
419 | /* | 420 | /* |
420 | * CPU ASE encodings | 421 | * CPU ASE encodings |
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 05e785fc061d..2998479fd4e8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -10,6 +10,7 @@ | |||
10 | #ifndef __MIPS_KVM_HOST_H__ | 10 | #ifndef __MIPS_KVM_HOST_H__ |
11 | #define __MIPS_KVM_HOST_H__ | 11 | #define __MIPS_KVM_HOST_H__ |
12 | 12 | ||
13 | #include <linux/cpumask.h> | ||
13 | #include <linux/mutex.h> | 14 | #include <linux/mutex.h> |
14 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
15 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
@@ -33,12 +34,23 @@ | |||
33 | #define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) | 34 | #define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) |
34 | #define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0) | 35 | #define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0) |
35 | #define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0) | 36 | #define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0) |
37 | #define KVM_REG_MIPS_CP0_CONTEXTCONFIG MIPS_CP0_32(4, 1) | ||
36 | #define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2) | 38 | #define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2) |
39 | #define KVM_REG_MIPS_CP0_XCONTEXTCONFIG MIPS_CP0_64(4, 3) | ||
37 | #define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0) | 40 | #define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0) |
38 | #define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1) | 41 | #define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1) |
42 | #define KVM_REG_MIPS_CP0_SEGCTL0 MIPS_CP0_64(5, 2) | ||
43 | #define KVM_REG_MIPS_CP0_SEGCTL1 MIPS_CP0_64(5, 3) | ||
44 | #define KVM_REG_MIPS_CP0_SEGCTL2 MIPS_CP0_64(5, 4) | ||
45 | #define KVM_REG_MIPS_CP0_PWBASE MIPS_CP0_64(5, 5) | ||
46 | #define KVM_REG_MIPS_CP0_PWFIELD MIPS_CP0_64(5, 6) | ||
47 | #define KVM_REG_MIPS_CP0_PWSIZE MIPS_CP0_64(5, 7) | ||
39 | #define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0) | 48 | #define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0) |
49 | #define KVM_REG_MIPS_CP0_PWCTL MIPS_CP0_32(6, 6) | ||
40 | #define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0) | 50 | #define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0) |
41 | #define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0) | 51 | #define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0) |
52 | #define KVM_REG_MIPS_CP0_BADINSTR MIPS_CP0_32(8, 1) | ||
53 | #define KVM_REG_MIPS_CP0_BADINSTRP MIPS_CP0_32(8, 2) | ||
42 | #define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0) | 54 | #define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0) |
43 | #define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) | 55 | #define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) |
44 | #define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) | 56 | #define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) |
@@ -55,6 +67,7 @@ | |||
55 | #define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4) | 67 | #define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4) |
56 | #define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5) | 68 | #define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5) |
57 | #define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) | 69 | #define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) |
70 | #define KVM_REG_MIPS_CP0_MAARI MIPS_CP0_64(17, 2) | ||
58 | #define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) | 71 | #define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) |
59 | #define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) | 72 | #define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) |
60 | #define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2) | 73 | #define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2) |
@@ -70,9 +83,13 @@ | |||
70 | /* memory slots that are not exposed to userspace */ | 83 | /* memory slots that are not exposed to userspace */ |
71 | #define KVM_PRIVATE_MEM_SLOTS 0 | 84 | #define KVM_PRIVATE_MEM_SLOTS 0 |
72 | 85 | ||
73 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
74 | #define KVM_HALT_POLL_NS_DEFAULT 500000 | 86 | #define KVM_HALT_POLL_NS_DEFAULT 500000 |
75 | 87 | ||
88 | #ifdef CONFIG_KVM_MIPS_VZ | ||
89 | extern unsigned long GUESTID_MASK; | ||
90 | extern unsigned long GUESTID_FIRST_VERSION; | ||
91 | extern unsigned long GUESTID_VERSION_MASK; | ||
92 | #endif | ||
76 | 93 | ||
77 | 94 | ||
78 | /* | 95 | /* |
@@ -145,6 +162,16 @@ struct kvm_vcpu_stat { | |||
145 | u64 fpe_exits; | 162 | u64 fpe_exits; |
146 | u64 msa_disabled_exits; | 163 | u64 msa_disabled_exits; |
147 | u64 flush_dcache_exits; | 164 | u64 flush_dcache_exits; |
165 | #ifdef CONFIG_KVM_MIPS_VZ | ||
166 | u64 vz_gpsi_exits; | ||
167 | u64 vz_gsfc_exits; | ||
168 | u64 vz_hc_exits; | ||
169 | u64 vz_grr_exits; | ||
170 | u64 vz_gva_exits; | ||
171 | u64 vz_ghfc_exits; | ||
172 | u64 vz_gpa_exits; | ||
173 | u64 vz_resvd_exits; | ||
174 | #endif | ||
148 | u64 halt_successful_poll; | 175 | u64 halt_successful_poll; |
149 | u64 halt_attempted_poll; | 176 | u64 halt_attempted_poll; |
150 | u64 halt_poll_invalid; | 177 | u64 halt_poll_invalid; |
@@ -157,6 +184,8 @@ struct kvm_arch_memory_slot { | |||
157 | struct kvm_arch { | 184 | struct kvm_arch { |
158 | /* Guest physical mm */ | 185 | /* Guest physical mm */ |
159 | struct mm_struct gpa_mm; | 186 | struct mm_struct gpa_mm; |
187 | /* Mask of CPUs needing GPA ASID flush */ | ||
188 | cpumask_t asid_flush_mask; | ||
160 | }; | 189 | }; |
161 | 190 | ||
162 | #define N_MIPS_COPROC_REGS 32 | 191 | #define N_MIPS_COPROC_REGS 32 |
@@ -214,6 +243,11 @@ struct mips_coproc { | |||
214 | #define MIPS_CP0_CONFIG4_SEL 4 | 243 | #define MIPS_CP0_CONFIG4_SEL 4 |
215 | #define MIPS_CP0_CONFIG5_SEL 5 | 244 | #define MIPS_CP0_CONFIG5_SEL 5 |
216 | 245 | ||
246 | #define MIPS_CP0_GUESTCTL2 10 | ||
247 | #define MIPS_CP0_GUESTCTL2_SEL 5 | ||
248 | #define MIPS_CP0_GTOFFSET 12 | ||
249 | #define MIPS_CP0_GTOFFSET_SEL 7 | ||
250 | |||
217 | /* Resume Flags */ | 251 | /* Resume Flags */ |
218 | #define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ | 252 | #define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ |
219 | #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ | 253 | #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ |
@@ -229,6 +263,7 @@ enum emulation_result { | |||
229 | EMULATE_WAIT, /* WAIT instruction */ | 263 | EMULATE_WAIT, /* WAIT instruction */ |
230 | EMULATE_PRIV_FAIL, | 264 | EMULATE_PRIV_FAIL, |
231 | EMULATE_EXCEPT, /* A guest exception has been generated */ | 265 | EMULATE_EXCEPT, /* A guest exception has been generated */ |
266 | EMULATE_HYPERCALL, /* HYPCALL instruction */ | ||
232 | }; | 267 | }; |
233 | 268 | ||
234 | #define mips3_paddr_to_tlbpfn(x) \ | 269 | #define mips3_paddr_to_tlbpfn(x) \ |
@@ -276,13 +311,18 @@ struct kvm_mmu_memory_cache { | |||
276 | struct kvm_vcpu_arch { | 311 | struct kvm_vcpu_arch { |
277 | void *guest_ebase; | 312 | void *guest_ebase; |
278 | int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); | 313 | int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); |
314 | |||
315 | /* Host registers preserved across guest mode execution */ | ||
279 | unsigned long host_stack; | 316 | unsigned long host_stack; |
280 | unsigned long host_gp; | 317 | unsigned long host_gp; |
318 | unsigned long host_pgd; | ||
319 | unsigned long host_entryhi; | ||
281 | 320 | ||
282 | /* Host CP0 registers used when handling exits from guest */ | 321 | /* Host CP0 registers used when handling exits from guest */ |
283 | unsigned long host_cp0_badvaddr; | 322 | unsigned long host_cp0_badvaddr; |
284 | unsigned long host_cp0_epc; | 323 | unsigned long host_cp0_epc; |
285 | u32 host_cp0_cause; | 324 | u32 host_cp0_cause; |
325 | u32 host_cp0_guestctl0; | ||
286 | u32 host_cp0_badinstr; | 326 | u32 host_cp0_badinstr; |
287 | u32 host_cp0_badinstrp; | 327 | u32 host_cp0_badinstrp; |
288 | 328 | ||
@@ -340,7 +380,23 @@ struct kvm_vcpu_arch { | |||
340 | /* Cache some mmu pages needed inside spinlock regions */ | 380 | /* Cache some mmu pages needed inside spinlock regions */ |
341 | struct kvm_mmu_memory_cache mmu_page_cache; | 381 | struct kvm_mmu_memory_cache mmu_page_cache; |
342 | 382 | ||
383 | #ifdef CONFIG_KVM_MIPS_VZ | ||
384 | /* vcpu's vzguestid is different on each host cpu in an smp system */ | ||
385 | u32 vzguestid[NR_CPUS]; | ||
386 | |||
387 | /* wired guest TLB entries */ | ||
388 | struct kvm_mips_tlb *wired_tlb; | ||
389 | unsigned int wired_tlb_limit; | ||
390 | unsigned int wired_tlb_used; | ||
391 | |||
392 | /* emulated guest MAAR registers */ | ||
393 | unsigned long maar[6]; | ||
394 | #endif | ||
395 | |||
396 | /* Last CPU the VCPU state was loaded on */ | ||
343 | int last_sched_cpu; | 397 | int last_sched_cpu; |
398 | /* Last CPU the VCPU actually executed guest code on */ | ||
399 | int last_exec_cpu; | ||
344 | 400 | ||
345 | /* WAIT executed */ | 401 | /* WAIT executed */ |
346 | int wait; | 402 | int wait; |
@@ -349,78 +405,6 @@ struct kvm_vcpu_arch { | |||
349 | u8 msa_enabled; | 405 | u8 msa_enabled; |
350 | }; | 406 | }; |
351 | 407 | ||
352 | |||
353 | #define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) | ||
354 | #define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) | ||
355 | #define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) | ||
356 | #define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val)) | ||
357 | #define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) | ||
358 | #define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val)) | ||
359 | #define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) | ||
360 | #define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) | ||
361 | #define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) | ||
362 | #define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val)) | ||
363 | #define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0]) | ||
364 | #define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val)) | ||
365 | #define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0]) | ||
366 | #define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val)) | ||
367 | #define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0]) | ||
368 | #define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val)) | ||
369 | #define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0]) | ||
370 | #define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val)) | ||
371 | #define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0]) | ||
372 | #define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val)) | ||
373 | #define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0]) | ||
374 | #define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val)) | ||
375 | #define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0]) | ||
376 | #define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val)) | ||
377 | #define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0]) | ||
378 | #define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val)) | ||
379 | #define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1]) | ||
380 | #define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val)) | ||
381 | #define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0]) | ||
382 | #define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val)) | ||
383 | #define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0]) | ||
384 | #define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val)) | ||
385 | #define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0]) | ||
386 | #define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val)) | ||
387 | #define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1]) | ||
388 | #define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val)) | ||
389 | #define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0]) | ||
390 | #define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1]) | ||
391 | #define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2]) | ||
392 | #define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3]) | ||
393 | #define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4]) | ||
394 | #define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5]) | ||
395 | #define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7]) | ||
396 | #define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val)) | ||
397 | #define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val)) | ||
398 | #define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val)) | ||
399 | #define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val)) | ||
400 | #define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val)) | ||
401 | #define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val)) | ||
402 | #define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) | ||
403 | #define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) | ||
404 | #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) | ||
405 | #define kvm_read_c0_guest_kscratch1(cop0) (cop0->reg[MIPS_CP0_DESAVE][2]) | ||
406 | #define kvm_read_c0_guest_kscratch2(cop0) (cop0->reg[MIPS_CP0_DESAVE][3]) | ||
407 | #define kvm_read_c0_guest_kscratch3(cop0) (cop0->reg[MIPS_CP0_DESAVE][4]) | ||
408 | #define kvm_read_c0_guest_kscratch4(cop0) (cop0->reg[MIPS_CP0_DESAVE][5]) | ||
409 | #define kvm_read_c0_guest_kscratch5(cop0) (cop0->reg[MIPS_CP0_DESAVE][6]) | ||
410 | #define kvm_read_c0_guest_kscratch6(cop0) (cop0->reg[MIPS_CP0_DESAVE][7]) | ||
411 | #define kvm_write_c0_guest_kscratch1(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][2] = (val)) | ||
412 | #define kvm_write_c0_guest_kscratch2(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][3] = (val)) | ||
413 | #define kvm_write_c0_guest_kscratch3(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][4] = (val)) | ||
414 | #define kvm_write_c0_guest_kscratch4(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][5] = (val)) | ||
415 | #define kvm_write_c0_guest_kscratch5(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][6] = (val)) | ||
416 | #define kvm_write_c0_guest_kscratch6(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][7] = (val)) | ||
417 | |||
418 | /* | ||
419 | * Some of the guest registers may be modified asynchronously (e.g. from a | ||
420 | * hrtimer callback in hard irq context) and therefore need stronger atomicity | ||
421 | * guarantees than other registers. | ||
422 | */ | ||
423 | |||
424 | static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg, | 408 | static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg, |
425 | unsigned long val) | 409 | unsigned long val) |
426 | { | 410 | { |
@@ -471,26 +455,286 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg, | |||
471 | } while (unlikely(!temp)); | 455 | } while (unlikely(!temp)); |
472 | } | 456 | } |
473 | 457 | ||
474 | #define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) | 458 | /* Guest register types, used in accessor build below */ |
475 | #define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) | 459 | #define __KVMT32 u32 |
460 | #define __KVMTl unsigned long | ||
476 | 461 | ||
477 | /* Cause can be modified asynchronously from hardirq hrtimer callback */ | 462 | /* |
478 | #define kvm_set_c0_guest_cause(cop0, val) \ | 463 | * __BUILD_KVM_$ops_SAVED(): kvm_$op_sw_gc0_$reg() |
479 | _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) | 464 | * These operate on the saved guest C0 state in RAM. |
480 | #define kvm_clear_c0_guest_cause(cop0, val) \ | 465 | */ |
481 | _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) | 466 | |
482 | #define kvm_change_c0_guest_cause(cop0, change, val) \ | 467 | /* Generate saved context simple accessors */ |
483 | _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \ | 468 | #define __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \ |
484 | change, val) | 469 | static inline __KVMT##type kvm_read_sw_gc0_##name(struct mips_coproc *cop0) \ |
485 | 470 | { \ | |
486 | #define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) | 471 | return cop0->reg[(_reg)][(sel)]; \ |
487 | #define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) | 472 | } \ |
488 | #define kvm_change_c0_guest_ebase(cop0, change, val) \ | 473 | static inline void kvm_write_sw_gc0_##name(struct mips_coproc *cop0, \ |
474 | __KVMT##type val) \ | ||
475 | { \ | ||
476 | cop0->reg[(_reg)][(sel)] = val; \ | ||
477 | } | ||
478 | |||
479 | /* Generate saved context bitwise modifiers */ | ||
480 | #define __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \ | ||
481 | static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \ | ||
482 | __KVMT##type val) \ | ||
483 | { \ | ||
484 | cop0->reg[(_reg)][(sel)] |= val; \ | ||
485 | } \ | ||
486 | static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \ | ||
487 | __KVMT##type val) \ | ||
488 | { \ | ||
489 | cop0->reg[(_reg)][(sel)] &= ~val; \ | ||
490 | } \ | ||
491 | static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \ | ||
492 | __KVMT##type mask, \ | ||
493 | __KVMT##type val) \ | ||
494 | { \ | ||
495 | unsigned long _mask = mask; \ | ||
496 | cop0->reg[(_reg)][(sel)] &= ~_mask; \ | ||
497 | cop0->reg[(_reg)][(sel)] |= val & _mask; \ | ||
498 | } | ||
499 | |||
500 | /* Generate saved context atomic bitwise modifiers */ | ||
501 | #define __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \ | ||
502 | static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \ | ||
503 | __KVMT##type val) \ | ||
504 | { \ | ||
505 | _kvm_atomic_set_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \ | ||
506 | } \ | ||
507 | static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \ | ||
508 | __KVMT##type val) \ | ||
509 | { \ | ||
510 | _kvm_atomic_clear_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \ | ||
511 | } \ | ||
512 | static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \ | ||
513 | __KVMT##type mask, \ | ||
514 | __KVMT##type val) \ | ||
515 | { \ | ||
516 | _kvm_atomic_change_c0_guest_reg(&cop0->reg[(_reg)][(sel)], mask, \ | ||
517 | val); \ | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * __BUILD_KVM_$ops_VZ(): kvm_$op_vz_gc0_$reg() | ||
522 | * These operate on the VZ guest C0 context in hardware. | ||
523 | */ | ||
524 | |||
525 | /* Generate VZ guest context simple accessors */ | ||
526 | #define __BUILD_KVM_RW_VZ(name, type, _reg, sel) \ | ||
527 | static inline __KVMT##type kvm_read_vz_gc0_##name(struct mips_coproc *cop0) \ | ||
528 | { \ | ||
529 | return read_gc0_##name(); \ | ||
530 | } \ | ||
531 | static inline void kvm_write_vz_gc0_##name(struct mips_coproc *cop0, \ | ||
532 | __KVMT##type val) \ | ||
533 | { \ | ||
534 | write_gc0_##name(val); \ | ||
535 | } | ||
536 | |||
537 | /* Generate VZ guest context bitwise modifiers */ | ||
538 | #define __BUILD_KVM_SET_VZ(name, type, _reg, sel) \ | ||
539 | static inline void kvm_set_vz_gc0_##name(struct mips_coproc *cop0, \ | ||
540 | __KVMT##type val) \ | ||
541 | { \ | ||
542 | set_gc0_##name(val); \ | ||
543 | } \ | ||
544 | static inline void kvm_clear_vz_gc0_##name(struct mips_coproc *cop0, \ | ||
545 | __KVMT##type val) \ | ||
546 | { \ | ||
547 | clear_gc0_##name(val); \ | ||
548 | } \ | ||
549 | static inline void kvm_change_vz_gc0_##name(struct mips_coproc *cop0, \ | ||
550 | __KVMT##type mask, \ | ||
551 | __KVMT##type val) \ | ||
552 | { \ | ||
553 | change_gc0_##name(mask, val); \ | ||
554 | } | ||
555 | |||
556 | /* Generate VZ guest context save/restore to/from saved context */ | ||
557 | #define __BUILD_KVM_SAVE_VZ(name, _reg, sel) \ | ||
558 | static inline void kvm_restore_gc0_##name(struct mips_coproc *cop0) \ | ||
559 | { \ | ||
560 | write_gc0_##name(cop0->reg[(_reg)][(sel)]); \ | ||
561 | } \ | ||
562 | static inline void kvm_save_gc0_##name(struct mips_coproc *cop0) \ | ||
563 | { \ | ||
564 | cop0->reg[(_reg)][(sel)] = read_gc0_##name(); \ | ||
565 | } | ||
566 | |||
567 | /* | ||
568 | * __BUILD_KVM_$ops_WRAP(): kvm_$op_$name1() -> kvm_$op_$name2() | ||
569 | * These wrap a set of operations to provide them with a different name. | ||
570 | */ | ||
571 | |||
572 | /* Generate simple accessor wrapper */ | ||
573 | #define __BUILD_KVM_RW_WRAP(name1, name2, type) \ | ||
574 | static inline __KVMT##type kvm_read_##name1(struct mips_coproc *cop0) \ | ||
575 | { \ | ||
576 | return kvm_read_##name2(cop0); \ | ||
577 | } \ | ||
578 | static inline void kvm_write_##name1(struct mips_coproc *cop0, \ | ||
579 | __KVMT##type val) \ | ||
580 | { \ | ||
581 | kvm_write_##name2(cop0, val); \ | ||
582 | } | ||
583 | |||
584 | /* Generate bitwise modifier wrapper */ | ||
585 | #define __BUILD_KVM_SET_WRAP(name1, name2, type) \ | ||
586 | static inline void kvm_set_##name1(struct mips_coproc *cop0, \ | ||
587 | __KVMT##type val) \ | ||
489 | { \ | 588 | { \ |
490 | kvm_clear_c0_guest_ebase(cop0, change); \ | 589 | kvm_set_##name2(cop0, val); \ |
491 | kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \ | 590 | } \ |
591 | static inline void kvm_clear_##name1(struct mips_coproc *cop0, \ | ||
592 | __KVMT##type val) \ | ||
593 | { \ | ||
594 | kvm_clear_##name2(cop0, val); \ | ||
595 | } \ | ||
596 | static inline void kvm_change_##name1(struct mips_coproc *cop0, \ | ||
597 | __KVMT##type mask, \ | ||
598 | __KVMT##type val) \ | ||
599 | { \ | ||
600 | kvm_change_##name2(cop0, mask, val); \ | ||
492 | } | 601 | } |
493 | 602 | ||
603 | /* | ||
604 | * __BUILD_KVM_$ops_SW(): kvm_$op_c0_guest_$reg() -> kvm_$op_sw_gc0_$reg() | ||
605 | * These generate accessors operating on the saved context in RAM, and wrap them | ||
606 | * with the common guest C0 accessors (for use by common emulation code). | ||
607 | */ | ||
608 | |||
609 | #define __BUILD_KVM_RW_SW(name, type, _reg, sel) \ | ||
610 | __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \ | ||
611 | __BUILD_KVM_RW_WRAP(c0_guest_##name, sw_gc0_##name, type) | ||
612 | |||
613 | #define __BUILD_KVM_SET_SW(name, type, _reg, sel) \ | ||
614 | __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \ | ||
615 | __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type) | ||
616 | |||
617 | #define __BUILD_KVM_ATOMIC_SW(name, type, _reg, sel) \ | ||
618 | __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \ | ||
619 | __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type) | ||
620 | |||
621 | #ifndef CONFIG_KVM_MIPS_VZ | ||
622 | |||
623 | /* | ||
624 | * T&E (trap & emulate software based virtualisation) | ||
625 | * We generate the common accessors operating exclusively on the saved context | ||
626 | * in RAM. | ||
627 | */ | ||
628 | |||
629 | #define __BUILD_KVM_RW_HW __BUILD_KVM_RW_SW | ||
630 | #define __BUILD_KVM_SET_HW __BUILD_KVM_SET_SW | ||
631 | #define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_ATOMIC_SW | ||
632 | |||
633 | #else | ||
634 | |||
635 | /* | ||
636 | * VZ (hardware assisted virtualisation) | ||
637 | * These macros use the active guest state in VZ mode (hardware registers). | ||
638 | */ | ||
639 | |||
640 | /* | ||
641 | * __BUILD_KVM_$ops_HW(): kvm_$op_c0_guest_$reg() -> kvm_$op_vz_gc0_$reg() | ||
642 | * These generate accessors operating on the VZ guest context in hardware, and | ||
643 | * wrap them with the common guest C0 accessors (for use by common emulation | ||
644 | * code). | ||
645 | * | ||
646 | * Accessors operating on the saved context in RAM are also generated to allow | ||
647 | * convenient explicit saving and restoring of the state. | ||
648 | */ | ||
649 | |||
650 | #define __BUILD_KVM_RW_HW(name, type, _reg, sel) \ | ||
651 | __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \ | ||
652 | __BUILD_KVM_RW_VZ(name, type, _reg, sel) \ | ||
653 | __BUILD_KVM_RW_WRAP(c0_guest_##name, vz_gc0_##name, type) \ | ||
654 | __BUILD_KVM_SAVE_VZ(name, _reg, sel) | ||
655 | |||
656 | #define __BUILD_KVM_SET_HW(name, type, _reg, sel) \ | ||
657 | __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \ | ||
658 | __BUILD_KVM_SET_VZ(name, type, _reg, sel) \ | ||
659 | __BUILD_KVM_SET_WRAP(c0_guest_##name, vz_gc0_##name, type) | ||
660 | |||
661 | /* | ||
662 | * We can't do atomic modifications of COP0 state if hardware can modify it. | ||
663 | * Races must be handled explicitly. | ||
664 | */ | ||
665 | #define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_SET_HW | ||
666 | |||
667 | #endif | ||
668 | |||
669 | /* | ||
670 | * Define accessors for CP0 registers that are accessible to the guest. These | ||
671 | * are primarily used by common emulation code, which may need to access the | ||
672 | * registers differently depending on the implementation. | ||
673 | * | ||
674 | * fns_hw/sw name type reg num select | ||
675 | */ | ||
676 | __BUILD_KVM_RW_HW(index, 32, MIPS_CP0_TLB_INDEX, 0) | ||
677 | __BUILD_KVM_RW_HW(entrylo0, l, MIPS_CP0_TLB_LO0, 0) | ||
678 | __BUILD_KVM_RW_HW(entrylo1, l, MIPS_CP0_TLB_LO1, 0) | ||
679 | __BUILD_KVM_RW_HW(context, l, MIPS_CP0_TLB_CONTEXT, 0) | ||
680 | __BUILD_KVM_RW_HW(contextconfig, 32, MIPS_CP0_TLB_CONTEXT, 1) | ||
681 | __BUILD_KVM_RW_HW(userlocal, l, MIPS_CP0_TLB_CONTEXT, 2) | ||
682 | __BUILD_KVM_RW_HW(xcontextconfig, l, MIPS_CP0_TLB_CONTEXT, 3) | ||
683 | __BUILD_KVM_RW_HW(pagemask, l, MIPS_CP0_TLB_PG_MASK, 0) | ||
684 | __BUILD_KVM_RW_HW(pagegrain, 32, MIPS_CP0_TLB_PG_MASK, 1) | ||
685 | __BUILD_KVM_RW_HW(segctl0, l, MIPS_CP0_TLB_PG_MASK, 2) | ||
686 | __BUILD_KVM_RW_HW(segctl1, l, MIPS_CP0_TLB_PG_MASK, 3) | ||
687 | __BUILD_KVM_RW_HW(segctl2, l, MIPS_CP0_TLB_PG_MASK, 4) | ||
688 | __BUILD_KVM_RW_HW(pwbase, l, MIPS_CP0_TLB_PG_MASK, 5) | ||
689 | __BUILD_KVM_RW_HW(pwfield, l, MIPS_CP0_TLB_PG_MASK, 6) | ||
690 | __BUILD_KVM_RW_HW(pwsize, l, MIPS_CP0_TLB_PG_MASK, 7) | ||
691 | __BUILD_KVM_RW_HW(wired, 32, MIPS_CP0_TLB_WIRED, 0) | ||
692 | __BUILD_KVM_RW_HW(pwctl, 32, MIPS_CP0_TLB_WIRED, 6) | ||
693 | __BUILD_KVM_RW_HW(hwrena, 32, MIPS_CP0_HWRENA, 0) | ||
694 | __BUILD_KVM_RW_HW(badvaddr, l, MIPS_CP0_BAD_VADDR, 0) | ||
695 | __BUILD_KVM_RW_HW(badinstr, 32, MIPS_CP0_BAD_VADDR, 1) | ||
696 | __BUILD_KVM_RW_HW(badinstrp, 32, MIPS_CP0_BAD_VADDR, 2) | ||
697 | __BUILD_KVM_RW_SW(count, 32, MIPS_CP0_COUNT, 0) | ||
698 | __BUILD_KVM_RW_HW(entryhi, l, MIPS_CP0_TLB_HI, 0) | ||
699 | __BUILD_KVM_RW_HW(compare, 32, MIPS_CP0_COMPARE, 0) | ||
700 | __BUILD_KVM_RW_HW(status, 32, MIPS_CP0_STATUS, 0) | ||
701 | __BUILD_KVM_RW_HW(intctl, 32, MIPS_CP0_STATUS, 1) | ||
702 | __BUILD_KVM_RW_HW(cause, 32, MIPS_CP0_CAUSE, 0) | ||
703 | __BUILD_KVM_RW_HW(epc, l, MIPS_CP0_EXC_PC, 0) | ||
704 | __BUILD_KVM_RW_SW(prid, 32, MIPS_CP0_PRID, 0) | ||
705 | __BUILD_KVM_RW_HW(ebase, l, MIPS_CP0_PRID, 1) | ||
706 | __BUILD_KVM_RW_HW(config, 32, MIPS_CP0_CONFIG, 0) | ||
707 | __BUILD_KVM_RW_HW(config1, 32, MIPS_CP0_CONFIG, 1) | ||
708 | __BUILD_KVM_RW_HW(config2, 32, MIPS_CP0_CONFIG, 2) | ||
709 | __BUILD_KVM_RW_HW(config3, 32, MIPS_CP0_CONFIG, 3) | ||
710 | __BUILD_KVM_RW_HW(config4, 32, MIPS_CP0_CONFIG, 4) | ||
711 | __BUILD_KVM_RW_HW(config5, 32, MIPS_CP0_CONFIG, 5) | ||
712 | __BUILD_KVM_RW_HW(config6, 32, MIPS_CP0_CONFIG, 6) | ||
713 | __BUILD_KVM_RW_HW(config7, 32, MIPS_CP0_CONFIG, 7) | ||
714 | __BUILD_KVM_RW_SW(maari, l, MIPS_CP0_LLADDR, 2) | ||
715 | __BUILD_KVM_RW_HW(xcontext, l, MIPS_CP0_TLB_XCONTEXT, 0) | ||
716 | __BUILD_KVM_RW_HW(errorepc, l, MIPS_CP0_ERROR_PC, 0) | ||
717 | __BUILD_KVM_RW_HW(kscratch1, l, MIPS_CP0_DESAVE, 2) | ||
718 | __BUILD_KVM_RW_HW(kscratch2, l, MIPS_CP0_DESAVE, 3) | ||
719 | __BUILD_KVM_RW_HW(kscratch3, l, MIPS_CP0_DESAVE, 4) | ||
720 | __BUILD_KVM_RW_HW(kscratch4, l, MIPS_CP0_DESAVE, 5) | ||
721 | __BUILD_KVM_RW_HW(kscratch5, l, MIPS_CP0_DESAVE, 6) | ||
722 | __BUILD_KVM_RW_HW(kscratch6, l, MIPS_CP0_DESAVE, 7) | ||
723 | |||
724 | /* Bitwise operations (on HW state) */ | ||
725 | __BUILD_KVM_SET_HW(status, 32, MIPS_CP0_STATUS, 0) | ||
726 | /* Cause can be modified asynchronously from hardirq hrtimer callback */ | ||
727 | __BUILD_KVM_ATOMIC_HW(cause, 32, MIPS_CP0_CAUSE, 0) | ||
728 | __BUILD_KVM_SET_HW(ebase, l, MIPS_CP0_PRID, 1) | ||
729 | |||
730 | /* Bitwise operations (on saved state) */ | ||
731 | __BUILD_KVM_SET_SAVED(config, 32, MIPS_CP0_CONFIG, 0) | ||
732 | __BUILD_KVM_SET_SAVED(config1, 32, MIPS_CP0_CONFIG, 1) | ||
733 | __BUILD_KVM_SET_SAVED(config2, 32, MIPS_CP0_CONFIG, 2) | ||
734 | __BUILD_KVM_SET_SAVED(config3, 32, MIPS_CP0_CONFIG, 3) | ||
735 | __BUILD_KVM_SET_SAVED(config4, 32, MIPS_CP0_CONFIG, 4) | ||
736 | __BUILD_KVM_SET_SAVED(config5, 32, MIPS_CP0_CONFIG, 5) | ||
737 | |||
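To make the generated names concrete: for cause, built with __BUILD_KVM_RW_HW and __BUILD_KVM_ATOMIC_HW above, common emulation code calls kvm_read_c0_guest_cause(), kvm_write_c0_guest_cause() and kvm_set/clear/change_c0_guest_cause(). Under trap & emulate these resolve to the sw_gc0 accessors on the RAM copy (with atomic bitwise modifiers); under VZ they resolve to the vz_gc0 accessors on the hardware guest context, and kvm_save_gc0_cause()/kvm_restore_gc0_cause() are additionally generated for explicit save/restore. A minimal usage sketch (illustrative, not from the patch):

	struct mips_coproc *cop0 = vcpu->arch.cop0;
	/* RAM copy under T&E, read_gc0_cause() under VZ */
	u32 cause = kvm_read_c0_guest_cause(cop0);
	/* atomic RMW of the RAM copy under T&E, set_gc0_cause() under VZ */
	kvm_set_c0_guest_cause(cop0, CAUSEF_TI);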
494 | /* Helpers */ | 738 | /* Helpers */ |
495 | 739 | ||
496 | static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) | 740 | static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) |
@@ -531,6 +775,10 @@ struct kvm_mips_callbacks { | |||
531 | int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); | 775 | int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); |
532 | int (*handle_fpe)(struct kvm_vcpu *vcpu); | 776 | int (*handle_fpe)(struct kvm_vcpu *vcpu); |
533 | int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); | 777 | int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); |
778 | int (*handle_guest_exit)(struct kvm_vcpu *vcpu); | ||
779 | int (*hardware_enable)(void); | ||
780 | void (*hardware_disable)(void); | ||
781 | int (*check_extension)(struct kvm *kvm, long ext); | ||
534 | int (*vcpu_init)(struct kvm_vcpu *vcpu); | 782 | int (*vcpu_init)(struct kvm_vcpu *vcpu); |
535 | void (*vcpu_uninit)(struct kvm_vcpu *vcpu); | 783 | void (*vcpu_uninit)(struct kvm_vcpu *vcpu); |
536 | int (*vcpu_setup)(struct kvm_vcpu *vcpu); | 784 | int (*vcpu_setup)(struct kvm_vcpu *vcpu); |
@@ -599,6 +847,10 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu); | |||
599 | 847 | ||
600 | u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); | 848 | u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); |
601 | 849 | ||
850 | #ifdef CONFIG_KVM_MIPS_VZ | ||
851 | int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, | ||
852 | struct kvm_vcpu *vcpu, bool write_fault); | ||
853 | #endif | ||
602 | extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, | 854 | extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, |
603 | struct kvm_vcpu *vcpu, | 855 | struct kvm_vcpu *vcpu, |
604 | bool write_fault); | 856 | bool write_fault); |
@@ -625,6 +877,18 @@ extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi, | |||
625 | extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, | 877 | extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, |
626 | unsigned long entryhi); | 878 | unsigned long entryhi); |
627 | 879 | ||
880 | #ifdef CONFIG_KVM_MIPS_VZ | ||
881 | int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi); | ||
882 | int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva, | ||
883 | unsigned long *gpa); | ||
884 | void kvm_vz_local_flush_roottlb_all_guests(void); | ||
885 | void kvm_vz_local_flush_guesttlb_all(void); | ||
886 | void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index, | ||
887 | unsigned int count); | ||
888 | void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index, | ||
889 | unsigned int count); | ||
890 | #endif | ||
891 | |||
628 | void kvm_mips_suspend_mm(int cpu); | 892 | void kvm_mips_suspend_mm(int cpu); |
629 | void kvm_mips_resume_mm(int cpu); | 893 | void kvm_mips_resume_mm(int cpu); |
630 | 894 | ||
@@ -795,7 +1059,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
795 | u32 kvm_mips_read_count(struct kvm_vcpu *vcpu); | 1059 | u32 kvm_mips_read_count(struct kvm_vcpu *vcpu); |
796 | void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count); | 1060 | void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count); |
797 | void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack); | 1061 | void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack); |
798 | void kvm_mips_init_count(struct kvm_vcpu *vcpu); | 1062 | void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz); |
799 | int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); | 1063 | int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); |
800 | int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); | 1064 | int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); |
801 | int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz); | 1065 | int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz); |
@@ -803,6 +1067,20 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu); | |||
803 | void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu); | 1067 | void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu); |
804 | enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu); | 1068 | enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu); |
805 | 1069 | ||
1070 | /* fairly internal functions requiring some care to use */ | ||
1071 | int kvm_mips_count_disabled(struct kvm_vcpu *vcpu); | ||
1072 | ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count); | ||
1073 | int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before, | ||
1074 | u32 count, int min_drift); | ||
1075 | |||
1076 | #ifdef CONFIG_KVM_MIPS_VZ | ||
1077 | void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu); | ||
1078 | void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu); | ||
1079 | #else | ||
1080 | static inline void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) {} | ||
1081 | static inline void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) {} | ||
1082 | #endif | ||
1083 | |||
806 | enum emulation_result kvm_mips_check_privilege(u32 cause, | 1084 | enum emulation_result kvm_mips_check_privilege(u32 cause, |
807 | u32 *opc, | 1085 | u32 *opc, |
808 | struct kvm_run *run, | 1086 | struct kvm_run *run, |
@@ -827,11 +1105,20 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, | |||
827 | struct kvm_run *run, | 1105 | struct kvm_run *run, |
828 | struct kvm_vcpu *vcpu); | 1106 | struct kvm_vcpu *vcpu); |
829 | 1107 | ||
1108 | /* COP0 */ | ||
1109 | enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu); | ||
1110 | |||
830 | unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu); | 1111 | unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu); |
831 | unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu); | 1112 | unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu); |
832 | unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu); | 1113 | unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu); |
833 | unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu); | 1114 | unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu); |
834 | 1115 | ||
1116 | /* Hypercalls (hypcall.c) */ | ||
1117 | |||
1118 | enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu, | ||
1119 | union mips_instruction inst); | ||
1120 | int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu); | ||
1121 | |||
835 | /* Dynamic binary translation */ | 1122 | /* Dynamic binary translation */ |
836 | extern int kvm_mips_trans_cache_index(union mips_instruction inst, | 1123 | extern int kvm_mips_trans_cache_index(union mips_instruction inst, |
837 | u32 *opc, struct kvm_vcpu *vcpu); | 1124 | u32 *opc, struct kvm_vcpu *vcpu); |
@@ -846,7 +1133,6 @@ extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc, | |||
846 | extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); | 1133 | extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); |
847 | extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); | 1134 | extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); |
848 | 1135 | ||
849 | static inline void kvm_arch_hardware_disable(void) {} | ||
850 | static inline void kvm_arch_hardware_unsetup(void) {} | 1136 | static inline void kvm_arch_hardware_unsetup(void) {} |
851 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 1137 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
852 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 1138 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h index 21d9607c80d7..e10f78befbd9 100644 --- a/arch/mips/include/asm/maar.h +++ b/arch/mips/include/asm/maar.h | |||
@@ -36,7 +36,7 @@ unsigned platform_maar_init(unsigned num_pairs); | |||
36 | * @upper: The highest address that the MAAR pair will affect. Must be | 36 | * @upper: The highest address that the MAAR pair will affect. Must be |
37 | * aligned to one byte before a 2^16 byte boundary. | 37 | * aligned to one byte before a 2^16 byte boundary. |
38 | * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The | 38 | * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The |
39 | * MIPS_MAAR_V attribute will automatically be set. | 39 | * MIPS_MAAR_VL attribute will automatically be set. |
40 | * | 40 | * |
41 | * Program the pair of MAAR registers specified by idx to apply the attributes | 41 | * Program the pair of MAAR registers specified by idx to apply the attributes |
42 | * specified by attrs to the range of addresses from lower to higher. | 42 | * specified by attrs to the range of addresses from lower to higher. |
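A hedged example of the interface documented here (the address range is made up, not from the patch): a platform could mark the first 256 MiB of physical memory speculatable via MAAR pair 0, with MIPS_MAAR_VL OR'd in automatically by the function:

	/* lower must be 64KiB aligned; upper must end one byte before a
	 * 64KiB boundary, as the BUG_ON()s in write_maar_pair() check. */
	write_maar_pair(0, 0x00000000, 0x0fffffff, MIPS_MAAR_S);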
@@ -49,10 +49,10 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower, | |||
49 | BUG_ON(((upper & 0xffff) != 0xffff) | 49 | BUG_ON(((upper & 0xffff) != 0xffff) |
50 | || ((upper & ~0xffffull) & ~(MIPS_MAAR_ADDR << 4))); | 50 | || ((upper & ~0xffffull) & ~(MIPS_MAAR_ADDR << 4))); |
51 | 51 | ||
52 | /* Automatically set MIPS_MAAR_V */ | 52 | /* Automatically set MIPS_MAAR_VL */ |
53 | attrs |= MIPS_MAAR_V; | 53 | attrs |= MIPS_MAAR_VL; |
54 | 54 | ||
55 | /* Write the upper address & attributes (only MIPS_MAAR_V matters) */ | 55 | /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */ |
56 | write_c0_maari(idx << 1); | 56 | write_c0_maari(idx << 1); |
57 | back_to_back_c0_hazard(); | 57 | back_to_back_c0_hazard(); |
58 | write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs); | 58 | write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs); |
@@ -81,7 +81,7 @@ extern void maar_init(void); | |||
81 | * @upper: The highest address that the MAAR pair will affect. Must be | 81 | * @upper: The highest address that the MAAR pair will affect. Must be |
82 | * aligned to one byte before a 2^16 byte boundary. | 82 | * aligned to one byte before a 2^16 byte boundary. |
83 | * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The | 83 | * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The |
84 | * MIPS_MAAR_V attribute will automatically be set. | 84 | * MIPS_MAAR_VL attribute will automatically be set. |
85 | * | 85 | * |
86 | * Describes the configuration of a pair of Memory Accessibility Attribute | 86 | * Describes the configuration of a pair of Memory Accessibility Attribute |
87 | * Registers - applying attributes from attrs to the range of physical | 87 | * Registers - applying attributes from attrs to the range of physical |
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index f8d1d2f1d80d..6875b69f59f7 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h | |||
@@ -34,8 +34,10 @@ | |||
34 | */ | 34 | */ |
35 | #ifdef __ASSEMBLY__ | 35 | #ifdef __ASSEMBLY__ |
36 | #define _ULCAST_ | 36 | #define _ULCAST_ |
37 | #define _U64CAST_ | ||
37 | #else | 38 | #else |
38 | #define _ULCAST_ (unsigned long) | 39 | #define _ULCAST_ (unsigned long) |
40 | #define _U64CAST_ (u64) | ||
39 | #endif | 41 | #endif |
40 | 42 | ||
41 | /* | 43 | /* |
@@ -217,8 +219,10 @@ | |||
217 | /* | 219 | /* |
218 | * Wired register bits | 220 | * Wired register bits |
219 | */ | 221 | */ |
220 | #define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << 16) | 222 | #define MIPSR6_WIRED_LIMIT_SHIFT 16 |
221 | #define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << 0) | 223 | #define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << MIPSR6_WIRED_LIMIT_SHIFT) |
224 | #define MIPSR6_WIRED_WIRED_SHIFT 0 | ||
225 | #define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << MIPSR6_WIRED_WIRED_SHIFT) | ||
222 | 226 | ||
223 | /* | 227 | /* |
224 | * Values used for computation of new tlb entries | 228 | * Values used for computation of new tlb entries |
@@ -645,6 +649,7 @@ | |||
645 | #define MIPS_CONF5_LLB (_ULCAST_(1) << 4) | 649 | #define MIPS_CONF5_LLB (_ULCAST_(1) << 4) |
646 | #define MIPS_CONF5_MVH (_ULCAST_(1) << 5) | 650 | #define MIPS_CONF5_MVH (_ULCAST_(1) << 5) |
647 | #define MIPS_CONF5_VP (_ULCAST_(1) << 7) | 651 | #define MIPS_CONF5_VP (_ULCAST_(1) << 7) |
652 | #define MIPS_CONF5_SBRI (_ULCAST_(1) << 6) | ||
648 | #define MIPS_CONF5_FRE (_ULCAST_(1) << 8) | 653 | #define MIPS_CONF5_FRE (_ULCAST_(1) << 8) |
649 | #define MIPS_CONF5_UFE (_ULCAST_(1) << 9) | 654 | #define MIPS_CONF5_UFE (_ULCAST_(1) << 9) |
650 | #define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) | 655 | #define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) |
@@ -719,10 +724,14 @@ | |||
719 | #define XLR_PERFCTRL_ALLTHREADS (_ULCAST_(1) << 13) | 724 | #define XLR_PERFCTRL_ALLTHREADS (_ULCAST_(1) << 13) |
720 | 725 | ||
721 | /* MAAR bit definitions */ | 726 | /* MAAR bit definitions */ |
727 | #define MIPS_MAAR_VH (_U64CAST_(1) << 63) | ||
722 | #define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) | 728 | #define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) |
723 | #define MIPS_MAAR_ADDR_SHIFT 12 | 729 | #define MIPS_MAAR_ADDR_SHIFT 12 |
724 | #define MIPS_MAAR_S (_ULCAST_(1) << 1) | 730 | #define MIPS_MAAR_S (_ULCAST_(1) << 1) |
725 | #define MIPS_MAAR_V (_ULCAST_(1) << 0) | 731 | #define MIPS_MAAR_VL (_ULCAST_(1) << 0) |
732 | |||
733 | /* MAARI bit definitions */ | ||
734 | #define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0) | ||
726 | 735 | ||
727 | /* EBase bit definitions */ | 736 | /* EBase bit definitions */ |
728 | #define MIPS_EBASE_CPUNUM_SHIFT 0 | 737 | #define MIPS_EBASE_CPUNUM_SHIFT 0 |
@@ -736,6 +745,10 @@ | |||
736 | #define MIPS_CMGCRB_BASE 11 | 745 | #define MIPS_CMGCRB_BASE 11 |
737 | #define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1)) | 746 | #define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1)) |
738 | 747 | ||
748 | /* LLAddr bit definitions */ | ||
749 | #define MIPS_LLADDR_LLB_SHIFT 0 | ||
750 | #define MIPS_LLADDR_LLB (_ULCAST_(1) << MIPS_LLADDR_LLB_SHIFT) | ||
751 | |||
739 | /* | 752 | /* |
740 | * Bits in the MIPS32 Memory Segmentation registers. | 753 | * Bits in the MIPS32 Memory Segmentation registers. |
741 | */ | 754 | */ |
@@ -961,6 +974,22 @@ | |||
961 | /* Flush FTLB */ | 974 | /* Flush FTLB */ |
962 | #define LOONGSON_DIAG_FTLB (_ULCAST_(1) << 13) | 975 | #define LOONGSON_DIAG_FTLB (_ULCAST_(1) << 13) |
963 | 976 | ||
977 | /* CvmCtl register field definitions */ | ||
978 | #define CVMCTL_IPPCI_SHIFT 7 | ||
979 | #define CVMCTL_IPPCI (_U64CAST_(0x7) << CVMCTL_IPPCI_SHIFT) | ||
980 | #define CVMCTL_IPTI_SHIFT 4 | ||
981 | #define CVMCTL_IPTI (_U64CAST_(0x7) << CVMCTL_IPTI_SHIFT) | ||
982 | |||
983 | /* CvmMemCtl2 register field definitions */ | ||
984 | #define CVMMEMCTL2_INHIBITTS (_U64CAST_(1) << 17) | ||
985 | |||
986 | /* CvmVMConfig register field definitions */ | ||
987 | #define CVMVMCONF_DGHT (_U64CAST_(1) << 60) | ||
988 | #define CVMVMCONF_MMUSIZEM1_S 12 | ||
989 | #define CVMVMCONF_MMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_MMUSIZEM1_S) | ||
990 | #define CVMVMCONF_RMMUSIZEM1_S 0 | ||
991 | #define CVMVMCONF_RMMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_RMMUSIZEM1_S) | ||
992 | |||
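A minimal sketch of how these fields might be consumed (illustrative only; the MMUSIZEM1 field is assumed to hold the MMU size minus one, as its name suggests):

	u64 vmconf = read_c0_cvmvmconfig();
	unsigned int guest_mmu_size =
		((vmconf & CVMVMCONF_MMUSIZEM1) >> CVMVMCONF_MMUSIZEM1_S) + 1;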
964 | /* | 993 | /* |
965 | * Coprocessor 1 (FPU) register names | 994 | * Coprocessor 1 (FPU) register names |
966 | */ | 995 | */ |
@@ -1720,6 +1749,13 @@ do { \ | |||
1720 | 1749 | ||
1721 | #define read_c0_cvmmemctl() __read_64bit_c0_register($11, 7) | 1750 | #define read_c0_cvmmemctl() __read_64bit_c0_register($11, 7) |
1722 | #define write_c0_cvmmemctl(val) __write_64bit_c0_register($11, 7, val) | 1751 | #define write_c0_cvmmemctl(val) __write_64bit_c0_register($11, 7, val) |
1752 | |||
1753 | #define read_c0_cvmmemctl2() __read_64bit_c0_register($16, 6) | ||
1754 | #define write_c0_cvmmemctl2(val) __write_64bit_c0_register($16, 6, val) | ||
1755 | |||
1756 | #define read_c0_cvmvmconfig() __read_64bit_c0_register($16, 7) | ||
1757 | #define write_c0_cvmvmconfig(val) __write_64bit_c0_register($16, 7, val) | ||
1758 | |||
1723 | /* | 1759 | /* |
1724 | * The cacheerr registers are not standardized. On OCTEON, they are | 1760 | * The cacheerr registers are not standardized. On OCTEON, they are |
1725 | * 64 bits wide. | 1761 | * 64 bits wide. |
@@ -1989,6 +2025,8 @@ do { \ | |||
1989 | #define read_gc0_epc() __read_ulong_gc0_register(14, 0) | 2025 | #define read_gc0_epc() __read_ulong_gc0_register(14, 0) |
1990 | #define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val) | 2026 | #define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val) |
1991 | 2027 | ||
2028 | #define read_gc0_prid() __read_32bit_gc0_register(15, 0) | ||
2029 | |||
1992 | #define read_gc0_ebase() __read_32bit_gc0_register(15, 1) | 2030 | #define read_gc0_ebase() __read_32bit_gc0_register(15, 1) |
1993 | #define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val) | 2031 | #define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val) |
1994 | 2032 | ||
@@ -2012,6 +2050,9 @@ do { \ | |||
2012 | #define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val) | 2050 | #define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val) |
2013 | #define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val) | 2051 | #define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val) |
2014 | 2052 | ||
2053 | #define read_gc0_lladdr() __read_ulong_gc0_register(17, 0) | ||
2054 | #define write_gc0_lladdr(val) __write_ulong_gc0_register(17, 0, val) | ||
2055 | |||
2015 | #define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0) | 2056 | #define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0) |
2016 | #define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1) | 2057 | #define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1) |
2017 | #define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2) | 2058 | #define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2) |
@@ -2090,6 +2131,19 @@ do { \ | |||
2090 | #define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val) | 2131 | #define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val) |
2091 | #define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val) | 2132 | #define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val) |
2092 | 2133 | ||
2134 | /* Cavium OCTEON (cnMIPS) */ | ||
2135 | #define read_gc0_cvmcount() __read_ulong_gc0_register(9, 6) | ||
2136 | #define write_gc0_cvmcount(val) __write_ulong_gc0_register(9, 6, val) | ||
2137 | |||
2138 | #define read_gc0_cvmctl() __read_64bit_gc0_register(9, 7) | ||
2139 | #define write_gc0_cvmctl(val) __write_64bit_gc0_register(9, 7, val) | ||
2140 | |||
2141 | #define read_gc0_cvmmemctl() __read_64bit_gc0_register(11, 7) | ||
2142 | #define write_gc0_cvmmemctl(val) __write_64bit_gc0_register(11, 7, val) | ||
2143 | |||
2144 | #define read_gc0_cvmmemctl2() __read_64bit_gc0_register(16, 6) | ||
2145 | #define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register(16, 6, val) | ||
2146 | |||
2093 | /* | 2147 | /* |
2094 | * Macros to access the floating point coprocessor control registers | 2148 | * Macros to access the floating point coprocessor control registers |
2095 | */ | 2149 | */ |
@@ -2696,9 +2750,11 @@ __BUILD_SET_C0(brcm_mode) | |||
2696 | */ | 2750 | */ |
2697 | #define __BUILD_SET_GC0(name) __BUILD_SET_COMMON(gc0_##name) | 2751 | #define __BUILD_SET_GC0(name) __BUILD_SET_COMMON(gc0_##name) |
2698 | 2752 | ||
2753 | __BUILD_SET_GC0(wired) | ||
2699 | __BUILD_SET_GC0(status) | 2754 | __BUILD_SET_GC0(status) |
2700 | __BUILD_SET_GC0(cause) | 2755 | __BUILD_SET_GC0(cause) |
2701 | __BUILD_SET_GC0(ebase) | 2756 | __BUILD_SET_GC0(ebase) |
2757 | __BUILD_SET_GC0(config1) | ||
2702 | 2758 | ||
2703 | /* | 2759 | /* |
2704 | * Return low 10 bits of ebase. | 2760 | * Return low 10 bits of ebase. |
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h index dd179fd8acda..939734de4359 100644 --- a/arch/mips/include/asm/tlb.h +++ b/arch/mips/include/asm/tlb.h | |||
@@ -21,9 +21,11 @@ | |||
21 | */ | 21 | */ |
22 | #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) | 22 | #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) |
23 | 23 | ||
24 | #define UNIQUE_ENTRYHI(idx) \ | 24 | #define _UNIQUE_ENTRYHI(base, idx) \ |
25 | ((CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) | \ | 25 | (((base) + ((idx) << (PAGE_SHIFT + 1))) | \ |
26 | (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0)) | 26 | (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0)) |
27 | #define UNIQUE_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG0, idx) | ||
28 | #define UNIQUE_GUEST_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG1, idx) | ||
27 | 29 | ||
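Worked example (assuming 4 KiB pages, 32-bit kernel segments, and !cpu_has_tlbinv, so no EHINV bit is OR'd in):

	/* UNIQUE_ENTRYHI(3)       == CKSEG0 + (3 << 13) == 0x80006000
	 * UNIQUE_GUEST_ENTRYHI(3) == CKSEG1 + (3 << 13) == 0xa0006000
	 * i.e. dummy EntryHi values spaced two pages apart per index, with the
	 * guest variant kept in CKSEG1 rather than CKSEG0. */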
28 | static inline unsigned int num_wired_entries(void) | 30 | static inline unsigned int num_wired_entries(void) |
29 | { | 31 | { |
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 77429d1622b3..b5e46ae872d3 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h | |||
@@ -179,7 +179,7 @@ enum cop0_coi_func { | |||
179 | tlbr_op = 0x01, tlbwi_op = 0x02, | 179 | tlbr_op = 0x01, tlbwi_op = 0x02, |
180 | tlbwr_op = 0x06, tlbp_op = 0x08, | 180 | tlbwr_op = 0x06, tlbp_op = 0x08, |
181 | rfe_op = 0x10, eret_op = 0x18, | 181 | rfe_op = 0x10, eret_op = 0x18, |
182 | wait_op = 0x20, | 182 | wait_op = 0x20, hypcall_op = 0x28 |
183 | }; | 183 | }; |
184 | 184 | ||
185 | /* | 185 | /* |
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index a8a0199bf760..0318c6b442ab 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h | |||
@@ -21,6 +21,8 @@ | |||
21 | 21 | ||
22 | #define __KVM_HAVE_READONLY_MEM | 22 | #define __KVM_HAVE_READONLY_MEM |
23 | 23 | ||
24 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
25 | |||
24 | /* | 26 | /* |
25 | * for KVM_GET_REGS and KVM_SET_REGS | 27 | * for KVM_GET_REGS and KVM_SET_REGS |
26 | * | 28 | * |
@@ -54,9 +56,14 @@ struct kvm_fpu { | |||
54 | * Register set = 0: GP registers from kvm_regs (see definitions below). | 56 | * Register set = 0: GP registers from kvm_regs (see definitions below). |
55 | * | 57 | * |
56 | * Register set = 1: CP0 registers. | 58 | * Register set = 1: CP0 registers. |
57 | * bits[15..8] - Must be zero. | 59 | * bits[15..8] - COP0 register set. |
58 | * bits[7..3] - Register 'rd' index. | 60 | * |
59 | * bits[2..0] - Register 'sel' index. | 61 | * COP0 register set = 0: Main CP0 registers. |
62 | * bits[7..3] - Register 'rd' index. | ||
63 | * bits[2..0] - Register 'sel' index. | ||
64 | * | ||
65 | * COP0 register set = 1: MAARs. | ||
66 | * bits[7..0] - MAAR index. | ||
60 | * | 67 | * |
61 | * Register set = 2: KVM specific registers (see definitions below). | 68 | * Register set = 2: KVM specific registers (see definitions below). |
62 | * | 69 | * |
@@ -115,6 +122,15 @@ struct kvm_fpu { | |||
115 | 122 | ||
116 | 123 | ||
117 | /* | 124 | /* |
125 | * KVM_REG_MIPS_CP0 - Coprocessor 0 registers. | ||
126 | */ | ||
127 | |||
128 | #define KVM_REG_MIPS_MAAR (KVM_REG_MIPS_CP0 | (1 << 8)) | ||
129 | #define KVM_REG_MIPS_CP0_MAAR(n) (KVM_REG_MIPS_MAAR | \ | ||
130 | KVM_REG_SIZE_U64 | (n)) | ||
131 | |||
132 | |||
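A hedged userspace sketch of the new encoding (vcpu_fd is an assumed, already-open vcpu file descriptor; not part of the patch):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int set_guest_maar0(int vcpu_fd, __u64 val)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_MIPS_CP0_MAAR(0),	/* COP0 set 1, MAAR index 0 */
			.addr = (__u64)&val,
		};

		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}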
133 | /* | ||
118 | * KVM_REG_MIPS_KVM - KVM specific control registers. | 134 | * KVM_REG_MIPS_KVM - KVM specific control registers. |
119 | */ | 135 | */ |
120 | 136 | ||
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 07718bb5fc9d..c72a4cda389c 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c | |||
@@ -289,6 +289,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c) | |||
289 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { | 289 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { |
290 | if (c->fpu_id & MIPS_FPIR_3D) | 290 | if (c->fpu_id & MIPS_FPIR_3D) |
291 | c->ases |= MIPS_ASE_MIPS3D; | 291 | c->ases |= MIPS_ASE_MIPS3D; |
292 | if (c->fpu_id & MIPS_FPIR_UFRP) | ||
293 | c->options |= MIPS_CPU_UFR; | ||
292 | if (c->fpu_id & MIPS_FPIR_FREP) | 294 | if (c->fpu_id & MIPS_FPIR_FREP) |
293 | c->options |= MIPS_CPU_FRE; | 295 | c->options |= MIPS_CPU_FRE; |
294 | } | 296 | } |
@@ -1003,7 +1005,8 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c) | |||
1003 | unsigned int config3, config3_dyn; | 1005 | unsigned int config3, config3_dyn; |
1004 | 1006 | ||
1005 | probe_gc0_config_dyn(config3, config3, config3_dyn, | 1007 | probe_gc0_config_dyn(config3, config3, config3_dyn, |
1006 | MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_CTXTC); | 1008 | MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_ULRI | |
1009 | MIPS_CONF3_CTXTC); | ||
1007 | 1010 | ||
1008 | if (config3 & MIPS_CONF3_CTXTC) | 1011 | if (config3 & MIPS_CONF3_CTXTC) |
1009 | c->guest.options |= MIPS_CPU_CTXTC; | 1012 | c->guest.options |= MIPS_CPU_CTXTC; |
@@ -1013,6 +1016,9 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c) | |||
1013 | if (config3 & MIPS_CONF3_PW) | 1016 | if (config3 & MIPS_CONF3_PW) |
1014 | c->guest.options |= MIPS_CPU_HTW; | 1017 | c->guest.options |= MIPS_CPU_HTW; |
1015 | 1018 | ||
1019 | if (config3 & MIPS_CONF3_ULRI) | ||
1020 | c->guest.options |= MIPS_CPU_ULRI; | ||
1021 | |||
1016 | if (config3 & MIPS_CONF3_SC) | 1022 | if (config3 & MIPS_CONF3_SC) |
1017 | c->guest.options |= MIPS_CPU_SEGMENTS; | 1023 | c->guest.options |= MIPS_CPU_SEGMENTS; |
1018 | 1024 | ||
@@ -1051,7 +1057,7 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c) | |||
1051 | unsigned int config5, config5_dyn; | 1057 | unsigned int config5, config5_dyn; |
1052 | 1058 | ||
1053 | probe_gc0_config_dyn(config5, config5, config5_dyn, | 1059 | probe_gc0_config_dyn(config5, config5, config5_dyn, |
1054 | MIPS_CONF_M | MIPS_CONF5_MRP); | 1060 | MIPS_CONF_M | MIPS_CONF5_MVH | MIPS_CONF5_MRP); |
1055 | 1061 | ||
1056 | if (config5 & MIPS_CONF5_MRP) | 1062 | if (config5 & MIPS_CONF5_MRP) |
1057 | c->guest.options |= MIPS_CPU_MAAR; | 1063 | c->guest.options |= MIPS_CPU_MAAR; |
@@ -1061,6 +1067,9 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c) | |||
1061 | if (config5 & MIPS_CONF5_LLB) | 1067 | if (config5 & MIPS_CONF5_LLB) |
1062 | c->guest.options |= MIPS_CPU_RW_LLB; | 1068 | c->guest.options |= MIPS_CPU_RW_LLB; |
1063 | 1069 | ||
1070 | if (config5 & MIPS_CONF5_MVH) | ||
1071 | c->guest.options |= MIPS_CPU_MVH; | ||
1072 | |||
1064 | if (config5 & MIPS_CONF_M) | 1073 | if (config5 & MIPS_CONF_M) |
1065 | c->guest.conf |= BIT(6); | 1074 | c->guest.conf |= BIT(6); |
1066 | return config5 & MIPS_CONF_M; | 1075 | return config5 & MIPS_CONF_M; |
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index a7f81261c781..c036157fb891 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c | |||
@@ -70,6 +70,7 @@ EXPORT_SYMBOL(perf_irq); | |||
70 | */ | 70 | */ |
71 | 71 | ||
72 | unsigned int mips_hpt_frequency; | 72 | unsigned int mips_hpt_frequency; |
73 | EXPORT_SYMBOL_GPL(mips_hpt_frequency); | ||
73 | 74 | ||
74 | /* | 75 | /* |
75 | * This function exists in order to cause an error due to a duplicate | 76 | * This function exists in order to cause an error due to a duplicate |
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 65067327db12..50a722dfb236 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig | |||
@@ -26,11 +26,34 @@ config KVM | |||
26 | select SRCU | 26 | select SRCU |
27 | ---help--- | 27 | ---help--- |
28 | Support for hosting Guest kernels. | 28 | Support for hosting Guest kernels. |
29 | Currently supported on MIPS32 processors. | 29 | |
30 | choice | ||
31 | prompt "Virtualization mode" | ||
32 | depends on KVM | ||
33 | default KVM_MIPS_TE | ||
34 | |||
35 | config KVM_MIPS_TE | ||
36 | bool "Trap & Emulate" | ||
37 | ---help--- | ||
38 | Use trap and emulate to virtualize 32-bit guests in user mode. This | ||
39 | does not require any special hardware Virtualization support beyond | ||
40 | standard MIPS32/64 r2 or later, but it does require the guest kernel | ||
41 | to be configured with CONFIG_KVM_GUEST=y so that it resides in the | ||
42 | user address segment. | ||
43 | |||
44 | config KVM_MIPS_VZ | ||
45 | bool "MIPS Virtualization (VZ) ASE" | ||
46 | ---help--- | ||
47 | Use the MIPS Virtualization (VZ) ASE to virtualize guests. This | ||
48 | supports running unmodified guest kernels (with CONFIG_KVM_GUEST=n), | ||
49 | but requires hardware support. | ||
50 | |||
51 | endchoice | ||
30 | 52 | ||
31 | config KVM_MIPS_DYN_TRANS | 53 | config KVM_MIPS_DYN_TRANS |
32 | bool "KVM/MIPS: Dynamic binary translation to reduce traps" | 54 | bool "KVM/MIPS: Dynamic binary translation to reduce traps" |
33 | depends on KVM | 55 | depends on KVM_MIPS_TE |
56 | default y | ||
34 | ---help--- | 57 | ---help--- |
35 | When running in Trap & Emulate mode patch privileged | 58 | When running in Trap & Emulate mode patch privileged |
36 | instructions to reduce the number of traps. | 59 | instructions to reduce the number of traps. |
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 847429de780d..45d90f5d5177 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile | |||
@@ -9,8 +9,15 @@ common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o | |||
9 | 9 | ||
10 | kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \ | 10 | kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \ |
11 | interrupt.o stats.o commpage.o \ | 11 | interrupt.o stats.o commpage.o \ |
12 | dyntrans.o trap_emul.o fpu.o | 12 | fpu.o |
13 | kvm-objs += hypcall.o | ||
13 | kvm-objs += mmu.o | 14 | kvm-objs += mmu.o |
14 | 15 | ||
16 | ifdef CONFIG_KVM_MIPS_VZ | ||
17 | kvm-objs += vz.o | ||
18 | else | ||
19 | kvm-objs += dyntrans.o | ||
20 | kvm-objs += trap_emul.o | ||
21 | endif | ||
15 | obj-$(CONFIG_KVM) += kvm.o | 22 | obj-$(CONFIG_KVM) += kvm.o |
16 | obj-y += callback.o tlb.o | 23 | obj-y += callback.o tlb.o |
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index d40cfaad4529..34e78a3ee9d7 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c | |||
@@ -308,7 +308,7 @@ int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) | |||
308 | * CP0_Cause.DC bit or the count_ctl.DC bit. | 308 | * CP0_Cause.DC bit or the count_ctl.DC bit. |
309 | * 0 otherwise (in which case CP0_Count timer is running). | 309 | * 0 otherwise (in which case CP0_Count timer is running). |
310 | */ | 310 | */ |
311 | static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) | 311 | int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) |
312 | { | 312 | { |
313 | struct mips_coproc *cop0 = vcpu->arch.cop0; | 313 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
314 | 314 | ||
@@ -467,7 +467,7 @@ u32 kvm_mips_read_count(struct kvm_vcpu *vcpu) | |||
467 | * | 467 | * |
468 | * Returns: The ktime at the point of freeze. | 468 | * Returns: The ktime at the point of freeze. |
469 | */ | 469 | */ |
470 | static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) | 470 | ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) |
471 | { | 471 | { |
472 | ktime_t now; | 472 | ktime_t now; |
473 | 473 | ||
@@ -517,6 +517,82 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, | |||
517 | } | 517 | } |
518 | 518 | ||
519 | /** | 519 | /** |
520 | * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry. | ||
521 | * @vcpu: Virtual CPU. | ||
522 | * @before: Time before Count was saved, lower bound of drift calculation. | ||
523 | * @count: CP0_Count at point of restore. | ||
524 | * @min_drift: Minimum amount of drift permitted before correction. | ||
525 | * Must be <= 0. | ||
526 | * | ||
527 | * Restores the timer from a particular @count, accounting for drift. This can | ||
528 | * be used in conjunction with kvm_mips_freeze_hrtimer() when a hardware timer is | ||
529 | * to be used for a period of time, but the exact ktime corresponding to the | ||
530 | * final Count that must be restored is not known. | ||
531 | * | ||
532 | * It is guaranteed that a timer interrupt immediately after restore will be | ||
533 | * handled, but not if CP0_Compare is exactly at @count. That case should | ||
534 | * already be handled when the hardware timer state is saved. | ||
535 | * | ||
536 | * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not | ||
537 | * stopped). | ||
538 | * | ||
539 | * Returns: Amount of correction to count_bias due to drift. | ||
540 | */ | ||
541 | int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before, | ||
542 | u32 count, int min_drift) | ||
543 | { | ||
544 | ktime_t now, count_time; | ||
545 | u32 now_count, before_count; | ||
546 | u64 delta; | ||
547 | int drift, ret = 0; | ||
548 | |||
549 | /* Calculate expected count at before */ | ||
550 | before_count = vcpu->arch.count_bias + | ||
551 | kvm_mips_ktime_to_count(vcpu, before); | ||
552 | |||
553 | /* | ||
554 | * Detect significantly negative drift, where count is lower than | ||
555 | * expected. Some negative drift is expected when hardware counter is | ||
556 | * set after kvm_mips_freeze_timer(), and it is harmless to allow the | ||
557 | * time to jump forwards a little, within reason. If the drift is too | ||
558 | * significant, adjust the bias to avoid a big Guest.CP0_Count jump. | ||
559 | */ | ||
560 | drift = count - before_count; | ||
561 | if (drift < min_drift) { | ||
562 | count_time = before; | ||
563 | vcpu->arch.count_bias += drift; | ||
564 | ret = drift; | ||
565 | goto resume; | ||
566 | } | ||
567 | |||
568 | /* Calculate expected count right now */ | ||
569 | now = ktime_get(); | ||
570 | now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); | ||
571 | |||
572 | /* | ||
573 | * Detect positive drift, where count is higher than expected, and | ||
574 | * adjust the bias to avoid guest time going backwards. | ||
575 | */ | ||
576 | drift = count - now_count; | ||
577 | if (drift > 0) { | ||
578 | count_time = now; | ||
579 | vcpu->arch.count_bias += drift; | ||
580 | ret = drift; | ||
581 | goto resume; | ||
582 | } | ||
583 | |||
584 | /* Subtract nanosecond delta to find ktime when count was read */ | ||
585 | delta = (u64)(u32)(now_count - count); | ||
586 | delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); | ||
587 | count_time = ktime_sub_ns(now, delta); | ||
588 | |||
589 | resume: | ||
590 | /* Resume using the calculated ktime */ | ||
591 | kvm_mips_resume_hrtimer(vcpu, count_time, count); | ||
592 | return ret; | ||
593 | } | ||
594 | |||
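The drift handling above is easiest to see as a usage sketch (illustrative only; the -1000 cycle tolerance and the surrounding flow are assumptions, not taken from the patch):

	ktime_t before;
	u32 count, end_count;

	/* Freeze the soft timer and note the guest Count at that point */
	before = kvm_mips_freeze_hrtimer(vcpu, &count);

	/* ... a hardware guest timer now owns CP0_Count; when that timer is
	 * saved again, end_count holds the final guest CP0_Count ... */

	/* Restore the hrtimer from end_count, correcting count_bias if the
	 * drift relative to 'before' falls outside the permitted bounds. */
	kvm_mips_restore_hrtimer(vcpu, before, end_count, -1000);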
595 | /** | ||
520 | * kvm_mips_write_count() - Modify the count and update timer. | 596 | * kvm_mips_write_count() - Modify the count and update timer. |
521 | * @vcpu: Virtual CPU. | 597 | * @vcpu: Virtual CPU. |
522 | * @count: Guest CP0_Count value to set. | 598 | * @count: Guest CP0_Count value to set. |
@@ -543,16 +619,15 @@ void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count) | |||
543 | /** | 619 | /** |
544 | * kvm_mips_init_count() - Initialise timer. | 620 | * kvm_mips_init_count() - Initialise timer. |
545 | * @vcpu: Virtual CPU. | 621 | * @vcpu: Virtual CPU. |
622 | * @count_hz: Frequency of timer. | ||
546 | * | 623 | * |
547 | * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set | 624 | * Initialise the timer to the specified frequency, zero it, and set it going if |
548 | * it going if it's enabled. | 625 | * it's enabled. |
549 | */ | 626 | */ |
550 | void kvm_mips_init_count(struct kvm_vcpu *vcpu) | 627 | void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz) |
551 | { | 628 | { |
552 | /* 100 MHz */ | 629 | vcpu->arch.count_hz = count_hz; |
553 | vcpu->arch.count_hz = 100*1000*1000; | 630 | vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz); |
554 | vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, | ||
555 | vcpu->arch.count_hz); | ||
556 | vcpu->arch.count_dyn_bias = 0; | 631 | vcpu->arch.count_dyn_bias = 0; |
557 | 632 | ||
558 | /* Starting at 0 */ | 633 | /* Starting at 0 */ |
@@ -622,7 +697,9 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) | |||
622 | struct mips_coproc *cop0 = vcpu->arch.cop0; | 697 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
623 | int dc; | 698 | int dc; |
624 | u32 old_compare = kvm_read_c0_guest_compare(cop0); | 699 | u32 old_compare = kvm_read_c0_guest_compare(cop0); |
625 | ktime_t now; | 700 | s32 delta = compare - old_compare; |
701 | u32 cause; | ||
702 | ktime_t now = ktime_set(0, 0); /* silence bogus GCC warning */ | ||
626 | u32 count; | 703 | u32 count; |
627 | 704 | ||
628 | /* if unchanged, must just be an ack */ | 705 | /* if unchanged, must just be an ack */ |
@@ -634,6 +711,21 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) | |||
634 | return; | 711 | return; |
635 | } | 712 | } |
636 | 713 | ||
714 | /* | ||
715 | * If guest CP0_Compare moves forward, CP0_GTOffset should be adjusted | ||
716 | * too to prevent guest CP0_Count hitting guest CP0_Compare. | ||
717 | * | ||
718 | * The new GTOffset corresponds to the new value of CP0_Compare, and is | ||
719 | * set prior to it being written into the guest context. We disable | ||
720 | * preemption until the new value is written to prevent restore of a | ||
721 | * GTOffset corresponding to the old CP0_Compare value. | ||
722 | */ | ||
723 | if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta > 0) { | ||
724 | preempt_disable(); | ||
725 | write_c0_gtoffset(compare - read_c0_count()); | ||
726 | back_to_back_c0_hazard(); | ||
727 | } | ||
728 | |||
637 | /* freeze_hrtimer() takes care of timer interrupts <= count */ | 729 | /* freeze_hrtimer() takes care of timer interrupts <= count */ |
638 | dc = kvm_mips_count_disabled(vcpu); | 730 | dc = kvm_mips_count_disabled(vcpu); |
639 | if (!dc) | 731 | if (!dc) |
@@ -641,12 +733,36 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) | |||
641 | 733 | ||
642 | if (ack) | 734 | if (ack) |
643 | kvm_mips_callbacks->dequeue_timer_int(vcpu); | 735 | kvm_mips_callbacks->dequeue_timer_int(vcpu); |
736 | else if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) | ||
737 | /* | ||
738 | * With VZ, writing CP0_Compare acks (clears) CP0_Cause.TI, so | ||
739 | * preserve guest CP0_Cause.TI if we don't want to ack it. | ||
740 | */ | ||
741 | cause = kvm_read_c0_guest_cause(cop0); | ||
644 | 742 | ||
645 | kvm_write_c0_guest_compare(cop0, compare); | 743 | kvm_write_c0_guest_compare(cop0, compare); |
646 | 744 | ||
745 | if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { | ||
746 | if (delta > 0) | ||
747 | preempt_enable(); | ||
748 | |||
749 | back_to_back_c0_hazard(); | ||
750 | |||
751 | if (!ack && cause & CAUSEF_TI) | ||
752 | kvm_write_c0_guest_cause(cop0, cause); | ||
753 | } | ||
754 | |||
647 | /* resume_hrtimer() takes care of timer interrupts > count */ | 755 | /* resume_hrtimer() takes care of timer interrupts > count */ |
648 | if (!dc) | 756 | if (!dc) |
649 | kvm_mips_resume_hrtimer(vcpu, now, count); | 757 | kvm_mips_resume_hrtimer(vcpu, now, count); |
758 | |||
759 | /* | ||
760 | * If guest CP0_Compare is moving backward, we delay CP0_GTOffset change | ||
761 | * until after the new CP0_Compare is written, otherwise new guest | ||
762 | * CP0_Count could hit new guest CP0_Compare. | ||
763 | */ | ||
764 | if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta <= 0) | ||
765 | write_c0_gtoffset(compare - read_c0_count()); | ||
650 | } | 766 | } |
651 | 767 | ||
652 | /** | 768 | /** |
@@ -857,6 +973,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) | |||
857 | ++vcpu->stat.wait_exits; | 973 | ++vcpu->stat.wait_exits; |
858 | trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT); | 974 | trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT); |
859 | if (!vcpu->arch.pending_exceptions) { | 975 | if (!vcpu->arch.pending_exceptions) { |
976 | kvm_vz_lose_htimer(vcpu); | ||
860 | vcpu->arch.wait = 1; | 977 | vcpu->arch.wait = 1; |
861 | kvm_vcpu_block(vcpu); | 978 | kvm_vcpu_block(vcpu); |
862 | 979 | ||
@@ -873,17 +990,62 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) | |||
873 | return EMULATE_DONE; | 990 | return EMULATE_DONE; |
874 | } | 991 | } |
875 | 992 | ||
876 | /* | 993 | static void kvm_mips_change_entryhi(struct kvm_vcpu *vcpu, |
877 | * XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that | 994 | unsigned long entryhi) |
878 | * we can catch this, if things ever change | 995 | { |
879 | */ | 996 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
997 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
998 | int cpu, i; | ||
999 | u32 nasid = entryhi & KVM_ENTRYHI_ASID; | ||
1000 | |||
1001 | if (((kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID) != nasid)) { | ||
1002 | trace_kvm_asid_change(vcpu, kvm_read_c0_guest_entryhi(cop0) & | ||
1003 | KVM_ENTRYHI_ASID, nasid); | ||
1004 | |||
1005 | /* | ||
1006 | * Flush entries from the GVA page tables. | ||
1007 | * Guest user page table will get flushed lazily on re-entry to | ||
1008 | * guest user if the guest ASID actually changes. | ||
1009 | */ | ||
1010 | kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_KERN); | ||
1011 | |||
1012 | /* | ||
1013 | * Regenerate/invalidate kernel MMU context. | ||
1014 | * The user MMU context will be regenerated lazily on re-entry | ||
1015 | * to guest user if the guest ASID actually changes. | ||
1016 | */ | ||
1017 | preempt_disable(); | ||
1018 | cpu = smp_processor_id(); | ||
1019 | get_new_mmu_context(kern_mm, cpu); | ||
1020 | for_each_possible_cpu(i) | ||
1021 | if (i != cpu) | ||
1022 | cpu_context(i, kern_mm) = 0; | ||
1023 | preempt_enable(); | ||
1024 | } | ||
1025 | kvm_write_c0_guest_entryhi(cop0, entryhi); | ||
1026 | } | ||
1027 | |||
880 | enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) | 1028 | enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) |
881 | { | 1029 | { |
882 | struct mips_coproc *cop0 = vcpu->arch.cop0; | 1030 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
1031 | struct kvm_mips_tlb *tlb; | ||
883 | unsigned long pc = vcpu->arch.pc; | 1032 | unsigned long pc = vcpu->arch.pc; |
1033 | int index; | ||
884 | 1034 | ||
885 | kvm_err("[%#lx] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); | 1035 | index = kvm_read_c0_guest_index(cop0); |
886 | return EMULATE_FAIL; | 1036 | if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { |
1037 | /* UNDEFINED */ | ||
1038 | kvm_debug("[%#lx] TLBR Index %#x out of range\n", pc, index); | ||
1039 | index &= KVM_MIPS_GUEST_TLB_SIZE - 1; | ||
1040 | } | ||
1041 | |||
1042 | tlb = &vcpu->arch.guest_tlb[index]; | ||
1043 | kvm_write_c0_guest_pagemask(cop0, tlb->tlb_mask); | ||
1044 | kvm_write_c0_guest_entrylo0(cop0, tlb->tlb_lo[0]); | ||
1045 | kvm_write_c0_guest_entrylo1(cop0, tlb->tlb_lo[1]); | ||
1046 | kvm_mips_change_entryhi(vcpu, tlb->tlb_hi); | ||
1047 | |||
1048 | return EMULATE_DONE; | ||
887 | } | 1049 | } |
888 | 1050 | ||
889 | /** | 1051 | /** |
@@ -1105,11 +1267,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, | |||
1105 | struct kvm_vcpu *vcpu) | 1267 | struct kvm_vcpu *vcpu) |
1106 | { | 1268 | { |
1107 | struct mips_coproc *cop0 = vcpu->arch.cop0; | 1269 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
1108 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
1109 | enum emulation_result er = EMULATE_DONE; | 1270 | enum emulation_result er = EMULATE_DONE; |
1110 | u32 rt, rd, sel; | 1271 | u32 rt, rd, sel; |
1111 | unsigned long curr_pc; | 1272 | unsigned long curr_pc; |
1112 | int cpu, i; | ||
1113 | 1273 | ||
1114 | /* | 1274 | /* |
1115 | * Update PC and hold onto current PC in case there is | 1275 | * Update PC and hold onto current PC in case there is |
@@ -1143,6 +1303,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, | |||
1143 | case wait_op: | 1303 | case wait_op: |
1144 | er = kvm_mips_emul_wait(vcpu); | 1304 | er = kvm_mips_emul_wait(vcpu); |
1145 | break; | 1305 | break; |
1306 | case hypcall_op: | ||
1307 | er = kvm_mips_emul_hypcall(vcpu, inst); | ||
1308 | break; | ||
1146 | } | 1309 | } |
1147 | } else { | 1310 | } else { |
1148 | rt = inst.c0r_format.rt; | 1311 | rt = inst.c0r_format.rt; |
@@ -1208,44 +1371,8 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, | |||
1208 | kvm_change_c0_guest_ebase(cop0, 0x1ffff000, | 1371 | kvm_change_c0_guest_ebase(cop0, 0x1ffff000, |
1209 | vcpu->arch.gprs[rt]); | 1372 | vcpu->arch.gprs[rt]); |
1210 | } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { | 1373 | } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { |
1211 | u32 nasid = | 1374 | kvm_mips_change_entryhi(vcpu, |
1212 | vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID; | 1375 | vcpu->arch.gprs[rt]); |
1213 | if (((kvm_read_c0_guest_entryhi(cop0) & | ||
1214 | KVM_ENTRYHI_ASID) != nasid)) { | ||
1215 | trace_kvm_asid_change(vcpu, | ||
1216 | kvm_read_c0_guest_entryhi(cop0) | ||
1217 | & KVM_ENTRYHI_ASID, | ||
1218 | nasid); | ||
1219 | |||
1220 | /* | ||
1221 | * Flush entries from the GVA page | ||
1222 | * tables. | ||
1223 | * Guest user page table will get | ||
1224 | * flushed lazily on re-entry to guest | ||
1225 | * user if the guest ASID actually | ||
1226 | * changes. | ||
1227 | */ | ||
1228 | kvm_mips_flush_gva_pt(kern_mm->pgd, | ||
1229 | KMF_KERN); | ||
1230 | |||
1231 | /* | ||
1232 | * Regenerate/invalidate kernel MMU | ||
1233 | * context. | ||
1234 | * The user MMU context will be | ||
1235 | * regenerated lazily on re-entry to | ||
1236 | * guest user if the guest ASID actually | ||
1237 | * changes. | ||
1238 | */ | ||
1239 | preempt_disable(); | ||
1240 | cpu = smp_processor_id(); | ||
1241 | get_new_mmu_context(kern_mm, cpu); | ||
1242 | for_each_possible_cpu(i) | ||
1243 | if (i != cpu) | ||
1244 | cpu_context(i, kern_mm) = 0; | ||
1245 | preempt_enable(); | ||
1246 | } | ||
1247 | kvm_write_c0_guest_entryhi(cop0, | ||
1248 | vcpu->arch.gprs[rt]); | ||
1249 | } | 1376 | } |
1250 | /* Are we writing to COUNT */ | 1377 | /* Are we writing to COUNT */ |
1251 | else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { | 1378 | else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { |
@@ -1474,9 +1601,8 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, | |||
1474 | struct kvm_run *run, | 1601 | struct kvm_run *run, |
1475 | struct kvm_vcpu *vcpu) | 1602 | struct kvm_vcpu *vcpu) |
1476 | { | 1603 | { |
1477 | enum emulation_result er = EMULATE_DO_MMIO; | 1604 | enum emulation_result er; |
1478 | u32 rt; | 1605 | u32 rt; |
1479 | u32 bytes; | ||
1480 | void *data = run->mmio.data; | 1606 | void *data = run->mmio.data; |
1481 | unsigned long curr_pc; | 1607 | unsigned long curr_pc; |
1482 | 1608 | ||
@@ -1491,103 +1617,74 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, | |||
1491 | 1617 | ||
1492 | rt = inst.i_format.rt; | 1618 | rt = inst.i_format.rt; |
1493 | 1619 | ||
1620 | run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( | ||
1621 | vcpu->arch.host_cp0_badvaddr); | ||
1622 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) | ||
1623 | goto out_fail; | ||
1624 | |||
1494 | switch (inst.i_format.opcode) { | 1625 | switch (inst.i_format.opcode) { |
1495 | case sb_op: | 1626 | #if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) |
1496 | bytes = 1; | 1627 | case sd_op: |
1497 | if (bytes > sizeof(run->mmio.data)) { | 1628 | run->mmio.len = 8; |
1498 | kvm_err("%s: bad MMIO length: %d\n", __func__, | 1629 | *(u64 *)data = vcpu->arch.gprs[rt]; |
1499 | run->mmio.len); | ||
1500 | } | ||
1501 | run->mmio.phys_addr = | ||
1502 | kvm_mips_callbacks->gva_to_gpa(vcpu->arch. | ||
1503 | host_cp0_badvaddr); | ||
1504 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) { | ||
1505 | er = EMULATE_FAIL; | ||
1506 | break; | ||
1507 | } | ||
1508 | run->mmio.len = bytes; | ||
1509 | run->mmio.is_write = 1; | ||
1510 | vcpu->mmio_needed = 1; | ||
1511 | vcpu->mmio_is_write = 1; | ||
1512 | *(u8 *) data = vcpu->arch.gprs[rt]; | ||
1513 | kvm_debug("OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n", | ||
1514 | vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt], | ||
1515 | *(u8 *) data); | ||
1516 | 1630 | ||
1631 | kvm_debug("[%#lx] OP_SD: eaddr: %#lx, gpr: %#lx, data: %#llx\n", | ||
1632 | vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, | ||
1633 | vcpu->arch.gprs[rt], *(u64 *)data); | ||
1517 | break; | 1634 | break; |
1635 | #endif | ||
1518 | 1636 | ||
1519 | case sw_op: | 1637 | case sw_op: |
1520 | bytes = 4; | 1638 | run->mmio.len = 4; |
1521 | if (bytes > sizeof(run->mmio.data)) { | 1639 | *(u32 *)data = vcpu->arch.gprs[rt]; |
1522 | kvm_err("%s: bad MMIO length: %d\n", __func__, | ||
1523 | run->mmio.len); | ||
1524 | } | ||
1525 | run->mmio.phys_addr = | ||
1526 | kvm_mips_callbacks->gva_to_gpa(vcpu->arch. | ||
1527 | host_cp0_badvaddr); | ||
1528 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) { | ||
1529 | er = EMULATE_FAIL; | ||
1530 | break; | ||
1531 | } | ||
1532 | |||
1533 | run->mmio.len = bytes; | ||
1534 | run->mmio.is_write = 1; | ||
1535 | vcpu->mmio_needed = 1; | ||
1536 | vcpu->mmio_is_write = 1; | ||
1537 | *(u32 *) data = vcpu->arch.gprs[rt]; | ||
1538 | 1640 | ||
1539 | kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n", | 1641 | kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n", |
1540 | vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, | 1642 | vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, |
1541 | vcpu->arch.gprs[rt], *(u32 *) data); | 1643 | vcpu->arch.gprs[rt], *(u32 *)data); |
1542 | break; | 1644 | break; |
1543 | 1645 | ||
1544 | case sh_op: | 1646 | case sh_op: |
1545 | bytes = 2; | 1647 | run->mmio.len = 2; |
1546 | if (bytes > sizeof(run->mmio.data)) { | 1648 | *(u16 *)data = vcpu->arch.gprs[rt]; |
1547 | kvm_err("%s: bad MMIO length: %d\n", __func__, | ||
1548 | run->mmio.len); | ||
1549 | } | ||
1550 | run->mmio.phys_addr = | ||
1551 | kvm_mips_callbacks->gva_to_gpa(vcpu->arch. | ||
1552 | host_cp0_badvaddr); | ||
1553 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) { | ||
1554 | er = EMULATE_FAIL; | ||
1555 | break; | ||
1556 | } | ||
1557 | |||
1558 | run->mmio.len = bytes; | ||
1559 | run->mmio.is_write = 1; | ||
1560 | vcpu->mmio_needed = 1; | ||
1561 | vcpu->mmio_is_write = 1; | ||
1562 | *(u16 *) data = vcpu->arch.gprs[rt]; | ||
1563 | 1649 | ||
1564 | kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n", | 1650 | kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n", |
1565 | vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, | 1651 | vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, |
1566 | vcpu->arch.gprs[rt], *(u32 *) data); | 1652 | vcpu->arch.gprs[rt], *(u16 *)data); |
1653 | break; | ||
1654 | |||
1655 | case sb_op: | ||
1656 | run->mmio.len = 1; | ||
1657 | *(u8 *)data = vcpu->arch.gprs[rt]; | ||
1658 | |||
1659 | kvm_debug("[%#lx] OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n", | ||
1660 | vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, | ||
1661 | vcpu->arch.gprs[rt], *(u8 *)data); | ||
1567 | break; | 1662 | break; |
1568 | 1663 | ||
1569 | default: | 1664 | default: |
1570 | kvm_err("Store not yet supported (inst=0x%08x)\n", | 1665 | kvm_err("Store not yet supported (inst=0x%08x)\n", |
1571 | inst.word); | 1666 | inst.word); |
1572 | er = EMULATE_FAIL; | 1667 | goto out_fail; |
1573 | break; | ||
1574 | } | 1668 | } |
1575 | 1669 | ||
1576 | /* Rollback PC if emulation was unsuccessful */ | 1670 | run->mmio.is_write = 1; |
1577 | if (er == EMULATE_FAIL) | 1671 | vcpu->mmio_needed = 1; |
1578 | vcpu->arch.pc = curr_pc; | 1672 | vcpu->mmio_is_write = 1; |
1673 | return EMULATE_DO_MMIO; | ||
1579 | 1674 | ||
1580 | return er; | 1675 | out_fail: |
1676 | /* Rollback PC if emulation was unsuccessful */ | ||
1677 | vcpu->arch.pc = curr_pc; | ||
1678 | return EMULATE_FAIL; | ||
1581 | } | 1679 | } |
1582 | 1680 | ||
1583 | enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, | 1681 | enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, |
1584 | u32 cause, struct kvm_run *run, | 1682 | u32 cause, struct kvm_run *run, |
1585 | struct kvm_vcpu *vcpu) | 1683 | struct kvm_vcpu *vcpu) |
1586 | { | 1684 | { |
1587 | enum emulation_result er = EMULATE_DO_MMIO; | 1685 | enum emulation_result er; |
1588 | unsigned long curr_pc; | 1686 | unsigned long curr_pc; |
1589 | u32 op, rt; | 1687 | u32 op, rt; |
1590 | u32 bytes; | ||
1591 | 1688 | ||
1592 | rt = inst.i_format.rt; | 1689 | rt = inst.i_format.rt; |
1593 | op = inst.i_format.opcode; | 1690 | op = inst.i_format.opcode; |
@@ -1606,96 +1703,53 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, | |||
1606 | 1703 | ||
1607 | vcpu->arch.io_gpr = rt; | 1704 | vcpu->arch.io_gpr = rt; |
1608 | 1705 | ||
1706 | run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( | ||
1707 | vcpu->arch.host_cp0_badvaddr); | ||
1708 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) | ||
1709 | return EMULATE_FAIL; | ||
1710 | |||
1711 | vcpu->mmio_needed = 2; /* signed */ | ||
1609 | switch (op) { | 1712 | switch (op) { |
1610 | case lw_op: | 1713 | #if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) |
1611 | bytes = 4; | 1714 | case ld_op: |
1612 | if (bytes > sizeof(run->mmio.data)) { | 1715 | run->mmio.len = 8; |
1613 | kvm_err("%s: bad MMIO length: %d\n", __func__, | 1716 | break; |
1614 | run->mmio.len); | ||
1615 | er = EMULATE_FAIL; | ||
1616 | break; | ||
1617 | } | ||
1618 | run->mmio.phys_addr = | ||
1619 | kvm_mips_callbacks->gva_to_gpa(vcpu->arch. | ||
1620 | host_cp0_badvaddr); | ||
1621 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) { | ||
1622 | er = EMULATE_FAIL; | ||
1623 | break; | ||
1624 | } | ||
1625 | 1717 | ||
1626 | run->mmio.len = bytes; | 1718 | case lwu_op: |
1627 | run->mmio.is_write = 0; | 1719 | vcpu->mmio_needed = 1; /* unsigned */ |
1628 | vcpu->mmio_needed = 1; | 1720 | /* fall through */ |
1629 | vcpu->mmio_is_write = 0; | 1721 | #endif |
1722 | case lw_op: | ||
1723 | run->mmio.len = 4; | ||
1630 | break; | 1724 | break; |
1631 | 1725 | ||
1632 | case lh_op: | ||
1633 | case lhu_op: | 1726 | case lhu_op: |
1634 | bytes = 2; | 1727 | vcpu->mmio_needed = 1; /* unsigned */ |
1635 | if (bytes > sizeof(run->mmio.data)) { | 1728 | /* fall through */ |
1636 | kvm_err("%s: bad MMIO length: %d\n", __func__, | 1729 | case lh_op: |
1637 | run->mmio.len); | 1730 | run->mmio.len = 2; |
1638 | er = EMULATE_FAIL; | ||
1639 | break; | ||
1640 | } | ||
1641 | run->mmio.phys_addr = | ||
1642 | kvm_mips_callbacks->gva_to_gpa(vcpu->arch. | ||
1643 | host_cp0_badvaddr); | ||
1644 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) { | ||
1645 | er = EMULATE_FAIL; | ||
1646 | break; | ||
1647 | } | ||
1648 | |||
1649 | run->mmio.len = bytes; | ||
1650 | run->mmio.is_write = 0; | ||
1651 | vcpu->mmio_needed = 1; | ||
1652 | vcpu->mmio_is_write = 0; | ||
1653 | |||
1654 | if (op == lh_op) | ||
1655 | vcpu->mmio_needed = 2; | ||
1656 | else | ||
1657 | vcpu->mmio_needed = 1; | ||
1658 | |||
1659 | break; | 1731 | break; |
1660 | 1732 | ||
1661 | case lbu_op: | 1733 | case lbu_op: |
1734 | vcpu->mmio_needed = 1; /* unsigned */ | ||
1735 | /* fall through */ | ||
1662 | case lb_op: | 1736 | case lb_op: |
1663 | bytes = 1; | 1737 | run->mmio.len = 1; |
1664 | if (bytes > sizeof(run->mmio.data)) { | ||
1665 | kvm_err("%s: bad MMIO length: %d\n", __func__, | ||
1666 | run->mmio.len); | ||
1667 | er = EMULATE_FAIL; | ||
1668 | break; | ||
1669 | } | ||
1670 | run->mmio.phys_addr = | ||
1671 | kvm_mips_callbacks->gva_to_gpa(vcpu->arch. | ||
1672 | host_cp0_badvaddr); | ||
1673 | if (run->mmio.phys_addr == KVM_INVALID_ADDR) { | ||
1674 | er = EMULATE_FAIL; | ||
1675 | break; | ||
1676 | } | ||
1677 | |||
1678 | run->mmio.len = bytes; | ||
1679 | run->mmio.is_write = 0; | ||
1680 | vcpu->mmio_is_write = 0; | ||
1681 | |||
1682 | if (op == lb_op) | ||
1683 | vcpu->mmio_needed = 2; | ||
1684 | else | ||
1685 | vcpu->mmio_needed = 1; | ||
1686 | |||
1687 | break; | 1738 | break; |
1688 | 1739 | ||
1689 | default: | 1740 | default: |
1690 | kvm_err("Load not yet supported (inst=0x%08x)\n", | 1741 | kvm_err("Load not yet supported (inst=0x%08x)\n", |
1691 | inst.word); | 1742 | inst.word); |
1692 | er = EMULATE_FAIL; | 1743 | vcpu->mmio_needed = 0; |
1693 | break; | 1744 | return EMULATE_FAIL; |
1694 | } | 1745 | } |
1695 | 1746 | ||
1696 | return er; | 1747 | run->mmio.is_write = 0; |
1748 | vcpu->mmio_is_write = 0; | ||
1749 | return EMULATE_DO_MMIO; | ||
1697 | } | 1750 | } |
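Editor's note (not part of the patch): the reworked kvm_mips_emulate_store()/kvm_mips_emulate_load() above only fill in run->mmio and return EMULATE_DO_MMIO; the access itself is completed by userspace, and kvm_mips_complete_mmio_load() later picks the result up. A minimal, hedged sketch of the VMM side of that exit follows; the 4 KiB backing array is a stand-in for a real device model.

#include <stdint.h>
#include <string.h>
#include <linux/kvm.h>

static uint8_t fake_device[0x1000];	/* toy MMIO backing store */

static void handle_mmio_exit(struct kvm_run *run)
{
	uint8_t *dev = &fake_device[run->mmio.phys_addr & 0xfff];

	if (run->mmio.is_write) {
		/* Guest store: bytes were already placed in run->mmio.data. */
		memcpy(dev, run->mmio.data, run->mmio.len);
	} else {
		/* Guest load: fill run->mmio.data before the next KVM_RUN;
		 * kvm_mips_complete_mmio_load() consumes it on re-entry. */
		memcpy(run->mmio.data, dev, run->mmio.len);
	}
}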
1698 | 1751 | ||
1752 | #ifndef CONFIG_KVM_MIPS_VZ | ||
1699 | static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), | 1753 | static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), |
1700 | unsigned long curr_pc, | 1754 | unsigned long curr_pc, |
1701 | unsigned long addr, | 1755 | unsigned long addr, |
@@ -1786,11 +1840,35 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, | |||
1786 | vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, | 1840 | vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, |
1787 | arch->gprs[base], offset); | 1841 | arch->gprs[base], offset); |
1788 | 1842 | ||
1789 | if (cache == Cache_D) | 1843 | if (cache == Cache_D) { |
1844 | #ifdef CONFIG_CPU_R4K_CACHE_TLB | ||
1790 | r4k_blast_dcache(); | 1845 | r4k_blast_dcache(); |
1791 | else if (cache == Cache_I) | 1846 | #else |
1847 | switch (boot_cpu_type()) { | ||
1848 | case CPU_CAVIUM_OCTEON3: | ||
1849 | /* locally flush icache */ | ||
1850 | local_flush_icache_range(0, 0); | ||
1851 | break; | ||
1852 | default: | ||
1853 | __flush_cache_all(); | ||
1854 | break; | ||
1855 | } | ||
1856 | #endif | ||
1857 | } else if (cache == Cache_I) { | ||
1858 | #ifdef CONFIG_CPU_R4K_CACHE_TLB | ||
1792 | r4k_blast_icache(); | 1859 | r4k_blast_icache(); |
1793 | else { | 1860 | #else |
1861 | switch (boot_cpu_type()) { | ||
1862 | case CPU_CAVIUM_OCTEON3: | ||
1863 | /* locally flush icache */ | ||
1864 | local_flush_icache_range(0, 0); | ||
1865 | break; | ||
1866 | default: | ||
1867 | flush_icache_all(); | ||
1868 | break; | ||
1869 | } | ||
1870 | #endif | ||
1871 | } else { | ||
1794 | kvm_err("%s: unsupported CACHE INDEX operation\n", | 1872 | kvm_err("%s: unsupported CACHE INDEX operation\n", |
1795 | __func__); | 1873 | __func__); |
1796 | return EMULATE_FAIL; | 1874 | return EMULATE_FAIL; |
@@ -1870,18 +1948,6 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, | |||
1870 | case cop0_op: | 1948 | case cop0_op: |
1871 | er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu); | 1949 | er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu); |
1872 | break; | 1950 | break; |
1873 | case sb_op: | ||
1874 | case sh_op: | ||
1875 | case sw_op: | ||
1876 | er = kvm_mips_emulate_store(inst, cause, run, vcpu); | ||
1877 | break; | ||
1878 | case lb_op: | ||
1879 | case lbu_op: | ||
1880 | case lhu_op: | ||
1881 | case lh_op: | ||
1882 | case lw_op: | ||
1883 | er = kvm_mips_emulate_load(inst, cause, run, vcpu); | ||
1884 | break; | ||
1885 | 1951 | ||
1886 | #ifndef CONFIG_CPU_MIPSR6 | 1952 | #ifndef CONFIG_CPU_MIPSR6 |
1887 | case cache_op: | 1953 | case cache_op: |
@@ -1915,6 +1981,7 @@ unknown: | |||
1915 | 1981 | ||
1916 | return er; | 1982 | return er; |
1917 | } | 1983 | } |
1984 | #endif /* CONFIG_KVM_MIPS_VZ */ | ||
1918 | 1985 | ||
1919 | /** | 1986 | /** |
1920 | * kvm_mips_guest_exception_base() - Find guest exception vector base address. | 1987 | * kvm_mips_guest_exception_base() - Find guest exception vector base address. |
@@ -2524,8 +2591,15 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
2524 | vcpu->arch.pc = vcpu->arch.io_pc; | 2591 | vcpu->arch.pc = vcpu->arch.io_pc; |
2525 | 2592 | ||
2526 | switch (run->mmio.len) { | 2593 | switch (run->mmio.len) { |
2594 | case 8: | ||
2595 | *gpr = *(s64 *)run->mmio.data; | ||
2596 | break; | ||
2597 | |||
2527 | case 4: | 2598 | case 4: |
2528 | *gpr = *(s32 *) run->mmio.data; | 2599 | if (vcpu->mmio_needed == 2) |
2600 | *gpr = *(s32 *)run->mmio.data; | ||
2601 | else | ||
2602 | *gpr = *(u32 *)run->mmio.data; | ||
2529 | break; | 2603 | break; |
2530 | 2604 | ||
2531 | case 2: | 2605 | case 2: |
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index c5b254c4d0da..16e1c93b484f 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c | |||
@@ -51,12 +51,15 @@ | |||
51 | #define RA 31 | 51 | #define RA 31 |
52 | 52 | ||
53 | /* Some CP0 registers */ | 53 | /* Some CP0 registers */ |
54 | #define C0_PWBASE 5, 5 | ||
54 | #define C0_HWRENA 7, 0 | 55 | #define C0_HWRENA 7, 0 |
55 | #define C0_BADVADDR 8, 0 | 56 | #define C0_BADVADDR 8, 0 |
56 | #define C0_BADINSTR 8, 1 | 57 | #define C0_BADINSTR 8, 1 |
57 | #define C0_BADINSTRP 8, 2 | 58 | #define C0_BADINSTRP 8, 2 |
58 | #define C0_ENTRYHI 10, 0 | 59 | #define C0_ENTRYHI 10, 0 |
60 | #define C0_GUESTCTL1 10, 4 | ||
59 | #define C0_STATUS 12, 0 | 61 | #define C0_STATUS 12, 0 |
62 | #define C0_GUESTCTL0 12, 6 | ||
60 | #define C0_CAUSE 13, 0 | 63 | #define C0_CAUSE 13, 0 |
61 | #define C0_EPC 14, 0 | 64 | #define C0_EPC 14, 0 |
62 | #define C0_EBASE 15, 1 | 65 | #define C0_EBASE 15, 1 |
@@ -292,8 +295,8 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
292 | unsigned int i; | 295 | unsigned int i; |
293 | struct uasm_label labels[2]; | 296 | struct uasm_label labels[2]; |
294 | struct uasm_reloc relocs[2]; | 297 | struct uasm_reloc relocs[2]; |
295 | struct uasm_label *l = labels; | 298 | struct uasm_label __maybe_unused *l = labels; |
296 | struct uasm_reloc *r = relocs; | 299 | struct uasm_reloc __maybe_unused *r = relocs; |
297 | 300 | ||
298 | memset(labels, 0, sizeof(labels)); | 301 | memset(labels, 0, sizeof(labels)); |
299 | memset(relocs, 0, sizeof(relocs)); | 302 | memset(relocs, 0, sizeof(relocs)); |
@@ -302,7 +305,67 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
302 | UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1); | 305 | UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1); |
303 | UASM_i_MTC0(&p, T0, C0_EPC); | 306 | UASM_i_MTC0(&p, T0, C0_EPC); |
304 | 307 | ||
305 | /* Set the ASID for the Guest Kernel */ | 308 | #ifdef CONFIG_KVM_MIPS_VZ |
309 | /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */ | ||
310 | UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg); | ||
311 | UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1); | ||
312 | |||
313 | /* | ||
314 | * Set up KVM GPA pgd. | ||
315 | * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): | ||
316 | * - call tlbmiss_handler_setup_pgd(mm->pgd) | ||
317 | * - write mm->pgd into CP0_PWBase | ||
318 | * | ||
319 | * We keep S0 pointing at struct kvm so we can load the ASID below. | ||
320 | */ | ||
321 | UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) - | ||
322 | (int)offsetof(struct kvm_vcpu, arch), K1); | ||
323 | UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0); | ||
324 | UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); | ||
325 | uasm_i_jalr(&p, RA, T9); | ||
326 | /* delay slot */ | ||
327 | if (cpu_has_htw) | ||
328 | UASM_i_MTC0(&p, A0, C0_PWBASE); | ||
329 | else | ||
330 | uasm_i_nop(&p); | ||
331 | |||
332 | /* Set GM bit to setup eret to VZ guest context */ | ||
333 | uasm_i_addiu(&p, V1, ZERO, 1); | ||
334 | uasm_i_mfc0(&p, K0, C0_GUESTCTL0); | ||
335 | uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1); | ||
336 | uasm_i_mtc0(&p, K0, C0_GUESTCTL0); | ||
337 | |||
338 | if (cpu_has_guestid) { | ||
339 | /* | ||
340 | * Set root mode GuestID, so that root TLB refill handler can | ||
341 | * use the correct GuestID in the root TLB. | ||
342 | */ | ||
343 | |||
344 | /* Get current GuestID */ | ||
345 | uasm_i_mfc0(&p, T0, C0_GUESTCTL1); | ||
346 | /* Set GuestCtl1.RID = GuestCtl1.ID */ | ||
347 | uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT, | ||
348 | MIPS_GCTL1_ID_WIDTH); | ||
349 | uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT, | ||
350 | MIPS_GCTL1_RID_WIDTH); | ||
351 | uasm_i_mtc0(&p, T0, C0_GUESTCTL1); | ||
352 | |||
353 | /* GuestID handles dealiasing so we don't need to touch ASID */ | ||
354 | goto skip_asid_restore; | ||
355 | } | ||
356 | |||
357 | /* Root ASID Dealias (RAD) */ | ||
358 | |||
359 | /* Save host ASID */ | ||
360 | UASM_i_MFC0(&p, K0, C0_ENTRYHI); | ||
361 | UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), | ||
362 | K1); | ||
363 | |||
364 | /* Set the root ASID for the Guest */ | ||
365 | UASM_i_ADDIU(&p, T1, S0, | ||
366 | offsetof(struct kvm, arch.gpa_mm.context.asid)); | ||
367 | #else | ||
368 | /* Set the ASID for the Guest Kernel or User */ | ||
306 | UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1); | 369 | UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1); |
307 | UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]), | 370 | UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]), |
308 | T0); | 371 | T0); |
@@ -315,6 +378,7 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
315 | UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, | 378 | UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, |
316 | guest_user_mm.context.asid)); | 379 | guest_user_mm.context.asid)); |
317 | uasm_l_kernel_asid(&l, p); | 380 | uasm_l_kernel_asid(&l, p); |
381 | #endif | ||
318 | 382 | ||
319 | /* t1: contains the base of the ASID array, need to get the cpu id */ | 383 | /* t1: contains the base of the ASID array, need to get the cpu id */ |
320 | /* smp_processor_id */ | 384 | /* smp_processor_id */ |
@@ -339,6 +403,7 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
339 | uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); | 403 | uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); |
340 | #endif | 404 | #endif |
341 | 405 | ||
406 | #ifndef CONFIG_KVM_MIPS_VZ | ||
342 | /* | 407 | /* |
343 | * Set up KVM T&E GVA pgd. | 408 | * Set up KVM T&E GVA pgd. |
344 | * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): | 409 | * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): |
@@ -351,7 +416,11 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
351 | UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); | 416 | UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); |
352 | uasm_i_jalr(&p, RA, T9); | 417 | uasm_i_jalr(&p, RA, T9); |
353 | uasm_i_mtc0(&p, K0, C0_ENTRYHI); | 418 | uasm_i_mtc0(&p, K0, C0_ENTRYHI); |
354 | 419 | #else | |
420 | /* Set up KVM VZ root ASID (!guestid) */ | ||
421 | uasm_i_mtc0(&p, K0, C0_ENTRYHI); | ||
422 | skip_asid_restore: | ||
423 | #endif | ||
355 | uasm_i_ehb(&p); | 424 | uasm_i_ehb(&p); |
356 | 425 | ||
357 | /* Disable RDHWR access */ | 426 | /* Disable RDHWR access */ |
@@ -559,13 +628,10 @@ void *kvm_mips_build_exit(void *addr) | |||
559 | /* Now that context has been saved, we can use other registers */ | 628 | /* Now that context has been saved, we can use other registers */ |
560 | 629 | ||
561 | /* Restore vcpu */ | 630 | /* Restore vcpu */ |
562 | UASM_i_MFC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]); | 631 | UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]); |
563 | uasm_i_move(&p, S1, A1); | ||
564 | 632 | ||
565 | /* Restore run (vcpu->run) */ | 633 | /* Restore run (vcpu->run) */ |
566 | UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1); | 634 | UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1); |
567 | /* Save pointer to run in s0, will be saved by the compiler */ | ||
568 | uasm_i_move(&p, S0, A0); | ||
569 | 635 | ||
570 | /* | 636 | /* |
571 | * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process | 637 | * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process |
@@ -641,6 +707,52 @@ void *kvm_mips_build_exit(void *addr) | |||
641 | uasm_l_msa_1(&l, p); | 707 | uasm_l_msa_1(&l, p); |
642 | } | 708 | } |
643 | 709 | ||
710 | #ifdef CONFIG_KVM_MIPS_VZ | ||
711 | /* Restore host ASID */ | ||
712 | if (!cpu_has_guestid) { | ||
713 | UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), | ||
714 | K1); | ||
715 | UASM_i_MTC0(&p, K0, C0_ENTRYHI); | ||
716 | } | ||
717 | |||
718 | /* | ||
719 | * Set up normal Linux process pgd. | ||
720 | * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): | ||
721 | * - call tlbmiss_handler_setup_pgd(mm->pgd) | ||
722 | * - write mm->pgd into CP0_PWBase | ||
723 | */ | ||
724 | UASM_i_LW(&p, A0, | ||
725 | offsetof(struct kvm_vcpu_arch, host_pgd), K1); | ||
726 | UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); | ||
727 | uasm_i_jalr(&p, RA, T9); | ||
728 | /* delay slot */ | ||
729 | if (cpu_has_htw) | ||
730 | UASM_i_MTC0(&p, A0, C0_PWBASE); | ||
731 | else | ||
732 | uasm_i_nop(&p); | ||
733 | |||
734 | /* Clear GM bit so we don't enter guest mode when EXL is cleared */ | ||
735 | uasm_i_mfc0(&p, K0, C0_GUESTCTL0); | ||
736 | uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1); | ||
737 | uasm_i_mtc0(&p, K0, C0_GUESTCTL0); | ||
738 | |||
739 | /* Save GuestCtl0 so we can access GExcCode after CPU migration */ | ||
740 | uasm_i_sw(&p, K0, | ||
741 | offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1); | ||
742 | |||
743 | if (cpu_has_guestid) { | ||
744 | /* | ||
745 | * Clear root mode GuestID, so that root TLB operations use the | ||
746 | * root GuestID in the root TLB. | ||
747 | */ | ||
748 | uasm_i_mfc0(&p, T0, C0_GUESTCTL1); | ||
749 | /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */ | ||
750 | uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT, | ||
751 | MIPS_GCTL1_RID_WIDTH); | ||
752 | uasm_i_mtc0(&p, T0, C0_GUESTCTL1); | ||
753 | } | ||
754 | #endif | ||
755 | |||
644 | /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ | 756 | /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ |
645 | uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE)); | 757 | uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE)); |
646 | uasm_i_and(&p, V0, V0, AT); | 758 | uasm_i_and(&p, V0, V0, AT); |
@@ -680,6 +792,8 @@ void *kvm_mips_build_exit(void *addr) | |||
680 | * Now jump to the kvm_mips_handle_exit() to see if we can deal | 792 | * Now jump to the kvm_mips_handle_exit() to see if we can deal |
681 | * with this in the kernel | 793 | * with this in the kernel |
682 | */ | 794 | */ |
795 | uasm_i_move(&p, A0, S0); | ||
796 | uasm_i_move(&p, A1, S1); | ||
683 | UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit); | 797 | UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit); |
684 | uasm_i_jalr(&p, RA, T9); | 798 | uasm_i_jalr(&p, RA, T9); |
685 | UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ); | 799 | UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ); |
diff --git a/arch/mips/kvm/hypcall.c b/arch/mips/kvm/hypcall.c new file mode 100644 index 000000000000..83063435195f --- /dev/null +++ b/arch/mips/kvm/hypcall.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * KVM/MIPS: Hypercall handling. | ||
7 | * | ||
8 | * Copyright (C) 2015 Imagination Technologies Ltd. | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/kvm_host.h> | ||
13 | #include <linux/kvm_para.h> | ||
14 | |||
15 | #define MAX_HYPCALL_ARGS 4 | ||
16 | |||
17 | enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu, | ||
18 | union mips_instruction inst) | ||
19 | { | ||
20 | unsigned int code = (inst.co_format.code >> 5) & 0x3ff; | ||
21 | |||
22 | kvm_debug("[%#lx] HYPCALL %#03x\n", vcpu->arch.pc, code); | ||
23 | |||
24 | switch (code) { | ||
25 | case 0: | ||
26 | return EMULATE_HYPERCALL; | ||
27 | default: | ||
28 | return EMULATE_FAIL; | ||
29 | } | ||
30 | } | ||
31 | |||
32 | static int kvm_mips_hypercall(struct kvm_vcpu *vcpu, unsigned long num, | ||
33 | const unsigned long *args, unsigned long *hret) | ||
34 | { | ||
35 | /* Report unimplemented hypercall to guest */ | ||
36 | *hret = -KVM_ENOSYS; | ||
37 | return RESUME_GUEST; | ||
38 | } | ||
39 | |||
40 | int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu) | ||
41 | { | ||
42 | unsigned long num, args[MAX_HYPCALL_ARGS]; | ||
43 | |||
44 | /* read hypcall number and arguments */ | ||
45 | num = vcpu->arch.gprs[2]; /* v0 */ | ||
46 | args[0] = vcpu->arch.gprs[4]; /* a0 */ | ||
47 | args[1] = vcpu->arch.gprs[5]; /* a1 */ | ||
48 | args[2] = vcpu->arch.gprs[6]; /* a2 */ | ||
49 | args[3] = vcpu->arch.gprs[7]; /* a3 */ | ||
50 | |||
51 | return kvm_mips_hypercall(vcpu, num, | ||
52 | args, &vcpu->arch.gprs[2] /* v0 */); | ||
53 | } | ||
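Editor's note (not part of the patch): kvm_mips_handle_hypcall() above reads the hypercall number from v0, up to four arguments from a0-a3, and returns the result in v0. A guest-side wrapper consistent with that convention could look roughly like the sketch below; the helper name is made up, and the .word is assumed to be the HYPCALL encoding with code 0 so it assembles without VZ-aware binutils.

/* Hypothetical guest helper: one-argument hypercall, ABI as read back by
 * kvm_mips_handle_hypcall() (v0 = number, a0..a3 = args, result in v0). */
static inline unsigned long mips_kvm_hypercall1(unsigned long num,
						unsigned long arg0)
{
	register unsigned long n asm("$2") = num;	/* v0: hypcall number */
	register unsigned long a asm("$4") = arg0;	/* a0: first argument */

	asm volatile(".word 0x42000028"		/* hypcall, code 0 (assumed) */
		     : "+r" (n)
		     : "r" (a)
		     : "memory");

	return n;	/* -KVM_ENOSYS until kvm_mips_hypercall() handles it */
}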
diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h index fb118a2c8379..3bf0a49725e8 100644 --- a/arch/mips/kvm/interrupt.h +++ b/arch/mips/kvm/interrupt.h | |||
@@ -30,8 +30,13 @@ | |||
30 | 30 | ||
31 | #define C_TI (_ULCAST_(1) << 30) | 31 | #define C_TI (_ULCAST_(1) << 30) |
32 | 32 | ||
33 | #ifdef CONFIG_KVM_MIPS_VZ | ||
34 | #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (1) | ||
35 | #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (1) | ||
36 | #else | ||
33 | #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) | 37 | #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) |
34 | #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0) | 38 | #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0) |
39 | #endif | ||
35 | 40 | ||
36 | void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority); | 41 | void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority); |
37 | void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority); | 42 | void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority); |
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 15a1b1716c2e..d4b2ad18eef2 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c | |||
@@ -59,6 +59,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
59 | { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, | 59 | { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, |
60 | { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, | 60 | { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, |
61 | { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, | 61 | { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, |
62 | #ifdef CONFIG_KVM_MIPS_VZ | ||
63 | { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU }, | ||
64 | { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU }, | ||
65 | { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU }, | ||
66 | { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU }, | ||
67 | { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU }, | ||
68 | { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU }, | ||
69 | { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU }, | ||
70 | { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU }, | ||
71 | #endif | ||
62 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, | 72 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, |
63 | { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, | 73 | { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, |
64 | { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, | 74 | { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, |
@@ -66,6 +76,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
66 | {NULL} | 76 | {NULL} |
67 | }; | 77 | }; |
68 | 78 | ||
79 | bool kvm_trace_guest_mode_change; | ||
80 | |||
81 | int kvm_guest_mode_change_trace_reg(void) | ||
82 | { | ||
83 | kvm_trace_guest_mode_change = 1; | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | void kvm_guest_mode_change_trace_unreg(void) | ||
88 | { | ||
89 | kvm_trace_guest_mode_change = 0; | ||
90 | } | ||
91 | |||
69 | /* | 92 | /* |
70 | * XXXKYMA: We are simulating a processor that has the WII bit set in | 93 | * XXXKYMA: We are simulating a processor that has the WII bit set in |
71 | * Config7, so we are "runnable" if interrupts are pending | 94 | * Config7, so we are "runnable" if interrupts are pending |
@@ -82,7 +105,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | |||
82 | 105 | ||
83 | int kvm_arch_hardware_enable(void) | 106 | int kvm_arch_hardware_enable(void) |
84 | { | 107 | { |
85 | return 0; | 108 | return kvm_mips_callbacks->hardware_enable(); |
109 | } | ||
110 | |||
111 | void kvm_arch_hardware_disable(void) | ||
112 | { | ||
113 | kvm_mips_callbacks->hardware_disable(); | ||
86 | } | 114 | } |
87 | 115 | ||
88 | int kvm_arch_hardware_setup(void) | 116 | int kvm_arch_hardware_setup(void) |
@@ -97,6 +125,18 @@ void kvm_arch_check_processor_compat(void *rtn) | |||
97 | 125 | ||
98 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | 126 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
99 | { | 127 | { |
128 | switch (type) { | ||
129 | #ifdef CONFIG_KVM_MIPS_VZ | ||
130 | case KVM_VM_MIPS_VZ: | ||
131 | #else | ||
132 | case KVM_VM_MIPS_TE: | ||
133 | #endif | ||
134 | break; | ||
135 | default: | ||
136 | /* Unsupported KVM type */ | ||
137 | return -EINVAL; | ||
138 | } | ||
139 | |||
100 | /* Allocate page table to map GPA -> RPA */ | 140 | /* Allocate page table to map GPA -> RPA */ |
101 | kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); | 141 | kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); |
102 | if (!kvm->arch.gpa_mm.pgd) | 142 | if (!kvm->arch.gpa_mm.pgd) |
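Editor's note (not part of the patch): with kvm_arch_init_vm() now rejecting unknown machine types, userspace must pass whichever of KVM_VM_MIPS_TE or KVM_VM_MIPS_VZ the kernel supports to KVM_CREATE_VM. A hedged sketch of that selection follows; error handling is omitted, and KVM_CAP_MIPS_VZ plus the KVM_VM_MIPS_* constants are assumed to come from the uapi headers added elsewhere in this series.

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_mips_vm(int kvm_fd)
{
	/* Default to trap & emulate; ask for VZ only if the host has it. */
	unsigned long type = KVM_VM_MIPS_TE;

	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ) > 0)
		type = KVM_VM_MIPS_VZ;

	return ioctl(kvm_fd, KVM_CREATE_VM, type);
}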
@@ -301,8 +341,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
301 | /* Build guest exception vectors dynamically in unmapped memory */ | 341 | /* Build guest exception vectors dynamically in unmapped memory */ |
302 | handler = gebase + 0x2000; | 342 | handler = gebase + 0x2000; |
303 | 343 | ||
304 | /* TLB refill */ | 344 | /* TLB refill (or XTLB refill on 64-bit VZ where KX=1) */ |
305 | refill_start = gebase; | 345 | refill_start = gebase; |
346 | if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && IS_ENABLED(CONFIG_64BIT)) | ||
347 | refill_start += 0x080; | ||
306 | refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); | 348 | refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); |
307 | 349 | ||
308 | /* General Exception Entry point */ | 350 | /* General Exception Entry point */ |
@@ -353,9 +395,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
353 | 395 | ||
354 | /* Init */ | 396 | /* Init */ |
355 | vcpu->arch.last_sched_cpu = -1; | 397 | vcpu->arch.last_sched_cpu = -1; |
356 | 398 | vcpu->arch.last_exec_cpu = -1; | |
357 | /* Start off the timer */ | ||
358 | kvm_mips_init_count(vcpu); | ||
359 | 399 | ||
360 | return vcpu; | 400 | return vcpu; |
361 | 401 | ||
@@ -1030,9 +1070,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
1030 | case KVM_CAP_IMMEDIATE_EXIT: | 1070 | case KVM_CAP_IMMEDIATE_EXIT: |
1031 | r = 1; | 1071 | r = 1; |
1032 | break; | 1072 | break; |
1033 | case KVM_CAP_COALESCED_MMIO: | ||
1034 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | ||
1035 | break; | ||
1036 | case KVM_CAP_NR_VCPUS: | 1073 | case KVM_CAP_NR_VCPUS: |
1037 | r = num_online_cpus(); | 1074 | r = num_online_cpus(); |
1038 | break; | 1075 | break; |
@@ -1059,7 +1096,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
1059 | r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); | 1096 | r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); |
1060 | break; | 1097 | break; |
1061 | default: | 1098 | default: |
1062 | r = 0; | 1099 | r = kvm_mips_callbacks->check_extension(kvm, ext); |
1063 | break; | 1100 | break; |
1064 | } | 1101 | } |
1065 | return r; | 1102 | return r; |
@@ -1067,7 +1104,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
1067 | 1104 | ||
1068 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | 1105 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) |
1069 | { | 1106 | { |
1070 | return kvm_mips_pending_timer(vcpu); | 1107 | return kvm_mips_pending_timer(vcpu) || |
1108 | kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI; | ||
1071 | } | 1109 | } |
1072 | 1110 | ||
1073 | int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) | 1111 | int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) |
@@ -1092,7 +1130,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) | |||
1092 | kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); | 1130 | kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); |
1093 | 1131 | ||
1094 | cop0 = vcpu->arch.cop0; | 1132 | cop0 = vcpu->arch.cop0; |
1095 | kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n", | 1133 | kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n", |
1096 | kvm_read_c0_guest_status(cop0), | 1134 | kvm_read_c0_guest_status(cop0), |
1097 | kvm_read_c0_guest_cause(cop0)); | 1135 | kvm_read_c0_guest_cause(cop0)); |
1098 | 1136 | ||
@@ -1208,7 +1246,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1208 | vcpu->mode = OUTSIDE_GUEST_MODE; | 1246 | vcpu->mode = OUTSIDE_GUEST_MODE; |
1209 | 1247 | ||
1210 | /* re-enable HTW before enabling interrupts */ | 1248 | /* re-enable HTW before enabling interrupts */ |
1211 | htw_start(); | 1249 | if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) |
1250 | htw_start(); | ||
1212 | 1251 | ||
1213 | /* Set a default exit reason */ | 1252 | /* Set a default exit reason */ |
1214 | run->exit_reason = KVM_EXIT_UNKNOWN; | 1253 | run->exit_reason = KVM_EXIT_UNKNOWN; |
@@ -1226,17 +1265,20 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1226 | cause, opc, run, vcpu); | 1265 | cause, opc, run, vcpu); |
1227 | trace_kvm_exit(vcpu, exccode); | 1266 | trace_kvm_exit(vcpu, exccode); |
1228 | 1267 | ||
1229 | /* | 1268 | if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { |
1230 | * Do a privilege check, if in UM most of these exit conditions end up | 1269 | /* |
1231 | * causing an exception to be delivered to the Guest Kernel | 1270 | * Do a privilege check, if in UM most of these exit conditions |
1232 | */ | 1271 | * end up causing an exception to be delivered to the Guest |
1233 | er = kvm_mips_check_privilege(cause, opc, run, vcpu); | 1272 | * Kernel |
1234 | if (er == EMULATE_PRIV_FAIL) { | 1273 | */ |
1235 | goto skip_emul; | 1274 | er = kvm_mips_check_privilege(cause, opc, run, vcpu); |
1236 | } else if (er == EMULATE_FAIL) { | 1275 | if (er == EMULATE_PRIV_FAIL) { |
1237 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 1276 | goto skip_emul; |
1238 | ret = RESUME_HOST; | 1277 | } else if (er == EMULATE_FAIL) { |
1239 | goto skip_emul; | 1278 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
1279 | ret = RESUME_HOST; | ||
1280 | goto skip_emul; | ||
1281 | } | ||
1240 | } | 1282 | } |
1241 | 1283 | ||
1242 | switch (exccode) { | 1284 | switch (exccode) { |
@@ -1267,7 +1309,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1267 | break; | 1309 | break; |
1268 | 1310 | ||
1269 | case EXCCODE_TLBS: | 1311 | case EXCCODE_TLBS: |
1270 | kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n", | 1312 | kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n", |
1271 | cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, | 1313 | cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, |
1272 | badvaddr); | 1314 | badvaddr); |
1273 | 1315 | ||
@@ -1328,12 +1370,17 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1328 | ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); | 1370 | ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); |
1329 | break; | 1371 | break; |
1330 | 1372 | ||
1373 | case EXCCODE_GE: | ||
1374 | /* defer exit accounting to handler */ | ||
1375 | ret = kvm_mips_callbacks->handle_guest_exit(vcpu); | ||
1376 | break; | ||
1377 | |||
1331 | default: | 1378 | default: |
1332 | if (cause & CAUSEF_BD) | 1379 | if (cause & CAUSEF_BD) |
1333 | opc += 1; | 1380 | opc += 1; |
1334 | inst = 0; | 1381 | inst = 0; |
1335 | kvm_get_badinstr(opc, vcpu, &inst); | 1382 | kvm_get_badinstr(opc, vcpu, &inst); |
1336 | kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", | 1383 | kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", |
1337 | exccode, opc, inst, badvaddr, | 1384 | exccode, opc, inst, badvaddr, |
1338 | kvm_read_c0_guest_status(vcpu->arch.cop0)); | 1385 | kvm_read_c0_guest_status(vcpu->arch.cop0)); |
1339 | kvm_arch_vcpu_dump_regs(vcpu); | 1386 | kvm_arch_vcpu_dump_regs(vcpu); |
@@ -1346,6 +1393,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1346 | skip_emul: | 1393 | skip_emul: |
1347 | local_irq_disable(); | 1394 | local_irq_disable(); |
1348 | 1395 | ||
1396 | if (ret == RESUME_GUEST) | ||
1397 | kvm_vz_acquire_htimer(vcpu); | ||
1398 | |||
1349 | if (er == EMULATE_DONE && !(ret & RESUME_HOST)) | 1399 | if (er == EMULATE_DONE && !(ret & RESUME_HOST)) |
1350 | kvm_mips_deliver_interrupts(vcpu, cause); | 1400 | kvm_mips_deliver_interrupts(vcpu, cause); |
1351 | 1401 | ||
@@ -1391,7 +1441,8 @@ skip_emul: | |||
1391 | } | 1441 | } |
1392 | 1442 | ||
1393 | /* Disable HTW before returning to guest or host */ | 1443 | /* Disable HTW before returning to guest or host */ |
1394 | htw_stop(); | 1444 | if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) |
1445 | htw_stop(); | ||
1395 | 1446 | ||
1396 | return ret; | 1447 | return ret; |
1397 | } | 1448 | } |
@@ -1527,16 +1578,18 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu) | |||
1527 | void kvm_lose_fpu(struct kvm_vcpu *vcpu) | 1578 | void kvm_lose_fpu(struct kvm_vcpu *vcpu) |
1528 | { | 1579 | { |
1529 | /* | 1580 | /* |
1530 | * FPU & MSA get disabled in root context (hardware) when it is disabled | 1581 | * With T&E, FPU & MSA get disabled in root context (hardware) when it |
1531 | * in guest context (software), but the register state in the hardware | 1582 | * is disabled in guest context (software), but the register state in |
1532 | * may still be in use. This is why we explicitly re-enable the hardware | 1583 | * the hardware may still be in use. |
1533 | * before saving. | 1584 | * This is why we explicitly re-enable the hardware before saving. |
1534 | */ | 1585 | */ |
1535 | 1586 | ||
1536 | preempt_disable(); | 1587 | preempt_disable(); |
1537 | if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { | 1588 | if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { |
1538 | set_c0_config5(MIPS_CONF5_MSAEN); | 1589 | if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { |
1539 | enable_fpu_hazard(); | 1590 | set_c0_config5(MIPS_CONF5_MSAEN); |
1591 | enable_fpu_hazard(); | ||
1592 | } | ||
1540 | 1593 | ||
1541 | __kvm_save_msa(&vcpu->arch); | 1594 | __kvm_save_msa(&vcpu->arch); |
1542 | trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA); | 1595 | trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA); |
@@ -1549,8 +1602,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) | |||
1549 | } | 1602 | } |
1550 | vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA); | 1603 | vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA); |
1551 | } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { | 1604 | } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { |
1552 | set_c0_status(ST0_CU1); | 1605 | if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { |
1553 | enable_fpu_hazard(); | 1606 | set_c0_status(ST0_CU1); |
1607 | enable_fpu_hazard(); | ||
1608 | } | ||
1554 | 1609 | ||
1555 | __kvm_save_fpu(&vcpu->arch); | 1610 | __kvm_save_fpu(&vcpu->arch); |
1556 | vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; | 1611 | vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; |
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index cb0faade311e..ee64db032793 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c | |||
@@ -992,6 +992,22 @@ static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo) | |||
992 | return kvm_mips_gpa_pte_to_gva_unmapped(pte); | 992 | return kvm_mips_gpa_pte_to_gva_unmapped(pte); |
993 | } | 993 | } |
994 | 994 | ||
995 | #ifdef CONFIG_KVM_MIPS_VZ | ||
996 | int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, | ||
997 | struct kvm_vcpu *vcpu, | ||
998 | bool write_fault) | ||
999 | { | ||
1000 | int ret; | ||
1001 | |||
1002 | ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL); | ||
1003 | if (ret) | ||
1004 | return ret; | ||
1005 | |||
1006 | /* Invalidate this entry in the TLB */ | ||
1007 | return kvm_vz_host_tlb_inv(vcpu, badvaddr); | ||
1008 | } | ||
1009 | #endif | ||
1010 | |||
995 | /* XXXKYMA: Must be called with interrupts disabled */ | 1011 | /* XXXKYMA: Must be called with interrupts disabled */ |
996 | int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, | 1012 | int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, |
997 | struct kvm_vcpu *vcpu, | 1013 | struct kvm_vcpu *vcpu, |
@@ -1225,6 +1241,10 @@ int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) | |||
1225 | { | 1241 | { |
1226 | int err; | 1242 | int err; |
1227 | 1243 | ||
1244 | if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ), | ||
1245 | "Expect BadInstr/BadInstrP registers to be used with VZ\n")) | ||
1246 | return -EINVAL; | ||
1247 | |||
1228 | retry: | 1248 | retry: |
1229 | kvm_trap_emul_gva_lockless_begin(vcpu); | 1249 | kvm_trap_emul_gva_lockless_begin(vcpu); |
1230 | err = get_user(*out, opc); | 1250 | err = get_user(*out, opc); |
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 2819eb793345..7c6336dd2638 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c | |||
@@ -33,6 +33,25 @@ | |||
33 | #define KVM_GUEST_PC_TLB 0 | 33 | #define KVM_GUEST_PC_TLB 0 |
34 | #define KVM_GUEST_SP_TLB 1 | 34 | #define KVM_GUEST_SP_TLB 1 |
35 | 35 | ||
36 | #ifdef CONFIG_KVM_MIPS_VZ | ||
37 | unsigned long GUESTID_MASK; | ||
38 | EXPORT_SYMBOL_GPL(GUESTID_MASK); | ||
39 | unsigned long GUESTID_FIRST_VERSION; | ||
40 | EXPORT_SYMBOL_GPL(GUESTID_FIRST_VERSION); | ||
41 | unsigned long GUESTID_VERSION_MASK; | ||
42 | EXPORT_SYMBOL_GPL(GUESTID_VERSION_MASK); | ||
43 | |||
44 | static u32 kvm_mips_get_root_asid(struct kvm_vcpu *vcpu) | ||
45 | { | ||
46 | struct mm_struct *gpa_mm = &vcpu->kvm->arch.gpa_mm; | ||
47 | |||
48 | if (cpu_has_guestid) | ||
49 | return 0; | ||
50 | else | ||
51 | return cpu_asid(smp_processor_id(), gpa_mm); | ||
52 | } | ||
53 | #endif | ||
54 | |||
36 | static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) | 55 | static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) |
37 | { | 56 | { |
38 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | 57 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; |
@@ -166,6 +185,13 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, | |||
166 | 185 | ||
167 | local_irq_restore(flags); | 186 | local_irq_restore(flags); |
168 | 187 | ||
188 | /* | ||
189 | * We don't want to get reserved instruction exceptions for missing tlb | ||
190 | * entries. | ||
191 | */ | ||
192 | if (cpu_has_vtag_icache) | ||
193 | flush_icache_all(); | ||
194 | |||
169 | if (user && idx_user >= 0) | 195 | if (user && idx_user >= 0) |
170 | kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n", | 196 | kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n", |
171 | __func__, (va & VPN2_MASK) | | 197 | __func__, (va & VPN2_MASK) | |
@@ -179,6 +205,421 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, | |||
179 | } | 205 | } |
180 | EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); | 206 | EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); |
181 | 207 | ||
208 | #ifdef CONFIG_KVM_MIPS_VZ | ||
209 | |||
210 | /* GuestID management */ | ||
211 | |||
212 | /** | ||
213 | * clear_root_gid() - Set GuestCtl1.RID for normal root operation. | ||
214 | */ | ||
215 | static inline void clear_root_gid(void) | ||
216 | { | ||
217 | if (cpu_has_guestid) { | ||
218 | clear_c0_guestctl1(MIPS_GCTL1_RID); | ||
219 | mtc0_tlbw_hazard(); | ||
220 | } | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * set_root_gid_to_guest_gid() - Set GuestCtl1.RID to match GuestCtl1.ID. | ||
225 | * | ||
226 | * Sets the root GuestID to match the current guest GuestID, for TLB operation | ||
227 | * on the GPA->RPA mappings in the root TLB. | ||
228 | * | ||
229 | * The caller must be sure to disable HTW while the root GID is set, and | ||
230 | * possibly longer if TLB registers are modified. | ||
231 | */ | ||
232 | static inline void set_root_gid_to_guest_gid(void) | ||
233 | { | ||
234 | unsigned int guestctl1; | ||
235 | |||
236 | if (cpu_has_guestid) { | ||
237 | back_to_back_c0_hazard(); | ||
238 | guestctl1 = read_c0_guestctl1(); | ||
239 | guestctl1 = (guestctl1 & ~MIPS_GCTL1_RID) | | ||
240 | ((guestctl1 & MIPS_GCTL1_ID) >> MIPS_GCTL1_ID_SHIFT) | ||
241 | << MIPS_GCTL1_RID_SHIFT; | ||
242 | write_c0_guestctl1(guestctl1); | ||
243 | mtc0_tlbw_hazard(); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) | ||
248 | { | ||
249 | int idx; | ||
250 | unsigned long flags, old_entryhi; | ||
251 | |||
252 | local_irq_save(flags); | ||
253 | htw_stop(); | ||
254 | |||
255 | /* Set root GuestID for root probe and write of guest TLB entry */ | ||
256 | set_root_gid_to_guest_gid(); | ||
257 | |||
258 | old_entryhi = read_c0_entryhi(); | ||
259 | |||
260 | idx = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | | ||
261 | kvm_mips_get_root_asid(vcpu)); | ||
262 | |||
263 | write_c0_entryhi(old_entryhi); | ||
264 | clear_root_gid(); | ||
265 | mtc0_tlbw_hazard(); | ||
266 | |||
267 | htw_start(); | ||
268 | local_irq_restore(flags); | ||
269 | |||
270 | /* | ||
271 | * We don't want to get reserved instruction exceptions for missing tlb | ||
272 | * entries. | ||
273 | */ | ||
274 | if (cpu_has_vtag_icache) | ||
275 | flush_icache_all(); | ||
276 | |||
277 | if (idx > 0) | ||
278 | kvm_debug("%s: Invalidated root entryhi %#lx @ idx %d\n", | ||
279 | __func__, (va & VPN2_MASK) | | ||
280 | kvm_mips_get_root_asid(vcpu), idx); | ||
281 | |||
282 | return 0; | ||
283 | } | ||
284 | EXPORT_SYMBOL_GPL(kvm_vz_host_tlb_inv); | ||
285 | |||
286 | /** | ||
287 | * kvm_vz_guest_tlb_lookup() - Lookup a guest VZ TLB mapping. | ||
288 | * @vcpu: KVM VCPU pointer. | ||
289 | * @gva: Guest virtual address in a TLB mapped guest segment. | ||
290 | * @gpa: Pointer to output guest physical address it maps to. | ||
291 | * | ||
292 | * Converts a guest virtual address in a guest TLB mapped segment to a guest | ||
293 | * physical address, by probing the guest TLB. | ||
294 | * | ||
295 | * Returns: 0 if guest TLB mapping exists for @gva. *@gpa will have been | ||
296 | * written. | ||
297 | * -EFAULT if no guest TLB mapping exists for @gva. *@gpa may not | ||
298 | * have been written. | ||
299 | */ | ||
300 | int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva, | ||
301 | unsigned long *gpa) | ||
302 | { | ||
303 | unsigned long o_entryhi, o_entrylo[2], o_pagemask; | ||
304 | unsigned int o_index; | ||
305 | unsigned long entrylo[2], pagemask, pagemaskbit, pa; | ||
306 | unsigned long flags; | ||
307 | int index; | ||
308 | |||
309 | /* Probe the guest TLB for a mapping */ | ||
310 | local_irq_save(flags); | ||
311 | /* Set root GuestID for root probe of guest TLB entry */ | ||
312 | htw_stop(); | ||
313 | set_root_gid_to_guest_gid(); | ||
314 | |||
315 | o_entryhi = read_gc0_entryhi(); | ||
316 | o_index = read_gc0_index(); | ||
317 | |||
318 | write_gc0_entryhi((o_entryhi & 0x3ff) | (gva & ~0xfffl)); | ||
319 | mtc0_tlbw_hazard(); | ||
320 | guest_tlb_probe(); | ||
321 | tlb_probe_hazard(); | ||
322 | |||
323 | index = read_gc0_index(); | ||
324 | if (index < 0) { | ||
325 | /* No match, fail */ | ||
326 | write_gc0_entryhi(o_entryhi); | ||
327 | write_gc0_index(o_index); | ||
328 | |||
329 | clear_root_gid(); | ||
330 | htw_start(); | ||
331 | local_irq_restore(flags); | ||
332 | return -EFAULT; | ||
333 | } | ||
334 | |||
335 | /* Match! read the TLB entry */ | ||
336 | o_entrylo[0] = read_gc0_entrylo0(); | ||
337 | o_entrylo[1] = read_gc0_entrylo1(); | ||
338 | o_pagemask = read_gc0_pagemask(); | ||
339 | |||
340 | mtc0_tlbr_hazard(); | ||
341 | guest_tlb_read(); | ||
342 | tlb_read_hazard(); | ||
343 | |||
344 | entrylo[0] = read_gc0_entrylo0(); | ||
345 | entrylo[1] = read_gc0_entrylo1(); | ||
346 | pagemask = ~read_gc0_pagemask() & ~0x1fffl; | ||
347 | |||
348 | write_gc0_entryhi(o_entryhi); | ||
349 | write_gc0_index(o_index); | ||
350 | write_gc0_entrylo0(o_entrylo[0]); | ||
351 | write_gc0_entrylo1(o_entrylo[1]); | ||
352 | write_gc0_pagemask(o_pagemask); | ||
353 | |||
354 | clear_root_gid(); | ||
355 | htw_start(); | ||
356 | local_irq_restore(flags); | ||
357 | |||
358 | /* Select one of the EntryLo values and interpret the GPA */ | ||
359 | pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1; | ||
360 | pa = entrylo[!!(gva & pagemaskbit)]; | ||
361 | |||
362 | /* | ||
363 | * TLB entry may have become invalid since TLB probe if physical FTLB | ||
364 | * entries are shared between threads (e.g. I6400). | ||
365 | */ | ||
366 | if (!(pa & ENTRYLO_V)) | ||
367 | return -EFAULT; | ||
368 | |||
369 | /* | ||
370 | * Note, this doesn't take guest MIPS32 XPA into account, where PFN is | ||
371 | * split with XI/RI in the middle. | ||
372 | */ | ||
373 | pa = (pa << 6) & ~0xfffl; | ||
374 | pa |= gva & ~(pagemask | pagemaskbit); | ||
375 | |||
376 | *gpa = pa; | ||
377 | return 0; | ||
378 | } | ||
379 | EXPORT_SYMBOL_GPL(kvm_vz_guest_tlb_lookup); | ||
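Editor's note (not part of the patch): in the EntryLo decode above, (pagemask ^ (pagemask & (pagemask - 1))) isolates the lowest set bit of the effective page mask, and shifting it right once gives the bit that picks the even or odd page of the pair. A short worked example for the common case of 4 KiB pages, where the guest PageMask register reads 0:

/*
 *   pagemask    = ~0 & ~0x1fffUL    ->  ...ffffe000 (bits 13 and up set)
 *   lowest bit                       =  0x2000
 *   pagemaskbit = 0x2000 >> 1        =  0x1000
 *
 * GVA bit 12 then selects EntryLo0 (even page) or EntryLo1 (odd page).
 * The PFN starts at bit 6 of EntryLo, so "pa << 6" rebuilds physical
 * address bits 12 and up, and the in-page offset comes straight from
 * the low 12 bits of the GVA.
 */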
380 | |||
381 | /** | ||
382 | * kvm_vz_local_flush_roottlb_all_guests() - Flush all root TLB entries for | ||
383 | * guests. | ||
384 | * | ||
385 | * Invalidate all entries in root tlb which are GPA mappings. | ||
386 | */ | ||
387 | void kvm_vz_local_flush_roottlb_all_guests(void) | ||
388 | { | ||
389 | unsigned long flags; | ||
390 | unsigned long old_entryhi, old_pagemask, old_guestctl1; | ||
391 | int entry; | ||
392 | |||
393 | if (WARN_ON(!cpu_has_guestid)) | ||
394 | return; | ||
395 | |||
396 | local_irq_save(flags); | ||
397 | htw_stop(); | ||
398 | |||
399 | /* TLBR may clobber EntryHi.ASID, PageMask, and GuestCtl1.RID */ | ||
400 | old_entryhi = read_c0_entryhi(); | ||
401 | old_pagemask = read_c0_pagemask(); | ||
402 | old_guestctl1 = read_c0_guestctl1(); | ||
403 | |||
404 | /* | ||
405 | * Invalidate guest entries in root TLB while leaving root entries | ||
406 | * intact when possible. | ||
407 | */ | ||
408 | for (entry = 0; entry < current_cpu_data.tlbsize; entry++) { | ||
409 | write_c0_index(entry); | ||
410 | mtc0_tlbw_hazard(); | ||
411 | tlb_read(); | ||
412 | tlb_read_hazard(); | ||
413 | |||
414 | /* Don't invalidate non-guest (RVA) mappings in the root TLB */ | ||
415 | if (!(read_c0_guestctl1() & MIPS_GCTL1_RID)) | ||
416 | continue; | ||
417 | |||
418 | /* Make sure all entries differ. */ | ||
419 | write_c0_entryhi(UNIQUE_ENTRYHI(entry)); | ||
420 | write_c0_entrylo0(0); | ||
421 | write_c0_entrylo1(0); | ||
422 | write_c0_guestctl1(0); | ||
423 | mtc0_tlbw_hazard(); | ||
424 | tlb_write_indexed(); | ||
425 | } | ||
426 | |||
427 | write_c0_entryhi(old_entryhi); | ||
428 | write_c0_pagemask(old_pagemask); | ||
429 | write_c0_guestctl1(old_guestctl1); | ||
430 | tlbw_use_hazard(); | ||
431 | |||
432 | htw_start(); | ||
433 | local_irq_restore(flags); | ||
434 | } | ||
435 | EXPORT_SYMBOL_GPL(kvm_vz_local_flush_roottlb_all_guests); | ||
436 | |||
437 | /** | ||
438 | * kvm_vz_local_flush_guesttlb_all() - Flush all guest TLB entries. | ||
439 | * | ||
440 | * Invalidate all entries in guest tlb irrespective of guestid. | ||
441 | */ | ||
442 | void kvm_vz_local_flush_guesttlb_all(void) | ||
443 | { | ||
444 | unsigned long flags; | ||
445 | unsigned long old_index; | ||
446 | unsigned long old_entryhi; | ||
447 | unsigned long old_entrylo[2]; | ||
448 | unsigned long old_pagemask; | ||
449 | int entry; | ||
450 | u64 cvmmemctl2 = 0; | ||
451 | |||
452 | local_irq_save(flags); | ||
453 | |||
454 | /* Preserve all clobbered guest registers */ | ||
455 | old_index = read_gc0_index(); | ||
456 | old_entryhi = read_gc0_entryhi(); | ||
457 | old_entrylo[0] = read_gc0_entrylo0(); | ||
458 | old_entrylo[1] = read_gc0_entrylo1(); | ||
459 | old_pagemask = read_gc0_pagemask(); | ||
460 | |||
461 | switch (current_cpu_type()) { | ||
462 | case CPU_CAVIUM_OCTEON3: | ||
463 | /* Inhibit machine check due to multiple matching TLB entries */ | ||
464 | cvmmemctl2 = read_c0_cvmmemctl2(); | ||
465 | cvmmemctl2 |= CVMMEMCTL2_INHIBITTS; | ||
466 | write_c0_cvmmemctl2(cvmmemctl2); | ||
467 | break; | ||
468 | } | ||
469 | |||
470 | /* Invalidate guest entries in guest TLB */ | ||
471 | write_gc0_entrylo0(0); | ||
472 | write_gc0_entrylo1(0); | ||
473 | write_gc0_pagemask(0); | ||
474 | for (entry = 0; entry < current_cpu_data.guest.tlbsize; entry++) { | ||
475 | /* Make sure all entries differ. */ | ||
476 | write_gc0_index(entry); | ||
477 | write_gc0_entryhi(UNIQUE_GUEST_ENTRYHI(entry)); | ||
478 | mtc0_tlbw_hazard(); | ||
479 | guest_tlb_write_indexed(); | ||
480 | } | ||
481 | |||
482 | if (cvmmemctl2) { | ||
483 | cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS; | ||
484 | write_c0_cvmmemctl2(cvmmemctl2); | ||
485 | } | ||
486 | |||
487 | write_gc0_index(old_index); | ||
488 | write_gc0_entryhi(old_entryhi); | ||
489 | write_gc0_entrylo0(old_entrylo[0]); | ||
490 | write_gc0_entrylo1(old_entrylo[1]); | ||
491 | write_gc0_pagemask(old_pagemask); | ||
492 | tlbw_use_hazard(); | ||
493 | |||
494 | local_irq_restore(flags); | ||
495 | } | ||
496 | EXPORT_SYMBOL_GPL(kvm_vz_local_flush_guesttlb_all); | ||
497 | |||
498 | /** | ||
499 | * kvm_vz_save_guesttlb() - Save a range of guest TLB entries. | ||
500 | * @buf: Buffer to write TLB entries into. | ||
501 | * @index: Start index. | ||
502 | * @count: Number of entries to save. | ||
503 | * | ||
504 | * Save a range of guest TLB entries. The caller must ensure interrupts are | ||
505 | * disabled. | ||
506 | */ | ||
507 | void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index, | ||
508 | unsigned int count) | ||
509 | { | ||
510 | unsigned int end = index + count; | ||
511 | unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask; | ||
512 | unsigned int guestctl1 = 0; | ||
513 | int old_index, i; | ||
514 | |||
515 | /* Save registers we're about to clobber */ | ||
516 | old_index = read_gc0_index(); | ||
517 | old_entryhi = read_gc0_entryhi(); | ||
518 | old_entrylo0 = read_gc0_entrylo0(); | ||
519 | old_entrylo1 = read_gc0_entrylo1(); | ||
520 | old_pagemask = read_gc0_pagemask(); | ||
521 | |||
522 | /* Set root GuestID for root probe */ | ||
523 | htw_stop(); | ||
524 | set_root_gid_to_guest_gid(); | ||
525 | if (cpu_has_guestid) | ||
526 | guestctl1 = read_c0_guestctl1(); | ||
527 | |||
528 | /* Read each entry from guest TLB */ | ||
529 | for (i = index; i < end; ++i, ++buf) { | ||
530 | write_gc0_index(i); | ||
531 | |||
532 | mtc0_tlbr_hazard(); | ||
533 | guest_tlb_read(); | ||
534 | tlb_read_hazard(); | ||
535 | |||
536 | if (cpu_has_guestid && | ||
537 | (read_c0_guestctl1() ^ guestctl1) & MIPS_GCTL1_RID) { | ||
538 | /* Entry invalid or belongs to another guest */ | ||
539 | buf->tlb_hi = UNIQUE_GUEST_ENTRYHI(i); | ||
540 | buf->tlb_lo[0] = 0; | ||
541 | buf->tlb_lo[1] = 0; | ||
542 | buf->tlb_mask = 0; | ||
543 | } else { | ||
544 | /* Entry belongs to the right guest */ | ||
545 | buf->tlb_hi = read_gc0_entryhi(); | ||
546 | buf->tlb_lo[0] = read_gc0_entrylo0(); | ||
547 | buf->tlb_lo[1] = read_gc0_entrylo1(); | ||
548 | buf->tlb_mask = read_gc0_pagemask(); | ||
549 | } | ||
550 | } | ||
551 | |||
552 | /* Clear root GuestID again */ | ||
553 | clear_root_gid(); | ||
554 | htw_start(); | ||
555 | |||
556 | /* Restore clobbered registers */ | ||
557 | write_gc0_index(old_index); | ||
558 | write_gc0_entryhi(old_entryhi); | ||
559 | write_gc0_entrylo0(old_entrylo0); | ||
560 | write_gc0_entrylo1(old_entrylo1); | ||
561 | write_gc0_pagemask(old_pagemask); | ||
562 | |||
563 | tlbw_use_hazard(); | ||
564 | } | ||
565 | EXPORT_SYMBOL_GPL(kvm_vz_save_guesttlb); | ||
566 | |||
567 | /** | ||
568 | * kvm_vz_load_guesttlb() - Load a range of guest TLB entries. | ||
569 | * @buf: Buffer to read TLB entries from. | ||
570 | * @index: Start index. | ||
571 | * @count: Number of entries to load. | ||
572 | * | ||
573 | * Load a range of guest TLB entries. The caller must ensure interrupts are | ||
574 | * disabled. | ||
575 | */ | ||
576 | void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index, | ||
577 | unsigned int count) | ||
578 | { | ||
579 | unsigned int end = index + count; | ||
580 | unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask; | ||
581 | int old_index, i; | ||
582 | |||
583 | /* Save registers we're about to clobber */ | ||
584 | old_index = read_gc0_index(); | ||
585 | old_entryhi = read_gc0_entryhi(); | ||
586 | old_entrylo0 = read_gc0_entrylo0(); | ||
587 | old_entrylo1 = read_gc0_entrylo1(); | ||
588 | old_pagemask = read_gc0_pagemask(); | ||
589 | |||
590 | /* Set root GuestID for root probe */ | ||
591 | htw_stop(); | ||
592 | set_root_gid_to_guest_gid(); | ||
593 | |||
594 | /* Write each entry to guest TLB */ | ||
595 | for (i = index; i < end; ++i, ++buf) { | ||
596 | write_gc0_index(i); | ||
597 | write_gc0_entryhi(buf->tlb_hi); | ||
598 | write_gc0_entrylo0(buf->tlb_lo[0]); | ||
599 | write_gc0_entrylo1(buf->tlb_lo[1]); | ||
600 | write_gc0_pagemask(buf->tlb_mask); | ||
601 | |||
602 | mtc0_tlbw_hazard(); | ||
603 | guest_tlb_write_indexed(); | ||
604 | } | ||
605 | |||
606 | /* Clear root GuestID again */ | ||
607 | clear_root_gid(); | ||
608 | htw_start(); | ||
609 | |||
610 | /* Restore clobbered registers */ | ||
611 | write_gc0_index(old_index); | ||
612 | write_gc0_entryhi(old_entryhi); | ||
613 | write_gc0_entrylo0(old_entrylo0); | ||
614 | write_gc0_entrylo1(old_entrylo1); | ||
615 | write_gc0_pagemask(old_pagemask); | ||
616 | |||
617 | tlbw_use_hazard(); | ||
618 | } | ||
619 | EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb); | ||
620 | |||
621 | #endif | ||
622 | |||
182 | /** | 623 | /** |
183 | * kvm_mips_suspend_mm() - Suspend the active mm. | 624 | * kvm_mips_suspend_mm() - Suspend the active mm. |
184 | * @cpu The CPU we're running on. | 625 | * @cpu The CPU we're running on. |
diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index c858cf168078..a8c7fd7bf6d2 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h | |||
@@ -18,6 +18,13 @@ | |||
18 | #define TRACE_INCLUDE_FILE trace | 18 | #define TRACE_INCLUDE_FILE trace |
19 | 19 | ||
20 | /* | 20 | /* |
21 | * arch/mips/kvm/mips.c | ||
22 | */ | ||
23 | extern bool kvm_trace_guest_mode_change; | ||
24 | int kvm_guest_mode_change_trace_reg(void); | ||
25 | void kvm_guest_mode_change_trace_unreg(void); | ||
26 | |||
27 | /* | ||
21 | * Tracepoints for VM enters | 28 | * Tracepoints for VM enters |
22 | */ | 29 | */ |
23 | DECLARE_EVENT_CLASS(kvm_transition, | 30 | DECLARE_EVENT_CLASS(kvm_transition, |
@@ -62,10 +69,20 @@ DEFINE_EVENT(kvm_transition, kvm_out, | |||
62 | #define KVM_TRACE_EXIT_MSA_FPE 14 | 69 | #define KVM_TRACE_EXIT_MSA_FPE 14 |
63 | #define KVM_TRACE_EXIT_FPE 15 | 70 | #define KVM_TRACE_EXIT_FPE 15 |
64 | #define KVM_TRACE_EXIT_MSA_DISABLED 21 | 71 | #define KVM_TRACE_EXIT_MSA_DISABLED 21 |
72 | #define KVM_TRACE_EXIT_GUEST_EXIT 27 | ||
65 | /* Further exit reasons */ | 73 | /* Further exit reasons */ |
66 | #define KVM_TRACE_EXIT_WAIT 32 | 74 | #define KVM_TRACE_EXIT_WAIT 32 |
67 | #define KVM_TRACE_EXIT_CACHE 33 | 75 | #define KVM_TRACE_EXIT_CACHE 33 |
68 | #define KVM_TRACE_EXIT_SIGNAL 34 | 76 | #define KVM_TRACE_EXIT_SIGNAL 34 |
77 | /* 32 exit reasons correspond to GuestCtl0.GExcCode (VZ) */ | ||
78 | #define KVM_TRACE_EXIT_GEXCCODE_BASE 64 | ||
79 | #define KVM_TRACE_EXIT_GPSI 64 /* 0 */ | ||
80 | #define KVM_TRACE_EXIT_GSFC 65 /* 1 */ | ||
81 | #define KVM_TRACE_EXIT_HC 66 /* 2 */ | ||
82 | #define KVM_TRACE_EXIT_GRR 67 /* 3 */ | ||
83 | #define KVM_TRACE_EXIT_GVA 72 /* 8 */ | ||
84 | #define KVM_TRACE_EXIT_GHFC 73 /* 9 */ | ||
85 | #define KVM_TRACE_EXIT_GPA 74 /* 10 */ | ||
69 | 86 | ||
70 | /* Tracepoints for VM exits */ | 87 | /* Tracepoints for VM exits */ |
71 | #define kvm_trace_symbol_exit_types \ | 88 | #define kvm_trace_symbol_exit_types \ |
@@ -83,9 +100,17 @@ DEFINE_EVENT(kvm_transition, kvm_out, | |||
83 | { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \ | 100 | { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \ |
84 | { KVM_TRACE_EXIT_FPE, "FPE" }, \ | 101 | { KVM_TRACE_EXIT_FPE, "FPE" }, \ |
85 | { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \ | 102 | { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \ |
103 | { KVM_TRACE_EXIT_GUEST_EXIT, "Guest Exit" }, \ | ||
86 | { KVM_TRACE_EXIT_WAIT, "WAIT" }, \ | 104 | { KVM_TRACE_EXIT_WAIT, "WAIT" }, \ |
87 | { KVM_TRACE_EXIT_CACHE, "CACHE" }, \ | 105 | { KVM_TRACE_EXIT_CACHE, "CACHE" }, \ |
88 | { KVM_TRACE_EXIT_SIGNAL, "Signal" } | 106 | { KVM_TRACE_EXIT_SIGNAL, "Signal" }, \ |
107 | { KVM_TRACE_EXIT_GPSI, "GPSI" }, \ | ||
108 | { KVM_TRACE_EXIT_GSFC, "GSFC" }, \ | ||
109 | { KVM_TRACE_EXIT_HC, "HC" }, \ | ||
110 | { KVM_TRACE_EXIT_GRR, "GRR" }, \ | ||
111 | { KVM_TRACE_EXIT_GVA, "GVA" }, \ | ||
112 | { KVM_TRACE_EXIT_GHFC, "GHFC" }, \ | ||
113 | { KVM_TRACE_EXIT_GPA, "GPA" } | ||
89 | 114 | ||
90 | TRACE_EVENT(kvm_exit, | 115 | TRACE_EVENT(kvm_exit, |
91 | TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), | 116 | TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), |
@@ -158,6 +183,8 @@ TRACE_EVENT(kvm_exit, | |||
158 | { KVM_TRACE_COP0(16, 4), "Config4" }, \ | 183 | { KVM_TRACE_COP0(16, 4), "Config4" }, \ |
159 | { KVM_TRACE_COP0(16, 5), "Config5" }, \ | 184 | { KVM_TRACE_COP0(16, 5), "Config5" }, \ |
160 | { KVM_TRACE_COP0(16, 7), "Config7" }, \ | 185 | { KVM_TRACE_COP0(16, 7), "Config7" }, \ |
186 | { KVM_TRACE_COP0(17, 1), "MAAR" }, \ | ||
187 | { KVM_TRACE_COP0(17, 2), "MAARI" }, \ | ||
161 | { KVM_TRACE_COP0(26, 0), "ECC" }, \ | 188 | { KVM_TRACE_COP0(26, 0), "ECC" }, \ |
162 | { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \ | 189 | { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \ |
163 | { KVM_TRACE_COP0(31, 2), "KScratch1" }, \ | 190 | { KVM_TRACE_COP0(31, 2), "KScratch1" }, \ |
@@ -268,6 +295,51 @@ TRACE_EVENT(kvm_asid_change, | |||
268 | __entry->new_asid) | 295 | __entry->new_asid) |
269 | ); | 296 | ); |
270 | 297 | ||
298 | TRACE_EVENT(kvm_guestid_change, | ||
299 | TP_PROTO(struct kvm_vcpu *vcpu, unsigned int guestid), | ||
300 | TP_ARGS(vcpu, guestid), | ||
301 | TP_STRUCT__entry( | ||
302 | __field(unsigned int, guestid) | ||
303 | ), | ||
304 | |||
305 | TP_fast_assign( | ||
306 | __entry->guestid = guestid; | ||
307 | ), | ||
308 | |||
309 | TP_printk("GuestID: 0x%02x", | ||
310 | __entry->guestid) | ||
311 | ); | ||
312 | |||
313 | TRACE_EVENT_FN(kvm_guest_mode_change, | ||
314 | TP_PROTO(struct kvm_vcpu *vcpu), | ||
315 | TP_ARGS(vcpu), | ||
316 | TP_STRUCT__entry( | ||
317 | __field(unsigned long, epc) | ||
318 | __field(unsigned long, pc) | ||
319 | __field(unsigned long, badvaddr) | ||
320 | __field(unsigned int, status) | ||
321 | __field(unsigned int, cause) | ||
322 | ), | ||
323 | |||
324 | TP_fast_assign( | ||
325 | __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0); | ||
326 | __entry->pc = vcpu->arch.pc; | ||
327 | __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0); | ||
328 | __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0); | ||
329 | __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0); | ||
330 | ), | ||
331 | |||
332 | TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx", | ||
333 | __entry->epc, | ||
334 | __entry->pc, | ||
335 | __entry->status, | ||
336 | __entry->cause, | ||
337 | __entry->badvaddr), | ||
338 | |||
339 | kvm_guest_mode_change_trace_reg, | ||
340 | kvm_guest_mode_change_trace_unreg | ||
341 | ); | ||
342 | |||
271 | #endif /* _TRACE_KVM_H */ | 343 | #endif /* _TRACE_KVM_H */ |
272 | 344 | ||
273 | /* This part must be outside protection */ | 345 | /* This part must be outside protection */ |
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index b1fa53b252ea..a563759fd142 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
13 | #include <linux/err.h> | 13 | #include <linux/err.h> |
14 | #include <linux/kvm_host.h> | 14 | #include <linux/kvm_host.h> |
15 | #include <linux/log2.h> | ||
15 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
16 | #include <linux/vmalloc.h> | 17 | #include <linux/vmalloc.h> |
17 | #include <asm/mmu_context.h> | 18 | #include <asm/mmu_context.h> |
@@ -40,6 +41,29 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) | |||
40 | return gpa; | 41 | return gpa; |
41 | } | 42 | } |
42 | 43 | ||
44 | static int kvm_trap_emul_no_handler(struct kvm_vcpu *vcpu) | ||
45 | { | ||
46 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; | ||
47 | u32 cause = vcpu->arch.host_cp0_cause; | ||
48 | u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; | ||
49 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; | ||
50 | u32 inst = 0; | ||
51 | |||
52 | /* | ||
53 | * Fetch the instruction. | ||
54 | */ | ||
55 | if (cause & CAUSEF_BD) | ||
56 | opc += 1; | ||
57 | kvm_get_badinstr(opc, vcpu, &inst); | ||
58 | |||
59 | kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", | ||
60 | exccode, opc, inst, badvaddr, | ||
61 | kvm_read_c0_guest_status(vcpu->arch.cop0)); | ||
62 | kvm_arch_vcpu_dump_regs(vcpu); | ||
63 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
64 | return RESUME_HOST; | ||
65 | } | ||
66 | |||
43 | static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) | 67 | static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) |
44 | { | 68 | { |
45 | struct mips_coproc *cop0 = vcpu->arch.cop0; | 69 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
@@ -82,6 +106,10 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) | |||
82 | ret = RESUME_HOST; | 106 | ret = RESUME_HOST; |
83 | break; | 107 | break; |
84 | 108 | ||
109 | case EMULATE_HYPERCALL: | ||
110 | ret = kvm_mips_handle_hypcall(vcpu); | ||
111 | break; | ||
112 | |||
85 | default: | 113 | default: |
86 | BUG(); | 114 | BUG(); |
87 | } | 115 | } |
@@ -484,6 +512,31 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) | |||
484 | return ret; | 512 | return ret; |
485 | } | 513 | } |
486 | 514 | ||
515 | static int kvm_trap_emul_hardware_enable(void) | ||
516 | { | ||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | static void kvm_trap_emul_hardware_disable(void) | ||
521 | { | ||
522 | } | ||
523 | |||
524 | static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext) | ||
525 | { | ||
526 | int r; | ||
527 | |||
528 | switch (ext) { | ||
529 | case KVM_CAP_MIPS_TE: | ||
530 | r = 1; | ||
531 | break; | ||
532 | default: | ||
533 | r = 0; | ||
534 | break; | ||
535 | } | ||
536 | |||
537 | return r; | ||
538 | } | ||
539 | |||
487 | static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) | 540 | static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) |
488 | { | 541 | { |
489 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | 542 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; |
@@ -561,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) | |||
561 | u32 config, config1; | 614 | u32 config, config1; |
562 | int vcpu_id = vcpu->vcpu_id; | 615 | int vcpu_id = vcpu->vcpu_id; |
563 | 616 | ||
617 | /* Start off the timer at 100 MHz */ | ||
618 | kvm_mips_init_count(vcpu, 100*1000*1000); | ||
619 | |||
564 | /* | 620 | /* |
565 | * Arch specific stuff, set up config registers properly so that the | 621 | * Arch specific stuff, set up config registers properly so that the |
566 | * guest will come up as expected | 622 | * guest will come up as expected |
@@ -589,6 +645,13 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) | |||
589 | /* Read the cache characteristics from the host Config1 Register */ | 645 | /* Read the cache characteristics from the host Config1 Register */ |
590 | config1 = (read_c0_config1() & ~0x7f); | 646 | config1 = (read_c0_config1() & ~0x7f); |
591 | 647 | ||
648 | /* DCache line size not correctly reported in Config1 on Octeon CPUs */ | ||
649 | if (cpu_dcache_line_size()) { | ||
650 | config1 &= ~MIPS_CONF1_DL; | ||
651 | config1 |= ((ilog2(cpu_dcache_line_size()) - 1) << | ||
652 | MIPS_CONF1_DL_SHF) & MIPS_CONF1_DL; | ||
653 | } | ||
654 | |||
592 | /* Set up MMU size */ | 655 | /* Set up MMU size */ |
593 | config1 &= ~(0x3f << 25); | 656 | config1 &= ~(0x3f << 25); |
594 | config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25); | 657 | config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25); |
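Aside (not part of the patch): the Octeon fixup in the hunk above rewrites the Config1.DL field from the measured dcache line size, using the architectural encoding where the field holds log2(line size) - 1. A minimal userspace sketch of that encoding, assuming the DL field sits at bits 12:10 of Config1 as MIPS_CONF1_DL_SHF suggests:

#include <stdio.h>

static unsigned int ilog2_u(unsigned int x)
{
        unsigned int r = 0;

        while (x >>= 1)
                r++;
        return r;
}

int main(void)
{
        unsigned int line_size = 128;                   /* e.g. an Octeon dcache line */
        unsigned int dl = ilog2_u(line_size) - 1;       /* Config1.DL encoding */

        printf("Config1.DL for %u-byte lines: %u (field value 0x%x)\n",
               line_size, dl, dl << 10);                /* assumed DL shift of 10 */
        return 0;
}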
@@ -892,10 +955,12 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, | |||
892 | if (v & CAUSEF_DC) { | 955 | if (v & CAUSEF_DC) { |
893 | /* disable timer first */ | 956 | /* disable timer first */ |
894 | kvm_mips_count_disable_cause(vcpu); | 957 | kvm_mips_count_disable_cause(vcpu); |
895 | kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); | 958 | kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC, |
959 | v); | ||
896 | } else { | 960 | } else { |
897 | /* enable timer last */ | 961 | /* enable timer last */ |
898 | kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); | 962 | kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC, |
963 | v); | ||
899 | kvm_mips_count_enable_cause(vcpu); | 964 | kvm_mips_count_enable_cause(vcpu); |
900 | } | 965 | } |
901 | } else { | 966 | } else { |
@@ -1230,7 +1295,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { | |||
1230 | .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe, | 1295 | .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe, |
1231 | .handle_fpe = kvm_trap_emul_handle_fpe, | 1296 | .handle_fpe = kvm_trap_emul_handle_fpe, |
1232 | .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, | 1297 | .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, |
1298 | .handle_guest_exit = kvm_trap_emul_no_handler, | ||
1233 | 1299 | ||
1300 | .hardware_enable = kvm_trap_emul_hardware_enable, | ||
1301 | .hardware_disable = kvm_trap_emul_hardware_disable, | ||
1302 | .check_extension = kvm_trap_emul_check_extension, | ||
1234 | .vcpu_init = kvm_trap_emul_vcpu_init, | 1303 | .vcpu_init = kvm_trap_emul_vcpu_init, |
1235 | .vcpu_uninit = kvm_trap_emul_vcpu_uninit, | 1304 | .vcpu_uninit = kvm_trap_emul_vcpu_uninit, |
1236 | .vcpu_setup = kvm_trap_emul_vcpu_setup, | 1305 | .vcpu_setup = kvm_trap_emul_vcpu_setup, |
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c new file mode 100644 index 000000000000..71d8856ade64 --- /dev/null +++ b/arch/mips/kvm/vz.c | |||
@@ -0,0 +1,3223 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * KVM/MIPS: Support for hardware virtualization extensions | ||
7 | * | ||
8 | * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. | ||
9 | * Authors: Yann Le Du <ledu@kymasys.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/errno.h> | ||
13 | #include <linux/err.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/preempt.h> | ||
16 | #include <linux/vmalloc.h> | ||
17 | #include <asm/cacheflush.h> | ||
18 | #include <asm/cacheops.h> | ||
19 | #include <asm/cmpxchg.h> | ||
20 | #include <asm/fpu.h> | ||
21 | #include <asm/hazards.h> | ||
22 | #include <asm/inst.h> | ||
23 | #include <asm/mmu_context.h> | ||
24 | #include <asm/r4kcache.h> | ||
25 | #include <asm/time.h> | ||
26 | #include <asm/tlb.h> | ||
27 | #include <asm/tlbex.h> | ||
28 | |||
29 | #include <linux/kvm_host.h> | ||
30 | |||
31 | #include "interrupt.h" | ||
32 | |||
33 | #include "trace.h" | ||
34 | |||
35 | /* Pointers to last VCPU loaded on each physical CPU */ | ||
36 | static struct kvm_vcpu *last_vcpu[NR_CPUS]; | ||
37 | /* Pointers to last VCPU executed on each physical CPU */ | ||
38 | static struct kvm_vcpu *last_exec_vcpu[NR_CPUS]; | ||
39 | |||
40 | /* | ||
41 | * Number of guest VTLB entries to use, so we can catch inconsistency between | ||
42 | * CPUs. | ||
43 | */ | ||
44 | static unsigned int kvm_vz_guest_vtlb_size; | ||
45 | |||
46 | static inline long kvm_vz_read_gc0_ebase(void) | ||
47 | { | ||
48 | if (sizeof(long) == 8 && cpu_has_ebase_wg) | ||
49 | return read_gc0_ebase_64(); | ||
50 | else | ||
51 | return read_gc0_ebase(); | ||
52 | } | ||
53 | |||
54 | static inline void kvm_vz_write_gc0_ebase(long v) | ||
55 | { | ||
56 | /* | ||
57 | * First write with WG=1 to write upper bits, then write again in case | ||
58 | * WG should be left at 0. | ||
59 | * write_gc0_ebase_64() is no longer UNDEFINED since R6. | ||
60 | */ | ||
61 | if (sizeof(long) == 8 && | ||
62 | (cpu_has_mips64r6 || cpu_has_ebase_wg)) { | ||
63 | write_gc0_ebase_64(v | MIPS_EBASE_WG); | ||
64 | write_gc0_ebase_64(v); | ||
65 | } else { | ||
66 | write_gc0_ebase(v | MIPS_EBASE_WG); | ||
67 | write_gc0_ebase(v); | ||
68 | } | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * These Config bits may be writable by the guest: | ||
73 | * Config: [K23, KU] (!TLB), K0 | ||
74 | * Config1: (none) | ||
75 | * Config2: [TU, SU] (impl) | ||
76 | * Config3: ISAOnExc | ||
77 | * Config4: FTLBPageSize | ||
78 | * Config5: K, CV, MSAEn, UFE, FRE, SBRI, UFR | ||
79 | */ | ||
80 | |||
81 | static inline unsigned int kvm_vz_config_guest_wrmask(struct kvm_vcpu *vcpu) | ||
82 | { | ||
83 | return CONF_CM_CMASK; | ||
84 | } | ||
85 | |||
86 | static inline unsigned int kvm_vz_config1_guest_wrmask(struct kvm_vcpu *vcpu) | ||
87 | { | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static inline unsigned int kvm_vz_config2_guest_wrmask(struct kvm_vcpu *vcpu) | ||
92 | { | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static inline unsigned int kvm_vz_config3_guest_wrmask(struct kvm_vcpu *vcpu) | ||
97 | { | ||
98 | return MIPS_CONF3_ISA_OE; | ||
99 | } | ||
100 | |||
101 | static inline unsigned int kvm_vz_config4_guest_wrmask(struct kvm_vcpu *vcpu) | ||
102 | { | ||
103 | /* no need to be exact */ | ||
104 | return MIPS_CONF4_VFTLBPAGESIZE; | ||
105 | } | ||
106 | |||
107 | static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu) | ||
108 | { | ||
109 | unsigned int mask = MIPS_CONF5_K | MIPS_CONF5_CV | MIPS_CONF5_SBRI; | ||
110 | |||
111 | /* Permit MSAEn changes if MSA supported and enabled */ | ||
112 | if (kvm_mips_guest_has_msa(&vcpu->arch)) | ||
113 | mask |= MIPS_CONF5_MSAEN; | ||
114 | |||
115 | /* | ||
116 | * Permit guest FPU mode changes if FPU is enabled and the relevant | ||
117 | * feature exists according to FIR register. | ||
118 | */ | ||
119 | if (kvm_mips_guest_has_fpu(&vcpu->arch)) { | ||
120 | if (cpu_has_ufr) | ||
121 | mask |= MIPS_CONF5_UFR; | ||
122 | if (cpu_has_fre) | ||
123 | mask |= MIPS_CONF5_FRE | MIPS_CONF5_UFE; | ||
124 | } | ||
125 | |||
126 | return mask; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * VZ optionally allows these additional Config bits to be written by root: | ||
131 | * Config: M, [MT] | ||
132 | * Config1: M, [MMUSize-1, C2, MD, PC, WR, CA], FP | ||
133 | * Config2: M | ||
134 | * Config3: M, MSAP, [BPG], ULRI, [DSP2P, DSPP], CTXTC, [ITL, LPA, VEIC, | ||
135 | * VInt, SP, CDMM, MT, SM, TL] | ||
136 | * Config4: M, [VTLBSizeExt, MMUSizeExt] | ||
137 | * Config5: MRP | ||
138 | */ | ||
139 | |||
140 | static inline unsigned int kvm_vz_config_user_wrmask(struct kvm_vcpu *vcpu) | ||
141 | { | ||
142 | return kvm_vz_config_guest_wrmask(vcpu) | MIPS_CONF_M; | ||
143 | } | ||
144 | |||
145 | static inline unsigned int kvm_vz_config1_user_wrmask(struct kvm_vcpu *vcpu) | ||
146 | { | ||
147 | unsigned int mask = kvm_vz_config1_guest_wrmask(vcpu) | MIPS_CONF_M; | ||
148 | |||
149 | /* Permit FPU to be present if FPU is supported */ | ||
150 | if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) | ||
151 | mask |= MIPS_CONF1_FP; | ||
152 | |||
153 | return mask; | ||
154 | } | ||
155 | |||
156 | static inline unsigned int kvm_vz_config2_user_wrmask(struct kvm_vcpu *vcpu) | ||
157 | { | ||
158 | return kvm_vz_config2_guest_wrmask(vcpu) | MIPS_CONF_M; | ||
159 | } | ||
160 | |||
161 | static inline unsigned int kvm_vz_config3_user_wrmask(struct kvm_vcpu *vcpu) | ||
162 | { | ||
163 | unsigned int mask = kvm_vz_config3_guest_wrmask(vcpu) | MIPS_CONF_M | | ||
164 | MIPS_CONF3_ULRI | MIPS_CONF3_CTXTC; | ||
165 | |||
166 | /* Permit MSA to be present if MSA is supported */ | ||
167 | if (kvm_mips_guest_can_have_msa(&vcpu->arch)) | ||
168 | mask |= MIPS_CONF3_MSA; | ||
169 | |||
170 | return mask; | ||
171 | } | ||
172 | |||
173 | static inline unsigned int kvm_vz_config4_user_wrmask(struct kvm_vcpu *vcpu) | ||
174 | { | ||
175 | return kvm_vz_config4_guest_wrmask(vcpu) | MIPS_CONF_M; | ||
176 | } | ||
177 | |||
178 | static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu) | ||
179 | { | ||
180 | return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP; | ||
181 | } | ||
182 | |||
183 | static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva) | ||
184 | { | ||
185 | /* VZ guest has already converted gva to gpa */ | ||
186 | return gva; | ||
187 | } | ||
188 | |||
189 | static void kvm_vz_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority) | ||
190 | { | ||
191 | set_bit(priority, &vcpu->arch.pending_exceptions); | ||
192 | clear_bit(priority, &vcpu->arch.pending_exceptions_clr); | ||
193 | } | ||
194 | |||
195 | static void kvm_vz_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority) | ||
196 | { | ||
197 | clear_bit(priority, &vcpu->arch.pending_exceptions); | ||
198 | set_bit(priority, &vcpu->arch.pending_exceptions_clr); | ||
199 | } | ||
200 | |||
201 | static void kvm_vz_queue_timer_int_cb(struct kvm_vcpu *vcpu) | ||
202 | { | ||
203 | /* | ||
204 | * timer expiry is asynchronous to vcpu execution therefore defer guest | ||
205 | * cp0 accesses | ||
206 | */ | ||
207 | kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); | ||
208 | } | ||
209 | |||
210 | static void kvm_vz_dequeue_timer_int_cb(struct kvm_vcpu *vcpu) | ||
211 | { | ||
212 | /* | ||
213 | * timer expiry is asynchronous to vcpu execution therefore defer guest | ||
214 | * cp0 accesses | ||
215 | */ | ||
216 | kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER); | ||
217 | } | ||
218 | |||
219 | static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu, | ||
220 | struct kvm_mips_interrupt *irq) | ||
221 | { | ||
222 | int intr = (int)irq->irq; | ||
223 | |||
224 | /* | ||
225 | * interrupts are asynchronous to vcpu execution therefore defer guest | ||
226 | * cp0 accesses | ||
227 | */ | ||
228 | switch (intr) { | ||
229 | case 2: | ||
230 | kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IO); | ||
231 | break; | ||
232 | |||
233 | case 3: | ||
234 | kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_1); | ||
235 | break; | ||
236 | |||
237 | case 4: | ||
238 | kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_2); | ||
239 | break; | ||
240 | |||
241 | default: | ||
242 | break; | ||
243 | } | ||
244 | |||
245 | } | ||
246 | |||
247 | static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu, | ||
248 | struct kvm_mips_interrupt *irq) | ||
249 | { | ||
250 | int intr = (int)irq->irq; | ||
251 | |||
252 | /* | ||
253 | * interrupts are asynchronous to vcpu execution therefore defer guest | ||
254 | * cp0 accesses | ||
255 | */ | ||
256 | switch (intr) { | ||
257 | case -2: | ||
258 | kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IO); | ||
259 | break; | ||
260 | |||
261 | case -3: | ||
262 | kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1); | ||
263 | break; | ||
264 | |||
265 | case -4: | ||
266 | kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2); | ||
267 | break; | ||
268 | |||
269 | default: | ||
270 | break; | ||
271 | } | ||
272 | |||
273 | } | ||
274 | |||
275 | static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = { | ||
276 | [MIPS_EXC_INT_TIMER] = C_IRQ5, | ||
277 | [MIPS_EXC_INT_IO] = C_IRQ0, | ||
278 | [MIPS_EXC_INT_IPI_1] = C_IRQ1, | ||
279 | [MIPS_EXC_INT_IPI_2] = C_IRQ2, | ||
280 | }; | ||
281 | |||
282 | static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, | ||
283 | u32 cause) | ||
284 | { | ||
285 | u32 irq = (priority < MIPS_EXC_MAX) ? | ||
286 | kvm_vz_priority_to_irq[priority] : 0; | ||
287 | |||
288 | switch (priority) { | ||
289 | case MIPS_EXC_INT_TIMER: | ||
290 | set_gc0_cause(C_TI); | ||
291 | break; | ||
292 | |||
293 | case MIPS_EXC_INT_IO: | ||
294 | case MIPS_EXC_INT_IPI_1: | ||
295 | case MIPS_EXC_INT_IPI_2: | ||
296 | if (cpu_has_guestctl2) | ||
297 | set_c0_guestctl2(irq); | ||
298 | else | ||
299 | set_gc0_cause(irq); | ||
300 | break; | ||
301 | |||
302 | default: | ||
303 | break; | ||
304 | } | ||
305 | |||
306 | clear_bit(priority, &vcpu->arch.pending_exceptions); | ||
307 | return 1; | ||
308 | } | ||
309 | |||
310 | static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, | ||
311 | u32 cause) | ||
312 | { | ||
313 | u32 irq = (priority < MIPS_EXC_MAX) ? | ||
314 | kvm_vz_priority_to_irq[priority] : 0; | ||
315 | |||
316 | switch (priority) { | ||
317 | case MIPS_EXC_INT_TIMER: | ||
318 | /* | ||
319 | * Call to kvm_write_c0_guest_compare() clears Cause.TI in | ||
320 | * kvm_mips_emulate_CP0(). Explicitly clear irq associated with | ||
321 | * Cause.IP[IPTI] if GuestCtl2 virtual interrupt register not | ||
322 | * supported or if not using GuestCtl2 Hardware Clear. | ||
323 | */ | ||
324 | if (cpu_has_guestctl2) { | ||
325 | if (!(read_c0_guestctl2() & (irq << 14))) | ||
326 | clear_c0_guestctl2(irq); | ||
327 | } else { | ||
328 | clear_gc0_cause(irq); | ||
329 | } | ||
330 | break; | ||
331 | |||
332 | case MIPS_EXC_INT_IO: | ||
333 | case MIPS_EXC_INT_IPI_1: | ||
334 | case MIPS_EXC_INT_IPI_2: | ||
335 | /* Clear GuestCtl2.VIP irq if not using Hardware Clear */ | ||
336 | if (cpu_has_guestctl2) { | ||
337 | if (!(read_c0_guestctl2() & (irq << 14))) | ||
338 | clear_c0_guestctl2(irq); | ||
339 | } else { | ||
340 | clear_gc0_cause(irq); | ||
341 | } | ||
342 | break; | ||
343 | |||
344 | default: | ||
345 | break; | ||
346 | } | ||
347 | |||
348 | clear_bit(priority, &vcpu->arch.pending_exceptions_clr); | ||
349 | return 1; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * VZ guest timer handling. | ||
354 | */ | ||
355 | |||
356 | /** | ||
357 | * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer. | ||
358 | * @vcpu: Virtual CPU. | ||
359 | * | ||
360 | * Returns: true if the VZ GTOffset & real guest CP0_Count should be used | ||
361 | * instead of software emulation of guest timer. | ||
362 | * false otherwise. | ||
363 | */ | ||
364 | static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu) | ||
365 | { | ||
366 | if (kvm_mips_count_disabled(vcpu)) | ||
367 | return false; | ||
368 | |||
369 | /* Chosen frequency must match real frequency */ | ||
370 | if (mips_hpt_frequency != vcpu->arch.count_hz) | ||
371 | return false; | ||
372 | |||
373 | /* We don't support a CP0_GTOffset with fewer bits than CP0_Count */ | ||
374 | if (current_cpu_data.gtoffset_mask != 0xffffffff) | ||
375 | return false; | ||
376 | |||
377 | return true; | ||
378 | } | ||
379 | |||
380 | /** | ||
381 | * _kvm_vz_restore_stimer() - Restore soft timer state. | ||
382 | * @vcpu: Virtual CPU. | ||
383 | * @compare: CP0_Compare register value, restored by caller. | ||
384 | * @cause: CP0_Cause register to restore. | ||
385 | * | ||
386 | * Restore VZ state relating to the soft timer. The hard timer can be enabled | ||
387 | * later. | ||
388 | */ | ||
389 | static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare, | ||
390 | u32 cause) | ||
391 | { | ||
392 | /* | ||
393 | * Avoid spurious counter interrupts by setting Guest CP0_Count to just | ||
394 | * after Guest CP0_Compare. | ||
395 | */ | ||
396 | write_c0_gtoffset(compare - read_c0_count()); | ||
397 | |||
398 | back_to_back_c0_hazard(); | ||
399 | write_gc0_cause(cause); | ||
400 | } | ||
401 | |||
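As the comment in _kvm_vz_restore_stimer() notes, the VZ ASE defines the guest's CP0_Count as the root CP0_Count plus CP0_GTOffset (modulo 2^32), so writing GTOffset = Compare - root Count makes the guest counter read back as its Compare value at that instant, leaving the next timer interrupt a full counter period away. A small standalone model of that arithmetic, purely illustrative and not part of the diff:

#include <assert.h>
#include <stdint.h>

/* Guest Count reads as root Count + GTOffset, truncated to 32 bits. */
static uint32_t guest_count(uint32_t root_count, uint32_t gtoffset)
{
        return root_count + gtoffset;
}

int main(void)
{
        uint32_t root_count = 0x12345678, compare = 0x00001000;
        uint32_t gtoffset = compare - root_count;       /* value written to CP0_GTOffset */

        /* The guest now observes Count == Compare, just after the comparison point. */
        assert(guest_count(root_count, gtoffset) == compare);
        return 0;
}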
402 | /** | ||
403 | * _kvm_vz_restore_htimer() - Restore hard timer state. | ||
404 | * @vcpu: Virtual CPU. | ||
405 | * @compare: CP0_Compare register value, restored by caller. | ||
406 | * @cause: CP0_Cause register to restore. | ||
407 | * | ||
408 | * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the | ||
409 | * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause. | ||
410 | */ | ||
411 | static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, | ||
412 | u32 compare, u32 cause) | ||
413 | { | ||
414 | u32 start_count, after_count; | ||
415 | ktime_t freeze_time; | ||
416 | unsigned long flags; | ||
417 | |||
418 | /* | ||
419 | * Freeze the soft-timer and sync the guest CP0_Count with it. We do | ||
420 | * this with interrupts disabled to avoid latency. | ||
421 | */ | ||
422 | local_irq_save(flags); | ||
423 | freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count); | ||
424 | write_c0_gtoffset(start_count - read_c0_count()); | ||
425 | local_irq_restore(flags); | ||
426 | |||
427 | /* restore guest CP0_Cause, as TI may already be set */ | ||
428 | back_to_back_c0_hazard(); | ||
429 | write_gc0_cause(cause); | ||
430 | |||
431 | /* | ||
432 | * The above sequence isn't atomic and would result in lost timer | ||
433 | * interrupts if we're not careful. Detect if a timer interrupt is due | ||
434 | * and assert it. | ||
435 | */ | ||
436 | back_to_back_c0_hazard(); | ||
437 | after_count = read_gc0_count(); | ||
438 | if (after_count - start_count > compare - start_count - 1) | ||
439 | kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); | ||
440 | } | ||
441 | |||
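The check `after_count - start_count > compare - start_count - 1` in _kvm_vz_restore_htimer() (and again in _kvm_vz_save_htimer() below) relies on unsigned modular arithmetic: it is true exactly when Compare lies in the interval (start_count, after_count], i.e. when a timer interrupt became due while the non-atomic sequence ran. A standalone illustration of the same expression, not part of the diff:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* True if "compare" lies in (start, after] modulo 2^32. */
static bool timer_irq_due(uint32_t start, uint32_t after, uint32_t compare)
{
        return after - start > compare - start - 1u;
}

int main(void)
{
        assert(timer_irq_due(100, 200, 150));            /* due while counting 100 -> 200 */
        assert(!timer_irq_due(100, 200, 300));           /* not reached yet */
        assert(timer_irq_due(0xfffffff0u, 0x10, 0x4));   /* still works across wraparound */
        assert(!timer_irq_due(100, 200, 100));           /* compare == start: not due */
        return 0;
}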
442 | /** | ||
443 | * kvm_vz_restore_timer() - Restore timer state. | ||
444 | * @vcpu: Virtual CPU. | ||
445 | * | ||
446 | * Restore soft timer state from saved context. | ||
447 | */ | ||
448 | static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu) | ||
449 | { | ||
450 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
451 | u32 cause, compare; | ||
452 | |||
453 | compare = kvm_read_sw_gc0_compare(cop0); | ||
454 | cause = kvm_read_sw_gc0_cause(cop0); | ||
455 | |||
456 | write_gc0_compare(compare); | ||
457 | _kvm_vz_restore_stimer(vcpu, compare, cause); | ||
458 | } | ||
459 | |||
460 | /** | ||
461 | * kvm_vz_acquire_htimer() - Switch to hard timer state. | ||
462 | * @vcpu: Virtual CPU. | ||
463 | * | ||
464 | * Restore hard timer state on top of existing soft timer state if possible. | ||
465 | * | ||
466 | * Since hard timer won't remain active over preemption, preemption should be | ||
467 | * disabled by the caller. | ||
468 | */ | ||
469 | void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) | ||
470 | { | ||
471 | u32 gctl0; | ||
472 | |||
473 | gctl0 = read_c0_guestctl0(); | ||
474 | if (!(gctl0 & MIPS_GCTL0_GT) && kvm_vz_should_use_htimer(vcpu)) { | ||
475 | /* enable guest access to hard timer */ | ||
476 | write_c0_guestctl0(gctl0 | MIPS_GCTL0_GT); | ||
477 | |||
478 | _kvm_vz_restore_htimer(vcpu, read_gc0_compare(), | ||
479 | read_gc0_cause()); | ||
480 | } | ||
481 | } | ||
482 | |||
483 | /** | ||
484 | * _kvm_vz_save_htimer() - Switch to software emulation of guest timer. | ||
485 | * @vcpu: Virtual CPU. | ||
486 | * @compare: Pointer to write compare value to. | ||
487 | * @cause: Pointer to write cause value to. | ||
488 | * | ||
489 | * Save VZ guest timer state and switch to software emulation of guest CP0 | ||
490 | * timer. The hard timer must already be in use, so preemption should be | ||
491 | * disabled. | ||
492 | */ | ||
493 | static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu, | ||
494 | u32 *out_compare, u32 *out_cause) | ||
495 | { | ||
496 | u32 cause, compare, before_count, end_count; | ||
497 | ktime_t before_time; | ||
498 | |||
499 | compare = read_gc0_compare(); | ||
500 | *out_compare = compare; | ||
501 | |||
502 | before_time = ktime_get(); | ||
503 | |||
504 | /* | ||
505 | * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time | ||
506 | * at which no pending timer interrupt is missing. | ||
507 | */ | ||
508 | before_count = read_gc0_count(); | ||
509 | back_to_back_c0_hazard(); | ||
510 | cause = read_gc0_cause(); | ||
511 | *out_cause = cause; | ||
512 | |||
513 | /* | ||
514 | * Record a final CP0_Count which we will transfer to the soft-timer. | ||
515 | * This is recorded *after* saving CP0_Cause, so we don't get any timer | ||
516 | * interrupts from just after the final CP0_Count point. | ||
517 | */ | ||
518 | back_to_back_c0_hazard(); | ||
519 | end_count = read_gc0_count(); | ||
520 | |||
521 | /* | ||
522 | * The above sequence isn't atomic, so we could miss a timer interrupt | ||
523 | * between reading CP0_Cause and end_count. Detect and record any timer | ||
524 | * interrupt due between before_count and end_count. | ||
525 | */ | ||
526 | if (end_count - before_count > compare - before_count - 1) | ||
527 | kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); | ||
528 | |||
529 | /* | ||
530 | * Restore soft-timer, ignoring a small amount of negative drift due to | ||
531 | * delay between freeze_hrtimer and setting CP0_GTOffset. | ||
532 | */ | ||
533 | kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000); | ||
534 | } | ||
535 | |||
536 | /** | ||
537 | * kvm_vz_save_timer() - Save guest timer state. | ||
538 | * @vcpu: Virtual CPU. | ||
539 | * | ||
540 | * Save VZ guest timer state and switch to soft guest timer if hard timer was in | ||
541 | * use. | ||
542 | */ | ||
543 | static void kvm_vz_save_timer(struct kvm_vcpu *vcpu) | ||
544 | { | ||
545 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
546 | u32 gctl0, compare, cause; | ||
547 | |||
548 | gctl0 = read_c0_guestctl0(); | ||
549 | if (gctl0 & MIPS_GCTL0_GT) { | ||
550 | /* disable guest use of hard timer */ | ||
551 | write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); | ||
552 | |||
553 | /* save hard timer state */ | ||
554 | _kvm_vz_save_htimer(vcpu, &compare, &cause); | ||
555 | } else { | ||
556 | compare = read_gc0_compare(); | ||
557 | cause = read_gc0_cause(); | ||
558 | } | ||
559 | |||
560 | /* save timer-related state to VCPU context */ | ||
561 | kvm_write_sw_gc0_cause(cop0, cause); | ||
562 | kvm_write_sw_gc0_compare(cop0, compare); | ||
563 | } | ||
564 | |||
565 | /** | ||
566 | * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use. | ||
567 | * @vcpu: Virtual CPU. | ||
568 | * | ||
569 | * Transfers the state of the hard guest timer to the soft guest timer, leaving | ||
570 | * guest state intact so it can continue to be used with the soft timer. | ||
571 | */ | ||
572 | void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) | ||
573 | { | ||
574 | u32 gctl0, compare, cause; | ||
575 | |||
576 | preempt_disable(); | ||
577 | gctl0 = read_c0_guestctl0(); | ||
578 | if (gctl0 & MIPS_GCTL0_GT) { | ||
579 | /* disable guest use of timer */ | ||
580 | write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); | ||
581 | |||
582 | /* switch to soft timer */ | ||
583 | _kvm_vz_save_htimer(vcpu, &compare, &cause); | ||
584 | |||
585 | /* leave soft timer in usable state */ | ||
586 | _kvm_vz_restore_stimer(vcpu, compare, cause); | ||
587 | } | ||
588 | preempt_enable(); | ||
589 | } | ||
590 | |||
591 | /** | ||
592 | * is_eva_access() - Find whether an instruction is an EVA memory accessor. | ||
593 | * @inst: 32-bit instruction encoding. | ||
594 | * | ||
595 | * Finds whether @inst encodes an EVA memory access instruction, which would | ||
596 | * indicate that emulation of it should access the user mode address space | ||
597 | * instead of the kernel mode address space. This matters for MUSUK segments | ||
598 | * which are TLB mapped for user mode but unmapped for kernel mode. | ||
599 | * | ||
600 | * Returns: Whether @inst encodes an EVA accessor instruction. | ||
601 | */ | ||
602 | static bool is_eva_access(union mips_instruction inst) | ||
603 | { | ||
604 | if (inst.spec3_format.opcode != spec3_op) | ||
605 | return false; | ||
606 | |||
607 | switch (inst.spec3_format.func) { | ||
608 | case lwle_op: | ||
609 | case lwre_op: | ||
610 | case cachee_op: | ||
611 | case sbe_op: | ||
612 | case she_op: | ||
613 | case sce_op: | ||
614 | case swe_op: | ||
615 | case swle_op: | ||
616 | case swre_op: | ||
617 | case prefe_op: | ||
618 | case lbue_op: | ||
619 | case lhue_op: | ||
620 | case lbe_op: | ||
621 | case lhe_op: | ||
622 | case lle_op: | ||
623 | case lwe_op: | ||
624 | return true; | ||
625 | default: | ||
626 | return false; | ||
627 | } | ||
628 | } | ||
629 | |||
630 | /** | ||
631 | * is_eva_am_mapped() - Find whether an access mode is mapped. | ||
632 | * @vcpu: KVM VCPU state. | ||
633 | * @am: 3-bit encoded access mode. | ||
634 | * @eu: Segment becomes unmapped and uncached when Status.ERL=1. | ||
635 | * | ||
636 | * Decode @am to find whether it encodes a mapped segment for the current VCPU | ||
637 | * state. Where necessary @eu and the actual instruction causing the fault are | ||
638 | * taken into account to make the decision. | ||
639 | * | ||
640 | * Returns: Whether the VCPU faulted on a TLB mapped address. | ||
641 | */ | ||
642 | static bool is_eva_am_mapped(struct kvm_vcpu *vcpu, unsigned int am, bool eu) | ||
643 | { | ||
644 | u32 am_lookup; | ||
645 | int err; | ||
646 | |||
647 | /* | ||
648 | * Interpret access control mode. We assume address errors will already | ||
649 | * have been caught by the guest, leaving us with: | ||
650 | 	 *          AM  UM   SM   KM  31..24 23..16 | ||
651 | 	 * UK    0 000            Unm    0      0 | ||
652 | 	 * MK    1 001            TLB    1 | ||
653 | 	 * MSK   2 010       TLB  TLB    1 | ||
654 | 	 * MUSK  3 011  TLB  TLB  TLB    1 | ||
655 | 	 * MUSUK 4 100  TLB  TLB  Unm    0      1 | ||
656 | 	 * USK   5 101       Unm  Unm    0      0 | ||
657 | 	 * -     6 110                   0      0 | ||
658 | 	 * UUSK  7 111  Unm  Unm  Unm    0      0 | ||
659 | * | ||
660 | * We shift a magic value by AM across the sign bit to find if always | ||
661 | * TLB mapped, and if not shift by 8 again to find if it depends on KM. | ||
662 | */ | ||
663 | am_lookup = 0x70080000 << am; | ||
664 | if ((s32)am_lookup < 0) { | ||
665 | /* | ||
666 | * MK, MSK, MUSK | ||
667 | * Always TLB mapped, unless SegCtl.EU && ERL | ||
668 | */ | ||
669 | if (!eu || !(read_gc0_status() & ST0_ERL)) | ||
670 | return true; | ||
671 | } else { | ||
672 | am_lookup <<= 8; | ||
673 | if ((s32)am_lookup < 0) { | ||
674 | union mips_instruction inst; | ||
675 | unsigned int status; | ||
676 | u32 *opc; | ||
677 | |||
678 | /* | ||
679 | * MUSUK | ||
680 | * TLB mapped if not in kernel mode | ||
681 | */ | ||
682 | status = read_gc0_status(); | ||
683 | if (!(status & (ST0_EXL | ST0_ERL)) && | ||
684 | (status & ST0_KSU)) | ||
685 | return true; | ||
686 | /* | ||
687 | * EVA access instructions in kernel | ||
688 | * mode access user address space. | ||
689 | */ | ||
690 | opc = (u32 *)vcpu->arch.pc; | ||
691 | if (vcpu->arch.host_cp0_cause & CAUSEF_BD) | ||
692 | opc += 1; | ||
693 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
694 | if (!err && is_eva_access(inst)) | ||
695 | return true; | ||
696 | } | ||
697 | } | ||
698 | |||
699 | return false; | ||
700 | } | ||
701 | |||
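The magic constant 0x70080000 in is_eva_am_mapped() packs both decisions of the comment table into one word: bit (31 - AM) is set for the access modes that are always TLB mapped (MK, MSK, MUSK), and bit (23 - AM) for the mode that is TLB mapped only outside kernel mode (MUSUK). A throwaway userspace check of that property, included here only as illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        static const char *const names[8] = {
                "UK", "MK", "MSK", "MUSK", "MUSUK", "USK", "-", "UUSK"
        };

        for (unsigned int am = 0; am < 8; am++) {
                uint32_t lookup = 0x70080000u << am;
                int always_tlb = (int32_t)lookup < 0;                /* first sign-bit test */
                int tlb_unless_kernel = (int32_t)(lookup << 8) < 0;  /* second test */

                printf("%-5s  always TLB: %d  TLB unless kernel: %d\n",
                       names[am], always_tlb, tlb_unless_kernel);
        }
        return 0;
}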
702 | /** | ||
703 | * kvm_vz_gva_to_gpa() - Convert valid GVA to GPA. | ||
704 | * @vcpu: KVM VCPU state. | ||
705 | * @gva: Guest virtual address to convert. | ||
706 | * @gpa: Output guest physical address. | ||
707 | * | ||
708 | * Convert a guest virtual address (GVA) which is valid according to the guest | ||
709 | * context, to a guest physical address (GPA). | ||
710 | * | ||
711 | * Returns: 0 on success. | ||
712 | * -errno on failure. | ||
713 | */ | ||
714 | static int kvm_vz_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | ||
715 | unsigned long *gpa) | ||
716 | { | ||
717 | u32 gva32 = gva; | ||
718 | unsigned long segctl; | ||
719 | |||
720 | if ((long)gva == (s32)gva32) { | ||
721 | /* Handle canonical 32-bit virtual address */ | ||
722 | if (cpu_guest_has_segments) { | ||
723 | unsigned long mask, pa; | ||
724 | |||
725 | switch (gva32 >> 29) { | ||
726 | case 0: | ||
727 | case 1: /* CFG5 (1GB) */ | ||
728 | segctl = read_gc0_segctl2() >> 16; | ||
729 | mask = (unsigned long)0xfc0000000ull; | ||
730 | break; | ||
731 | case 2: | ||
732 | case 3: /* CFG4 (1GB) */ | ||
733 | segctl = read_gc0_segctl2(); | ||
734 | mask = (unsigned long)0xfc0000000ull; | ||
735 | break; | ||
736 | case 4: /* CFG3 (512MB) */ | ||
737 | segctl = read_gc0_segctl1() >> 16; | ||
738 | mask = (unsigned long)0xfe0000000ull; | ||
739 | break; | ||
740 | case 5: /* CFG2 (512MB) */ | ||
741 | segctl = read_gc0_segctl1(); | ||
742 | mask = (unsigned long)0xfe0000000ull; | ||
743 | break; | ||
744 | case 6: /* CFG1 (512MB) */ | ||
745 | segctl = read_gc0_segctl0() >> 16; | ||
746 | mask = (unsigned long)0xfe0000000ull; | ||
747 | break; | ||
748 | case 7: /* CFG0 (512MB) */ | ||
749 | segctl = read_gc0_segctl0(); | ||
750 | mask = (unsigned long)0xfe0000000ull; | ||
751 | break; | ||
752 | default: | ||
753 | /* | ||
754 | * GCC 4.9 isn't smart enough to figure out that | ||
755 | * segctl and mask are always initialised. | ||
756 | */ | ||
757 | unreachable(); | ||
758 | } | ||
759 | |||
760 | if (is_eva_am_mapped(vcpu, (segctl >> 4) & 0x7, | ||
761 | segctl & 0x0008)) | ||
762 | goto tlb_mapped; | ||
763 | |||
764 | /* Unmapped, find guest physical address */ | ||
765 | pa = (segctl << 20) & mask; | ||
766 | pa |= gva32 & ~mask; | ||
767 | *gpa = pa; | ||
768 | return 0; | ||
769 | } else if ((s32)gva32 < (s32)0xc0000000) { | ||
770 | /* legacy unmapped KSeg0 or KSeg1 */ | ||
771 | *gpa = gva32 & 0x1fffffff; | ||
772 | return 0; | ||
773 | } | ||
774 | #ifdef CONFIG_64BIT | ||
775 | } else if ((gva & 0xc000000000000000) == 0x8000000000000000) { | ||
776 | /* XKPHYS */ | ||
777 | if (cpu_guest_has_segments) { | ||
778 | /* | ||
779 | * Each of the 8 regions can be overridden by SegCtl2.XR | ||
780 | * to use SegCtl1.XAM. | ||
781 | */ | ||
782 | segctl = read_gc0_segctl2(); | ||
783 | if (segctl & (1ull << (56 + ((gva >> 59) & 0x7)))) { | ||
784 | segctl = read_gc0_segctl1(); | ||
785 | if (is_eva_am_mapped(vcpu, (segctl >> 59) & 0x7, | ||
786 | 0)) | ||
787 | goto tlb_mapped; | ||
788 | } | ||
789 | |||
790 | } | ||
791 | /* | ||
792 | * Traditionally fully unmapped. | ||
793 | * Bits 61:59 specify the CCA, which we can just mask off here. | ||
794 | * Bits 58:PABITS should be zero, but we shouldn't have got here | ||
796 | * if they weren't. | ||
796 | */ | ||
797 | *gpa = gva & 0x07ffffffffffffff; | ||
798 | return 0; | ||
799 | #endif | ||
800 | } | ||
801 | |||
802 | tlb_mapped: | ||
803 | return kvm_vz_guest_tlb_lookup(vcpu, gva, gpa); | ||
804 | } | ||
805 | |||
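For the common case without segmentation control, the translation above reduces to masking: a 32-bit KSeg0/KSeg1 virtual address maps to its low 29 bits. A minimal sketch of just that legacy-segment case, for illustration:

#include <assert.h>
#include <stdint.h>

/* Unmapped KSeg0 (0x80000000..) and KSeg1 (0xa0000000..) both alias the
 * low 512MB of the physical address space. */
static uint32_t kseg_to_gpa(uint32_t gva32)
{
        return gva32 & 0x1fffffff;
}

int main(void)
{
        assert(kseg_to_gpa(0x80001000u) == 0x00001000u);   /* cached KSeg0 */
        assert(kseg_to_gpa(0xa0001000u) == 0x00001000u);   /* uncached KSeg1 */
        return 0;
}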
806 | /** | ||
807 | * kvm_vz_badvaddr_to_gpa() - Convert GVA BadVAddr from root exception to GPA. | ||
808 | * @vcpu: KVM VCPU state. | ||
809 | * @badvaddr: Root BadVAddr. | ||
810 | * @gpa: Output guest physical address. | ||
811 | * | ||
812 | * VZ implementations are permitted to report guest virtual addresses (GVA) in | ||
813 | * BadVAddr on a root exception during guest execution, instead of the more | ||
814 | * convenient guest physical addresses (GPA). When we get a GVA, this function | ||
815 | * converts it to a GPA, taking into account guest segmentation and guest TLB | ||
816 | * state. | ||
817 | * | ||
818 | * Returns: 0 on success. | ||
819 | * -errno on failure. | ||
820 | */ | ||
821 | static int kvm_vz_badvaddr_to_gpa(struct kvm_vcpu *vcpu, unsigned long badvaddr, | ||
822 | unsigned long *gpa) | ||
823 | { | ||
824 | unsigned int gexccode = (vcpu->arch.host_cp0_guestctl0 & | ||
825 | MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; | ||
826 | |||
827 | /* If BadVAddr is GPA, then all is well in the world */ | ||
828 | if (likely(gexccode == MIPS_GCTL0_GEXC_GPA)) { | ||
829 | *gpa = badvaddr; | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | /* Otherwise we'd expect it to be GVA ... */ | ||
834 | if (WARN(gexccode != MIPS_GCTL0_GEXC_GVA, | ||
835 | "Unexpected gexccode %#x\n", gexccode)) | ||
836 | return -EINVAL; | ||
837 | |||
838 | /* ... and we need to perform the GVA->GPA translation in software */ | ||
839 | return kvm_vz_gva_to_gpa(vcpu, badvaddr, gpa); | ||
840 | } | ||
841 | |||
842 | static int kvm_trap_vz_no_handler(struct kvm_vcpu *vcpu) | ||
843 | { | ||
844 | u32 *opc = (u32 *) vcpu->arch.pc; | ||
845 | u32 cause = vcpu->arch.host_cp0_cause; | ||
846 | u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; | ||
847 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; | ||
848 | u32 inst = 0; | ||
849 | |||
850 | /* | ||
851 | * Fetch the instruction. | ||
852 | */ | ||
853 | if (cause & CAUSEF_BD) | ||
854 | opc += 1; | ||
855 | kvm_get_badinstr(opc, vcpu, &inst); | ||
856 | |||
857 | kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", | ||
858 | exccode, opc, inst, badvaddr, | ||
859 | read_gc0_status()); | ||
860 | kvm_arch_vcpu_dump_regs(vcpu); | ||
861 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
862 | return RESUME_HOST; | ||
863 | } | ||
864 | |||
865 | static unsigned long mips_process_maar(unsigned int op, unsigned long val) | ||
866 | { | ||
867 | /* Mask off unused bits */ | ||
868 | unsigned long mask = 0xfffff000 | MIPS_MAAR_S | MIPS_MAAR_VL; | ||
869 | |||
870 | if (read_gc0_pagegrain() & PG_ELPA) | ||
871 | mask |= 0x00ffffff00000000ull; | ||
872 | if (cpu_guest_has_mvh) | ||
873 | mask |= MIPS_MAAR_VH; | ||
874 | |||
875 | /* Set or clear VH */ | ||
876 | if (op == mtc_op) { | ||
877 | /* clear VH */ | ||
878 | val &= ~MIPS_MAAR_VH; | ||
879 | } else if (op == dmtc_op) { | ||
880 | /* set VH to match VL */ | ||
881 | val &= ~MIPS_MAAR_VH; | ||
882 | if (val & MIPS_MAAR_VL) | ||
883 | val |= MIPS_MAAR_VH; | ||
884 | } | ||
885 | |||
886 | return val & mask; | ||
887 | } | ||
888 | |||
889 | static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val) | ||
890 | { | ||
891 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
892 | |||
893 | val &= MIPS_MAARI_INDEX; | ||
894 | if (val == MIPS_MAARI_INDEX) | ||
895 | kvm_write_sw_gc0_maari(cop0, ARRAY_SIZE(vcpu->arch.maar) - 1); | ||
896 | else if (val < ARRAY_SIZE(vcpu->arch.maar)) | ||
897 | kvm_write_sw_gc0_maari(cop0, val); | ||
898 | } | ||
899 | |||
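kvm_write_maari() mimics the architectural MAARI write behaviour for a fixed-size MAAR array: writing the all-ones index selects the highest implemented pair, and other out-of-range indices are ignored. A standalone model of the same logic; the array size and the 6-bit index field are assumptions for illustration only:

#include <stdio.h>

#define NUM_MAAR        16      /* assumed number of implemented MAAR pairs */
#define MAARI_INDEX     0x3f    /* assumed 6-bit index field */

static unsigned int maari;

static void write_maari(unsigned long val)
{
        val &= MAARI_INDEX;
        if (val == MAARI_INDEX)
                maari = NUM_MAAR - 1;   /* ~0 selects the last valid index */
        else if (val < NUM_MAAR)
                maari = val;
        /* other out-of-range values leave MAARI unchanged */
}

int main(void)
{
        write_maari(0x3f);
        printf("after writing ~0: %u\n", maari);          /* 15 */
        write_maari(40);
        printf("after out-of-range write: %u\n", maari);  /* still 15 */
        write_maari(3);
        printf("after writing 3: %u\n", maari);           /* 3 */
        return 0;
}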
900 | static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, | ||
901 | u32 *opc, u32 cause, | ||
902 | struct kvm_run *run, | ||
903 | struct kvm_vcpu *vcpu) | ||
904 | { | ||
905 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
906 | enum emulation_result er = EMULATE_DONE; | ||
907 | u32 rt, rd, sel; | ||
908 | unsigned long curr_pc; | ||
909 | unsigned long val; | ||
910 | |||
911 | /* | ||
912 | * Update PC and hold onto current PC in case there is | ||
913 | * an error and we want to rollback the PC | ||
914 | */ | ||
915 | curr_pc = vcpu->arch.pc; | ||
916 | er = update_pc(vcpu, cause); | ||
917 | if (er == EMULATE_FAIL) | ||
918 | return er; | ||
919 | |||
920 | if (inst.co_format.co) { | ||
921 | switch (inst.co_format.func) { | ||
922 | case wait_op: | ||
923 | er = kvm_mips_emul_wait(vcpu); | ||
924 | break; | ||
925 | default: | ||
926 | er = EMULATE_FAIL; | ||
927 | } | ||
928 | } else { | ||
929 | rt = inst.c0r_format.rt; | ||
930 | rd = inst.c0r_format.rd; | ||
931 | sel = inst.c0r_format.sel; | ||
932 | |||
933 | switch (inst.c0r_format.rs) { | ||
934 | case dmfc_op: | ||
935 | case mfc_op: | ||
936 | #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS | ||
937 | cop0->stat[rd][sel]++; | ||
938 | #endif | ||
939 | if (rd == MIPS_CP0_COUNT && | ||
940 | sel == 0) { /* Count */ | ||
941 | val = kvm_mips_read_count(vcpu); | ||
942 | } else if (rd == MIPS_CP0_COMPARE && | ||
943 | sel == 0) { /* Compare */ | ||
944 | val = read_gc0_compare(); | ||
945 | } else if (rd == MIPS_CP0_LLADDR && | ||
946 | sel == 0) { /* LLAddr */ | ||
947 | if (cpu_guest_has_rw_llb) | ||
948 | val = read_gc0_lladdr() & | ||
949 | MIPS_LLADDR_LLB; | ||
950 | else | ||
951 | val = 0; | ||
952 | } else if (rd == MIPS_CP0_LLADDR && | ||
953 | sel == 1 && /* MAAR */ | ||
954 | cpu_guest_has_maar && | ||
955 | !cpu_guest_has_dyn_maar) { | ||
956 | /* MAARI must be in range */ | ||
957 | BUG_ON(kvm_read_sw_gc0_maari(cop0) >= | ||
958 | ARRAY_SIZE(vcpu->arch.maar)); | ||
959 | val = vcpu->arch.maar[ | ||
960 | kvm_read_sw_gc0_maari(cop0)]; | ||
961 | } else if ((rd == MIPS_CP0_PRID && | ||
962 | (sel == 0 || /* PRid */ | ||
963 | sel == 2 || /* CDMMBase */ | ||
964 | sel == 3)) || /* CMGCRBase */ | ||
965 | (rd == MIPS_CP0_STATUS && | ||
966 | (sel == 2 || /* SRSCtl */ | ||
967 | sel == 3)) || /* SRSMap */ | ||
968 | (rd == MIPS_CP0_CONFIG && | ||
969 | (sel == 7)) || /* Config7 */ | ||
970 | (rd == MIPS_CP0_LLADDR && | ||
971 | (sel == 2) && /* MAARI */ | ||
972 | cpu_guest_has_maar && | ||
973 | !cpu_guest_has_dyn_maar) || | ||
974 | (rd == MIPS_CP0_ERRCTL && | ||
975 | (sel == 0))) { /* ErrCtl */ | ||
976 | val = cop0->reg[rd][sel]; | ||
977 | } else { | ||
978 | val = 0; | ||
979 | er = EMULATE_FAIL; | ||
980 | } | ||
981 | |||
982 | if (er != EMULATE_FAIL) { | ||
983 | /* Sign extend */ | ||
984 | if (inst.c0r_format.rs == mfc_op) | ||
985 | val = (int)val; | ||
986 | vcpu->arch.gprs[rt] = val; | ||
987 | } | ||
988 | |||
989 | trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mfc_op) ? | ||
990 | KVM_TRACE_MFC0 : KVM_TRACE_DMFC0, | ||
991 | KVM_TRACE_COP0(rd, sel), val); | ||
992 | break; | ||
993 | |||
994 | case dmtc_op: | ||
995 | case mtc_op: | ||
996 | #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS | ||
997 | cop0->stat[rd][sel]++; | ||
998 | #endif | ||
999 | val = vcpu->arch.gprs[rt]; | ||
1000 | trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mtc_op) ? | ||
1001 | KVM_TRACE_MTC0 : KVM_TRACE_DMTC0, | ||
1002 | KVM_TRACE_COP0(rd, sel), val); | ||
1003 | |||
1004 | if (rd == MIPS_CP0_COUNT && | ||
1005 | sel == 0) { /* Count */ | ||
1006 | kvm_vz_lose_htimer(vcpu); | ||
1007 | kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]); | ||
1008 | } else if (rd == MIPS_CP0_COMPARE && | ||
1009 | sel == 0) { /* Compare */ | ||
1010 | kvm_mips_write_compare(vcpu, | ||
1011 | vcpu->arch.gprs[rt], | ||
1012 | true); | ||
1013 | } else if (rd == MIPS_CP0_LLADDR && | ||
1014 | sel == 0) { /* LLAddr */ | ||
1015 | /* | ||
1016 | * P5600 generates GPSI on guest MTC0 LLAddr. | ||
1017 | * Only allow the guest to clear LLB. | ||
1018 | */ | ||
1019 | if (cpu_guest_has_rw_llb && | ||
1020 | !(val & MIPS_LLADDR_LLB)) | ||
1021 | write_gc0_lladdr(0); | ||
1022 | } else if (rd == MIPS_CP0_LLADDR && | ||
1023 | sel == 1 && /* MAAR */ | ||
1024 | cpu_guest_has_maar && | ||
1025 | !cpu_guest_has_dyn_maar) { | ||
1026 | val = mips_process_maar(inst.c0r_format.rs, | ||
1027 | val); | ||
1028 | |||
1029 | /* MAARI must be in range */ | ||
1030 | BUG_ON(kvm_read_sw_gc0_maari(cop0) >= | ||
1031 | ARRAY_SIZE(vcpu->arch.maar)); | ||
1032 | vcpu->arch.maar[kvm_read_sw_gc0_maari(cop0)] = | ||
1033 | val; | ||
1034 | } else if (rd == MIPS_CP0_LLADDR && | ||
1035 | (sel == 2) && /* MAARI */ | ||
1036 | cpu_guest_has_maar && | ||
1037 | !cpu_guest_has_dyn_maar) { | ||
1038 | kvm_write_maari(vcpu, val); | ||
1039 | } else if (rd == MIPS_CP0_ERRCTL && | ||
1040 | (sel == 0)) { /* ErrCtl */ | ||
1041 | /* ignore the written value */ | ||
1042 | } else { | ||
1043 | er = EMULATE_FAIL; | ||
1044 | } | ||
1045 | break; | ||
1046 | |||
1047 | default: | ||
1048 | er = EMULATE_FAIL; | ||
1049 | break; | ||
1050 | } | ||
1051 | } | ||
1052 | /* Rollback PC only if emulation was unsuccessful */ | ||
1053 | if (er == EMULATE_FAIL) { | ||
1054 | kvm_err("[%#lx]%s: unsupported cop0 instruction 0x%08x\n", | ||
1055 | curr_pc, __func__, inst.word); | ||
1056 | |||
1057 | vcpu->arch.pc = curr_pc; | ||
1058 | } | ||
1059 | |||
1060 | return er; | ||
1061 | } | ||
1062 | |||
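One subtle point in the MFC0 path above: the `val = (int)val` cast reproduces the architectural sign-extension of a 32-bit coprocessor value into a 64-bit general register. A tiny userspace illustration of the same effect, not part of the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t val = 0x80000000u;     /* 32-bit CP0 value with bit 31 set */
        int64_t gpr = (int32_t)val;     /* mfc0 semantics: sign extend */

        printf("gpr = 0x%016llx\n", (unsigned long long)gpr);
        /* prints 0xffffffff80000000, as a real mfc0 into a 64-bit GPR would */
        return 0;
}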
1063 | static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, | ||
1064 | u32 *opc, u32 cause, | ||
1065 | struct kvm_run *run, | ||
1066 | struct kvm_vcpu *vcpu) | ||
1067 | { | ||
1068 | enum emulation_result er = EMULATE_DONE; | ||
1069 | u32 cache, op_inst, op, base; | ||
1070 | s16 offset; | ||
1071 | struct kvm_vcpu_arch *arch = &vcpu->arch; | ||
1072 | unsigned long va, curr_pc; | ||
1073 | |||
1074 | /* | ||
1075 | * Update PC and hold onto current PC in case there is | ||
1076 | * an error and we want to rollback the PC | ||
1077 | */ | ||
1078 | curr_pc = vcpu->arch.pc; | ||
1079 | er = update_pc(vcpu, cause); | ||
1080 | if (er == EMULATE_FAIL) | ||
1081 | return er; | ||
1082 | |||
1083 | base = inst.i_format.rs; | ||
1084 | op_inst = inst.i_format.rt; | ||
1085 | if (cpu_has_mips_r6) | ||
1086 | offset = inst.spec3_format.simmediate; | ||
1087 | else | ||
1088 | offset = inst.i_format.simmediate; | ||
1089 | cache = op_inst & CacheOp_Cache; | ||
1090 | op = op_inst & CacheOp_Op; | ||
1091 | |||
1092 | va = arch->gprs[base] + offset; | ||
1093 | |||
1094 | kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", | ||
1095 | cache, op, base, arch->gprs[base], offset); | ||
1096 | |||
1097 | 	/* Secondary or tertiary cache ops are ignored */ | ||
1098 | if (cache != Cache_I && cache != Cache_D) | ||
1099 | return EMULATE_DONE; | ||
1100 | |||
1101 | switch (op_inst) { | ||
1102 | case Index_Invalidate_I: | ||
1103 | flush_icache_line_indexed(va); | ||
1104 | return EMULATE_DONE; | ||
1105 | case Index_Writeback_Inv_D: | ||
1106 | flush_dcache_line_indexed(va); | ||
1107 | return EMULATE_DONE; | ||
1108 | case Hit_Invalidate_I: | ||
1109 | case Hit_Invalidate_D: | ||
1110 | case Hit_Writeback_Inv_D: | ||
1111 | if (boot_cpu_type() == CPU_CAVIUM_OCTEON3) { | ||
1112 | /* We can just flush entire icache */ | ||
1113 | local_flush_icache_range(0, 0); | ||
1114 | return EMULATE_DONE; | ||
1115 | } | ||
1116 | |||
1117 | /* So far, other platforms support guest hit cache ops */ | ||
1118 | break; | ||
1119 | default: | ||
1120 | break; | ||
1121 | 	} | ||
1122 | |||
1123 | kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", | ||
1124 | curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base], | ||
1125 | offset); | ||
1126 | /* Rollback PC */ | ||
1127 | vcpu->arch.pc = curr_pc; | ||
1128 | |||
1129 | return EMULATE_FAIL; | ||
1130 | } | ||
1131 | |||
1132 | static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, | ||
1133 | struct kvm_vcpu *vcpu) | ||
1134 | { | ||
1135 | enum emulation_result er = EMULATE_DONE; | ||
1136 | struct kvm_vcpu_arch *arch = &vcpu->arch; | ||
1137 | struct kvm_run *run = vcpu->run; | ||
1138 | union mips_instruction inst; | ||
1139 | int rd, rt, sel; | ||
1140 | int err; | ||
1141 | |||
1142 | /* | ||
1143 | * Fetch the instruction. | ||
1144 | */ | ||
1145 | if (cause & CAUSEF_BD) | ||
1146 | opc += 1; | ||
1147 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
1148 | if (err) | ||
1149 | return EMULATE_FAIL; | ||
1150 | |||
1151 | switch (inst.r_format.opcode) { | ||
1152 | case cop0_op: | ||
1153 | er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu); | ||
1154 | break; | ||
1155 | #ifndef CONFIG_CPU_MIPSR6 | ||
1156 | case cache_op: | ||
1157 | trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); | ||
1158 | er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu); | ||
1159 | break; | ||
1160 | #endif | ||
1161 | case spec3_op: | ||
1162 | switch (inst.spec3_format.func) { | ||
1163 | #ifdef CONFIG_CPU_MIPSR6 | ||
1164 | case cache6_op: | ||
1165 | trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); | ||
1166 | er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu); | ||
1167 | break; | ||
1168 | #endif | ||
1169 | case rdhwr_op: | ||
1170 | if (inst.r_format.rs || (inst.r_format.re >> 3)) | ||
1171 | goto unknown; | ||
1172 | |||
1173 | rd = inst.r_format.rd; | ||
1174 | rt = inst.r_format.rt; | ||
1175 | sel = inst.r_format.re & 0x7; | ||
1176 | |||
1177 | switch (rd) { | ||
1178 | case MIPS_HWR_CC: /* Read count register */ | ||
1179 | arch->gprs[rt] = | ||
1180 | (long)(int)kvm_mips_read_count(vcpu); | ||
1181 | break; | ||
1182 | default: | ||
1183 | trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, | ||
1184 | KVM_TRACE_HWR(rd, sel), 0); | ||
1185 | goto unknown; | ||
1186 | 			} | ||
1187 | |||
1188 | trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, | ||
1189 | KVM_TRACE_HWR(rd, sel), arch->gprs[rt]); | ||
1190 | |||
1191 | er = update_pc(vcpu, cause); | ||
1192 | break; | ||
1193 | default: | ||
1194 | goto unknown; | ||
1195 | 		} | ||
1196 | break; | ||
1197 | unknown: | ||
1198 | |||
1199 | default: | ||
1200 | kvm_err("GPSI exception not supported (%p/%#x)\n", | ||
1201 | opc, inst.word); | ||
1202 | kvm_arch_vcpu_dump_regs(vcpu); | ||
1203 | er = EMULATE_FAIL; | ||
1204 | break; | ||
1205 | } | ||
1206 | |||
1207 | return er; | ||
1208 | } | ||
1209 | |||
1210 | static enum emulation_result kvm_trap_vz_handle_gsfc(u32 cause, u32 *opc, | ||
1211 | struct kvm_vcpu *vcpu) | ||
1212 | { | ||
1213 | enum emulation_result er = EMULATE_DONE; | ||
1214 | struct kvm_vcpu_arch *arch = &vcpu->arch; | ||
1215 | union mips_instruction inst; | ||
1216 | int err; | ||
1217 | |||
1218 | /* | ||
1219 | * Fetch the instruction. | ||
1220 | */ | ||
1221 | if (cause & CAUSEF_BD) | ||
1222 | opc += 1; | ||
1223 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
1224 | if (err) | ||
1225 | return EMULATE_FAIL; | ||
1226 | |||
1227 | /* complete MTC0 on behalf of guest and advance EPC */ | ||
1228 | if (inst.c0r_format.opcode == cop0_op && | ||
1229 | inst.c0r_format.rs == mtc_op && | ||
1230 | inst.c0r_format.z == 0) { | ||
1231 | int rt = inst.c0r_format.rt; | ||
1232 | int rd = inst.c0r_format.rd; | ||
1233 | int sel = inst.c0r_format.sel; | ||
1234 | unsigned int val = arch->gprs[rt]; | ||
1235 | unsigned int old_val, change; | ||
1236 | |||
1237 | trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, KVM_TRACE_COP0(rd, sel), | ||
1238 | val); | ||
1239 | |||
1240 | if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { | ||
1241 | /* FR bit should read as zero if no FPU */ | ||
1242 | if (!kvm_mips_guest_has_fpu(&vcpu->arch)) | ||
1243 | val &= ~(ST0_CU1 | ST0_FR); | ||
1244 | |||
1245 | /* | ||
1246 | * Also don't allow FR to be set if host doesn't support | ||
1247 | * it. | ||
1248 | */ | ||
1249 | if (!(boot_cpu_data.fpu_id & MIPS_FPIR_F64)) | ||
1250 | val &= ~ST0_FR; | ||
1251 | |||
1252 | old_val = read_gc0_status(); | ||
1253 | change = val ^ old_val; | ||
1254 | |||
1255 | if (change & ST0_FR) { | ||
1256 | /* | ||
1257 | * FPU and Vector register state is made | ||
1258 | * UNPREDICTABLE by a change of FR, so don't | ||
1259 | * even bother saving it. | ||
1260 | */ | ||
1261 | kvm_drop_fpu(vcpu); | ||
1262 | } | ||
1263 | |||
1264 | /* | ||
1265 | * If MSA state is already live, it is undefined how it | ||
1266 | * interacts with FR=0 FPU state, and we don't want to | ||
1267 | * hit reserved instruction exceptions trying to save | ||
1268 | * the MSA state later when CU=1 && FR=1, so play it | ||
1269 | * safe and save it first. | ||
1270 | */ | ||
1271 | if (change & ST0_CU1 && !(val & ST0_FR) && | ||
1272 | vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) | ||
1273 | kvm_lose_fpu(vcpu); | ||
1274 | |||
1275 | write_gc0_status(val); | ||
1276 | } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { | ||
1277 | u32 old_cause = read_gc0_cause(); | ||
1278 | u32 change = old_cause ^ val; | ||
1279 | |||
1280 | /* DC bit enabling/disabling timer? */ | ||
1281 | if (change & CAUSEF_DC) { | ||
1282 | if (val & CAUSEF_DC) { | ||
1283 | kvm_vz_lose_htimer(vcpu); | ||
1284 | kvm_mips_count_disable_cause(vcpu); | ||
1285 | } else { | ||
1286 | kvm_mips_count_enable_cause(vcpu); | ||
1287 | } | ||
1288 | } | ||
1289 | |||
1290 | /* Only certain bits are RW to the guest */ | ||
1291 | change &= (CAUSEF_DC | CAUSEF_IV | CAUSEF_WP | | ||
1292 | CAUSEF_IP0 | CAUSEF_IP1); | ||
1293 | |||
1294 | /* WP can only be cleared */ | ||
1295 | change &= ~CAUSEF_WP | old_cause; | ||
1296 | |||
1297 | write_gc0_cause(old_cause ^ change); | ||
1298 | } else if ((rd == MIPS_CP0_STATUS) && (sel == 1)) { /* IntCtl */ | ||
1299 | write_gc0_intctl(val); | ||
1300 | } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) { | ||
1301 | old_val = read_gc0_config5(); | ||
1302 | change = val ^ old_val; | ||
1303 | /* Handle changes in FPU/MSA modes */ | ||
1304 | preempt_disable(); | ||
1305 | |||
1306 | /* | ||
1307 | * Propagate FRE changes immediately if the FPU | ||
1308 | * context is already loaded. | ||
1309 | */ | ||
1310 | if (change & MIPS_CONF5_FRE && | ||
1311 | vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) | ||
1312 | change_c0_config5(MIPS_CONF5_FRE, val); | ||
1313 | |||
1314 | preempt_enable(); | ||
1315 | |||
1316 | val = old_val ^ | ||
1317 | (change & kvm_vz_config5_guest_wrmask(vcpu)); | ||
1318 | write_gc0_config5(val); | ||
1319 | } else { | ||
1320 | kvm_err("Handle GSFC, unsupported field change @ %p: %#x\n", | ||
1321 | opc, inst.word); | ||
1322 | er = EMULATE_FAIL; | ||
1323 | } | ||
1324 | |||
1325 | if (er != EMULATE_FAIL) | ||
1326 | er = update_pc(vcpu, cause); | ||
1327 | } else { | ||
1328 | kvm_err("Handle GSFC, unrecognized instruction @ %p: %#x\n", | ||
1329 | opc, inst.word); | ||
1330 | er = EMULATE_FAIL; | ||
1331 | } | ||
1332 | |||
1333 | return er; | ||
1334 | } | ||
1335 | |||
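The Cause write handling above hinges on one compact expression: change &= ~CAUSEF_WP | old_cause keeps a WP change only while WP is currently set, so the guest may clear WP but can never set it. A minimal standalone illustration of just that rule, assuming the usual Cause.WP bit position (bit 22); the helper name and sample values are hypothetical:

#include <stdio.h>

#define CAUSEF_WP (1u << 22)	/* assumed Cause.WP bit position */

/* Same "WP can only be cleared" filtering as the GSFC handler above. */
static unsigned int apply_wp_rule(unsigned int old_cause, unsigned int val)
{
	unsigned int change = (old_cause ^ val) & CAUSEF_WP;

	change &= ~CAUSEF_WP | old_cause;	/* keep the change only if WP was set */
	return old_cause ^ change;
}

int main(void)
{
	/* Guest clears WP: permitted, resulting WP bit is 0. */
	printf("%#x\n", apply_wp_rule(CAUSEF_WP, 0) & CAUSEF_WP);
	/* Guest tries to set WP: ignored, WP stays 0. */
	printf("%#x\n", apply_wp_rule(0, CAUSEF_WP) & CAUSEF_WP);
	return 0;
}

Both cases end with WP clear: the first because the guest cleared it, the second because the attempted set is filtered out by the mask.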
1336 | static enum emulation_result kvm_trap_vz_handle_ghfc(u32 cause, u32 *opc, | ||
1337 | struct kvm_vcpu *vcpu) | ||
1338 | { | ||
1339 | /* | ||
1340 | * Presumably this is due to MC (guest mode change), so let's trace some | ||
1341 | * relevant info. | ||
1342 | */ | ||
1343 | trace_kvm_guest_mode_change(vcpu); | ||
1344 | |||
1345 | return EMULATE_DONE; | ||
1346 | } | ||
1347 | |||
1348 | static enum emulation_result kvm_trap_vz_handle_hc(u32 cause, u32 *opc, | ||
1349 | struct kvm_vcpu *vcpu) | ||
1350 | { | ||
1351 | enum emulation_result er; | ||
1352 | union mips_instruction inst; | ||
1353 | unsigned long curr_pc; | ||
1354 | int err; | ||
1355 | |||
1356 | if (cause & CAUSEF_BD) | ||
1357 | opc += 1; | ||
1358 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
1359 | if (err) | ||
1360 | return EMULATE_FAIL; | ||
1361 | |||
1362 | /* | ||
1363 | * Update PC and hold onto current PC in case there is | ||
1364 | * an error and we want to rollback the PC | ||
1365 | */ | ||
1366 | curr_pc = vcpu->arch.pc; | ||
1367 | er = update_pc(vcpu, cause); | ||
1368 | if (er == EMULATE_FAIL) | ||
1369 | return er; | ||
1370 | |||
1371 | er = kvm_mips_emul_hypcall(vcpu, inst); | ||
1372 | if (er == EMULATE_FAIL) | ||
1373 | vcpu->arch.pc = curr_pc; | ||
1374 | |||
1375 | return er; | ||
1376 | } | ||
1377 | |||
1378 | static enum emulation_result kvm_trap_vz_no_handler_guest_exit(u32 gexccode, | ||
1379 | u32 cause, | ||
1380 | u32 *opc, | ||
1381 | struct kvm_vcpu *vcpu) | ||
1382 | { | ||
1383 | u32 inst; | ||
1384 | |||
1385 | /* | ||
1386 | * Fetch the instruction. | ||
1387 | */ | ||
1388 | if (cause & CAUSEF_BD) | ||
1389 | opc += 1; | ||
1390 | kvm_get_badinstr(opc, vcpu, &inst); | ||
1391 | |||
1392 | kvm_err("Guest Exception Code: %d not yet handled @ PC: %p, inst: 0x%08x Status: %#x\n", | ||
1393 | gexccode, opc, inst, read_gc0_status()); | ||
1394 | |||
1395 | return EMULATE_FAIL; | ||
1396 | } | ||
1397 | |||
1398 | static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu) | ||
1399 | { | ||
1400 | u32 *opc = (u32 *) vcpu->arch.pc; | ||
1401 | u32 cause = vcpu->arch.host_cp0_cause; | ||
1402 | enum emulation_result er = EMULATE_DONE; | ||
1403 | u32 gexccode = (vcpu->arch.host_cp0_guestctl0 & | ||
1404 | MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; | ||
1405 | int ret = RESUME_GUEST; | ||
1406 | |||
1407 | trace_kvm_exit(vcpu, KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode); | ||
1408 | switch (gexccode) { | ||
1409 | case MIPS_GCTL0_GEXC_GPSI: | ||
1410 | ++vcpu->stat.vz_gpsi_exits; | ||
1411 | er = kvm_trap_vz_handle_gpsi(cause, opc, vcpu); | ||
1412 | break; | ||
1413 | case MIPS_GCTL0_GEXC_GSFC: | ||
1414 | ++vcpu->stat.vz_gsfc_exits; | ||
1415 | er = kvm_trap_vz_handle_gsfc(cause, opc, vcpu); | ||
1416 | break; | ||
1417 | case MIPS_GCTL0_GEXC_HC: | ||
1418 | ++vcpu->stat.vz_hc_exits; | ||
1419 | er = kvm_trap_vz_handle_hc(cause, opc, vcpu); | ||
1420 | break; | ||
1421 | case MIPS_GCTL0_GEXC_GRR: | ||
1422 | ++vcpu->stat.vz_grr_exits; | ||
1423 | er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, | ||
1424 | vcpu); | ||
1425 | break; | ||
1426 | case MIPS_GCTL0_GEXC_GVA: | ||
1427 | ++vcpu->stat.vz_gva_exits; | ||
1428 | er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, | ||
1429 | vcpu); | ||
1430 | break; | ||
1431 | case MIPS_GCTL0_GEXC_GHFC: | ||
1432 | ++vcpu->stat.vz_ghfc_exits; | ||
1433 | er = kvm_trap_vz_handle_ghfc(cause, opc, vcpu); | ||
1434 | break; | ||
1435 | case MIPS_GCTL0_GEXC_GPA: | ||
1436 | ++vcpu->stat.vz_gpa_exits; | ||
1437 | er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, | ||
1438 | vcpu); | ||
1439 | break; | ||
1440 | default: | ||
1441 | ++vcpu->stat.vz_resvd_exits; | ||
1442 | er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, | ||
1443 | vcpu); | ||
1444 | break; | ||
1445 | |||
1446 | } | ||
1447 | |||
1448 | if (er == EMULATE_DONE) { | ||
1449 | ret = RESUME_GUEST; | ||
1450 | } else if (er == EMULATE_HYPERCALL) { | ||
1451 | ret = kvm_mips_handle_hypcall(vcpu); | ||
1452 | } else { | ||
1453 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1454 | ret = RESUME_HOST; | ||
1455 | } | ||
1456 | return ret; | ||
1457 | } | ||
1458 | |||
1459 | /** | ||
1460 | * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor. | ||
1461 | * @vcpu: Virtual CPU context. | ||
1462 | * | ||
1463 | * Handle when the guest attempts to use a coprocessor which hasn't been allowed | ||
1464 | * by the root context. | ||
1465 | */ | ||
1466 | static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) | ||
1467 | { | ||
1468 | struct kvm_run *run = vcpu->run; | ||
1469 | u32 cause = vcpu->arch.host_cp0_cause; | ||
1470 | enum emulation_result er = EMULATE_FAIL; | ||
1471 | int ret = RESUME_GUEST; | ||
1472 | |||
1473 | if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { | ||
1474 | /* | ||
1475 | * If guest FPU not present, the FPU operation should have been | ||
1476 | * treated as a reserved instruction! | ||
1477 | * If FPU already in use, we shouldn't get this at all. | ||
1478 | */ | ||
1479 | if (WARN_ON(!kvm_mips_guest_has_fpu(&vcpu->arch) || | ||
1480 | vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) { | ||
1481 | preempt_enable(); | ||
1482 | return EMULATE_FAIL; | ||
1483 | } | ||
1484 | |||
1485 | kvm_own_fpu(vcpu); | ||
1486 | er = EMULATE_DONE; | ||
1487 | } | ||
1488 | /* other coprocessors not handled */ | ||
1489 | |||
1490 | switch (er) { | ||
1491 | case EMULATE_DONE: | ||
1492 | ret = RESUME_GUEST; | ||
1493 | break; | ||
1494 | |||
1495 | case EMULATE_FAIL: | ||
1496 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1497 | ret = RESUME_HOST; | ||
1498 | break; | ||
1499 | |||
1500 | default: | ||
1501 | BUG(); | ||
1502 | } | ||
1503 | return ret; | ||
1504 | } | ||
1505 | |||
1506 | /** | ||
1507 | * kvm_trap_vz_handle_msa_disabled() - Guest used MSA while disabled in root. | ||
1508 | * @vcpu: Virtual CPU context. | ||
1509 | * | ||
1510 | * Handle when the guest attempts to use MSA when it is disabled in the root | ||
1511 | * context. | ||
1512 | */ | ||
1513 | static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu) | ||
1514 | { | ||
1515 | struct kvm_run *run = vcpu->run; | ||
1516 | |||
1517 | /* | ||
1518 | * If MSA not present or not exposed to guest or FR=0, the MSA operation | ||
1519 | * should have been treated as a reserved instruction! | ||
1520 | * Same if CU1=1, FR=0. | ||
1521 | * If MSA already in use, we shouldn't get this at all. | ||
1522 | */ | ||
1523 | if (!kvm_mips_guest_has_msa(&vcpu->arch) || | ||
1524 | (read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 || | ||
1525 | !(read_gc0_config5() & MIPS_CONF5_MSAEN) || | ||
1526 | vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { | ||
1527 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1528 | return RESUME_HOST; | ||
1529 | } | ||
1530 | |||
1531 | kvm_own_msa(vcpu); | ||
1532 | |||
1533 | return RESUME_GUEST; | ||
1534 | } | ||
1535 | |||
1536 | static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) | ||
1537 | { | ||
1538 | struct kvm_run *run = vcpu->run; | ||
1539 | u32 *opc = (u32 *) vcpu->arch.pc; | ||
1540 | u32 cause = vcpu->arch.host_cp0_cause; | ||
1541 | ulong badvaddr = vcpu->arch.host_cp0_badvaddr; | ||
1542 | union mips_instruction inst; | ||
1543 | enum emulation_result er = EMULATE_DONE; | ||
1544 | int err, ret = RESUME_GUEST; | ||
1545 | |||
1546 | if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, false)) { | ||
1547 | /* A code fetch fault doesn't count as an MMIO */ | ||
1548 | if (kvm_is_ifetch_fault(&vcpu->arch)) { | ||
1549 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1550 | return RESUME_HOST; | ||
1551 | } | ||
1552 | |||
1553 | /* Fetch the instruction */ | ||
1554 | if (cause & CAUSEF_BD) | ||
1555 | opc += 1; | ||
1556 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
1557 | if (err) { | ||
1558 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1559 | return RESUME_HOST; | ||
1560 | } | ||
1561 | |||
1562 | /* Treat as MMIO */ | ||
1563 | er = kvm_mips_emulate_load(inst, cause, run, vcpu); | ||
1564 | if (er == EMULATE_FAIL) { | ||
1565 | kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n", | ||
1566 | opc, badvaddr); | ||
1567 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1568 | } | ||
1569 | } | ||
1570 | |||
1571 | if (er == EMULATE_DONE) { | ||
1572 | ret = RESUME_GUEST; | ||
1573 | } else if (er == EMULATE_DO_MMIO) { | ||
1574 | run->exit_reason = KVM_EXIT_MMIO; | ||
1575 | ret = RESUME_HOST; | ||
1576 | } else { | ||
1577 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1578 | ret = RESUME_HOST; | ||
1579 | } | ||
1580 | return ret; | ||
1581 | } | ||
1582 | |||
1583 | static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu) | ||
1584 | { | ||
1585 | struct kvm_run *run = vcpu->run; | ||
1586 | u32 *opc = (u32 *) vcpu->arch.pc; | ||
1587 | u32 cause = vcpu->arch.host_cp0_cause; | ||
1588 | ulong badvaddr = vcpu->arch.host_cp0_badvaddr; | ||
1589 | union mips_instruction inst; | ||
1590 | enum emulation_result er = EMULATE_DONE; | ||
1591 | int err; | ||
1592 | int ret = RESUME_GUEST; | ||
1593 | |||
1594 | /* Just try the access again if we couldn't do the translation */ | ||
1595 | if (kvm_vz_badvaddr_to_gpa(vcpu, badvaddr, &badvaddr)) | ||
1596 | return RESUME_GUEST; | ||
1597 | vcpu->arch.host_cp0_badvaddr = badvaddr; | ||
1598 | |||
1599 | if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, true)) { | ||
1600 | /* Fetch the instruction */ | ||
1601 | if (cause & CAUSEF_BD) | ||
1602 | opc += 1; | ||
1603 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
1604 | if (err) { | ||
1605 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1606 | return RESUME_HOST; | ||
1607 | } | ||
1608 | |||
1609 | /* Treat as MMIO */ | ||
1610 | er = kvm_mips_emulate_store(inst, cause, run, vcpu); | ||
1611 | if (er == EMULATE_FAIL) { | ||
1612 | kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n", | ||
1613 | opc, badvaddr); | ||
1614 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1615 | } | ||
1616 | } | ||
1617 | |||
1618 | if (er == EMULATE_DONE) { | ||
1619 | ret = RESUME_GUEST; | ||
1620 | } else if (er == EMULATE_DO_MMIO) { | ||
1621 | run->exit_reason = KVM_EXIT_MMIO; | ||
1622 | ret = RESUME_HOST; | ||
1623 | } else { | ||
1624 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1625 | ret = RESUME_HOST; | ||
1626 | } | ||
1627 | return ret; | ||
1628 | } | ||
1629 | |||
1630 | static u64 kvm_vz_get_one_regs[] = { | ||
1631 | KVM_REG_MIPS_CP0_INDEX, | ||
1632 | KVM_REG_MIPS_CP0_ENTRYLO0, | ||
1633 | KVM_REG_MIPS_CP0_ENTRYLO1, | ||
1634 | KVM_REG_MIPS_CP0_CONTEXT, | ||
1635 | KVM_REG_MIPS_CP0_PAGEMASK, | ||
1636 | KVM_REG_MIPS_CP0_PAGEGRAIN, | ||
1637 | KVM_REG_MIPS_CP0_WIRED, | ||
1638 | KVM_REG_MIPS_CP0_HWRENA, | ||
1639 | KVM_REG_MIPS_CP0_BADVADDR, | ||
1640 | KVM_REG_MIPS_CP0_COUNT, | ||
1641 | KVM_REG_MIPS_CP0_ENTRYHI, | ||
1642 | KVM_REG_MIPS_CP0_COMPARE, | ||
1643 | KVM_REG_MIPS_CP0_STATUS, | ||
1644 | KVM_REG_MIPS_CP0_INTCTL, | ||
1645 | KVM_REG_MIPS_CP0_CAUSE, | ||
1646 | KVM_REG_MIPS_CP0_EPC, | ||
1647 | KVM_REG_MIPS_CP0_PRID, | ||
1648 | KVM_REG_MIPS_CP0_EBASE, | ||
1649 | KVM_REG_MIPS_CP0_CONFIG, | ||
1650 | KVM_REG_MIPS_CP0_CONFIG1, | ||
1651 | KVM_REG_MIPS_CP0_CONFIG2, | ||
1652 | KVM_REG_MIPS_CP0_CONFIG3, | ||
1653 | KVM_REG_MIPS_CP0_CONFIG4, | ||
1654 | KVM_REG_MIPS_CP0_CONFIG5, | ||
1655 | #ifdef CONFIG_64BIT | ||
1656 | KVM_REG_MIPS_CP0_XCONTEXT, | ||
1657 | #endif | ||
1658 | KVM_REG_MIPS_CP0_ERROREPC, | ||
1659 | |||
1660 | KVM_REG_MIPS_COUNT_CTL, | ||
1661 | KVM_REG_MIPS_COUNT_RESUME, | ||
1662 | KVM_REG_MIPS_COUNT_HZ, | ||
1663 | }; | ||
1664 | |||
1665 | static u64 kvm_vz_get_one_regs_contextconfig[] = { | ||
1666 | KVM_REG_MIPS_CP0_CONTEXTCONFIG, | ||
1667 | #ifdef CONFIG_64BIT | ||
1668 | KVM_REG_MIPS_CP0_XCONTEXTCONFIG, | ||
1669 | #endif | ||
1670 | }; | ||
1671 | |||
1672 | static u64 kvm_vz_get_one_regs_segments[] = { | ||
1673 | KVM_REG_MIPS_CP0_SEGCTL0, | ||
1674 | KVM_REG_MIPS_CP0_SEGCTL1, | ||
1675 | KVM_REG_MIPS_CP0_SEGCTL2, | ||
1676 | }; | ||
1677 | |||
1678 | static u64 kvm_vz_get_one_regs_htw[] = { | ||
1679 | KVM_REG_MIPS_CP0_PWBASE, | ||
1680 | KVM_REG_MIPS_CP0_PWFIELD, | ||
1681 | KVM_REG_MIPS_CP0_PWSIZE, | ||
1682 | KVM_REG_MIPS_CP0_PWCTL, | ||
1683 | }; | ||
1684 | |||
1685 | static u64 kvm_vz_get_one_regs_kscratch[] = { | ||
1686 | KVM_REG_MIPS_CP0_KSCRATCH1, | ||
1687 | KVM_REG_MIPS_CP0_KSCRATCH2, | ||
1688 | KVM_REG_MIPS_CP0_KSCRATCH3, | ||
1689 | KVM_REG_MIPS_CP0_KSCRATCH4, | ||
1690 | KVM_REG_MIPS_CP0_KSCRATCH5, | ||
1691 | KVM_REG_MIPS_CP0_KSCRATCH6, | ||
1692 | }; | ||
1693 | |||
1694 | static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu) | ||
1695 | { | ||
1696 | unsigned long ret; | ||
1697 | |||
1698 | ret = ARRAY_SIZE(kvm_vz_get_one_regs); | ||
1699 | if (cpu_guest_has_userlocal) | ||
1700 | ++ret; | ||
1701 | if (cpu_guest_has_badinstr) | ||
1702 | ++ret; | ||
1703 | if (cpu_guest_has_badinstrp) | ||
1704 | ++ret; | ||
1705 | if (cpu_guest_has_contextconfig) | ||
1706 | ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); | ||
1707 | if (cpu_guest_has_segments) | ||
1708 | ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments); | ||
1709 | if (cpu_guest_has_htw) | ||
1710 | ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw); | ||
1711 | if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) | ||
1712 | ret += 1 + ARRAY_SIZE(vcpu->arch.maar); | ||
1713 | ret += __arch_hweight8(cpu_data[0].guest.kscratch_mask); | ||
1714 | |||
1715 | return ret; | ||
1716 | } | ||
1717 | |||
1718 | static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) | ||
1719 | { | ||
1720 | u64 index; | ||
1721 | unsigned int i; | ||
1722 | |||
1723 | if (copy_to_user(indices, kvm_vz_get_one_regs, | ||
1724 | sizeof(kvm_vz_get_one_regs))) | ||
1725 | return -EFAULT; | ||
1726 | indices += ARRAY_SIZE(kvm_vz_get_one_regs); | ||
1727 | |||
1728 | if (cpu_guest_has_userlocal) { | ||
1729 | index = KVM_REG_MIPS_CP0_USERLOCAL; | ||
1730 | if (copy_to_user(indices, &index, sizeof(index))) | ||
1731 | return -EFAULT; | ||
1732 | ++indices; | ||
1733 | } | ||
1734 | if (cpu_guest_has_badinstr) { | ||
1735 | index = KVM_REG_MIPS_CP0_BADINSTR; | ||
1736 | if (copy_to_user(indices, &index, sizeof(index))) | ||
1737 | return -EFAULT; | ||
1738 | ++indices; | ||
1739 | } | ||
1740 | if (cpu_guest_has_badinstrp) { | ||
1741 | index = KVM_REG_MIPS_CP0_BADINSTRP; | ||
1742 | if (copy_to_user(indices, &index, sizeof(index))) | ||
1743 | return -EFAULT; | ||
1744 | ++indices; | ||
1745 | } | ||
1746 | if (cpu_guest_has_contextconfig) { | ||
1747 | if (copy_to_user(indices, kvm_vz_get_one_regs_contextconfig, | ||
1748 | sizeof(kvm_vz_get_one_regs_contextconfig))) | ||
1749 | return -EFAULT; | ||
1750 | indices += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); | ||
1751 | } | ||
1752 | if (cpu_guest_has_segments) { | ||
1753 | if (copy_to_user(indices, kvm_vz_get_one_regs_segments, | ||
1754 | sizeof(kvm_vz_get_one_regs_segments))) | ||
1755 | return -EFAULT; | ||
1756 | indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments); | ||
1757 | } | ||
1758 | if (cpu_guest_has_htw) { | ||
1759 | if (copy_to_user(indices, kvm_vz_get_one_regs_htw, | ||
1760 | sizeof(kvm_vz_get_one_regs_htw))) | ||
1761 | return -EFAULT; | ||
1762 | indices += ARRAY_SIZE(kvm_vz_get_one_regs_htw); | ||
1763 | } | ||
1764 | if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) { | ||
1765 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.maar); ++i) { | ||
1766 | index = KVM_REG_MIPS_CP0_MAAR(i); | ||
1767 | if (copy_to_user(indices, &index, sizeof(index))) | ||
1768 | return -EFAULT; | ||
1769 | ++indices; | ||
1770 | } | ||
1771 | |||
1772 | index = KVM_REG_MIPS_CP0_MAARI; | ||
1773 | if (copy_to_user(indices, &index, sizeof(index))) | ||
1774 | return -EFAULT; | ||
1775 | ++indices; | ||
1776 | } | ||
1777 | for (i = 0; i < 6; ++i) { | ||
1778 | if (!cpu_guest_has_kscr(i + 2)) | ||
1779 | continue; | ||
1780 | |||
1781 | if (copy_to_user(indices, &kvm_vz_get_one_regs_kscratch[i], | ||
1782 | sizeof(kvm_vz_get_one_regs_kscratch[i]))) | ||
1783 | return -EFAULT; | ||
1784 | ++indices; | ||
1785 | } | ||
1786 | |||
1787 | return 0; | ||
1788 | } | ||
1789 | |||
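For context, a minimal userspace sketch of the ioctl flow these helpers serve: KVM_GET_REG_LIST returns the indices assembled by kvm_vz_copy_reg_indices(), and each index can then be read with KVM_GET_ONE_REG. The vcpufd parameter is assumed to be an already-created VCPU file descriptor; error and errno handling is trimmed:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void dump_vcpu_regs(int vcpufd)
{
	struct kvm_reg_list probe = { .n = 0 }, *list;
	uint64_t i;

	/* First call with n = 0: fails with E2BIG but reports the count. */
	ioctl(vcpufd, KVM_GET_REG_LIST, &probe);

	list = malloc(sizeof(*list) + probe.n * sizeof(list->reg[0]));
	list->n = probe.n;
	if (ioctl(vcpufd, KVM_GET_REG_LIST, list) < 0) {
		free(list);
		return;
	}

	for (i = 0; i < list->n; i++) {
		uint64_t val64 = 0;
		uint32_t val32 = 0;
		int is64 = (list->reg[i] & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64;
		struct kvm_one_reg reg = {
			.id   = list->reg[i],
			.addr = (uint64_t)(uintptr_t)(is64 ? (void *)&val64
							   : (void *)&val32),
		};

		/* Each id is read back through the get_one_reg path above. */
		if (ioctl(vcpufd, KVM_GET_ONE_REG, &reg) == 0)
			printf("reg %#llx = %#llx\n",
			       (unsigned long long)list->reg[i],
			       (unsigned long long)(is64 ? val64 : val32));
	}
	free(list);
}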
1790 | static inline s64 entrylo_kvm_to_user(unsigned long v) | ||
1791 | { | ||
1792 | s64 mask, ret = v; | ||
1793 | |||
1794 | if (BITS_PER_LONG == 32) { | ||
1795 | /* | ||
1796 | * KVM API exposes 64-bit version of the register, so move the | ||
1797 | * RI/XI bits up into place. | ||
1798 | */ | ||
1799 | mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; | ||
1800 | ret &= ~mask; | ||
1801 | ret |= ((s64)v & mask) << 32; | ||
1802 | } | ||
1803 | return ret; | ||
1804 | } | ||
1805 | |||
1806 | static inline unsigned long entrylo_user_to_kvm(s64 v) | ||
1807 | { | ||
1808 | unsigned long mask, ret = v; | ||
1809 | |||
1810 | if (BITS_PER_LONG == 32) { | ||
1811 | /* | ||
1812 | * KVM API exposes 64-bit version of the register, so move the | ||
1813 | * RI/XI bits down into place. | ||
1814 | */ | ||
1815 | mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; | ||
1816 | ret &= ~mask; | ||
1817 | ret |= (v >> 32) & mask; | ||
1818 | } | ||
1819 | return ret; | ||
1820 | } | ||
1821 | |||
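A small worked example of the conversion above as it behaves on a 32-bit host, assuming the MIPS32 EntryLo layout with XI at bit 30 and RI at bit 31 (the 64-bit user ABI carries them at bits 62 and 63); the sample register value is arbitrary:

#include <stdio.h>
#include <stdint.h>

/* Assumed MIPS32 EntryLo layout: XI at bit 30, RI at bit 31. */
#define ENTRYLO32_XI (1u << 30)
#define ENTRYLO32_RI (1u << 31)

/* Same transformation as entrylo_kvm_to_user() performs on a 32-bit host. */
static uint64_t entrylo32_to_user(uint32_t v)
{
	uint64_t mask = ENTRYLO32_RI | ENTRYLO32_XI;
	uint64_t ret = v;

	ret &= ~mask;
	ret |= ((uint64_t)v & mask) << 32;	/* RI/XI move up to bits 63/62 */
	return ret;
}

int main(void)
{
	/* Arbitrary PFN/C/D/V/G bits in the low word, plus RI set. */
	uint32_t hw = ENTRYLO32_RI | 0x3f7;

	printf("user value: %#llx\n", (unsigned long long)entrylo32_to_user(hw));
	/* Prints 0x80000000000003f7: RI now at bit 63, low bits unchanged. */
	return 0;
}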
1822 | static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, | ||
1823 | const struct kvm_one_reg *reg, | ||
1824 | s64 *v) | ||
1825 | { | ||
1826 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
1827 | unsigned int idx; | ||
1828 | |||
1829 | switch (reg->id) { | ||
1830 | case KVM_REG_MIPS_CP0_INDEX: | ||
1831 | *v = (long)read_gc0_index(); | ||
1832 | break; | ||
1833 | case KVM_REG_MIPS_CP0_ENTRYLO0: | ||
1834 | *v = entrylo_kvm_to_user(read_gc0_entrylo0()); | ||
1835 | break; | ||
1836 | case KVM_REG_MIPS_CP0_ENTRYLO1: | ||
1837 | *v = entrylo_kvm_to_user(read_gc0_entrylo1()); | ||
1838 | break; | ||
1839 | case KVM_REG_MIPS_CP0_CONTEXT: | ||
1840 | *v = (long)read_gc0_context(); | ||
1841 | break; | ||
1842 | case KVM_REG_MIPS_CP0_CONTEXTCONFIG: | ||
1843 | if (!cpu_guest_has_contextconfig) | ||
1844 | return -EINVAL; | ||
1845 | *v = read_gc0_contextconfig(); | ||
1846 | break; | ||
1847 | case KVM_REG_MIPS_CP0_USERLOCAL: | ||
1848 | if (!cpu_guest_has_userlocal) | ||
1849 | return -EINVAL; | ||
1850 | *v = read_gc0_userlocal(); | ||
1851 | break; | ||
1852 | #ifdef CONFIG_64BIT | ||
1853 | case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: | ||
1854 | if (!cpu_guest_has_contextconfig) | ||
1855 | return -EINVAL; | ||
1856 | *v = read_gc0_xcontextconfig(); | ||
1857 | break; | ||
1858 | #endif | ||
1859 | case KVM_REG_MIPS_CP0_PAGEMASK: | ||
1860 | *v = (long)read_gc0_pagemask(); | ||
1861 | break; | ||
1862 | case KVM_REG_MIPS_CP0_PAGEGRAIN: | ||
1863 | *v = (long)read_gc0_pagegrain(); | ||
1864 | break; | ||
1865 | case KVM_REG_MIPS_CP0_SEGCTL0: | ||
1866 | if (!cpu_guest_has_segments) | ||
1867 | return -EINVAL; | ||
1868 | *v = read_gc0_segctl0(); | ||
1869 | break; | ||
1870 | case KVM_REG_MIPS_CP0_SEGCTL1: | ||
1871 | if (!cpu_guest_has_segments) | ||
1872 | return -EINVAL; | ||
1873 | *v = read_gc0_segctl1(); | ||
1874 | break; | ||
1875 | case KVM_REG_MIPS_CP0_SEGCTL2: | ||
1876 | if (!cpu_guest_has_segments) | ||
1877 | return -EINVAL; | ||
1878 | *v = read_gc0_segctl2(); | ||
1879 | break; | ||
1880 | case KVM_REG_MIPS_CP0_PWBASE: | ||
1881 | if (!cpu_guest_has_htw) | ||
1882 | return -EINVAL; | ||
1883 | *v = read_gc0_pwbase(); | ||
1884 | break; | ||
1885 | case KVM_REG_MIPS_CP0_PWFIELD: | ||
1886 | if (!cpu_guest_has_htw) | ||
1887 | return -EINVAL; | ||
1888 | *v = read_gc0_pwfield(); | ||
1889 | break; | ||
1890 | case KVM_REG_MIPS_CP0_PWSIZE: | ||
1891 | if (!cpu_guest_has_htw) | ||
1892 | return -EINVAL; | ||
1893 | *v = read_gc0_pwsize(); | ||
1894 | break; | ||
1895 | case KVM_REG_MIPS_CP0_WIRED: | ||
1896 | *v = (long)read_gc0_wired(); | ||
1897 | break; | ||
1898 | case KVM_REG_MIPS_CP0_PWCTL: | ||
1899 | if (!cpu_guest_has_htw) | ||
1900 | return -EINVAL; | ||
1901 | *v = read_gc0_pwctl(); | ||
1902 | break; | ||
1903 | case KVM_REG_MIPS_CP0_HWRENA: | ||
1904 | *v = (long)read_gc0_hwrena(); | ||
1905 | break; | ||
1906 | case KVM_REG_MIPS_CP0_BADVADDR: | ||
1907 | *v = (long)read_gc0_badvaddr(); | ||
1908 | break; | ||
1909 | case KVM_REG_MIPS_CP0_BADINSTR: | ||
1910 | if (!cpu_guest_has_badinstr) | ||
1911 | return -EINVAL; | ||
1912 | *v = read_gc0_badinstr(); | ||
1913 | break; | ||
1914 | case KVM_REG_MIPS_CP0_BADINSTRP: | ||
1915 | if (!cpu_guest_has_badinstrp) | ||
1916 | return -EINVAL; | ||
1917 | *v = read_gc0_badinstrp(); | ||
1918 | break; | ||
1919 | case KVM_REG_MIPS_CP0_COUNT: | ||
1920 | *v = kvm_mips_read_count(vcpu); | ||
1921 | break; | ||
1922 | case KVM_REG_MIPS_CP0_ENTRYHI: | ||
1923 | *v = (long)read_gc0_entryhi(); | ||
1924 | break; | ||
1925 | case KVM_REG_MIPS_CP0_COMPARE: | ||
1926 | *v = (long)read_gc0_compare(); | ||
1927 | break; | ||
1928 | case KVM_REG_MIPS_CP0_STATUS: | ||
1929 | *v = (long)read_gc0_status(); | ||
1930 | break; | ||
1931 | case KVM_REG_MIPS_CP0_INTCTL: | ||
1932 | *v = read_gc0_intctl(); | ||
1933 | break; | ||
1934 | case KVM_REG_MIPS_CP0_CAUSE: | ||
1935 | *v = (long)read_gc0_cause(); | ||
1936 | break; | ||
1937 | case KVM_REG_MIPS_CP0_EPC: | ||
1938 | *v = (long)read_gc0_epc(); | ||
1939 | break; | ||
1940 | case KVM_REG_MIPS_CP0_PRID: | ||
1941 | switch (boot_cpu_type()) { | ||
1942 | case CPU_CAVIUM_OCTEON3: | ||
1943 | /* Octeon III has a read-only guest.PRid */ | ||
1944 | *v = read_gc0_prid(); | ||
1945 | break; | ||
1946 | default: | ||
1947 | *v = (long)kvm_read_c0_guest_prid(cop0); | ||
1948 | break; | ||
1949 | } | ||
1950 | break; | ||
1951 | case KVM_REG_MIPS_CP0_EBASE: | ||
1952 | *v = kvm_vz_read_gc0_ebase(); | ||
1953 | break; | ||
1954 | case KVM_REG_MIPS_CP0_CONFIG: | ||
1955 | *v = read_gc0_config(); | ||
1956 | break; | ||
1957 | case KVM_REG_MIPS_CP0_CONFIG1: | ||
1958 | if (!cpu_guest_has_conf1) | ||
1959 | return -EINVAL; | ||
1960 | *v = read_gc0_config1(); | ||
1961 | break; | ||
1962 | case KVM_REG_MIPS_CP0_CONFIG2: | ||
1963 | if (!cpu_guest_has_conf2) | ||
1964 | return -EINVAL; | ||
1965 | *v = read_gc0_config2(); | ||
1966 | break; | ||
1967 | case KVM_REG_MIPS_CP0_CONFIG3: | ||
1968 | if (!cpu_guest_has_conf3) | ||
1969 | return -EINVAL; | ||
1970 | *v = read_gc0_config3(); | ||
1971 | break; | ||
1972 | case KVM_REG_MIPS_CP0_CONFIG4: | ||
1973 | if (!cpu_guest_has_conf4) | ||
1974 | return -EINVAL; | ||
1975 | *v = read_gc0_config4(); | ||
1976 | break; | ||
1977 | case KVM_REG_MIPS_CP0_CONFIG5: | ||
1978 | if (!cpu_guest_has_conf5) | ||
1979 | return -EINVAL; | ||
1980 | *v = read_gc0_config5(); | ||
1981 | break; | ||
1982 | case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): | ||
1983 | if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) | ||
1984 | return -EINVAL; | ||
1985 | idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); | ||
1986 | if (idx >= ARRAY_SIZE(vcpu->arch.maar)) | ||
1987 | return -EINVAL; | ||
1988 | *v = vcpu->arch.maar[idx]; | ||
1989 | break; | ||
1990 | case KVM_REG_MIPS_CP0_MAARI: | ||
1991 | if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) | ||
1992 | return -EINVAL; | ||
1993 | *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0); | ||
1994 | break; | ||
1995 | #ifdef CONFIG_64BIT | ||
1996 | case KVM_REG_MIPS_CP0_XCONTEXT: | ||
1997 | *v = read_gc0_xcontext(); | ||
1998 | break; | ||
1999 | #endif | ||
2000 | case KVM_REG_MIPS_CP0_ERROREPC: | ||
2001 | *v = (long)read_gc0_errorepc(); | ||
2002 | break; | ||
2003 | case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: | ||
2004 | idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; | ||
2005 | if (!cpu_guest_has_kscr(idx)) | ||
2006 | return -EINVAL; | ||
2007 | switch (idx) { | ||
2008 | case 2: | ||
2009 | *v = (long)read_gc0_kscratch1(); | ||
2010 | break; | ||
2011 | case 3: | ||
2012 | *v = (long)read_gc0_kscratch2(); | ||
2013 | break; | ||
2014 | case 4: | ||
2015 | *v = (long)read_gc0_kscratch3(); | ||
2016 | break; | ||
2017 | case 5: | ||
2018 | *v = (long)read_gc0_kscratch4(); | ||
2019 | break; | ||
2020 | case 6: | ||
2021 | *v = (long)read_gc0_kscratch5(); | ||
2022 | break; | ||
2023 | case 7: | ||
2024 | *v = (long)read_gc0_kscratch6(); | ||
2025 | break; | ||
2026 | } | ||
2027 | break; | ||
2028 | case KVM_REG_MIPS_COUNT_CTL: | ||
2029 | *v = vcpu->arch.count_ctl; | ||
2030 | break; | ||
2031 | case KVM_REG_MIPS_COUNT_RESUME: | ||
2032 | *v = ktime_to_ns(vcpu->arch.count_resume); | ||
2033 | break; | ||
2034 | case KVM_REG_MIPS_COUNT_HZ: | ||
2035 | *v = vcpu->arch.count_hz; | ||
2036 | break; | ||
2037 | default: | ||
2038 | return -EINVAL; | ||
2039 | } | ||
2040 | return 0; | ||
2041 | } | ||
2042 | |||
2043 | static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, | ||
2044 | const struct kvm_one_reg *reg, | ||
2045 | s64 v) | ||
2046 | { | ||
2047 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
2048 | unsigned int idx; | ||
2049 | int ret = 0; | ||
2050 | unsigned int cur, change; | ||
2051 | |||
2052 | switch (reg->id) { | ||
2053 | case KVM_REG_MIPS_CP0_INDEX: | ||
2054 | write_gc0_index(v); | ||
2055 | break; | ||
2056 | case KVM_REG_MIPS_CP0_ENTRYLO0: | ||
2057 | write_gc0_entrylo0(entrylo_user_to_kvm(v)); | ||
2058 | break; | ||
2059 | case KVM_REG_MIPS_CP0_ENTRYLO1: | ||
2060 | write_gc0_entrylo1(entrylo_user_to_kvm(v)); | ||
2061 | break; | ||
2062 | case KVM_REG_MIPS_CP0_CONTEXT: | ||
2063 | write_gc0_context(v); | ||
2064 | break; | ||
2065 | case KVM_REG_MIPS_CP0_CONTEXTCONFIG: | ||
2066 | if (!cpu_guest_has_contextconfig) | ||
2067 | return -EINVAL; | ||
2068 | write_gc0_contextconfig(v); | ||
2069 | break; | ||
2070 | case KVM_REG_MIPS_CP0_USERLOCAL: | ||
2071 | if (!cpu_guest_has_userlocal) | ||
2072 | return -EINVAL; | ||
2073 | write_gc0_userlocal(v); | ||
2074 | break; | ||
2075 | #ifdef CONFIG_64BIT | ||
2076 | case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: | ||
2077 | if (!cpu_guest_has_contextconfig) | ||
2078 | return -EINVAL; | ||
2079 | write_gc0_xcontextconfig(v); | ||
2080 | break; | ||
2081 | #endif | ||
2082 | case KVM_REG_MIPS_CP0_PAGEMASK: | ||
2083 | write_gc0_pagemask(v); | ||
2084 | break; | ||
2085 | case KVM_REG_MIPS_CP0_PAGEGRAIN: | ||
2086 | write_gc0_pagegrain(v); | ||
2087 | break; | ||
2088 | case KVM_REG_MIPS_CP0_SEGCTL0: | ||
2089 | if (!cpu_guest_has_segments) | ||
2090 | return -EINVAL; | ||
2091 | write_gc0_segctl0(v); | ||
2092 | break; | ||
2093 | case KVM_REG_MIPS_CP0_SEGCTL1: | ||
2094 | if (!cpu_guest_has_segments) | ||
2095 | return -EINVAL; | ||
2096 | write_gc0_segctl1(v); | ||
2097 | break; | ||
2098 | case KVM_REG_MIPS_CP0_SEGCTL2: | ||
2099 | if (!cpu_guest_has_segments) | ||
2100 | return -EINVAL; | ||
2101 | write_gc0_segctl2(v); | ||
2102 | break; | ||
2103 | case KVM_REG_MIPS_CP0_PWBASE: | ||
2104 | if (!cpu_guest_has_htw) | ||
2105 | return -EINVAL; | ||
2106 | write_gc0_pwbase(v); | ||
2107 | break; | ||
2108 | case KVM_REG_MIPS_CP0_PWFIELD: | ||
2109 | if (!cpu_guest_has_htw) | ||
2110 | return -EINVAL; | ||
2111 | write_gc0_pwfield(v); | ||
2112 | break; | ||
2113 | case KVM_REG_MIPS_CP0_PWSIZE: | ||
2114 | if (!cpu_guest_has_htw) | ||
2115 | return -EINVAL; | ||
2116 | write_gc0_pwsize(v); | ||
2117 | break; | ||
2118 | case KVM_REG_MIPS_CP0_WIRED: | ||
2119 | change_gc0_wired(MIPSR6_WIRED_WIRED, v); | ||
2120 | break; | ||
2121 | case KVM_REG_MIPS_CP0_PWCTL: | ||
2122 | if (!cpu_guest_has_htw) | ||
2123 | return -EINVAL; | ||
2124 | write_gc0_pwctl(v); | ||
2125 | break; | ||
2126 | case KVM_REG_MIPS_CP0_HWRENA: | ||
2127 | write_gc0_hwrena(v); | ||
2128 | break; | ||
2129 | case KVM_REG_MIPS_CP0_BADVADDR: | ||
2130 | write_gc0_badvaddr(v); | ||
2131 | break; | ||
2132 | case KVM_REG_MIPS_CP0_BADINSTR: | ||
2133 | if (!cpu_guest_has_badinstr) | ||
2134 | return -EINVAL; | ||
2135 | write_gc0_badinstr(v); | ||
2136 | break; | ||
2137 | case KVM_REG_MIPS_CP0_BADINSTRP: | ||
2138 | if (!cpu_guest_has_badinstrp) | ||
2139 | return -EINVAL; | ||
2140 | write_gc0_badinstrp(v); | ||
2141 | break; | ||
2142 | case KVM_REG_MIPS_CP0_COUNT: | ||
2143 | kvm_mips_write_count(vcpu, v); | ||
2144 | break; | ||
2145 | case KVM_REG_MIPS_CP0_ENTRYHI: | ||
2146 | write_gc0_entryhi(v); | ||
2147 | break; | ||
2148 | case KVM_REG_MIPS_CP0_COMPARE: | ||
2149 | kvm_mips_write_compare(vcpu, v, false); | ||
2150 | break; | ||
2151 | case KVM_REG_MIPS_CP0_STATUS: | ||
2152 | write_gc0_status(v); | ||
2153 | break; | ||
2154 | case KVM_REG_MIPS_CP0_INTCTL: | ||
2155 | write_gc0_intctl(v); | ||
2156 | break; | ||
2157 | case KVM_REG_MIPS_CP0_CAUSE: | ||
2158 | /* | ||
2159 | * If the timer is stopped or started (DC bit) it must look | ||
2160 | * atomic with changes to the timer interrupt pending bit (TI). | ||
2161 | * A timer interrupt should not happen in between. | ||
2162 | */ | ||
2163 | if ((read_gc0_cause() ^ v) & CAUSEF_DC) { | ||
2164 | if (v & CAUSEF_DC) { | ||
2165 | /* disable timer first */ | ||
2166 | kvm_mips_count_disable_cause(vcpu); | ||
2167 | change_gc0_cause((u32)~CAUSEF_DC, v); | ||
2168 | } else { | ||
2169 | /* enable timer last */ | ||
2170 | change_gc0_cause((u32)~CAUSEF_DC, v); | ||
2171 | kvm_mips_count_enable_cause(vcpu); | ||
2172 | } | ||
2173 | } else { | ||
2174 | write_gc0_cause(v); | ||
2175 | } | ||
2176 | break; | ||
2177 | case KVM_REG_MIPS_CP0_EPC: | ||
2178 | write_gc0_epc(v); | ||
2179 | break; | ||
2180 | case KVM_REG_MIPS_CP0_PRID: | ||
2181 | switch (boot_cpu_type()) { | ||
2182 | case CPU_CAVIUM_OCTEON3: | ||
2183 | /* Octeon III has a guest.PRid, but it's read-only */ | ||
2184 | break; | ||
2185 | default: | ||
2186 | kvm_write_c0_guest_prid(cop0, v); | ||
2187 | break; | ||
2188 | } | ||
2189 | break; | ||
2190 | case KVM_REG_MIPS_CP0_EBASE: | ||
2191 | kvm_vz_write_gc0_ebase(v); | ||
2192 | break; | ||
2193 | case KVM_REG_MIPS_CP0_CONFIG: | ||
2194 | cur = read_gc0_config(); | ||
2195 | change = (cur ^ v) & kvm_vz_config_user_wrmask(vcpu); | ||
2196 | if (change) { | ||
2197 | v = cur ^ change; | ||
2198 | write_gc0_config(v); | ||
2199 | } | ||
2200 | break; | ||
2201 | case KVM_REG_MIPS_CP0_CONFIG1: | ||
2202 | if (!cpu_guest_has_conf1) | ||
2203 | break; | ||
2204 | cur = read_gc0_config1(); | ||
2205 | change = (cur ^ v) & kvm_vz_config1_user_wrmask(vcpu); | ||
2206 | if (change) { | ||
2207 | v = cur ^ change; | ||
2208 | write_gc0_config1(v); | ||
2209 | } | ||
2210 | break; | ||
2211 | case KVM_REG_MIPS_CP0_CONFIG2: | ||
2212 | if (!cpu_guest_has_conf2) | ||
2213 | break; | ||
2214 | cur = read_gc0_config2(); | ||
2215 | change = (cur ^ v) & kvm_vz_config2_user_wrmask(vcpu); | ||
2216 | if (change) { | ||
2217 | v = cur ^ change; | ||
2218 | write_gc0_config2(v); | ||
2219 | } | ||
2220 | break; | ||
2221 | case KVM_REG_MIPS_CP0_CONFIG3: | ||
2222 | if (!cpu_guest_has_conf3) | ||
2223 | break; | ||
2224 | cur = read_gc0_config3(); | ||
2225 | change = (cur ^ v) & kvm_vz_config3_user_wrmask(vcpu); | ||
2226 | if (change) { | ||
2227 | v = cur ^ change; | ||
2228 | write_gc0_config3(v); | ||
2229 | } | ||
2230 | break; | ||
2231 | case KVM_REG_MIPS_CP0_CONFIG4: | ||
2232 | if (!cpu_guest_has_conf4) | ||
2233 | break; | ||
2234 | cur = read_gc0_config4(); | ||
2235 | change = (cur ^ v) & kvm_vz_config4_user_wrmask(vcpu); | ||
2236 | if (change) { | ||
2237 | v = cur ^ change; | ||
2238 | write_gc0_config4(v); | ||
2239 | } | ||
2240 | break; | ||
2241 | case KVM_REG_MIPS_CP0_CONFIG5: | ||
2242 | if (!cpu_guest_has_conf5) | ||
2243 | break; | ||
2244 | cur = read_gc0_config5(); | ||
2245 | change = (cur ^ v) & kvm_vz_config5_user_wrmask(vcpu); | ||
2246 | if (change) { | ||
2247 | v = cur ^ change; | ||
2248 | write_gc0_config5(v); | ||
2249 | } | ||
2250 | break; | ||
2251 | case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): | ||
2252 | if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) | ||
2253 | return -EINVAL; | ||
2254 | idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); | ||
2255 | if (idx >= ARRAY_SIZE(vcpu->arch.maar)) | ||
2256 | return -EINVAL; | ||
2257 | vcpu->arch.maar[idx] = mips_process_maar(dmtc_op, v); | ||
2258 | break; | ||
2259 | case KVM_REG_MIPS_CP0_MAARI: | ||
2260 | if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) | ||
2261 | return -EINVAL; | ||
2262 | kvm_write_maari(vcpu, v); | ||
2263 | break; | ||
2264 | #ifdef CONFIG_64BIT | ||
2265 | case KVM_REG_MIPS_CP0_XCONTEXT: | ||
2266 | write_gc0_xcontext(v); | ||
2267 | break; | ||
2268 | #endif | ||
2269 | case KVM_REG_MIPS_CP0_ERROREPC: | ||
2270 | write_gc0_errorepc(v); | ||
2271 | break; | ||
2272 | case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: | ||
2273 | idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; | ||
2274 | if (!cpu_guest_has_kscr(idx)) | ||
2275 | return -EINVAL; | ||
2276 | switch (idx) { | ||
2277 | case 2: | ||
2278 | write_gc0_kscratch1(v); | ||
2279 | break; | ||
2280 | case 3: | ||
2281 | write_gc0_kscratch2(v); | ||
2282 | break; | ||
2283 | case 4: | ||
2284 | write_gc0_kscratch3(v); | ||
2285 | break; | ||
2286 | case 5: | ||
2287 | write_gc0_kscratch4(v); | ||
2288 | break; | ||
2289 | case 6: | ||
2290 | write_gc0_kscratch5(v); | ||
2291 | break; | ||
2292 | case 7: | ||
2293 | write_gc0_kscratch6(v); | ||
2294 | break; | ||
2295 | } | ||
2296 | break; | ||
2297 | case KVM_REG_MIPS_COUNT_CTL: | ||
2298 | ret = kvm_mips_set_count_ctl(vcpu, v); | ||
2299 | break; | ||
2300 | case KVM_REG_MIPS_COUNT_RESUME: | ||
2301 | ret = kvm_mips_set_count_resume(vcpu, v); | ||
2302 | break; | ||
2303 | case KVM_REG_MIPS_COUNT_HZ: | ||
2304 | ret = kvm_mips_set_count_hz(vcpu, v); | ||
2305 | break; | ||
2306 | default: | ||
2307 | return -EINVAL; | ||
2308 | } | ||
2309 | return ret; | ||
2310 | } | ||
2311 | |||
2312 | #define guestid_cache(cpu) (cpu_data[cpu].guestid_cache) | ||
2313 | static void kvm_vz_get_new_guestid(unsigned long cpu, struct kvm_vcpu *vcpu) | ||
2314 | { | ||
2315 | unsigned long guestid = guestid_cache(cpu); | ||
2316 | |||
2317 | if (!(++guestid & GUESTID_MASK)) { | ||
2318 | if (cpu_has_vtag_icache) | ||
2319 | flush_icache_all(); | ||
2320 | |||
2321 | if (!guestid) /* fix version if needed */ | ||
2322 | guestid = GUESTID_FIRST_VERSION; | ||
2323 | |||
2324 | ++guestid; /* guestid 0 reserved for root */ | ||
2325 | |||
2326 | /* start new guestid cycle */ | ||
2327 | kvm_vz_local_flush_roottlb_all_guests(); | ||
2328 | kvm_vz_local_flush_guesttlb_all(); | ||
2329 | } | ||
2330 | |||
2331 | guestid_cache(cpu) = guestid; | ||
2332 | } | ||
2333 | |||
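A worked example of the allocation above using a hypothetical 8-bit ID field (the real GUESTID_MASK is probed from GuestCtl1 in kvm_vz_hardware_enable() further down); the TLB flushes are only noted in a comment:

#include <stdio.h>

int main(void)
{
	unsigned long mask = 0xff;		/* hypothetical GUESTID_MASK        */
	unsigned long first_version = mask + 1;	/* GUESTID_FIRST_VERSION            */
	unsigned long cache = 0x1ff;		/* version 1, last usable ID (0xff) */

	if (!(++cache & mask)) {	/* 0x200: ID field wrapped to zero */
		if (!cache)		/* full counter wrap (not here)    */
			cache = first_version;
		++cache;		/* skip ID 0, reserved for root    */
		/* ...a real cycle also flushes root and guest TLBs... */
	}
	printf("new cache %#lx: version %lu, ID %lu\n",
	       cache, cache >> 8, cache & mask);	/* 0x201: version 2, ID 1 */
	return 0;
}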
2334 | /* Returns 1 if the guest TLB may be clobbered */ | ||
2335 | static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) | ||
2336 | { | ||
2337 | int ret = 0; | ||
2338 | int i; | ||
2339 | |||
2340 | if (!vcpu->requests) | ||
2341 | return 0; | ||
2342 | |||
2343 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { | ||
2344 | if (cpu_has_guestid) { | ||
2345 | /* Drop all GuestIDs for this VCPU */ | ||
2346 | for_each_possible_cpu(i) | ||
2347 | vcpu->arch.vzguestid[i] = 0; | ||
2348 | /* This will clobber guest TLB contents too */ | ||
2349 | ret = 1; | ||
2350 | } | ||
2351 | /* | ||
2352 | * For Root ASID Dealias (RAD) we don't do anything here, but we | ||
2353 | * still need the request to ensure we recheck asid_flush_mask. | ||
2354 | * We can still return 0 as only the root TLB will be affected | ||
2355 | * by a root ASID flush. | ||
2356 | */ | ||
2357 | } | ||
2358 | |||
2359 | return ret; | ||
2360 | } | ||
2361 | |||
2362 | static void kvm_vz_vcpu_save_wired(struct kvm_vcpu *vcpu) | ||
2363 | { | ||
2364 | unsigned int wired = read_gc0_wired(); | ||
2365 | struct kvm_mips_tlb *tlbs; | ||
2366 | int i; | ||
2367 | |||
2368 | /* Expand the wired TLB array if necessary */ | ||
2369 | wired &= MIPSR6_WIRED_WIRED; | ||
2370 | if (wired > vcpu->arch.wired_tlb_limit) { | ||
2371 | tlbs = krealloc(vcpu->arch.wired_tlb, wired * | ||
2372 | sizeof(*vcpu->arch.wired_tlb), GFP_ATOMIC); | ||
2373 | if (WARN_ON(!tlbs)) { | ||
2374 | /* Save whatever we can */ | ||
2375 | wired = vcpu->arch.wired_tlb_limit; | ||
2376 | } else { | ||
2377 | vcpu->arch.wired_tlb = tlbs; | ||
2378 | vcpu->arch.wired_tlb_limit = wired; | ||
2379 | } | ||
2380 | } | ||
2381 | |||
2382 | if (wired) | ||
2383 | /* Save wired entries from the guest TLB */ | ||
2384 | kvm_vz_save_guesttlb(vcpu->arch.wired_tlb, 0, wired); | ||
2385 | /* Invalidate any dropped entries since last time */ | ||
2386 | for (i = wired; i < vcpu->arch.wired_tlb_used; ++i) { | ||
2387 | vcpu->arch.wired_tlb[i].tlb_hi = UNIQUE_GUEST_ENTRYHI(i); | ||
2388 | vcpu->arch.wired_tlb[i].tlb_lo[0] = 0; | ||
2389 | vcpu->arch.wired_tlb[i].tlb_lo[1] = 0; | ||
2390 | vcpu->arch.wired_tlb[i].tlb_mask = 0; | ||
2391 | } | ||
2392 | vcpu->arch.wired_tlb_used = wired; | ||
2393 | } | ||
2394 | |||
2395 | static void kvm_vz_vcpu_load_wired(struct kvm_vcpu *vcpu) | ||
2396 | { | ||
2397 | /* Load wired entries into the guest TLB */ | ||
2398 | if (vcpu->arch.wired_tlb) | ||
2399 | kvm_vz_load_guesttlb(vcpu->arch.wired_tlb, 0, | ||
2400 | vcpu->arch.wired_tlb_used); | ||
2401 | } | ||
2402 | |||
2403 | static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu) | ||
2404 | { | ||
2405 | struct kvm *kvm = vcpu->kvm; | ||
2406 | struct mm_struct *gpa_mm = &kvm->arch.gpa_mm; | ||
2407 | bool migrated; | ||
2408 | |||
2409 | /* | ||
2410 | * Are we entering guest context on a different CPU to last time? | ||
2411 | * If so, the VCPU's guest TLB state on this CPU may be stale. | ||
2412 | */ | ||
2413 | migrated = (vcpu->arch.last_exec_cpu != cpu); | ||
2414 | vcpu->arch.last_exec_cpu = cpu; | ||
2415 | |||
2416 | /* | ||
2417 | * A vcpu's GuestID is set in GuestCtl1.ID when the vcpu is loaded and | ||
2418 | * remains set until another vcpu is loaded in. As a rule GuestRID | ||
2419 | * remains zeroed when in root context unless the kernel is busy | ||
2420 | * manipulating guest tlb entries. | ||
2421 | */ | ||
2422 | if (cpu_has_guestid) { | ||
2423 | /* | ||
2424 | * Check if our GuestID is of an older version and thus invalid. | ||
2425 | * | ||
2426 | * We also discard the stored GuestID if we've executed on | ||
2427 | * another CPU, as the guest mappings may have changed without | ||
2428 | * hypervisor knowledge. | ||
2429 | */ | ||
2430 | if (migrated || | ||
2431 | (vcpu->arch.vzguestid[cpu] ^ guestid_cache(cpu)) & | ||
2432 | GUESTID_VERSION_MASK) { | ||
2433 | kvm_vz_get_new_guestid(cpu, vcpu); | ||
2434 | vcpu->arch.vzguestid[cpu] = guestid_cache(cpu); | ||
2435 | trace_kvm_guestid_change(vcpu, | ||
2436 | vcpu->arch.vzguestid[cpu]); | ||
2437 | } | ||
2438 | |||
2439 | /* Restore GuestID */ | ||
2440 | change_c0_guestctl1(GUESTID_MASK, vcpu->arch.vzguestid[cpu]); | ||
2441 | } else { | ||
2442 | /* | ||
2443 | * The Guest TLB only stores a single guest's TLB state, so | ||
2444 | * flush it if another VCPU has executed on this CPU. | ||
2445 | * | ||
2446 | * We also flush if we've executed on another CPU, as the guest | ||
2447 | * mappings may have changed without hypervisor knowledge. | ||
2448 | */ | ||
2449 | if (migrated || last_exec_vcpu[cpu] != vcpu) | ||
2450 | kvm_vz_local_flush_guesttlb_all(); | ||
2451 | last_exec_vcpu[cpu] = vcpu; | ||
2452 | |||
2453 | /* | ||
2454 | * Root ASID dealiases guest GPA mappings in the root TLB. | ||
2455 | * Allocate new root ASID if needed. | ||
2456 | */ | ||
2457 | if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.asid_flush_mask) | ||
2458 | || (cpu_context(cpu, gpa_mm) ^ asid_cache(cpu)) & | ||
2459 | asid_version_mask(cpu)) | ||
2460 | get_new_mmu_context(gpa_mm, cpu); | ||
2461 | } | ||
2462 | } | ||
2463 | |||
2464 | static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
2465 | { | ||
2466 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
2467 | bool migrated, all; | ||
2468 | |||
2469 | /* | ||
2470 | * Have we migrated to a different CPU? | ||
2471 | * If so, any old guest TLB state may be stale. | ||
2472 | */ | ||
2473 | migrated = (vcpu->arch.last_sched_cpu != cpu); | ||
2474 | |||
2475 | /* | ||
2476 | * Was this the last VCPU to run on this CPU? | ||
2477 | * If not, any old guest state from this VCPU will have been clobbered. | ||
2478 | */ | ||
2479 | all = migrated || (last_vcpu[cpu] != vcpu); | ||
2480 | last_vcpu[cpu] = vcpu; | ||
2481 | |||
2482 | /* | ||
2483 | * Restore CP0_Wired unconditionally as we clear it after use, and | ||
2484 | * restore wired guest TLB entries (while in guest context). | ||
2485 | */ | ||
2486 | kvm_restore_gc0_wired(cop0); | ||
2487 | if (current->flags & PF_VCPU) { | ||
2488 | tlbw_use_hazard(); | ||
2489 | kvm_vz_vcpu_load_tlb(vcpu, cpu); | ||
2490 | kvm_vz_vcpu_load_wired(vcpu); | ||
2491 | } | ||
2492 | |||
2493 | /* | ||
2494 | * Restore timer state regardless, as e.g. Cause.TI can change over time | ||
2495 | * if left unmaintained. | ||
2496 | */ | ||
2497 | kvm_vz_restore_timer(vcpu); | ||
2498 | |||
2499 | /* Set MC bit if we want to trace guest mode changes */ | ||
2500 | if (kvm_trace_guest_mode_change) | ||
2501 | set_c0_guestctl0(MIPS_GCTL0_MC); | ||
2502 | else | ||
2503 | clear_c0_guestctl0(MIPS_GCTL0_MC); | ||
2504 | |||
2505 | /* Don't bother restoring registers multiple times unless necessary */ | ||
2506 | if (!all) | ||
2507 | return 0; | ||
2508 | |||
2509 | /* | ||
2510 | * Restore config registers first, as some implementations restrict | ||
2511 | * writes to other registers when the corresponding feature bits aren't | ||
2512 | * set. For example Status.CU1 cannot be set unless Config1.FP is set. | ||
2513 | */ | ||
2514 | kvm_restore_gc0_config(cop0); | ||
2515 | if (cpu_guest_has_conf1) | ||
2516 | kvm_restore_gc0_config1(cop0); | ||
2517 | if (cpu_guest_has_conf2) | ||
2518 | kvm_restore_gc0_config2(cop0); | ||
2519 | if (cpu_guest_has_conf3) | ||
2520 | kvm_restore_gc0_config3(cop0); | ||
2521 | if (cpu_guest_has_conf4) | ||
2522 | kvm_restore_gc0_config4(cop0); | ||
2523 | if (cpu_guest_has_conf5) | ||
2524 | kvm_restore_gc0_config5(cop0); | ||
2525 | if (cpu_guest_has_conf6) | ||
2526 | kvm_restore_gc0_config6(cop0); | ||
2527 | if (cpu_guest_has_conf7) | ||
2528 | kvm_restore_gc0_config7(cop0); | ||
2529 | |||
2530 | kvm_restore_gc0_index(cop0); | ||
2531 | kvm_restore_gc0_entrylo0(cop0); | ||
2532 | kvm_restore_gc0_entrylo1(cop0); | ||
2533 | kvm_restore_gc0_context(cop0); | ||
2534 | if (cpu_guest_has_contextconfig) | ||
2535 | kvm_restore_gc0_contextconfig(cop0); | ||
2536 | #ifdef CONFIG_64BIT | ||
2537 | kvm_restore_gc0_xcontext(cop0); | ||
2538 | if (cpu_guest_has_contextconfig) | ||
2539 | kvm_restore_gc0_xcontextconfig(cop0); | ||
2540 | #endif | ||
2541 | kvm_restore_gc0_pagemask(cop0); | ||
2542 | kvm_restore_gc0_pagegrain(cop0); | ||
2543 | kvm_restore_gc0_hwrena(cop0); | ||
2544 | kvm_restore_gc0_badvaddr(cop0); | ||
2545 | kvm_restore_gc0_entryhi(cop0); | ||
2546 | kvm_restore_gc0_status(cop0); | ||
2547 | kvm_restore_gc0_intctl(cop0); | ||
2548 | kvm_restore_gc0_epc(cop0); | ||
2549 | kvm_vz_write_gc0_ebase(kvm_read_sw_gc0_ebase(cop0)); | ||
2550 | if (cpu_guest_has_userlocal) | ||
2551 | kvm_restore_gc0_userlocal(cop0); | ||
2552 | |||
2553 | kvm_restore_gc0_errorepc(cop0); | ||
2554 | |||
2555 | /* restore KScratch registers if enabled in guest */ | ||
2556 | if (cpu_guest_has_conf4) { | ||
2557 | if (cpu_guest_has_kscr(2)) | ||
2558 | kvm_restore_gc0_kscratch1(cop0); | ||
2559 | if (cpu_guest_has_kscr(3)) | ||
2560 | kvm_restore_gc0_kscratch2(cop0); | ||
2561 | if (cpu_guest_has_kscr(4)) | ||
2562 | kvm_restore_gc0_kscratch3(cop0); | ||
2563 | if (cpu_guest_has_kscr(5)) | ||
2564 | kvm_restore_gc0_kscratch4(cop0); | ||
2565 | if (cpu_guest_has_kscr(6)) | ||
2566 | kvm_restore_gc0_kscratch5(cop0); | ||
2567 | if (cpu_guest_has_kscr(7)) | ||
2568 | kvm_restore_gc0_kscratch6(cop0); | ||
2569 | } | ||
2570 | |||
2571 | if (cpu_guest_has_badinstr) | ||
2572 | kvm_restore_gc0_badinstr(cop0); | ||
2573 | if (cpu_guest_has_badinstrp) | ||
2574 | kvm_restore_gc0_badinstrp(cop0); | ||
2575 | |||
2576 | if (cpu_guest_has_segments) { | ||
2577 | kvm_restore_gc0_segctl0(cop0); | ||
2578 | kvm_restore_gc0_segctl1(cop0); | ||
2579 | kvm_restore_gc0_segctl2(cop0); | ||
2580 | } | ||
2581 | |||
2582 | /* restore HTW registers */ | ||
2583 | if (cpu_guest_has_htw) { | ||
2584 | kvm_restore_gc0_pwbase(cop0); | ||
2585 | kvm_restore_gc0_pwfield(cop0); | ||
2586 | kvm_restore_gc0_pwsize(cop0); | ||
2587 | kvm_restore_gc0_pwctl(cop0); | ||
2588 | } | ||
2589 | |||
2590 | /* restore Root.GuestCtl2 from unused Guest guestctl2 register */ | ||
2591 | if (cpu_has_guestctl2) | ||
2592 | write_c0_guestctl2( | ||
2593 | cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL]); | ||
2594 | |||
2595 | /* | ||
2596 | * We should clear linked load bit to break interrupted atomics. This | ||
2597 | * prevents a SC on the next VCPU from succeeding by matching a LL on | ||
2598 | * the previous VCPU. | ||
2599 | */ | ||
2600 | if (cpu_guest_has_rw_llb) | ||
2601 | write_gc0_lladdr(0); | ||
2602 | |||
2603 | return 0; | ||
2604 | } | ||
2605 | |||
2606 | static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu) | ||
2607 | { | ||
2608 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
2609 | |||
2610 | if (current->flags & PF_VCPU) | ||
2611 | kvm_vz_vcpu_save_wired(vcpu); | ||
2612 | |||
2613 | kvm_lose_fpu(vcpu); | ||
2614 | |||
2615 | kvm_save_gc0_index(cop0); | ||
2616 | kvm_save_gc0_entrylo0(cop0); | ||
2617 | kvm_save_gc0_entrylo1(cop0); | ||
2618 | kvm_save_gc0_context(cop0); | ||
2619 | if (cpu_guest_has_contextconfig) | ||
2620 | kvm_save_gc0_contextconfig(cop0); | ||
2621 | #ifdef CONFIG_64BIT | ||
2622 | kvm_save_gc0_xcontext(cop0); | ||
2623 | if (cpu_guest_has_contextconfig) | ||
2624 | kvm_save_gc0_xcontextconfig(cop0); | ||
2625 | #endif | ||
2626 | kvm_save_gc0_pagemask(cop0); | ||
2627 | kvm_save_gc0_pagegrain(cop0); | ||
2628 | kvm_save_gc0_wired(cop0); | ||
2629 | /* allow wired TLB entries to be overwritten */ | ||
2630 | clear_gc0_wired(MIPSR6_WIRED_WIRED); | ||
2631 | kvm_save_gc0_hwrena(cop0); | ||
2632 | kvm_save_gc0_badvaddr(cop0); | ||
2633 | kvm_save_gc0_entryhi(cop0); | ||
2634 | kvm_save_gc0_status(cop0); | ||
2635 | kvm_save_gc0_intctl(cop0); | ||
2636 | kvm_save_gc0_epc(cop0); | ||
2637 | kvm_write_sw_gc0_ebase(cop0, kvm_vz_read_gc0_ebase()); | ||
2638 | if (cpu_guest_has_userlocal) | ||
2639 | kvm_save_gc0_userlocal(cop0); | ||
2640 | |||
2641 | /* only save implemented config registers */ | ||
2642 | kvm_save_gc0_config(cop0); | ||
2643 | if (cpu_guest_has_conf1) | ||
2644 | kvm_save_gc0_config1(cop0); | ||
2645 | if (cpu_guest_has_conf2) | ||
2646 | kvm_save_gc0_config2(cop0); | ||
2647 | if (cpu_guest_has_conf3) | ||
2648 | kvm_save_gc0_config3(cop0); | ||
2649 | if (cpu_guest_has_conf4) | ||
2650 | kvm_save_gc0_config4(cop0); | ||
2651 | if (cpu_guest_has_conf5) | ||
2652 | kvm_save_gc0_config5(cop0); | ||
2653 | if (cpu_guest_has_conf6) | ||
2654 | kvm_save_gc0_config6(cop0); | ||
2655 | if (cpu_guest_has_conf7) | ||
2656 | kvm_save_gc0_config7(cop0); | ||
2657 | |||
2658 | kvm_save_gc0_errorepc(cop0); | ||
2659 | |||
2660 | /* save KScratch registers if enabled in guest */ | ||
2661 | if (cpu_guest_has_conf4) { | ||
2662 | if (cpu_guest_has_kscr(2)) | ||
2663 | kvm_save_gc0_kscratch1(cop0); | ||
2664 | if (cpu_guest_has_kscr(3)) | ||
2665 | kvm_save_gc0_kscratch2(cop0); | ||
2666 | if (cpu_guest_has_kscr(4)) | ||
2667 | kvm_save_gc0_kscratch3(cop0); | ||
2668 | if (cpu_guest_has_kscr(5)) | ||
2669 | kvm_save_gc0_kscratch4(cop0); | ||
2670 | if (cpu_guest_has_kscr(6)) | ||
2671 | kvm_save_gc0_kscratch5(cop0); | ||
2672 | if (cpu_guest_has_kscr(7)) | ||
2673 | kvm_save_gc0_kscratch6(cop0); | ||
2674 | } | ||
2675 | |||
2676 | if (cpu_guest_has_badinstr) | ||
2677 | kvm_save_gc0_badinstr(cop0); | ||
2678 | if (cpu_guest_has_badinstrp) | ||
2679 | kvm_save_gc0_badinstrp(cop0); | ||
2680 | |||
2681 | if (cpu_guest_has_segments) { | ||
2682 | kvm_save_gc0_segctl0(cop0); | ||
2683 | kvm_save_gc0_segctl1(cop0); | ||
2684 | kvm_save_gc0_segctl2(cop0); | ||
2685 | } | ||
2686 | |||
2687 | /* save HTW registers if enabled in guest */ | ||
2688 | if (cpu_guest_has_htw && | ||
2689 | kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) { | ||
2690 | kvm_save_gc0_pwbase(cop0); | ||
2691 | kvm_save_gc0_pwfield(cop0); | ||
2692 | kvm_save_gc0_pwsize(cop0); | ||
2693 | kvm_save_gc0_pwctl(cop0); | ||
2694 | } | ||
2695 | |||
2696 | kvm_vz_save_timer(vcpu); | ||
2697 | |||
2698 | /* save Root.GuestCtl2 in unused Guest guestctl2 register */ | ||
2699 | if (cpu_has_guestctl2) | ||
2700 | cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = | ||
2701 | read_c0_guestctl2(); | ||
2702 | |||
2703 | return 0; | ||
2704 | } | ||
2705 | |||
2706 | /** | ||
2707 | * kvm_vz_resize_guest_vtlb() - Attempt to resize guest VTLB. | ||
2708 | * @size: Number of guest VTLB entries (0 < @size <= root VTLB entries). | ||
2709 | * | ||
2710 | * Attempt to resize the guest VTLB by writing guest Config registers. This is | ||
2711 | * necessary for cores with a shared root/guest TLB to avoid overlap with wired | ||
2712 | * entries in the root VTLB. | ||
2713 | * | ||
2714 | * Returns: The resulting guest VTLB size. | ||
2715 | */ | ||
2716 | static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size) | ||
2717 | { | ||
2718 | unsigned int config4 = 0, ret = 0, limit; | ||
2719 | |||
2720 | /* Write MMUSize - 1 into guest Config registers */ | ||
2721 | if (cpu_guest_has_conf1) | ||
2722 | change_gc0_config1(MIPS_CONF1_TLBS, | ||
2723 | (size - 1) << MIPS_CONF1_TLBS_SHIFT); | ||
2724 | if (cpu_guest_has_conf4) { | ||
2725 | config4 = read_gc0_config4(); | ||
2726 | if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == | ||
2727 | MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) { | ||
2728 | config4 &= ~MIPS_CONF4_VTLBSIZEEXT; | ||
2729 | config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << | ||
2730 | MIPS_CONF4_VTLBSIZEEXT_SHIFT; | ||
2731 | } else if ((config4 & MIPS_CONF4_MMUEXTDEF) == | ||
2732 | MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) { | ||
2733 | config4 &= ~MIPS_CONF4_MMUSIZEEXT; | ||
2734 | config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << | ||
2735 | MIPS_CONF4_MMUSIZEEXT_SHIFT; | ||
2736 | } | ||
2737 | write_gc0_config4(config4); | ||
2738 | } | ||
2739 | |||
2740 | /* | ||
2741 | * Set Guest.Wired.Limit = 0 (i.e. no limit, up to Guest.MMUSize-1), unless | ||
2742 | * that would exceed Root.Wired.Limit (Guest.Wired.Wired is also cleared so | ||
2743 | * the write isn't dropped). | ||
2744 | */ | ||
2745 | if (cpu_has_mips_r6) { | ||
2746 | limit = (read_c0_wired() & MIPSR6_WIRED_LIMIT) >> | ||
2747 | MIPSR6_WIRED_LIMIT_SHIFT; | ||
2748 | if (size - 1 <= limit) | ||
2749 | limit = 0; | ||
2750 | write_gc0_wired(limit << MIPSR6_WIRED_LIMIT_SHIFT); | ||
2751 | } | ||
2752 | |||
2753 | /* Read back MMUSize - 1 */ | ||
2754 | back_to_back_c0_hazard(); | ||
2755 | if (cpu_guest_has_conf1) | ||
2756 | ret = (read_gc0_config1() & MIPS_CONF1_TLBS) >> | ||
2757 | MIPS_CONF1_TLBS_SHIFT; | ||
2758 | if (config4) { | ||
2759 | if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == | ||
2760 | MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) | ||
2761 | ret |= ((config4 & MIPS_CONF4_VTLBSIZEEXT) >> | ||
2762 | MIPS_CONF4_VTLBSIZEEXT_SHIFT) << | ||
2763 | MIPS_CONF1_TLBS_SIZE; | ||
2764 | else if ((config4 & MIPS_CONF4_MMUEXTDEF) == | ||
2765 | MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) | ||
2766 | ret |= ((config4 & MIPS_CONF4_MMUSIZEEXT) >> | ||
2767 | MIPS_CONF4_MMUSIZEEXT_SHIFT) << | ||
2768 | MIPS_CONF1_TLBS_SIZE; | ||
2769 | } | ||
2770 | return ret + 1; | ||
2771 | } | ||
2772 | |||
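The MMUSize-1 value written above is split across two fields: the low 6 bits land in Config1.TLBS and any remainder in Config4.VTLBSizeExt (or MMUSizeExt on cores without the VTLBSizeExt scheme). A small arithmetic check with a hypothetical guest VTLB of 80 entries, assuming the 6-bit width of Config1.TLBS:

#include <stdio.h>

/* Assumed field width: Config1.TLBS is 6 bits wide. */
#define TLBS_BITS 6

int main(void)
{
	unsigned int size = 80;			/* hypothetical guest VTLB size */
	unsigned int mmusize_m1 = size - 1;	/* 79 = 0b1001111 */

	unsigned int conf1_tlbs = mmusize_m1 & ((1 << TLBS_BITS) - 1);	/* 15 */
	unsigned int conf4_vtlbsizeext = mmusize_m1 >> TLBS_BITS;	/* 1  */

	/* Reassembling the fields and adding 1 recovers the requested size. */
	printf("Config1.TLBS=%u Config4.VTLBSizeExt=%u -> size=%u\n",
	       conf1_tlbs, conf4_vtlbsizeext,
	       ((conf4_vtlbsizeext << TLBS_BITS) | conf1_tlbs) + 1);
	return 0;
}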
2773 | static int kvm_vz_hardware_enable(void) | ||
2774 | { | ||
2775 | unsigned int mmu_size, guest_mmu_size, ftlb_size; | ||
2776 | u64 guest_cvmctl, cvmvmconfig; | ||
2777 | |||
2778 | switch (current_cpu_type()) { | ||
2779 | case CPU_CAVIUM_OCTEON3: | ||
2780 | /* Set up guest timer/perfcount IRQ lines */ | ||
2781 | guest_cvmctl = read_gc0_cvmctl(); | ||
2782 | guest_cvmctl &= ~CVMCTL_IPTI; | ||
2783 | guest_cvmctl |= 7ull << CVMCTL_IPTI_SHIFT; | ||
2784 | guest_cvmctl &= ~CVMCTL_IPPCI; | ||
2785 | guest_cvmctl |= 6ull << CVMCTL_IPPCI_SHIFT; | ||
2786 | write_gc0_cvmctl(guest_cvmctl); | ||
2787 | |||
2788 | cvmvmconfig = read_c0_cvmvmconfig(); | ||
2789 | /* No I/O hole translation. */ | ||
2790 | cvmvmconfig |= CVMVMCONF_DGHT; | ||
2791 | /* Halve the root MMU size */ | ||
2792 | mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) | ||
2793 | >> CVMVMCONF_MMUSIZEM1_S) + 1; | ||
2794 | guest_mmu_size = mmu_size / 2; | ||
2795 | mmu_size -= guest_mmu_size; | ||
2796 | cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; | ||
2797 | cvmvmconfig |= mmu_size - 1; | ||
2798 | write_c0_cvmvmconfig(cvmvmconfig); | ||
2799 | |||
2800 | /* Update our records */ | ||
2801 | current_cpu_data.tlbsize = mmu_size; | ||
2802 | current_cpu_data.tlbsizevtlb = mmu_size; | ||
2803 | current_cpu_data.guest.tlbsize = guest_mmu_size; | ||
2804 | |||
2805 | /* Flush moved entries in new (guest) context */ | ||
2806 | kvm_vz_local_flush_guesttlb_all(); | ||
2807 | break; | ||
2808 | default: | ||
2809 | /* | ||
2810 | * ImgTec cores tend to use a shared root/guest TLB. To avoid | ||
2811 | * overlap of root wired and guest entries, the guest TLB may | ||
2812 | * need resizing. | ||
2813 | */ | ||
2814 | mmu_size = current_cpu_data.tlbsizevtlb; | ||
2815 | ftlb_size = current_cpu_data.tlbsize - mmu_size; | ||
2816 | |||
2817 | /* Try switching to maximum guest VTLB size for flush */ | ||
2818 | guest_mmu_size = kvm_vz_resize_guest_vtlb(mmu_size); | ||
2819 | current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; | ||
2820 | kvm_vz_local_flush_guesttlb_all(); | ||
2821 | |||
2822 | /* | ||
2823 | * Reduce to make space for root wired entries and at least 2 | ||
2824 | * root non-wired entries. This does assume that long-term wired | ||
2825 | * entries won't be added later. | ||
2826 | */ | ||
2827 | guest_mmu_size = mmu_size - num_wired_entries() - 2; | ||
2828 | guest_mmu_size = kvm_vz_resize_guest_vtlb(guest_mmu_size); | ||
2829 | current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; | ||
2830 | |||
2831 | /* | ||
2832 | * Write the VTLB size, but if another CPU has already written, | ||
2833 | * check it matches or we won't provide a consistent view to the | ||
2834 | * guest. If this ever happens it suggests an asymmetric number | ||
2835 | * of wired entries. | ||
2836 | */ | ||
2837 | if (cmpxchg(&kvm_vz_guest_vtlb_size, 0, guest_mmu_size) && | ||
2838 | WARN(guest_mmu_size != kvm_vz_guest_vtlb_size, | ||
2839 | "Available guest VTLB size mismatch")) | ||
2840 | return -EINVAL; | ||
2841 | break; | ||
2842 | } | ||
2843 | |||
2844 | /* | ||
2845 | * Enable virtualization features granting guest direct control of | ||
2846 | * certain features: | ||
2847 | * CP0=1: Guest coprocessor 0 context. | ||
2848 | * AT=Guest: Guest MMU. | ||
2849 | * CG=1: Hit (virtual address) CACHE operations (optional). | ||
2850 | * CF=1: Guest Config registers. | ||
2851 | * CGI=1: Indexed flush CACHE operations (optional). | ||
2852 | */ | ||
2853 | write_c0_guestctl0(MIPS_GCTL0_CP0 | | ||
2854 | (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) | | ||
2855 | MIPS_GCTL0_CG | MIPS_GCTL0_CF); | ||
2856 | if (cpu_has_guestctl0ext) | ||
2857 | set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); | ||
2858 | |||
2859 | if (cpu_has_guestid) { | ||
2860 | write_c0_guestctl1(0); | ||
2861 | kvm_vz_local_flush_roottlb_all_guests(); | ||
2862 | |||
2863 | GUESTID_MASK = current_cpu_data.guestid_mask; | ||
2864 | GUESTID_FIRST_VERSION = GUESTID_MASK + 1; | ||
2865 | GUESTID_VERSION_MASK = ~GUESTID_MASK; | ||
2866 | |||
2867 | current_cpu_data.guestid_cache = GUESTID_FIRST_VERSION; | ||
2868 | } | ||
2869 | |||
2870 | /* clear any pending injected virtual guest interrupts */ | ||
2871 | if (cpu_has_guestctl2) | ||
2872 | clear_c0_guestctl2(0x3f << 10); | ||
2873 | |||
2874 | return 0; | ||
2875 | } | ||
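kvm_vz_hardware_enable() sizes the guest TLB differently per core: Octeon III has a hardware-partitioned MMU and simply hands half of it to the guest, while cores with a shared root/guest VTLB shrink the guest view so the root keeps its wired entries plus two spare non-wired ones. A small sketch of just that arithmetic follows; the entry counts in main() are hypothetical.

#include <stdio.h>

/* Guest VTLB entries left after reserving root wired entries plus 2 spares. */
static unsigned int guest_vtlb_entries(unsigned int root_vtlb,
				       unsigned int root_wired)
{
	return root_vtlb - root_wired - 2;
}

/* Octeon III style split: half of the hardware MMU goes to the guest. */
static void split_mmu(unsigned int mmu_size,
		      unsigned int *root, unsigned int *guest)
{
	*guest = mmu_size / 2;
	*root = mmu_size - *guest;
}

int main(void)
{
	unsigned int root, guest;

	printf("shared TLB: %u guest entries\n", guest_vtlb_entries(64, 3));
	split_mmu(256, &root, &guest);
	printf("octeon: root=%u guest=%u\n", root, guest);
	return 0;
}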
2876 | |||
2877 | static void kvm_vz_hardware_disable(void) | ||
2878 | { | ||
2879 | u64 cvmvmconfig; | ||
2880 | unsigned int mmu_size; | ||
2881 | |||
2882 | /* Flush any remaining guest TLB entries */ | ||
2883 | kvm_vz_local_flush_guesttlb_all(); | ||
2884 | |||
2885 | switch (current_cpu_type()) { | ||
2886 | case CPU_CAVIUM_OCTEON3: | ||
2887 | /* | ||
2888 | * Allocate whole TLB for root. Existing guest TLB entries will | ||
2889 | * change ownership to the root TLB. We should be safe though as | ||
2890 | * they've already been flushed above while in guest TLB. | ||
2891 | */ | ||
2892 | cvmvmconfig = read_c0_cvmvmconfig(); | ||
2893 | mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) | ||
2894 | >> CVMVMCONF_MMUSIZEM1_S) + 1; | ||
2895 | cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; | ||
2896 | cvmvmconfig |= mmu_size - 1; | ||
2897 | write_c0_cvmvmconfig(cvmvmconfig); | ||
2898 | |||
2899 | /* Update our records */ | ||
2900 | current_cpu_data.tlbsize = mmu_size; | ||
2901 | current_cpu_data.tlbsizevtlb = mmu_size; | ||
2902 | current_cpu_data.guest.tlbsize = 0; | ||
2903 | |||
2904 | /* Flush moved entries in new (root) context */ | ||
2905 | local_flush_tlb_all(); | ||
2906 | break; | ||
2907 | } | ||
2908 | |||
2909 | if (cpu_has_guestid) { | ||
2910 | write_c0_guestctl1(0); | ||
2911 | kvm_vz_local_flush_roottlb_all_guests(); | ||
2912 | } | ||
2913 | } | ||
2914 | |||
2915 | static int kvm_vz_check_extension(struct kvm *kvm, long ext) | ||
2916 | { | ||
2917 | int r; | ||
2918 | |||
2919 | switch (ext) { | ||
2920 | case KVM_CAP_MIPS_VZ: | ||
2921 | /* we wouldn't be here unless cpu_has_vz */ | ||
2922 | r = 1; | ||
2923 | break; | ||
2924 | #ifdef CONFIG_64BIT | ||
2925 | case KVM_CAP_MIPS_64BIT: | ||
2926 | /* We support 64-bit registers/operations and addresses */ | ||
2927 | r = 2; | ||
2928 | break; | ||
2929 | #endif | ||
2930 | default: | ||
2931 | r = 0; | ||
2932 | break; | ||
2933 | } | ||
2934 | |||
2935 | return r; | ||
2936 | } | ||
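kvm_vz_check_extension() is what answers a userspace KVM_CHECK_EXTENSION query for these capabilities. A minimal userspace sketch of the intended usage is below: probe for KVM_CAP_MIPS_VZ and pass KVM_VM_MIPS_VZ as the machine type only if it is present. It assumes uapi headers that already define those two constants, i.e. a kernel with this series applied.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm, type = 0;

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Ask whether the VZ backend is in use before choosing a VM type. */
	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ) > 0)
		type = KVM_VM_MIPS_VZ;	/* hardware-assisted guest MMU */

	vm = ioctl(kvm, KVM_CREATE_VM, type);
	if (vm < 0)
		perror("KVM_CREATE_VM");
	else
		printf("created VM with machine type %d\n", type);

	return 0;
}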
2937 | |||
2938 | static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu) | ||
2939 | { | ||
2940 | int i; | ||
2941 | |||
2942 | for_each_possible_cpu(i) | ||
2943 | vcpu->arch.vzguestid[i] = 0; | ||
2944 | |||
2945 | return 0; | ||
2946 | } | ||
2947 | |||
2948 | static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
2949 | { | ||
2950 | int cpu; | ||
2951 | |||
2952 | /* | ||
2953 | * If the VCPU is freed and reused as another VCPU, we don't want the | ||
2954 | * matching pointer wrongly hanging around in last_vcpu[] or | ||
2955 | * last_exec_vcpu[]. | ||
2956 | */ | ||
2957 | for_each_possible_cpu(cpu) { | ||
2958 | if (last_vcpu[cpu] == vcpu) | ||
2959 | last_vcpu[cpu] = NULL; | ||
2960 | if (last_exec_vcpu[cpu] == vcpu) | ||
2961 | last_exec_vcpu[cpu] = NULL; | ||
2962 | } | ||
2963 | } | ||
2964 | |||
2965 | static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) | ||
2966 | { | ||
2967 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
2968 | unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */ | ||
2969 | |||
2970 | /* | ||
2971 | * Start off the timer at the same frequency as the host timer, but the | ||
2972 | * soft timer doesn't handle frequencies greater than 1GHz yet. | ||
2973 | */ | ||
2974 | if (mips_hpt_frequency && mips_hpt_frequency <= NSEC_PER_SEC) | ||
2975 | count_hz = mips_hpt_frequency; | ||
2976 | kvm_mips_init_count(vcpu, count_hz); | ||
2977 | |||
2978 | /* | ||
2979 | * Initialize guest register state to valid architectural reset state. | ||
2980 | */ | ||
2981 | |||
2982 | /* PageGrain */ | ||
2983 | if (cpu_has_mips_r6) | ||
2984 | kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC); | ||
2985 | /* Wired */ | ||
2986 | if (cpu_has_mips_r6) | ||
2987 | kvm_write_sw_gc0_wired(cop0, | ||
2988 | read_gc0_wired() & MIPSR6_WIRED_LIMIT); | ||
2989 | /* Status */ | ||
2990 | kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL); | ||
2991 | if (cpu_has_mips_r6) | ||
2992 | kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status()); | ||
2993 | /* IntCtl */ | ||
2994 | kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() & | ||
2995 | (INTCTLF_IPFDC | INTCTLF_IPPCI | INTCTLF_IPTI)); | ||
2996 | /* PRId */ | ||
2997 | kvm_write_sw_gc0_prid(cop0, boot_cpu_data.processor_id); | ||
2998 | /* EBase */ | ||
2999 | kvm_write_sw_gc0_ebase(cop0, (s32)0x80000000 | vcpu->vcpu_id); | ||
3000 | /* Config */ | ||
3001 | kvm_save_gc0_config(cop0); | ||
3002 | /* architecturally writable (e.g. from guest) */ | ||
3003 | kvm_change_sw_gc0_config(cop0, CONF_CM_CMASK, | ||
3004 | _page_cachable_default >> _CACHE_SHIFT); | ||
3005 | /* architecturally read only, but maybe writable from root */ | ||
3006 | kvm_change_sw_gc0_config(cop0, MIPS_CONF_MT, read_c0_config()); | ||
3007 | if (cpu_guest_has_conf1) { | ||
3008 | kvm_set_sw_gc0_config(cop0, MIPS_CONF_M); | ||
3009 | /* Config1 */ | ||
3010 | kvm_save_gc0_config1(cop0); | ||
3011 | /* architecturally read only, but maybe writable from root */ | ||
3012 | kvm_clear_sw_gc0_config1(cop0, MIPS_CONF1_C2 | | ||
3013 | MIPS_CONF1_MD | | ||
3014 | MIPS_CONF1_PC | | ||
3015 | MIPS_CONF1_WR | | ||
3016 | MIPS_CONF1_CA | | ||
3017 | MIPS_CONF1_FP); | ||
3018 | } | ||
3019 | if (cpu_guest_has_conf2) { | ||
3020 | kvm_set_sw_gc0_config1(cop0, MIPS_CONF_M); | ||
3021 | /* Config2 */ | ||
3022 | kvm_save_gc0_config2(cop0); | ||
3023 | } | ||
3024 | if (cpu_guest_has_conf3) { | ||
3025 | kvm_set_sw_gc0_config2(cop0, MIPS_CONF_M); | ||
3026 | /* Config3 */ | ||
3027 | kvm_save_gc0_config3(cop0); | ||
3028 | /* architecturally writable (e.g. from guest) */ | ||
3029 | kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_ISA_OE); | ||
3030 | /* architecturally read only, but maybe writable from root */ | ||
3031 | kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_MSA | | ||
3032 | MIPS_CONF3_BPG | | ||
3033 | MIPS_CONF3_ULRI | | ||
3034 | MIPS_CONF3_DSP | | ||
3035 | MIPS_CONF3_CTXTC | | ||
3036 | MIPS_CONF3_ITL | | ||
3037 | MIPS_CONF3_LPA | | ||
3038 | MIPS_CONF3_VEIC | | ||
3039 | MIPS_CONF3_VINT | | ||
3040 | MIPS_CONF3_SP | | ||
3041 | MIPS_CONF3_CDMM | | ||
3042 | MIPS_CONF3_MT | | ||
3043 | MIPS_CONF3_SM | | ||
3044 | MIPS_CONF3_TL); | ||
3045 | } | ||
3046 | if (cpu_guest_has_conf4) { | ||
3047 | kvm_set_sw_gc0_config3(cop0, MIPS_CONF_M); | ||
3048 | /* Config4 */ | ||
3049 | kvm_save_gc0_config4(cop0); | ||
3050 | } | ||
3051 | if (cpu_guest_has_conf5) { | ||
3052 | kvm_set_sw_gc0_config4(cop0, MIPS_CONF_M); | ||
3053 | /* Config5 */ | ||
3054 | kvm_save_gc0_config5(cop0); | ||
3055 | /* architecturally writable (e.g. from guest) */ | ||
3056 | kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_K | | ||
3057 | MIPS_CONF5_CV | | ||
3058 | MIPS_CONF5_MSAEN | | ||
3059 | MIPS_CONF5_UFE | | ||
3060 | MIPS_CONF5_FRE | | ||
3061 | MIPS_CONF5_SBRI | | ||
3062 | MIPS_CONF5_UFR); | ||
3063 | /* architecturally read only, but maybe writable from root */ | ||
3064 | kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_MRP); | ||
3065 | } | ||
3066 | |||
3067 | if (cpu_guest_has_contextconfig) { | ||
3068 | /* ContextConfig */ | ||
3069 | kvm_write_sw_gc0_contextconfig(cop0, 0x007ffff0); | ||
3070 | #ifdef CONFIG_64BIT | ||
3071 | /* XContextConfig */ | ||
3072 | /* bits SEGBITS-13+3:4 set */ | ||
3073 | kvm_write_sw_gc0_xcontextconfig(cop0, | ||
3074 | ((1ull << (cpu_vmbits - 13)) - 1) << 4); | ||
3075 | #endif | ||
3076 | } | ||
3077 | |||
3078 | /* Implementation dependent, use the legacy layout */ | ||
3079 | if (cpu_guest_has_segments) { | ||
3080 | /* SegCtl0, SegCtl1, SegCtl2 */ | ||
3081 | kvm_write_sw_gc0_segctl0(cop0, 0x00200010); | ||
3082 | kvm_write_sw_gc0_segctl1(cop0, 0x00000002 | | ||
3083 | (_page_cachable_default >> _CACHE_SHIFT) << | ||
3084 | (16 + MIPS_SEGCFG_C_SHIFT)); | ||
3085 | kvm_write_sw_gc0_segctl2(cop0, 0x00380438); | ||
3086 | } | ||
3087 | |||
3088 | /* reset HTW registers */ | ||
3089 | if (cpu_guest_has_htw && cpu_has_mips_r6) { | ||
3090 | /* PWField */ | ||
3091 | kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302); | ||
3092 | /* PWSize */ | ||
3093 | kvm_write_sw_gc0_pwsize(cop0, 1 << MIPS_PWSIZE_PTW_SHIFT); | ||
3094 | } | ||
3095 | |||
3096 | /* start with no pending virtual guest interrupts */ | ||
3097 | if (cpu_has_guestctl2) | ||
3098 | cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = 0; | ||
3099 | |||
3100 | /* Put PC at reset vector */ | ||
3101 | vcpu->arch.pc = CKSEG1ADDR(0x1fc00000); | ||
3102 | |||
3103 | return 0; | ||
3104 | } | ||
3105 | |||
3106 | static void kvm_vz_flush_shadow_all(struct kvm *kvm) | ||
3107 | { | ||
3108 | if (cpu_has_guestid) { | ||
3109 | /* Flush GuestID for each VCPU individually */ | ||
3110 | kvm_flush_remote_tlbs(kvm); | ||
3111 | } else { | ||
3112 | /* | ||
3113 | * For each CPU there is a single GPA ASID used by all VCPUs in | ||
3114 | * the VM, so it doesn't make sense for the VCPUs to handle | ||
3115 | * invalidation of these ASIDs individually. | ||
3116 | * | ||
3117 | * Instead mark all CPUs as needing ASID invalidation in | ||
3118 | * asid_flush_mask, and just use kvm_flush_remote_tlbs(kvm) to | ||
3119 | * kick any running VCPUs so they check asid_flush_mask. | ||
3120 | */ | ||
3121 | cpumask_setall(&kvm->arch.asid_flush_mask); | ||
3122 | kvm_flush_remote_tlbs(kvm); | ||
3123 | } | ||
3124 | } | ||
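The comment in kvm_vz_flush_shadow_all() describes the non-GuestID strategy: rather than flushing per VCPU, every CPU is marked stale in asid_flush_mask and running VCPUs are kicked so each one invalidates its own mappings on its next entry. Below is a toy, single-process sketch of that mark-all/clear-own-bit pattern; the names and the plain atomic bitmask are illustrative stand-ins, not the kernel's cpumask API.

#include <stdio.h>
#include <stdatomic.h>

#define NR_CPUS 4

/* One bit per CPU: "GPA mappings on this CPU need invalidating". */
static atomic_uint asid_flush_mask;

/* VM-wide flush: mark every CPU stale instead of flushing each VCPU. */
static void flush_shadow_all(void)
{
	atomic_fetch_or(&asid_flush_mask, (1u << NR_CPUS) - 1);
	/* a real implementation would now kick any running VCPUs */
}

/* On VCPU entry, each CPU checks and clears only its own bit. */
static void vcpu_enter(unsigned int cpu)
{
	if (atomic_fetch_and(&asid_flush_mask, ~(1u << cpu)) & (1u << cpu))
		printf("cpu%u: invalidating GPA mappings\n", cpu);
}

int main(void)
{
	flush_shadow_all();
	vcpu_enter(0);
	vcpu_enter(2);
	vcpu_enter(0);	/* already clean, nothing to do */
	return 0;
}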
3125 | |||
3126 | static void kvm_vz_flush_shadow_memslot(struct kvm *kvm, | ||
3127 | const struct kvm_memory_slot *slot) | ||
3128 | { | ||
3129 | kvm_vz_flush_shadow_all(kvm); | ||
3130 | } | ||
3131 | |||
3132 | static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
3133 | { | ||
3134 | int cpu = smp_processor_id(); | ||
3135 | int preserve_guest_tlb; | ||
3136 | |||
3137 | preserve_guest_tlb = kvm_vz_check_requests(vcpu, cpu); | ||
3138 | |||
3139 | if (preserve_guest_tlb) | ||
3140 | kvm_vz_vcpu_save_wired(vcpu); | ||
3141 | |||
3142 | kvm_vz_vcpu_load_tlb(vcpu, cpu); | ||
3143 | |||
3144 | if (preserve_guest_tlb) | ||
3145 | kvm_vz_vcpu_load_wired(vcpu); | ||
3146 | } | ||
3147 | |||
3148 | static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
3149 | { | ||
3150 | int cpu = smp_processor_id(); | ||
3151 | int r; | ||
3152 | |||
3153 | kvm_vz_acquire_htimer(vcpu); | ||
3154 | /* Check if we have any exceptions/interrupts pending */ | ||
3155 | kvm_mips_deliver_interrupts(vcpu, read_gc0_cause()); | ||
3156 | |||
3157 | kvm_vz_check_requests(vcpu, cpu); | ||
3158 | kvm_vz_vcpu_load_tlb(vcpu, cpu); | ||
3159 | kvm_vz_vcpu_load_wired(vcpu); | ||
3160 | |||
3161 | r = vcpu->arch.vcpu_run(run, vcpu); | ||
3162 | |||
3163 | kvm_vz_vcpu_save_wired(vcpu); | ||
3164 | |||
3165 | return r; | ||
3166 | } | ||
3167 | |||
3168 | static struct kvm_mips_callbacks kvm_vz_callbacks = { | ||
3169 | .handle_cop_unusable = kvm_trap_vz_handle_cop_unusable, | ||
3170 | .handle_tlb_mod = kvm_trap_vz_handle_tlb_st_miss, | ||
3171 | .handle_tlb_ld_miss = kvm_trap_vz_handle_tlb_ld_miss, | ||
3172 | .handle_tlb_st_miss = kvm_trap_vz_handle_tlb_st_miss, | ||
3173 | .handle_addr_err_st = kvm_trap_vz_no_handler, | ||
3174 | .handle_addr_err_ld = kvm_trap_vz_no_handler, | ||
3175 | .handle_syscall = kvm_trap_vz_no_handler, | ||
3176 | .handle_res_inst = kvm_trap_vz_no_handler, | ||
3177 | .handle_break = kvm_trap_vz_no_handler, | ||
3178 | .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled, | ||
3179 | .handle_guest_exit = kvm_trap_vz_handle_guest_exit, | ||
3180 | |||
3181 | .hardware_enable = kvm_vz_hardware_enable, | ||
3182 | .hardware_disable = kvm_vz_hardware_disable, | ||
3183 | .check_extension = kvm_vz_check_extension, | ||
3184 | .vcpu_init = kvm_vz_vcpu_init, | ||
3185 | .vcpu_uninit = kvm_vz_vcpu_uninit, | ||
3186 | .vcpu_setup = kvm_vz_vcpu_setup, | ||
3187 | .flush_shadow_all = kvm_vz_flush_shadow_all, | ||
3188 | .flush_shadow_memslot = kvm_vz_flush_shadow_memslot, | ||
3189 | .gva_to_gpa = kvm_vz_gva_to_gpa_cb, | ||
3190 | .queue_timer_int = kvm_vz_queue_timer_int_cb, | ||
3191 | .dequeue_timer_int = kvm_vz_dequeue_timer_int_cb, | ||
3192 | .queue_io_int = kvm_vz_queue_io_int_cb, | ||
3193 | .dequeue_io_int = kvm_vz_dequeue_io_int_cb, | ||
3194 | .irq_deliver = kvm_vz_irq_deliver_cb, | ||
3195 | .irq_clear = kvm_vz_irq_clear_cb, | ||
3196 | .num_regs = kvm_vz_num_regs, | ||
3197 | .copy_reg_indices = kvm_vz_copy_reg_indices, | ||
3198 | .get_one_reg = kvm_vz_get_one_reg, | ||
3199 | .set_one_reg = kvm_vz_set_one_reg, | ||
3200 | .vcpu_load = kvm_vz_vcpu_load, | ||
3201 | .vcpu_put = kvm_vz_vcpu_put, | ||
3202 | .vcpu_run = kvm_vz_vcpu_run, | ||
3203 | .vcpu_reenter = kvm_vz_vcpu_reenter, | ||
3204 | }; | ||
3205 | |||
3206 | int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) | ||
3207 | { | ||
3208 | if (!cpu_has_vz) | ||
3209 | return -ENODEV; | ||
3210 | |||
3211 | /* | ||
3212 | * VZ requires at least 2 KScratch registers, so it should have been | ||
3213 | * possible to allocate pgd_reg. | ||
3214 | */ | ||
3215 | if (WARN(pgd_reg == -1, | ||
3216 | "pgd_reg not allocated even though cpu_has_vz\n")) | ||
3217 | return -ENODEV; | ||
3218 | |||
3219 | pr_info("Starting KVM with MIPS VZ extensions\n"); | ||
3220 | |||
3221 | *install_callbacks = &kvm_vz_callbacks; | ||
3222 | return 0; | ||
3223 | } | ||
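kvm_mips_emulation_init() does not call into the VZ code directly; it installs a table of callbacks so the core MIPS KVM code can drive either the trap-and-emulate or the VZ backend through one interface. The sketch below shows that install-the-ops pattern in isolation; the names are made up for illustration and are not the kernel's.

#include <stdio.h>

/* Minimal ops-table pattern, modelled loosely on kvm_mips_callbacks. */
struct backend_ops {
	const char *name;
	int (*hardware_enable)(void);
};

static int vz_hardware_enable(void)
{
	printf("enabling VZ backend\n");
	return 0;
}

static const struct backend_ops vz_ops = {
	.name = "vz",
	.hardware_enable = vz_hardware_enable,
};

/* Probe: refuse if the feature is missing, otherwise install the ops. */
static int emulation_init(int has_vz, const struct backend_ops **install)
{
	if (!has_vz)
		return -1;	/* -ENODEV in the kernel */
	*install = &vz_ops;
	return 0;
}

int main(void)
{
	const struct backend_ops *ops;

	if (emulation_init(1, &ops) == 0)
		ops->hardware_enable();
	return 0;
}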
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 6db341347202..899e46279902 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -24,6 +24,7 @@ | |||
24 | /* Cache operations. */ | 24 | /* Cache operations. */ |
25 | void (*flush_cache_all)(void); | 25 | void (*flush_cache_all)(void); |
26 | void (*__flush_cache_all)(void); | 26 | void (*__flush_cache_all)(void); |
27 | EXPORT_SYMBOL_GPL(__flush_cache_all); | ||
27 | void (*flush_cache_mm)(struct mm_struct *mm); | 28 | void (*flush_cache_mm)(struct mm_struct *mm); |
28 | void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start, | 29 | void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start, |
29 | unsigned long end); | 30 | unsigned long end); |
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index aa75849c36bc..3ca20283b31e 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -348,7 +348,7 @@ void maar_init(void) | |||
348 | upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; | 348 | upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; |
349 | 349 | ||
350 | pr_info(" [%d]: ", i / 2); | 350 | pr_info(" [%d]: ", i / 2); |
351 | if (!(attr & MIPS_MAAR_V)) { | 351 | if (!(attr & MIPS_MAAR_VL)) { |
352 | pr_cont("disabled\n"); | 352 | pr_cont("disabled\n"); |
353 | continue; | 353 | continue; |
354 | } | 354 | } |
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 4852e849128b..c0a55050f70f 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -87,6 +87,11 @@ static inline unsigned int get_oc(u32 inst) | |||
87 | return (inst >> 11) & 0x7fff; | 87 | return (inst >> 11) & 0x7fff; |
88 | } | 88 | } |
89 | 89 | ||
90 | static inline unsigned int get_tx_or_sx(u32 inst) | ||
91 | { | ||
92 | return (inst) & 0x1; | ||
93 | } | ||
94 | |||
90 | #define IS_XFORM(inst) (get_op(inst) == 31) | 95 | #define IS_XFORM(inst) (get_op(inst) == 31) |
91 | #define IS_DSFORM(inst) (get_op(inst) >= 56) | 96 | #define IS_DSFORM(inst) (get_op(inst) >= 56) |
92 | 97 | ||
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 2c1d50792944..8a8ce220d7d0 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -64,6 +64,11 @@ struct iommu_table_ops { | |||
64 | long index, | 64 | long index, |
65 | unsigned long *hpa, | 65 | unsigned long *hpa, |
66 | enum dma_data_direction *direction); | 66 | enum dma_data_direction *direction); |
67 | /* Real mode */ | ||
68 | int (*exchange_rm)(struct iommu_table *tbl, | ||
69 | long index, | ||
70 | unsigned long *hpa, | ||
71 | enum dma_data_direction *direction); | ||
67 | #endif | 72 | #endif |
68 | void (*clear)(struct iommu_table *tbl, | 73 | void (*clear)(struct iommu_table *tbl, |
69 | long index, long npages); | 74 | long index, long npages); |
@@ -114,6 +119,7 @@ struct iommu_table { | |||
114 | struct list_head it_group_list;/* List of iommu_table_group_link */ | 119 | struct list_head it_group_list;/* List of iommu_table_group_link */ |
115 | unsigned long *it_userspace; /* userspace view of the table */ | 120 | unsigned long *it_userspace; /* userspace view of the table */ |
116 | struct iommu_table_ops *it_ops; | 121 | struct iommu_table_ops *it_ops; |
122 | struct kref it_kref; | ||
117 | }; | 123 | }; |
118 | 124 | ||
119 | #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ | 125 | #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ |
@@ -146,8 +152,8 @@ static inline void *get_iommu_table_base(struct device *dev) | |||
146 | 152 | ||
147 | extern int dma_iommu_dma_supported(struct device *dev, u64 mask); | 153 | extern int dma_iommu_dma_supported(struct device *dev, u64 mask); |
148 | 154 | ||
149 | /* Frees table for an individual device node */ | 155 | extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl); |
150 | extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); | 156 | extern int iommu_tce_table_put(struct iommu_table *tbl); |
151 | 157 | ||
152 | /* Initializes an iommu_table based in values set in the passed-in | 158 | /* Initializes an iommu_table based in values set in the passed-in |
153 | * structure | 159 | * structure |
@@ -208,6 +214,8 @@ extern void iommu_del_device(struct device *dev); | |||
208 | extern int __init tce_iommu_bus_notifier_init(void); | 214 | extern int __init tce_iommu_bus_notifier_init(void); |
209 | extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, | 215 | extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, |
210 | unsigned long *hpa, enum dma_data_direction *direction); | 216 | unsigned long *hpa, enum dma_data_direction *direction); |
217 | extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, | ||
218 | unsigned long *hpa, enum dma_data_direction *direction); | ||
211 | #else | 219 | #else |
212 | static inline void iommu_register_group(struct iommu_table_group *table_group, | 220 | static inline void iommu_register_group(struct iommu_table_group *table_group, |
213 | int pci_domain_number, | 221 | int pci_domain_number, |
@@ -288,11 +296,21 @@ static inline void iommu_restore(void) | |||
288 | #endif | 296 | #endif |
289 | 297 | ||
290 | /* The API to support IOMMU operations for VFIO */ | 298 | /* The API to support IOMMU operations for VFIO */ |
291 | extern int iommu_tce_clear_param_check(struct iommu_table *tbl, | 299 | extern int iommu_tce_check_ioba(unsigned long page_shift, |
292 | unsigned long ioba, unsigned long tce_value, | 300 | unsigned long offset, unsigned long size, |
293 | unsigned long npages); | 301 | unsigned long ioba, unsigned long npages); |
294 | extern int iommu_tce_put_param_check(struct iommu_table *tbl, | 302 | extern int iommu_tce_check_gpa(unsigned long page_shift, |
295 | unsigned long ioba, unsigned long tce); | 303 | unsigned long gpa); |
304 | |||
305 | #define iommu_tce_clear_param_check(tbl, ioba, tce_value, npages) \ | ||
306 | (iommu_tce_check_ioba((tbl)->it_page_shift, \ | ||
307 | (tbl)->it_offset, (tbl)->it_size, \ | ||
308 | (ioba), (npages)) || (tce_value)) | ||
309 | #define iommu_tce_put_param_check(tbl, ioba, gpa) \ | ||
310 | (iommu_tce_check_ioba((tbl)->it_page_shift, \ | ||
311 | (tbl)->it_offset, (tbl)->it_size, \ | ||
312 | (ioba), 1) || \ | ||
313 | iommu_tce_check_gpa((tbl)->it_page_shift, (gpa))) | ||
296 | 314 | ||
297 | extern void iommu_flush_tce(struct iommu_table *tbl); | 315 | extern void iommu_flush_tce(struct iommu_table *tbl); |
298 | extern int iommu_take_ownership(struct iommu_table *tbl); | 316 | extern int iommu_take_ownership(struct iommu_table *tbl); |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 5a8ab4a758f1..9c51ac4b8f36 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -45,9 +45,6 @@ | |||
45 | 45 | ||
46 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED | 46 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED |
47 | 47 | ||
48 | #ifdef CONFIG_KVM_MMIO | ||
49 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
50 | #endif | ||
51 | #define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */ | 48 | #define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */ |
52 | 49 | ||
53 | /* These values are internal and can be increased later */ | 50 | /* These values are internal and can be increased later */ |
@@ -191,6 +188,13 @@ struct kvmppc_pginfo { | |||
191 | atomic_t refcnt; | 188 | atomic_t refcnt; |
192 | }; | 189 | }; |
193 | 190 | ||
191 | struct kvmppc_spapr_tce_iommu_table { | ||
192 | struct rcu_head rcu; | ||
193 | struct list_head next; | ||
194 | struct iommu_table *tbl; | ||
195 | struct kref kref; | ||
196 | }; | ||
197 | |||
194 | struct kvmppc_spapr_tce_table { | 198 | struct kvmppc_spapr_tce_table { |
195 | struct list_head list; | 199 | struct list_head list; |
196 | struct kvm *kvm; | 200 | struct kvm *kvm; |
@@ -199,6 +203,7 @@ struct kvmppc_spapr_tce_table { | |||
199 | u32 page_shift; | 203 | u32 page_shift; |
200 | u64 offset; /* in pages */ | 204 | u64 offset; /* in pages */ |
201 | u64 size; /* window size in pages */ | 205 | u64 size; /* window size in pages */ |
206 | struct list_head iommu_tables; | ||
202 | struct page *pages[0]; | 207 | struct page *pages[0]; |
203 | }; | 208 | }; |
204 | 209 | ||
@@ -352,6 +357,7 @@ struct kvmppc_pte { | |||
352 | bool may_read : 1; | 357 | bool may_read : 1; |
353 | bool may_write : 1; | 358 | bool may_write : 1; |
354 | bool may_execute : 1; | 359 | bool may_execute : 1; |
360 | unsigned long wimg; | ||
355 | u8 page_size; /* MMU_PAGE_xxx */ | 361 | u8 page_size; /* MMU_PAGE_xxx */ |
356 | }; | 362 | }; |
357 | 363 | ||
@@ -448,6 +454,11 @@ struct mmio_hpte_cache { | |||
448 | unsigned int index; | 454 | unsigned int index; |
449 | }; | 455 | }; |
450 | 456 | ||
457 | #define KVMPPC_VSX_COPY_NONE 0 | ||
458 | #define KVMPPC_VSX_COPY_WORD 1 | ||
459 | #define KVMPPC_VSX_COPY_DWORD 2 | ||
460 | #define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3 | ||
461 | |||
451 | struct openpic; | 462 | struct openpic; |
452 | 463 | ||
453 | /* W0 and W1 of a XIVE thread management context */ | 464 | /* W0 and W1 of a XIVE thread management context */ |
@@ -666,6 +677,21 @@ struct kvm_vcpu_arch { | |||
666 | u8 io_gpr; /* GPR used as IO source/target */ | 677 | u8 io_gpr; /* GPR used as IO source/target */ |
667 | u8 mmio_host_swabbed; | 678 | u8 mmio_host_swabbed; |
668 | u8 mmio_sign_extend; | 679 | u8 mmio_sign_extend; |
680 | /* conversion between single and double precision */ | ||
681 | u8 mmio_sp64_extend; | ||
682 | /* | ||
683 | * Number of copy operations used to simulate one VSX access. | ||
684 | * If one 16-byte access is simulated as 2 * 8-byte copies, | ||
685 | * the number is 2 and | ||
686 | * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD. | ||
687 | * If it is simulated as 4 * 4-byte copies, | ||
688 | * the number is 4 and | ||
689 | * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD. | ||
690 | */ | ||
691 | u8 mmio_vsx_copy_nums; | ||
692 | u8 mmio_vsx_offset; | ||
693 | u8 mmio_vsx_copy_type; | ||
694 | u8 mmio_vsx_tx_sx_enabled; | ||
669 | u8 osi_needed; | 695 | u8 osi_needed; |
670 | u8 osi_enabled; | 696 | u8 osi_enabled; |
671 | u8 papr_enabled; | 697 | u8 papr_enabled; |
@@ -758,6 +784,8 @@ struct kvm_vcpu_arch { | |||
758 | }; | 784 | }; |
759 | 785 | ||
760 | #define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] | 786 | #define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] |
787 | #define VCPU_VSX_FPR(vcpu, i, j) ((vcpu)->arch.fp.fpr[i][j]) | ||
788 | #define VCPU_VSX_VR(vcpu, i) ((vcpu)->arch.vr.vr[i]) | ||
761 | 789 | ||
762 | /* Values for vcpu->arch.state */ | 790 | /* Values for vcpu->arch.state */ |
763 | #define KVMPPC_VCPU_NOTREADY 0 | 791 | #define KVMPPC_VCPU_NOTREADY 0 |
@@ -771,6 +799,7 @@ struct kvm_vcpu_arch { | |||
771 | #define KVM_MMIO_REG_FPR 0x0020 | 799 | #define KVM_MMIO_REG_FPR 0x0020 |
772 | #define KVM_MMIO_REG_QPR 0x0040 | 800 | #define KVM_MMIO_REG_QPR 0x0040 |
773 | #define KVM_MMIO_REG_FQPR 0x0060 | 801 | #define KVM_MMIO_REG_FQPR 0x0060 |
802 | #define KVM_MMIO_REG_VSX 0x0080 | ||
774 | 803 | ||
775 | #define __KVM_HAVE_ARCH_WQP | 804 | #define __KVM_HAVE_ARCH_WQP |
776 | #define __KVM_HAVE_CREATE_DEVICE | 805 | #define __KVM_HAVE_CREATE_DEVICE |
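The new mmio_vsx_* fields describe how a 128-bit VSX register access is broken into several smaller MMIO operations: two 8-byte copies (KVMPPC_VSX_COPY_DWORD) or four 4-byte copies (KVMPPC_VSX_COPY_WORD), with mmio_vsx_copy_nums tracking how many pieces remain. The stand-alone sketch below shows only the splitting idea; vsx_store_split() and dump_write() are invented for illustration and byte ordering is ignored.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

enum vsx_copy { VSX_COPY_DWORD, VSX_COPY_WORD };

/*
 * Illustrative only: emulate a 16-byte VSX register store as a series of
 * smaller MMIO writes, the way mmio_vsx_copy_nums/mmio_vsx_copy_type
 * describe it (2 x 8 bytes or 4 x 4 bytes).
 */
static void vsx_store_split(const uint8_t vsr[16], enum vsx_copy type,
			    void (*mmio_write)(uint64_t val, unsigned int bytes))
{
	unsigned int bytes = (type == VSX_COPY_DWORD) ? 8 : 4;
	unsigned int nums = 16 / bytes;		/* mmio_vsx_copy_nums */
	unsigned int i;

	for (i = 0; i < nums; i++) {
		uint64_t val = 0;

		memcpy(&val, vsr + i * bytes, bytes);
		mmio_write(val, bytes);		/* one emulated MMIO access */
	}
}

static void dump_write(uint64_t val, unsigned int bytes)
{
	printf("mmio write: %u bytes, 0x%016llx\n", bytes,
	       (unsigned long long)val);
}

int main(void)
{
	uint8_t vsr[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
			    9, 10, 11, 12, 13, 14, 15, 16 };

	vsx_store_split(vsr, VSX_COPY_DWORD, dump_write);	/* 2 copies */
	vsx_store_split(vsr, VSX_COPY_WORD, dump_write);	/* 4 copies */
	return 0;
}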
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ed52b13d9ffb..e0d88c38602b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -78,9 +78,15 @@ extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
78 | extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, | 78 | extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, |
79 | unsigned int rt, unsigned int bytes, | 79 | unsigned int rt, unsigned int bytes, |
80 | int is_default_endian); | 80 | int is_default_endian); |
81 | extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
82 | unsigned int rt, unsigned int bytes, | ||
83 | int is_default_endian, int mmio_sign_extend); | ||
81 | extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | 84 | extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, |
82 | u64 val, unsigned int bytes, | 85 | u64 val, unsigned int bytes, |
83 | int is_default_endian); | 86 | int is_default_endian); |
87 | extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
88 | int rs, unsigned int bytes, | ||
89 | int is_default_endian); | ||
84 | 90 | ||
85 | extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, | 91 | extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, |
86 | enum instruction_type type, u32 *inst); | 92 | enum instruction_type type, u32 *inst); |
@@ -132,6 +138,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); | |||
132 | extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); | 138 | extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); |
133 | extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); | 139 | extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); |
134 | extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); | 140 | extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); |
141 | extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu); | ||
142 | extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu); | ||
143 | extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu); | ||
135 | extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); | 144 | extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); |
136 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); | 145 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); |
137 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 146 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
@@ -164,13 +173,19 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, | |||
164 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, | 173 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, |
165 | struct kvm_memory_slot *memslot, unsigned long porder); | 174 | struct kvm_memory_slot *memslot, unsigned long porder); |
166 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); | 175 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); |
176 | extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, | ||
177 | struct iommu_group *grp); | ||
178 | extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, | ||
179 | struct iommu_group *grp); | ||
167 | 180 | ||
168 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | 181 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, |
169 | struct kvm_create_spapr_tce_64 *args); | 182 | struct kvm_create_spapr_tce_64 *args); |
170 | extern struct kvmppc_spapr_tce_table *kvmppc_find_table( | 183 | extern struct kvmppc_spapr_tce_table *kvmppc_find_table( |
171 | struct kvm_vcpu *vcpu, unsigned long liobn); | 184 | struct kvm *kvm, unsigned long liobn); |
172 | extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, | 185 | #define kvmppc_ioba_validate(stt, ioba, npages) \ |
173 | unsigned long ioba, unsigned long npages); | 186 | (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ |
187 | (stt)->size, (ioba), (npages)) ? \ | ||
188 | H_PARAMETER : H_SUCCESS) | ||
174 | extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, | 189 | extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, |
175 | unsigned long tce); | 190 | unsigned long tce); |
176 | extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, | 191 | extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, |
@@ -241,6 +256,7 @@ union kvmppc_one_reg { | |||
241 | u64 dval; | 256 | u64 dval; |
242 | vector128 vval; | 257 | vector128 vval; |
243 | u64 vsxval[2]; | 258 | u64 vsxval[2]; |
259 | u32 vsx32val[4]; | ||
244 | struct { | 260 | struct { |
245 | u64 addr; | 261 | u64 addr; |
246 | u64 length; | 262 | u64 length; |
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b9e3f0aca261..c70c8272523d 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -29,10 +29,14 @@ extern void mm_iommu_init(struct mm_struct *mm); | |||
29 | extern void mm_iommu_cleanup(struct mm_struct *mm); | 29 | extern void mm_iommu_cleanup(struct mm_struct *mm); |
30 | extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, | 30 | extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, |
31 | unsigned long ua, unsigned long size); | 31 | unsigned long ua, unsigned long size); |
32 | extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm( | ||
33 | struct mm_struct *mm, unsigned long ua, unsigned long size); | ||
32 | extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, | 34 | extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, |
33 | unsigned long ua, unsigned long entries); | 35 | unsigned long ua, unsigned long entries); |
34 | extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, | 36 | extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, |
35 | unsigned long ua, unsigned long *hpa); | 37 | unsigned long ua, unsigned long *hpa); |
38 | extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, | ||
39 | unsigned long ua, unsigned long *hpa); | ||
36 | extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); | 40 | extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); |
37 | extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); | 41 | extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); |
38 | #endif | 42 | #endif |
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index e7d6d86563ee..73f06f4dddc7 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -86,32 +86,79 @@ | |||
86 | #define OP_TRAP_64 2 | 86 | #define OP_TRAP_64 2 |
87 | 87 | ||
88 | #define OP_31_XOP_TRAP 4 | 88 | #define OP_31_XOP_TRAP 4 |
89 | #define OP_31_XOP_LDX 21 | ||
89 | #define OP_31_XOP_LWZX 23 | 90 | #define OP_31_XOP_LWZX 23 |
91 | #define OP_31_XOP_LDUX 53 | ||
90 | #define OP_31_XOP_DCBST 54 | 92 | #define OP_31_XOP_DCBST 54 |
91 | #define OP_31_XOP_LWZUX 55 | 93 | #define OP_31_XOP_LWZUX 55 |
92 | #define OP_31_XOP_TRAP_64 68 | 94 | #define OP_31_XOP_TRAP_64 68 |
93 | #define OP_31_XOP_DCBF 86 | 95 | #define OP_31_XOP_DCBF 86 |
94 | #define OP_31_XOP_LBZX 87 | 96 | #define OP_31_XOP_LBZX 87 |
97 | #define OP_31_XOP_STDX 149 | ||
95 | #define OP_31_XOP_STWX 151 | 98 | #define OP_31_XOP_STWX 151 |
99 | #define OP_31_XOP_STDUX 181 | ||
100 | #define OP_31_XOP_STWUX 183 | ||
96 | #define OP_31_XOP_STBX 215 | 101 | #define OP_31_XOP_STBX 215 |
97 | #define OP_31_XOP_LBZUX 119 | 102 | #define OP_31_XOP_LBZUX 119 |
98 | #define OP_31_XOP_STBUX 247 | 103 | #define OP_31_XOP_STBUX 247 |
99 | #define OP_31_XOP_LHZX 279 | 104 | #define OP_31_XOP_LHZX 279 |
100 | #define OP_31_XOP_LHZUX 311 | 105 | #define OP_31_XOP_LHZUX 311 |
101 | #define OP_31_XOP_MFSPR 339 | 106 | #define OP_31_XOP_MFSPR 339 |
107 | #define OP_31_XOP_LWAX 341 | ||
102 | #define OP_31_XOP_LHAX 343 | 108 | #define OP_31_XOP_LHAX 343 |
109 | #define OP_31_XOP_LWAUX 373 | ||
103 | #define OP_31_XOP_LHAUX 375 | 110 | #define OP_31_XOP_LHAUX 375 |
104 | #define OP_31_XOP_STHX 407 | 111 | #define OP_31_XOP_STHX 407 |
105 | #define OP_31_XOP_STHUX 439 | 112 | #define OP_31_XOP_STHUX 439 |
106 | #define OP_31_XOP_MTSPR 467 | 113 | #define OP_31_XOP_MTSPR 467 |
107 | #define OP_31_XOP_DCBI 470 | 114 | #define OP_31_XOP_DCBI 470 |
115 | #define OP_31_XOP_LDBRX 532 | ||
108 | #define OP_31_XOP_LWBRX 534 | 116 | #define OP_31_XOP_LWBRX 534 |
109 | #define OP_31_XOP_TLBSYNC 566 | 117 | #define OP_31_XOP_TLBSYNC 566 |
118 | #define OP_31_XOP_STDBRX 660 | ||
110 | #define OP_31_XOP_STWBRX 662 | 119 | #define OP_31_XOP_STWBRX 662 |
120 | #define OP_31_XOP_STFSX 663 | ||
121 | #define OP_31_XOP_STFSUX 695 | ||
122 | #define OP_31_XOP_STFDX 727 | ||
123 | #define OP_31_XOP_STFDUX 759 | ||
111 | #define OP_31_XOP_LHBRX 790 | 124 | #define OP_31_XOP_LHBRX 790 |
125 | #define OP_31_XOP_LFIWAX 855 | ||
126 | #define OP_31_XOP_LFIWZX 887 | ||
112 | #define OP_31_XOP_STHBRX 918 | 127 | #define OP_31_XOP_STHBRX 918 |
128 | #define OP_31_XOP_STFIWX 983 | ||
129 | |||
130 | /* VSX Scalar Load Instructions */ | ||
131 | #define OP_31_XOP_LXSDX 588 | ||
132 | #define OP_31_XOP_LXSSPX 524 | ||
133 | #define OP_31_XOP_LXSIWAX 76 | ||
134 | #define OP_31_XOP_LXSIWZX 12 | ||
135 | |||
136 | /* VSX Scalar Store Instructions */ | ||
137 | #define OP_31_XOP_STXSDX 716 | ||
138 | #define OP_31_XOP_STXSSPX 652 | ||
139 | #define OP_31_XOP_STXSIWX 140 | ||
140 | |||
141 | /* VSX Vector Load Instructions */ | ||
142 | #define OP_31_XOP_LXVD2X 844 | ||
143 | #define OP_31_XOP_LXVW4X 780 | ||
144 | |||
145 | /* VSX Vector Load and Splat Instruction */ | ||
146 | #define OP_31_XOP_LXVDSX 332 | ||
147 | |||
148 | /* VSX Vector Store Instructions */ | ||
149 | #define OP_31_XOP_STXVD2X 972 | ||
150 | #define OP_31_XOP_STXVW4X 908 | ||
151 | |||
152 | #define OP_31_XOP_LFSX 535 | ||
153 | #define OP_31_XOP_LFSUX 567 | ||
154 | #define OP_31_XOP_LFDX 599 | ||
155 | #define OP_31_XOP_LFDUX 631 | ||
113 | 156 | ||
114 | #define OP_LWZ 32 | 157 | #define OP_LWZ 32 |
158 | #define OP_STFS 52 | ||
159 | #define OP_STFSU 53 | ||
160 | #define OP_STFD 54 | ||
161 | #define OP_STFDU 55 | ||
115 | #define OP_LD 58 | 162 | #define OP_LD 58 |
116 | #define OP_LWZU 33 | 163 | #define OP_LWZU 33 |
117 | #define OP_LBZ 34 | 164 | #define OP_LBZ 34 |
@@ -127,6 +174,17 @@ | |||
127 | #define OP_LHAU 43 | 174 | #define OP_LHAU 43 |
128 | #define OP_STH 44 | 175 | #define OP_STH 44 |
129 | #define OP_STHU 45 | 176 | #define OP_STHU 45 |
177 | #define OP_LMW 46 | ||
178 | #define OP_STMW 47 | ||
179 | #define OP_LFS 48 | ||
180 | #define OP_LFSU 49 | ||
181 | #define OP_LFD 50 | ||
182 | #define OP_LFDU 51 | ||
183 | #define OP_STFS 52 | ||
184 | #define OP_STFSU 53 | ||
185 | #define OP_STFD 54 | ||
186 | #define OP_STFDU 55 | ||
187 | #define OP_LQ 56 | ||
130 | 188 | ||
131 | /* sorted alphabetically */ | 189 | /* sorted alphabetically */ |
132 | #define PPC_INST_BHRBE 0x7c00025c | 190 | #define PPC_INST_BHRBE 0x7c00025c |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 4edbe4bb0e8b..07fbeb927834 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -29,6 +29,9 @@ | |||
29 | #define __KVM_HAVE_IRQ_LINE | 29 | #define __KVM_HAVE_IRQ_LINE |
30 | #define __KVM_HAVE_GUEST_DEBUG | 30 | #define __KVM_HAVE_GUEST_DEBUG |
31 | 31 | ||
32 | /* Not always available, but if it is, this is the correct offset. */ | ||
33 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | ||
34 | |||
32 | struct kvm_regs { | 35 | struct kvm_regs { |
33 | __u64 pc; | 36 | __u64 pc; |
34 | __u64 cr; | 37 | __u64 cr; |
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5f202a566ec5..f2b724cd9e64 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) | |||
711 | return tbl; | 711 | return tbl; |
712 | } | 712 | } |
713 | 713 | ||
714 | void iommu_free_table(struct iommu_table *tbl, const char *node_name) | 714 | static void iommu_table_free(struct kref *kref) |
715 | { | 715 | { |
716 | unsigned long bitmap_sz; | 716 | unsigned long bitmap_sz; |
717 | unsigned int order; | 717 | unsigned int order; |
718 | struct iommu_table *tbl; | ||
718 | 719 | ||
719 | if (!tbl) | 720 | tbl = container_of(kref, struct iommu_table, it_kref); |
720 | return; | 721 | |
722 | if (tbl->it_ops->free) | ||
723 | tbl->it_ops->free(tbl); | ||
721 | 724 | ||
722 | if (!tbl->it_map) { | 725 | if (!tbl->it_map) { |
723 | kfree(tbl); | 726 | kfree(tbl); |
@@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) | |||
733 | 736 | ||
734 | /* verify that table contains no entries */ | 737 | /* verify that table contains no entries */ |
735 | if (!bitmap_empty(tbl->it_map, tbl->it_size)) | 738 | if (!bitmap_empty(tbl->it_map, tbl->it_size)) |
736 | pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); | 739 | pr_warn("%s: Unexpected TCEs\n", __func__); |
737 | 740 | ||
738 | /* calculate bitmap size in bytes */ | 741 | /* calculate bitmap size in bytes */ |
739 | bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); | 742 | bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); |
@@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) | |||
746 | kfree(tbl); | 749 | kfree(tbl); |
747 | } | 750 | } |
748 | 751 | ||
752 | struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl) | ||
753 | { | ||
754 | if (kref_get_unless_zero(&tbl->it_kref)) | ||
755 | return tbl; | ||
756 | |||
757 | return NULL; | ||
758 | } | ||
759 | EXPORT_SYMBOL_GPL(iommu_tce_table_get); | ||
760 | |||
761 | int iommu_tce_table_put(struct iommu_table *tbl) | ||
762 | { | ||
763 | if (WARN_ON(!tbl)) | ||
764 | return 0; | ||
765 | |||
766 | return kref_put(&tbl->it_kref, iommu_table_free); | ||
767 | } | ||
768 | EXPORT_SYMBOL_GPL(iommu_tce_table_put); | ||
769 | |||
749 | /* Creates TCEs for a user provided buffer. The user buffer must be | 770 | /* Creates TCEs for a user provided buffer. The user buffer must be |
750 | * contiguous real kernel storage (not vmalloc). The address passed here | 771 | * contiguous real kernel storage (not vmalloc). The address passed here |
751 | * comprises a page address and offset into that page. The dma_addr_t | 772 | * comprises a page address and offset into that page. The dma_addr_t |
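With this hunk, iommu_free_table() gives way to reference counting: users take a reference with iommu_tce_table_get() (which fails once the count has already hit zero) and drop it with iommu_tce_table_put(), and the table is only torn down on the final put. The toy below sketches that get-unless-zero/put-and-free lifetime with a plain integer instead of a kref, so it is single-threaded and illustrative only.

#include <stdio.h>
#include <stdlib.h>

/* Toy refcounted table, mimicking it_kref + iommu_tce_table_get/put. */
struct table {
	int refs;
};

/* Take a reference only if the table is not already being torn down. */
static struct table *table_get(struct table *tbl)
{
	if (tbl->refs == 0)
		return NULL;		/* kref_get_unless_zero() failed */
	tbl->refs++;
	return tbl;
}

/* Drop a reference; free on the last put, as iommu_table_free() would. */
static int table_put(struct table *tbl)
{
	if (--tbl->refs == 0) {
		printf("freeing table\n");
		free(tbl);
		return 1;
	}
	return 0;
}

int main(void)
{
	struct table *tbl = malloc(sizeof(*tbl));

	tbl->refs = 1;		/* kref_init() */
	table_get(tbl);		/* second user, e.g. a KVM TCE table */
	table_put(tbl);		/* first put: still alive */
	table_put(tbl);		/* last put: freed */
	return 0;
}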
@@ -942,47 +963,36 @@ void iommu_flush_tce(struct iommu_table *tbl) | |||
942 | } | 963 | } |
943 | EXPORT_SYMBOL_GPL(iommu_flush_tce); | 964 | EXPORT_SYMBOL_GPL(iommu_flush_tce); |
944 | 965 | ||
945 | int iommu_tce_clear_param_check(struct iommu_table *tbl, | 966 | int iommu_tce_check_ioba(unsigned long page_shift, |
946 | unsigned long ioba, unsigned long tce_value, | 967 | unsigned long offset, unsigned long size, |
947 | unsigned long npages) | 968 | unsigned long ioba, unsigned long npages) |
948 | { | 969 | { |
949 | /* tbl->it_ops->clear() does not support any value but 0 */ | 970 | unsigned long mask = (1UL << page_shift) - 1; |
950 | if (tce_value) | ||
951 | return -EINVAL; | ||
952 | 971 | ||
953 | if (ioba & ~IOMMU_PAGE_MASK(tbl)) | 972 | if (ioba & mask) |
954 | return -EINVAL; | 973 | return -EINVAL; |
955 | 974 | ||
956 | ioba >>= tbl->it_page_shift; | 975 | ioba >>= page_shift; |
957 | if (ioba < tbl->it_offset) | 976 | if (ioba < offset) |
958 | return -EINVAL; | 977 | return -EINVAL; |
959 | 978 | ||
960 | if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) | 979 | if ((ioba + 1) > (offset + size)) |
961 | return -EINVAL; | 980 | return -EINVAL; |
962 | 981 | ||
963 | return 0; | 982 | return 0; |
964 | } | 983 | } |
965 | EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); | 984 | EXPORT_SYMBOL_GPL(iommu_tce_check_ioba); |
966 | 985 | ||
967 | int iommu_tce_put_param_check(struct iommu_table *tbl, | 986 | int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) |
968 | unsigned long ioba, unsigned long tce) | ||
969 | { | 987 | { |
970 | if (tce & ~IOMMU_PAGE_MASK(tbl)) | 988 | unsigned long mask = (1UL << page_shift) - 1; |
971 | return -EINVAL; | ||
972 | |||
973 | if (ioba & ~IOMMU_PAGE_MASK(tbl)) | ||
974 | return -EINVAL; | ||
975 | 989 | ||
976 | ioba >>= tbl->it_page_shift; | 990 | if (gpa & mask) |
977 | if (ioba < tbl->it_offset) | ||
978 | return -EINVAL; | ||
979 | |||
980 | if ((ioba + 1) > (tbl->it_offset + tbl->it_size)) | ||
981 | return -EINVAL; | 991 | return -EINVAL; |
982 | 992 | ||
983 | return 0; | 993 | return 0; |
984 | } | 994 | } |
985 | EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); | 995 | EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); |
986 | 996 | ||
987 | long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, | 997 | long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, |
988 | unsigned long *hpa, enum dma_data_direction *direction) | 998 | unsigned long *hpa, enum dma_data_direction *direction) |
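The refactored checks split validation into two pure helpers: iommu_tce_check_ioba() verifies that the I/O bus address is page aligned and that the access falls inside the table's [offset, offset + size) window, and iommu_tce_check_gpa() verifies page alignment of the guest physical address. The sketch below reimplements the window check in user space for illustration; note that, unlike the patched helper (which only bounds the first page), this version bounds the full npages range.

#include <stdio.h>

/*
 * Illustrative stand-alone version of the IOBA bounds check: the I/O bus
 * address must be page aligned and the page range must fall inside the
 * [offset, offset + size) window of the table.
 */
static int check_ioba(unsigned long page_shift, unsigned long offset,
		      unsigned long size, unsigned long ioba,
		      unsigned long npages)
{
	unsigned long mask = (1UL << page_shift) - 1;

	if (ioba & mask)			/* not page aligned */
		return -1;

	ioba >>= page_shift;
	if (ioba < offset)			/* below the window */
		return -1;

	if (ioba + npages > offset + size)	/* runs past the window */
		return -1;

	return 0;
}

int main(void)
{
	/* 4K pages, window of 512 pages starting at page 0 */
	printf("%d\n", check_ioba(12, 0, 512, 0x1000, 1));		/* ok */
	printf("%d\n", check_ioba(12, 0, 512, 0x1001, 1));		/* unaligned */
	printf("%d\n", check_ioba(12, 0, 512, 512UL << 12, 1));	/* out of window */
	return 0;
}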
@@ -1004,6 +1014,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, | |||
1004 | } | 1014 | } |
1005 | EXPORT_SYMBOL_GPL(iommu_tce_xchg); | 1015 | EXPORT_SYMBOL_GPL(iommu_tce_xchg); |
1006 | 1016 | ||
1017 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
1018 | long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, | ||
1019 | unsigned long *hpa, enum dma_data_direction *direction) | ||
1020 | { | ||
1021 | long ret; | ||
1022 | |||
1023 | ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); | ||
1024 | |||
1025 | if (!ret && ((*direction == DMA_FROM_DEVICE) || | ||
1026 | (*direction == DMA_BIDIRECTIONAL))) { | ||
1027 | struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT); | ||
1028 | |||
1029 | if (likely(pg)) { | ||
1030 | SetPageDirty(pg); | ||
1031 | } else { | ||
1032 | tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); | ||
1033 | ret = -EFAULT; | ||
1034 | } | ||
1035 | } | ||
1036 | |||
1037 | return ret; | ||
1038 | } | ||
1039 | EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm); | ||
1040 | #endif | ||
1041 | |||
1007 | int iommu_take_ownership(struct iommu_table *tbl) | 1042 | int iommu_take_ownership(struct iommu_table *tbl) |
1008 | { | 1043 | { |
1009 | unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; | 1044 | unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index b9d66e53b773..24de532c1736 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -67,6 +67,7 @@ config KVM_BOOK3S_64 | |||
67 | select KVM_BOOK3S_64_HANDLER | 67 | select KVM_BOOK3S_64_HANDLER |
68 | select KVM | 68 | select KVM |
69 | select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE | 69 | select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE |
70 | select SPAPR_TCE_IOMMU if IOMMU_SUPPORT | ||
70 | ---help--- | 71 | ---help--- |
71 | Support running unmodified book3s_64 and book3s_32 guest kernels | 72 | Support running unmodified book3s_64 and book3s_32 guest kernels |
72 | in virtual machines on book3s_64 host processors. | 73 | in virtual machines on book3s_64 host processors. |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index cb8009cd688d..72d977e30952 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -198,6 +198,24 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) | |||
198 | } | 198 | } |
199 | EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); | 199 | EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); |
200 | 200 | ||
201 | void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu) | ||
202 | { | ||
203 | /* might as well deliver this straight away */ | ||
204 | kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, 0); | ||
205 | } | ||
206 | |||
207 | void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu) | ||
208 | { | ||
209 | /* might as well deliver this straight away */ | ||
210 | kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, 0); | ||
211 | } | ||
212 | |||
213 | void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu) | ||
214 | { | ||
215 | /* might as well deliver this straight away */ | ||
216 | kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, 0); | ||
217 | } | ||
218 | |||
201 | void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) | 219 | void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) |
202 | { | 220 | { |
203 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); | 221 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); |
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 70153578131a..29ebe2fd5867 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -319,6 +319,7 @@ do_second: | |||
319 | gpte->may_execute = true; | 319 | gpte->may_execute = true; |
320 | gpte->may_read = false; | 320 | gpte->may_read = false; |
321 | gpte->may_write = false; | 321 | gpte->may_write = false; |
322 | gpte->wimg = r & HPTE_R_WIMG; | ||
322 | 323 | ||
323 | switch (pp) { | 324 | switch (pp) { |
324 | case 0: | 325 | case 0: |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f4fd26..145a61892c48 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, | |||
145 | else | 145 | else |
146 | kvmppc_mmu_flush_icache(pfn); | 146 | kvmppc_mmu_flush_icache(pfn); |
147 | 147 | ||
148 | rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg; | ||
149 | |||
148 | /* | 150 | /* |
149 | * Use 64K pages if possible; otherwise, on 64K page kernels, | 151 | * Use 64K pages if possible; otherwise, on 64K page kernels, |
150 | * we need to transfer 4 more bits from guest real to host real addr. | 152 | * we need to transfer 4 more bits from guest real to host real addr. |
@@ -177,12 +179,15 @@ map_again: | |||
177 | ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, | 179 | ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, |
178 | hpsize, hpsize, MMU_SEGSIZE_256M); | 180 | hpsize, hpsize, MMU_SEGSIZE_256M); |
179 | 181 | ||
180 | if (ret < 0) { | 182 | if (ret == -1) { |
181 | /* If we couldn't map a primary PTE, try a secondary */ | 183 | /* If we couldn't map a primary PTE, try a secondary */ |
182 | hash = ~hash; | 184 | hash = ~hash; |
183 | vflags ^= HPTE_V_SECONDARY; | 185 | vflags ^= HPTE_V_SECONDARY; |
184 | attempt++; | 186 | attempt++; |
185 | goto map_again; | 187 | goto map_again; |
188 | } else if (ret < 0) { | ||
189 | r = -EIO; | ||
190 | goto out_unlock; | ||
186 | } else { | 191 | } else { |
187 | trace_kvm_book3s_64_mmu_map(rflags, hpteg, | 192 | trace_kvm_book3s_64_mmu_map(rflags, hpteg, |
188 | vpn, hpaddr, orig_pte); | 193 | vpn, hpaddr, orig_pte); |
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 3e26cd4979f9..a160c14304eb 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -28,6 +28,8 @@ | |||
28 | #include <linux/hugetlb.h> | 28 | #include <linux/hugetlb.h> |
29 | #include <linux/list.h> | 29 | #include <linux/list.h> |
30 | #include <linux/anon_inodes.h> | 30 | #include <linux/anon_inodes.h> |
31 | #include <linux/iommu.h> | ||
32 | #include <linux/file.h> | ||
31 | 33 | ||
32 | #include <asm/tlbflush.h> | 34 | #include <asm/tlbflush.h> |
33 | #include <asm/kvm_ppc.h> | 35 | #include <asm/kvm_ppc.h> |
@@ -40,6 +42,7 @@ | |||
40 | #include <asm/udbg.h> | 42 | #include <asm/udbg.h> |
41 | #include <asm/iommu.h> | 43 | #include <asm/iommu.h> |
42 | #include <asm/tce.h> | 44 | #include <asm/tce.h> |
45 | #include <asm/mmu_context.h> | ||
43 | 46 | ||
44 | static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) | 47 | static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) |
45 | { | 48 | { |
@@ -91,6 +94,137 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) | |||
91 | return ret; | 94 | return ret; |
92 | } | 95 | } |
93 | 96 | ||
97 | static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) | ||
98 | { | ||
99 | struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, | ||
100 | struct kvmppc_spapr_tce_iommu_table, rcu); | ||
101 | |||
102 | iommu_tce_table_put(stit->tbl); | ||
103 | |||
104 | kfree(stit); | ||
105 | } | ||
106 | |||
107 | static void kvm_spapr_tce_liobn_put(struct kref *kref) | ||
108 | { | ||
109 | struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, | ||
110 | struct kvmppc_spapr_tce_iommu_table, kref); | ||
111 | |||
112 | list_del_rcu(&stit->next); | ||
113 | |||
114 | call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); | ||
115 | } | ||
116 | |||
117 | extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, | ||
118 | struct iommu_group *grp) | ||
119 | { | ||
120 | int i; | ||
121 | struct kvmppc_spapr_tce_table *stt; | ||
122 | struct kvmppc_spapr_tce_iommu_table *stit, *tmp; | ||
123 | struct iommu_table_group *table_group = NULL; | ||
124 | |||
125 | list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { | ||
126 | |||
127 | table_group = iommu_group_get_iommudata(grp); | ||
128 | if (WARN_ON(!table_group)) | ||
129 | continue; | ||
130 | |||
131 | list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { | ||
132 | for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { | ||
133 | if (table_group->tables[i] != stit->tbl) | ||
134 | continue; | ||
135 | |||
136 | kref_put(&stit->kref, kvm_spapr_tce_liobn_put); | ||
137 | return; | ||
138 | } | ||
139 | } | ||
140 | } | ||
141 | } | ||
142 | |||
143 | extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, | ||
144 | struct iommu_group *grp) | ||
145 | { | ||
146 | struct kvmppc_spapr_tce_table *stt = NULL; | ||
147 | bool found = false; | ||
148 | struct iommu_table *tbl = NULL; | ||
149 | struct iommu_table_group *table_group; | ||
150 | long i; | ||
151 | struct kvmppc_spapr_tce_iommu_table *stit; | ||
152 | struct fd f; | ||
153 | |||
154 | f = fdget(tablefd); | ||
155 | if (!f.file) | ||
156 | return -EBADF; | ||
157 | |||
158 | list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { | ||
159 | if (stt == f.file->private_data) { | ||
160 | found = true; | ||
161 | break; | ||
162 | } | ||
163 | } | ||
164 | |||
165 | fdput(f); | ||
166 | |||
167 | if (!found) | ||
168 | return -EINVAL; | ||
169 | |||
170 | table_group = iommu_group_get_iommudata(grp); | ||
171 | if (WARN_ON(!table_group)) | ||
172 | return -EFAULT; | ||
173 | |||
174 | for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { | ||
175 | struct iommu_table *tbltmp = table_group->tables[i]; | ||
176 | |||
177 | if (!tbltmp) | ||
178 | continue; | ||
179 | /* | ||
180 | * Make sure hardware table parameters are exactly the same; | ||
181 | * this is used in the TCE handlers where boundary checks | ||
182 | * use only the first attached table. | ||
183 | */ | ||
184 | if ((tbltmp->it_page_shift == stt->page_shift) && | ||
185 | (tbltmp->it_offset == stt->offset) && | ||
186 | (tbltmp->it_size == stt->size)) { | ||
187 | /* | ||
188 | * Reference the table to avoid races with | ||
189 | * add/remove DMA windows. | ||
190 | */ | ||
191 | tbl = iommu_tce_table_get(tbltmp); | ||
192 | break; | ||
193 | } | ||
194 | } | ||
195 | if (!tbl) | ||
196 | return -EINVAL; | ||
197 | |||
198 | list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { | ||
199 | if (tbl != stit->tbl) | ||
200 | continue; | ||
201 | |||
202 | if (!kref_get_unless_zero(&stit->kref)) { | ||
203 | /* stit is being destroyed */ | ||
204 | iommu_tce_table_put(tbl); | ||
205 | return -ENOTTY; | ||
206 | } | ||
207 | /* | ||
208 | * The table is already known to this KVM, we just increased | ||
209 | * its KVM reference counter and can return. | ||
210 | */ | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | stit = kzalloc(sizeof(*stit), GFP_KERNEL); | ||
215 | if (!stit) { | ||
216 | iommu_tce_table_put(tbl); | ||
217 | return -ENOMEM; | ||
218 | } | ||
219 | |||
220 | stit->tbl = tbl; | ||
221 | kref_init(&stit->kref); | ||
222 | |||
223 | list_add_rcu(&stit->next, &stt->iommu_tables); | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
94 | static void release_spapr_tce_table(struct rcu_head *head) | 228 | static void release_spapr_tce_table(struct rcu_head *head) |
95 | { | 229 | { |
96 | struct kvmppc_spapr_tce_table *stt = container_of(head, | 230 | struct kvmppc_spapr_tce_table *stt = container_of(head, |
@@ -130,9 +264,18 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) | |||
130 | static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) | 264 | static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) |
131 | { | 265 | { |
132 | struct kvmppc_spapr_tce_table *stt = filp->private_data; | 266 | struct kvmppc_spapr_tce_table *stt = filp->private_data; |
267 | struct kvmppc_spapr_tce_iommu_table *stit, *tmp; | ||
133 | 268 | ||
134 | list_del_rcu(&stt->list); | 269 | list_del_rcu(&stt->list); |
135 | 270 | ||
271 | list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { | ||
272 | WARN_ON(!kref_read(&stit->kref)); | ||
273 | while (1) { | ||
274 | if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) | ||
275 | break; | ||
276 | } | ||
277 | } | ||
278 | |||
136 | kvm_put_kvm(stt->kvm); | 279 | kvm_put_kvm(stt->kvm); |
137 | 280 | ||
138 | kvmppc_account_memlimit( | 281 | kvmppc_account_memlimit( |
@@ -164,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
164 | return -EBUSY; | 307 | return -EBUSY; |
165 | } | 308 | } |
166 | 309 | ||
167 | size = args->size; | 310 | size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); |
168 | npages = kvmppc_tce_pages(size); | 311 | npages = kvmppc_tce_pages(size); |
169 | ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); | 312 | ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); |
170 | if (ret) { | 313 | if (ret) { |
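The _ALIGN_UP() change in the hunk above rounds the requested window size up to a whole number of TCE pages: PAGE_SIZE >> 3 is the number of 8-byte TCEs that fit in one host page. A worked example of the arithmetic, assuming 64 KiB pages (the values are illustrative):

/* align-up to a power-of-two boundary, as _ALIGN_UP does */
#define ALIGN_UP_SKETCH(x, a)  (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* 64 KiB page / 8-byte TCEs = 8192 TCEs per page, so a requested */
/* window of 10000 entries becomes 16384:                         */
/* ALIGN_UP_SKETCH(10000, 65536 >> 3) == 16384                    */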
@@ -183,6 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
183 | stt->offset = args->offset; | 326 | stt->offset = args->offset; |
184 | stt->size = size; | 327 | stt->size = size; |
185 | stt->kvm = kvm; | 328 | stt->kvm = kvm; |
329 | INIT_LIST_HEAD_RCU(&stt->iommu_tables); | ||
186 | 330 | ||
187 | for (i = 0; i < npages; i++) { | 331 | for (i = 0; i < npages; i++) { |
188 | stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); | 332 | stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); |
@@ -211,15 +355,106 @@ fail: | |||
211 | return ret; | 355 | return ret; |
212 | } | 356 | } |
213 | 357 | ||
358 | static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) | ||
359 | { | ||
360 | unsigned long hpa = 0; | ||
361 | enum dma_data_direction dir = DMA_NONE; | ||
362 | |||
363 | iommu_tce_xchg(tbl, entry, &hpa, &dir); | ||
364 | } | ||
365 | |||
366 | static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, | ||
367 | struct iommu_table *tbl, unsigned long entry) | ||
368 | { | ||
369 | struct mm_iommu_table_group_mem_t *mem = NULL; | ||
370 | const unsigned long pgsize = 1ULL << tbl->it_page_shift; | ||
371 | unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); | ||
372 | |||
373 | if (!pua) | ||
374 | /* it_userspace allocation might be delayed */ | ||
375 | return H_TOO_HARD; | ||
376 | |||
377 | mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); | ||
378 | if (!mem) | ||
379 | return H_TOO_HARD; | ||
380 | |||
381 | mm_iommu_mapped_dec(mem); | ||
382 | |||
383 | *pua = 0; | ||
384 | |||
385 | return H_SUCCESS; | ||
386 | } | ||
387 | |||
388 | static long kvmppc_tce_iommu_unmap(struct kvm *kvm, | ||
389 | struct iommu_table *tbl, unsigned long entry) | ||
390 | { | ||
391 | enum dma_data_direction dir = DMA_NONE; | ||
392 | unsigned long hpa = 0; | ||
393 | long ret; | ||
394 | |||
395 | if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) | ||
396 | return H_HARDWARE; | ||
397 | |||
398 | if (dir == DMA_NONE) | ||
399 | return H_SUCCESS; | ||
400 | |||
401 | ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); | ||
402 | if (ret != H_SUCCESS) | ||
403 | iommu_tce_xchg(tbl, entry, &hpa, &dir); | ||
404 | |||
405 | return ret; | ||
406 | } | ||
407 | |||
408 | long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, | ||
409 | unsigned long entry, unsigned long ua, | ||
410 | enum dma_data_direction dir) | ||
411 | { | ||
412 | long ret; | ||
413 | unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); | ||
414 | struct mm_iommu_table_group_mem_t *mem; | ||
415 | |||
416 | if (!pua) | ||
417 | /* it_userspace allocation might be delayed */ | ||
418 | return H_TOO_HARD; | ||
419 | |||
420 | mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); | ||
421 | if (!mem) | ||
422 | /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ | ||
423 | return H_TOO_HARD; | ||
424 | |||
425 | if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) | ||
426 | return H_HARDWARE; | ||
427 | |||
428 | if (mm_iommu_mapped_inc(mem)) | ||
429 | return H_CLOSED; | ||
430 | |||
431 | ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); | ||
432 | if (WARN_ON_ONCE(ret)) { | ||
433 | mm_iommu_mapped_dec(mem); | ||
434 | return H_HARDWARE; | ||
435 | } | ||
436 | |||
437 | if (dir != DMA_NONE) | ||
438 | kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); | ||
439 | |||
440 | *pua = ua; | ||
441 | |||
442 | return 0; | ||
443 | } | ||
444 | |||
214 | long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 445 | long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
215 | unsigned long ioba, unsigned long tce) | 446 | unsigned long ioba, unsigned long tce) |
216 | { | 447 | { |
217 | struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); | 448 | struct kvmppc_spapr_tce_table *stt; |
218 | long ret; | 449 | long ret, idx; |
450 | struct kvmppc_spapr_tce_iommu_table *stit; | ||
451 | unsigned long entry, ua = 0; | ||
452 | enum dma_data_direction dir; | ||
219 | 453 | ||
220 | /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ | 454 | /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ |
221 | /* liobn, ioba, tce); */ | 455 | /* liobn, ioba, tce); */ |
222 | 456 | ||
457 | stt = kvmppc_find_table(vcpu->kvm, liobn); | ||
223 | if (!stt) | 458 | if (!stt) |
224 | return H_TOO_HARD; | 459 | return H_TOO_HARD; |
225 | 460 | ||
@@ -231,7 +466,35 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
231 | if (ret != H_SUCCESS) | 466 | if (ret != H_SUCCESS) |
232 | return ret; | 467 | return ret; |
233 | 468 | ||
234 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); | 469 | dir = iommu_tce_direction(tce); |
470 | if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, | ||
471 | tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) | ||
472 | return H_PARAMETER; | ||
473 | |||
474 | entry = ioba >> stt->page_shift; | ||
475 | |||
476 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | ||
477 | if (dir == DMA_NONE) { | ||
478 | ret = kvmppc_tce_iommu_unmap(vcpu->kvm, | ||
479 | stit->tbl, entry); | ||
480 | } else { | ||
481 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
482 | ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, | ||
483 | entry, ua, dir); | ||
484 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
485 | } | ||
486 | |||
487 | if (ret == H_SUCCESS) | ||
488 | continue; | ||
489 | |||
490 | if (ret == H_TOO_HARD) | ||
491 | return ret; | ||
492 | |||
493 | WARN_ON_ONCE(1); | ||
494 | kvmppc_clear_tce(stit->tbl, entry); | ||
495 | } | ||
496 | |||
497 | kvmppc_tce_put(stt, entry, tce); | ||
235 | 498 | ||
236 | return H_SUCCESS; | 499 | return H_SUCCESS; |
237 | } | 500 | } |
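Both H_PUT_TCE paths convert the IO bus address into a TCE index with entry = ioba >> stt->page_shift once the ioba has been validated against the window (the kvmppc_ioba_validate() logic visible further down in this diff). A standalone sketch of that conversion; field names follow the structures in this patch, the rest is illustrative:

#include <stdint.h>

struct tce_window { unsigned int page_shift; uint64_t offset, size; };

/* Return the TCE index for 'ioba', or -1 when the access is unaligned
 * or falls outside the window (H_PARAMETER in the hypercall). */
static int64_t ioba_to_entry(const struct tce_window *w,
                             uint64_t ioba, uint64_t npages)
{
        uint64_t mask = (1ULL << w->page_shift) - 1;
        uint64_t idx = ioba >> w->page_shift;

        if ((ioba & mask) || idx < w->offset ||
            idx - w->offset + npages > w->size || idx + npages < idx)
                return -1;

        return idx;
}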
@@ -246,8 +509,9 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
246 | unsigned long entry, ua = 0; | 509 | unsigned long entry, ua = 0; |
247 | u64 __user *tces; | 510 | u64 __user *tces; |
248 | u64 tce; | 511 | u64 tce; |
512 | struct kvmppc_spapr_tce_iommu_table *stit; | ||
249 | 513 | ||
250 | stt = kvmppc_find_table(vcpu, liobn); | 514 | stt = kvmppc_find_table(vcpu->kvm, liobn); |
251 | if (!stt) | 515 | if (!stt) |
252 | return H_TOO_HARD; | 516 | return H_TOO_HARD; |
253 | 517 | ||
@@ -284,6 +548,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
284 | if (ret != H_SUCCESS) | 548 | if (ret != H_SUCCESS) |
285 | goto unlock_exit; | 549 | goto unlock_exit; |
286 | 550 | ||
551 | if (kvmppc_gpa_to_ua(vcpu->kvm, | ||
552 | tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), | ||
553 | &ua, NULL)) | ||
554 | return H_PARAMETER; | ||
555 | |||
556 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | ||
557 | ret = kvmppc_tce_iommu_map(vcpu->kvm, | ||
558 | stit->tbl, entry + i, ua, | ||
559 | iommu_tce_direction(tce)); | ||
560 | |||
561 | if (ret == H_SUCCESS) | ||
562 | continue; | ||
563 | |||
564 | if (ret == H_TOO_HARD) | ||
565 | goto unlock_exit; | ||
566 | |||
567 | WARN_ON_ONCE(1); | ||
568 | kvmppc_clear_tce(stit->tbl, entry); | ||
569 | } | ||
570 | |||
287 | kvmppc_tce_put(stt, entry + i, tce); | 571 | kvmppc_tce_put(stt, entry + i, tce); |
288 | } | 572 | } |
289 | 573 | ||
@@ -300,8 +584,9 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
300 | { | 584 | { |
301 | struct kvmppc_spapr_tce_table *stt; | 585 | struct kvmppc_spapr_tce_table *stt; |
302 | long i, ret; | 586 | long i, ret; |
587 | struct kvmppc_spapr_tce_iommu_table *stit; | ||
303 | 588 | ||
304 | stt = kvmppc_find_table(vcpu, liobn); | 589 | stt = kvmppc_find_table(vcpu->kvm, liobn); |
305 | if (!stt) | 590 | if (!stt) |
306 | return H_TOO_HARD; | 591 | return H_TOO_HARD; |
307 | 592 | ||
@@ -313,6 +598,24 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
313 | if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) | 598 | if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) |
314 | return H_PARAMETER; | 599 | return H_PARAMETER; |
315 | 600 | ||
601 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | ||
602 | unsigned long entry = ioba >> stit->tbl->it_page_shift; | ||
603 | |||
604 | for (i = 0; i < npages; ++i) { | ||
605 | ret = kvmppc_tce_iommu_unmap(vcpu->kvm, | ||
606 | stit->tbl, entry + i); | ||
607 | |||
608 | if (ret == H_SUCCESS) | ||
609 | continue; | ||
610 | |||
611 | if (ret == H_TOO_HARD) | ||
612 | return ret; | ||
613 | |||
614 | WARN_ON_ONCE(1); | ||
615 | kvmppc_clear_tce(stit->tbl, entry); | ||
616 | } | ||
617 | } | ||
618 | |||
316 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) | 619 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) |
317 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); | 620 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); |
318 | 621 | ||
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index e4c4ea973e57..eda0a8f6fae8 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c | |||
@@ -40,6 +40,31 @@ | |||
40 | #include <asm/iommu.h> | 40 | #include <asm/iommu.h> |
41 | #include <asm/tce.h> | 41 | #include <asm/tce.h> |
42 | 42 | ||
43 | #ifdef CONFIG_BUG | ||
44 | |||
45 | #define WARN_ON_ONCE_RM(condition) ({ \ | ||
46 | static bool __section(.data.unlikely) __warned; \ | ||
47 | int __ret_warn_once = !!(condition); \ | ||
48 | \ | ||
49 | if (unlikely(__ret_warn_once && !__warned)) { \ | ||
50 | __warned = true; \ | ||
51 | pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n", \ | ||
52 | __stringify(condition), \ | ||
53 | __func__, __LINE__); \ | ||
54 | dump_stack(); \ | ||
55 | } \ | ||
56 | unlikely(__ret_warn_once); \ | ||
57 | }) | ||
58 | |||
59 | #else | ||
60 | |||
61 | #define WARN_ON_ONCE_RM(condition) ({ \ | ||
62 | int __ret_warn_on = !!(condition); \ | ||
63 | unlikely(__ret_warn_on); \ | ||
64 | }) | ||
65 | |||
66 | #endif | ||
67 | |||
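WARN_ON_ONCE_RM above reproduces the warn-once behaviour using nothing but a static flag, pr_err() and dump_stack(), which keeps it usable on the real-mode hcall paths in this file where the regular trap-based WARN machinery is avoided. A standalone sketch of the same latch, assuming GNU statement expressions (as the kernel macro itself uses):

#include <stdio.h>

#define WARN_ONCE_SKETCH(cond) ({                                    \
        static int __warned;                                         \
        int __ret = !!(cond);                                        \
        if (__ret && !__warned) {                                    \
                __warned = 1;                                        \
                fprintf(stderr, "warn: %s at %s:%d\n",               \
                        #cond, __func__, __LINE__);                  \
        }                                                            \
        __ret;                                                       \
})

/* usage sketch: if (WARN_ONCE_SKETCH(hpa & (pgsize - 1))) return -1; */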
43 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | 68 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) |
44 | 69 | ||
45 | /* | 70 | /* |
@@ -48,10 +73,9 @@ | |||
48 | * WARNING: This will be called in real or virtual mode on HV KVM and virtual | 73 | * WARNING: This will be called in real or virtual mode on HV KVM and virtual |
49 | * mode on PR KVM | 74 | * mode on PR KVM |
50 | */ | 75 | */ |
51 | struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, | 76 | struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, |
52 | unsigned long liobn) | 77 | unsigned long liobn) |
53 | { | 78 | { |
54 | struct kvm *kvm = vcpu->kvm; | ||
55 | struct kvmppc_spapr_tce_table *stt; | 79 | struct kvmppc_spapr_tce_table *stt; |
56 | 80 | ||
57 | list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list) | 81 | list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list) |
@@ -63,27 +87,6 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, | |||
63 | EXPORT_SYMBOL_GPL(kvmppc_find_table); | 87 | EXPORT_SYMBOL_GPL(kvmppc_find_table); |
64 | 88 | ||
65 | /* | 89 | /* |
66 | * Validates IO address. | ||
67 | * | ||
68 | * WARNING: This will be called in real-mode on HV KVM and virtual | ||
69 | * mode on PR KVM | ||
70 | */ | ||
71 | long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, | ||
72 | unsigned long ioba, unsigned long npages) | ||
73 | { | ||
74 | unsigned long mask = (1ULL << stt->page_shift) - 1; | ||
75 | unsigned long idx = ioba >> stt->page_shift; | ||
76 | |||
77 | if ((ioba & mask) || (idx < stt->offset) || | ||
78 | (idx - stt->offset + npages > stt->size) || | ||
79 | (idx + npages < idx)) | ||
80 | return H_PARAMETER; | ||
81 | |||
82 | return H_SUCCESS; | ||
83 | } | ||
84 | EXPORT_SYMBOL_GPL(kvmppc_ioba_validate); | ||
85 | |||
86 | /* | ||
87 | * Validates TCE address. | 90 | * Validates TCE address. |
88 | * At the moment flags and page mask are validated. | 91 | * At the moment flags and page mask are validated. |
89 | * As the host kernel does not access those addresses (just puts them | 92 | * As the host kernel does not access those addresses (just puts them |
@@ -96,10 +99,14 @@ EXPORT_SYMBOL_GPL(kvmppc_ioba_validate); | |||
96 | */ | 99 | */ |
97 | long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) | 100 | long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) |
98 | { | 101 | { |
99 | unsigned long page_mask = ~((1ULL << stt->page_shift) - 1); | 102 | unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE); |
100 | unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ); | 103 | enum dma_data_direction dir = iommu_tce_direction(tce); |
104 | |||
105 | /* Allow userspace to poison TCE table */ | ||
106 | if (dir == DMA_NONE) | ||
107 | return H_SUCCESS; | ||
101 | 108 | ||
102 | if (tce & mask) | 109 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) |
103 | return H_PARAMETER; | 110 | return H_PARAMETER; |
104 | 111 | ||
105 | return H_SUCCESS; | 112 | return H_SUCCESS; |
@@ -179,15 +186,122 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, | |||
179 | EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); | 186 | EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); |
180 | 187 | ||
181 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 188 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
189 | static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry) | ||
190 | { | ||
191 | unsigned long hpa = 0; | ||
192 | enum dma_data_direction dir = DMA_NONE; | ||
193 | |||
194 | iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); | ||
195 | } | ||
196 | |||
197 | static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, | ||
198 | struct iommu_table *tbl, unsigned long entry) | ||
199 | { | ||
200 | struct mm_iommu_table_group_mem_t *mem = NULL; | ||
201 | const unsigned long pgsize = 1ULL << tbl->it_page_shift; | ||
202 | unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); | ||
203 | |||
204 | if (!pua) | ||
205 | /* it_userspace allocation might be delayed */ | ||
206 | return H_TOO_HARD; | ||
207 | |||
208 | pua = (void *) vmalloc_to_phys(pua); | ||
209 | if (WARN_ON_ONCE_RM(!pua)) | ||
210 | return H_HARDWARE; | ||
211 | |||
212 | mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize); | ||
213 | if (!mem) | ||
214 | return H_TOO_HARD; | ||
215 | |||
216 | mm_iommu_mapped_dec(mem); | ||
217 | |||
218 | *pua = 0; | ||
219 | |||
220 | return H_SUCCESS; | ||
221 | } | ||
222 | |||
223 | static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, | ||
224 | struct iommu_table *tbl, unsigned long entry) | ||
225 | { | ||
226 | enum dma_data_direction dir = DMA_NONE; | ||
227 | unsigned long hpa = 0; | ||
228 | long ret; | ||
229 | |||
230 | if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir)) | ||
231 | /* | ||
232 | * real mode xchg can fail if struct page crosses | ||
233 | * a page boundary | ||
234 | */ | ||
235 | return H_TOO_HARD; | ||
236 | |||
237 | if (dir == DMA_NONE) | ||
238 | return H_SUCCESS; | ||
239 | |||
240 | ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); | ||
241 | if (ret) | ||
242 | iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); | ||
243 | |||
244 | return ret; | ||
245 | } | ||
246 | |||
247 | static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, | ||
248 | unsigned long entry, unsigned long ua, | ||
249 | enum dma_data_direction dir) | ||
250 | { | ||
251 | long ret; | ||
252 | unsigned long hpa = 0; | ||
253 | unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); | ||
254 | struct mm_iommu_table_group_mem_t *mem; | ||
255 | |||
256 | if (!pua) | ||
257 | /* it_userspace allocation might be delayed */ | ||
258 | return H_TOO_HARD; | ||
259 | |||
260 | mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift); | ||
261 | if (!mem) | ||
262 | return H_TOO_HARD; | ||
263 | |||
264 | if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) | ||
265 | return H_HARDWARE; | ||
266 | |||
267 | pua = (void *) vmalloc_to_phys(pua); | ||
268 | if (WARN_ON_ONCE_RM(!pua)) | ||
269 | return H_HARDWARE; | ||
270 | |||
271 | if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) | ||
272 | return H_CLOSED; | ||
273 | |||
274 | ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); | ||
275 | if (ret) { | ||
276 | mm_iommu_mapped_dec(mem); | ||
277 | /* | ||
278 | * real mode xchg can fail if struct page crosses | ||
279 | * a page boundary | ||
280 | */ | ||
281 | return H_TOO_HARD; | ||
282 | } | ||
283 | |||
284 | if (dir != DMA_NONE) | ||
285 | kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); | ||
286 | |||
287 | *pua = ua; | ||
288 | |||
289 | return 0; | ||
290 | } | ||
291 | |||
182 | long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 292 | long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
183 | unsigned long ioba, unsigned long tce) | 293 | unsigned long ioba, unsigned long tce) |
184 | { | 294 | { |
185 | struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); | 295 | struct kvmppc_spapr_tce_table *stt; |
186 | long ret; | 296 | long ret; |
297 | struct kvmppc_spapr_tce_iommu_table *stit; | ||
298 | unsigned long entry, ua = 0; | ||
299 | enum dma_data_direction dir; | ||
187 | 300 | ||
188 | /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ | 301 | /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ |
189 | /* liobn, ioba, tce); */ | 302 | /* liobn, ioba, tce); */ |
190 | 303 | ||
304 | stt = kvmppc_find_table(vcpu->kvm, liobn); | ||
191 | if (!stt) | 305 | if (!stt) |
192 | return H_TOO_HARD; | 306 | return H_TOO_HARD; |
193 | 307 | ||
@@ -199,7 +313,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
199 | if (ret != H_SUCCESS) | 313 | if (ret != H_SUCCESS) |
200 | return ret; | 314 | return ret; |
201 | 315 | ||
202 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); | 316 | dir = iommu_tce_direction(tce); |
317 | if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, | ||
318 | tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) | ||
319 | return H_PARAMETER; | ||
320 | |||
321 | entry = ioba >> stt->page_shift; | ||
322 | |||
323 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | ||
324 | if (dir == DMA_NONE) | ||
325 | ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, | ||
326 | stit->tbl, entry); | ||
327 | else | ||
328 | ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, | ||
329 | stit->tbl, entry, ua, dir); | ||
330 | |||
331 | if (ret == H_SUCCESS) | ||
332 | continue; | ||
333 | |||
334 | if (ret == H_TOO_HARD) | ||
335 | return ret; | ||
336 | |||
337 | WARN_ON_ONCE_RM(1); | ||
338 | kvmppc_rm_clear_tce(stit->tbl, entry); | ||
339 | } | ||
340 | |||
341 | kvmppc_tce_put(stt, entry, tce); | ||
203 | 342 | ||
204 | return H_SUCCESS; | 343 | return H_SUCCESS; |
205 | } | 344 | } |
@@ -239,8 +378,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
239 | long i, ret = H_SUCCESS; | 378 | long i, ret = H_SUCCESS; |
240 | unsigned long tces, entry, ua = 0; | 379 | unsigned long tces, entry, ua = 0; |
241 | unsigned long *rmap = NULL; | 380 | unsigned long *rmap = NULL; |
381 | bool prereg = false; | ||
382 | struct kvmppc_spapr_tce_iommu_table *stit; | ||
242 | 383 | ||
243 | stt = kvmppc_find_table(vcpu, liobn); | 384 | stt = kvmppc_find_table(vcpu->kvm, liobn); |
244 | if (!stt) | 385 | if (!stt) |
245 | return H_TOO_HARD; | 386 | return H_TOO_HARD; |
246 | 387 | ||
@@ -259,23 +400,49 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
259 | if (ret != H_SUCCESS) | 400 | if (ret != H_SUCCESS) |
260 | return ret; | 401 | return ret; |
261 | 402 | ||
262 | if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) | 403 | if (mm_iommu_preregistered(vcpu->kvm->mm)) { |
263 | return H_TOO_HARD; | 404 | /* |
405 | * We get here if guest memory was pre-registered, which | ||
406 | * is normally the VFIO case; the gpa->hpa translation then | ||
407 | * does not depend on the HPT. | ||
408 | */ | ||
409 | struct mm_iommu_table_group_mem_t *mem; | ||
264 | 410 | ||
265 | rmap = (void *) vmalloc_to_phys(rmap); | 411 | if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) |
412 | return H_TOO_HARD; | ||
266 | 413 | ||
267 | /* | 414 | mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); |
268 | * Synchronize with the MMU notifier callbacks in | 415 | if (mem) |
269 | * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). | 416 | prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0; |
270 | * While we have the rmap lock, code running on other CPUs | 417 | } |
271 | * cannot finish unmapping the host real page that backs | 418 | |
272 | * this guest real page, so we are OK to access the host | 419 | if (!prereg) { |
273 | * real page. | 420 | /* |
274 | */ | 421 | * This usually means a guest with emulated devices only, where |
275 | lock_rmap(rmap); | 422 | * the TCE list is not in preregistered memory. |
276 | if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { | 423 | * We do not require memory to be preregistered in this case, |
277 | ret = H_TOO_HARD; | 424 | * so lock the rmap and use __find_linux_pte_or_hugepte(). |
278 | goto unlock_exit; | 425 | */ |
426 | if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) | ||
427 | return H_TOO_HARD; | ||
428 | |||
429 | rmap = (void *) vmalloc_to_phys(rmap); | ||
430 | if (WARN_ON_ONCE_RM(!rmap)) | ||
431 | return H_HARDWARE; | ||
432 | |||
433 | /* | ||
434 | * Synchronize with the MMU notifier callbacks in | ||
435 | * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). | ||
436 | * While we have the rmap lock, code running on other CPUs | ||
437 | * cannot finish unmapping the host real page that backs | ||
438 | * this guest real page, so we are OK to access the host | ||
439 | * real page. | ||
440 | */ | ||
441 | lock_rmap(rmap); | ||
442 | if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { | ||
443 | ret = H_TOO_HARD; | ||
444 | goto unlock_exit; | ||
445 | } | ||
279 | } | 446 | } |
280 | 447 | ||
281 | for (i = 0; i < npages; ++i) { | 448 | for (i = 0; i < npages; ++i) { |
@@ -285,11 +452,33 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
285 | if (ret != H_SUCCESS) | 452 | if (ret != H_SUCCESS) |
286 | goto unlock_exit; | 453 | goto unlock_exit; |
287 | 454 | ||
455 | ua = 0; | ||
456 | if (kvmppc_gpa_to_ua(vcpu->kvm, | ||
457 | tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), | ||
458 | &ua, NULL)) | ||
459 | return H_PARAMETER; | ||
460 | |||
461 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | ||
462 | ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, | ||
463 | stit->tbl, entry + i, ua, | ||
464 | iommu_tce_direction(tce)); | ||
465 | |||
466 | if (ret == H_SUCCESS) | ||
467 | continue; | ||
468 | |||
469 | if (ret == H_TOO_HARD) | ||
470 | goto unlock_exit; | ||
471 | |||
472 | WARN_ON_ONCE_RM(1); | ||
473 | kvmppc_rm_clear_tce(stit->tbl, entry); | ||
474 | } | ||
475 | |||
288 | kvmppc_tce_put(stt, entry + i, tce); | 476 | kvmppc_tce_put(stt, entry + i, tce); |
289 | } | 477 | } |
290 | 478 | ||
291 | unlock_exit: | 479 | unlock_exit: |
292 | unlock_rmap(rmap); | 480 | if (rmap) |
481 | unlock_rmap(rmap); | ||
293 | 482 | ||
294 | return ret; | 483 | return ret; |
295 | } | 484 | } |
@@ -300,8 +489,9 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
300 | { | 489 | { |
301 | struct kvmppc_spapr_tce_table *stt; | 490 | struct kvmppc_spapr_tce_table *stt; |
302 | long i, ret; | 491 | long i, ret; |
492 | struct kvmppc_spapr_tce_iommu_table *stit; | ||
303 | 493 | ||
304 | stt = kvmppc_find_table(vcpu, liobn); | 494 | stt = kvmppc_find_table(vcpu->kvm, liobn); |
305 | if (!stt) | 495 | if (!stt) |
306 | return H_TOO_HARD; | 496 | return H_TOO_HARD; |
307 | 497 | ||
@@ -313,6 +503,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
313 | if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) | 503 | if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) |
314 | return H_PARAMETER; | 504 | return H_PARAMETER; |
315 | 505 | ||
506 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | ||
507 | unsigned long entry = ioba >> stit->tbl->it_page_shift; | ||
508 | |||
509 | for (i = 0; i < npages; ++i) { | ||
510 | ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, | ||
511 | stit->tbl, entry + i); | ||
512 | |||
513 | if (ret == H_SUCCESS) | ||
514 | continue; | ||
515 | |||
516 | if (ret == H_TOO_HARD) | ||
517 | return ret; | ||
518 | |||
519 | WARN_ON_ONCE_RM(1); | ||
520 | kvmppc_rm_clear_tce(stit->tbl, entry); | ||
521 | } | ||
522 | } | ||
523 | |||
316 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) | 524 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) |
317 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); | 525 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); |
318 | 526 | ||
@@ -322,12 +530,13 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
322 | long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 530 | long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
323 | unsigned long ioba) | 531 | unsigned long ioba) |
324 | { | 532 | { |
325 | struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); | 533 | struct kvmppc_spapr_tce_table *stt; |
326 | long ret; | 534 | long ret; |
327 | unsigned long idx; | 535 | unsigned long idx; |
328 | struct page *page; | 536 | struct page *page; |
329 | u64 *tbl; | 537 | u64 *tbl; |
330 | 538 | ||
539 | stt = kvmppc_find_table(vcpu->kvm, liobn); | ||
331 | if (!stt) | 540 | if (!stt) |
332 | return H_TOO_HARD; | 541 | return H_TOO_HARD; |
333 | 542 | ||
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 8359752b3efc..68d68983948e 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
@@ -503,10 +503,18 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) | |||
503 | break; | 503 | break; |
504 | unprivileged: | 504 | unprivileged: |
505 | default: | 505 | default: |
506 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); | 506 | pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn); |
507 | #ifndef DEBUG_SPR | 507 | if (sprn & 0x10) { |
508 | emulated = EMULATE_FAIL; | 508 | if (kvmppc_get_msr(vcpu) & MSR_PR) { |
509 | #endif | 509 | kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); |
510 | emulated = EMULATE_AGAIN; | ||
511 | } | ||
512 | } else { | ||
513 | if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) { | ||
514 | kvmppc_core_queue_program(vcpu, SRR1_PROGILL); | ||
515 | emulated = EMULATE_AGAIN; | ||
516 | } | ||
517 | } | ||
510 | break; | 518 | break; |
511 | } | 519 | } |
512 | 520 | ||
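The fallback above classifies an unrecognized SPR by its number: if bit 0x10 is set the SPR is privileged, so a problem-state access queues a privileged-instruction program check; otherwise an access from user mode (or to SPR 0) queues an illegal-instruction program check. A small decision sketch of the mtspr side; the FAULT_* names are hypothetical:

enum spr_fault { FAULT_NONE, FAULT_PRIV, FAULT_ILL };

static enum spr_fault classify_bad_spr_write(int sprn, int user_mode)
{
        if (sprn & 0x10)                       /* privileged SPR number */
                return user_mode ? FAULT_PRIV : FAULT_NONE;
        return (user_mode || sprn == 0) ? FAULT_ILL : FAULT_NONE;
}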
@@ -648,10 +656,20 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val | |||
648 | break; | 656 | break; |
649 | default: | 657 | default: |
650 | unprivileged: | 658 | unprivileged: |
651 | printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); | 659 | pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn); |
652 | #ifndef DEBUG_SPR | 660 | if (sprn & 0x10) { |
653 | emulated = EMULATE_FAIL; | 661 | if (kvmppc_get_msr(vcpu) & MSR_PR) { |
654 | #endif | 662 | kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); |
663 | emulated = EMULATE_AGAIN; | ||
664 | } | ||
665 | } else { | ||
666 | if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 || | ||
667 | sprn == 4 || sprn == 5 || sprn == 6) { | ||
668 | kvmppc_core_queue_program(vcpu, SRR1_PROGILL); | ||
669 | emulated = EMULATE_AGAIN; | ||
670 | } | ||
671 | } | ||
672 | |||
655 | break; | 673 | break; |
656 | } | 674 | } |
657 | 675 | ||
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 128efb42ec4e..42b7a4fd57d9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -3648,11 +3648,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) | |||
3648 | return -EIO; | 3648 | return -EIO; |
3649 | 3649 | ||
3650 | mutex_lock(&kvm->lock); | 3650 | mutex_lock(&kvm->lock); |
3651 | if (!kvm->arch.pimap) | ||
3652 | goto unlock; | ||
3651 | 3653 | ||
3652 | if (kvm->arch.pimap == NULL) { | ||
3653 | mutex_unlock(&kvm->lock); | ||
3654 | return 0; | ||
3655 | } | ||
3656 | pimap = kvm->arch.pimap; | 3654 | pimap = kvm->arch.pimap; |
3657 | 3655 | ||
3658 | for (i = 0; i < pimap->n_mapped; i++) { | 3656 | for (i = 0; i < pimap->n_mapped; i++) { |
@@ -3677,7 +3675,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) | |||
3677 | * We don't free this structure even when the count goes to | 3675 | * We don't free this structure even when the count goes to |
3678 | * zero. The structure is freed when we destroy the VM. | 3676 | * zero. The structure is freed when we destroy the VM. |
3679 | */ | 3677 | */ |
3680 | 3678 | unlock: | |
3681 | mutex_unlock(&kvm->lock); | 3679 | mutex_unlock(&kvm->lock); |
3682 | return rc; | 3680 | return rc; |
3683 | } | 3681 | } |
@@ -3957,7 +3955,7 @@ static int kvmppc_book3s_init_hv(void) | |||
3957 | * indirectly, via OPAL. | 3955 | * indirectly, via OPAL. |
3958 | */ | 3956 | */ |
3959 | #ifdef CONFIG_SMP | 3957 | #ifdef CONFIG_SMP |
3960 | if (!xive_enabled() && !get_paca()->kvm_hstate.xics_phys) { | 3958 | if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) { |
3961 | struct device_node *np; | 3959 | struct device_node *np; |
3962 | 3960 | ||
3963 | np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); | 3961 | np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d4dfc0ca2a44..f026b062c0ed 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -537,8 +537,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
537 | int r = RESUME_GUEST; | 537 | int r = RESUME_GUEST; |
538 | int relocated; | 538 | int relocated; |
539 | int page_found = 0; | 539 | int page_found = 0; |
540 | struct kvmppc_pte pte; | 540 | struct kvmppc_pte pte = { 0 }; |
541 | bool is_mmio = false; | ||
542 | bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false; | 541 | bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false; |
543 | bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false; | 542 | bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false; |
544 | u64 vsid; | 543 | u64 vsid; |
@@ -616,8 +615,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
616 | /* Page not found in guest SLB */ | 615 | /* Page not found in guest SLB */ |
617 | kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); | 616 | kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); |
618 | kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); | 617 | kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); |
619 | } else if (!is_mmio && | 618 | } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) { |
620 | kvmppc_visible_gpa(vcpu, pte.raddr)) { | ||
621 | if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { | 619 | if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { |
622 | /* | 620 | /* |
623 | * There is already a host HPTE there, presumably | 621 | * There is already a host HPTE there, presumably |
@@ -627,7 +625,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
627 | kvmppc_mmu_unmap_page(vcpu, &pte); | 625 | kvmppc_mmu_unmap_page(vcpu, &pte); |
628 | } | 626 | } |
629 | /* The guest's PTE is not mapped yet. Map on the host */ | 627 | /* The guest's PTE is not mapped yet. Map on the host */ |
630 | kvmppc_mmu_map_page(vcpu, &pte, iswrite); | 628 | if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) { |
629 | /* Exit KVM if mapping failed */ | ||
630 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
631 | return RESUME_HOST; | ||
632 | } | ||
631 | if (data) | 633 | if (data) |
632 | vcpu->stat.sp_storage++; | 634 | vcpu->stat.sp_storage++; |
633 | else if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 635 | else if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 0514cbd4e533..3c296c2eacf8 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -300,6 +300,11 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) | |||
300 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); | 300 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); |
301 | } | 301 | } |
302 | 302 | ||
303 | void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu) | ||
304 | { | ||
305 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); | ||
306 | } | ||
307 | |||
303 | void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) | 308 | void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) |
304 | { | 309 | { |
305 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); | 310 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); |
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 0fda4230f6c0..77fd043b3ecc 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c | |||
@@ -797,9 +797,8 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
797 | host_tlb_params[0].sets = | 797 | host_tlb_params[0].sets = |
798 | host_tlb_params[0].entries / host_tlb_params[0].ways; | 798 | host_tlb_params[0].entries / host_tlb_params[0].ways; |
799 | host_tlb_params[1].sets = 1; | 799 | host_tlb_params[1].sets = 1; |
800 | 800 | vcpu_e500->h2g_tlb1_rmap = kcalloc(host_tlb_params[1].entries, | |
801 | vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * | 801 | sizeof(*vcpu_e500->h2g_tlb1_rmap), |
802 | host_tlb_params[1].entries, | ||
803 | GFP_KERNEL); | 802 | GFP_KERNEL); |
804 | if (!vcpu_e500->h2g_tlb1_rmap) | 803 | if (!vcpu_e500->h2g_tlb1_rmap) |
805 | return -EINVAL; | 804 | return -EINVAL; |
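The switch from kzalloc(n * size) to kcalloc(n, size) above is the usual hardening pattern: kcalloc keeps the element count and element size separate and checks the multiplication for overflow before allocating. A sketch of the preferred call shape (kernel context assumed, names illustrative):

/* overflow-checked, zeroed allocation of nr_entries elements */
ptr = kcalloc(nr_entries, sizeof(*ptr), GFP_KERNEL);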
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index b379146de55b..c873ffe55362 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -259,10 +259,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
259 | 259 | ||
260 | case OP_31_XOP_MFSPR: | 260 | case OP_31_XOP_MFSPR: |
261 | emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); | 261 | emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); |
262 | if (emulated == EMULATE_AGAIN) { | ||
263 | emulated = EMULATE_DONE; | ||
264 | advance = 0; | ||
265 | } | ||
262 | break; | 266 | break; |
263 | 267 | ||
264 | case OP_31_XOP_MTSPR: | 268 | case OP_31_XOP_MTSPR: |
265 | emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); | 269 | emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); |
270 | if (emulated == EMULATE_AGAIN) { | ||
271 | emulated = EMULATE_DONE; | ||
272 | advance = 0; | ||
273 | } | ||
266 | break; | 274 | break; |
267 | 275 | ||
268 | case OP_31_XOP_TLBSYNC: | 276 | case OP_31_XOP_TLBSYNC: |
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 6d3c0ee1d744..af833531af31 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c | |||
@@ -34,18 +34,38 @@ | |||
34 | #include "timing.h" | 34 | #include "timing.h" |
35 | #include "trace.h" | 35 | #include "trace.h" |
36 | 36 | ||
37 | /* XXX to do: | 37 | #ifdef CONFIG_PPC_FPU |
38 | * lhax | 38 | static bool kvmppc_check_fp_disabled(struct kvm_vcpu *vcpu) |
39 | * lhaux | 39 | { |
40 | * lswx | 40 | if (!(kvmppc_get_msr(vcpu) & MSR_FP)) { |
41 | * lswi | 41 | kvmppc_core_queue_fpunavail(vcpu); |
42 | * stswx | 42 | return true; |
43 | * stswi | 43 | } |
44 | * lha | 44 | |
45 | * lhau | 45 | return false; |
46 | * lmw | 46 | } |
47 | * stmw | 47 | #endif /* CONFIG_PPC_FPU */ |
48 | |||
49 | #ifdef CONFIG_VSX | ||
50 | static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu) | ||
51 | { | ||
52 | if (!(kvmppc_get_msr(vcpu) & MSR_VSX)) { | ||
53 | kvmppc_core_queue_vsx_unavail(vcpu); | ||
54 | return true; | ||
55 | } | ||
56 | |||
57 | return false; | ||
58 | } | ||
59 | #endif /* CONFIG_VSX */ | ||
60 | |||
61 | /* | ||
62 | * XXX to do: | ||
63 | * lfiwax, lfiwzx | ||
64 | * vector loads and stores | ||
48 | * | 65 | * |
66 | * Instructions that trap when used on cache-inhibited mappings | ||
67 | * are not emulated here: multiple and string instructions, | ||
68 | * lq/stq, and the load-reserve/store-conditional instructions. | ||
49 | */ | 69 | */ |
50 | int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | 70 | int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) |
51 | { | 71 | { |
@@ -66,6 +86,19 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
66 | rs = get_rs(inst); | 86 | rs = get_rs(inst); |
67 | rt = get_rt(inst); | 87 | rt = get_rt(inst); |
68 | 88 | ||
89 | /* | ||
90 | * if mmio_vsx_tx_sx_enabled == 0, copy data between | ||
91 | * VSR[0..31] and memory | ||
92 | * if mmio_vsx_tx_sx_enabled == 1, copy data between | ||
93 | * VSR[32..63] and memory | ||
94 | */ | ||
95 | vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst); | ||
96 | vcpu->arch.mmio_vsx_copy_nums = 0; | ||
97 | vcpu->arch.mmio_vsx_offset = 0; | ||
98 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE; | ||
99 | vcpu->arch.mmio_sp64_extend = 0; | ||
100 | vcpu->arch.mmio_sign_extend = 0; | ||
101 | |||
69 | switch (get_op(inst)) { | 102 | switch (get_op(inst)) { |
70 | case 31: | 103 | case 31: |
71 | switch (get_xop(inst)) { | 104 | switch (get_xop(inst)) { |
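The new setup code records the instruction's TX/SX bit in mmio_vsx_tx_sx_enabled; that bit extends the 5-bit register field so all 64 VSRs are addressable, with 0 selecting VSR[0..31] and 1 selecting VSR[32..63]. A small illustration of how the full register number is formed (the helper below is illustrative, not the kernel's get_tx_or_sx()):

#include <stdio.h>

/* 6-bit VSX register number: TX/SX supplies the high bit, T/S the low 5. */
static unsigned int vsx_reg_index(unsigned int rt, unsigned int tx)
{
        return ((tx & 1) << 5) | (rt & 0x1f);
}

int main(void)
{
        printf("%u\n", vsx_reg_index(7, 0));   /* VSR 7  */
        printf("%u\n", vsx_reg_index(7, 1));   /* VSR 39 */
        return 0;
}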
@@ -73,6 +106,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
73 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); | 106 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); |
74 | break; | 107 | break; |
75 | 108 | ||
109 | case OP_31_XOP_LWZUX: | ||
110 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); | ||
111 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
112 | break; | ||
113 | |||
76 | case OP_31_XOP_LBZX: | 114 | case OP_31_XOP_LBZX: |
77 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | 115 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); |
78 | break; | 116 | break; |
@@ -82,22 +120,36 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
82 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | 120 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); |
83 | break; | 121 | break; |
84 | 122 | ||
123 | case OP_31_XOP_STDX: | ||
124 | emulated = kvmppc_handle_store(run, vcpu, | ||
125 | kvmppc_get_gpr(vcpu, rs), 8, 1); | ||
126 | break; | ||
127 | |||
128 | case OP_31_XOP_STDUX: | ||
129 | emulated = kvmppc_handle_store(run, vcpu, | ||
130 | kvmppc_get_gpr(vcpu, rs), 8, 1); | ||
131 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
132 | break; | ||
133 | |||
85 | case OP_31_XOP_STWX: | 134 | case OP_31_XOP_STWX: |
86 | emulated = kvmppc_handle_store(run, vcpu, | 135 | emulated = kvmppc_handle_store(run, vcpu, |
87 | kvmppc_get_gpr(vcpu, rs), | 136 | kvmppc_get_gpr(vcpu, rs), 4, 1); |
88 | 4, 1); | 137 | break; |
138 | |||
139 | case OP_31_XOP_STWUX: | ||
140 | emulated = kvmppc_handle_store(run, vcpu, | ||
141 | kvmppc_get_gpr(vcpu, rs), 4, 1); | ||
142 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
89 | break; | 143 | break; |
90 | 144 | ||
91 | case OP_31_XOP_STBX: | 145 | case OP_31_XOP_STBX: |
92 | emulated = kvmppc_handle_store(run, vcpu, | 146 | emulated = kvmppc_handle_store(run, vcpu, |
93 | kvmppc_get_gpr(vcpu, rs), | 147 | kvmppc_get_gpr(vcpu, rs), 1, 1); |
94 | 1, 1); | ||
95 | break; | 148 | break; |
96 | 149 | ||
97 | case OP_31_XOP_STBUX: | 150 | case OP_31_XOP_STBUX: |
98 | emulated = kvmppc_handle_store(run, vcpu, | 151 | emulated = kvmppc_handle_store(run, vcpu, |
99 | kvmppc_get_gpr(vcpu, rs), | 152 | kvmppc_get_gpr(vcpu, rs), 1, 1); |
100 | 1, 1); | ||
101 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | 153 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); |
102 | break; | 154 | break; |
103 | 155 | ||
@@ -105,6 +157,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
105 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | 157 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); |
106 | break; | 158 | break; |
107 | 159 | ||
160 | case OP_31_XOP_LHAUX: | ||
161 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | ||
162 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
163 | break; | ||
164 | |||
108 | case OP_31_XOP_LHZX: | 165 | case OP_31_XOP_LHZX: |
109 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | 166 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); |
110 | break; | 167 | break; |
@@ -116,14 +173,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
116 | 173 | ||
117 | case OP_31_XOP_STHX: | 174 | case OP_31_XOP_STHX: |
118 | emulated = kvmppc_handle_store(run, vcpu, | 175 | emulated = kvmppc_handle_store(run, vcpu, |
119 | kvmppc_get_gpr(vcpu, rs), | 176 | kvmppc_get_gpr(vcpu, rs), 2, 1); |
120 | 2, 1); | ||
121 | break; | 177 | break; |
122 | 178 | ||
123 | case OP_31_XOP_STHUX: | 179 | case OP_31_XOP_STHUX: |
124 | emulated = kvmppc_handle_store(run, vcpu, | 180 | emulated = kvmppc_handle_store(run, vcpu, |
125 | kvmppc_get_gpr(vcpu, rs), | 181 | kvmppc_get_gpr(vcpu, rs), 2, 1); |
126 | 2, 1); | ||
127 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | 182 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); |
128 | break; | 183 | break; |
129 | 184 | ||
@@ -143,8 +198,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
143 | 198 | ||
144 | case OP_31_XOP_STWBRX: | 199 | case OP_31_XOP_STWBRX: |
145 | emulated = kvmppc_handle_store(run, vcpu, | 200 | emulated = kvmppc_handle_store(run, vcpu, |
146 | kvmppc_get_gpr(vcpu, rs), | 201 | kvmppc_get_gpr(vcpu, rs), 4, 0); |
147 | 4, 0); | ||
148 | break; | 202 | break; |
149 | 203 | ||
150 | case OP_31_XOP_LHBRX: | 204 | case OP_31_XOP_LHBRX: |
@@ -153,10 +207,258 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
153 | 207 | ||
154 | case OP_31_XOP_STHBRX: | 208 | case OP_31_XOP_STHBRX: |
155 | emulated = kvmppc_handle_store(run, vcpu, | 209 | emulated = kvmppc_handle_store(run, vcpu, |
156 | kvmppc_get_gpr(vcpu, rs), | 210 | kvmppc_get_gpr(vcpu, rs), 2, 0); |
157 | 2, 0); | 211 | break; |
212 | |||
213 | case OP_31_XOP_LDBRX: | ||
214 | emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0); | ||
215 | break; | ||
216 | |||
217 | case OP_31_XOP_STDBRX: | ||
218 | emulated = kvmppc_handle_store(run, vcpu, | ||
219 | kvmppc_get_gpr(vcpu, rs), 8, 0); | ||
220 | break; | ||
221 | |||
222 | case OP_31_XOP_LDX: | ||
223 | emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); | ||
224 | break; | ||
225 | |||
226 | case OP_31_XOP_LDUX: | ||
227 | emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); | ||
228 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
229 | break; | ||
230 | |||
231 | case OP_31_XOP_LWAX: | ||
232 | emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); | ||
233 | break; | ||
234 | |||
235 | case OP_31_XOP_LWAUX: | ||
236 | emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); | ||
237 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
238 | break; | ||
239 | |||
240 | #ifdef CONFIG_PPC_FPU | ||
241 | case OP_31_XOP_LFSX: | ||
242 | if (kvmppc_check_fp_disabled(vcpu)) | ||
243 | return EMULATE_DONE; | ||
244 | vcpu->arch.mmio_sp64_extend = 1; | ||
245 | emulated = kvmppc_handle_load(run, vcpu, | ||
246 | KVM_MMIO_REG_FPR|rt, 4, 1); | ||
247 | break; | ||
248 | |||
249 | case OP_31_XOP_LFSUX: | ||
250 | if (kvmppc_check_fp_disabled(vcpu)) | ||
251 | return EMULATE_DONE; | ||
252 | vcpu->arch.mmio_sp64_extend = 1; | ||
253 | emulated = kvmppc_handle_load(run, vcpu, | ||
254 | KVM_MMIO_REG_FPR|rt, 4, 1); | ||
255 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
256 | break; | ||
257 | |||
258 | case OP_31_XOP_LFDX: | ||
259 | if (kvmppc_check_fp_disabled(vcpu)) | ||
260 | return EMULATE_DONE; | ||
261 | emulated = kvmppc_handle_load(run, vcpu, | ||
262 | KVM_MMIO_REG_FPR|rt, 8, 1); | ||
263 | break; | ||
264 | |||
265 | case OP_31_XOP_LFDUX: | ||
266 | if (kvmppc_check_fp_disabled(vcpu)) | ||
267 | return EMULATE_DONE; | ||
268 | emulated = kvmppc_handle_load(run, vcpu, | ||
269 | KVM_MMIO_REG_FPR|rt, 8, 1); | ||
270 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
271 | break; | ||
272 | |||
273 | case OP_31_XOP_LFIWAX: | ||
274 | if (kvmppc_check_fp_disabled(vcpu)) | ||
275 | return EMULATE_DONE; | ||
276 | emulated = kvmppc_handle_loads(run, vcpu, | ||
277 | KVM_MMIO_REG_FPR|rt, 4, 1); | ||
278 | break; | ||
279 | |||
280 | case OP_31_XOP_LFIWZX: | ||
281 | if (kvmppc_check_fp_disabled(vcpu)) | ||
282 | return EMULATE_DONE; | ||
283 | emulated = kvmppc_handle_load(run, vcpu, | ||
284 | KVM_MMIO_REG_FPR|rt, 4, 1); | ||
285 | break; | ||
286 | |||
287 | case OP_31_XOP_STFSX: | ||
288 | if (kvmppc_check_fp_disabled(vcpu)) | ||
289 | return EMULATE_DONE; | ||
290 | vcpu->arch.mmio_sp64_extend = 1; | ||
291 | emulated = kvmppc_handle_store(run, vcpu, | ||
292 | VCPU_FPR(vcpu, rs), 4, 1); | ||
293 | break; | ||
294 | |||
295 | case OP_31_XOP_STFSUX: | ||
296 | if (kvmppc_check_fp_disabled(vcpu)) | ||
297 | return EMULATE_DONE; | ||
298 | vcpu->arch.mmio_sp64_extend = 1; | ||
299 | emulated = kvmppc_handle_store(run, vcpu, | ||
300 | VCPU_FPR(vcpu, rs), 4, 1); | ||
301 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
302 | break; | ||
303 | |||
304 | case OP_31_XOP_STFDX: | ||
305 | if (kvmppc_check_fp_disabled(vcpu)) | ||
306 | return EMULATE_DONE; | ||
307 | emulated = kvmppc_handle_store(run, vcpu, | ||
308 | VCPU_FPR(vcpu, rs), 8, 1); | ||
309 | break; | ||
310 | |||
311 | case OP_31_XOP_STFDUX: | ||
312 | if (kvmppc_check_fp_disabled(vcpu)) | ||
313 | return EMULATE_DONE; | ||
314 | emulated = kvmppc_handle_store(run, vcpu, | ||
315 | VCPU_FPR(vcpu, rs), 8, 1); | ||
316 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
317 | break; | ||
318 | |||
319 | case OP_31_XOP_STFIWX: | ||
320 | if (kvmppc_check_fp_disabled(vcpu)) | ||
321 | return EMULATE_DONE; | ||
322 | emulated = kvmppc_handle_store(run, vcpu, | ||
323 | VCPU_FPR(vcpu, rs), 4, 1); | ||
324 | break; | ||
325 | #endif | ||
326 | |||
327 | #ifdef CONFIG_VSX | ||
328 | case OP_31_XOP_LXSDX: | ||
329 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
330 | return EMULATE_DONE; | ||
331 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
332 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
333 | emulated = kvmppc_handle_vsx_load(run, vcpu, | ||
334 | KVM_MMIO_REG_VSX|rt, 8, 1, 0); | ||
335 | break; | ||
336 | |||
337 | case OP_31_XOP_LXSSPX: | ||
338 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
339 | return EMULATE_DONE; | ||
340 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
341 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
342 | vcpu->arch.mmio_sp64_extend = 1; | ||
343 | emulated = kvmppc_handle_vsx_load(run, vcpu, | ||
344 | KVM_MMIO_REG_VSX|rt, 4, 1, 0); | ||
345 | break; | ||
346 | |||
347 | case OP_31_XOP_LXSIWAX: | ||
348 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
349 | return EMULATE_DONE; | ||
350 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
351 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
352 | emulated = kvmppc_handle_vsx_load(run, vcpu, | ||
353 | KVM_MMIO_REG_VSX|rt, 4, 1, 1); | ||
354 | break; | ||
355 | |||
356 | case OP_31_XOP_LXSIWZX: | ||
357 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
358 | return EMULATE_DONE; | ||
359 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
360 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
361 | emulated = kvmppc_handle_vsx_load(run, vcpu, | ||
362 | KVM_MMIO_REG_VSX|rt, 4, 1, 0); | ||
363 | break; | ||
364 | |||
365 | case OP_31_XOP_LXVD2X: | ||
366 | /* | ||
367 | * The emulated load/store sequence for these instructions is: | ||
368 | * Step 1: exit from the VM via the page-fault handler; KVM saves the VSRs. | ||
369 | * See guest_exit_cont->store_fp_state->SAVE_32VSRS | ||
370 | * for reference. | ||
371 | * | ||
372 | * Step 2: copy data between memory and the VCPU. | ||
373 | * Note: for LXVD2X/STXVD2X/LXVW4X/STXVW4X we use | ||
374 | * 2 copies * 8 bytes or 4 copies * 4 bytes | ||
375 | * to emulate a single 16-byte copy. | ||
376 | * There is also an endianness issue here: mind the | ||
377 | * layout of memory. | ||
378 | * See the LXVD2X_ROT/STXVD2X_ROT macros for reference. | ||
379 | * If the host is little-endian, KVM uses XXSWAPD for | ||
380 | * LXVD2X_ROT/STXVD2X_ROT, | ||
381 | * so on a little-endian host | ||
382 | * the two doublewords in memory are swapped. | ||
383 | * | ||
384 | * Step 3: return to the guest; KVM restores the registers. | ||
385 | * See kvmppc_hv_entry->load_fp_state->REST_32VSRS | ||
386 | * for reference. | ||
387 | */ | ||
388 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
389 | return EMULATE_DONE; | ||
390 | vcpu->arch.mmio_vsx_copy_nums = 2; | ||
391 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
392 | emulated = kvmppc_handle_vsx_load(run, vcpu, | ||
393 | KVM_MMIO_REG_VSX|rt, 8, 1, 0); | ||
394 | break; | ||
395 | |||
396 | case OP_31_XOP_LXVW4X: | ||
397 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
398 | return EMULATE_DONE; | ||
399 | vcpu->arch.mmio_vsx_copy_nums = 4; | ||
400 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; | ||
401 | emulated = kvmppc_handle_vsx_load(run, vcpu, | ||
402 | KVM_MMIO_REG_VSX|rt, 4, 1, 0); | ||
403 | break; | ||
404 | |||
405 | case OP_31_XOP_LXVDSX: | ||
406 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
407 | return EMULATE_DONE; | ||
408 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
409 | vcpu->arch.mmio_vsx_copy_type = | ||
410 | KVMPPC_VSX_COPY_DWORD_LOAD_DUMP; | ||
411 | emulated = kvmppc_handle_vsx_load(run, vcpu, | ||
412 | KVM_MMIO_REG_VSX|rt, 8, 1, 0); | ||
413 | break; | ||
414 | |||
415 | case OP_31_XOP_STXSDX: | ||
416 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
417 | return EMULATE_DONE; | ||
418 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
419 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
420 | emulated = kvmppc_handle_vsx_store(run, vcpu, | ||
421 | rs, 8, 1); | ||
158 | break; | 422 | break; |
159 | 423 | ||
424 | case OP_31_XOP_STXSSPX: | ||
425 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
426 | return EMULATE_DONE; | ||
427 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
428 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
429 | vcpu->arch.mmio_sp64_extend = 1; | ||
430 | emulated = kvmppc_handle_vsx_store(run, vcpu, | ||
431 | rs, 4, 1); | ||
432 | break; | ||
433 | |||
434 | case OP_31_XOP_STXSIWX: | ||
435 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
436 | return EMULATE_DONE; | ||
437 | vcpu->arch.mmio_vsx_offset = 1; | ||
438 | vcpu->arch.mmio_vsx_copy_nums = 1; | ||
439 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; | ||
440 | emulated = kvmppc_handle_vsx_store(run, vcpu, | ||
441 | rs, 4, 1); | ||
442 | break; | ||
443 | |||
444 | case OP_31_XOP_STXVD2X: | ||
445 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
446 | return EMULATE_DONE; | ||
447 | vcpu->arch.mmio_vsx_copy_nums = 2; | ||
448 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; | ||
449 | emulated = kvmppc_handle_vsx_store(run, vcpu, | ||
450 | rs, 8, 1); | ||
451 | break; | ||
452 | |||
453 | case OP_31_XOP_STXVW4X: | ||
454 | if (kvmppc_check_vsx_disabled(vcpu)) | ||
455 | return EMULATE_DONE; | ||
456 | vcpu->arch.mmio_vsx_copy_nums = 4; | ||
457 | vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; | ||
458 | emulated = kvmppc_handle_vsx_store(run, vcpu, | ||
459 | rs, 4, 1); | ||
460 | break; | ||
461 | #endif /* CONFIG_VSX */ | ||
160 | default: | 462 | default: |
161 | emulated = EMULATE_FAIL; | 463 | emulated = EMULATE_FAIL; |
162 | break; | 464 | break; |
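As the LXVD2X comment earlier in this hunk explains, a 16-byte vector MMIO access is emulated as two 8-byte copies, and a little-endian host swaps the two doublewords (the XXSWAPD step) so the register ends up with the layout the guest expects. A standalone sketch of that split-and-swap; it models the idea only, not the kernel code:

#include <stdint.h>
#include <string.h>

/* Fill a VSR, modelled as two doublewords, from 16 bytes of memory,
 * swapping the doublewords when the host is little-endian. */
static void lxvd2x_sketch(uint64_t vsr[2], const void *mem, int host_le)
{
        uint64_t dw[2];

        memcpy(dw, mem, sizeof(dw));           /* 2 copies * 8 bytes */
        vsr[0] = host_le ? dw[1] : dw[0];
        vsr[1] = host_le ? dw[0] : dw[1];
}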
@@ -167,10 +469,60 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
167 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); | 469 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); |
168 | break; | 470 | break; |
169 | 471 | ||
170 | /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */ | 472 | #ifdef CONFIG_PPC_FPU |
473 | case OP_STFS: | ||
474 | if (kvmppc_check_fp_disabled(vcpu)) | ||
475 | return EMULATE_DONE; | ||
476 | vcpu->arch.mmio_sp64_extend = 1; | ||
477 | emulated = kvmppc_handle_store(run, vcpu, | ||
478 | VCPU_FPR(vcpu, rs), | ||
479 | 4, 1); | ||
480 | break; | ||
481 | |||
482 | case OP_STFSU: | ||
483 | if (kvmppc_check_fp_disabled(vcpu)) | ||
484 | return EMULATE_DONE; | ||
485 | vcpu->arch.mmio_sp64_extend = 1; | ||
486 | emulated = kvmppc_handle_store(run, vcpu, | ||
487 | VCPU_FPR(vcpu, rs), | ||
488 | 4, 1); | ||
489 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
490 | break; | ||
491 | |||
492 | case OP_STFD: | ||
493 | if (kvmppc_check_fp_disabled(vcpu)) | ||
494 | return EMULATE_DONE; | ||
495 | emulated = kvmppc_handle_store(run, vcpu, | ||
496 | VCPU_FPR(vcpu, rs), | ||
497 | 8, 1); | ||
498 | break; | ||
499 | |||
500 | case OP_STFDU: | ||
501 | if (kvmppc_check_fp_disabled(vcpu)) | ||
502 | return EMULATE_DONE; | ||
503 | emulated = kvmppc_handle_store(run, vcpu, | ||
504 | VCPU_FPR(vcpu, rs), | ||
505 | 8, 1); | ||
506 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
507 | break; | ||
508 | #endif | ||
509 | |||
171 | case OP_LD: | 510 | case OP_LD: |
172 | rt = get_rt(inst); | 511 | rt = get_rt(inst); |
173 | emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); | 512 | switch (inst & 3) { |
513 | case 0: /* ld */ | ||
514 | emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); | ||
515 | break; | ||
516 | case 1: /* ldu */ | ||
517 | emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); | ||
518 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
519 | break; | ||
520 | case 2: /* lwa */ | ||
521 | emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); | ||
522 | break; | ||
523 | default: | ||
524 | emulated = EMULATE_FAIL; | ||
525 | } | ||
174 | break; | 526 | break; |
175 | 527 | ||
176 | case OP_LWZU: | 528 | case OP_LWZU: |
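OP_LD and OP_STD above are DS-form opcodes, so the two least-significant bits of the instruction word act as a sub-opcode: ld/ldu/lwa for opcode 58 and std/stdu for opcode 62, which is why the handlers switch on inst & 3. A tiny decode sketch:

/* DS-form sub-opcode, taken from the two low bits of the instruction. */
static const char *ds_variant(unsigned int inst, int is_store)
{
        switch (inst & 3) {
        case 0: return is_store ? "std" : "ld";
        case 1: return is_store ? "stdu" : "ldu";
        case 2: return is_store ? "(not handled)" : "lwa";
        default: return "(reserved)";
        }
}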
@@ -193,31 +545,37 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
193 | 4, 1); | 545 | 4, 1); |
194 | break; | 546 | break; |
195 | 547 | ||
196 | /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ | ||
197 | case OP_STD: | 548 | case OP_STD: |
198 | rs = get_rs(inst); | 549 | rs = get_rs(inst); |
199 | emulated = kvmppc_handle_store(run, vcpu, | 550 | switch (inst & 3) { |
200 | kvmppc_get_gpr(vcpu, rs), | 551 | case 0: /* std */ |
201 | 8, 1); | 552 | emulated = kvmppc_handle_store(run, vcpu, |
553 | kvmppc_get_gpr(vcpu, rs), 8, 1); | ||
554 | break; | ||
555 | case 1: /* stdu */ | ||
556 | emulated = kvmppc_handle_store(run, vcpu, | ||
557 | kvmppc_get_gpr(vcpu, rs), 8, 1); | ||
558 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
559 | break; | ||
560 | default: | ||
561 | emulated = EMULATE_FAIL; | ||
562 | } | ||
202 | break; | 563 | break; |
203 | 564 | ||
204 | case OP_STWU: | 565 | case OP_STWU: |
205 | emulated = kvmppc_handle_store(run, vcpu, | 566 | emulated = kvmppc_handle_store(run, vcpu, |
206 | kvmppc_get_gpr(vcpu, rs), | 567 | kvmppc_get_gpr(vcpu, rs), 4, 1); |
207 | 4, 1); | ||
208 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | 568 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); |
209 | break; | 569 | break; |
210 | 570 | ||
211 | case OP_STB: | 571 | case OP_STB: |
212 | emulated = kvmppc_handle_store(run, vcpu, | 572 | emulated = kvmppc_handle_store(run, vcpu, |
213 | kvmppc_get_gpr(vcpu, rs), | 573 | kvmppc_get_gpr(vcpu, rs), 1, 1); |
214 | 1, 1); | ||
215 | break; | 574 | break; |
216 | 575 | ||
217 | case OP_STBU: | 576 | case OP_STBU: |
218 | emulated = kvmppc_handle_store(run, vcpu, | 577 | emulated = kvmppc_handle_store(run, vcpu, |
219 | kvmppc_get_gpr(vcpu, rs), | 578 | kvmppc_get_gpr(vcpu, rs), 1, 1); |
220 | 1, 1); | ||
221 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | 579 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); |
222 | break; | 580 | break; |
223 | 581 | ||
@@ -241,16 +599,48 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) | |||
241 | 599 | ||
242 | case OP_STH: | 600 | case OP_STH: |
243 | emulated = kvmppc_handle_store(run, vcpu, | 601 | emulated = kvmppc_handle_store(run, vcpu, |
244 | kvmppc_get_gpr(vcpu, rs), | 602 | kvmppc_get_gpr(vcpu, rs), 2, 1); |
245 | 2, 1); | ||
246 | break; | 603 | break; |
247 | 604 | ||
248 | case OP_STHU: | 605 | case OP_STHU: |
249 | emulated = kvmppc_handle_store(run, vcpu, | 606 | emulated = kvmppc_handle_store(run, vcpu, |
250 | kvmppc_get_gpr(vcpu, rs), | 607 | kvmppc_get_gpr(vcpu, rs), 2, 1); |
251 | 2, 1); | 608 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); |
609 | break; | ||
610 | |||
611 | #ifdef CONFIG_PPC_FPU | ||
612 | case OP_LFS: | ||
613 | if (kvmppc_check_fp_disabled(vcpu)) | ||
614 | return EMULATE_DONE; | ||
615 | vcpu->arch.mmio_sp64_extend = 1; | ||
616 | emulated = kvmppc_handle_load(run, vcpu, | ||
617 | KVM_MMIO_REG_FPR|rt, 4, 1); | ||
618 | break; | ||
619 | |||
620 | case OP_LFSU: | ||
621 | if (kvmppc_check_fp_disabled(vcpu)) | ||
622 | return EMULATE_DONE; | ||
623 | vcpu->arch.mmio_sp64_extend = 1; | ||
624 | emulated = kvmppc_handle_load(run, vcpu, | ||
625 | KVM_MMIO_REG_FPR|rt, 4, 1); | ||
626 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | ||
627 | break; | ||
628 | |||
629 | case OP_LFD: | ||
630 | if (kvmppc_check_fp_disabled(vcpu)) | ||
631 | return EMULATE_DONE; | ||
632 | emulated = kvmppc_handle_load(run, vcpu, | ||
633 | KVM_MMIO_REG_FPR|rt, 8, 1); | ||
634 | break; | ||
635 | |||
636 | case OP_LFDU: | ||
637 | if (kvmppc_check_fp_disabled(vcpu)) | ||
638 | return EMULATE_DONE; | ||
639 | emulated = kvmppc_handle_load(run, vcpu, | ||
640 | KVM_MMIO_REG_FPR|rt, 8, 1); | ||
252 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); | 641 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); |
253 | break; | 642 | break; |
643 | #endif | ||
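The new floating-point load cases above encode their target as KVM_MMIO_REG_FPR OR'ed with the register number, so the MMIO completion path can later recover both the register file and the index from io_gpr. A small sketch of that class-bit encoding, with placeholder constants rather than the kernel's KVM_MMIO_REG_* values:

#include <assert.h>

#define REG_MASK      0x001f	/* low bits: register number (hypothetical) */
#define REG_CLASS_GPR 0x0000
#define REG_CLASS_FPR 0x0020	/* class bit for floating-point registers */

int main(void)
{
	unsigned int io_gpr = REG_CLASS_FPR | 7;	/* "load into FPR 7" */

	assert((io_gpr & ~REG_MASK) == REG_CLASS_FPR);	/* which register file */
	assert((io_gpr & REG_MASK) == 7);		/* which register */
	return 0;
}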
254 | 644 | ||
255 | default: | 645 | default: |
256 | emulated = EMULATE_FAIL; | 646 | emulated = EMULATE_FAIL; |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index de79bd721ec7..e4b58f2e335e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/cputhreads.h> | 37 | #include <asm/cputhreads.h> |
38 | #include <asm/irqflags.h> | 38 | #include <asm/irqflags.h> |
39 | #include <asm/iommu.h> | 39 | #include <asm/iommu.h> |
40 | #include <asm/switch_to.h> | ||
40 | #include <asm/xive.h> | 41 | #include <asm/xive.h> |
41 | 42 | ||
42 | #include "timing.h" | 43 | #include "timing.h" |
@@ -526,11 +527,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
526 | /* We support this only for PR */ | 527 | /* We support this only for PR */ |
527 | r = !hv_enabled; | 528 | r = !hv_enabled; |
528 | break; | 529 | break; |
529 | #ifdef CONFIG_KVM_MMIO | ||
530 | case KVM_CAP_COALESCED_MMIO: | ||
531 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | ||
532 | break; | ||
533 | #endif | ||
534 | #ifdef CONFIG_KVM_MPIC | 530 | #ifdef CONFIG_KVM_MPIC |
535 | case KVM_CAP_IRQ_MPIC: | 531 | case KVM_CAP_IRQ_MPIC: |
536 | r = 1; | 532 | r = 1; |
@@ -540,6 +536,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
540 | #ifdef CONFIG_PPC_BOOK3S_64 | 536 | #ifdef CONFIG_PPC_BOOK3S_64 |
541 | case KVM_CAP_SPAPR_TCE: | 537 | case KVM_CAP_SPAPR_TCE: |
542 | case KVM_CAP_SPAPR_TCE_64: | 538 | case KVM_CAP_SPAPR_TCE_64: |
539 | /* fallthrough */ | ||
540 | case KVM_CAP_SPAPR_TCE_VFIO: | ||
543 | case KVM_CAP_PPC_RTAS: | 541 | case KVM_CAP_PPC_RTAS: |
544 | case KVM_CAP_PPC_FIXUP_HCALL: | 542 | case KVM_CAP_PPC_FIXUP_HCALL: |
545 | case KVM_CAP_PPC_ENABLE_HCALL: | 543 | case KVM_CAP_PPC_ENABLE_HCALL: |
@@ -811,6 +809,129 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, | |||
811 | kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod); | 809 | kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod); |
812 | } | 810 | } |
813 | 811 | ||
812 | #ifdef CONFIG_VSX | ||
813 | static inline int kvmppc_get_vsr_dword_offset(int index) | ||
814 | { | ||
815 | int offset; | ||
816 | |||
817 | if ((index != 0) && (index != 1)) | ||
818 | return -1; | ||
819 | |||
820 | #ifdef __BIG_ENDIAN | ||
821 | offset = index; | ||
822 | #else | ||
823 | offset = 1 - index; | ||
824 | #endif | ||
825 | |||
826 | return offset; | ||
827 | } | ||
828 | |||
829 | static inline int kvmppc_get_vsr_word_offset(int index) | ||
830 | { | ||
831 | int offset; | ||
832 | |||
833 | if ((index > 3) || (index < 0)) | ||
834 | return -1; | ||
835 | |||
836 | #ifdef __BIG_ENDIAN | ||
837 | offset = index; | ||
838 | #else | ||
839 | offset = 3 - index; | ||
840 | #endif | ||
841 | return offset; | ||
842 | } | ||
843 | |||
844 | static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu, | ||
845 | u64 gpr) | ||
846 | { | ||
847 | union kvmppc_one_reg val; | ||
848 | int offset = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset); | ||
849 | int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; | ||
850 | |||
851 | if (offset == -1) | ||
852 | return; | ||
853 | |||
854 | if (vcpu->arch.mmio_vsx_tx_sx_enabled) { | ||
855 | val.vval = VCPU_VSX_VR(vcpu, index); | ||
856 | val.vsxval[offset] = gpr; | ||
857 | VCPU_VSX_VR(vcpu, index) = val.vval; | ||
858 | } else { | ||
859 | VCPU_VSX_FPR(vcpu, index, offset) = gpr; | ||
860 | } | ||
861 | } | ||
862 | |||
863 | static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu, | ||
864 | u64 gpr) | ||
865 | { | ||
866 | union kvmppc_one_reg val; | ||
867 | int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; | ||
868 | |||
869 | if (vcpu->arch.mmio_vsx_tx_sx_enabled) { | ||
870 | val.vval = VCPU_VSX_VR(vcpu, index); | ||
871 | val.vsxval[0] = gpr; | ||
872 | val.vsxval[1] = gpr; | ||
873 | VCPU_VSX_VR(vcpu, index) = val.vval; | ||
874 | } else { | ||
875 | VCPU_VSX_FPR(vcpu, index, 0) = gpr; | ||
876 | VCPU_VSX_FPR(vcpu, index, 1) = gpr; | ||
877 | } | ||
878 | } | ||
879 | |||
880 | static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, | ||
881 | u32 gpr32) | ||
882 | { | ||
883 | union kvmppc_one_reg val; | ||
884 | int offset = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset); | ||
885 | int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; | ||
886 | int dword_offset, word_offset; | ||
887 | |||
888 | if (offset == -1) | ||
889 | return; | ||
890 | |||
891 | if (vcpu->arch.mmio_vsx_tx_sx_enabled) { | ||
892 | val.vval = VCPU_VSX_VR(vcpu, index); | ||
893 | val.vsx32val[offset] = gpr32; | ||
894 | VCPU_VSX_VR(vcpu, index) = val.vval; | ||
895 | } else { | ||
896 | dword_offset = offset / 2; | ||
897 | word_offset = offset % 2; | ||
898 | val.vsxval[0] = VCPU_VSX_FPR(vcpu, index, dword_offset); | ||
899 | val.vsx32val[word_offset] = gpr32; | ||
900 | VCPU_VSX_FPR(vcpu, index, dword_offset) = val.vsxval[0]; | ||
901 | } | ||
902 | } | ||
903 | #endif /* CONFIG_VSX */ | ||
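The two offset helpers above mirror the element index on little-endian hosts so that a doubleword or word written by the MMIO path lands in the architecturally expected lane of the 128-bit VSX register. A user-space sketch of the same mapping, using a runtime endianness probe in place of the kernel's #ifdef __BIG_ENDIAN:

#include <stdio.h>

static int host_is_big_endian(void)
{
	const unsigned int one = 1;
	return *(const unsigned char *)&one == 0;
}

static int vsr_dword_offset(int index)
{
	if (index != 0 && index != 1)
		return -1;
	return host_is_big_endian() ? index : 1 - index;
}

static int vsr_word_offset(int index)
{
	if (index < 0 || index > 3)
		return -1;
	return host_is_big_endian() ? index : 3 - index;
}

int main(void)
{
	printf("dword 0 -> lane %d, word 3 -> lane %d\n",
	       vsr_dword_offset(0), vsr_word_offset(3));
	return 0;
}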
904 | |||
905 | #ifdef CONFIG_PPC_FPU | ||
906 | static inline u64 sp_to_dp(u32 fprs) | ||
907 | { | ||
908 | u64 fprd; | ||
909 | |||
910 | preempt_disable(); | ||
911 | enable_kernel_fp(); | ||
912 | asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs) | ||
913 | : "fr0"); | ||
914 | preempt_enable(); | ||
915 | return fprd; | ||
916 | } | ||
917 | |||
918 | static inline u32 dp_to_sp(u64 fprd) | ||
919 | { | ||
920 | u32 fprs; | ||
921 | |||
922 | preempt_disable(); | ||
923 | enable_kernel_fp(); | ||
924 | asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd) | ||
925 | : "fr0"); | ||
926 | preempt_enable(); | ||
927 | return fprs; | ||
928 | } | ||
929 | |||
930 | #else | ||
931 | #define sp_to_dp(x) (x) | ||
932 | #define dp_to_sp(x) (x) | ||
933 | #endif /* CONFIG_PPC_FPU */ | ||
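sp_to_dp() and dp_to_sp() above use the FP hardware (lfs/stfd and lfd/stfs) to convert between the 32-bit single-precision memory image and the 64-bit double-precision register image. A portable, purely illustrative C version of the same conversion; it may differ from the hardware in corner cases such as signalling NaNs:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint64_t sp_to_dp_sketch(uint32_t sp_bits)
{
	float f;
	double d;
	uint64_t dp_bits;

	memcpy(&f, &sp_bits, sizeof(f));	/* reinterpret the 32-bit image */
	d = (double)f;				/* widen to double precision */
	memcpy(&dp_bits, &d, sizeof(dp_bits));
	return dp_bits;
}

static uint32_t dp_to_sp_sketch(uint64_t dp_bits)
{
	double d;
	float f;
	uint32_t sp_bits;

	memcpy(&d, &dp_bits, sizeof(d));
	f = (float)d;				/* narrow to single precision */
	memcpy(&sp_bits, &f, sizeof(sp_bits));
	return sp_bits;
}

int main(void)
{
	uint32_t one_sp = 0x3f800000;	/* 1.0f */

	printf("sp 0x%08x -> dp 0x%016llx -> sp 0x%08x\n",
	       (unsigned)one_sp,
	       (unsigned long long)sp_to_dp_sketch(one_sp),
	       (unsigned)dp_to_sp_sketch(sp_to_dp_sketch(one_sp)));
	return 0;
}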
934 | |||
814 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | 935 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, |
815 | struct kvm_run *run) | 936 | struct kvm_run *run) |
816 | { | 937 | { |
@@ -837,6 +958,10 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
837 | } | 958 | } |
838 | } | 959 | } |
839 | 960 | ||
961 | /* conversion between single and double precision */ | ||
962 | if ((vcpu->arch.mmio_sp64_extend) && (run->mmio.len == 4)) | ||
963 | gpr = sp_to_dp(gpr); | ||
964 | |||
840 | if (vcpu->arch.mmio_sign_extend) { | 965 | if (vcpu->arch.mmio_sign_extend) { |
841 | switch (run->mmio.len) { | 966 | switch (run->mmio.len) { |
842 | #ifdef CONFIG_PPC64 | 967 | #ifdef CONFIG_PPC64 |
@@ -853,8 +978,6 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
853 | } | 978 | } |
854 | } | 979 | } |
855 | 980 | ||
856 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | ||
857 | |||
858 | switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { | 981 | switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { |
859 | case KVM_MMIO_REG_GPR: | 982 | case KVM_MMIO_REG_GPR: |
860 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | 983 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); |
@@ -871,6 +994,17 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
871 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; | 994 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; |
872 | break; | 995 | break; |
873 | #endif | 996 | #endif |
997 | #ifdef CONFIG_VSX | ||
998 | case KVM_MMIO_REG_VSX: | ||
999 | if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD) | ||
1000 | kvmppc_set_vsr_dword(vcpu, gpr); | ||
1001 | else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD) | ||
1002 | kvmppc_set_vsr_word(vcpu, gpr); | ||
1003 | else if (vcpu->arch.mmio_vsx_copy_type == | ||
1004 | KVMPPC_VSX_COPY_DWORD_LOAD_DUMP) | ||
1005 | kvmppc_set_vsr_dword_dump(vcpu, gpr); | ||
1006 | break; | ||
1007 | #endif | ||
874 | default: | 1008 | default: |
875 | BUG(); | 1009 | BUG(); |
876 | } | 1010 | } |
@@ -937,6 +1071,35 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
937 | return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1); | 1071 | return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1); |
938 | } | 1072 | } |
939 | 1073 | ||
1074 | #ifdef CONFIG_VSX | ||
1075 | int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
1076 | unsigned int rt, unsigned int bytes, | ||
1077 | int is_default_endian, int mmio_sign_extend) | ||
1078 | { | ||
1079 | enum emulation_result emulated = EMULATE_DONE; | ||
1080 | |||
1081 | /* Currently, mmio_vsx_copy_nums is limited to at most 4 elements */ | ||
1082 | if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || | ||
1083 | (vcpu->arch.mmio_vsx_copy_nums < 0) ) { | ||
1084 | return EMULATE_FAIL; | ||
1085 | } | ||
1086 | |||
1087 | while (vcpu->arch.mmio_vsx_copy_nums) { | ||
1088 | emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, | ||
1089 | is_default_endian, mmio_sign_extend); | ||
1090 | |||
1091 | if (emulated != EMULATE_DONE) | ||
1092 | break; | ||
1093 | |||
1094 | vcpu->arch.paddr_accessed += run->mmio.len; | ||
1095 | |||
1096 | vcpu->arch.mmio_vsx_copy_nums--; | ||
1097 | vcpu->arch.mmio_vsx_offset++; | ||
1098 | } | ||
1099 | return emulated; | ||
1100 | } | ||
1101 | #endif /* CONFIG_VSX */ | ||
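kvmppc_handle_vsx_load() above splits one guest VSX access into mmio_vsx_copy_nums element-sized MMIO operations, advancing the guest physical address and the element offset after each completed chunk. A self-contained sketch of that chunking loop; the emulate_one() callback and result codes are hypothetical stand-ins for the kernel's __kvmppc_handle_load() and emulation_result values:

#include <stdio.h>

enum emu_result { EMU_DONE, EMU_DO_MMIO, EMU_FAIL };

struct vsx_mmio_state {
	unsigned long paddr;
	int copy_nums;		/* elements still to transfer (at most 4) */
	int offset;		/* element index within the VSX register */
};

static enum emu_result emulate_one(struct vsx_mmio_state *s, unsigned int len)
{
	printf("MMIO %u bytes at 0x%lx into element %d\n", len, s->paddr, s->offset);
	return EMU_DONE;	/* pretend the access completed in-kernel */
}

static enum emu_result handle_vsx_load(struct vsx_mmio_state *s, unsigned int len)
{
	enum emu_result r = EMU_DONE;

	if (s->copy_nums < 0 || s->copy_nums > 4)
		return EMU_FAIL;

	while (s->copy_nums) {
		r = emulate_one(s, len);
		if (r != EMU_DONE)
			break;		/* e.g. must exit to userspace first */
		s->paddr += len;
		s->copy_nums--;
		s->offset++;
	}
	return r;
}

int main(void)
{
	struct vsx_mmio_state s = { .paddr = 0x1000, .copy_nums = 2, .offset = 0 };

	return handle_vsx_load(&s, 8) == EMU_DONE ? 0 : 1;
}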
1102 | |||
940 | int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | 1103 | int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, |
941 | u64 val, unsigned int bytes, int is_default_endian) | 1104 | u64 val, unsigned int bytes, int is_default_endian) |
942 | { | 1105 | { |
@@ -962,6 +1125,9 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
962 | vcpu->mmio_needed = 1; | 1125 | vcpu->mmio_needed = 1; |
963 | vcpu->mmio_is_write = 1; | 1126 | vcpu->mmio_is_write = 1; |
964 | 1127 | ||
1128 | if ((vcpu->arch.mmio_sp64_extend) && (bytes == 4)) | ||
1129 | val = dp_to_sp(val); | ||
1130 | |||
965 | /* Store the value at the lowest bytes in 'data'. */ | 1131 | /* Store the value at the lowest bytes in 'data'. */ |
966 | if (!host_swabbed) { | 1132 | if (!host_swabbed) { |
967 | switch (bytes) { | 1133 | switch (bytes) { |
@@ -995,6 +1161,129 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
995 | } | 1161 | } |
996 | EXPORT_SYMBOL_GPL(kvmppc_handle_store); | 1162 | EXPORT_SYMBOL_GPL(kvmppc_handle_store); |
997 | 1163 | ||
1164 | #ifdef CONFIG_VSX | ||
1165 | static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) | ||
1166 | { | ||
1167 | u32 dword_offset, word_offset; | ||
1168 | union kvmppc_one_reg reg; | ||
1169 | int vsx_offset = 0; | ||
1170 | int copy_type = vcpu->arch.mmio_vsx_copy_type; | ||
1171 | int result = 0; | ||
1172 | |||
1173 | switch (copy_type) { | ||
1174 | case KVMPPC_VSX_COPY_DWORD: | ||
1175 | vsx_offset = | ||
1176 | kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset); | ||
1177 | |||
1178 | if (vsx_offset == -1) { | ||
1179 | result = -1; | ||
1180 | break; | ||
1181 | } | ||
1182 | |||
1183 | if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { | ||
1184 | *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset); | ||
1185 | } else { | ||
1186 | reg.vval = VCPU_VSX_VR(vcpu, rs); | ||
1187 | *val = reg.vsxval[vsx_offset]; | ||
1188 | } | ||
1189 | break; | ||
1190 | |||
1191 | case KVMPPC_VSX_COPY_WORD: | ||
1192 | vsx_offset = | ||
1193 | kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset); | ||
1194 | |||
1195 | if (vsx_offset == -1) { | ||
1196 | result = -1; | ||
1197 | break; | ||
1198 | } | ||
1199 | |||
1200 | if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { | ||
1201 | dword_offset = vsx_offset / 2; | ||
1202 | word_offset = vsx_offset % 2; | ||
1203 | reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset); | ||
1204 | *val = reg.vsx32val[word_offset]; | ||
1205 | } else { | ||
1206 | reg.vval = VCPU_VSX_VR(vcpu, rs); | ||
1207 | *val = reg.vsx32val[vsx_offset]; | ||
1208 | } | ||
1209 | break; | ||
1210 | |||
1211 | default: | ||
1212 | result = -1; | ||
1213 | break; | ||
1214 | } | ||
1215 | |||
1216 | return result; | ||
1217 | } | ||
1218 | |||
1219 | int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
1220 | int rs, unsigned int bytes, int is_default_endian) | ||
1221 | { | ||
1222 | u64 val; | ||
1223 | enum emulation_result emulated = EMULATE_DONE; | ||
1224 | |||
1225 | vcpu->arch.io_gpr = rs; | ||
1226 | |||
1227 | /* Currently, mmio_vsx_copy_nums is limited to at most 4 elements */ | ||
1228 | if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || | ||
1229 | (vcpu->arch.mmio_vsx_copy_nums < 0) ) { | ||
1230 | return EMULATE_FAIL; | ||
1231 | } | ||
1232 | |||
1233 | while (vcpu->arch.mmio_vsx_copy_nums) { | ||
1234 | if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) | ||
1235 | return EMULATE_FAIL; | ||
1236 | |||
1237 | emulated = kvmppc_handle_store(run, vcpu, | ||
1238 | val, bytes, is_default_endian); | ||
1239 | |||
1240 | if (emulated != EMULATE_DONE) | ||
1241 | break; | ||
1242 | |||
1243 | vcpu->arch.paddr_accessed += run->mmio.len; | ||
1244 | |||
1245 | vcpu->arch.mmio_vsx_copy_nums--; | ||
1246 | vcpu->arch.mmio_vsx_offset++; | ||
1247 | } | ||
1248 | |||
1249 | return emulated; | ||
1250 | } | ||
1251 | |||
1252 | static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, | ||
1253 | struct kvm_run *run) | ||
1254 | { | ||
1255 | enum emulation_result emulated = EMULATE_FAIL; | ||
1256 | int r; | ||
1257 | |||
1258 | vcpu->arch.paddr_accessed += run->mmio.len; | ||
1259 | |||
1260 | if (!vcpu->mmio_is_write) { | ||
1261 | emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr, | ||
1262 | run->mmio.len, 1, vcpu->arch.mmio_sign_extend); | ||
1263 | } else { | ||
1264 | emulated = kvmppc_handle_vsx_store(run, vcpu, | ||
1265 | vcpu->arch.io_gpr, run->mmio.len, 1); | ||
1266 | } | ||
1267 | |||
1268 | switch (emulated) { | ||
1269 | case EMULATE_DO_MMIO: | ||
1270 | run->exit_reason = KVM_EXIT_MMIO; | ||
1271 | r = RESUME_HOST; | ||
1272 | break; | ||
1273 | case EMULATE_FAIL: | ||
1274 | pr_info("KVM: MMIO emulation failed (VSX repeat)\n"); | ||
1275 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
1276 | run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
1277 | r = RESUME_HOST; | ||
1278 | break; | ||
1279 | default: | ||
1280 | r = RESUME_GUEST; | ||
1281 | break; | ||
1282 | } | ||
1283 | return r; | ||
1284 | } | ||
1285 | #endif /* CONFIG_VSX */ | ||
1286 | |||
998 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 1287 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) |
999 | { | 1288 | { |
1000 | int r = 0; | 1289 | int r = 0; |
@@ -1097,13 +1386,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1097 | int r; | 1386 | int r; |
1098 | sigset_t sigsaved; | 1387 | sigset_t sigsaved; |
1099 | 1388 | ||
1100 | if (vcpu->sigset_active) | ||
1101 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
1102 | |||
1103 | if (vcpu->mmio_needed) { | 1389 | if (vcpu->mmio_needed) { |
1390 | vcpu->mmio_needed = 0; | ||
1104 | if (!vcpu->mmio_is_write) | 1391 | if (!vcpu->mmio_is_write) |
1105 | kvmppc_complete_mmio_load(vcpu, run); | 1392 | kvmppc_complete_mmio_load(vcpu, run); |
1106 | vcpu->mmio_needed = 0; | 1393 | #ifdef CONFIG_VSX |
1394 | if (vcpu->arch.mmio_vsx_copy_nums > 0) { | ||
1395 | vcpu->arch.mmio_vsx_copy_nums--; | ||
1396 | vcpu->arch.mmio_vsx_offset++; | ||
1397 | } | ||
1398 | |||
1399 | if (vcpu->arch.mmio_vsx_copy_nums > 0) { | ||
1400 | r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run); | ||
1401 | if (r == RESUME_HOST) { | ||
1402 | vcpu->mmio_needed = 1; | ||
1403 | return r; | ||
1404 | } | ||
1405 | } | ||
1406 | #endif | ||
1107 | } else if (vcpu->arch.osi_needed) { | 1407 | } else if (vcpu->arch.osi_needed) { |
1108 | u64 *gprs = run->osi.gprs; | 1408 | u64 *gprs = run->osi.gprs; |
1109 | int i; | 1409 | int i; |
@@ -1125,6 +1425,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1125 | #endif | 1425 | #endif |
1126 | } | 1426 | } |
1127 | 1427 | ||
1428 | if (vcpu->sigset_active) | ||
1429 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
1430 | |||
1128 | if (run->immediate_exit) | 1431 | if (run->immediate_exit) |
1129 | r = -EINTR; | 1432 | r = -EINTR; |
1130 | else | 1433 | else |
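The kvm_arch_vcpu_ioctl_run() changes above make a multi-element VSX MMIO access resumable across exits to userspace: when KVM_RUN is re-entered after userspace completed one chunk, the remaining count is decremented and the next chunk is issued immediately, and only if that chunk itself needs userspace does the ioctl return with mmio_needed set again. A toy model of that resume loop (all names hypothetical):

#include <stdio.h>

static int chunks_left = 3;	/* VSX elements still outstanding */

static int vcpu_run_entry(void)
{
	if (chunks_left > 0) {
		chunks_left--;		/* the chunk userspace just completed */
		if (chunks_left > 0) {
			printf("issuing next chunk, %d outstanding\n", chunks_left);
			return 1;	/* this chunk also needs userspace: exit again */
		}
	}
	return 0;			/* nothing left: resume the guest */
}

int main(void)
{
	while (vcpu_run_entry())
		;	/* "userspace" completes each MMIO and re-enters the run loop */
	printf("all chunks done\n");
	return 0;
}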
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 497130c5c742..fc67bd766eaf 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c | |||
@@ -314,6 +314,25 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, | |||
314 | } | 314 | } |
315 | EXPORT_SYMBOL_GPL(mm_iommu_lookup); | 315 | EXPORT_SYMBOL_GPL(mm_iommu_lookup); |
316 | 316 | ||
317 | struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm, | ||
318 | unsigned long ua, unsigned long size) | ||
319 | { | ||
320 | struct mm_iommu_table_group_mem_t *mem, *ret = NULL; | ||
321 | |||
322 | list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list, | ||
323 | next) { | ||
324 | if ((mem->ua <= ua) && | ||
325 | (ua + size <= mem->ua + | ||
326 | (mem->entries << PAGE_SHIFT))) { | ||
327 | ret = mem; | ||
328 | break; | ||
329 | } | ||
330 | } | ||
331 | |||
332 | return ret; | ||
333 | } | ||
334 | EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm); | ||
335 | |||
317 | struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, | 336 | struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, |
318 | unsigned long ua, unsigned long entries) | 337 | unsigned long ua, unsigned long entries) |
319 | { | 338 | { |
@@ -345,6 +364,26 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, | |||
345 | } | 364 | } |
346 | EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); | 365 | EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); |
347 | 366 | ||
367 | long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, | ||
368 | unsigned long ua, unsigned long *hpa) | ||
369 | { | ||
370 | const long entry = (ua - mem->ua) >> PAGE_SHIFT; | ||
371 | void *va = &mem->hpas[entry]; | ||
372 | unsigned long *pa; | ||
373 | |||
374 | if (entry >= mem->entries) | ||
375 | return -EFAULT; | ||
376 | |||
377 | pa = (void *) vmalloc_to_phys(va); | ||
378 | if (!pa) | ||
379 | return -EFAULT; | ||
380 | |||
381 | *hpa = *pa | (ua & ~PAGE_MASK); | ||
382 | |||
383 | return 0; | ||
384 | } | ||
385 | EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm); | ||
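mm_iommu_ua_to_hpa_rm() above derives the page index from the offset into the registered area and carries the in-page byte offset over from the userspace address. The same arithmetic as a stand-alone sketch, with invented page size and hpas[] contents:

#include <stdio.h>

#define SK_PAGE_SHIFT 16
#define SK_PAGE_SIZE  (1UL << SK_PAGE_SHIFT)
#define SK_PAGE_MASK  (~(SK_PAGE_SIZE - 1))

int main(void)
{
	unsigned long mem_ua = 0x100000;	/* start of the registered area */
	unsigned long hpas[] = { 0x80000000UL, 0x80010000UL, 0x80020000UL };
	unsigned long ua = 0x121234;		/* address to translate */

	unsigned long entry = (ua - mem_ua) >> SK_PAGE_SHIFT;
	unsigned long hpa = hpas[entry] | (ua & ~SK_PAGE_MASK);

	printf("ua 0x%lx -> entry %lu -> hpa 0x%lx\n", ua, entry, hpa);
	return 0;
}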
386 | |||
348 | long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) | 387 | long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) |
349 | { | 388 | { |
350 | if (atomic64_inc_not_zero(&mem->mapped)) | 389 | if (atomic64_inc_not_zero(&mem->mapped)) |
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e36738291c32..ee4cdb5b893f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c | |||
@@ -1424,8 +1424,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe | |||
1424 | iommu_group_put(pe->table_group.group); | 1424 | iommu_group_put(pe->table_group.group); |
1425 | BUG_ON(pe->table_group.group); | 1425 | BUG_ON(pe->table_group.group); |
1426 | } | 1426 | } |
1427 | pnv_pci_ioda2_table_free_pages(tbl); | 1427 | iommu_tce_table_put(tbl); |
1428 | iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); | ||
1429 | } | 1428 | } |
1430 | 1429 | ||
1431 | static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) | 1430 | static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) |
@@ -1860,6 +1859,17 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, | |||
1860 | 1859 | ||
1861 | return ret; | 1860 | return ret; |
1862 | } | 1861 | } |
1862 | |||
1863 | static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index, | ||
1864 | unsigned long *hpa, enum dma_data_direction *direction) | ||
1865 | { | ||
1866 | long ret = pnv_tce_xchg(tbl, index, hpa, direction); | ||
1867 | |||
1868 | if (!ret) | ||
1869 | pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true); | ||
1870 | |||
1871 | return ret; | ||
1872 | } | ||
1863 | #endif | 1873 | #endif |
1864 | 1874 | ||
1865 | static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, | 1875 | static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, |
@@ -1874,6 +1884,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { | |||
1874 | .set = pnv_ioda1_tce_build, | 1884 | .set = pnv_ioda1_tce_build, |
1875 | #ifdef CONFIG_IOMMU_API | 1885 | #ifdef CONFIG_IOMMU_API |
1876 | .exchange = pnv_ioda1_tce_xchg, | 1886 | .exchange = pnv_ioda1_tce_xchg, |
1887 | .exchange_rm = pnv_ioda1_tce_xchg_rm, | ||
1877 | #endif | 1888 | #endif |
1878 | .clear = pnv_ioda1_tce_free, | 1889 | .clear = pnv_ioda1_tce_free, |
1879 | .get = pnv_tce_get, | 1890 | .get = pnv_tce_get, |
@@ -1948,7 +1959,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, | |||
1948 | { | 1959 | { |
1949 | struct iommu_table_group_link *tgl; | 1960 | struct iommu_table_group_link *tgl; |
1950 | 1961 | ||
1951 | list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { | 1962 | list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) { |
1952 | struct pnv_ioda_pe *pe = container_of(tgl->table_group, | 1963 | struct pnv_ioda_pe *pe = container_of(tgl->table_group, |
1953 | struct pnv_ioda_pe, table_group); | 1964 | struct pnv_ioda_pe, table_group); |
1954 | struct pnv_phb *phb = pe->phb; | 1965 | struct pnv_phb *phb = pe->phb; |
@@ -2004,6 +2015,17 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, | |||
2004 | 2015 | ||
2005 | return ret; | 2016 | return ret; |
2006 | } | 2017 | } |
2018 | |||
2019 | static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index, | ||
2020 | unsigned long *hpa, enum dma_data_direction *direction) | ||
2021 | { | ||
2022 | long ret = pnv_tce_xchg(tbl, index, hpa, direction); | ||
2023 | |||
2024 | if (!ret) | ||
2025 | pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true); | ||
2026 | |||
2027 | return ret; | ||
2028 | } | ||
2007 | #endif | 2029 | #endif |
2008 | 2030 | ||
2009 | static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, | 2031 | static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, |
@@ -2017,13 +2039,13 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, | |||
2017 | static void pnv_ioda2_table_free(struct iommu_table *tbl) | 2039 | static void pnv_ioda2_table_free(struct iommu_table *tbl) |
2018 | { | 2040 | { |
2019 | pnv_pci_ioda2_table_free_pages(tbl); | 2041 | pnv_pci_ioda2_table_free_pages(tbl); |
2020 | iommu_free_table(tbl, "pnv"); | ||
2021 | } | 2042 | } |
2022 | 2043 | ||
2023 | static struct iommu_table_ops pnv_ioda2_iommu_ops = { | 2044 | static struct iommu_table_ops pnv_ioda2_iommu_ops = { |
2024 | .set = pnv_ioda2_tce_build, | 2045 | .set = pnv_ioda2_tce_build, |
2025 | #ifdef CONFIG_IOMMU_API | 2046 | #ifdef CONFIG_IOMMU_API |
2026 | .exchange = pnv_ioda2_tce_xchg, | 2047 | .exchange = pnv_ioda2_tce_xchg, |
2048 | .exchange_rm = pnv_ioda2_tce_xchg_rm, | ||
2027 | #endif | 2049 | #endif |
2028 | .clear = pnv_ioda2_tce_free, | 2050 | .clear = pnv_ioda2_tce_free, |
2029 | .get = pnv_tce_get, | 2051 | .get = pnv_tce_get, |
@@ -2203,7 +2225,7 @@ found: | |||
2203 | __free_pages(tce_mem, get_order(tce32_segsz * segs)); | 2225 | __free_pages(tce_mem, get_order(tce32_segsz * segs)); |
2204 | if (tbl) { | 2226 | if (tbl) { |
2205 | pnv_pci_unlink_table_and_group(tbl, &pe->table_group); | 2227 | pnv_pci_unlink_table_and_group(tbl, &pe->table_group); |
2206 | iommu_free_table(tbl, "pnv"); | 2228 | iommu_tce_table_put(tbl); |
2207 | } | 2229 | } |
2208 | } | 2230 | } |
2209 | 2231 | ||
@@ -2293,16 +2315,16 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, | |||
2293 | if (!tbl) | 2315 | if (!tbl) |
2294 | return -ENOMEM; | 2316 | return -ENOMEM; |
2295 | 2317 | ||
2318 | tbl->it_ops = &pnv_ioda2_iommu_ops; | ||
2319 | |||
2296 | ret = pnv_pci_ioda2_table_alloc_pages(nid, | 2320 | ret = pnv_pci_ioda2_table_alloc_pages(nid, |
2297 | bus_offset, page_shift, window_size, | 2321 | bus_offset, page_shift, window_size, |
2298 | levels, tbl); | 2322 | levels, tbl); |
2299 | if (ret) { | 2323 | if (ret) { |
2300 | iommu_free_table(tbl, "pnv"); | 2324 | iommu_tce_table_put(tbl); |
2301 | return ret; | 2325 | return ret; |
2302 | } | 2326 | } |
2303 | 2327 | ||
2304 | tbl->it_ops = &pnv_ioda2_iommu_ops; | ||
2305 | |||
2306 | *ptbl = tbl; | 2328 | *ptbl = tbl; |
2307 | 2329 | ||
2308 | return 0; | 2330 | return 0; |
@@ -2343,7 +2365,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) | |||
2343 | if (rc) { | 2365 | if (rc) { |
2344 | pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", | 2366 | pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", |
2345 | rc); | 2367 | rc); |
2346 | pnv_ioda2_table_free(tbl); | 2368 | iommu_tce_table_put(tbl); |
2347 | return rc; | 2369 | return rc; |
2348 | } | 2370 | } |
2349 | 2371 | ||
@@ -2431,7 +2453,7 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) | |||
2431 | pnv_pci_ioda2_unset_window(&pe->table_group, 0); | 2453 | pnv_pci_ioda2_unset_window(&pe->table_group, 0); |
2432 | if (pe->pbus) | 2454 | if (pe->pbus) |
2433 | pnv_ioda_setup_bus_dma(pe, pe->pbus, false); | 2455 | pnv_ioda_setup_bus_dma(pe, pe->pbus, false); |
2434 | pnv_ioda2_table_free(tbl); | 2456 | iommu_tce_table_put(tbl); |
2435 | } | 2457 | } |
2436 | 2458 | ||
2437 | static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) | 2459 | static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) |
@@ -3406,7 +3428,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) | |||
3406 | } | 3428 | } |
3407 | 3429 | ||
3408 | free_pages(tbl->it_base, get_order(tbl->it_size << 3)); | 3430 | free_pages(tbl->it_base, get_order(tbl->it_size << 3)); |
3409 | iommu_free_table(tbl, "pnv"); | 3431 | iommu_tce_table_put(tbl); |
3410 | } | 3432 | } |
3411 | 3433 | ||
3412 | static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) | 3434 | static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) |
@@ -3433,7 +3455,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) | |||
3433 | } | 3455 | } |
3434 | 3456 | ||
3435 | pnv_pci_ioda2_table_free_pages(tbl); | 3457 | pnv_pci_ioda2_table_free_pages(tbl); |
3436 | iommu_free_table(tbl, "pnv"); | 3458 | iommu_tce_table_put(tbl); |
3437 | } | 3459 | } |
3438 | 3460 | ||
3439 | static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, | 3461 | static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, |
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index eb835e977e33..204a829ff506 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c | |||
@@ -767,6 +767,7 @@ struct iommu_table *pnv_pci_table_alloc(int nid) | |||
767 | 767 | ||
768 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); | 768 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); |
769 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); | 769 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); |
770 | kref_init(&tbl->it_kref); | ||
770 | 771 | ||
771 | return tbl; | 772 | return tbl; |
772 | } | 773 | } |
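This hunk, together with the iommu_tce_table_put() conversions in the files above, moves iommu_table lifetime to reference counting: the table's kref is initialised at allocation and the table is freed only when the last user drops its reference, instead of being freed unconditionally via iommu_free_table(). A minimal user-space sketch of that get/put pattern (not the kernel's kref API):

#include <stdio.h>
#include <stdlib.h>

struct table {
	int refcount;
	/* ... table contents ... */
};

static struct table *table_alloc(void)
{
	struct table *t = calloc(1, sizeof(*t));

	if (t)
		t->refcount = 1;	/* the allocator holds the first reference */
	return t;
}

static void table_get(struct table *t)
{
	t->refcount++;
}

static void table_put(struct table *t)
{
	if (--t->refcount == 0) {
		printf("last reference dropped, freeing table\n");
		free(t);
	}
}

int main(void)
{
	struct table *t = table_alloc();

	table_get(t);	/* e.g. a second table group links to it */
	table_put(t);	/* the group goes away */
	table_put(t);	/* the owner releases it: now actually freed */
	return 0;
}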
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4d757eaa46bf..7ce5db209abf 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c | |||
@@ -74,6 +74,7 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) | |||
74 | goto fail_exit; | 74 | goto fail_exit; |
75 | 75 | ||
76 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); | 76 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); |
77 | kref_init(&tbl->it_kref); | ||
77 | tgl->table_group = table_group; | 78 | tgl->table_group = table_group; |
78 | list_add_rcu(&tgl->next, &tbl->it_group_list); | 79 | list_add_rcu(&tgl->next, &tbl->it_group_list); |
79 | 80 | ||
@@ -115,7 +116,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group, | |||
115 | BUG_ON(table_group->group); | 116 | BUG_ON(table_group->group); |
116 | } | 117 | } |
117 | #endif | 118 | #endif |
118 | iommu_free_table(tbl, node_name); | 119 | iommu_tce_table_put(tbl); |
119 | 120 | ||
120 | kfree(table_group); | 121 | kfree(table_group); |
121 | } | 122 | } |
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 720493932486..28b09fd797ec 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c | |||
@@ -1318,7 +1318,7 @@ static void vio_dev_release(struct device *dev) | |||
1318 | struct iommu_table *tbl = get_iommu_table_base(dev); | 1318 | struct iommu_table *tbl = get_iommu_table_base(dev); |
1319 | 1319 | ||
1320 | if (tbl) | 1320 | if (tbl) |
1321 | iommu_free_table(tbl, of_node_full_name(dev->of_node)); | 1321 | iommu_tce_table_put(tbl); |
1322 | of_node_put(dev->of_node); | 1322 | of_node_put(dev->of_node); |
1323 | kfree(to_vio_dev(dev)); | 1323 | kfree(to_vio_dev(dev)); |
1324 | } | 1324 | } |
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1d48880b3cc1..e8f623041769 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h | |||
@@ -105,6 +105,7 @@ | |||
105 | #define HWCAP_S390_VXRS 2048 | 105 | #define HWCAP_S390_VXRS 2048 |
106 | #define HWCAP_S390_VXRS_BCD 4096 | 106 | #define HWCAP_S390_VXRS_BCD 4096 |
107 | #define HWCAP_S390_VXRS_EXT 8192 | 107 | #define HWCAP_S390_VXRS_EXT 8192 |
108 | #define HWCAP_S390_GS 16384 | ||
108 | 109 | ||
109 | /* Internal bits, not exposed via elf */ | 110 | /* Internal bits, not exposed via elf */ |
110 | #define HWCAP_INT_SIE 1UL | 111 | #define HWCAP_INT_SIE 1UL |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a41faf34b034..426614a882a9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/cpu.h> | 25 | #include <asm/cpu.h> |
26 | #include <asm/fpu/api.h> | 26 | #include <asm/fpu/api.h> |
27 | #include <asm/isc.h> | 27 | #include <asm/isc.h> |
28 | #include <asm/guarded_storage.h> | ||
28 | 29 | ||
29 | #define KVM_S390_BSCA_CPU_SLOTS 64 | 30 | #define KVM_S390_BSCA_CPU_SLOTS 64 |
30 | #define KVM_S390_ESCA_CPU_SLOTS 248 | 31 | #define KVM_S390_ESCA_CPU_SLOTS 248 |
@@ -121,6 +122,7 @@ struct esca_block { | |||
121 | #define CPUSTAT_SLSR 0x00002000 | 122 | #define CPUSTAT_SLSR 0x00002000 |
122 | #define CPUSTAT_ZARCH 0x00000800 | 123 | #define CPUSTAT_ZARCH 0x00000800 |
123 | #define CPUSTAT_MCDS 0x00000100 | 124 | #define CPUSTAT_MCDS 0x00000100 |
125 | #define CPUSTAT_KSS 0x00000200 | ||
124 | #define CPUSTAT_SM 0x00000080 | 126 | #define CPUSTAT_SM 0x00000080 |
125 | #define CPUSTAT_IBS 0x00000040 | 127 | #define CPUSTAT_IBS 0x00000040 |
126 | #define CPUSTAT_GED2 0x00000010 | 128 | #define CPUSTAT_GED2 0x00000010 |
@@ -164,16 +166,27 @@ struct kvm_s390_sie_block { | |||
164 | #define ICTL_RRBE 0x00001000 | 166 | #define ICTL_RRBE 0x00001000 |
165 | #define ICTL_TPROT 0x00000200 | 167 | #define ICTL_TPROT 0x00000200 |
166 | __u32 ictl; /* 0x0048 */ | 168 | __u32 ictl; /* 0x0048 */ |
169 | #define ECA_CEI 0x80000000 | ||
170 | #define ECA_IB 0x40000000 | ||
171 | #define ECA_SIGPI 0x10000000 | ||
172 | #define ECA_MVPGI 0x01000000 | ||
173 | #define ECA_VX 0x00020000 | ||
174 | #define ECA_PROTEXCI 0x00002000 | ||
175 | #define ECA_SII 0x00000001 | ||
167 | __u32 eca; /* 0x004c */ | 176 | __u32 eca; /* 0x004c */ |
168 | #define ICPT_INST 0x04 | 177 | #define ICPT_INST 0x04 |
169 | #define ICPT_PROGI 0x08 | 178 | #define ICPT_PROGI 0x08 |
170 | #define ICPT_INSTPROGI 0x0C | 179 | #define ICPT_INSTPROGI 0x0C |
180 | #define ICPT_EXTREQ 0x10 | ||
171 | #define ICPT_EXTINT 0x14 | 181 | #define ICPT_EXTINT 0x14 |
182 | #define ICPT_IOREQ 0x18 | ||
183 | #define ICPT_WAIT 0x1c | ||
172 | #define ICPT_VALIDITY 0x20 | 184 | #define ICPT_VALIDITY 0x20 |
173 | #define ICPT_STOP 0x28 | 185 | #define ICPT_STOP 0x28 |
174 | #define ICPT_OPEREXC 0x2C | 186 | #define ICPT_OPEREXC 0x2C |
175 | #define ICPT_PARTEXEC 0x38 | 187 | #define ICPT_PARTEXEC 0x38 |
176 | #define ICPT_IOINST 0x40 | 188 | #define ICPT_IOINST 0x40 |
189 | #define ICPT_KSS 0x5c | ||
177 | __u8 icptcode; /* 0x0050 */ | 190 | __u8 icptcode; /* 0x0050 */ |
178 | __u8 icptstatus; /* 0x0051 */ | 191 | __u8 icptstatus; /* 0x0051 */ |
179 | __u16 ihcpu; /* 0x0052 */ | 192 | __u16 ihcpu; /* 0x0052 */ |
@@ -182,10 +195,19 @@ struct kvm_s390_sie_block { | |||
182 | __u32 ipb; /* 0x0058 */ | 195 | __u32 ipb; /* 0x0058 */ |
183 | __u32 scaoh; /* 0x005c */ | 196 | __u32 scaoh; /* 0x005c */ |
184 | __u8 reserved60; /* 0x0060 */ | 197 | __u8 reserved60; /* 0x0060 */ |
198 | #define ECB_GS 0x40 | ||
199 | #define ECB_TE 0x10 | ||
200 | #define ECB_SRSI 0x04 | ||
201 | #define ECB_HOSTPROTINT 0x02 | ||
185 | __u8 ecb; /* 0x0061 */ | 202 | __u8 ecb; /* 0x0061 */ |
203 | #define ECB2_CMMA 0x80 | ||
204 | #define ECB2_IEP 0x20 | ||
205 | #define ECB2_PFMFI 0x08 | ||
206 | #define ECB2_ESCA 0x04 | ||
186 | __u8 ecb2; /* 0x0062 */ | 207 | __u8 ecb2; /* 0x0062 */ |
187 | #define ECB3_AES 0x04 | ||
188 | #define ECB3_DEA 0x08 | 208 | #define ECB3_DEA 0x08 |
209 | #define ECB3_AES 0x04 | ||
210 | #define ECB3_RI 0x01 | ||
189 | __u8 ecb3; /* 0x0063 */ | 211 | __u8 ecb3; /* 0x0063 */ |
190 | __u32 scaol; /* 0x0064 */ | 212 | __u32 scaol; /* 0x0064 */ |
191 | __u8 reserved68[4]; /* 0x0068 */ | 213 | __u8 reserved68[4]; /* 0x0068 */ |
@@ -219,11 +241,14 @@ struct kvm_s390_sie_block { | |||
219 | __u32 crycbd; /* 0x00fc */ | 241 | __u32 crycbd; /* 0x00fc */ |
220 | __u64 gcr[16]; /* 0x0100 */ | 242 | __u64 gcr[16]; /* 0x0100 */ |
221 | __u64 gbea; /* 0x0180 */ | 243 | __u64 gbea; /* 0x0180 */ |
222 | __u8 reserved188[24]; /* 0x0188 */ | 244 | __u8 reserved188[8]; /* 0x0188 */ |
245 | __u64 sdnxo; /* 0x0190 */ | ||
246 | __u8 reserved198[8]; /* 0x0198 */ | ||
223 | __u32 fac; /* 0x01a0 */ | 247 | __u32 fac; /* 0x01a0 */ |
224 | __u8 reserved1a4[20]; /* 0x01a4 */ | 248 | __u8 reserved1a4[20]; /* 0x01a4 */ |
225 | __u64 cbrlo; /* 0x01b8 */ | 249 | __u64 cbrlo; /* 0x01b8 */ |
226 | __u8 reserved1c0[8]; /* 0x01c0 */ | 250 | __u8 reserved1c0[8]; /* 0x01c0 */ |
251 | #define ECD_HOSTREGMGMT 0x20000000 | ||
227 | __u32 ecd; /* 0x01c8 */ | 252 | __u32 ecd; /* 0x01c8 */ |
228 | __u8 reserved1cc[18]; /* 0x01cc */ | 253 | __u8 reserved1cc[18]; /* 0x01cc */ |
229 | __u64 pp; /* 0x01de */ | 254 | __u64 pp; /* 0x01de */ |
@@ -498,6 +523,12 @@ struct kvm_s390_local_interrupt { | |||
498 | #define FIRQ_CNTR_PFAULT 3 | 523 | #define FIRQ_CNTR_PFAULT 3 |
499 | #define FIRQ_MAX_COUNT 4 | 524 | #define FIRQ_MAX_COUNT 4 |
500 | 525 | ||
526 | /* mask the AIS mode for a given ISC */ | ||
527 | #define AIS_MODE_MASK(isc) (0x80 >> isc) | ||
528 | |||
529 | #define KVM_S390_AIS_MODE_ALL 0 | ||
530 | #define KVM_S390_AIS_MODE_SINGLE 1 | ||
531 | |||
501 | struct kvm_s390_float_interrupt { | 532 | struct kvm_s390_float_interrupt { |
502 | unsigned long pending_irqs; | 533 | unsigned long pending_irqs; |
503 | spinlock_t lock; | 534 | spinlock_t lock; |
@@ -507,6 +538,10 @@ struct kvm_s390_float_interrupt { | |||
507 | struct kvm_s390_ext_info srv_signal; | 538 | struct kvm_s390_ext_info srv_signal; |
508 | int next_rr_cpu; | 539 | int next_rr_cpu; |
509 | unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; | 540 | unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; |
541 | struct mutex ais_lock; | ||
542 | u8 simm; | ||
543 | u8 nimm; | ||
544 | int ais_enabled; | ||
510 | }; | 545 | }; |
511 | 546 | ||
512 | struct kvm_hw_wp_info_arch { | 547 | struct kvm_hw_wp_info_arch { |
@@ -554,6 +589,7 @@ struct kvm_vcpu_arch { | |||
554 | /* if vsie is active, currently executed shadow sie control block */ | 589 | /* if vsie is active, currently executed shadow sie control block */ |
555 | struct kvm_s390_sie_block *vsie_block; | 590 | struct kvm_s390_sie_block *vsie_block; |
556 | unsigned int host_acrs[NUM_ACRS]; | 591 | unsigned int host_acrs[NUM_ACRS]; |
592 | struct gs_cb *host_gscb; | ||
557 | struct fpu host_fpregs; | 593 | struct fpu host_fpregs; |
558 | struct kvm_s390_local_interrupt local_int; | 594 | struct kvm_s390_local_interrupt local_int; |
559 | struct hrtimer ckc_timer; | 595 | struct hrtimer ckc_timer; |
@@ -574,6 +610,7 @@ struct kvm_vcpu_arch { | |||
574 | */ | 610 | */ |
575 | seqcount_t cputm_seqcount; | 611 | seqcount_t cputm_seqcount; |
576 | __u64 cputm_start; | 612 | __u64 cputm_start; |
613 | bool gs_enabled; | ||
577 | }; | 614 | }; |
578 | 615 | ||
579 | struct kvm_vm_stat { | 616 | struct kvm_vm_stat { |
@@ -596,6 +633,7 @@ struct s390_io_adapter { | |||
596 | bool maskable; | 633 | bool maskable; |
597 | bool masked; | 634 | bool masked; |
598 | bool swap; | 635 | bool swap; |
636 | bool suppressible; | ||
599 | struct rw_semaphore maps_lock; | 637 | struct rw_semaphore maps_lock; |
600 | struct list_head maps; | 638 | struct list_head maps; |
601 | atomic_t nr_maps; | 639 | atomic_t nr_maps; |
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 61261e0e95c0..8a5b082797f8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h | |||
@@ -157,8 +157,8 @@ struct lowcore { | |||
157 | __u64 stfle_fac_list[32]; /* 0x0f00 */ | 157 | __u64 stfle_fac_list[32]; /* 0x0f00 */ |
158 | __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ | 158 | __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ |
159 | 159 | ||
160 | /* Pointer to vector register save area */ | 160 | /* Pointer to the machine check extended save area */ |
161 | __u64 vector_save_area_addr; /* 0x11b0 */ | 161 | __u64 mcesad; /* 0x11b0 */ |
162 | 162 | ||
163 | /* 64 bit extparam used for pfault/diag 250: defined by architecture */ | 163 | /* 64 bit extparam used for pfault/diag 250: defined by architecture */ |
164 | __u64 ext_params2; /* 0x11B8 */ | 164 | __u64 ext_params2; /* 0x11B8 */ |
@@ -182,10 +182,7 @@ struct lowcore { | |||
182 | 182 | ||
183 | /* Transaction abort diagnostic block */ | 183 | /* Transaction abort diagnostic block */ |
184 | __u8 pgm_tdb[256]; /* 0x1800 */ | 184 | __u8 pgm_tdb[256]; /* 0x1800 */ |
185 | __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ | 185 | __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ |
186 | |||
187 | /* Software defined save area for vector registers */ | ||
188 | __u8 vector_save_area[1024]; /* 0x1c00 */ | ||
189 | } __packed; | 186 | } __packed; |
190 | 187 | ||
191 | #define S390_lowcore (*((struct lowcore *) 0)) | 188 | #define S390_lowcore (*((struct lowcore *) 0)) |
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index b75fd910386a..e3e8895f5d3e 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h | |||
@@ -58,7 +58,9 @@ union mci { | |||
58 | u64 ie : 1; /* 32 indirect storage error */ | 58 | u64 ie : 1; /* 32 indirect storage error */ |
59 | u64 ar : 1; /* 33 access register validity */ | 59 | u64 ar : 1; /* 33 access register validity */ |
60 | u64 da : 1; /* 34 delayed access exception */ | 60 | u64 da : 1; /* 34 delayed access exception */ |
61 | u64 : 7; /* 35-41 */ | 61 | u64 : 1; /* 35 */ |
62 | u64 gs : 1; /* 36 guarded storage registers */ | ||
63 | u64 : 5; /* 37-41 */ | ||
62 | u64 pr : 1; /* 42 tod programmable register validity */ | 64 | u64 pr : 1; /* 42 tod programmable register validity */ |
63 | u64 fc : 1; /* 43 fp control register validity */ | 65 | u64 fc : 1; /* 43 fp control register validity */ |
64 | u64 ap : 1; /* 44 ancillary report */ | 66 | u64 ap : 1; /* 44 ancillary report */ |
@@ -69,6 +71,14 @@ union mci { | |||
69 | }; | 71 | }; |
70 | }; | 72 | }; |
71 | 73 | ||
74 | #define MCESA_ORIGIN_MASK (~0x3ffUL) | ||
75 | #define MCESA_LC_MASK (0xfUL) | ||
76 | |||
77 | struct mcesa { | ||
78 | u8 vector_save_area[1024]; | ||
79 | u8 guarded_storage_save_area[32]; | ||
80 | }; | ||
81 | |||
72 | struct pt_regs; | 82 | struct pt_regs; |
73 | 83 | ||
74 | extern void s390_handle_mcck(void); | 84 | extern void s390_handle_mcck(void); |
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e4988710aa86..cc101f9371cb 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h | |||
@@ -135,6 +135,8 @@ struct thread_struct { | |||
135 | struct list_head list; | 135 | struct list_head list; |
136 | /* cpu runtime instrumentation */ | 136 | /* cpu runtime instrumentation */ |
137 | struct runtime_instr_cb *ri_cb; | 137 | struct runtime_instr_cb *ri_cb; |
138 | struct gs_cb *gs_cb; /* Current guarded storage cb */ | ||
139 | struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ | ||
138 | unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ | 140 | unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ |
139 | /* | 141 | /* |
140 | * Warning: 'fpu' is dynamically-sized. It *MUST* be at | 142 | * Warning: 'fpu' is dynamically-sized. It *MUST* be at |
@@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m); | |||
215 | /* Free all resources held by a thread. */ | 217 | /* Free all resources held by a thread. */ |
216 | extern void release_thread(struct task_struct *); | 218 | extern void release_thread(struct task_struct *); |
217 | 219 | ||
220 | /* Free guarded storage control block for current */ | ||
221 | void exit_thread_gs(void); | ||
222 | |||
218 | /* | 223 | /* |
219 | * Return saved PC of a blocked thread. | 224 | * Return saved PC of a blocked thread. |
220 | */ | 225 | */ |
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index ace3bd315438..6f5167bc1928 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h | |||
@@ -75,6 +75,7 @@ struct sclp_info { | |||
75 | unsigned char has_pfmfi : 1; | 75 | unsigned char has_pfmfi : 1; |
76 | unsigned char has_ibs : 1; | 76 | unsigned char has_ibs : 1; |
77 | unsigned char has_skey : 1; | 77 | unsigned char has_skey : 1; |
78 | unsigned char has_kss : 1; | ||
78 | unsigned int ibc; | 79 | unsigned int ibc; |
79 | unsigned int mtid; | 80 | unsigned int mtid; |
80 | unsigned int mtid_cp; | 81 | unsigned int mtid_cp; |
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 30bdb5a027f3..383bd8358a8c 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h | |||
@@ -31,6 +31,7 @@ | |||
31 | #define MACHINE_FLAG_VX _BITUL(13) | 31 | #define MACHINE_FLAG_VX _BITUL(13) |
32 | #define MACHINE_FLAG_CAD _BITUL(14) | 32 | #define MACHINE_FLAG_CAD _BITUL(14) |
33 | #define MACHINE_FLAG_NX _BITUL(15) | 33 | #define MACHINE_FLAG_NX _BITUL(15) |
34 | #define MACHINE_FLAG_GS _BITUL(16) | ||
34 | 35 | ||
35 | #define LPP_MAGIC _BITUL(31) | 36 | #define LPP_MAGIC _BITUL(31) |
36 | #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) | 37 | #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) |
@@ -70,6 +71,7 @@ extern void detect_memory_memblock(void); | |||
70 | #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) | 71 | #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) |
71 | #define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) | 72 | #define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) |
72 | #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) | 73 | #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) |
74 | #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) | ||
73 | 75 | ||
74 | /* | 76 | /* |
75 | * Console mode. Override with conmode= | 77 | * Console mode. Override with conmode= |
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 12d45f0cfdd9..f6c2b5814ab0 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/thread_info.h> | 10 | #include <linux/thread_info.h> |
11 | #include <asm/fpu/api.h> | 11 | #include <asm/fpu/api.h> |
12 | #include <asm/ptrace.h> | 12 | #include <asm/ptrace.h> |
13 | #include <asm/guarded_storage.h> | ||
13 | 14 | ||
14 | extern struct task_struct *__switch_to(void *, void *); | 15 | extern struct task_struct *__switch_to(void *, void *); |
15 | extern void update_cr_regs(struct task_struct *task); | 16 | extern void update_cr_regs(struct task_struct *task); |
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs) | |||
33 | save_fpu_regs(); \ | 34 | save_fpu_regs(); \ |
34 | save_access_regs(&prev->thread.acrs[0]); \ | 35 | save_access_regs(&prev->thread.acrs[0]); \ |
35 | save_ri_cb(prev->thread.ri_cb); \ | 36 | save_ri_cb(prev->thread.ri_cb); \ |
37 | save_gs_cb(prev->thread.gs_cb); \ | ||
36 | } \ | 38 | } \ |
37 | if (next->mm) { \ | 39 | if (next->mm) { \ |
38 | update_cr_regs(next); \ | 40 | update_cr_regs(next); \ |
39 | set_cpu_flag(CIF_FPU); \ | 41 | set_cpu_flag(CIF_FPU); \ |
40 | restore_access_regs(&next->thread.acrs[0]); \ | 42 | restore_access_regs(&next->thread.acrs[0]); \ |
41 | restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ | 43 | restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ |
44 | restore_gs_cb(next->thread.gs_cb); \ | ||
42 | } \ | 45 | } \ |
43 | prev = __switch_to(prev,next); \ | 46 | prev = __switch_to(prev,next); \ |
44 | } while (0) | 47 | } while (0) |
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a5b54a445eb8..f36e6e2b73f0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h | |||
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); | |||
54 | #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ | 54 | #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ |
55 | #define TIF_SIGPENDING 1 /* signal pending */ | 55 | #define TIF_SIGPENDING 1 /* signal pending */ |
56 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | 56 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ |
57 | #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ | 57 | #define TIF_UPROBE 3 /* breakpointed or single-stepping */ |
58 | #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ | 58 | #define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ |
59 | #define TIF_SECCOMP 5 /* secure computing */ | 59 | #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ |
60 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ | 60 | #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ |
61 | #define TIF_UPROBE 7 /* breakpointed or single-stepping */ | 61 | #define TIF_SECCOMP 10 /* secure computing */ |
62 | #define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ | ||
62 | #define TIF_31BIT 16 /* 32bit process */ | 63 | #define TIF_31BIT 16 /* 32bit process */ |
63 | #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ | 64 | #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ |
64 | #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ | 65 | #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ |
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); | |||
76 | #define _TIF_UPROBE _BITUL(TIF_UPROBE) | 77 | #define _TIF_UPROBE _BITUL(TIF_UPROBE) |
77 | #define _TIF_31BIT _BITUL(TIF_31BIT) | 78 | #define _TIF_31BIT _BITUL(TIF_31BIT) |
78 | #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) | 79 | #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) |
80 | #define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) | ||
79 | 81 | ||
80 | #endif /* _ASM_THREAD_INFO_H */ | 82 | #endif /* _ASM_THREAD_INFO_H */ |
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 6848ba5c1454..86b761e583e3 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild | |||
@@ -12,6 +12,7 @@ header-y += dasd.h | |||
12 | header-y += debug.h | 12 | header-y += debug.h |
13 | header-y += errno.h | 13 | header-y += errno.h |
14 | header-y += fcntl.h | 14 | header-y += fcntl.h |
15 | header-y += guarded_storage.h | ||
15 | header-y += hypfs.h | 16 | header-y += hypfs.h |
16 | header-y += ioctl.h | 17 | header-y += ioctl.h |
17 | header-y += ioctls.h | 18 | header-y += ioctls.h |
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h new file mode 100644 index 000000000000..852850e8e17e --- /dev/null +++ b/arch/s390/include/uapi/asm/guarded_storage.h | |||
@@ -0,0 +1,77 @@ | |||
1 | #ifndef _GUARDED_STORAGE_H | ||
2 | #define _GUARDED_STORAGE_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | struct gs_cb { | ||
7 | __u64 reserved; | ||
8 | __u64 gsd; | ||
9 | __u64 gssm; | ||
10 | __u64 gs_epl_a; | ||
11 | }; | ||
12 | |||
13 | struct gs_epl { | ||
14 | __u8 pad1; | ||
15 | union { | ||
16 | __u8 gs_eam; | ||
17 | struct { | ||
18 | __u8 : 6; | ||
19 | __u8 e : 1; | ||
20 | __u8 b : 1; | ||
21 | }; | ||
22 | }; | ||
23 | union { | ||
24 | __u8 gs_eci; | ||
25 | struct { | ||
26 | __u8 tx : 1; | ||
27 | __u8 cx : 1; | ||
28 | __u8 : 5; | ||
29 | __u8 in : 1; | ||
30 | }; | ||
31 | }; | ||
32 | union { | ||
33 | __u8 gs_eai; | ||
34 | struct { | ||
35 | __u8 : 1; | ||
36 | __u8 t : 1; | ||
37 | __u8 as : 2; | ||
38 | __u8 ar : 4; | ||
39 | }; | ||
40 | }; | ||
41 | __u32 pad2; | ||
42 | __u64 gs_eha; | ||
43 | __u64 gs_eia; | ||
44 | __u64 gs_eoa; | ||
45 | __u64 gs_eir; | ||
46 | __u64 gs_era; | ||
47 | }; | ||
48 | |||
49 | #define GS_ENABLE 0 | ||
50 | #define GS_DISABLE 1 | ||
51 | #define GS_SET_BC_CB 2 | ||
52 | #define GS_CLEAR_BC_CB 3 | ||
53 | #define GS_BROADCAST 4 | ||
54 | |||
55 | static inline void load_gs_cb(struct gs_cb *gs_cb) | ||
56 | { | ||
57 | asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb)); | ||
58 | } | ||
59 | |||
60 | static inline void store_gs_cb(struct gs_cb *gs_cb) | ||
61 | { | ||
62 | asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb)); | ||
63 | } | ||
64 | |||
65 | static inline void save_gs_cb(struct gs_cb *gs_cb) | ||
66 | { | ||
67 | if (gs_cb) | ||
68 | store_gs_cb(gs_cb); | ||
69 | } | ||
70 | |||
71 | static inline void restore_gs_cb(struct gs_cb *gs_cb) | ||
72 | { | ||
73 | if (gs_cb) | ||
74 | load_gs_cb(gs_cb); | ||
75 | } | ||
76 | |||
77 | #endif /* _GUARDED_STORAGE_H */ | ||
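The store_gs_cb()/load_gs_cb() wrappers above hand-encode the new guarded-storage instructions, and save_gs_cb()/restore_gs_cb() accept a NULL control block so that tasks which never enabled guarded storage pay only a pointer test at context switch (see the switch_to.h hunk further down). A user-space sketch of that optional save/restore pattern, with the s390 instructions replaced by plain memory copies:

#include <stdio.h>
#include <string.h>

struct gs_cb_sketch {
	unsigned long gsd, gssm, gs_epl_a;
};

static struct gs_cb_sketch hw_state;	/* stand-in for the CPU's GS registers */

static void save_cb(struct gs_cb_sketch *cb)
{
	if (cb)				/* only tasks that enabled GS have one */
		memcpy(cb, &hw_state, sizeof(*cb));
}

static void restore_cb(struct gs_cb_sketch *cb)
{
	if (cb)
		memcpy(&hw_state, cb, sizeof(*cb));
}

int main(void)
{
	struct gs_cb_sketch task_a = { .gsd = 1 };

	restore_cb(&task_a);	/* switch to a task that uses guarded storage */
	save_cb(&task_a);	/* switch away: capture its state */
	restore_cb(NULL);	/* next task never enabled GS: nothing to load */
	printf("gsd=%lu\n", hw_state.gsd);
	return 0;
}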
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a2ffec4139ad..bf9267930939 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h | |||
@@ -26,6 +26,8 @@ | |||
26 | #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 | 26 | #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 |
27 | #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 | 27 | #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 |
28 | #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 | 28 | #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 |
29 | #define KVM_DEV_FLIC_AISM 9 | ||
30 | #define KVM_DEV_FLIC_AIRQ_INJECT 10 | ||
29 | /* | 31 | /* |
30 | * We can have up to 4*64k pending subchannels + 8 adapter interrupts, | 32 | * We can have up to 4*64k pending subchannels + 8 adapter interrupts, |
31 | * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. | 33 | * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. |
@@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { | |||
41 | __u8 isc; | 43 | __u8 isc; |
42 | __u8 maskable; | 44 | __u8 maskable; |
43 | __u8 swap; | 45 | __u8 swap; |
44 | __u8 pad; | 46 | __u8 flags; |
47 | }; | ||
48 | |||
49 | #define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 | ||
50 | |||
51 | struct kvm_s390_ais_req { | ||
52 | __u8 isc; | ||
53 | __u16 mode; | ||
45 | }; | 54 | }; |
46 | 55 | ||
47 | #define KVM_S390_IO_ADAPTER_MASK 1 | 56 | #define KVM_S390_IO_ADAPTER_MASK 1 |
@@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { | |||
110 | #define KVM_S390_VM_CPU_FEAT_CMMA 10 | 119 | #define KVM_S390_VM_CPU_FEAT_CMMA 10 |
111 | #define KVM_S390_VM_CPU_FEAT_PFMFI 11 | 120 | #define KVM_S390_VM_CPU_FEAT_PFMFI 11 |
112 | #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 | 121 | #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 |
122 | #define KVM_S390_VM_CPU_FEAT_KSS 13 | ||
113 | struct kvm_s390_vm_cpu_feat { | 123 | struct kvm_s390_vm_cpu_feat { |
114 | __u64 feat[16]; | 124 | __u64 feat[16]; |
115 | }; | 125 | }; |
@@ -197,6 +207,10 @@ struct kvm_guest_debug_arch { | |||
197 | #define KVM_SYNC_VRS (1UL << 6) | 207 | #define KVM_SYNC_VRS (1UL << 6) |
198 | #define KVM_SYNC_RICCB (1UL << 7) | 208 | #define KVM_SYNC_RICCB (1UL << 7) |
199 | #define KVM_SYNC_FPRS (1UL << 8) | 209 | #define KVM_SYNC_FPRS (1UL << 8) |
210 | #define KVM_SYNC_GSCB (1UL << 9) | ||
211 | /* length and alignment of the sdnx as a power of two */ | ||
212 | #define SDNXC 8 | ||
213 | #define SDNXL (1UL << SDNXC) | ||
200 | /* definition of registers in kvm_run */ | 214 | /* definition of registers in kvm_run */ |
201 | struct kvm_sync_regs { | 215 | struct kvm_sync_regs { |
202 | __u64 prefix; /* prefix register */ | 216 | __u64 prefix; /* prefix register */ |
@@ -217,8 +231,16 @@ struct kvm_sync_regs { | |||
217 | }; | 231 | }; |
218 | __u8 reserved[512]; /* for future vector expansion */ | 232 | __u8 reserved[512]; /* for future vector expansion */ |
219 | __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ | 233 | __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ |
220 | __u8 padding[52]; /* riccb needs to be 64byte aligned */ | 234 | __u8 padding1[52]; /* riccb needs to be 64byte aligned */ |
221 | __u8 riccb[64]; /* runtime instrumentation controls block */ | 235 | __u8 riccb[64]; /* runtime instrumentation controls block */ |
236 | __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ | ||
237 | union { | ||
238 | __u8 sdnx[SDNXL]; /* state description annex */ | ||
239 | struct { | ||
240 | __u64 reserved1[2]; | ||
241 | __u64 gscb[4]; | ||
242 | }; | ||
243 | }; | ||
222 | }; | 244 | }; |
223 | 245 | ||
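SDNXC and SDNXL above describe the state description annex as a power of two: SDNXL = 1 << 8 = 256 bytes, which is also its required alignment, hence the 192 bytes of padding2 placed before sdnx in kvm_sync_regs. A trivial check of that size/alignment relation:

#include <stdio.h>

#define SDNXC 8
#define SDNXL (1UL << SDNXC)

int main(void)
{
	unsigned long addr = 0x1000;	/* hypothetical sdnx address */

	printf("SDNXL=%lu aligned=%d\n", SDNXL, (addr & (SDNXL - 1)) == 0);
	return 0;
}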
224 | #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) | 246 | #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) |
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 152de9b796e1..ea42290e7d51 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h | |||
@@ -313,7 +313,7 @@ | |||
313 | #define __NR_copy_file_range 375 | 313 | #define __NR_copy_file_range 375 |
314 | #define __NR_preadv2 376 | 314 | #define __NR_preadv2 376 |
315 | #define __NR_pwritev2 377 | 315 | #define __NR_pwritev2 377 |
316 | /* Number 378 is reserved for guarded storage */ | 316 | #define __NR_s390_guarded_storage 378 |
317 | #define __NR_statx 379 | 317 | #define __NR_statx 379 |
318 | #define NR_syscalls 380 | 318 | #define NR_syscalls 380 |
319 | 319 | ||
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 060ce548fe8b..aa5adbdaf200 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile | |||
@@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o | |||
57 | obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o | 57 | obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o |
58 | obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o | 58 | obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o |
59 | obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o | 59 | obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o |
60 | obj-y += runtime_instr.o cache.o fpu.o dumpstack.o | 60 | obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o |
61 | obj-y += entry.o reipl.o relocate_kernel.o | 61 | obj-y += entry.o reipl.o relocate_kernel.o |
62 | 62 | ||
63 | extra-y += head.o head64.o vmlinux.lds | 63 | extra-y += head.o head64.o vmlinux.lds |
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c4b3570ded5b..6bb29633e1f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c | |||
@@ -175,7 +175,7 @@ int main(void) | |||
175 | /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ | 175 | /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ |
176 | OFFSET(__LC_DUMP_REIPL, lowcore, ipib); | 176 | OFFSET(__LC_DUMP_REIPL, lowcore, ipib); |
177 | /* hardware defined lowcore locations 0x1000 - 0x18ff */ | 177 | /* hardware defined lowcore locations 0x1000 - 0x18ff */ |
178 | OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); | 178 | OFFSET(__LC_MCESAD, lowcore, mcesad); |
179 | OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); | 179 | OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); |
180 | OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); | 180 | OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); |
181 | OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); | 181 | OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); |
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index e89cc2e71db1..986642a3543b 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c | |||
@@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, | |||
178 | COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); | 178 | COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); |
179 | COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); | 179 | COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); |
180 | COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); | 180 | COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); |
181 | COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); | ||
181 | COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); | 182 | COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); |
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4e65c79cc5f2..95298a41076f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c | |||
@@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void) | |||
358 | S390_lowcore.machine_flags |= MACHINE_FLAG_NX; | 358 | S390_lowcore.machine_flags |= MACHINE_FLAG_NX; |
359 | __ctl_set_bit(0, 20); | 359 | __ctl_set_bit(0, 20); |
360 | } | 360 | } |
361 | if (test_facility(133)) | ||
362 | S390_lowcore.machine_flags |= MACHINE_FLAG_GS; | ||
361 | } | 363 | } |
362 | 364 | ||
363 | static inline void save_vector_registers(void) | 365 | static inline void save_vector_registers(void) |
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6a7d737d514c..fa8b8f28e08b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S | |||
@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT | |||
47 | STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE | 47 | STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE |
48 | 48 | ||
49 | _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ | 49 | _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ |
50 | _TIF_UPROBE) | 50 | _TIF_UPROBE | _TIF_GUARDED_STORAGE) |
51 | _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ | 51 | _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ |
52 | _TIF_SYSCALL_TRACEPOINT) | 52 | _TIF_SYSCALL_TRACEPOINT) |
53 | _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ | 53 | _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ |
@@ -332,6 +332,8 @@ ENTRY(system_call) | |||
332 | TSTMSK __TI_flags(%r12),_TIF_UPROBE | 332 | TSTMSK __TI_flags(%r12),_TIF_UPROBE |
333 | jo .Lsysc_uprobe_notify | 333 | jo .Lsysc_uprobe_notify |
334 | #endif | 334 | #endif |
335 | TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE | ||
336 | jo .Lsysc_guarded_storage | ||
335 | TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP | 337 | TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP |
336 | jo .Lsysc_singlestep | 338 | jo .Lsysc_singlestep |
337 | TSTMSK __TI_flags(%r12),_TIF_SIGPENDING | 339 | TSTMSK __TI_flags(%r12),_TIF_SIGPENDING |
@@ -409,6 +411,14 @@ ENTRY(system_call) | |||
409 | #endif | 411 | #endif |
410 | 412 | ||
411 | # | 413 | # |
414 | # _TIF_GUARDED_STORAGE is set, call guarded_storage_load | ||
415 | # | ||
416 | .Lsysc_guarded_storage: | ||
417 | lgr %r2,%r11 # pass pointer to pt_regs | ||
418 | larl %r14,.Lsysc_return | ||
419 | jg gs_load_bc_cb | ||
420 | |||
421 | # | ||
412 | # _PIF_PER_TRAP is set, call do_per_trap | 422 | # _PIF_PER_TRAP is set, call do_per_trap |
413 | # | 423 | # |
414 | .Lsysc_singlestep: | 424 | .Lsysc_singlestep: |
@@ -663,6 +673,8 @@ ENTRY(io_int_handler) | |||
663 | jo .Lio_sigpending | 673 | jo .Lio_sigpending |
664 | TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME | 674 | TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME |
665 | jo .Lio_notify_resume | 675 | jo .Lio_notify_resume |
676 | TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE | ||
677 | jo .Lio_guarded_storage | ||
666 | TSTMSK __LC_CPU_FLAGS,_CIF_FPU | 678 | TSTMSK __LC_CPU_FLAGS,_CIF_FPU |
667 | jo .Lio_vxrs | 679 | jo .Lio_vxrs |
668 | TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) | 680 | TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) |
@@ -697,6 +709,18 @@ ENTRY(io_int_handler) | |||
697 | jg load_fpu_regs | 709 | jg load_fpu_regs |
698 | 710 | ||
699 | # | 711 | # |
712 | # _TIF_GUARDED_STORAGE is set, call guarded_storage_load | ||
713 | # | ||
714 | .Lio_guarded_storage: | ||
715 | # TRACE_IRQS_ON already done at .Lio_return | ||
716 | ssm __LC_SVC_NEW_PSW # reenable interrupts | ||
717 | lgr %r2,%r11 # pass pointer to pt_regs | ||
718 | brasl %r14,gs_load_bc_cb | ||
719 | ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts | ||
720 | TRACE_IRQS_OFF | ||
721 | j .Lio_return | ||
722 | |||
723 | # | ||
700 | # _TIF_NEED_RESCHED is set, call schedule | 724 | # _TIF_NEED_RESCHED is set, call schedule |
701 | # | 725 | # |
702 | .Lio_reschedule: | 726 | .Lio_reschedule: |
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 33f901865326..dbf5f7e18246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h | |||
@@ -74,12 +74,14 @@ long sys_sigreturn(void); | |||
74 | 74 | ||
75 | long sys_s390_personality(unsigned int personality); | 75 | long sys_s390_personality(unsigned int personality); |
76 | long sys_s390_runtime_instr(int command, int signum); | 76 | long sys_s390_runtime_instr(int command, int signum); |
77 | long sys_s390_guarded_storage(int command, struct gs_cb __user *); | ||
77 | long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); | 78 | long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); |
78 | long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); | 79 | long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); |
79 | 80 | ||
80 | DECLARE_PER_CPU(u64, mt_cycles[8]); | 81 | DECLARE_PER_CPU(u64, mt_cycles[8]); |
81 | 82 | ||
82 | void verify_facilities(void); | 83 | void verify_facilities(void); |
84 | void gs_load_bc_cb(struct pt_regs *regs); | ||
83 | void set_fs_fixup(void); | 85 | void set_fs_fixup(void); |
84 | 86 | ||
85 | #endif /* _ENTRY_H */ | 87 | #endif /* _ENTRY_H */ |
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..6f064745c3b1 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * Copyright IBM Corp. 2016 | ||
3 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/syscalls.h> | ||
8 | #include <linux/signal.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/slab.h> | ||
11 | #include <asm/guarded_storage.h> | ||
12 | #include "entry.h" | ||
13 | |||
14 | void exit_thread_gs(void) | ||
15 | { | ||
16 | kfree(current->thread.gs_cb); | ||
17 | kfree(current->thread.gs_bc_cb); | ||
18 | current->thread.gs_cb = current->thread.gs_bc_cb = NULL; | ||
19 | } | ||
20 | |||
21 | static int gs_enable(void) | ||
22 | { | ||
23 | struct gs_cb *gs_cb; | ||
24 | |||
25 | if (!current->thread.gs_cb) { | ||
26 | gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); | ||
27 | if (!gs_cb) | ||
28 | return -ENOMEM; | ||
29 | gs_cb->gsd = 25; | ||
30 | preempt_disable(); | ||
31 | __ctl_set_bit(2, 4); | ||
32 | load_gs_cb(gs_cb); | ||
33 | current->thread.gs_cb = gs_cb; | ||
34 | preempt_enable(); | ||
35 | } | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int gs_disable(void) | ||
40 | { | ||
41 | if (current->thread.gs_cb) { | ||
42 | preempt_disable(); | ||
43 | kfree(current->thread.gs_cb); | ||
44 | current->thread.gs_cb = NULL; | ||
45 | __ctl_clear_bit(2, 4); | ||
46 | preempt_enable(); | ||
47 | } | ||
48 | return 0; | ||
49 | } | ||
50 | |||
51 | static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) | ||
52 | { | ||
53 | struct gs_cb *gs_cb; | ||
54 | |||
55 | gs_cb = current->thread.gs_bc_cb; | ||
56 | if (!gs_cb) { | ||
57 | gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); | ||
58 | if (!gs_cb) | ||
59 | return -ENOMEM; | ||
60 | current->thread.gs_bc_cb = gs_cb; | ||
61 | } | ||
62 | if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) | ||
63 | return -EFAULT; | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | static int gs_clear_bc_cb(void) | ||
68 | { | ||
69 | struct gs_cb *gs_cb; | ||
70 | |||
71 | gs_cb = current->thread.gs_bc_cb; | ||
72 | current->thread.gs_bc_cb = NULL; | ||
73 | kfree(gs_cb); | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | void gs_load_bc_cb(struct pt_regs *regs) | ||
78 | { | ||
79 | struct gs_cb *gs_cb; | ||
80 | |||
81 | preempt_disable(); | ||
82 | clear_thread_flag(TIF_GUARDED_STORAGE); | ||
83 | gs_cb = current->thread.gs_bc_cb; | ||
84 | if (gs_cb) { | ||
85 | kfree(current->thread.gs_cb); | ||
86 | current->thread.gs_bc_cb = NULL; | ||
87 | __ctl_set_bit(2, 4); | ||
88 | load_gs_cb(gs_cb); | ||
89 | current->thread.gs_cb = gs_cb; | ||
90 | } | ||
91 | preempt_enable(); | ||
92 | } | ||
93 | |||
94 | static int gs_broadcast(void) | ||
95 | { | ||
96 | struct task_struct *sibling; | ||
97 | |||
98 | read_lock(&tasklist_lock); | ||
99 | for_each_thread(current, sibling) { | ||
100 | if (!sibling->thread.gs_bc_cb) | ||
101 | continue; | ||
102 | if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) | ||
103 | kick_process(sibling); | ||
104 | } | ||
105 | read_unlock(&tasklist_lock); | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | SYSCALL_DEFINE2(s390_guarded_storage, int, command, | ||
110 | struct gs_cb __user *, gs_cb) | ||
111 | { | ||
112 | if (!MACHINE_HAS_GS) | ||
113 | return -EOPNOTSUPP; | ||
114 | switch (command) { | ||
115 | case GS_ENABLE: | ||
116 | return gs_enable(); | ||
117 | case GS_DISABLE: | ||
118 | return gs_disable(); | ||
119 | case GS_SET_BC_CB: | ||
120 | return gs_set_bc_cb(gs_cb); | ||
121 | case GS_CLEAR_BC_CB: | ||
122 | return gs_clear_bc_cb(); | ||
123 | case GS_BROADCAST: | ||
124 | return gs_broadcast(); | ||
125 | default: | ||
126 | return -EINVAL; | ||
127 | } | ||
128 | } | ||
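
The new syscall implemented above is reachable from userspace as number 378 (see the unistd.h hunk earlier in this patch). A minimal, hypothetical caller could look like the sketch below; the GS_* command values are assumed from the uapi guarded_storage.h header added elsewhere in this series, and GS_ENABLE/GS_DISABLE ignore the pointer argument.

    /* Hypothetical userspace caller (not part of this patch). */
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_s390_guarded_storage
    #define __NR_s390_guarded_storage 378   /* from the unistd.h hunk above */
    #endif
    /* Command values assumed from the uapi guarded_storage.h header. */
    #define GS_ENABLE   0
    #define GS_DISABLE  1

    int main(void)
    {
            /* GS_ENABLE allocates and loads an initial control block in the
             * kernel; the pointer argument is ignored for this command. */
            if (syscall(__NR_s390_guarded_storage, GS_ENABLE, NULL) < 0) {
                    perror("GS_ENABLE");    /* EOPNOTSUPP without facility 133 */
                    return 1;
            }
            syscall(__NR_s390_guarded_storage, GS_DISABLE, NULL);
            return 0;
    }
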
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3074c1d83829..db5658daf994 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/os_info.h> | 28 | #include <asm/os_info.h> |
29 | #include <asm/switch_to.h> | 29 | #include <asm/switch_to.h> |
30 | #include <asm/nmi.h> | ||
30 | 31 | ||
31 | typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); | 32 | typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); |
32 | 33 | ||
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image) | |||
102 | */ | 103 | */ |
103 | static noinline void __machine_kdump(void *image) | 104 | static noinline void __machine_kdump(void *image) |
104 | { | 105 | { |
106 | struct mcesa *mcesa; | ||
107 | unsigned long cr2_old, cr2_new; | ||
105 | int this_cpu, cpu; | 108 | int this_cpu, cpu; |
106 | 109 | ||
107 | lgr_info_log(); | 110 | lgr_info_log(); |
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image) | |||
114 | continue; | 117 | continue; |
115 | } | 118 | } |
116 | /* Store status of the boot CPU */ | 119 | /* Store status of the boot CPU */ |
120 | mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); | ||
117 | if (MACHINE_HAS_VX) | 121 | if (MACHINE_HAS_VX) |
118 | save_vx_regs((void *) &S390_lowcore.vector_save_area); | 122 | save_vx_regs((__vector128 *) mcesa->vector_save_area); |
123 | if (MACHINE_HAS_GS) { | ||
124 | __ctl_store(cr2_old, 2, 2); | ||
125 | cr2_new = cr2_old | (1UL << 4); | ||
126 | __ctl_load(cr2_new, 2, 2); | ||
127 | save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); | ||
128 | __ctl_load(cr2_old, 2, 2); | ||
129 | } | ||
119 | /* | 130 | /* |
120 | * To create a good backchain for this CPU in the dump store_status | 131 | * To create a good backchain for this CPU in the dump store_status |
121 | * is passed the address of a function. The address is saved into | 132 | * is passed the address of a function. The address is saved into |
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9bf8327154ee..985589523970 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c | |||
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) | |||
106 | int kill_task; | 106 | int kill_task; |
107 | u64 zero; | 107 | u64 zero; |
108 | void *fpt_save_area; | 108 | void *fpt_save_area; |
109 | struct mcesa *mcesa; | ||
109 | 110 | ||
110 | kill_task = 0; | 111 | kill_task = 0; |
111 | zero = 0; | 112 | zero = 0; |
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) | |||
165 | : : "Q" (S390_lowcore.fpt_creg_save_area)); | 166 | : : "Q" (S390_lowcore.fpt_creg_save_area)); |
166 | } | 167 | } |
167 | 168 | ||
169 | mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); | ||
168 | if (!MACHINE_HAS_VX) { | 170 | if (!MACHINE_HAS_VX) { |
169 | /* Validate floating point registers */ | 171 | /* Validate floating point registers */ |
170 | asm volatile( | 172 | asm volatile( |
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) | |||
209 | " la 1,%0\n" | 211 | " la 1,%0\n" |
210 | " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ | 212 | " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ |
211 | " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ | 213 | " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ |
212 | : : "Q" (*(struct vx_array *) | 214 | : : "Q" (*(struct vx_array *) mcesa->vector_save_area) |
213 | &S390_lowcore.vector_save_area) : "1"); | 215 | : "1"); |
214 | __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); | 216 | __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); |
215 | } | 217 | } |
216 | /* Validate access registers */ | 218 | /* Validate access registers */ |
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode) | |||
224 | */ | 226 | */ |
225 | kill_task = 1; | 227 | kill_task = 1; |
226 | } | 228 | } |
229 | /* Validate guarded storage registers */ | ||
230 | if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { | ||
231 | if (!mci.gs) | ||
232 | /* | ||
233 | * Guarded storage register can't be restored and | ||
234 | 							 * the current process uses guarded storage. | ||
235 | * It has to be terminated. | ||
236 | */ | ||
237 | kill_task = 1; | ||
238 | else | ||
239 | load_gs_cb((struct gs_cb *) | ||
240 | mcesa->guarded_storage_save_area); | ||
241 | } | ||
227 | /* | 242 | /* |
228 | * We don't even try to validate the TOD register, since we simply | 243 | * We don't even try to validate the TOD register, since we simply |
229 | * can't write something sensible into that register. | 244 | * can't write something sensible into that register. |
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f29e41c5e2ec..999d7154bbdc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c | |||
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void); | |||
73 | */ | 73 | */ |
74 | void exit_thread(struct task_struct *tsk) | 74 | void exit_thread(struct task_struct *tsk) |
75 | { | 75 | { |
76 | if (tsk == current) | 76 | if (tsk == current) { |
77 | exit_thread_runtime_instr(); | 77 | exit_thread_runtime_instr(); |
78 | exit_thread_gs(); | ||
79 | } | ||
78 | } | 80 | } |
79 | 81 | ||
80 | void flush_thread(void) | 82 | void flush_thread(void) |
@@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, | |||
159 | /* Don't copy runtime instrumentation info */ | 161 | /* Don't copy runtime instrumentation info */ |
160 | p->thread.ri_cb = NULL; | 162 | p->thread.ri_cb = NULL; |
161 | frame->childregs.psw.mask &= ~PSW_MASK_RI; | 163 | frame->childregs.psw.mask &= ~PSW_MASK_RI; |
164 | /* Don't copy guarded storage control block */ | ||
165 | p->thread.gs_cb = NULL; | ||
166 | p->thread.gs_bc_cb = NULL; | ||
162 | 167 | ||
163 | /* Set a new TLS ? */ | 168 | /* Set a new TLS ? */ |
164 | if (clone_flags & CLONE_SETTLS) { | 169 | if (clone_flags & CLONE_SETTLS) { |
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 928b929a6261..c73709869447 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c | |||
@@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) | |||
95 | { | 95 | { |
96 | static const char *hwcap_str[] = { | 96 | static const char *hwcap_str[] = { |
97 | "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", | 97 | "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", |
98 | "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" | 98 | "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs" |
99 | }; | 99 | }; |
100 | static const char * const int_hwcap_str[] = { | 100 | static const char * const int_hwcap_str[] = { |
101 | "sie" | 101 | "sie" |
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c14df0a1ec3c..c933e255b5d5 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c | |||
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task) | |||
44 | struct pt_regs *regs = task_pt_regs(task); | 44 | struct pt_regs *regs = task_pt_regs(task); |
45 | struct thread_struct *thread = &task->thread; | 45 | struct thread_struct *thread = &task->thread; |
46 | struct per_regs old, new; | 46 | struct per_regs old, new; |
47 | 47 | unsigned long cr0_old, cr0_new; | |
48 | unsigned long cr2_old, cr2_new; | ||
49 | int cr0_changed, cr2_changed; | ||
50 | |||
51 | __ctl_store(cr0_old, 0, 0); | ||
52 | __ctl_store(cr2_old, 2, 2); | ||
53 | cr0_new = cr0_old; | ||
54 | cr2_new = cr2_old; | ||
48 | /* Take care of the enable/disable of transactional execution. */ | 55 | /* Take care of the enable/disable of transactional execution. */ |
49 | if (MACHINE_HAS_TE) { | 56 | if (MACHINE_HAS_TE) { |
50 | unsigned long cr, cr_new; | ||
51 | |||
52 | __ctl_store(cr, 0, 0); | ||
53 | /* Set or clear transaction execution TXC bit 8. */ | 57 | /* Set or clear transaction execution TXC bit 8. */ |
54 | cr_new = cr | (1UL << 55); | 58 | cr0_new |= (1UL << 55); |
55 | if (task->thread.per_flags & PER_FLAG_NO_TE) | 59 | if (task->thread.per_flags & PER_FLAG_NO_TE) |
56 | cr_new &= ~(1UL << 55); | 60 | cr0_new &= ~(1UL << 55); |
57 | if (cr_new != cr) | ||
58 | __ctl_load(cr_new, 0, 0); | ||
59 | /* Set or clear transaction execution TDC bits 62 and 63. */ | 61 | /* Set or clear transaction execution TDC bits 62 and 63. */ |
60 | __ctl_store(cr, 2, 2); | 62 | cr2_new &= ~3UL; |
61 | cr_new = cr & ~3UL; | ||
62 | if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { | 63 | if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { |
63 | if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) | 64 | if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) |
64 | cr_new |= 1UL; | 65 | cr2_new |= 1UL; |
65 | else | 66 | else |
66 | cr_new |= 2UL; | 67 | cr2_new |= 2UL; |
67 | } | 68 | } |
68 | if (cr_new != cr) | ||
69 | __ctl_load(cr_new, 2, 2); | ||
70 | } | 69 | } |
70 | /* Take care of enable/disable of guarded storage. */ | ||
71 | if (MACHINE_HAS_GS) { | ||
72 | cr2_new &= ~(1UL << 4); | ||
73 | if (task->thread.gs_cb) | ||
74 | cr2_new |= (1UL << 4); | ||
75 | } | ||
76 | /* Load control register 0/2 iff changed */ | ||
77 | cr0_changed = cr0_new != cr0_old; | ||
78 | cr2_changed = cr2_new != cr2_old; | ||
79 | if (cr0_changed) | ||
80 | __ctl_load(cr0_new, 0, 0); | ||
81 | if (cr2_changed) | ||
82 | __ctl_load(cr2_new, 2, 2); | ||
71 | /* Copy user specified PER registers */ | 83 | /* Copy user specified PER registers */ |
72 | new.control = thread->per_user.control; | 84 | new.control = thread->per_user.control; |
73 | new.start = thread->per_user.start; | 85 | new.start = thread->per_user.start; |
@@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target, | |||
1137 | data, 0, sizeof(unsigned int)); | 1149 | data, 0, sizeof(unsigned int)); |
1138 | } | 1150 | } |
1139 | 1151 | ||
1152 | static int s390_gs_cb_get(struct task_struct *target, | ||
1153 | const struct user_regset *regset, | ||
1154 | unsigned int pos, unsigned int count, | ||
1155 | void *kbuf, void __user *ubuf) | ||
1156 | { | ||
1157 | struct gs_cb *data = target->thread.gs_cb; | ||
1158 | |||
1159 | if (!MACHINE_HAS_GS) | ||
1160 | return -ENODEV; | ||
1161 | if (!data) | ||
1162 | return -ENODATA; | ||
1163 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
1164 | data, 0, sizeof(struct gs_cb)); | ||
1165 | } | ||
1166 | |||
1167 | static int s390_gs_cb_set(struct task_struct *target, | ||
1168 | const struct user_regset *regset, | ||
1169 | unsigned int pos, unsigned int count, | ||
1170 | const void *kbuf, const void __user *ubuf) | ||
1171 | { | ||
1172 | struct gs_cb *data = target->thread.gs_cb; | ||
1173 | |||
1174 | if (!MACHINE_HAS_GS) | ||
1175 | return -ENODEV; | ||
1176 | if (!data) | ||
1177 | return -ENODATA; | ||
1178 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
1179 | data, 0, sizeof(struct gs_cb)); | ||
1180 | } | ||
1181 | |||
1140 | static const struct user_regset s390_regsets[] = { | 1182 | static const struct user_regset s390_regsets[] = { |
1141 | { | 1183 | { |
1142 | .core_note_type = NT_PRSTATUS, | 1184 | .core_note_type = NT_PRSTATUS, |
@@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = { | |||
1194 | .get = s390_vxrs_high_get, | 1236 | .get = s390_vxrs_high_get, |
1195 | .set = s390_vxrs_high_set, | 1237 | .set = s390_vxrs_high_set, |
1196 | }, | 1238 | }, |
1239 | { | ||
1240 | .core_note_type = NT_S390_GS_CB, | ||
1241 | .n = sizeof(struct gs_cb) / sizeof(__u64), | ||
1242 | .size = sizeof(__u64), | ||
1243 | .align = sizeof(__u64), | ||
1244 | .get = s390_gs_cb_get, | ||
1245 | .set = s390_gs_cb_set, | ||
1246 | }, | ||
1197 | }; | 1247 | }; |
1198 | 1248 | ||
1199 | static const struct user_regset_view user_s390_view = { | 1249 | static const struct user_regset_view user_s390_view = { |
@@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = { | |||
1422 | .get = s390_compat_regs_high_get, | 1472 | .get = s390_compat_regs_high_get, |
1423 | .set = s390_compat_regs_high_set, | 1473 | .set = s390_compat_regs_high_set, |
1424 | }, | 1474 | }, |
1475 | { | ||
1476 | .core_note_type = NT_S390_GS_CB, | ||
1477 | .n = sizeof(struct gs_cb) / sizeof(__u64), | ||
1478 | .size = sizeof(__u64), | ||
1479 | .align = sizeof(__u64), | ||
1480 | .get = s390_gs_cb_get, | ||
1481 | .set = s390_gs_cb_set, | ||
1482 | }, | ||
1425 | }; | 1483 | }; |
1426 | 1484 | ||
1427 | static const struct user_regset_view user_s390_compat_view = { | 1485 | static const struct user_regset_view user_s390_compat_view = { |
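
A debugger can fetch the new NT_S390_GS_CB regset registered above with PTRACE_GETREGSET. A rough tracer-side sketch (hypothetical, not from this patch) follows; the note-type constant is assumed in case the libc headers do not yet define it, and the 4*u64 buffer mirrors the regset size declared above.

    /* Tracer-side sketch (hypothetical): read the guarded-storage control
     * block of a stopped tracee. */
    #include <stdint.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    #ifndef NT_S390_GS_CB
    #define NT_S390_GS_CB 0x30b     /* assumed; normally from <elf.h> */
    #endif

    long read_gs_cb(pid_t pid, uint64_t gs_cb[4])
    {
            struct iovec iov = {
                    .iov_base = gs_cb,
                    .iov_len  = 4 * sizeof(uint64_t),   /* matches .n * .size above */
            };

            /* Fails with ENODEV without the GS facility and with ENODATA if
             * the tracee never enabled guarded storage (see s390_gs_cb_get). */
            return ptrace(PTRACE_GETREGSET, pid,
                          (void *)(unsigned long) NT_S390_GS_CB, &iov);
    }
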
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 911dc0b49be0..3ae756c0db3d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c | |||
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void) | |||
339 | lc->stfl_fac_list = S390_lowcore.stfl_fac_list; | 339 | lc->stfl_fac_list = S390_lowcore.stfl_fac_list; |
340 | memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, | 340 | memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, |
341 | MAX_FACILITY_BIT/8); | 341 | MAX_FACILITY_BIT/8); |
342 | if (MACHINE_HAS_VX) | 342 | if (MACHINE_HAS_VX || MACHINE_HAS_GS) { |
343 | lc->vector_save_area_addr = | 343 | unsigned long bits, size; |
344 | (unsigned long) &lc->vector_save_area; | 344 | |
345 | bits = MACHINE_HAS_GS ? 11 : 10; | ||
346 | size = 1UL << bits; | ||
347 | lc->mcesad = (__u64) memblock_virt_alloc(size, size); | ||
348 | if (MACHINE_HAS_GS) | ||
349 | lc->mcesad |= bits; | ||
350 | } | ||
345 | lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; | 351 | lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; |
346 | lc->sync_enter_timer = S390_lowcore.sync_enter_timer; | 352 | lc->sync_enter_timer = S390_lowcore.sync_enter_timer; |
347 | lc->async_enter_timer = S390_lowcore.async_enter_timer; | 353 | lc->async_enter_timer = S390_lowcore.async_enter_timer; |
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void) | |||
779 | elf_hwcap |= HWCAP_S390_VXRS_BCD; | 785 | elf_hwcap |= HWCAP_S390_VXRS_BCD; |
780 | } | 786 | } |
781 | 787 | ||
788 | /* | ||
789 | * Guarded storage support HWCAP_S390_GS is bit 12. | ||
790 | */ | ||
791 | if (MACHINE_HAS_GS) | ||
792 | elf_hwcap |= HWCAP_S390_GS; | ||
793 | |||
782 | get_cpu_id(&cpu_id); | 794 | get_cpu_id(&cpu_id); |
783 | add_device_randomness(&cpu_id, sizeof(cpu_id)); | 795 | add_device_randomness(&cpu_id, sizeof(cpu_id)); |
784 | switch (cpu_id.machine) { | 796 | switch (cpu_id.machine) { |
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f1..286bcee800f4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/os_info.h> | 51 | #include <asm/os_info.h> |
52 | #include <asm/sigp.h> | 52 | #include <asm/sigp.h> |
53 | #include <asm/idle.h> | 53 | #include <asm/idle.h> |
54 | #include <asm/nmi.h> | ||
54 | #include "entry.h" | 55 | #include "entry.h" |
55 | 56 | ||
56 | enum { | 57 | enum { |
@@ -78,6 +79,8 @@ struct pcpu { | |||
78 | static u8 boot_core_type; | 79 | static u8 boot_core_type; |
79 | static struct pcpu pcpu_devices[NR_CPUS]; | 80 | static struct pcpu pcpu_devices[NR_CPUS]; |
80 | 81 | ||
82 | static struct kmem_cache *pcpu_mcesa_cache; | ||
83 | |||
81 | unsigned int smp_cpu_mt_shift; | 84 | unsigned int smp_cpu_mt_shift; |
82 | EXPORT_SYMBOL(smp_cpu_mt_shift); | 85 | EXPORT_SYMBOL(smp_cpu_mt_shift); |
83 | 86 | ||
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) | |||
188 | static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) | 191 | static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) |
189 | { | 192 | { |
190 | unsigned long async_stack, panic_stack; | 193 | unsigned long async_stack, panic_stack; |
194 | unsigned long mcesa_origin, mcesa_bits; | ||
191 | struct lowcore *lc; | 195 | struct lowcore *lc; |
192 | 196 | ||
197 | mcesa_origin = mcesa_bits = 0; | ||
193 | if (pcpu != &pcpu_devices[0]) { | 198 | if (pcpu != &pcpu_devices[0]) { |
194 | pcpu->lowcore = (struct lowcore *) | 199 | pcpu->lowcore = (struct lowcore *) |
195 | __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); | 200 | __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); |
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) | |||
197 | panic_stack = __get_free_page(GFP_KERNEL); | 202 | panic_stack = __get_free_page(GFP_KERNEL); |
198 | if (!pcpu->lowcore || !panic_stack || !async_stack) | 203 | if (!pcpu->lowcore || !panic_stack || !async_stack) |
199 | goto out; | 204 | goto out; |
205 | if (MACHINE_HAS_VX || MACHINE_HAS_GS) { | ||
206 | mcesa_origin = (unsigned long) | ||
207 | kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); | ||
208 | if (!mcesa_origin) | ||
209 | goto out; | ||
210 | mcesa_bits = MACHINE_HAS_GS ? 11 : 0; | ||
211 | } | ||
200 | } else { | 212 | } else { |
201 | async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; | 213 | async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; |
202 | panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; | 214 | panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; |
215 | mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; | ||
216 | mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; | ||
203 | } | 217 | } |
204 | lc = pcpu->lowcore; | 218 | lc = pcpu->lowcore; |
205 | memcpy(lc, &S390_lowcore, 512); | 219 | memcpy(lc, &S390_lowcore, 512); |
206 | memset((char *) lc + 512, 0, sizeof(*lc) - 512); | 220 | memset((char *) lc + 512, 0, sizeof(*lc) - 512); |
207 | lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; | 221 | lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; |
208 | lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; | 222 | lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; |
223 | lc->mcesad = mcesa_origin | mcesa_bits; | ||
209 | lc->cpu_nr = cpu; | 224 | lc->cpu_nr = cpu; |
210 | lc->spinlock_lockval = arch_spin_lockval(cpu); | 225 | lc->spinlock_lockval = arch_spin_lockval(cpu); |
211 | if (MACHINE_HAS_VX) | ||
212 | lc->vector_save_area_addr = | ||
213 | (unsigned long) &lc->vector_save_area; | ||
214 | if (vdso_alloc_per_cpu(lc)) | 226 | if (vdso_alloc_per_cpu(lc)) |
215 | goto out; | 227 | goto out; |
216 | lowcore_ptr[cpu] = lc; | 228 | lowcore_ptr[cpu] = lc; |
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) | |||
218 | return 0; | 230 | return 0; |
219 | out: | 231 | out: |
220 | if (pcpu != &pcpu_devices[0]) { | 232 | if (pcpu != &pcpu_devices[0]) { |
233 | if (mcesa_origin) | ||
234 | kmem_cache_free(pcpu_mcesa_cache, | ||
235 | (void *) mcesa_origin); | ||
221 | free_page(panic_stack); | 236 | free_page(panic_stack); |
222 | free_pages(async_stack, ASYNC_ORDER); | 237 | free_pages(async_stack, ASYNC_ORDER); |
223 | free_pages((unsigned long) pcpu->lowcore, LC_ORDER); | 238 | free_pages((unsigned long) pcpu->lowcore, LC_ORDER); |
@@ -229,11 +244,17 @@ out: | |||
229 | 244 | ||
230 | static void pcpu_free_lowcore(struct pcpu *pcpu) | 245 | static void pcpu_free_lowcore(struct pcpu *pcpu) |
231 | { | 246 | { |
247 | unsigned long mcesa_origin; | ||
248 | |||
232 | pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); | 249 | pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); |
233 | lowcore_ptr[pcpu - pcpu_devices] = NULL; | 250 | lowcore_ptr[pcpu - pcpu_devices] = NULL; |
234 | vdso_free_per_cpu(pcpu->lowcore); | 251 | vdso_free_per_cpu(pcpu->lowcore); |
235 | if (pcpu == &pcpu_devices[0]) | 252 | if (pcpu == &pcpu_devices[0]) |
236 | return; | 253 | return; |
254 | if (MACHINE_HAS_VX || MACHINE_HAS_GS) { | ||
255 | mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; | ||
256 | kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); | ||
257 | } | ||
237 | free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); | 258 | free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); |
238 | free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); | 259 | free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); |
239 | free_pages((unsigned long) pcpu->lowcore, LC_ORDER); | 260 | free_pages((unsigned long) pcpu->lowcore, LC_ORDER); |
@@ -550,9 +571,11 @@ int smp_store_status(int cpu) | |||
550 | if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, | 571 | if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, |
551 | pa) != SIGP_CC_ORDER_CODE_ACCEPTED) | 572 | pa) != SIGP_CC_ORDER_CODE_ACCEPTED) |
552 | return -EIO; | 573 | return -EIO; |
553 | if (!MACHINE_HAS_VX) | 574 | if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) |
554 | return 0; | 575 | return 0; |
555 | pa = __pa(pcpu->lowcore->vector_save_area_addr); | 576 | pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK); |
577 | if (MACHINE_HAS_GS) | ||
578 | pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; | ||
556 | if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, | 579 | if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, |
557 | pa) != SIGP_CC_ORDER_CODE_ACCEPTED) | 580 | pa) != SIGP_CC_ORDER_CODE_ACCEPTED) |
558 | return -EIO; | 581 | return -EIO; |
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void) | |||
897 | 920 | ||
898 | void __init smp_prepare_cpus(unsigned int max_cpus) | 921 | void __init smp_prepare_cpus(unsigned int max_cpus) |
899 | { | 922 | { |
923 | unsigned long size; | ||
924 | |||
900 | /* request the 0x1201 emergency signal external interrupt */ | 925 | /* request the 0x1201 emergency signal external interrupt */ |
901 | if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) | 926 | if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) |
902 | panic("Couldn't request external interrupt 0x1201"); | 927 | panic("Couldn't request external interrupt 0x1201"); |
903 | /* request the 0x1202 external call external interrupt */ | 928 | /* request the 0x1202 external call external interrupt */ |
904 | if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) | 929 | if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) |
905 | panic("Couldn't request external interrupt 0x1202"); | 930 | panic("Couldn't request external interrupt 0x1202"); |
931 | /* create slab cache for the machine-check-extended-save-areas */ | ||
932 | if (MACHINE_HAS_VX || MACHINE_HAS_GS) { | ||
933 | size = 1UL << (MACHINE_HAS_GS ? 11 : 10); | ||
934 | pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", | ||
935 | size, size, 0, NULL); | ||
936 | if (!pcpu_mcesa_cache) | ||
937 | panic("Couldn't create nmi save area cache"); | ||
938 | } | ||
906 | } | 939 | } |
907 | 940 | ||
908 | void __init smp_prepare_boot_cpu(void) | 941 | void __init smp_prepare_boot_cpu(void) |
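
For context on the lowcore mcesad value set up in the setup.c and smp.c hunks above: the doubleword carries both the origin of the machine-check extended save area and, when guarded storage exists, the area size as a power of two (the length code). A small illustrative helper is sketched below, with the mask value assumed; it is not kernel code.

    /* Illustrative helper (not kernel code): pack the MCESA origin and
     * length code the way the setup.c/smp.c hunks above do. */
    #include <assert.h>
    #include <stdint.h>

    #define MCESA_LC_MASK       0x3ffUL          /* assumed low-bit length-code field */
    #define MCESA_ORIGIN_MASK   (~MCESA_LC_MASK)

    static uint64_t mcesad_pack(uint64_t origin, int machine_has_gs)
    {
            unsigned int bits = machine_has_gs ? 11 : 10;  /* 2 KiB with GS, 1 KiB without */

            assert((origin & ((1UL << bits) - 1)) == 0);   /* origin must be size-aligned */
            /* The length code is stored only when guarded storage exists,
             * mirroring "mcesa_bits = MACHINE_HAS_GS ? 11 : 0" above. */
            return origin | (machine_has_gs ? (uint64_t) bits : 0);
    }
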
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2659b5cfeddb..54fce7b065de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S | |||
@@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) | |||
386 | SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ | 386 | SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ |
387 | SYSCALL(sys_preadv2,compat_sys_preadv2) | 387 | SYSCALL(sys_preadv2,compat_sys_preadv2) |
388 | SYSCALL(sys_pwritev2,compat_sys_pwritev2) | 388 | SYSCALL(sys_pwritev2,compat_sys_pwritev2) |
389 | NI_SYSCALL | 389 | SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ |
390 | SYSCALL(sys_statx,compat_sys_statx) | 390 | SYSCALL(sys_statx,compat_sys_statx) |
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index d55c829a5944..709aca9ceb05 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c | |||
@@ -262,7 +262,7 @@ struct aste { | |||
262 | 262 | ||
263 | int ipte_lock_held(struct kvm_vcpu *vcpu) | 263 | int ipte_lock_held(struct kvm_vcpu *vcpu) |
264 | { | 264 | { |
265 | if (vcpu->arch.sie_block->eca & 1) { | 265 | if (vcpu->arch.sie_block->eca & ECA_SII) { |
266 | int rc; | 266 | int rc; |
267 | 267 | ||
268 | read_lock(&vcpu->kvm->arch.sca_lock); | 268 | read_lock(&vcpu->kvm->arch.sca_lock); |
@@ -361,7 +361,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) | |||
361 | 361 | ||
362 | void ipte_lock(struct kvm_vcpu *vcpu) | 362 | void ipte_lock(struct kvm_vcpu *vcpu) |
363 | { | 363 | { |
364 | if (vcpu->arch.sie_block->eca & 1) | 364 | if (vcpu->arch.sie_block->eca & ECA_SII) |
365 | ipte_lock_siif(vcpu); | 365 | ipte_lock_siif(vcpu); |
366 | else | 366 | else |
367 | ipte_lock_simple(vcpu); | 367 | ipte_lock_simple(vcpu); |
@@ -369,7 +369,7 @@ void ipte_lock(struct kvm_vcpu *vcpu) | |||
369 | 369 | ||
370 | void ipte_unlock(struct kvm_vcpu *vcpu) | 370 | void ipte_unlock(struct kvm_vcpu *vcpu) |
371 | { | 371 | { |
372 | if (vcpu->arch.sie_block->eca & 1) | 372 | if (vcpu->arch.sie_block->eca & ECA_SII) |
373 | ipte_unlock_siif(vcpu); | 373 | ipte_unlock_siif(vcpu); |
374 | else | 374 | else |
375 | ipte_unlock_simple(vcpu); | 375 | ipte_unlock_simple(vcpu); |
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 59920f96ebc0..a4752bf6b526 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -35,6 +35,7 @@ static const intercept_handler_t instruction_handlers[256] = { | |||
35 | [0xb6] = kvm_s390_handle_stctl, | 35 | [0xb6] = kvm_s390_handle_stctl, |
36 | [0xb7] = kvm_s390_handle_lctl, | 36 | [0xb7] = kvm_s390_handle_lctl, |
37 | [0xb9] = kvm_s390_handle_b9, | 37 | [0xb9] = kvm_s390_handle_b9, |
38 | [0xe3] = kvm_s390_handle_e3, | ||
38 | [0xe5] = kvm_s390_handle_e5, | 39 | [0xe5] = kvm_s390_handle_e5, |
39 | [0xeb] = kvm_s390_handle_eb, | 40 | [0xeb] = kvm_s390_handle_eb, |
40 | }; | 41 | }; |
@@ -368,8 +369,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu) | |||
368 | trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, | 369 | trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, |
369 | vcpu->arch.sie_block->ipb); | 370 | vcpu->arch.sie_block->ipb); |
370 | 371 | ||
371 | if (vcpu->arch.sie_block->ipa == 0xb256 && | 372 | if (vcpu->arch.sie_block->ipa == 0xb256) |
372 | test_kvm_facility(vcpu->kvm, 74)) | ||
373 | return handle_sthyi(vcpu); | 373 | return handle_sthyi(vcpu); |
374 | 374 | ||
375 | if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) | 375 | if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) |
@@ -404,28 +404,31 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) | |||
404 | return -EOPNOTSUPP; | 404 | return -EOPNOTSUPP; |
405 | 405 | ||
406 | switch (vcpu->arch.sie_block->icptcode) { | 406 | switch (vcpu->arch.sie_block->icptcode) { |
407 | case 0x10: | 407 | case ICPT_EXTREQ: |
408 | case 0x18: | 408 | case ICPT_IOREQ: |
409 | return handle_noop(vcpu); | 409 | return handle_noop(vcpu); |
410 | case 0x04: | 410 | case ICPT_INST: |
411 | rc = handle_instruction(vcpu); | 411 | rc = handle_instruction(vcpu); |
412 | break; | 412 | break; |
413 | case 0x08: | 413 | case ICPT_PROGI: |
414 | return handle_prog(vcpu); | 414 | return handle_prog(vcpu); |
415 | case 0x14: | 415 | case ICPT_EXTINT: |
416 | return handle_external_interrupt(vcpu); | 416 | return handle_external_interrupt(vcpu); |
417 | case 0x1c: | 417 | case ICPT_WAIT: |
418 | return kvm_s390_handle_wait(vcpu); | 418 | return kvm_s390_handle_wait(vcpu); |
419 | case 0x20: | 419 | case ICPT_VALIDITY: |
420 | return handle_validity(vcpu); | 420 | return handle_validity(vcpu); |
421 | case 0x28: | 421 | case ICPT_STOP: |
422 | return handle_stop(vcpu); | 422 | return handle_stop(vcpu); |
423 | case 0x2c: | 423 | case ICPT_OPEREXC: |
424 | rc = handle_operexc(vcpu); | 424 | rc = handle_operexc(vcpu); |
425 | break; | 425 | break; |
426 | case 0x38: | 426 | case ICPT_PARTEXEC: |
427 | rc = handle_partial_execution(vcpu); | 427 | rc = handle_partial_execution(vcpu); |
428 | break; | 428 | break; |
429 | case ICPT_KSS: | ||
430 | rc = kvm_s390_skey_check_enable(vcpu); | ||
431 | break; | ||
429 | default: | 432 | default: |
430 | return -EOPNOTSUPP; | 433 | return -EOPNOTSUPP; |
431 | } | 434 | } |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0f8f14199734..caf15c8a8948 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -410,6 +410,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, | |||
410 | struct kvm_s390_mchk_info *mchk) | 410 | struct kvm_s390_mchk_info *mchk) |
411 | { | 411 | { |
412 | unsigned long ext_sa_addr; | 412 | unsigned long ext_sa_addr; |
413 | unsigned long lc; | ||
413 | freg_t fprs[NUM_FPRS]; | 414 | freg_t fprs[NUM_FPRS]; |
414 | union mci mci; | 415 | union mci mci; |
415 | int rc; | 416 | int rc; |
@@ -418,12 +419,34 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, | |||
418 | /* take care of lazy register loading */ | 419 | /* take care of lazy register loading */ |
419 | save_fpu_regs(); | 420 | save_fpu_regs(); |
420 | save_access_regs(vcpu->run->s.regs.acrs); | 421 | save_access_regs(vcpu->run->s.regs.acrs); |
422 | if (MACHINE_HAS_GS && vcpu->arch.gs_enabled) | ||
423 | save_gs_cb(current->thread.gs_cb); | ||
421 | 424 | ||
422 | /* Extended save area */ | 425 | /* Extended save area */ |
423 | rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr, | 426 | rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, |
424 | sizeof(unsigned long)); | 427 | sizeof(unsigned long)); |
425 | /* Only bits 0-53 are used for address formation */ | 428 | /* Only bits 0 through 63-LC are used for address formation */ |
426 | ext_sa_addr &= ~0x3ffUL; | 429 | lc = ext_sa_addr & MCESA_LC_MASK; |
430 | if (test_kvm_facility(vcpu->kvm, 133)) { | ||
431 | switch (lc) { | ||
432 | case 0: | ||
433 | case 10: | ||
434 | ext_sa_addr &= ~0x3ffUL; | ||
435 | break; | ||
436 | case 11: | ||
437 | ext_sa_addr &= ~0x7ffUL; | ||
438 | break; | ||
439 | case 12: | ||
440 | ext_sa_addr &= ~0xfffUL; | ||
441 | break; | ||
442 | default: | ||
443 | ext_sa_addr = 0; | ||
444 | break; | ||
445 | } | ||
446 | } else { | ||
447 | ext_sa_addr &= ~0x3ffUL; | ||
448 | } | ||
449 | |||
427 | if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { | 450 | if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { |
428 | if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs, | 451 | if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs, |
429 | 512)) | 452 | 512)) |
@@ -431,6 +454,14 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, | |||
431 | } else { | 454 | } else { |
432 | mci.vr = 0; | 455 | mci.vr = 0; |
433 | } | 456 | } |
457 | if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133) | ||
458 | && (lc == 11 || lc == 12)) { | ||
459 | if (write_guest_abs(vcpu, ext_sa_addr + 1024, | ||
460 | &vcpu->run->s.regs.gscb, 32)) | ||
461 | mci.gs = 0; | ||
462 | } else { | ||
463 | mci.gs = 0; | ||
464 | } | ||
434 | 465 | ||
435 | /* General interruption information */ | 466 | /* General interruption information */ |
436 | rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID); | 467 | rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID); |
@@ -1968,6 +1999,8 @@ static int register_io_adapter(struct kvm_device *dev, | |||
1968 | adapter->maskable = adapter_info.maskable; | 1999 | adapter->maskable = adapter_info.maskable; |
1969 | adapter->masked = false; | 2000 | adapter->masked = false; |
1970 | adapter->swap = adapter_info.swap; | 2001 | adapter->swap = adapter_info.swap; |
2002 | adapter->suppressible = (adapter_info.flags) & | ||
2003 | KVM_S390_ADAPTER_SUPPRESSIBLE; | ||
1971 | dev->kvm->arch.adapters[adapter->id] = adapter; | 2004 | dev->kvm->arch.adapters[adapter->id] = adapter; |
1972 | 2005 | ||
1973 | return 0; | 2006 | return 0; |
@@ -2121,6 +2154,87 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr) | |||
2121 | return 0; | 2154 | return 0; |
2122 | } | 2155 | } |
2123 | 2156 | ||
2157 | static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) | ||
2158 | { | ||
2159 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | ||
2160 | struct kvm_s390_ais_req req; | ||
2161 | int ret = 0; | ||
2162 | |||
2163 | if (!fi->ais_enabled) | ||
2164 | return -ENOTSUPP; | ||
2165 | |||
2166 | if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) | ||
2167 | return -EFAULT; | ||
2168 | |||
2169 | if (req.isc > MAX_ISC) | ||
2170 | return -EINVAL; | ||
2171 | |||
2172 | trace_kvm_s390_modify_ais_mode(req.isc, | ||
2173 | (fi->simm & AIS_MODE_MASK(req.isc)) ? | ||
2174 | (fi->nimm & AIS_MODE_MASK(req.isc)) ? | ||
2175 | 2 : KVM_S390_AIS_MODE_SINGLE : | ||
2176 | KVM_S390_AIS_MODE_ALL, req.mode); | ||
2177 | |||
2178 | mutex_lock(&fi->ais_lock); | ||
2179 | switch (req.mode) { | ||
2180 | case KVM_S390_AIS_MODE_ALL: | ||
2181 | fi->simm &= ~AIS_MODE_MASK(req.isc); | ||
2182 | fi->nimm &= ~AIS_MODE_MASK(req.isc); | ||
2183 | break; | ||
2184 | case KVM_S390_AIS_MODE_SINGLE: | ||
2185 | fi->simm |= AIS_MODE_MASK(req.isc); | ||
2186 | fi->nimm &= ~AIS_MODE_MASK(req.isc); | ||
2187 | break; | ||
2188 | default: | ||
2189 | ret = -EINVAL; | ||
2190 | } | ||
2191 | mutex_unlock(&fi->ais_lock); | ||
2192 | |||
2193 | return ret; | ||
2194 | } | ||
2195 | |||
2196 | static int kvm_s390_inject_airq(struct kvm *kvm, | ||
2197 | struct s390_io_adapter *adapter) | ||
2198 | { | ||
2199 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | ||
2200 | struct kvm_s390_interrupt s390int = { | ||
2201 | .type = KVM_S390_INT_IO(1, 0, 0, 0), | ||
2202 | .parm = 0, | ||
2203 | .parm64 = (adapter->isc << 27) | 0x80000000, | ||
2204 | }; | ||
2205 | int ret = 0; | ||
2206 | |||
2207 | if (!fi->ais_enabled || !adapter->suppressible) | ||
2208 | return kvm_s390_inject_vm(kvm, &s390int); | ||
2209 | |||
2210 | mutex_lock(&fi->ais_lock); | ||
2211 | if (fi->nimm & AIS_MODE_MASK(adapter->isc)) { | ||
2212 | trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc); | ||
2213 | goto out; | ||
2214 | } | ||
2215 | |||
2216 | ret = kvm_s390_inject_vm(kvm, &s390int); | ||
2217 | if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) { | ||
2218 | fi->nimm |= AIS_MODE_MASK(adapter->isc); | ||
2219 | trace_kvm_s390_modify_ais_mode(adapter->isc, | ||
2220 | KVM_S390_AIS_MODE_SINGLE, 2); | ||
2221 | } | ||
2222 | out: | ||
2223 | mutex_unlock(&fi->ais_lock); | ||
2224 | return ret; | ||
2225 | } | ||
2226 | |||
2227 | static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr) | ||
2228 | { | ||
2229 | unsigned int id = attr->attr; | ||
2230 | struct s390_io_adapter *adapter = get_io_adapter(kvm, id); | ||
2231 | |||
2232 | if (!adapter) | ||
2233 | return -EINVAL; | ||
2234 | |||
2235 | return kvm_s390_inject_airq(kvm, adapter); | ||
2236 | } | ||
2237 | |||
2124 | static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | 2238 | static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
2125 | { | 2239 | { |
2126 | int r = 0; | 2240 | int r = 0; |
@@ -2157,6 +2271,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2157 | case KVM_DEV_FLIC_CLEAR_IO_IRQ: | 2271 | case KVM_DEV_FLIC_CLEAR_IO_IRQ: |
2158 | r = clear_io_irq(dev->kvm, attr); | 2272 | r = clear_io_irq(dev->kvm, attr); |
2159 | break; | 2273 | break; |
2274 | case KVM_DEV_FLIC_AISM: | ||
2275 | r = modify_ais_mode(dev->kvm, attr); | ||
2276 | break; | ||
2277 | case KVM_DEV_FLIC_AIRQ_INJECT: | ||
2278 | r = flic_inject_airq(dev->kvm, attr); | ||
2279 | break; | ||
2160 | default: | 2280 | default: |
2161 | r = -EINVAL; | 2281 | r = -EINVAL; |
2162 | } | 2282 | } |
@@ -2176,6 +2296,8 @@ static int flic_has_attr(struct kvm_device *dev, | |||
2176 | case KVM_DEV_FLIC_ADAPTER_REGISTER: | 2296 | case KVM_DEV_FLIC_ADAPTER_REGISTER: |
2177 | case KVM_DEV_FLIC_ADAPTER_MODIFY: | 2297 | case KVM_DEV_FLIC_ADAPTER_MODIFY: |
2178 | case KVM_DEV_FLIC_CLEAR_IO_IRQ: | 2298 | case KVM_DEV_FLIC_CLEAR_IO_IRQ: |
2299 | case KVM_DEV_FLIC_AISM: | ||
2300 | case KVM_DEV_FLIC_AIRQ_INJECT: | ||
2179 | return 0; | 2301 | return 0; |
2180 | } | 2302 | } |
2181 | return -ENXIO; | 2303 | return -ENXIO; |
@@ -2286,12 +2408,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, | |||
2286 | ret = adapter_indicators_set(kvm, adapter, &e->adapter); | 2408 | ret = adapter_indicators_set(kvm, adapter, &e->adapter); |
2287 | up_read(&adapter->maps_lock); | 2409 | up_read(&adapter->maps_lock); |
2288 | if ((ret > 0) && !adapter->masked) { | 2410 | if ((ret > 0) && !adapter->masked) { |
2289 | struct kvm_s390_interrupt s390int = { | 2411 | ret = kvm_s390_inject_airq(kvm, adapter); |
2290 | .type = KVM_S390_INT_IO(1, 0, 0, 0), | ||
2291 | .parm = 0, | ||
2292 | .parm64 = (adapter->isc << 27) | 0x80000000, | ||
2293 | }; | ||
2294 | ret = kvm_s390_inject_vm(kvm, &s390int); | ||
2295 | if (ret == 0) | 2412 | if (ret == 0) |
2296 | ret = 1; | 2413 | ret = 1; |
2297 | } | 2414 | } |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fd6cd05bb6a7..8771fef112a1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -300,6 +300,8 @@ static void kvm_s390_cpu_feat_init(void) | |||
300 | allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); | 300 | allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); |
301 | if (sclp.has_ibs) | 301 | if (sclp.has_ibs) |
302 | allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); | 302 | allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); |
303 | if (sclp.has_kss) | ||
304 | allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); | ||
303 | /* | 305 | /* |
304 | * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make | 306 | * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make |
305 | * all skey handling functions read/set the skey from the PGSTE | 307 | * all skey handling functions read/set the skey from the PGSTE |
@@ -380,6 +382,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
380 | case KVM_CAP_S390_SKEYS: | 382 | case KVM_CAP_S390_SKEYS: |
381 | case KVM_CAP_S390_IRQ_STATE: | 383 | case KVM_CAP_S390_IRQ_STATE: |
382 | case KVM_CAP_S390_USER_INSTR0: | 384 | case KVM_CAP_S390_USER_INSTR0: |
385 | case KVM_CAP_S390_AIS: | ||
383 | r = 1; | 386 | r = 1; |
384 | break; | 387 | break; |
385 | case KVM_CAP_S390_MEM_OP: | 388 | case KVM_CAP_S390_MEM_OP: |
@@ -405,6 +408,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
405 | case KVM_CAP_S390_RI: | 408 | case KVM_CAP_S390_RI: |
406 | r = test_facility(64); | 409 | r = test_facility(64); |
407 | break; | 410 | break; |
411 | case KVM_CAP_S390_GS: | ||
412 | r = test_facility(133); | ||
413 | break; | ||
408 | default: | 414 | default: |
409 | r = 0; | 415 | r = 0; |
410 | } | 416 | } |
@@ -541,6 +547,34 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) | |||
541 | VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", | 547 | VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", |
542 | r ? "(not available)" : "(success)"); | 548 | r ? "(not available)" : "(success)"); |
543 | break; | 549 | break; |
550 | case KVM_CAP_S390_AIS: | ||
551 | mutex_lock(&kvm->lock); | ||
552 | if (kvm->created_vcpus) { | ||
553 | r = -EBUSY; | ||
554 | } else { | ||
555 | set_kvm_facility(kvm->arch.model.fac_mask, 72); | ||
556 | set_kvm_facility(kvm->arch.model.fac_list, 72); | ||
557 | kvm->arch.float_int.ais_enabled = 1; | ||
558 | r = 0; | ||
559 | } | ||
560 | mutex_unlock(&kvm->lock); | ||
561 | VM_EVENT(kvm, 3, "ENABLE: AIS %s", | ||
562 | r ? "(not available)" : "(success)"); | ||
563 | break; | ||
564 | case KVM_CAP_S390_GS: | ||
565 | r = -EINVAL; | ||
566 | mutex_lock(&kvm->lock); | ||
567 | if (atomic_read(&kvm->online_vcpus)) { | ||
568 | r = -EBUSY; | ||
569 | } else if (test_facility(133)) { | ||
570 | set_kvm_facility(kvm->arch.model.fac_mask, 133); | ||
571 | set_kvm_facility(kvm->arch.model.fac_list, 133); | ||
572 | r = 0; | ||
573 | } | ||
574 | mutex_unlock(&kvm->lock); | ||
575 | VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", | ||
576 | r ? "(not available)" : "(success)"); | ||
577 | break; | ||
544 | case KVM_CAP_S390_USER_STSI: | 578 | case KVM_CAP_S390_USER_STSI: |
545 | VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); | 579 | VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); |
546 | kvm->arch.user_stsi = 1; | 580 | kvm->arch.user_stsi = 1; |
@@ -1498,6 +1532,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
1498 | 1532 | ||
1499 | kvm_s390_crypto_init(kvm); | 1533 | kvm_s390_crypto_init(kvm); |
1500 | 1534 | ||
1535 | mutex_init(&kvm->arch.float_int.ais_lock); | ||
1536 | kvm->arch.float_int.simm = 0; | ||
1537 | kvm->arch.float_int.nimm = 0; | ||
1538 | kvm->arch.float_int.ais_enabled = 0; | ||
1501 | spin_lock_init(&kvm->arch.float_int.lock); | 1539 | spin_lock_init(&kvm->arch.float_int.lock); |
1502 | for (i = 0; i < FIRQ_LIST_COUNT; i++) | 1540 | for (i = 0; i < FIRQ_LIST_COUNT; i++) |
1503 | INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); | 1541 | INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); |
@@ -1646,7 +1684,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu) | |||
1646 | sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; | 1684 | sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; |
1647 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); | 1685 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); |
1648 | vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; | 1686 | vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; |
1649 | vcpu->arch.sie_block->ecb2 |= 0x04U; | 1687 | vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; |
1650 | set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); | 1688 | set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); |
1651 | } else { | 1689 | } else { |
1652 | struct bsca_block *sca = vcpu->kvm->arch.sca; | 1690 | struct bsca_block *sca = vcpu->kvm->arch.sca; |
@@ -1700,7 +1738,7 @@ static int sca_switch_to_extended(struct kvm *kvm) | |||
1700 | kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { | 1738 | kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { |
1701 | vcpu->arch.sie_block->scaoh = scaoh; | 1739 | vcpu->arch.sie_block->scaoh = scaoh; |
1702 | vcpu->arch.sie_block->scaol = scaol; | 1740 | vcpu->arch.sie_block->scaol = scaol; |
1703 | vcpu->arch.sie_block->ecb2 |= 0x04U; | 1741 | vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; |
1704 | } | 1742 | } |
1705 | kvm->arch.sca = new_sca; | 1743 | kvm->arch.sca = new_sca; |
1706 | kvm->arch.use_esca = 1; | 1744 | kvm->arch.use_esca = 1; |
@@ -1749,6 +1787,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
1749 | kvm_s390_set_prefix(vcpu, 0); | 1787 | kvm_s390_set_prefix(vcpu, 0); |
1750 | if (test_kvm_facility(vcpu->kvm, 64)) | 1788 | if (test_kvm_facility(vcpu->kvm, 64)) |
1751 | vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; | 1789 | vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; |
1790 | if (test_kvm_facility(vcpu->kvm, 133)) | ||
1791 | vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; | ||
1752 | /* fprs can be synchronized via vrs, even if the guest has no vx. With | 1792 | /* fprs can be synchronized via vrs, even if the guest has no vx. With |
1753 | * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. | 1793 | * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. |
1754 | */ | 1794 | */ |
@@ -1939,8 +1979,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) | |||
1939 | if (!vcpu->arch.sie_block->cbrlo) | 1979 | if (!vcpu->arch.sie_block->cbrlo) |
1940 | return -ENOMEM; | 1980 | return -ENOMEM; |
1941 | 1981 | ||
1942 | vcpu->arch.sie_block->ecb2 |= 0x80; | 1982 | vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; |
1943 | vcpu->arch.sie_block->ecb2 &= ~0x08; | 1983 | vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; |
1944 | return 0; | 1984 | return 0; |
1945 | } | 1985 | } |
1946 | 1986 | ||
@@ -1970,31 +2010,37 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1970 | 2010 | ||
1971 | /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ | 2011 | /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ |
1972 | if (MACHINE_HAS_ESOP) | 2012 | if (MACHINE_HAS_ESOP) |
1973 | vcpu->arch.sie_block->ecb |= 0x02; | 2013 | vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; |
1974 | if (test_kvm_facility(vcpu->kvm, 9)) | 2014 | if (test_kvm_facility(vcpu->kvm, 9)) |
1975 | vcpu->arch.sie_block->ecb |= 0x04; | 2015 | vcpu->arch.sie_block->ecb |= ECB_SRSI; |
1976 | if (test_kvm_facility(vcpu->kvm, 73)) | 2016 | if (test_kvm_facility(vcpu->kvm, 73)) |
1977 | vcpu->arch.sie_block->ecb |= 0x10; | 2017 | vcpu->arch.sie_block->ecb |= ECB_TE; |
1978 | 2018 | ||
1979 | if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) | 2019 | if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) |
1980 | vcpu->arch.sie_block->ecb2 |= 0x08; | 2020 | vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; |
1981 | if (test_kvm_facility(vcpu->kvm, 130)) | 2021 | if (test_kvm_facility(vcpu->kvm, 130)) |
1982 | vcpu->arch.sie_block->ecb2 |= 0x20; | 2022 | vcpu->arch.sie_block->ecb2 |= ECB2_IEP; |
1983 | vcpu->arch.sie_block->eca = 0x1002000U; | 2023 | vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; |
1984 | if (sclp.has_cei) | 2024 | if (sclp.has_cei) |
1985 | vcpu->arch.sie_block->eca |= 0x80000000U; | 2025 | vcpu->arch.sie_block->eca |= ECA_CEI; |
1986 | if (sclp.has_ib) | 2026 | if (sclp.has_ib) |
1987 | vcpu->arch.sie_block->eca |= 0x40000000U; | 2027 | vcpu->arch.sie_block->eca |= ECA_IB; |
1988 | if (sclp.has_siif) | 2028 | if (sclp.has_siif) |
1989 | vcpu->arch.sie_block->eca |= 1; | 2029 | vcpu->arch.sie_block->eca |= ECA_SII; |
1990 | if (sclp.has_sigpif) | 2030 | if (sclp.has_sigpif) |
1991 | vcpu->arch.sie_block->eca |= 0x10000000U; | 2031 | vcpu->arch.sie_block->eca |= ECA_SIGPI; |
1992 | if (test_kvm_facility(vcpu->kvm, 129)) { | 2032 | if (test_kvm_facility(vcpu->kvm, 129)) { |
1993 | vcpu->arch.sie_block->eca |= 0x00020000; | 2033 | vcpu->arch.sie_block->eca |= ECA_VX; |
1994 | vcpu->arch.sie_block->ecd |= 0x20000000; | 2034 | vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; |
1995 | } | 2035 | } |
2036 | vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) | ||
2037 | | SDNXC; | ||
1996 | vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; | 2038 | vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; |
1997 | vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; | 2039 | |
2040 | if (sclp.has_kss) | ||
2041 | atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags); | ||
2042 | else | ||
2043 | vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; | ||
1998 | 2044 | ||
1999 | if (vcpu->kvm->arch.use_cmma) { | 2045 | if (vcpu->kvm->arch.use_cmma) { |
2000 | rc = kvm_s390_vcpu_setup_cmma(vcpu); | 2046 | rc = kvm_s390_vcpu_setup_cmma(vcpu); |
@@ -2719,6 +2765,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
2719 | 2765 | ||
2720 | static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2766 | static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2721 | { | 2767 | { |
2768 | struct runtime_instr_cb *riccb; | ||
2769 | struct gs_cb *gscb; | ||
2770 | |||
2771 | riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; | ||
2772 | gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; | ||
2722 | vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; | 2773 | vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; |
2723 | vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; | 2774 | vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; |
2724 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) | 2775 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) |
@@ -2747,12 +2798,24 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2747 | * we should enable RI here instead of doing the lazy enablement. | 2798 | * we should enable RI here instead of doing the lazy enablement. |
2748 | */ | 2799 | */ |
2749 | if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && | 2800 | if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && |
2750 | test_kvm_facility(vcpu->kvm, 64)) { | 2801 | test_kvm_facility(vcpu->kvm, 64) && |
2751 | struct runtime_instr_cb *riccb = | 2802 | riccb->valid && |
2752 | (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; | 2803 | !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { |
2753 | 2804 | VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); | |
2754 | if (riccb->valid) | 2805 | vcpu->arch.sie_block->ecb3 |= ECB3_RI; |
2755 | vcpu->arch.sie_block->ecb3 |= 0x01; | 2806 | } |
2807 | /* | ||
2808 | * If userspace sets the gscb (e.g. after migration) to non-zero, | ||
2809 | * we should enable GS here instead of doing the lazy enablement. | ||
2810 | */ | ||
2811 | if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && | ||
2812 | test_kvm_facility(vcpu->kvm, 133) && | ||
2813 | gscb->gssm && | ||
2814 | !vcpu->arch.gs_enabled) { | ||
2815 | VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); | ||
2816 | vcpu->arch.sie_block->ecb |= ECB_GS; | ||
2817 | vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; | ||
2818 | vcpu->arch.gs_enabled = 1; | ||
2756 | } | 2819 | } |
2757 | save_access_regs(vcpu->arch.host_acrs); | 2820 | save_access_regs(vcpu->arch.host_acrs); |
2758 | restore_access_regs(vcpu->run->s.regs.acrs); | 2821 | restore_access_regs(vcpu->run->s.regs.acrs); |
@@ -2768,6 +2831,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2768 | if (test_fp_ctl(current->thread.fpu.fpc)) | 2831 | if (test_fp_ctl(current->thread.fpu.fpc)) |
2769 | /* User space provided an invalid FPC, let's clear it */ | 2832 | /* User space provided an invalid FPC, let's clear it */ |
2770 | current->thread.fpu.fpc = 0; | 2833 | current->thread.fpu.fpc = 0; |
2834 | if (MACHINE_HAS_GS) { | ||
2835 | preempt_disable(); | ||
2836 | __ctl_set_bit(2, 4); | ||
2837 | if (current->thread.gs_cb) { | ||
2838 | vcpu->arch.host_gscb = current->thread.gs_cb; | ||
2839 | save_gs_cb(vcpu->arch.host_gscb); | ||
2840 | } | ||
2841 | if (vcpu->arch.gs_enabled) { | ||
2842 | current->thread.gs_cb = (struct gs_cb *) | ||
2843 | &vcpu->run->s.regs.gscb; | ||
2844 | restore_gs_cb(current->thread.gs_cb); | ||
2845 | } | ||
2846 | preempt_enable(); | ||
2847 | } | ||
2771 | 2848 | ||
2772 | kvm_run->kvm_dirty_regs = 0; | 2849 | kvm_run->kvm_dirty_regs = 0; |
2773 | } | 2850 | } |
@@ -2794,6 +2871,18 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2794 | /* Restore will be done lazily at return */ | 2871 | /* Restore will be done lazily at return */ |
2795 | current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; | 2872 | current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; |
2796 | current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; | 2873 | current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; |
2874 | if (MACHINE_HAS_GS) { | ||
2875 | __ctl_set_bit(2, 4); | ||
2876 | if (vcpu->arch.gs_enabled) | ||
2877 | save_gs_cb(current->thread.gs_cb); | ||
2878 | preempt_disable(); | ||
2879 | current->thread.gs_cb = vcpu->arch.host_gscb; | ||
2880 | restore_gs_cb(vcpu->arch.host_gscb); | ||
2881 | preempt_enable(); | ||
2882 | if (!vcpu->arch.host_gscb) | ||
2883 | __ctl_clear_bit(2, 4); | ||
2884 | vcpu->arch.host_gscb = NULL; | ||
2885 | } | ||
2797 | 2886 | ||
2798 | } | 2887 | } |
2799 | 2888 | ||
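For context, the sync_regs()/store_regs() hunks above make the guarded-storage control block part of the synced register area, so userspace (for example a migration target) can hand a saved gscb back to the kernel and have GS enabled before the next guest entry. A minimal, hypothetical userspace sketch — vcpu_fd, the mmap'ed struct kvm_run *run and saved_gscb are assumed to exist, and the host must offer facility 133:

	/* hypothetical restore path after migration */
	memcpy(&run->s.regs.gscb, saved_gscb, sizeof(run->s.regs.gscb));
	run->kvm_dirty_regs |= KVM_SYNC_GSCB;		/* tell sync_regs() the gscb was modified */
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)		/* GS is enabled in sync_regs() before entry */
		perror("KVM_RUN");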
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index af9fa91a0c91..55f5c8457d6d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -25,7 +25,7 @@ | |||
25 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); | 25 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); |
26 | 26 | ||
27 | /* Transactional Memory Execution related macros */ | 27 | /* Transactional Memory Execution related macros */ |
28 | #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) | 28 | #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) |
29 | #define TDB_FORMAT1 1 | 29 | #define TDB_FORMAT1 1 |
30 | #define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) | 30 | #define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) |
31 | 31 | ||
@@ -246,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) | |||
246 | int is_valid_psw(psw_t *psw); | 246 | int is_valid_psw(psw_t *psw); |
247 | int kvm_s390_handle_aa(struct kvm_vcpu *vcpu); | 247 | int kvm_s390_handle_aa(struct kvm_vcpu *vcpu); |
248 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); | 248 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); |
249 | int kvm_s390_handle_e3(struct kvm_vcpu *vcpu); | ||
249 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); | 250 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); |
250 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu); | 251 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu); |
251 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); | 252 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); |
@@ -253,6 +254,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); | |||
253 | int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); | 254 | int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); |
254 | int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); | 255 | int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); |
255 | int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); | 256 | int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); |
257 | int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu); | ||
256 | 258 | ||
257 | /* implemented in vsie.c */ | 259 | /* implemented in vsie.c */ |
258 | int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu); | 260 | int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu); |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 64b6a309f2c4..c03106c428cf 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -37,7 +37,8 @@ | |||
37 | static int handle_ri(struct kvm_vcpu *vcpu) | 37 | static int handle_ri(struct kvm_vcpu *vcpu) |
38 | { | 38 | { |
39 | if (test_kvm_facility(vcpu->kvm, 64)) { | 39 | if (test_kvm_facility(vcpu->kvm, 64)) { |
40 | vcpu->arch.sie_block->ecb3 |= 0x01; | 40 | VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)"); |
41 | vcpu->arch.sie_block->ecb3 |= ECB3_RI; | ||
41 | kvm_s390_retry_instr(vcpu); | 42 | kvm_s390_retry_instr(vcpu); |
42 | return 0; | 43 | return 0; |
43 | } else | 44 | } else |
@@ -52,6 +53,33 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu) | |||
52 | return -EOPNOTSUPP; | 53 | return -EOPNOTSUPP; |
53 | } | 54 | } |
54 | 55 | ||
56 | static int handle_gs(struct kvm_vcpu *vcpu) | ||
57 | { | ||
58 | if (test_kvm_facility(vcpu->kvm, 133)) { | ||
59 | VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)"); | ||
60 | preempt_disable(); | ||
61 | __ctl_set_bit(2, 4); | ||
62 | current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb; | ||
63 | restore_gs_cb(current->thread.gs_cb); | ||
64 | preempt_enable(); | ||
65 | vcpu->arch.sie_block->ecb |= ECB_GS; | ||
66 | vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; | ||
67 | vcpu->arch.gs_enabled = 1; | ||
68 | kvm_s390_retry_instr(vcpu); | ||
69 | return 0; | ||
70 | } else | ||
71 | return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); | ||
72 | } | ||
73 | |||
74 | int kvm_s390_handle_e3(struct kvm_vcpu *vcpu) | ||
75 | { | ||
76 | int code = vcpu->arch.sie_block->ipb & 0xff; | ||
77 | |||
78 | if (code == 0x49 || code == 0x4d) | ||
79 | return handle_gs(vcpu); | ||
80 | else | ||
81 | return -EOPNOTSUPP; | ||
82 | } | ||
55 | /* Handle SCK (SET CLOCK) interception */ | 83 | /* Handle SCK (SET CLOCK) interception */ |
56 | static int handle_set_clock(struct kvm_vcpu *vcpu) | 84 | static int handle_set_clock(struct kvm_vcpu *vcpu) |
57 | { | 85 | { |
@@ -170,18 +198,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | |||
170 | return 0; | 198 | return 0; |
171 | } | 199 | } |
172 | 200 | ||
173 | static int __skey_check_enable(struct kvm_vcpu *vcpu) | 201 | int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) |
174 | { | 202 | { |
175 | int rc = 0; | 203 | int rc = 0; |
204 | struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; | ||
176 | 205 | ||
177 | trace_kvm_s390_skey_related_inst(vcpu); | 206 | trace_kvm_s390_skey_related_inst(vcpu); |
178 | if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) | 207 | if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) && |
208 | !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)) | ||
179 | return rc; | 209 | return rc; |
180 | 210 | ||
181 | rc = s390_enable_skey(); | 211 | rc = s390_enable_skey(); |
182 | VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); | 212 | VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); |
183 | if (!rc) | 213 | if (!rc) { |
184 | vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); | 214 | if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS) |
215 | atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags); | ||
216 | else | ||
217 | sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | | ||
218 | ICTL_RRBE); | ||
219 | } | ||
185 | return rc; | 220 | return rc; |
186 | } | 221 | } |
187 | 222 | ||
@@ -190,7 +225,7 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) | |||
190 | int rc; | 225 | int rc; |
191 | 226 | ||
192 | vcpu->stat.instruction_storage_key++; | 227 | vcpu->stat.instruction_storage_key++; |
193 | rc = __skey_check_enable(vcpu); | 228 | rc = kvm_s390_skey_check_enable(vcpu); |
194 | if (rc) | 229 | if (rc) |
195 | return rc; | 230 | return rc; |
196 | if (sclp.has_skey) { | 231 | if (sclp.has_skey) { |
@@ -759,6 +794,7 @@ static const intercept_handler_t b2_handlers[256] = { | |||
759 | [0x3b] = handle_io_inst, | 794 | [0x3b] = handle_io_inst, |
760 | [0x3c] = handle_io_inst, | 795 | [0x3c] = handle_io_inst, |
761 | [0x50] = handle_ipte_interlock, | 796 | [0x50] = handle_ipte_interlock, |
797 | [0x56] = handle_sthyi, | ||
762 | [0x5f] = handle_io_inst, | 798 | [0x5f] = handle_io_inst, |
763 | [0x74] = handle_io_inst, | 799 | [0x74] = handle_io_inst, |
764 | [0x76] = handle_io_inst, | 800 | [0x76] = handle_io_inst, |
@@ -887,7 +923,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) | |||
887 | } | 923 | } |
888 | 924 | ||
889 | if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { | 925 | if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { |
890 | int rc = __skey_check_enable(vcpu); | 926 | int rc = kvm_s390_skey_check_enable(vcpu); |
891 | 927 | ||
892 | if (rc) | 928 | if (rc) |
893 | return rc; | 929 | return rc; |
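handle_ri() and the new handle_gs() follow the same lazy-enablement idea: the first guest use of the facility is intercepted, the handler turns on hardware interpretation in the SIE control block and then retries the intercepted instruction, so later uses run without exits. A condensed illustration of that pattern (not the literal kernel code; facility_nr is a placeholder):

	if (!test_kvm_facility(vcpu->kvm, facility_nr))	/* facility not offered to this guest */
		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
	vcpu->arch.sie_block->ecb3 |= ECB3_RI;		/* or ECB_GS + ECD_HOSTREGMGMT for guarded storage */
	kvm_s390_retry_instr(vcpu);			/* rewind the PSW so SIE re-executes the instruction */
	return 0;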
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 05c98bb853cf..926b5244263e 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c | |||
@@ -404,6 +404,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu) | |||
404 | u64 code, addr, cc = 0; | 404 | u64 code, addr, cc = 0; |
405 | struct sthyi_sctns *sctns = NULL; | 405 | struct sthyi_sctns *sctns = NULL; |
406 | 406 | ||
407 | if (!test_kvm_facility(vcpu->kvm, 74)) | ||
408 | return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); | ||
409 | |||
407 | /* | 410 | /* |
408 | * STHYI requires extensive locking in the higher hypervisors | 411 | * STHYI requires extensive locking in the higher hypervisors |
409 | * and is very computational/memory expensive. Therefore we | 412 | * and is very computational/memory expensive. Therefore we |
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 396485bca191..78b7e847984a 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h | |||
@@ -280,6 +280,58 @@ TRACE_EVENT(kvm_s390_enable_disable_ibs, | |||
280 | __entry->state ? "enabling" : "disabling", __entry->id) | 280 | __entry->state ? "enabling" : "disabling", __entry->id) |
281 | ); | 281 | ); |
282 | 282 | ||
283 | /* | ||
284 | * Trace point for modifying ais mode for a given isc. | ||
285 | */ | ||
286 | TRACE_EVENT(kvm_s390_modify_ais_mode, | ||
287 | TP_PROTO(__u8 isc, __u16 from, __u16 to), | ||
288 | TP_ARGS(isc, from, to), | ||
289 | |||
290 | TP_STRUCT__entry( | ||
291 | __field(__u8, isc) | ||
292 | __field(__u16, from) | ||
293 | __field(__u16, to) | ||
294 | ), | ||
295 | |||
296 | TP_fast_assign( | ||
297 | __entry->isc = isc; | ||
298 | __entry->from = from; | ||
299 | __entry->to = to; | ||
300 | ), | ||
301 | |||
302 | TP_printk("for isc %x, modifying interruption mode from %s to %s", | ||
303 | __entry->isc, | ||
304 | (__entry->from == KVM_S390_AIS_MODE_ALL) ? | ||
305 | "ALL-Interruptions Mode" : | ||
306 | (__entry->from == KVM_S390_AIS_MODE_SINGLE) ? | ||
307 | "Single-Interruption Mode" : "No-Interruptions Mode", | ||
308 | (__entry->to == KVM_S390_AIS_MODE_ALL) ? | ||
309 | "ALL-Interruptions Mode" : | ||
310 | (__entry->to == KVM_S390_AIS_MODE_SINGLE) ? | ||
311 | "Single-Interruption Mode" : "No-Interruptions Mode") | ||
312 | ); | ||
313 | |||
314 | /* | ||
315 | * Trace point for suppressed adapter I/O interrupt. | ||
316 | */ | ||
317 | TRACE_EVENT(kvm_s390_airq_suppressed, | ||
318 | TP_PROTO(__u32 id, __u8 isc), | ||
319 | TP_ARGS(id, isc), | ||
320 | |||
321 | TP_STRUCT__entry( | ||
322 | __field(__u32, id) | ||
323 | __field(__u8, isc) | ||
324 | ), | ||
325 | |||
326 | TP_fast_assign( | ||
327 | __entry->id = id; | ||
328 | __entry->isc = isc; | ||
329 | ), | ||
330 | |||
331 | TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)", | ||
332 | __entry->id, __entry->isc) | ||
333 | ); | ||
334 | |||
283 | 335 | ||
284 | #endif /* _TRACE_KVMS390_H */ | 336 | #endif /* _TRACE_KVMS390_H */ |
285 | 337 | ||
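The two TRACE_EVENT() definitions expand to trace_kvm_s390_modify_ais_mode() and trace_kvm_s390_airq_suppressed(), which the adapter-interruption code elsewhere in this series can call. A hypothetical call site (argument names are placeholders) would look like:

	trace_kvm_s390_modify_ais_mode(isc, old_mode, KVM_S390_AIS_MODE_SINGLE);
	trace_kvm_s390_airq_suppressed(adapter_id, isc);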
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 5491be39776b..4719ecb9ab42 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c | |||
@@ -117,6 +117,8 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
117 | newflags |= cpuflags & CPUSTAT_SM; | 117 | newflags |= cpuflags & CPUSTAT_SM; |
118 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS)) | 118 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS)) |
119 | newflags |= cpuflags & CPUSTAT_IBS; | 119 | newflags |= cpuflags & CPUSTAT_IBS; |
120 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS)) | ||
121 | newflags |= cpuflags & CPUSTAT_KSS; | ||
120 | 122 | ||
121 | atomic_set(&scb_s->cpuflags, newflags); | 123 | atomic_set(&scb_s->cpuflags, newflags); |
122 | return 0; | 124 | return 0; |
@@ -249,7 +251,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
249 | { | 251 | { |
250 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 252 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; |
251 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 253 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; |
252 | bool had_tx = scb_s->ecb & 0x10U; | 254 | bool had_tx = scb_s->ecb & ECB_TE; |
253 | unsigned long new_mso = 0; | 255 | unsigned long new_mso = 0; |
254 | int rc; | 256 | int rc; |
255 | 257 | ||
@@ -289,7 +291,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
289 | * bits. Therefore we cannot provide interpretation and would later | 291 | * bits. Therefore we cannot provide interpretation and would later |
290 | * have to provide own emulation handlers. | 292 | * have to provide own emulation handlers. |
291 | */ | 293 | */ |
292 | scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; | 294 | if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS)) |
295 | scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; | ||
296 | |||
293 | scb_s->icpua = scb_o->icpua; | 297 | scb_s->icpua = scb_o->icpua; |
294 | 298 | ||
295 | if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) | 299 | if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) |
@@ -307,34 +311,39 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
307 | scb_s->ihcpu = scb_o->ihcpu; | 311 | scb_s->ihcpu = scb_o->ihcpu; |
308 | 312 | ||
309 | /* MVPG and Protection Exception Interpretation are always available */ | 313 | /* MVPG and Protection Exception Interpretation are always available */ |
310 | scb_s->eca |= scb_o->eca & 0x01002000U; | 314 | scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI); |
311 | /* Host-protection-interruption introduced with ESOP */ | 315 | /* Host-protection-interruption introduced with ESOP */ |
312 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) | 316 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) |
313 | scb_s->ecb |= scb_o->ecb & 0x02U; | 317 | scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT; |
314 | /* transactional execution */ | 318 | /* transactional execution */ |
315 | if (test_kvm_facility(vcpu->kvm, 73)) { | 319 | if (test_kvm_facility(vcpu->kvm, 73)) { |
316 | /* remap the prefix is tx is toggled on */ | 320 | /* remap the prefix is tx is toggled on */ |
317 | if ((scb_o->ecb & 0x10U) && !had_tx) | 321 | if ((scb_o->ecb & ECB_TE) && !had_tx) |
318 | prefix_unmapped(vsie_page); | 322 | prefix_unmapped(vsie_page); |
319 | scb_s->ecb |= scb_o->ecb & 0x10U; | 323 | scb_s->ecb |= scb_o->ecb & ECB_TE; |
320 | } | 324 | } |
321 | /* SIMD */ | 325 | /* SIMD */ |
322 | if (test_kvm_facility(vcpu->kvm, 129)) { | 326 | if (test_kvm_facility(vcpu->kvm, 129)) { |
323 | scb_s->eca |= scb_o->eca & 0x00020000U; | 327 | scb_s->eca |= scb_o->eca & ECA_VX; |
324 | scb_s->ecd |= scb_o->ecd & 0x20000000U; | 328 | scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; |
325 | } | 329 | } |
326 | /* Run-time-Instrumentation */ | 330 | /* Run-time-Instrumentation */ |
327 | if (test_kvm_facility(vcpu->kvm, 64)) | 331 | if (test_kvm_facility(vcpu->kvm, 64)) |
328 | scb_s->ecb3 |= scb_o->ecb3 & 0x01U; | 332 | scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI; |
329 | /* Instruction Execution Prevention */ | 333 | /* Instruction Execution Prevention */ |
330 | if (test_kvm_facility(vcpu->kvm, 130)) | 334 | if (test_kvm_facility(vcpu->kvm, 130)) |
331 | scb_s->ecb2 |= scb_o->ecb2 & 0x20U; | 335 | scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP; |
336 | /* Guarded Storage */ | ||
337 | if (test_kvm_facility(vcpu->kvm, 133)) { | ||
338 | scb_s->ecb |= scb_o->ecb & ECB_GS; | ||
339 | scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; | ||
340 | } | ||
332 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) | 341 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) |
333 | scb_s->eca |= scb_o->eca & 0x00000001U; | 342 | scb_s->eca |= scb_o->eca & ECA_SII; |
334 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) | 343 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) |
335 | scb_s->eca |= scb_o->eca & 0x40000000U; | 344 | scb_s->eca |= scb_o->eca & ECA_IB; |
336 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) | 345 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) |
337 | scb_s->eca |= scb_o->eca & 0x80000000U; | 346 | scb_s->eca |= scb_o->eca & ECA_CEI; |
338 | 347 | ||
339 | prepare_ibc(vcpu, vsie_page); | 348 | prepare_ibc(vcpu, vsie_page); |
340 | rc = shadow_crycb(vcpu, vsie_page); | 349 | rc = shadow_crycb(vcpu, vsie_page); |
@@ -406,7 +415,7 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
406 | prefix += scb_s->mso; | 415 | prefix += scb_s->mso; |
407 | 416 | ||
408 | rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); | 417 | rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); |
409 | if (!rc && (scb_s->ecb & 0x10U)) | 418 | if (!rc && (scb_s->ecb & ECB_TE)) |
410 | rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, | 419 | rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, |
411 | prefix + PAGE_SIZE); | 420 | prefix + PAGE_SIZE); |
412 | /* | 421 | /* |
@@ -496,6 +505,13 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
496 | unpin_guest_page(vcpu->kvm, gpa, hpa); | 505 | unpin_guest_page(vcpu->kvm, gpa, hpa); |
497 | scb_s->riccbd = 0; | 506 | scb_s->riccbd = 0; |
498 | } | 507 | } |
508 | |||
509 | hpa = scb_s->sdnxo; | ||
510 | if (hpa) { | ||
511 | gpa = scb_o->sdnxo; | ||
512 | unpin_guest_page(vcpu->kvm, gpa, hpa); | ||
513 | scb_s->sdnxo = 0; | ||
514 | } | ||
499 | } | 515 | } |
500 | 516 | ||
501 | /* | 517 | /* |
@@ -543,7 +559,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
543 | } | 559 | } |
544 | 560 | ||
545 | gpa = scb_o->itdba & ~0xffUL; | 561 | gpa = scb_o->itdba & ~0xffUL; |
546 | if (gpa && (scb_s->ecb & 0x10U)) { | 562 | if (gpa && (scb_s->ecb & ECB_TE)) { |
547 | if (!(gpa & ~0x1fffU)) { | 563 | if (!(gpa & ~0x1fffU)) { |
548 | rc = set_validity_icpt(scb_s, 0x0080U); | 564 | rc = set_validity_icpt(scb_s, 0x0080U); |
549 | goto unpin; | 565 | goto unpin; |
@@ -558,8 +574,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
558 | } | 574 | } |
559 | 575 | ||
560 | gpa = scb_o->gvrd & ~0x1ffUL; | 576 | gpa = scb_o->gvrd & ~0x1ffUL; |
561 | if (gpa && (scb_s->eca & 0x00020000U) && | 577 | if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { |
562 | !(scb_s->ecd & 0x20000000U)) { | ||
563 | if (!(gpa & ~0x1fffUL)) { | 578 | if (!(gpa & ~0x1fffUL)) { |
564 | rc = set_validity_icpt(scb_s, 0x1310U); | 579 | rc = set_validity_icpt(scb_s, 0x1310U); |
565 | goto unpin; | 580 | goto unpin; |
@@ -577,7 +592,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
577 | } | 592 | } |
578 | 593 | ||
579 | gpa = scb_o->riccbd & ~0x3fUL; | 594 | gpa = scb_o->riccbd & ~0x3fUL; |
580 | if (gpa && (scb_s->ecb3 & 0x01U)) { | 595 | if (gpa && (scb_s->ecb3 & ECB3_RI)) { |
581 | if (!(gpa & ~0x1fffUL)) { | 596 | if (!(gpa & ~0x1fffUL)) { |
582 | rc = set_validity_icpt(scb_s, 0x0043U); | 597 | rc = set_validity_icpt(scb_s, 0x0043U); |
583 | goto unpin; | 598 | goto unpin; |
@@ -591,6 +606,33 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
591 | goto unpin; | 606 | goto unpin; |
592 | scb_s->riccbd = hpa; | 607 | scb_s->riccbd = hpa; |
593 | } | 608 | } |
609 | if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { | ||
610 | unsigned long sdnxc; | ||
611 | |||
612 | gpa = scb_o->sdnxo & ~0xfUL; | ||
613 | sdnxc = scb_o->sdnxo & 0xfUL; | ||
614 | if (!gpa || !(gpa & ~0x1fffUL)) { | ||
615 | rc = set_validity_icpt(scb_s, 0x10b0U); | ||
616 | goto unpin; | ||
617 | } | ||
618 | if (sdnxc < 6 || sdnxc > 12) { | ||
619 | rc = set_validity_icpt(scb_s, 0x10b1U); | ||
620 | goto unpin; | ||
621 | } | ||
622 | if (gpa & ((1 << sdnxc) - 1)) { | ||
623 | rc = set_validity_icpt(scb_s, 0x10b2U); | ||
624 | goto unpin; | ||
625 | } | ||
626 | /* Due to alignment rules (checked above) this cannot | ||
627 | * cross page boundaries | ||
628 | */ | ||
629 | rc = pin_guest_page(vcpu->kvm, gpa, &hpa); | ||
630 | if (rc == -EINVAL) | ||
631 | rc = set_validity_icpt(scb_s, 0x10b0U); | ||
632 | if (rc) | ||
633 | goto unpin; | ||
634 | scb_s->sdnxo = hpa | sdnxc; | ||
635 | } | ||
594 | return 0; | 636 | return 0; |
595 | unpin: | 637 | unpin: |
596 | unpin_blocks(vcpu, vsie_page); | 638 | unpin_blocks(vcpu, vsie_page); |
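In the new pin_blocks() branch above, the low four bits of sdnxo (sdnxc) encode the log2 of the satellite block's size and alignment, which is why only values 6 through 12 (64 bytes up to 4K) are accepted and why a correctly aligned block can never cross a page boundary. An equivalent stand-alone restatement of those checks (illustrative only):

	static bool sdnxo_valid(unsigned long sdnxo)
	{
		unsigned long origin = sdnxo & ~0xfUL;	/* guest origin of the block */
		unsigned long shift  = sdnxo & 0xfUL;	/* log2 of size/alignment (sdnxc) */

		if (!origin || !(origin & ~0x1fffUL))	/* reject 0 and the low 8K */
			return false;
		if (shift < 6 || shift > 12)		/* 64 bytes ... 4K */
			return false;
		return !(origin & ((1UL << shift) - 1));	/* aligned, so it fits in one page */
	}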
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index de5d572225f3..cd1fa97776c3 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h | |||
@@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd); | |||
302 | extern void maybe_sigio_broken(int fd, int read); | 302 | extern void maybe_sigio_broken(int fd, int read); |
303 | extern void sigio_broken(int fd, int read); | 303 | extern void sigio_broken(int fd, int read); |
304 | 304 | ||
305 | /* sys-x86_64/prctl.c */ | 305 | /* prctl.c */ |
306 | extern int os_arch_prctl(int pid, int code, unsigned long *addr); | 306 | extern int os_arch_prctl(int pid, int option, unsigned long *arg2); |
307 | 307 | ||
308 | /* tty.c */ | 308 | /* tty.c */ |
309 | extern int get_pty(void); | 309 | extern int get_pty(void); |
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 9ba050fe47f3..0af59fa789ea 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl | |||
@@ -390,3 +390,4 @@ | |||
390 | 381 i386 pkey_alloc sys_pkey_alloc | 390 | 381 i386 pkey_alloc sys_pkey_alloc |
391 | 382 i386 pkey_free sys_pkey_free | 391 | 382 i386 pkey_free sys_pkey_free |
392 | 383 i386 statx sys_statx | 392 | 383 i386 statx sys_statx |
393 | 384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl | ||
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b04bb6dfed7f..0fe00446f9ca 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h | |||
@@ -187,6 +187,7 @@ | |||
187 | * Reuse free bits when adding new feature flags! | 187 | * Reuse free bits when adding new feature flags! |
188 | */ | 188 | */ |
189 | #define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ | 189 | #define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ |
190 | #define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ | ||
190 | #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ | 191 | #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ |
191 | #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ | 192 | #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ |
192 | #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ | 193 | #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74ef58c8ff53..2cc5ec7cc6f5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -43,8 +43,6 @@ | |||
43 | #define KVM_PRIVATE_MEM_SLOTS 3 | 43 | #define KVM_PRIVATE_MEM_SLOTS 3 |
44 | #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 44 | #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
45 | 45 | ||
46 | #define KVM_PIO_PAGE_OFFSET 1 | ||
47 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 | ||
48 | #define KVM_HALT_POLL_NS_DEFAULT 400000 | 46 | #define KVM_HALT_POLL_NS_DEFAULT 400000 |
49 | 47 | ||
50 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS | 48 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS |
@@ -343,9 +341,10 @@ struct kvm_mmu { | |||
343 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 341 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
344 | u64 *spte, const void *pte); | 342 | u64 *spte, const void *pte); |
345 | hpa_t root_hpa; | 343 | hpa_t root_hpa; |
346 | int root_level; | ||
347 | int shadow_root_level; | ||
348 | union kvm_mmu_page_role base_role; | 344 | union kvm_mmu_page_role base_role; |
345 | u8 root_level; | ||
346 | u8 shadow_root_level; | ||
347 | u8 ept_ad; | ||
349 | bool direct_map; | 348 | bool direct_map; |
350 | 349 | ||
351 | /* | 350 | /* |
@@ -727,6 +726,7 @@ struct kvm_hv { | |||
727 | 726 | ||
728 | enum kvm_irqchip_mode { | 727 | enum kvm_irqchip_mode { |
729 | KVM_IRQCHIP_NONE, | 728 | KVM_IRQCHIP_NONE, |
729 | KVM_IRQCHIP_INIT_IN_PROGRESS, /* temporarily set during creation */ | ||
730 | KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ | 730 | KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ |
731 | KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ | 731 | KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ |
732 | }; | 732 | }; |
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index d74747b031ec..c4eda791f877 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h | |||
@@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node { | |||
46 | }; | 46 | }; |
47 | 47 | ||
48 | void kvm_page_track_init(struct kvm *kvm); | 48 | void kvm_page_track_init(struct kvm *kvm); |
49 | void kvm_page_track_cleanup(struct kvm *kvm); | ||
49 | 50 | ||
50 | void kvm_page_track_free_memslot(struct kvm_memory_slot *free, | 51 | void kvm_page_track_free_memslot(struct kvm_memory_slot *free, |
51 | struct kvm_memory_slot *dont); | 52 | struct kvm_memory_slot *dont); |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d8b5f8ab8ef9..673f9ac50f6d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -45,6 +45,8 @@ | |||
45 | #define MSR_IA32_PERFCTR1 0x000000c2 | 45 | #define MSR_IA32_PERFCTR1 0x000000c2 |
46 | #define MSR_FSB_FREQ 0x000000cd | 46 | #define MSR_FSB_FREQ 0x000000cd |
47 | #define MSR_PLATFORM_INFO 0x000000ce | 47 | #define MSR_PLATFORM_INFO 0x000000ce |
48 | #define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 | ||
49 | #define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) | ||
48 | 50 | ||
49 | #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 | 51 | #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 |
50 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) | 52 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) |
@@ -127,6 +129,7 @@ | |||
127 | 129 | ||
128 | /* DEBUGCTLMSR bits (others vary by model): */ | 130 | /* DEBUGCTLMSR bits (others vary by model): */ |
129 | #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ | 131 | #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ |
132 | #define DEBUGCTLMSR_BTF_SHIFT 1 | ||
130 | #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ | 133 | #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ |
131 | #define DEBUGCTLMSR_TR (1UL << 6) | 134 | #define DEBUGCTLMSR_TR (1UL << 6) |
132 | #define DEBUGCTLMSR_BTS (1UL << 7) | 135 | #define DEBUGCTLMSR_BTS (1UL << 7) |
@@ -552,10 +555,12 @@ | |||
552 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 | 555 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 |
553 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) | 556 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) |
554 | 557 | ||
555 | /* MISC_FEATURE_ENABLES non-architectural features */ | 558 | /* MISC_FEATURES_ENABLES non-architectural features */ |
556 | #define MSR_MISC_FEATURE_ENABLES 0x00000140 | 559 | #define MSR_MISC_FEATURES_ENABLES 0x00000140 |
557 | 560 | ||
558 | #define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1 | 561 | #define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 |
562 | #define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) | ||
563 | #define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 | ||
559 | 564 | ||
560 | #define MSR_IA32_TSC_DEADLINE 0x000006E0 | 565 | #define MSR_IA32_TSC_DEADLINE 0x000006E0 |
561 | 566 | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f385eca5407a..a80c1b3997ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | |||
884 | extern int get_tsc_mode(unsigned long adr); | 884 | extern int get_tsc_mode(unsigned long adr); |
885 | extern int set_tsc_mode(unsigned int val); | 885 | extern int set_tsc_mode(unsigned int val); |
886 | 886 | ||
887 | DECLARE_PER_CPU(u64, msr_misc_features_shadow); | ||
888 | |||
887 | /* Register/unregister a process' MPX related resource */ | 889 | /* Register/unregister a process' MPX related resource */ |
888 | #define MPX_ENABLE_MANAGEMENT() mpx_enable_management() | 890 | #define MPX_ENABLE_MANAGEMENT() mpx_enable_management() |
889 | #define MPX_DISABLE_MANAGEMENT() mpx_disable_management() | 891 | #define MPX_DISABLE_MANAGEMENT() mpx_disable_management() |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 9b9b30b19441..8d3964fc5f91 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -9,6 +9,7 @@ void syscall_init(void); | |||
9 | 9 | ||
10 | #ifdef CONFIG_X86_64 | 10 | #ifdef CONFIG_X86_64 |
11 | void entry_SYSCALL_64(void); | 11 | void entry_SYSCALL_64(void); |
12 | long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); | ||
12 | #endif | 13 | #endif |
13 | 14 | ||
14 | #ifdef CONFIG_X86_32 | 15 | #ifdef CONFIG_X86_32 |
@@ -30,6 +31,7 @@ void x86_report_nx(void); | |||
30 | 31 | ||
31 | extern int reboot_force; | 32 | extern int reboot_force; |
32 | 33 | ||
33 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); | 34 | long do_arch_prctl_common(struct task_struct *task, int option, |
35 | unsigned long cpuid_enabled); | ||
34 | 36 | ||
35 | #endif /* _ASM_X86_PROTO_H */ | 37 | #endif /* _ASM_X86_PROTO_H */ |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6f5eb07a95..9fc44b95f7cb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -87,6 +87,7 @@ struct thread_info { | |||
87 | #define TIF_SECCOMP 8 /* secure computing */ | 87 | #define TIF_SECCOMP 8 /* secure computing */ |
88 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | 88 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ |
89 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | 89 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ |
90 | #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ | ||
90 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 91 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
91 | #define TIF_IA32 17 /* IA32 compatibility process */ | 92 | #define TIF_IA32 17 /* IA32 compatibility process */ |
92 | #define TIF_NOHZ 19 /* in adaptive nohz mode */ | 93 | #define TIF_NOHZ 19 /* in adaptive nohz mode */ |
@@ -110,6 +111,7 @@ struct thread_info { | |||
110 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | 111 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
111 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | 112 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) |
112 | #define _TIF_UPROBE (1 << TIF_UPROBE) | 113 | #define _TIF_UPROBE (1 << TIF_UPROBE) |
114 | #define _TIF_NOCPUID (1 << TIF_NOCPUID) | ||
113 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 115 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
114 | #define _TIF_IA32 (1 << TIF_IA32) | 116 | #define _TIF_IA32 (1 << TIF_IA32) |
115 | #define _TIF_NOHZ (1 << TIF_NOHZ) | 117 | #define _TIF_NOHZ (1 << TIF_NOHZ) |
@@ -138,7 +140,7 @@ struct thread_info { | |||
138 | 140 | ||
139 | /* flags to check in __switch_to() */ | 141 | /* flags to check in __switch_to() */ |
140 | #define _TIF_WORK_CTXSW \ | 142 | #define _TIF_WORK_CTXSW \ |
141 | (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) | 143 | (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) |
142 | 144 | ||
143 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | 145 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
144 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | 146 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) |
@@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack, | |||
239 | extern void arch_task_cache_init(void); | 241 | extern void arch_task_cache_init(void); |
240 | extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); | 242 | extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); |
241 | extern void arch_release_task_struct(struct task_struct *tsk); | 243 | extern void arch_release_task_struct(struct task_struct *tsk); |
244 | extern void arch_setup_new_exec(void); | ||
245 | #define arch_setup_new_exec arch_setup_new_exec | ||
242 | #endif /* !__ASSEMBLY__ */ | 246 | #endif /* !__ASSEMBLY__ */ |
243 | 247 | ||
244 | #endif /* _ASM_X86_THREAD_INFO_H */ | 248 | #endif /* _ASM_X86_THREAD_INFO_H */ |
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index fc5abff9b7fd..75d002bdb3f3 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask) | |||
110 | } | 110 | } |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline void cr4_toggle_bits(unsigned long mask) | ||
114 | { | ||
115 | unsigned long cr4; | ||
116 | |||
117 | cr4 = this_cpu_read(cpu_tlbstate.cr4); | ||
118 | cr4 ^= mask; | ||
119 | this_cpu_write(cpu_tlbstate.cr4, cr4); | ||
120 | __write_cr4(cr4); | ||
121 | } | ||
122 | |||
113 | /* Read the CR4 shadow. */ | 123 | /* Read the CR4 shadow. */ |
114 | static inline unsigned long cr4_read_shadow(void) | 124 | static inline unsigned long cr4_read_shadow(void) |
115 | { | 125 | { |
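cr4_toggle_bits() complements cr4_set_bits()/cr4_clear_bits(): it flips the requested bits in the per-CPU CR4 shadow and writes the result back to the hardware register in one read-modify-write. The __switch_to_xtra() rework later in this patch uses it to flip CR4.TSD exactly when the outgoing and incoming tasks disagree on TIF_NOTSC:

	if ((tifp ^ tifn) & _TIF_NOTSC)		/* TSC visibility differs between prev and next */
		cr4_toggle_bits(X86_CR4_TSD);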
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cc54b7026567..35cd06f636ab 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -70,8 +70,10 @@ | |||
70 | #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 | 70 | #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 |
71 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 | 71 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 |
72 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 72 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
73 | #define SECONDARY_EXEC_RDRAND 0x00000800 | ||
73 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 74 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
74 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | 75 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 |
76 | #define SECONDARY_EXEC_RDSEED 0x00010000 | ||
75 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 | 77 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 |
76 | #define SECONDARY_EXEC_XSAVES 0x00100000 | 78 | #define SECONDARY_EXEC_XSAVES 0x00100000 |
77 | #define SECONDARY_EXEC_TSC_SCALING 0x02000000 | 79 | #define SECONDARY_EXEC_TSC_SCALING 0x02000000 |
@@ -516,12 +518,14 @@ struct vmx_msr_entry { | |||
516 | #define EPT_VIOLATION_READABLE_BIT 3 | 518 | #define EPT_VIOLATION_READABLE_BIT 3 |
517 | #define EPT_VIOLATION_WRITABLE_BIT 4 | 519 | #define EPT_VIOLATION_WRITABLE_BIT 4 |
518 | #define EPT_VIOLATION_EXECUTABLE_BIT 5 | 520 | #define EPT_VIOLATION_EXECUTABLE_BIT 5 |
521 | #define EPT_VIOLATION_GVA_TRANSLATED_BIT 8 | ||
519 | #define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) | 522 | #define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) |
520 | #define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) | 523 | #define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) |
521 | #define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) | 524 | #define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) |
522 | #define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) | 525 | #define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) |
523 | #define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) | 526 | #define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) |
524 | #define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) | 527 | #define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) |
528 | #define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT) | ||
525 | 529 | ||
526 | /* | 530 | /* |
527 | * VM-instruction error numbers | 531 | * VM-instruction error numbers |
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 739c0c594022..c2824d02ba37 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h | |||
@@ -9,6 +9,9 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/ioctl.h> | 10 | #include <linux/ioctl.h> |
11 | 11 | ||
12 | #define KVM_PIO_PAGE_OFFSET 1 | ||
13 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 | ||
14 | |||
12 | #define DE_VECTOR 0 | 15 | #define DE_VECTOR 0 |
13 | #define DB_VECTOR 1 | 16 | #define DB_VECTOR 1 |
14 | #define BP_VECTOR 3 | 17 | #define BP_VECTOR 3 |
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 835aa51c7f6e..c45765517092 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h | |||
@@ -1,10 +1,13 @@ | |||
1 | #ifndef _ASM_X86_PRCTL_H | 1 | #ifndef _ASM_X86_PRCTL_H |
2 | #define _ASM_X86_PRCTL_H | 2 | #define _ASM_X86_PRCTL_H |
3 | 3 | ||
4 | #define ARCH_SET_GS 0x1001 | 4 | #define ARCH_SET_GS 0x1001 |
5 | #define ARCH_SET_FS 0x1002 | 5 | #define ARCH_SET_FS 0x1002 |
6 | #define ARCH_GET_FS 0x1003 | 6 | #define ARCH_GET_FS 0x1003 |
7 | #define ARCH_GET_GS 0x1004 | 7 | #define ARCH_GET_GS 0x1004 |
8 | |||
9 | #define ARCH_GET_CPUID 0x1011 | ||
10 | #define ARCH_SET_CPUID 0x1012 | ||
8 | 11 | ||
9 | #define ARCH_MAP_VDSO_X32 0x2001 | 12 | #define ARCH_MAP_VDSO_X32 0x2001 |
10 | #define ARCH_MAP_VDSO_32 0x2002 | 13 | #define ARCH_MAP_VDSO_32 0x2002 |
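The two new arch_prctl(2) codes let a task query and toggle CPUID faulting for itself on hardware with X86_FEATURE_CPUID_FAULT. A hedged userspace sketch, assuming kernel headers that already define ARCH_GET_CPUID/ARCH_SET_CPUID; the raw syscall is used because glibc provides no wrapper:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/prctl.h>

	int main(void)
	{
		long on = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);
		printf("CPUID currently %s\n", on == 1 ? "enabled" : "disabled or unsupported");

		/* Disable CPUID for this task; a later CPUID instruction is delivered as SIGSEGV. */
		if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) != 0)
			perror("ARCH_SET_CPUID");
		return 0;
	}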
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 14458658e988..690a2dcf4078 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -76,7 +76,11 @@ | |||
76 | #define EXIT_REASON_WBINVD 54 | 76 | #define EXIT_REASON_WBINVD 54 |
77 | #define EXIT_REASON_XSETBV 55 | 77 | #define EXIT_REASON_XSETBV 55 |
78 | #define EXIT_REASON_APIC_WRITE 56 | 78 | #define EXIT_REASON_APIC_WRITE 56 |
79 | #define EXIT_REASON_RDRAND 57 | ||
79 | #define EXIT_REASON_INVPCID 58 | 80 | #define EXIT_REASON_INVPCID 58 |
81 | #define EXIT_REASON_VMFUNC 59 | ||
82 | #define EXIT_REASON_ENCLS 60 | ||
83 | #define EXIT_REASON_RDSEED 61 | ||
80 | #define EXIT_REASON_PML_FULL 62 | 84 | #define EXIT_REASON_PML_FULL 62 |
81 | #define EXIT_REASON_XSAVES 63 | 85 | #define EXIT_REASON_XSAVES 63 |
82 | #define EXIT_REASON_XRSTORS 64 | 86 | #define EXIT_REASON_XRSTORS 64 |
@@ -90,6 +94,7 @@ | |||
90 | { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ | 94 | { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ |
91 | { EXIT_REASON_CPUID, "CPUID" }, \ | 95 | { EXIT_REASON_CPUID, "CPUID" }, \ |
92 | { EXIT_REASON_HLT, "HLT" }, \ | 96 | { EXIT_REASON_HLT, "HLT" }, \ |
97 | { EXIT_REASON_INVD, "INVD" }, \ | ||
93 | { EXIT_REASON_INVLPG, "INVLPG" }, \ | 98 | { EXIT_REASON_INVLPG, "INVLPG" }, \ |
94 | { EXIT_REASON_RDPMC, "RDPMC" }, \ | 99 | { EXIT_REASON_RDPMC, "RDPMC" }, \ |
95 | { EXIT_REASON_RDTSC, "RDTSC" }, \ | 100 | { EXIT_REASON_RDTSC, "RDTSC" }, \ |
@@ -108,6 +113,8 @@ | |||
108 | { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ | 113 | { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ |
109 | { EXIT_REASON_MSR_READ, "MSR_READ" }, \ | 114 | { EXIT_REASON_MSR_READ, "MSR_READ" }, \ |
110 | { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ | 115 | { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ |
116 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | ||
117 | { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ | ||
111 | { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ | 118 | { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ |
112 | { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ | 119 | { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ |
113 | { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ | 120 | { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ |
@@ -115,20 +122,24 @@ | |||
115 | { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ | 122 | { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ |
116 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ | 123 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ |
117 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | 124 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ |
118 | { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ | 125 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ |
119 | { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ | 126 | { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ |
127 | { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ | ||
120 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | 128 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ |
121 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | 129 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ |
122 | { EXIT_REASON_INVEPT, "INVEPT" }, \ | 130 | { EXIT_REASON_INVEPT, "INVEPT" }, \ |
131 | { EXIT_REASON_RDTSCP, "RDTSCP" }, \ | ||
123 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ | 132 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ |
133 | { EXIT_REASON_INVVPID, "INVVPID" }, \ | ||
124 | { EXIT_REASON_WBINVD, "WBINVD" }, \ | 134 | { EXIT_REASON_WBINVD, "WBINVD" }, \ |
135 | { EXIT_REASON_XSETBV, "XSETBV" }, \ | ||
125 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ | 136 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ |
126 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | 137 | { EXIT_REASON_RDRAND, "RDRAND" }, \ |
127 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | ||
128 | { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ | ||
129 | { EXIT_REASON_INVD, "INVD" }, \ | ||
130 | { EXIT_REASON_INVVPID, "INVVPID" }, \ | ||
131 | { EXIT_REASON_INVPCID, "INVPCID" }, \ | 138 | { EXIT_REASON_INVPCID, "INVPCID" }, \ |
139 | { EXIT_REASON_VMFUNC, "VMFUNC" }, \ | ||
140 | { EXIT_REASON_ENCLS, "ENCLS" }, \ | ||
141 | { EXIT_REASON_RDSEED, "RDSEED" }, \ | ||
142 | { EXIT_REASON_PML_FULL, "PML_FULL" }, \ | ||
132 | { EXIT_REASON_XSAVES, "XSAVES" }, \ | 143 | { EXIT_REASON_XSAVES, "XSAVES" }, \ |
133 | { EXIT_REASON_XRSTORS, "XRSTORS" } | 144 | { EXIT_REASON_XRSTORS, "XRSTORS" } |
134 | 145 | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 063197771b8d..dfa90a3a5145 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) | |||
90 | return; | 90 | return; |
91 | } | 91 | } |
92 | 92 | ||
93 | if (ring3mwait_disabled) { | 93 | if (ring3mwait_disabled) |
94 | msr_clear_bit(MSR_MISC_FEATURE_ENABLES, | ||
95 | MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); | ||
96 | return; | 94 | return; |
97 | } | ||
98 | |||
99 | msr_set_bit(MSR_MISC_FEATURE_ENABLES, | ||
100 | MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); | ||
101 | 95 | ||
102 | set_cpu_cap(c, X86_FEATURE_RING3MWAIT); | 96 | set_cpu_cap(c, X86_FEATURE_RING3MWAIT); |
97 | this_cpu_or(msr_misc_features_shadow, | ||
98 | 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT); | ||
103 | 99 | ||
104 | if (c == &boot_cpu_data) | 100 | if (c == &boot_cpu_data) |
105 | ELF_HWCAP2 |= HWCAP2_RING3MWAIT; | 101 | ELF_HWCAP2 |= HWCAP2_RING3MWAIT; |
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c) | |||
488 | init_intel_energy_perf(c); | 484 | init_intel_energy_perf(c); |
489 | } | 485 | } |
490 | 486 | ||
487 | static void init_cpuid_fault(struct cpuinfo_x86 *c) | ||
488 | { | ||
489 | u64 msr; | ||
490 | |||
491 | if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) { | ||
492 | if (msr & MSR_PLATFORM_INFO_CPUID_FAULT) | ||
493 | set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); | ||
494 | } | ||
495 | } | ||
496 | |||
497 | static void init_intel_misc_features(struct cpuinfo_x86 *c) | ||
498 | { | ||
499 | u64 msr; | ||
500 | |||
501 | if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr)) | ||
502 | return; | ||
503 | |||
504 | /* Clear all MISC features */ | ||
505 | this_cpu_write(msr_misc_features_shadow, 0); | ||
506 | |||
507 | /* Check features and update capabilities and shadow control bits */ | ||
508 | init_cpuid_fault(c); | ||
509 | probe_xeon_phi_r3mwait(c); | ||
510 | |||
511 | msr = this_cpu_read(msr_misc_features_shadow); | ||
512 | wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); | ||
513 | } | ||
514 | |||
491 | static void init_intel(struct cpuinfo_x86 *c) | 515 | static void init_intel(struct cpuinfo_x86 *c) |
492 | { | 516 | { |
493 | unsigned int l2 = 0; | 517 | unsigned int l2 = 0; |
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c) | |||
602 | 626 | ||
603 | init_intel_energy_perf(c); | 627 | init_intel_energy_perf(c); |
604 | 628 | ||
605 | probe_xeon_phi_r3mwait(c); | 629 | init_intel_misc_features(c); |
606 | } | 630 | } |
607 | 631 | ||
608 | #ifdef CONFIG_X86_32 | 632 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 14f65a5f938e..da5c09789984 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu) | |||
396 | src = &per_cpu(steal_time, cpu); | 396 | src = &per_cpu(steal_time, cpu); |
397 | do { | 397 | do { |
398 | version = src->version; | 398 | version = src->version; |
399 | rmb(); | 399 | virt_rmb(); |
400 | steal = src->steal; | 400 | steal = src->steal; |
401 | rmb(); | 401 | virt_rmb(); |
402 | } while ((version & 1) || (version != src->version)); | 402 | } while ((version & 1) || (version != src->version)); |
403 | 403 | ||
404 | return steal; | 404 | return steal; |
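The reader loop above is one half of a seqcount-style protocol: the version is made odd while the host updates the steal-time record and even again afterwards, so the guest retries whenever it observes an odd or changed version; switching to virt_rmb() keeps the two reads ordered against the hypervisor even when the guest kernel is built for UP. Illustrative publisher side of the same protocol (not taken from this patch):

	st->version += 1;	/* odd: update in progress */
	smp_wmb();
	st->steal += delta;
	smp_wmb();
	st->version += 1;	/* even: consistent snapshot published */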
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f67591561711..0bb88428cbf2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/vm86.h> | 37 | #include <asm/vm86.h> |
38 | #include <asm/switch_to.h> | 38 | #include <asm/switch_to.h> |
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/prctl.h> | ||
40 | 41 | ||
41 | /* | 42 | /* |
42 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | 43 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
@@ -124,11 +125,6 @@ void flush_thread(void) | |||
124 | fpu__clear(&tsk->thread.fpu); | 125 | fpu__clear(&tsk->thread.fpu); |
125 | } | 126 | } |
126 | 127 | ||
127 | static void hard_disable_TSC(void) | ||
128 | { | ||
129 | cr4_set_bits(X86_CR4_TSD); | ||
130 | } | ||
131 | |||
132 | void disable_TSC(void) | 128 | void disable_TSC(void) |
133 | { | 129 | { |
134 | preempt_disable(); | 130 | preempt_disable(); |
@@ -137,15 +133,10 @@ void disable_TSC(void) | |||
137 | * Must flip the CPU state synchronously with | 133 | * Must flip the CPU state synchronously with |
138 | * TIF_NOTSC in the current running context. | 134 | * TIF_NOTSC in the current running context. |
139 | */ | 135 | */ |
140 | hard_disable_TSC(); | 136 | cr4_set_bits(X86_CR4_TSD); |
141 | preempt_enable(); | 137 | preempt_enable(); |
142 | } | 138 | } |
143 | 139 | ||
144 | static void hard_enable_TSC(void) | ||
145 | { | ||
146 | cr4_clear_bits(X86_CR4_TSD); | ||
147 | } | ||
148 | |||
149 | static void enable_TSC(void) | 140 | static void enable_TSC(void) |
150 | { | 141 | { |
151 | preempt_disable(); | 142 | preempt_disable(); |
@@ -154,7 +145,7 @@ static void enable_TSC(void) | |||
154 | * Must flip the CPU state synchronously with | 145 | * Must flip the CPU state synchronously with |
155 | * TIF_NOTSC in the current running context. | 146 | * TIF_NOTSC in the current running context. |
156 | */ | 147 | */ |
157 | hard_enable_TSC(); | 148 | cr4_clear_bits(X86_CR4_TSD); |
158 | preempt_enable(); | 149 | preempt_enable(); |
159 | } | 150 | } |
160 | 151 | ||
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val) | |||
182 | return 0; | 173 | return 0; |
183 | } | 174 | } |
184 | 175 | ||
185 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 176 | DEFINE_PER_CPU(u64, msr_misc_features_shadow); |
186 | struct tss_struct *tss) | ||
187 | { | ||
188 | struct thread_struct *prev, *next; | ||
189 | |||
190 | prev = &prev_p->thread; | ||
191 | next = &next_p->thread; | ||
192 | 177 | ||
193 | if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ | 178 | static void set_cpuid_faulting(bool on) |
194 | test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { | 179 | { |
195 | unsigned long debugctl = get_debugctlmsr(); | 180 | u64 msrval; |
196 | 181 | ||
197 | debugctl &= ~DEBUGCTLMSR_BTF; | 182 | msrval = this_cpu_read(msr_misc_features_shadow); |
198 | if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) | 183 | msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; |
199 | debugctl |= DEBUGCTLMSR_BTF; | 184 | msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); |
185 | this_cpu_write(msr_misc_features_shadow, msrval); | ||
186 | wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval); | ||
187 | } | ||
200 | 188 | ||
201 | update_debugctlmsr(debugctl); | 189 | static void disable_cpuid(void) |
190 | { | ||
191 | preempt_disable(); | ||
192 | if (!test_and_set_thread_flag(TIF_NOCPUID)) { | ||
193 | /* | ||
194 | * Must flip the CPU state synchronously with | ||
195 | * TIF_NOCPUID in the current running context. | ||
196 | */ | ||
197 | set_cpuid_faulting(true); | ||
202 | } | 198 | } |
199 | preempt_enable(); | ||
200 | } | ||
203 | 201 | ||
204 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 202 | static void enable_cpuid(void) |
205 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 203 | { |
206 | /* prev and next are different */ | 204 | preempt_disable(); |
207 | if (test_tsk_thread_flag(next_p, TIF_NOTSC)) | 205 | if (test_and_clear_thread_flag(TIF_NOCPUID)) { |
208 | hard_disable_TSC(); | 206 | /* |
209 | else | 207 | * Must flip the CPU state synchronously with |
210 | hard_enable_TSC(); | 208 | * TIF_NOCPUID in the current running context. |
209 | */ | ||
210 | set_cpuid_faulting(false); | ||
211 | } | 211 | } |
212 | preempt_enable(); | ||
213 | } | ||
214 | |||
215 | static int get_cpuid_mode(void) | ||
216 | { | ||
217 | return !test_thread_flag(TIF_NOCPUID); | ||
218 | } | ||
219 | |||
220 | static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled) | ||
221 | { | ||
222 | if (!static_cpu_has(X86_FEATURE_CPUID_FAULT)) | ||
223 | return -ENODEV; | ||
224 | |||
225 | if (cpuid_enabled) | ||
226 | enable_cpuid(); | ||
227 | else | ||
228 | disable_cpuid(); | ||
229 | |||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * Called immediately after a successful exec. | ||
235 | */ | ||
236 | void arch_setup_new_exec(void) | ||
237 | { | ||
238 | /* If cpuid was previously disabled for this task, re-enable it. */ | ||
239 | if (test_thread_flag(TIF_NOCPUID)) | ||
240 | enable_cpuid(); | ||
241 | } | ||
212 | 242 | ||
213 | if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 243 | static inline void switch_to_bitmap(struct tss_struct *tss, |
244 | struct thread_struct *prev, | ||
245 | struct thread_struct *next, | ||
246 | unsigned long tifp, unsigned long tifn) | ||
247 | { | ||
248 | if (tifn & _TIF_IO_BITMAP) { | ||
214 | /* | 249 | /* |
215 | * Copy the relevant range of the IO bitmap. | 250 | * Copy the relevant range of the IO bitmap. |
216 | * Normally this is 128 bytes or less: | 251 | * Normally this is 128 bytes or less: |
217 | */ | 252 | */ |
218 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, | 253 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, |
219 | max(prev->io_bitmap_max, next->io_bitmap_max)); | 254 | max(prev->io_bitmap_max, next->io_bitmap_max)); |
220 | |||
221 | /* | 255 | /* |
222 | * Make sure that the TSS limit is correct for the CPU | 256 | * Make sure that the TSS limit is correct for the CPU |
223 | * to notice the IO bitmap. | 257 | * to notice the IO bitmap. |
224 | */ | 258 | */ |
225 | refresh_tss_limit(); | 259 | refresh_tss_limit(); |
226 | } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { | 260 | } else if (tifp & _TIF_IO_BITMAP) { |
227 | /* | 261 | /* |
228 | * Clear any possible leftover bits: | 262 | * Clear any possible leftover bits: |
229 | */ | 263 | */ |
230 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 264 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
231 | } | 265 | } |
266 | } | ||
267 | |||
268 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | ||
269 | struct tss_struct *tss) | ||
270 | { | ||
271 | struct thread_struct *prev, *next; | ||
272 | unsigned long tifp, tifn; | ||
273 | |||
274 | prev = &prev_p->thread; | ||
275 | next = &next_p->thread; | ||
276 | |||
277 | tifn = READ_ONCE(task_thread_info(next_p)->flags); | ||
278 | tifp = READ_ONCE(task_thread_info(prev_p)->flags); | ||
279 | switch_to_bitmap(tss, prev, next, tifp, tifn); | ||
280 | |||
232 | propagate_user_return_notify(prev_p, next_p); | 281 | propagate_user_return_notify(prev_p, next_p); |
282 | |||
283 | if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && | ||
284 | arch_has_block_step()) { | ||
285 | unsigned long debugctl, msk; | ||
286 | |||
287 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
288 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
289 | msk = tifn & _TIF_BLOCKSTEP; | ||
290 | debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; | ||
291 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
292 | } | ||
293 | |||
294 | if ((tifp ^ tifn) & _TIF_NOTSC) | ||
295 | cr4_toggle_bits(X86_CR4_TSD); | ||
296 | |||
297 | if ((tifp ^ tifn) & _TIF_NOCPUID) | ||
298 | set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); | ||
233 | } | 299 | } |
234 | 300 | ||
235 | /* | 301 | /* |
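
The rewritten __switch_to_xtra() above replaces the old if/else on TIF_BLOCKSTEP with a branchless update of the BTF bit in MSR_IA32_DEBUGCTLMSR: the thread-flag bit is masked out of the next task's flags, shifted down to bit 0, and shifted back up into the BTF position. A minimal user-space illustration of that bit manipulation (plain C, not kernel code; the bit positions below merely stand in for the real values from asm/thread_info.h and asm/msr-index.h):

#include <assert.h>
#include <stdio.h>

/* Illustrative bit positions only. */
#define TIF_BLOCKSTEP		25
#define _TIF_BLOCKSTEP		(1UL << TIF_BLOCKSTEP)
#define DEBUGCTLMSR_BTF_SHIFT	1
#define DEBUGCTLMSR_BTF		(1UL << DEBUGCTLMSR_BTF_SHIFT)

static unsigned long update_debugctl(unsigned long debugctl, unsigned long tifn)
{
	unsigned long msk = tifn & _TIF_BLOCKSTEP;

	debugctl &= ~DEBUGCTLMSR_BTF;
	/* Move the thread-flag bit straight into the BTF position. */
	debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
	return debugctl;
}

int main(void)
{
	assert(update_debugctl(0, _TIF_BLOCKSTEP) == DEBUGCTLMSR_BTF);
	assert(update_debugctl(DEBUGCTLMSR_BTF, 0) == 0);
	puts("branchless BTF update matches the old if/else behaviour");
	return 0;
}
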
@@ -550,3 +616,16 @@ out: | |||
550 | put_task_stack(p); | 616 | put_task_stack(p); |
551 | return ret; | 617 | return ret; |
552 | } | 618 | } |
619 | |||
620 | long do_arch_prctl_common(struct task_struct *task, int option, | ||
621 | unsigned long cpuid_enabled) | ||
622 | { | ||
623 | switch (option) { | ||
624 | case ARCH_GET_CPUID: | ||
625 | return get_cpuid_mode(); | ||
626 | case ARCH_SET_CPUID: | ||
627 | return set_cpuid_mode(task, cpuid_enabled); | ||
628 | } | ||
629 | |||
630 | return -EINVAL; | ||
631 | } | ||
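
do_arch_prctl_common() is what ultimately serves the new ARCH_GET_CPUID and ARCH_SET_CPUID options. A minimal user-space sketch of driving them through arch_prctl(2) (not part of the patch; the option values are the ones this series adds to asm/prctl.h, and ARCH_SET_CPUID fails with -ENODEV on CPUs without X86_FEATURE_CPUID_FAULT):

#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_GET_CPUID
#define ARCH_GET_CPUID	0x1011
#define ARCH_SET_CPUID	0x1012
#endif

int main(void)
{
	/* 1 means CPUID executes normally, 0 means it faults. */
	long on = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);

	if (on < 0)
		perror("ARCH_GET_CPUID");
	else
		printf("CPUID currently %s\n", on ? "enabled" : "faulting");

	/* Ask the kernel to make CPUID fault in this task from now on. */
	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) < 0)
		perror("ARCH_SET_CPUID");

	return 0;
}
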
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4c818f8bc135..ff40e74c9181 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
38 | #include <linux/io.h> | 38 | #include <linux/io.h> |
39 | #include <linux/kdebug.h> | 39 | #include <linux/kdebug.h> |
40 | #include <linux/syscalls.h> | ||
40 | 41 | ||
41 | #include <asm/pgtable.h> | 42 | #include <asm/pgtable.h> |
42 | #include <asm/ldt.h> | 43 | #include <asm/ldt.h> |
@@ -56,6 +57,7 @@ | |||
56 | #include <asm/switch_to.h> | 57 | #include <asm/switch_to.h> |
57 | #include <asm/vm86.h> | 58 | #include <asm/vm86.h> |
58 | #include <asm/intel_rdt.h> | 59 | #include <asm/intel_rdt.h> |
60 | #include <asm/proto.h> | ||
59 | 61 | ||
60 | void __show_regs(struct pt_regs *regs, int all) | 62 | void __show_regs(struct pt_regs *regs, int all) |
61 | { | 63 | { |
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
304 | 306 | ||
305 | return prev_p; | 307 | return prev_p; |
306 | } | 308 | } |
309 | |||
310 | SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) | ||
311 | { | ||
312 | return do_arch_prctl_common(current, option, arg2); | ||
313 | } | ||
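
With the syscall wired up on 32-bit as well, a task that disables CPUID sees the instruction fault from then on (the #GP is delivered as SIGSEGV), and arch_setup_new_exec() re-enables CPUID across exec. A rough demonstration, again assuming the ARCH_SET_CPUID value from this series and hardware with CPUID faulting:

#include <cpuid.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_SET_CPUID
#define ARCH_SET_CPUID	0x1012
#endif

static sigjmp_buf env;

static void on_sigsegv(int sig)
{
	(void)sig;
	siglongjmp(env, 1);
}

int main(void)
{
	unsigned int a, b, c, d;

	signal(SIGSEGV, on_sigsegv);

	/* Fails with ENODEV when the CPU lacks CPUID faulting. */
	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) < 0)
		perror("ARCH_SET_CPUID");

	if (sigsetjmp(env, 1) == 0) {
		__get_cpuid(0, &a, &b, &c, &d);	/* executes a CPUID instruction */
		puts("CPUID executed (faulting not active)");
	} else {
		puts("CPUID faulted as requested");
	}
	return 0;
}
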
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d6b784a5520d..ea1a6180bf39 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
38 | #include <linux/io.h> | 38 | #include <linux/io.h> |
39 | #include <linux/ftrace.h> | 39 | #include <linux/ftrace.h> |
40 | #include <linux/syscalls.h> | ||
40 | 41 | ||
41 | #include <asm/pgtable.h> | 42 | #include <asm/pgtable.h> |
42 | #include <asm/processor.h> | 43 | #include <asm/processor.h> |
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
204 | (struct user_desc __user *)tls, 0); | 205 | (struct user_desc __user *)tls, 0); |
205 | else | 206 | else |
206 | #endif | 207 | #endif |
207 | err = do_arch_prctl(p, ARCH_SET_FS, tls); | 208 | err = do_arch_prctl_64(p, ARCH_SET_FS, tls); |
208 | if (err) | 209 | if (err) |
209 | goto out; | 210 | goto out; |
210 | } | 211 | } |
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) | |||
547 | } | 548 | } |
548 | #endif | 549 | #endif |
549 | 550 | ||
550 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | 551 | long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) |
551 | { | 552 | { |
552 | int ret = 0; | 553 | int ret = 0; |
553 | int doit = task == current; | 554 | int doit = task == current; |
554 | int cpu; | 555 | int cpu; |
555 | 556 | ||
556 | switch (code) { | 557 | switch (option) { |
557 | case ARCH_SET_GS: | 558 | case ARCH_SET_GS: |
558 | if (addr >= TASK_SIZE_MAX) | 559 | if (arg2 >= TASK_SIZE_MAX) |
559 | return -EPERM; | 560 | return -EPERM; |
560 | cpu = get_cpu(); | 561 | cpu = get_cpu(); |
561 | task->thread.gsindex = 0; | 562 | task->thread.gsindex = 0; |
562 | task->thread.gsbase = addr; | 563 | task->thread.gsbase = arg2; |
563 | if (doit) { | 564 | if (doit) { |
564 | load_gs_index(0); | 565 | load_gs_index(0); |
565 | ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); | 566 | ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); |
566 | } | 567 | } |
567 | put_cpu(); | 568 | put_cpu(); |
568 | break; | 569 | break; |
569 | case ARCH_SET_FS: | 570 | case ARCH_SET_FS: |
570 | /* Not strictly needed for fs, but do it for symmetry | 571 | /* Not strictly needed for fs, but do it for symmetry |
571 | with gs */ | 572 | with gs */ |
572 | if (addr >= TASK_SIZE_MAX) | 573 | if (arg2 >= TASK_SIZE_MAX) |
573 | return -EPERM; | 574 | return -EPERM; |
574 | cpu = get_cpu(); | 575 | cpu = get_cpu(); |
575 | task->thread.fsindex = 0; | 576 | task->thread.fsindex = 0; |
576 | task->thread.fsbase = addr; | 577 | task->thread.fsbase = arg2; |
577 | if (doit) { | 578 | if (doit) { |
578 | /* set the selector to 0 to not confuse __switch_to */ | 579 | /* set the selector to 0 to not confuse __switch_to */ |
579 | loadsegment(fs, 0); | 580 | loadsegment(fs, 0); |
580 | ret = wrmsrl_safe(MSR_FS_BASE, addr); | 581 | ret = wrmsrl_safe(MSR_FS_BASE, arg2); |
581 | } | 582 | } |
582 | put_cpu(); | 583 | put_cpu(); |
583 | break; | 584 | break; |
584 | case ARCH_GET_FS: { | 585 | case ARCH_GET_FS: { |
585 | unsigned long base; | 586 | unsigned long base; |
587 | |||
586 | if (doit) | 588 | if (doit) |
587 | rdmsrl(MSR_FS_BASE, base); | 589 | rdmsrl(MSR_FS_BASE, base); |
588 | else | 590 | else |
589 | base = task->thread.fsbase; | 591 | base = task->thread.fsbase; |
590 | ret = put_user(base, (unsigned long __user *)addr); | 592 | ret = put_user(base, (unsigned long __user *)arg2); |
591 | break; | 593 | break; |
592 | } | 594 | } |
593 | case ARCH_GET_GS: { | 595 | case ARCH_GET_GS: { |
594 | unsigned long base; | 596 | unsigned long base; |
597 | |||
595 | if (doit) | 598 | if (doit) |
596 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 599 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
597 | else | 600 | else |
598 | base = task->thread.gsbase; | 601 | base = task->thread.gsbase; |
599 | ret = put_user(base, (unsigned long __user *)addr); | 602 | ret = put_user(base, (unsigned long __user *)arg2); |
600 | break; | 603 | break; |
601 | } | 604 | } |
602 | 605 | ||
603 | #ifdef CONFIG_CHECKPOINT_RESTORE | 606 | #ifdef CONFIG_CHECKPOINT_RESTORE |
604 | # ifdef CONFIG_X86_X32_ABI | 607 | # ifdef CONFIG_X86_X32_ABI |
605 | case ARCH_MAP_VDSO_X32: | 608 | case ARCH_MAP_VDSO_X32: |
606 | return prctl_map_vdso(&vdso_image_x32, addr); | 609 | return prctl_map_vdso(&vdso_image_x32, arg2); |
607 | # endif | 610 | # endif |
608 | # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 611 | # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
609 | case ARCH_MAP_VDSO_32: | 612 | case ARCH_MAP_VDSO_32: |
610 | return prctl_map_vdso(&vdso_image_32, addr); | 613 | return prctl_map_vdso(&vdso_image_32, arg2); |
611 | # endif | 614 | # endif |
612 | case ARCH_MAP_VDSO_64: | 615 | case ARCH_MAP_VDSO_64: |
613 | return prctl_map_vdso(&vdso_image_64, addr); | 616 | return prctl_map_vdso(&vdso_image_64, arg2); |
614 | #endif | 617 | #endif |
615 | 618 | ||
616 | default: | 619 | default: |
@@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
621 | return ret; | 624 | return ret; |
622 | } | 625 | } |
623 | 626 | ||
624 | long sys_arch_prctl(int code, unsigned long addr) | 627 | SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) |
628 | { | ||
629 | long ret; | ||
630 | |||
631 | ret = do_arch_prctl_64(current, option, arg2); | ||
632 | if (ret == -EINVAL) | ||
633 | ret = do_arch_prctl_common(current, option, arg2); | ||
634 | |||
635 | return ret; | ||
636 | } | ||
637 | |||
638 | #ifdef CONFIG_IA32_EMULATION | ||
639 | COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) | ||
625 | { | 640 | { |
626 | return do_arch_prctl(current, code, addr); | 641 | return do_arch_prctl_common(current, option, arg2); |
627 | } | 642 | } |
643 | #endif | ||
628 | 644 | ||
629 | unsigned long KSTK_ESP(struct task_struct *task) | 645 | unsigned long KSTK_ESP(struct task_struct *task) |
630 | { | 646 | { |
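
On 64-bit, the renamed do_arch_prctl_64() keeps handling the FS/GS and vDSO options; the syscall entry point only falls back to do_arch_prctl_common() when that returns -EINVAL, so both option families share the one arch_prctl syscall. A short sketch of the segment-base queries that stay on the 64-bit path (option values from asm/prctl.h; not part of the patch):

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef ARCH_GET_FS
#define ARCH_GET_FS	0x1003
#define ARCH_GET_GS	0x1004
#endif

int main(void)
{
	unsigned long fsbase = 0, gsbase = 0;

	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == 0)
		printf("FS base: %#lx\n", fsbase);	/* typically the TLS block */
	if (syscall(SYS_arch_prctl, ARCH_GET_GS, &gsbase) == 0)
		printf("GS base: %#lx\n", gsbase);

	return 0;
}
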
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2364b23ea3e5..f37d18124648 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child, | |||
396 | if (value >= TASK_SIZE_MAX) | 396 | if (value >= TASK_SIZE_MAX) |
397 | return -EIO; | 397 | return -EIO; |
398 | /* | 398 | /* |
399 | * When changing the segment base, use do_arch_prctl | 399 | * When changing the segment base, use do_arch_prctl_64 |
400 | * to set either thread.fs or thread.fsindex and the | 400 | * to set either thread.fs or thread.fsindex and the |
401 | * corresponding GDT slot. | 401 | * corresponding GDT slot. |
402 | */ | 402 | */ |
403 | if (child->thread.fsbase != value) | 403 | if (child->thread.fsbase != value) |
404 | return do_arch_prctl(child, ARCH_SET_FS, value); | 404 | return do_arch_prctl_64(child, ARCH_SET_FS, value); |
405 | return 0; | 405 | return 0; |
406 | case offsetof(struct user_regs_struct,gs_base): | 406 | case offsetof(struct user_regs_struct,gs_base): |
407 | /* | 407 | /* |
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child, | |||
410 | if (value >= TASK_SIZE_MAX) | 410 | if (value >= TASK_SIZE_MAX) |
411 | return -EIO; | 411 | return -EIO; |
412 | if (child->thread.gsbase != value) | 412 | if (child->thread.gsbase != value) |
413 | return do_arch_prctl(child, ARCH_SET_GS, value); | 413 | return do_arch_prctl_64(child, ARCH_SET_GS, value); |
414 | return 0; | 414 | return 0; |
415 | #endif | 415 | #endif |
416 | } | 416 | } |
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request, | |||
869 | Works just like arch_prctl, except that the arguments | 869 | Works just like arch_prctl, except that the arguments |
870 | are reversed. */ | 870 | are reversed. */ |
871 | case PTRACE_ARCH_PRCTL: | 871 | case PTRACE_ARCH_PRCTL: |
872 | ret = do_arch_prctl(child, data, addr); | 872 | ret = do_arch_prctl_64(child, data, addr); |
873 | break; | 873 | break; |
874 | #endif | 874 | #endif |
875 | 875 | ||
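
The ptrace path keeps its reversed calling convention: the option travels in the ptrace data argument and the value in addr, now routed to do_arch_prctl_64(). A rough tracer sketch reading a stopped child's FS base this way (assumes x86-64 and PTRACE_ARCH_PRCTL == 30 from asm/ptrace-abi.h; error handling trimmed, not part of the patch):

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef PTRACE_ARCH_PRCTL
#define PTRACE_ARCH_PRCTL	30
#endif
#define ARCH_GET_FS		0x1003

int main(void)
{
	unsigned long fsbase = 0;
	pid_t pid = fork();

	if (pid == 0) {					/* tracee */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);				/* tracee is now stopped */

	/* Value pointer in 'addr', option in 'data': reversed vs. arch_prctl(). */
	if (ptrace(PTRACE_ARCH_PRCTL, pid, &fsbase, (void *)ARCH_GET_FS) == 0)
		printf("tracee FS base: %#lx\n", fsbase);

	kill(pid, SIGKILL);
	waitpid(pid, NULL, 0);
	return 0;
}
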
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ab8e32f7b9a8..760433b2574a 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -86,18 +86,6 @@ config KVM_MMU_AUDIT | |||
86 | This option adds a R/W kVM module parameter 'mmu_audit', which allows | 86 | This option adds a R/W kVM module parameter 'mmu_audit', which allows |
87 | auditing of KVM MMU events at runtime. | 87 | auditing of KVM MMU events at runtime. |
88 | 88 | ||
89 | config KVM_DEVICE_ASSIGNMENT | ||
90 | bool "KVM legacy PCI device assignment support (DEPRECATED)" | ||
91 | depends on KVM && PCI && IOMMU_API | ||
92 | default n | ||
93 | ---help--- | ||
94 | Provide support for legacy PCI device assignment through KVM. The | ||
95 | kernel now also supports a full featured userspace device driver | ||
96 | framework through VFIO, which supersedes this support and provides | ||
97 | better security. | ||
98 | |||
99 | If unsure, say N. | ||
100 | |||
101 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 89 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
102 | # the virtualization menu. | 90 | # the virtualization menu. |
103 | source drivers/vhost/Kconfig | 91 | source drivers/vhost/Kconfig |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 3bff20710471..09d4b17be022 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -15,8 +15,6 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | |||
15 | i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ | 15 | i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ |
16 | hyperv.o page_track.o debugfs.o | 16 | hyperv.o page_track.o debugfs.o |
17 | 17 | ||
18 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o | ||
19 | |||
20 | kvm-intel-y += vmx.o pmu_intel.o | 18 | kvm-intel-y += vmx.o pmu_intel.o |
21 | kvm-amd-y += svm.o pmu_amd.o | 19 | kvm-amd-y += svm.o pmu_amd.o |
22 | 20 | ||
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c deleted file mode 100644 index 308b8597c691..000000000000 --- a/arch/x86/kvm/assigned-dev.c +++ /dev/null | |||
@@ -1,1058 +0,0 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine - device assignment support | ||
3 | * | ||
4 | * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
7 | * the COPYING file in the top-level directory. | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/kvm_host.h> | ||
12 | #include <linux/kvm.h> | ||
13 | #include <linux/uaccess.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/namei.h> | ||
21 | #include <linux/fs.h> | ||
22 | #include "irq.h" | ||
23 | #include "assigned-dev.h" | ||
24 | #include "trace/events/kvm.h" | ||
25 | |||
26 | struct kvm_assigned_dev_kernel { | ||
27 | struct kvm_irq_ack_notifier ack_notifier; | ||
28 | struct list_head list; | ||
29 | int assigned_dev_id; | ||
30 | int host_segnr; | ||
31 | int host_busnr; | ||
32 | int host_devfn; | ||
33 | unsigned int entries_nr; | ||
34 | int host_irq; | ||
35 | bool host_irq_disabled; | ||
36 | bool pci_2_3; | ||
37 | struct msix_entry *host_msix_entries; | ||
38 | int guest_irq; | ||
39 | struct msix_entry *guest_msix_entries; | ||
40 | unsigned long irq_requested_type; | ||
41 | int irq_source_id; | ||
42 | int flags; | ||
43 | struct pci_dev *dev; | ||
44 | struct kvm *kvm; | ||
45 | spinlock_t intx_lock; | ||
46 | spinlock_t intx_mask_lock; | ||
47 | char irq_name[32]; | ||
48 | struct pci_saved_state *pci_saved_state; | ||
49 | }; | ||
50 | |||
51 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
52 | int assigned_dev_id) | ||
53 | { | ||
54 | struct kvm_assigned_dev_kernel *match; | ||
55 | |||
56 | list_for_each_entry(match, head, list) { | ||
57 | if (match->assigned_dev_id == assigned_dev_id) | ||
58 | return match; | ||
59 | } | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
64 | *assigned_dev, int irq) | ||
65 | { | ||
66 | int i, index; | ||
67 | struct msix_entry *host_msix_entries; | ||
68 | |||
69 | host_msix_entries = assigned_dev->host_msix_entries; | ||
70 | |||
71 | index = -1; | ||
72 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
73 | if (irq == host_msix_entries[i].vector) { | ||
74 | index = i; | ||
75 | break; | ||
76 | } | ||
77 | if (index < 0) | ||
78 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
79 | |||
80 | return index; | ||
81 | } | ||
82 | |||
83 | static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) | ||
84 | { | ||
85 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
86 | int ret; | ||
87 | |||
88 | spin_lock(&assigned_dev->intx_lock); | ||
89 | if (pci_check_and_mask_intx(assigned_dev->dev)) { | ||
90 | assigned_dev->host_irq_disabled = true; | ||
91 | ret = IRQ_WAKE_THREAD; | ||
92 | } else | ||
93 | ret = IRQ_NONE; | ||
94 | spin_unlock(&assigned_dev->intx_lock); | ||
95 | |||
96 | return ret; | ||
97 | } | ||
98 | |||
99 | static void | ||
100 | kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, | ||
101 | int vector) | ||
102 | { | ||
103 | if (unlikely(assigned_dev->irq_requested_type & | ||
104 | KVM_DEV_IRQ_GUEST_INTX)) { | ||
105 | spin_lock(&assigned_dev->intx_mask_lock); | ||
106 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) | ||
107 | kvm_set_irq(assigned_dev->kvm, | ||
108 | assigned_dev->irq_source_id, vector, 1, | ||
109 | false); | ||
110 | spin_unlock(&assigned_dev->intx_mask_lock); | ||
111 | } else | ||
112 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
113 | vector, 1, false); | ||
114 | } | ||
115 | |||
116 | static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) | ||
117 | { | ||
118 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
119 | |||
120 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
121 | spin_lock_irq(&assigned_dev->intx_lock); | ||
122 | disable_irq_nosync(irq); | ||
123 | assigned_dev->host_irq_disabled = true; | ||
124 | spin_unlock_irq(&assigned_dev->intx_lock); | ||
125 | } | ||
126 | |||
127 | kvm_assigned_dev_raise_guest_irq(assigned_dev, | ||
128 | assigned_dev->guest_irq); | ||
129 | |||
130 | return IRQ_HANDLED; | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * Deliver an IRQ in an atomic context if we can, or return a failure | ||
135 | * so that the caller can retry in a process context. | ||
136 | * Return value: | ||
137 | * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. | ||
138 | * Other values - No need to retry. | ||
139 | */ | ||
140 | static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, | ||
141 | int level) | ||
142 | { | ||
143 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | ||
144 | struct kvm_kernel_irq_routing_entry *e; | ||
145 | int ret = -EINVAL; | ||
146 | int idx; | ||
147 | |||
148 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
149 | |||
150 | /* | ||
151 | * Injection into either PIC or IOAPIC might need to scan all CPUs, | ||
152 | * which would need to be retried from thread context; when same GSI | ||
153 | * is connected to both PIC and IOAPIC, we'd have to report a | ||
154 | * partial failure here. | ||
155 | * Since there's no easy way to do this, we only support injecting MSI | ||
156 | * which is limited to 1:1 GSI mapping. | ||
157 | */ | ||
158 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
159 | if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { | ||
160 | e = &entries[0]; | ||
161 | ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, | ||
162 | irq, level); | ||
163 | } | ||
164 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | |||
169 | static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) | ||
170 | { | ||
171 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
172 | int ret = kvm_set_irq_inatomic(assigned_dev->kvm, | ||
173 | assigned_dev->irq_source_id, | ||
174 | assigned_dev->guest_irq, 1); | ||
175 | return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; | ||
176 | } | ||
177 | |||
178 | static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) | ||
179 | { | ||
180 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
181 | |||
182 | kvm_assigned_dev_raise_guest_irq(assigned_dev, | ||
183 | assigned_dev->guest_irq); | ||
184 | |||
185 | return IRQ_HANDLED; | ||
186 | } | ||
187 | |||
188 | static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) | ||
189 | { | ||
190 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
191 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
192 | u32 vector; | ||
193 | int ret = 0; | ||
194 | |||
195 | if (index >= 0) { | ||
196 | vector = assigned_dev->guest_msix_entries[index].vector; | ||
197 | ret = kvm_set_irq_inatomic(assigned_dev->kvm, | ||
198 | assigned_dev->irq_source_id, | ||
199 | vector, 1); | ||
200 | } | ||
201 | |||
202 | return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; | ||
203 | } | ||
204 | |||
205 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) | ||
206 | { | ||
207 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
208 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
209 | u32 vector; | ||
210 | |||
211 | if (index >= 0) { | ||
212 | vector = assigned_dev->guest_msix_entries[index].vector; | ||
213 | kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); | ||
214 | } | ||
215 | |||
216 | return IRQ_HANDLED; | ||
217 | } | ||
218 | |||
219 | /* Ack the irq line for an assigned device */ | ||
220 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
221 | { | ||
222 | struct kvm_assigned_dev_kernel *dev = | ||
223 | container_of(kian, struct kvm_assigned_dev_kernel, | ||
224 | ack_notifier); | ||
225 | |||
226 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); | ||
227 | |||
228 | spin_lock(&dev->intx_mask_lock); | ||
229 | |||
230 | if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { | ||
231 | bool reassert = false; | ||
232 | |||
233 | spin_lock_irq(&dev->intx_lock); | ||
234 | /* | ||
235 | * The guest IRQ may be shared so this ack can come from an | ||
236 | * IRQ for another guest device. | ||
237 | */ | ||
238 | if (dev->host_irq_disabled) { | ||
239 | if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) | ||
240 | enable_irq(dev->host_irq); | ||
241 | else if (!pci_check_and_unmask_intx(dev->dev)) | ||
242 | reassert = true; | ||
243 | dev->host_irq_disabled = reassert; | ||
244 | } | ||
245 | spin_unlock_irq(&dev->intx_lock); | ||
246 | |||
247 | if (reassert) | ||
248 | kvm_set_irq(dev->kvm, dev->irq_source_id, | ||
249 | dev->guest_irq, 1, false); | ||
250 | } | ||
251 | |||
252 | spin_unlock(&dev->intx_mask_lock); | ||
253 | } | ||
254 | |||
255 | static void deassign_guest_irq(struct kvm *kvm, | ||
256 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
257 | { | ||
258 | if (assigned_dev->ack_notifier.gsi != -1) | ||
259 | kvm_unregister_irq_ack_notifier(kvm, | ||
260 | &assigned_dev->ack_notifier); | ||
261 | |||
262 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
263 | assigned_dev->guest_irq, 0, false); | ||
264 | |||
265 | if (assigned_dev->irq_source_id != -1) | ||
266 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
267 | assigned_dev->irq_source_id = -1; | ||
268 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
269 | } | ||
270 | |||
271 | /* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */ | ||
272 | static void deassign_host_irq(struct kvm *kvm, | ||
273 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
274 | { | ||
275 | /* | ||
276 | * We disable irq here to prevent further events. | ||
277 | * | ||
278 | * Note that this may result in a nested disable if the interrupt type is | ||
279 | * INTx, but that is fine because we are about to free it. | ||
280 | * | ||
281 | * If this function is called as part of VM destruction, make sure the kvm | ||
282 | * state is still valid at that point, since we may also have to wait on a | ||
283 | * currently running IRQ handler. | ||
284 | */ | ||
285 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
286 | int i; | ||
287 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
288 | disable_irq(assigned_dev->host_msix_entries[i].vector); | ||
289 | |||
290 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
291 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
292 | assigned_dev); | ||
293 | |||
294 | assigned_dev->entries_nr = 0; | ||
295 | kfree(assigned_dev->host_msix_entries); | ||
296 | kfree(assigned_dev->guest_msix_entries); | ||
297 | pci_disable_msix(assigned_dev->dev); | ||
298 | } else { | ||
299 | /* Deal with MSI and INTx */ | ||
300 | if ((assigned_dev->irq_requested_type & | ||
301 | KVM_DEV_IRQ_HOST_INTX) && | ||
302 | (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
303 | spin_lock_irq(&assigned_dev->intx_lock); | ||
304 | pci_intx(assigned_dev->dev, false); | ||
305 | spin_unlock_irq(&assigned_dev->intx_lock); | ||
306 | synchronize_irq(assigned_dev->host_irq); | ||
307 | } else | ||
308 | disable_irq(assigned_dev->host_irq); | ||
309 | |||
310 | free_irq(assigned_dev->host_irq, assigned_dev); | ||
311 | |||
312 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
313 | pci_disable_msi(assigned_dev->dev); | ||
314 | } | ||
315 | |||
316 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
317 | } | ||
318 | |||
319 | static int kvm_deassign_irq(struct kvm *kvm, | ||
320 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
321 | unsigned long irq_requested_type) | ||
322 | { | ||
323 | unsigned long guest_irq_type, host_irq_type; | ||
324 | |||
325 | if (!irqchip_in_kernel(kvm)) | ||
326 | return -EINVAL; | ||
327 | /* no irq assignment to deassign */ | ||
328 | if (!assigned_dev->irq_requested_type) | ||
329 | return -ENXIO; | ||
330 | |||
331 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
332 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
333 | |||
334 | if (host_irq_type) | ||
335 | deassign_host_irq(kvm, assigned_dev); | ||
336 | if (guest_irq_type) | ||
337 | deassign_guest_irq(kvm, assigned_dev); | ||
338 | |||
339 | return 0; | ||
340 | } | ||
341 | |||
342 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
343 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
344 | { | ||
345 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
346 | } | ||
347 | |||
348 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
349 | struct kvm_assigned_dev_kernel | ||
350 | *assigned_dev) | ||
351 | { | ||
352 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
353 | |||
354 | pci_reset_function(assigned_dev->dev); | ||
355 | if (pci_load_and_free_saved_state(assigned_dev->dev, | ||
356 | &assigned_dev->pci_saved_state)) | ||
357 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | ||
358 | __func__, dev_name(&assigned_dev->dev->dev)); | ||
359 | else | ||
360 | pci_restore_state(assigned_dev->dev); | ||
361 | |||
362 | pci_clear_dev_assigned(assigned_dev->dev); | ||
363 | |||
364 | pci_release_regions(assigned_dev->dev); | ||
365 | pci_disable_device(assigned_dev->dev); | ||
366 | pci_dev_put(assigned_dev->dev); | ||
367 | |||
368 | list_del(&assigned_dev->list); | ||
369 | kfree(assigned_dev); | ||
370 | } | ||
371 | |||
372 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
373 | { | ||
374 | struct kvm_assigned_dev_kernel *assigned_dev, *tmp; | ||
375 | |||
376 | list_for_each_entry_safe(assigned_dev, tmp, | ||
377 | &kvm->arch.assigned_dev_head, list) { | ||
378 | kvm_free_assigned_device(kvm, assigned_dev); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
383 | struct kvm_assigned_dev_kernel *dev) | ||
384 | { | ||
385 | irq_handler_t irq_handler; | ||
386 | unsigned long flags; | ||
387 | |||
388 | dev->host_irq = dev->dev->irq; | ||
389 | |||
390 | /* | ||
391 | * We can only share the IRQ line with other host devices if we are | ||
392 | * able to disable the IRQ source at device-level - independently of | ||
393 | * the guest driver. Otherwise host devices may suffer from unbounded | ||
394 | * IRQ latencies when the guest keeps the line asserted. | ||
395 | */ | ||
396 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { | ||
397 | irq_handler = kvm_assigned_dev_intx; | ||
398 | flags = IRQF_SHARED; | ||
399 | } else { | ||
400 | irq_handler = NULL; | ||
401 | flags = IRQF_ONESHOT; | ||
402 | } | ||
403 | if (request_threaded_irq(dev->host_irq, irq_handler, | ||
404 | kvm_assigned_dev_thread_intx, flags, | ||
405 | dev->irq_name, dev)) | ||
406 | return -EIO; | ||
407 | |||
408 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { | ||
409 | spin_lock_irq(&dev->intx_lock); | ||
410 | pci_intx(dev->dev, true); | ||
411 | spin_unlock_irq(&dev->intx_lock); | ||
412 | } | ||
413 | return 0; | ||
414 | } | ||
415 | |||
416 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
417 | struct kvm_assigned_dev_kernel *dev) | ||
418 | { | ||
419 | int r; | ||
420 | |||
421 | if (!dev->dev->msi_enabled) { | ||
422 | r = pci_enable_msi(dev->dev); | ||
423 | if (r) | ||
424 | return r; | ||
425 | } | ||
426 | |||
427 | dev->host_irq = dev->dev->irq; | ||
428 | if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi, | ||
429 | kvm_assigned_dev_thread_msi, 0, | ||
430 | dev->irq_name, dev)) { | ||
431 | pci_disable_msi(dev->dev); | ||
432 | return -EIO; | ||
433 | } | ||
434 | |||
435 | return 0; | ||
436 | } | ||
437 | |||
438 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
439 | struct kvm_assigned_dev_kernel *dev) | ||
440 | { | ||
441 | int i, r = -EINVAL; | ||
442 | |||
443 | /* host_msix_entries and guest_msix_entries should have been | ||
444 | * initialized */ | ||
445 | if (dev->entries_nr == 0) | ||
446 | return r; | ||
447 | |||
448 | r = pci_enable_msix_exact(dev->dev, | ||
449 | dev->host_msix_entries, dev->entries_nr); | ||
450 | if (r) | ||
451 | return r; | ||
452 | |||
453 | for (i = 0; i < dev->entries_nr; i++) { | ||
454 | r = request_threaded_irq(dev->host_msix_entries[i].vector, | ||
455 | kvm_assigned_dev_msix, | ||
456 | kvm_assigned_dev_thread_msix, | ||
457 | 0, dev->irq_name, dev); | ||
458 | if (r) | ||
459 | goto err; | ||
460 | } | ||
461 | |||
462 | return 0; | ||
463 | err: | ||
464 | for (i -= 1; i >= 0; i--) | ||
465 | free_irq(dev->host_msix_entries[i].vector, dev); | ||
466 | pci_disable_msix(dev->dev); | ||
467 | return r; | ||
468 | } | ||
469 | |||
470 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
471 | struct kvm_assigned_dev_kernel *dev, | ||
472 | struct kvm_assigned_irq *irq) | ||
473 | { | ||
474 | dev->guest_irq = irq->guest_irq; | ||
475 | dev->ack_notifier.gsi = irq->guest_irq; | ||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
480 | struct kvm_assigned_dev_kernel *dev, | ||
481 | struct kvm_assigned_irq *irq) | ||
482 | { | ||
483 | dev->guest_irq = irq->guest_irq; | ||
484 | dev->ack_notifier.gsi = -1; | ||
485 | return 0; | ||
486 | } | ||
487 | |||
488 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
489 | struct kvm_assigned_dev_kernel *dev, | ||
490 | struct kvm_assigned_irq *irq) | ||
491 | { | ||
492 | dev->guest_irq = irq->guest_irq; | ||
493 | dev->ack_notifier.gsi = -1; | ||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | static int assign_host_irq(struct kvm *kvm, | ||
498 | struct kvm_assigned_dev_kernel *dev, | ||
499 | __u32 host_irq_type) | ||
500 | { | ||
501 | int r = -EEXIST; | ||
502 | |||
503 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
504 | return r; | ||
505 | |||
506 | snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", | ||
507 | pci_name(dev->dev)); | ||
508 | |||
509 | switch (host_irq_type) { | ||
510 | case KVM_DEV_IRQ_HOST_INTX: | ||
511 | r = assigned_device_enable_host_intx(kvm, dev); | ||
512 | break; | ||
513 | case KVM_DEV_IRQ_HOST_MSI: | ||
514 | r = assigned_device_enable_host_msi(kvm, dev); | ||
515 | break; | ||
516 | case KVM_DEV_IRQ_HOST_MSIX: | ||
517 | r = assigned_device_enable_host_msix(kvm, dev); | ||
518 | break; | ||
519 | default: | ||
520 | r = -EINVAL; | ||
521 | } | ||
522 | dev->host_irq_disabled = false; | ||
523 | |||
524 | if (!r) | ||
525 | dev->irq_requested_type |= host_irq_type; | ||
526 | |||
527 | return r; | ||
528 | } | ||
529 | |||
530 | static int assign_guest_irq(struct kvm *kvm, | ||
531 | struct kvm_assigned_dev_kernel *dev, | ||
532 | struct kvm_assigned_irq *irq, | ||
533 | unsigned long guest_irq_type) | ||
534 | { | ||
535 | int id; | ||
536 | int r = -EEXIST; | ||
537 | |||
538 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
539 | return r; | ||
540 | |||
541 | id = kvm_request_irq_source_id(kvm); | ||
542 | if (id < 0) | ||
543 | return id; | ||
544 | |||
545 | dev->irq_source_id = id; | ||
546 | |||
547 | switch (guest_irq_type) { | ||
548 | case KVM_DEV_IRQ_GUEST_INTX: | ||
549 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
550 | break; | ||
551 | case KVM_DEV_IRQ_GUEST_MSI: | ||
552 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
553 | break; | ||
554 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
555 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
556 | break; | ||
557 | default: | ||
558 | r = -EINVAL; | ||
559 | } | ||
560 | |||
561 | if (!r) { | ||
562 | dev->irq_requested_type |= guest_irq_type; | ||
563 | if (dev->ack_notifier.gsi != -1) | ||
564 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
565 | } else { | ||
566 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
567 | dev->irq_source_id = -1; | ||
568 | } | ||
569 | |||
570 | return r; | ||
571 | } | ||
572 | |||
573 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
574 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
575 | struct kvm_assigned_irq *assigned_irq) | ||
576 | { | ||
577 | int r = -EINVAL; | ||
578 | struct kvm_assigned_dev_kernel *match; | ||
579 | unsigned long host_irq_type, guest_irq_type; | ||
580 | |||
581 | if (!irqchip_in_kernel(kvm)) | ||
582 | return r; | ||
583 | |||
584 | mutex_lock(&kvm->lock); | ||
585 | r = -ENODEV; | ||
586 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
587 | assigned_irq->assigned_dev_id); | ||
588 | if (!match) | ||
589 | goto out; | ||
590 | |||
591 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
592 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
593 | |||
594 | r = -EINVAL; | ||
595 | /* can only assign one type at a time */ | ||
596 | if (hweight_long(host_irq_type) > 1) | ||
597 | goto out; | ||
598 | if (hweight_long(guest_irq_type) > 1) | ||
599 | goto out; | ||
600 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
601 | goto out; | ||
602 | |||
603 | r = 0; | ||
604 | if (host_irq_type) | ||
605 | r = assign_host_irq(kvm, match, host_irq_type); | ||
606 | if (r) | ||
607 | goto out; | ||
608 | |||
609 | if (guest_irq_type) | ||
610 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
611 | out: | ||
612 | mutex_unlock(&kvm->lock); | ||
613 | return r; | ||
614 | } | ||
615 | |||
616 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
617 | struct kvm_assigned_irq | ||
618 | *assigned_irq) | ||
619 | { | ||
620 | int r = -ENODEV; | ||
621 | struct kvm_assigned_dev_kernel *match; | ||
622 | unsigned long irq_type; | ||
623 | |||
624 | mutex_lock(&kvm->lock); | ||
625 | |||
626 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
627 | assigned_irq->assigned_dev_id); | ||
628 | if (!match) | ||
629 | goto out; | ||
630 | |||
631 | irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | | ||
632 | KVM_DEV_IRQ_GUEST_MASK); | ||
633 | r = kvm_deassign_irq(kvm, match, irq_type); | ||
634 | out: | ||
635 | mutex_unlock(&kvm->lock); | ||
636 | return r; | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * We want to test whether the caller has been granted permissions to | ||
641 | * use this device. To be able to configure and control the device, | ||
642 | * the user needs access to PCI configuration space and BAR resources. | ||
643 | * These are accessed through PCI sysfs. PCI config space is often | ||
644 | * passed to the process calling this ioctl via file descriptor, so we | ||
645 | * can't rely on access to that file. We can check for permissions | ||
646 | * on each of the BAR resource files, which is a pretty clear | ||
647 | * indicator that the user has been granted access to the device. | ||
648 | */ | ||
649 | static int probe_sysfs_permissions(struct pci_dev *dev) | ||
650 | { | ||
651 | #ifdef CONFIG_SYSFS | ||
652 | int i; | ||
653 | bool bar_found = false; | ||
654 | |||
655 | for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { | ||
656 | char *kpath, *syspath; | ||
657 | struct path path; | ||
658 | struct inode *inode; | ||
659 | int r; | ||
660 | |||
661 | if (!pci_resource_len(dev, i)) | ||
662 | continue; | ||
663 | |||
664 | kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); | ||
665 | if (!kpath) | ||
666 | return -ENOMEM; | ||
667 | |||
668 | /* Per sysfs-rules, sysfs is always at /sys */ | ||
669 | syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); | ||
670 | kfree(kpath); | ||
671 | if (!syspath) | ||
672 | return -ENOMEM; | ||
673 | |||
674 | r = kern_path(syspath, LOOKUP_FOLLOW, &path); | ||
675 | kfree(syspath); | ||
676 | if (r) | ||
677 | return r; | ||
678 | |||
679 | inode = d_backing_inode(path.dentry); | ||
680 | |||
681 | r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); | ||
682 | path_put(&path); | ||
683 | if (r) | ||
684 | return r; | ||
685 | |||
686 | bar_found = true; | ||
687 | } | ||
688 | |||
689 | /* If no resources, probably something special */ | ||
690 | if (!bar_found) | ||
691 | return -EPERM; | ||
692 | |||
693 | return 0; | ||
694 | #else | ||
695 | return -EINVAL; /* No way to control the device without sysfs */ | ||
696 | #endif | ||
697 | } | ||
698 | |||
699 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
700 | struct kvm_assigned_pci_dev *assigned_dev) | ||
701 | { | ||
702 | int r = 0, idx; | ||
703 | struct kvm_assigned_dev_kernel *match; | ||
704 | struct pci_dev *dev; | ||
705 | |||
706 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) | ||
707 | return -EINVAL; | ||
708 | |||
709 | mutex_lock(&kvm->lock); | ||
710 | idx = srcu_read_lock(&kvm->srcu); | ||
711 | |||
712 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
713 | assigned_dev->assigned_dev_id); | ||
714 | if (match) { | ||
715 | /* device already assigned */ | ||
716 | r = -EEXIST; | ||
717 | goto out; | ||
718 | } | ||
719 | |||
720 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
721 | if (match == NULL) { | ||
722 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
723 | __func__); | ||
724 | r = -ENOMEM; | ||
725 | goto out; | ||
726 | } | ||
727 | dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, | ||
728 | assigned_dev->busnr, | ||
729 | assigned_dev->devfn); | ||
730 | if (!dev) { | ||
731 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
732 | r = -EINVAL; | ||
733 | goto out_free; | ||
734 | } | ||
735 | |||
736 | /* Don't allow bridges to be assigned */ | ||
737 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) { | ||
738 | r = -EPERM; | ||
739 | goto out_put; | ||
740 | } | ||
741 | |||
742 | r = probe_sysfs_permissions(dev); | ||
743 | if (r) | ||
744 | goto out_put; | ||
745 | |||
746 | if (pci_enable_device(dev)) { | ||
747 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
748 | r = -EBUSY; | ||
749 | goto out_put; | ||
750 | } | ||
751 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
752 | if (r) { | ||
753 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
754 | __func__); | ||
755 | goto out_disable; | ||
756 | } | ||
757 | |||
758 | pci_reset_function(dev); | ||
759 | pci_save_state(dev); | ||
760 | match->pci_saved_state = pci_store_saved_state(dev); | ||
761 | if (!match->pci_saved_state) | ||
762 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", | ||
763 | __func__, dev_name(&dev->dev)); | ||
764 | |||
765 | if (!pci_intx_mask_supported(dev)) | ||
766 | assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; | ||
767 | |||
768 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
769 | match->host_segnr = assigned_dev->segnr; | ||
770 | match->host_busnr = assigned_dev->busnr; | ||
771 | match->host_devfn = assigned_dev->devfn; | ||
772 | match->flags = assigned_dev->flags; | ||
773 | match->dev = dev; | ||
774 | spin_lock_init(&match->intx_lock); | ||
775 | spin_lock_init(&match->intx_mask_lock); | ||
776 | match->irq_source_id = -1; | ||
777 | match->kvm = kvm; | ||
778 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
779 | |||
780 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
781 | |||
782 | if (!kvm->arch.iommu_domain) { | ||
783 | r = kvm_iommu_map_guest(kvm); | ||
784 | if (r) | ||
785 | goto out_list_del; | ||
786 | } | ||
787 | r = kvm_assign_device(kvm, match->dev); | ||
788 | if (r) | ||
789 | goto out_list_del; | ||
790 | |||
791 | out: | ||
792 | srcu_read_unlock(&kvm->srcu, idx); | ||
793 | mutex_unlock(&kvm->lock); | ||
794 | return r; | ||
795 | out_list_del: | ||
796 | if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) | ||
797 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | ||
798 | __func__, dev_name(&dev->dev)); | ||
799 | list_del(&match->list); | ||
800 | pci_release_regions(dev); | ||
801 | out_disable: | ||
802 | pci_disable_device(dev); | ||
803 | out_put: | ||
804 | pci_dev_put(dev); | ||
805 | out_free: | ||
806 | kfree(match); | ||
807 | srcu_read_unlock(&kvm->srcu, idx); | ||
808 | mutex_unlock(&kvm->lock); | ||
809 | return r; | ||
810 | } | ||
811 | |||
812 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
813 | struct kvm_assigned_pci_dev *assigned_dev) | ||
814 | { | ||
815 | int r = 0; | ||
816 | struct kvm_assigned_dev_kernel *match; | ||
817 | |||
818 | mutex_lock(&kvm->lock); | ||
819 | |||
820 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
821 | assigned_dev->assigned_dev_id); | ||
822 | if (!match) { | ||
823 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
824 | "so cannot be deassigned\n", __func__); | ||
825 | r = -EINVAL; | ||
826 | goto out; | ||
827 | } | ||
828 | |||
829 | kvm_deassign_device(kvm, match->dev); | ||
830 | |||
831 | kvm_free_assigned_device(kvm, match); | ||
832 | |||
833 | out: | ||
834 | mutex_unlock(&kvm->lock); | ||
835 | return r; | ||
836 | } | ||
837 | |||
838 | |||
839 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
840 | struct kvm_assigned_msix_nr *entry_nr) | ||
841 | { | ||
842 | int r = 0; | ||
843 | struct kvm_assigned_dev_kernel *adev; | ||
844 | |||
845 | mutex_lock(&kvm->lock); | ||
846 | |||
847 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
848 | entry_nr->assigned_dev_id); | ||
849 | if (!adev) { | ||
850 | r = -EINVAL; | ||
851 | goto msix_nr_out; | ||
852 | } | ||
853 | |||
854 | if (adev->entries_nr == 0) { | ||
855 | adev->entries_nr = entry_nr->entry_nr; | ||
856 | if (adev->entries_nr == 0 || | ||
857 | adev->entries_nr > KVM_MAX_MSIX_PER_DEV) { | ||
858 | r = -EINVAL; | ||
859 | goto msix_nr_out; | ||
860 | } | ||
861 | |||
862 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
863 | entry_nr->entry_nr, | ||
864 | GFP_KERNEL); | ||
865 | if (!adev->host_msix_entries) { | ||
866 | r = -ENOMEM; | ||
867 | goto msix_nr_out; | ||
868 | } | ||
869 | adev->guest_msix_entries = | ||
870 | kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, | ||
871 | GFP_KERNEL); | ||
872 | if (!adev->guest_msix_entries) { | ||
873 | kfree(adev->host_msix_entries); | ||
874 | r = -ENOMEM; | ||
875 | goto msix_nr_out; | ||
876 | } | ||
877 | 	} else /* Setting the MSI-X entry count twice is not allowed */ | ||
878 | r = -EINVAL; | ||
879 | msix_nr_out: | ||
880 | mutex_unlock(&kvm->lock); | ||
881 | return r; | ||
882 | } | ||
883 | |||
884 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
885 | struct kvm_assigned_msix_entry *entry) | ||
886 | { | ||
887 | int r = 0, i; | ||
888 | struct kvm_assigned_dev_kernel *adev; | ||
889 | |||
890 | mutex_lock(&kvm->lock); | ||
891 | |||
892 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
893 | entry->assigned_dev_id); | ||
894 | |||
895 | if (!adev) { | ||
896 | r = -EINVAL; | ||
897 | goto msix_entry_out; | ||
898 | } | ||
899 | |||
900 | for (i = 0; i < adev->entries_nr; i++) | ||
901 | if (adev->guest_msix_entries[i].vector == 0 || | ||
902 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
903 | adev->guest_msix_entries[i].entry = entry->entry; | ||
904 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
905 | adev->host_msix_entries[i].entry = entry->entry; | ||
906 | break; | ||
907 | } | ||
908 | if (i == adev->entries_nr) { | ||
909 | r = -ENOSPC; | ||
910 | goto msix_entry_out; | ||
911 | } | ||
912 | |||
913 | msix_entry_out: | ||
914 | mutex_unlock(&kvm->lock); | ||
915 | |||
916 | return r; | ||
917 | } | ||
918 | |||
919 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, | ||
920 | struct kvm_assigned_pci_dev *assigned_dev) | ||
921 | { | ||
922 | int r = 0; | ||
923 | struct kvm_assigned_dev_kernel *match; | ||
924 | |||
925 | mutex_lock(&kvm->lock); | ||
926 | |||
927 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
928 | assigned_dev->assigned_dev_id); | ||
929 | if (!match) { | ||
930 | r = -ENODEV; | ||
931 | goto out; | ||
932 | } | ||
933 | |||
934 | spin_lock(&match->intx_mask_lock); | ||
935 | |||
936 | match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; | ||
937 | match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; | ||
938 | |||
939 | if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
940 | if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { | ||
941 | kvm_set_irq(match->kvm, match->irq_source_id, | ||
942 | match->guest_irq, 0, false); | ||
943 | /* | ||
944 | * Masking at hardware-level is performed on demand, | ||
945 | * i.e. when an IRQ actually arrives at the host. | ||
946 | */ | ||
947 | } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
948 | /* | ||
949 | * Unmask the IRQ line if required. Unmasking at | ||
950 | * device level will be performed by user space. | ||
951 | */ | ||
952 | spin_lock_irq(&match->intx_lock); | ||
953 | if (match->host_irq_disabled) { | ||
954 | enable_irq(match->host_irq); | ||
955 | match->host_irq_disabled = false; | ||
956 | } | ||
957 | spin_unlock_irq(&match->intx_lock); | ||
958 | } | ||
959 | } | ||
960 | |||
961 | spin_unlock(&match->intx_mask_lock); | ||
962 | |||
963 | out: | ||
964 | mutex_unlock(&kvm->lock); | ||
965 | return r; | ||
966 | } | ||
967 | |||
968 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
969 | unsigned long arg) | ||
970 | { | ||
971 | void __user *argp = (void __user *)arg; | ||
972 | int r; | ||
973 | |||
974 | switch (ioctl) { | ||
975 | case KVM_ASSIGN_PCI_DEVICE: { | ||
976 | struct kvm_assigned_pci_dev assigned_dev; | ||
977 | |||
978 | r = -EFAULT; | ||
979 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
980 | goto out; | ||
981 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
982 | if (r) | ||
983 | goto out; | ||
984 | break; | ||
985 | } | ||
986 | case KVM_ASSIGN_IRQ: { | ||
987 | r = -EOPNOTSUPP; | ||
988 | break; | ||
989 | } | ||
990 | case KVM_ASSIGN_DEV_IRQ: { | ||
991 | struct kvm_assigned_irq assigned_irq; | ||
992 | |||
993 | r = -EFAULT; | ||
994 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
995 | goto out; | ||
996 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
997 | if (r) | ||
998 | goto out; | ||
999 | break; | ||
1000 | } | ||
1001 | case KVM_DEASSIGN_DEV_IRQ: { | ||
1002 | struct kvm_assigned_irq assigned_irq; | ||
1003 | |||
1004 | r = -EFAULT; | ||
1005 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
1006 | goto out; | ||
1007 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
1008 | if (r) | ||
1009 | goto out; | ||
1010 | break; | ||
1011 | } | ||
1012 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
1013 | struct kvm_assigned_pci_dev assigned_dev; | ||
1014 | |||
1015 | r = -EFAULT; | ||
1016 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
1017 | goto out; | ||
1018 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
1019 | if (r) | ||
1020 | goto out; | ||
1021 | break; | ||
1022 | } | ||
1023 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
1024 | struct kvm_assigned_msix_nr entry_nr; | ||
1025 | r = -EFAULT; | ||
1026 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
1027 | goto out; | ||
1028 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
1029 | if (r) | ||
1030 | goto out; | ||
1031 | break; | ||
1032 | } | ||
1033 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
1034 | struct kvm_assigned_msix_entry entry; | ||
1035 | r = -EFAULT; | ||
1036 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
1037 | goto out; | ||
1038 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
1039 | if (r) | ||
1040 | goto out; | ||
1041 | break; | ||
1042 | } | ||
1043 | case KVM_ASSIGN_SET_INTX_MASK: { | ||
1044 | struct kvm_assigned_pci_dev assigned_dev; | ||
1045 | |||
1046 | r = -EFAULT; | ||
1047 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
1048 | goto out; | ||
1049 | r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); | ||
1050 | break; | ||
1051 | } | ||
1052 | default: | ||
1053 | r = -ENOTTY; | ||
1054 | break; | ||
1055 | } | ||
1056 | out: | ||
1057 | return r; | ||
1058 | } | ||
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h deleted file mode 100644 index a428c1a211b2..000000000000 --- a/arch/x86/kvm/assigned-dev.h +++ /dev/null | |||
@@ -1,32 +0,0 @@ | |||
1 | #ifndef ARCH_X86_KVM_ASSIGNED_DEV_H | ||
2 | #define ARCH_X86_KVM_ASSIGNED_DEV_H | ||
3 | |||
4 | #include <linux/kvm_host.h> | ||
5 | |||
6 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
7 | int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev); | ||
8 | int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev); | ||
9 | |||
10 | int kvm_iommu_map_guest(struct kvm *kvm); | ||
11 | int kvm_iommu_unmap_guest(struct kvm *kvm); | ||
12 | |||
13 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
14 | unsigned long arg); | ||
15 | |||
16 | void kvm_free_all_assigned_devices(struct kvm *kvm); | ||
17 | #else | ||
18 | static inline int kvm_iommu_unmap_guest(struct kvm *kvm) | ||
19 | { | ||
20 | return 0; | ||
21 | } | ||
22 | |||
23 | static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
24 | unsigned long arg) | ||
25 | { | ||
26 | return -ENOTTY; | ||
27 | } | ||
28 | |||
29 | static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} | ||
30 | #endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ | ||
31 | |||
32 | #endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */ | ||
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 73ea24d4f119..bdcd4139eca9 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s) | |||
49 | __releases(&s->lock) | 49 | __releases(&s->lock) |
50 | { | 50 | { |
51 | bool wakeup = s->wakeup_needed; | 51 | bool wakeup = s->wakeup_needed; |
52 | struct kvm_vcpu *vcpu, *found = NULL; | 52 | struct kvm_vcpu *vcpu; |
53 | int i; | 53 | int i; |
54 | 54 | ||
55 | s->wakeup_needed = false; | 55 | s->wakeup_needed = false; |
@@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s) | |||
59 | if (wakeup) { | 59 | if (wakeup) { |
60 | kvm_for_each_vcpu(i, vcpu, s->kvm) { | 60 | kvm_for_each_vcpu(i, vcpu, s->kvm) { |
61 | if (kvm_apic_accept_pic_intr(vcpu)) { | 61 | if (kvm_apic_accept_pic_intr(vcpu)) { |
62 | found = vcpu; | 62 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
63 | break; | 63 | kvm_vcpu_kick(vcpu); |
64 | return; | ||
64 | } | 65 | } |
65 | } | 66 | } |
66 | |||
67 | if (!found) | ||
68 | return; | ||
69 | |||
70 | kvm_make_request(KVM_REQ_EVENT, found); | ||
71 | kvm_vcpu_kick(found); | ||
72 | } | 67 | } |
73 | } | 68 | } |
74 | 69 | ||
@@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq) | |||
239 | int kvm_pic_read_irq(struct kvm *kvm) | 234 | int kvm_pic_read_irq(struct kvm *kvm) |
240 | { | 235 | { |
241 | int irq, irq2, intno; | 236 | int irq, irq2, intno; |
242 | struct kvm_pic *s = pic_irqchip(kvm); | 237 | struct kvm_pic *s = kvm->arch.vpic; |
243 | 238 | ||
244 | s->output = 0; | 239 | s->output = 0; |
245 | 240 | ||
@@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
273 | return intno; | 268 | return intno; |
274 | } | 269 | } |
275 | 270 | ||
276 | void kvm_pic_reset(struct kvm_kpic_state *s) | 271 | static void kvm_pic_reset(struct kvm_kpic_state *s) |
277 | { | 272 | { |
278 | int irq, i; | 273 | int irq, i; |
279 | struct kvm_vcpu *vcpu; | 274 | struct kvm_vcpu *vcpu; |
@@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) | |||
422 | return ret; | 417 | return ret; |
423 | } | 418 | } |
424 | 419 | ||
425 | static u32 pic_ioport_read(void *opaque, u32 addr1) | 420 | static u32 pic_ioport_read(void *opaque, u32 addr) |
426 | { | 421 | { |
427 | struct kvm_kpic_state *s = opaque; | 422 | struct kvm_kpic_state *s = opaque; |
428 | unsigned int addr; | ||
429 | int ret; | 423 | int ret; |
430 | 424 | ||
431 | addr = addr1; | ||
432 | addr &= 1; | ||
433 | if (s->poll) { | 425 | if (s->poll) { |
434 | ret = pic_poll_read(s, addr1); | 426 | ret = pic_poll_read(s, addr); |
435 | s->poll = 0; | 427 | s->poll = 0; |
436 | } else | 428 | } else |
437 | if (addr == 0) | 429 | if ((addr & 1) == 0) |
438 | if (s->read_reg_select) | 430 | if (s->read_reg_select) |
439 | ret = s->isr; | 431 | ret = s->isr; |
440 | else | 432 | else |
@@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1) | |||
456 | return s->elcr; | 448 | return s->elcr; |
457 | } | 449 | } |
458 | 450 | ||
459 | static int picdev_in_range(gpa_t addr) | ||
460 | { | ||
461 | switch (addr) { | ||
462 | case 0x20: | ||
463 | case 0x21: | ||
464 | case 0xa0: | ||
465 | case 0xa1: | ||
466 | case 0x4d0: | ||
467 | case 0x4d1: | ||
468 | return 1; | ||
469 | default: | ||
470 | return 0; | ||
471 | } | ||
472 | } | ||
473 | |||
474 | static int picdev_write(struct kvm_pic *s, | 451 | static int picdev_write(struct kvm_pic *s, |
475 | gpa_t addr, int len, const void *val) | 452 | gpa_t addr, int len, const void *val) |
476 | { | 453 | { |
477 | unsigned char data = *(unsigned char *)val; | 454 | unsigned char data = *(unsigned char *)val; |
478 | if (!picdev_in_range(addr)) | ||
479 | return -EOPNOTSUPP; | ||
480 | 455 | ||
481 | if (len != 1) { | 456 | if (len != 1) { |
482 | pr_pic_unimpl("non byte write\n"); | 457 | pr_pic_unimpl("non byte write\n"); |
483 | return 0; | 458 | return 0; |
484 | } | 459 | } |
485 | pic_lock(s); | ||
486 | switch (addr) { | 460 | switch (addr) { |
487 | case 0x20: | 461 | case 0x20: |
488 | case 0x21: | 462 | case 0x21: |
489 | case 0xa0: | 463 | case 0xa0: |
490 | case 0xa1: | 464 | case 0xa1: |
465 | pic_lock(s); | ||
491 | pic_ioport_write(&s->pics[addr >> 7], addr, data); | 466 | pic_ioport_write(&s->pics[addr >> 7], addr, data); |
467 | pic_unlock(s); | ||
492 | break; | 468 | break; |
493 | case 0x4d0: | 469 | case 0x4d0: |
494 | case 0x4d1: | 470 | case 0x4d1: |
471 | pic_lock(s); | ||
495 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 472 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
473 | pic_unlock(s); | ||
496 | break; | 474 | break; |
475 | default: | ||
476 | return -EOPNOTSUPP; | ||
497 | } | 477 | } |
498 | pic_unlock(s); | ||
499 | return 0; | 478 | return 0; |
500 | } | 479 | } |
501 | 480 | ||
502 | static int picdev_read(struct kvm_pic *s, | 481 | static int picdev_read(struct kvm_pic *s, |
503 | gpa_t addr, int len, void *val) | 482 | gpa_t addr, int len, void *val) |
504 | { | 483 | { |
505 | unsigned char data = 0; | 484 | unsigned char *data = (unsigned char *)val; |
506 | if (!picdev_in_range(addr)) | ||
507 | return -EOPNOTSUPP; | ||
508 | 485 | ||
509 | if (len != 1) { | 486 | if (len != 1) { |
510 | memset(val, 0, len); | 487 | memset(val, 0, len); |
511 | pr_pic_unimpl("non byte read\n"); | 488 | pr_pic_unimpl("non byte read\n"); |
512 | return 0; | 489 | return 0; |
513 | } | 490 | } |
514 | pic_lock(s); | ||
515 | switch (addr) { | 491 | switch (addr) { |
516 | case 0x20: | 492 | case 0x20: |
517 | case 0x21: | 493 | case 0x21: |
518 | case 0xa0: | 494 | case 0xa0: |
519 | case 0xa1: | 495 | case 0xa1: |
520 | data = pic_ioport_read(&s->pics[addr >> 7], addr); | 496 | pic_lock(s); |
497 | *data = pic_ioport_read(&s->pics[addr >> 7], addr); | ||
498 | pic_unlock(s); | ||
521 | break; | 499 | break; |
522 | case 0x4d0: | 500 | case 0x4d0: |
523 | case 0x4d1: | 501 | case 0x4d1: |
524 | data = elcr_ioport_read(&s->pics[addr & 1], addr); | 502 | pic_lock(s); |
503 | *data = elcr_ioport_read(&s->pics[addr & 1], addr); | ||
504 | pic_unlock(s); | ||
525 | break; | 505 | break; |
506 | default: | ||
507 | return -EOPNOTSUPP; | ||
526 | } | 508 | } |
527 | *(unsigned char *)val = data; | ||
528 | pic_unlock(s); | ||
529 | return 0; | 509 | return 0; |
530 | } | 510 | } |
531 | 511 | ||
@@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | |||
576 | */ | 556 | */ |
577 | static void pic_irq_request(struct kvm *kvm, int level) | 557 | static void pic_irq_request(struct kvm *kvm, int level) |
578 | { | 558 | { |
579 | struct kvm_pic *s = pic_irqchip(kvm); | 559 | struct kvm_pic *s = kvm->arch.vpic; |
580 | 560 | ||
581 | if (!s->output) | 561 | if (!s->output) |
582 | s->wakeup_needed = true; | 562 | s->wakeup_needed = true; |
@@ -657,9 +637,14 @@ void kvm_pic_destroy(struct kvm *kvm) | |||
657 | { | 637 | { |
658 | struct kvm_pic *vpic = kvm->arch.vpic; | 638 | struct kvm_pic *vpic = kvm->arch.vpic; |
659 | 639 | ||
640 | if (!vpic) | ||
641 | return; | ||
642 | |||
643 | mutex_lock(&kvm->slots_lock); | ||
660 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); | 644 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); |
661 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); | 645 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); |
662 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); | 646 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); |
647 | mutex_unlock(&kvm->slots_lock); | ||
663 | 648 | ||
664 | kvm->arch.vpic = NULL; | 649 | kvm->arch.vpic = NULL; |
665 | kfree(vpic); | 650 | kfree(vpic); |
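The i8259.c hunks above fold the old picdev_in_range() check into the read/write switch -- unhandled ports now fall through to the default case and return -EOPNOTSUPP -- and take pic_lock only on the ports the PIC actually serves. Below is a minimal user-space sketch of that dispatch shape; pic_regs, elcr_regs and the pthread mutex are stand-ins for the kernel state and pic_lock, not the real structures.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

/* Model of the restructured picdev_write(): no separate range check,
 * locking confined to the handled cases, unknown ports rejected in the
 * switch default so the caller can try the next device on the bus. */
static pthread_mutex_t pic_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned char pic_regs[2], elcr_regs[2];

static int picdev_write(unsigned int addr, unsigned char data)
{
        switch (addr) {
        case 0x20: case 0x21: case 0xa0: case 0xa1:
                pthread_mutex_lock(&pic_lock);
                pic_regs[addr >> 7] = data;     /* index 0 = master (0x20), 1 = slave (0xa0) */
                pthread_mutex_unlock(&pic_lock);
                return 0;
        case 0x4d0: case 0x4d1:
                pthread_mutex_lock(&pic_lock);
                elcr_regs[addr & 1] = data;
                pthread_mutex_unlock(&pic_lock);
                return 0;
        default:
                return -EOPNOTSUPP;             /* not a PIC port */
        }
}

int main(void)
{
        printf("0x21 -> %d, 0x80 -> %d\n",
               picdev_write(0x21, 0xff), picdev_write(0x80, 0));
        return 0;
}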
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d2..bdff437acbcb 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
@@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) | |||
266 | spin_unlock(&ioapic->lock); | 266 | spin_unlock(&ioapic->lock); |
267 | } | 267 | } |
268 | 268 | ||
269 | void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) | 269 | void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) |
270 | { | 270 | { |
271 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 271 | if (!ioapic_in_kernel(kvm)) |
272 | |||
273 | if (!ioapic) | ||
274 | return; | 272 | return; |
275 | kvm_make_scan_ioapic_request(kvm); | 273 | kvm_make_scan_ioapic_request(kvm); |
276 | } | 274 | } |
@@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
315 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG | 313 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG |
316 | && ioapic->irr & (1 << index)) | 314 | && ioapic->irr & (1 << index)) |
317 | ioapic_service(ioapic, index, false); | 315 | ioapic_service(ioapic, index, false); |
318 | kvm_vcpu_request_scan_ioapic(ioapic->kvm); | 316 | kvm_make_scan_ioapic_request(ioapic->kvm); |
319 | break; | 317 | break; |
320 | } | 318 | } |
321 | } | 319 | } |
@@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
624 | if (ret < 0) { | 622 | if (ret < 0) { |
625 | kvm->arch.vioapic = NULL; | 623 | kvm->arch.vioapic = NULL; |
626 | kfree(ioapic); | 624 | kfree(ioapic); |
627 | return ret; | ||
628 | } | 625 | } |
629 | 626 | ||
630 | kvm_vcpu_request_scan_ioapic(kvm); | ||
631 | return ret; | 627 | return ret; |
632 | } | 628 | } |
633 | 629 | ||
@@ -635,37 +631,36 @@ void kvm_ioapic_destroy(struct kvm *kvm) | |||
635 | { | 631 | { |
636 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 632 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
637 | 633 | ||
634 | if (!ioapic) | ||
635 | return; | ||
636 | |||
638 | cancel_delayed_work_sync(&ioapic->eoi_inject); | 637 | cancel_delayed_work_sync(&ioapic->eoi_inject); |
638 | mutex_lock(&kvm->slots_lock); | ||
639 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | 639 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); |
640 | mutex_unlock(&kvm->slots_lock); | ||
640 | kvm->arch.vioapic = NULL; | 641 | kvm->arch.vioapic = NULL; |
641 | kfree(ioapic); | 642 | kfree(ioapic); |
642 | } | 643 | } |
643 | 644 | ||
644 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | 645 | void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) |
645 | { | 646 | { |
646 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | 647 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
647 | if (!ioapic) | ||
648 | return -EINVAL; | ||
649 | 648 | ||
650 | spin_lock(&ioapic->lock); | 649 | spin_lock(&ioapic->lock); |
651 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); | 650 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); |
652 | state->irr &= ~ioapic->irr_delivered; | 651 | state->irr &= ~ioapic->irr_delivered; |
653 | spin_unlock(&ioapic->lock); | 652 | spin_unlock(&ioapic->lock); |
654 | return 0; | ||
655 | } | 653 | } |
656 | 654 | ||
657 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | 655 | void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) |
658 | { | 656 | { |
659 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | 657 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
660 | if (!ioapic) | ||
661 | return -EINVAL; | ||
662 | 658 | ||
663 | spin_lock(&ioapic->lock); | 659 | spin_lock(&ioapic->lock); |
664 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | 660 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
665 | ioapic->irr = 0; | 661 | ioapic->irr = 0; |
666 | ioapic->irr_delivered = 0; | 662 | ioapic->irr_delivered = 0; |
667 | kvm_vcpu_request_scan_ioapic(kvm); | 663 | kvm_make_scan_ioapic_request(kvm); |
668 | kvm_ioapic_inject_all(ioapic, state->irr); | 664 | kvm_ioapic_inject_all(ioapic, state->irr); |
669 | spin_unlock(&ioapic->lock); | 665 | spin_unlock(&ioapic->lock); |
670 | return 0; | ||
671 | } | 666 | } |
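kvm_get_ioapic() and kvm_set_ioapic() above drop their NULL checks and return void, on the assumption that callers validate the irqchip mode before dispatching here. The detail worth calling out in the save path is that the reported IRR masks out entries already delivered to the local APIC, so a restore does not inject them a second time. A stand-alone model of that masking; struct ioapic_model is a simplified illustration, not the kernel's kvm_ioapic layout.

#include <stdint.h>
#include <stdio.h>

struct ioapic_model {
        uint32_t irr;           /* pending, as the guest sees it */
        uint32_t irr_delivered; /* subset already handed to a vCPU */
};

/* Mirrors the state->irr &= ~ioapic->irr_delivered step of the
 * KVM_GET_IRQCHIP save path. */
static uint32_t saved_irr(const struct ioapic_model *io)
{
        return io->irr & ~io->irr_delivered;
}

int main(void)
{
        struct ioapic_model io = { .irr = 0x15, .irr_delivered = 0x05 };

        printf("saved irr = 0x%x\n", saved_irr(&io));    /* 0x10 */
        return 0;
}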
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 1cc6e54436db..29ce19732ccf 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h | |||
@@ -105,17 +105,13 @@ do { \ | |||
105 | #define ASSERT(x) do { } while (0) | 105 | #define ASSERT(x) do { } while (0) |
106 | #endif | 106 | #endif |
107 | 107 | ||
108 | static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | ||
109 | { | ||
110 | return kvm->arch.vioapic; | ||
111 | } | ||
112 | |||
113 | static inline int ioapic_in_kernel(struct kvm *kvm) | 108 | static inline int ioapic_in_kernel(struct kvm *kvm) |
114 | { | 109 | { |
115 | int ret; | 110 | int mode = kvm->arch.irqchip_mode; |
116 | 111 | ||
117 | ret = (ioapic_irqchip(kvm) != NULL); | 112 | /* Matches smp_wmb() when setting irqchip_mode */ |
118 | return ret; | 113 | smp_rmb(); |
114 | return mode == KVM_IRQCHIP_KERNEL; | ||
119 | } | 115 | } |
120 | 116 | ||
121 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); | 117 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); |
@@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); | |||
132 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 128 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
133 | struct kvm_lapic_irq *irq, | 129 | struct kvm_lapic_irq *irq, |
134 | struct dest_map *dest_map); | 130 | struct dest_map *dest_map); |
135 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 131 | void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
136 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 132 | void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
137 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, | 133 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, |
138 | ulong *ioapic_handled_vectors); | 134 | ulong *ioapic_handled_vectors); |
139 | void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, | 135 | void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, |
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c deleted file mode 100644 index b181426f67b4..000000000000 --- a/arch/x86/kvm/iommu.c +++ /dev/null | |||
@@ -1,356 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Copyright (C) 2006-2008 Intel Corporation | ||
18 | * Copyright IBM Corporation, 2008 | ||
19 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
20 | * | ||
21 | * Author: Allen M. Kay <allen.m.kay@intel.com> | ||
22 | * Author: Weidong Han <weidong.han@intel.com> | ||
23 | * Author: Ben-Ami Yassour <benami@il.ibm.com> | ||
24 | */ | ||
25 | |||
26 | #include <linux/list.h> | ||
27 | #include <linux/kvm_host.h> | ||
28 | #include <linux/moduleparam.h> | ||
29 | #include <linux/pci.h> | ||
30 | #include <linux/stat.h> | ||
31 | #include <linux/iommu.h> | ||
32 | #include "assigned-dev.h" | ||
33 | |||
34 | static bool allow_unsafe_assigned_interrupts; | ||
35 | module_param_named(allow_unsafe_assigned_interrupts, | ||
36 | allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); | ||
37 | MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, | ||
38 | "Enable device assignment on platforms without interrupt remapping support."); | ||
39 | |||
40 | static int kvm_iommu_unmap_memslots(struct kvm *kvm); | ||
41 | static void kvm_iommu_put_pages(struct kvm *kvm, | ||
42 | gfn_t base_gfn, unsigned long npages); | ||
43 | |||
44 | static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, | ||
45 | unsigned long npages) | ||
46 | { | ||
47 | gfn_t end_gfn; | ||
48 | kvm_pfn_t pfn; | ||
49 | |||
50 | pfn = gfn_to_pfn_memslot(slot, gfn); | ||
51 | end_gfn = gfn + npages; | ||
52 | gfn += 1; | ||
53 | |||
54 | if (is_error_noslot_pfn(pfn)) | ||
55 | return pfn; | ||
56 | |||
57 | while (gfn < end_gfn) | ||
58 | gfn_to_pfn_memslot(slot, gfn++); | ||
59 | |||
60 | return pfn; | ||
61 | } | ||
62 | |||
63 | static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn, | ||
64 | unsigned long npages) | ||
65 | { | ||
66 | unsigned long i; | ||
67 | |||
68 | for (i = 0; i < npages; ++i) | ||
69 | kvm_release_pfn_clean(pfn + i); | ||
70 | } | ||
71 | |||
72 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | ||
73 | { | ||
74 | gfn_t gfn, end_gfn; | ||
75 | kvm_pfn_t pfn; | ||
76 | int r = 0; | ||
77 | struct iommu_domain *domain = kvm->arch.iommu_domain; | ||
78 | int flags; | ||
79 | |||
80 | /* check if iommu exists and in use */ | ||
81 | if (!domain) | ||
82 | return 0; | ||
83 | |||
84 | gfn = slot->base_gfn; | ||
85 | end_gfn = gfn + slot->npages; | ||
86 | |||
87 | flags = IOMMU_READ; | ||
88 | if (!(slot->flags & KVM_MEM_READONLY)) | ||
89 | flags |= IOMMU_WRITE; | ||
90 | if (!kvm->arch.iommu_noncoherent) | ||
91 | flags |= IOMMU_CACHE; | ||
92 | |||
93 | |||
94 | while (gfn < end_gfn) { | ||
95 | unsigned long page_size; | ||
96 | |||
97 | /* Check if already mapped */ | ||
98 | if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) { | ||
99 | gfn += 1; | ||
100 | continue; | ||
101 | } | ||
102 | |||
103 | /* Get the page size we could use to map */ | ||
104 | page_size = kvm_host_page_size(kvm, gfn); | ||
105 | |||
106 | /* Make sure the page_size does not exceed the memslot */ | ||
107 | while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn) | ||
108 | page_size >>= 1; | ||
109 | |||
110 | /* Make sure gfn is aligned to the page size we want to map */ | ||
111 | while ((gfn << PAGE_SHIFT) & (page_size - 1)) | ||
112 | page_size >>= 1; | ||
113 | |||
114 | /* Make sure hva is aligned to the page size we want to map */ | ||
115 | while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) | ||
116 | page_size >>= 1; | ||
117 | |||
118 | /* | ||
119 | * Pin all pages we are about to map in memory. This is | ||
120 | * important because we unmap and unpin in 4kb steps later. | ||
121 | */ | ||
122 | pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT); | ||
123 | if (is_error_noslot_pfn(pfn)) { | ||
124 | gfn += 1; | ||
125 | continue; | ||
126 | } | ||
127 | |||
128 | /* Map into IO address space */ | ||
129 | r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), | ||
130 | page_size, flags); | ||
131 | if (r) { | ||
132 | printk(KERN_ERR "kvm_iommu_map_address:" | ||
133 | "iommu failed to map pfn=%llx\n", pfn); | ||
134 | kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); | ||
135 | goto unmap_pages; | ||
136 | } | ||
137 | |||
138 | gfn += page_size >> PAGE_SHIFT; | ||
139 | |||
140 | cond_resched(); | ||
141 | } | ||
142 | |||
143 | return 0; | ||
144 | |||
145 | unmap_pages: | ||
146 | kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn); | ||
147 | return r; | ||
148 | } | ||
149 | |||
150 | static int kvm_iommu_map_memslots(struct kvm *kvm) | ||
151 | { | ||
152 | int idx, r = 0; | ||
153 | struct kvm_memslots *slots; | ||
154 | struct kvm_memory_slot *memslot; | ||
155 | |||
156 | if (kvm->arch.iommu_noncoherent) | ||
157 | kvm_arch_register_noncoherent_dma(kvm); | ||
158 | |||
159 | idx = srcu_read_lock(&kvm->srcu); | ||
160 | slots = kvm_memslots(kvm); | ||
161 | |||
162 | kvm_for_each_memslot(memslot, slots) { | ||
163 | r = kvm_iommu_map_pages(kvm, memslot); | ||
164 | if (r) | ||
165 | break; | ||
166 | } | ||
167 | srcu_read_unlock(&kvm->srcu, idx); | ||
168 | |||
169 | return r; | ||
170 | } | ||
171 | |||
172 | int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) | ||
173 | { | ||
174 | struct iommu_domain *domain = kvm->arch.iommu_domain; | ||
175 | int r; | ||
176 | bool noncoherent; | ||
177 | |||
178 | /* check if iommu exists and in use */ | ||
179 | if (!domain) | ||
180 | return 0; | ||
181 | |||
182 | if (pdev == NULL) | ||
183 | return -ENODEV; | ||
184 | |||
185 | r = iommu_attach_device(domain, &pdev->dev); | ||
186 | if (r) { | ||
187 | dev_err(&pdev->dev, "kvm assign device failed ret %d", r); | ||
188 | return r; | ||
189 | } | ||
190 | |||
191 | noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY); | ||
192 | |||
193 | /* Check if need to update IOMMU page table for guest memory */ | ||
194 | if (noncoherent != kvm->arch.iommu_noncoherent) { | ||
195 | kvm_iommu_unmap_memslots(kvm); | ||
196 | kvm->arch.iommu_noncoherent = noncoherent; | ||
197 | r = kvm_iommu_map_memslots(kvm); | ||
198 | if (r) | ||
199 | goto out_unmap; | ||
200 | } | ||
201 | |||
202 | kvm_arch_start_assignment(kvm); | ||
203 | pci_set_dev_assigned(pdev); | ||
204 | |||
205 | dev_info(&pdev->dev, "kvm assign device\n"); | ||
206 | |||
207 | return 0; | ||
208 | out_unmap: | ||
209 | kvm_iommu_unmap_memslots(kvm); | ||
210 | return r; | ||
211 | } | ||
212 | |||
213 | int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) | ||
214 | { | ||
215 | struct iommu_domain *domain = kvm->arch.iommu_domain; | ||
216 | |||
217 | /* check if iommu exists and in use */ | ||
218 | if (!domain) | ||
219 | return 0; | ||
220 | |||
221 | if (pdev == NULL) | ||
222 | return -ENODEV; | ||
223 | |||
224 | iommu_detach_device(domain, &pdev->dev); | ||
225 | |||
226 | pci_clear_dev_assigned(pdev); | ||
227 | kvm_arch_end_assignment(kvm); | ||
228 | |||
229 | dev_info(&pdev->dev, "kvm deassign device\n"); | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | int kvm_iommu_map_guest(struct kvm *kvm) | ||
235 | { | ||
236 | int r; | ||
237 | |||
238 | if (!iommu_present(&pci_bus_type)) { | ||
239 | printk(KERN_ERR "%s: iommu not found\n", __func__); | ||
240 | return -ENODEV; | ||
241 | } | ||
242 | |||
243 | mutex_lock(&kvm->slots_lock); | ||
244 | |||
245 | kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); | ||
246 | if (!kvm->arch.iommu_domain) { | ||
247 | r = -ENOMEM; | ||
248 | goto out_unlock; | ||
249 | } | ||
250 | |||
251 | if (!allow_unsafe_assigned_interrupts && | ||
252 | !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) { | ||
253 | printk(KERN_WARNING "%s: No interrupt remapping support," | ||
254 | " disallowing device assignment." | ||
255 | " Re-enable with \"allow_unsafe_assigned_interrupts=1\"" | ||
256 | " module option.\n", __func__); | ||
257 | iommu_domain_free(kvm->arch.iommu_domain); | ||
258 | kvm->arch.iommu_domain = NULL; | ||
259 | r = -EPERM; | ||
260 | goto out_unlock; | ||
261 | } | ||
262 | |||
263 | r = kvm_iommu_map_memslots(kvm); | ||
264 | if (r) | ||
265 | kvm_iommu_unmap_memslots(kvm); | ||
266 | |||
267 | out_unlock: | ||
268 | mutex_unlock(&kvm->slots_lock); | ||
269 | return r; | ||
270 | } | ||
271 | |||
272 | static void kvm_iommu_put_pages(struct kvm *kvm, | ||
273 | gfn_t base_gfn, unsigned long npages) | ||
274 | { | ||
275 | struct iommu_domain *domain; | ||
276 | gfn_t end_gfn, gfn; | ||
277 | kvm_pfn_t pfn; | ||
278 | u64 phys; | ||
279 | |||
280 | domain = kvm->arch.iommu_domain; | ||
281 | end_gfn = base_gfn + npages; | ||
282 | gfn = base_gfn; | ||
283 | |||
284 | /* check if iommu exists and in use */ | ||
285 | if (!domain) | ||
286 | return; | ||
287 | |||
288 | while (gfn < end_gfn) { | ||
289 | unsigned long unmap_pages; | ||
290 | size_t size; | ||
291 | |||
292 | /* Get physical address */ | ||
293 | phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); | ||
294 | |||
295 | if (!phys) { | ||
296 | gfn++; | ||
297 | continue; | ||
298 | } | ||
299 | |||
300 | pfn = phys >> PAGE_SHIFT; | ||
301 | |||
302 | /* Unmap address from IO address space */ | ||
303 | size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); | ||
304 | unmap_pages = 1ULL << get_order(size); | ||
305 | |||
306 | /* Unpin all pages we just unmapped to not leak any memory */ | ||
307 | kvm_unpin_pages(kvm, pfn, unmap_pages); | ||
308 | |||
309 | gfn += unmap_pages; | ||
310 | |||
311 | cond_resched(); | ||
312 | } | ||
313 | } | ||
314 | |||
315 | void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | ||
316 | { | ||
317 | kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages); | ||
318 | } | ||
319 | |||
320 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) | ||
321 | { | ||
322 | int idx; | ||
323 | struct kvm_memslots *slots; | ||
324 | struct kvm_memory_slot *memslot; | ||
325 | |||
326 | idx = srcu_read_lock(&kvm->srcu); | ||
327 | slots = kvm_memslots(kvm); | ||
328 | |||
329 | kvm_for_each_memslot(memslot, slots) | ||
330 | kvm_iommu_unmap_pages(kvm, memslot); | ||
331 | |||
332 | srcu_read_unlock(&kvm->srcu, idx); | ||
333 | |||
334 | if (kvm->arch.iommu_noncoherent) | ||
335 | kvm_arch_unregister_noncoherent_dma(kvm); | ||
336 | |||
337 | return 0; | ||
338 | } | ||
339 | |||
340 | int kvm_iommu_unmap_guest(struct kvm *kvm) | ||
341 | { | ||
342 | struct iommu_domain *domain = kvm->arch.iommu_domain; | ||
343 | |||
344 | /* check if iommu exists and in use */ | ||
345 | if (!domain) | ||
346 | return 0; | ||
347 | |||
348 | mutex_lock(&kvm->slots_lock); | ||
349 | kvm_iommu_unmap_memslots(kvm); | ||
350 | kvm->arch.iommu_domain = NULL; | ||
351 | kvm->arch.iommu_noncoherent = false; | ||
352 | mutex_unlock(&kvm->slots_lock); | ||
353 | |||
354 | iommu_domain_free(domain); | ||
355 | return 0; | ||
356 | } | ||
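The whole legacy iommu.c backing for KVM's deprecated device-assignment ioctls is deleted above. Its mapping loop contained one reusable idea: choose the largest IOMMU mapping size that fits inside the memslot and matches both the gfn and hva alignment, by repeatedly halving. A standalone sketch of just that selection step; pick_map_size() is an invented name, the kernel inlined this logic in kvm_iommu_map_pages().

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Start from the host-reported page size and halve it until it fits in
 * the memslot and both the guest frame number and the host virtual
 * address are aligned to it. */
static uint64_t pick_map_size(uint64_t gfn, uint64_t end_gfn,
                              uint64_t hva, uint64_t host_page_size)
{
        uint64_t page_size = host_page_size;

        while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
                page_size >>= 1;                /* must not cross the memslot end */
        while ((gfn << PAGE_SHIFT) & (page_size - 1))
                page_size >>= 1;                /* guest address must be aligned */
        while (hva & (page_size - 1))
                page_size >>= 1;                /* host address must be aligned */
        return page_size;
}

int main(void)
{
        /* 2 MiB host page, but the gfn is only 4 KiB aligned: fall back to 4 KiB. */
        printf("%llu\n", (unsigned long long)
               pick_map_size(0x101, 0x1000, 0x7f0000101000ULL, 2 << 20));
        return 0;
}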
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 60d91c9d160c..5c24811e8b0b 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) | |||
60 | if (irqchip_split(v->kvm)) | 60 | if (irqchip_split(v->kvm)) |
61 | return pending_userspace_extint(v); | 61 | return pending_userspace_extint(v); |
62 | else | 62 | else |
63 | return pic_irqchip(v->kvm)->output; | 63 | return v->kvm->arch.vpic->output; |
64 | } else | 64 | } else |
65 | return 0; | 65 | return 0; |
66 | } | 66 | } |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 40d5b2cf6061..0edd22c3344c 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm); | |||
78 | int kvm_pic_read_irq(struct kvm *kvm); | 78 | int kvm_pic_read_irq(struct kvm *kvm); |
79 | void kvm_pic_update_irq(struct kvm_pic *s); | 79 | void kvm_pic_update_irq(struct kvm_pic *s); |
80 | 80 | ||
81 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | ||
82 | { | ||
83 | return kvm->arch.vpic; | ||
84 | } | ||
85 | |||
86 | static inline int pic_in_kernel(struct kvm *kvm) | 81 | static inline int pic_in_kernel(struct kvm *kvm) |
87 | { | 82 | { |
88 | int ret; | 83 | int mode = kvm->arch.irqchip_mode; |
89 | 84 | ||
90 | ret = (pic_irqchip(kvm) != NULL); | 85 | /* Matches smp_wmb() when setting irqchip_mode */ |
91 | return ret; | 86 | smp_rmb(); |
87 | return mode == KVM_IRQCHIP_KERNEL; | ||
92 | } | 88 | } |
93 | 89 | ||
94 | static inline int irqchip_split(struct kvm *kvm) | 90 | static inline int irqchip_split(struct kvm *kvm) |
95 | { | 91 | { |
96 | return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT; | 92 | int mode = kvm->arch.irqchip_mode; |
93 | |||
94 | /* Matches smp_wmb() when setting irqchip_mode */ | ||
95 | smp_rmb(); | ||
96 | return mode == KVM_IRQCHIP_SPLIT; | ||
97 | } | 97 | } |
98 | 98 | ||
99 | static inline int irqchip_kernel(struct kvm *kvm) | 99 | static inline int irqchip_kernel(struct kvm *kvm) |
100 | { | 100 | { |
101 | return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL; | 101 | int mode = kvm->arch.irqchip_mode; |
102 | |||
103 | /* Matches smp_wmb() when setting irqchip_mode */ | ||
104 | smp_rmb(); | ||
105 | return mode == KVM_IRQCHIP_KERNEL; | ||
102 | } | 106 | } |
103 | 107 | ||
104 | static inline int irqchip_in_kernel(struct kvm *kvm) | 108 | static inline int irqchip_in_kernel(struct kvm *kvm) |
105 | { | 109 | { |
106 | bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE; | 110 | int mode = kvm->arch.irqchip_mode; |
107 | 111 | ||
108 | /* Matches with wmb after initializing kvm->irq_routing. */ | 112 | /* Matches smp_wmb() when setting irqchip_mode */ |
109 | smp_rmb(); | 113 | smp_rmb(); |
110 | return ret; | 114 | return mode > KVM_IRQCHIP_INIT_IN_PROGRESS; |
111 | } | 115 | } |
112 | 116 | ||
113 | void kvm_pic_reset(struct kvm_kpic_state *s); | ||
114 | |||
115 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | 117 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); |
116 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | 118 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); |
117 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); | 119 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); |
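The irq.h helpers above (and ioapic_in_kernel() earlier) stop testing the vpic/vioapic pointers and instead read kvm->arch.irqchip_mode, with an smp_rmb() that pairs with the smp_wmb() issued when the mode is published. A user-space model of the same pattern, using C11 release/acquire atomics in place of the kernel barriers; the enum values only mimic the KVM_IRQCHIP_* names.

#include <stdatomic.h>
#include <stdio.h>

enum irqchip_mode { IRQCHIP_NONE, IRQCHIP_INIT_IN_PROGRESS,
                    IRQCHIP_KERNEL, IRQCHIP_SPLIT };

static _Atomic int irqchip_mode = IRQCHIP_NONE;

static void publish_kernel_irqchip(void)
{
        /* ...allocate and register PIC/IOAPIC first, then publish... */
        atomic_store_explicit(&irqchip_mode, IRQCHIP_KERNEL, memory_order_release);
}

static int irqchip_in_kernel(void)
{
        int mode = atomic_load_explicit(&irqchip_mode, memory_order_acquire);

        /* "in kernel" means anything past INIT_IN_PROGRESS, i.e. KERNEL or SPLIT */
        return mode > IRQCHIP_INIT_IN_PROGRESS;
}

int main(void)
{
        publish_kernel_irqchip();
        printf("irqchip_in_kernel() = %d\n", irqchip_in_kernel());
        return 0;
}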
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 6825cd36d13b..4517a4c2ac3a 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c | |||
@@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
42 | struct kvm *kvm, int irq_source_id, int level, | 42 | struct kvm *kvm, int irq_source_id, int level, |
43 | bool line_status) | 43 | bool line_status) |
44 | { | 44 | { |
45 | struct kvm_pic *pic = pic_irqchip(kvm); | 45 | struct kvm_pic *pic = kvm->arch.vpic; |
46 | return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); | 46 | return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); |
47 | } | 47 | } |
48 | 48 | ||
@@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
232 | goto unlock; | 232 | goto unlock; |
233 | } | 233 | } |
234 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 234 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
235 | if (!ioapic_in_kernel(kvm)) | 235 | if (!irqchip_kernel(kvm)) |
236 | goto unlock; | 236 | goto unlock; |
237 | 237 | ||
238 | kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); | 238 | kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); |
239 | kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); | 239 | kvm_pic_clear_all(kvm->arch.vpic, irq_source_id); |
240 | unlock: | 240 | unlock: |
241 | mutex_unlock(&kvm->irq_lock); | 241 | mutex_unlock(&kvm->irq_lock); |
242 | } | 242 | } |
@@ -278,38 +278,35 @@ int kvm_set_routing_entry(struct kvm *kvm, | |||
278 | struct kvm_kernel_irq_routing_entry *e, | 278 | struct kvm_kernel_irq_routing_entry *e, |
279 | const struct kvm_irq_routing_entry *ue) | 279 | const struct kvm_irq_routing_entry *ue) |
280 | { | 280 | { |
281 | int r = -EINVAL; | 281 | /* also allow creation of routes during KVM_IRQCHIP_INIT_IN_PROGRESS */ |
282 | int delta; | 282 | if (kvm->arch.irqchip_mode == KVM_IRQCHIP_NONE) |
283 | unsigned max_pin; | 283 | return -EINVAL; |
284 | 284 | ||
285 | /* Matches smp_wmb() when setting irqchip_mode */ | ||
286 | smp_rmb(); | ||
285 | switch (ue->type) { | 287 | switch (ue->type) { |
286 | case KVM_IRQ_ROUTING_IRQCHIP: | 288 | case KVM_IRQ_ROUTING_IRQCHIP: |
287 | delta = 0; | 289 | if (irqchip_split(kvm)) |
290 | return -EINVAL; | ||
291 | e->irqchip.pin = ue->u.irqchip.pin; | ||
288 | switch (ue->u.irqchip.irqchip) { | 292 | switch (ue->u.irqchip.irqchip) { |
289 | case KVM_IRQCHIP_PIC_SLAVE: | 293 | case KVM_IRQCHIP_PIC_SLAVE: |
290 | delta = 8; | 294 | e->irqchip.pin += PIC_NUM_PINS / 2; |
291 | /* fall through */ | 295 | /* fall through */ |
292 | case KVM_IRQCHIP_PIC_MASTER: | 296 | case KVM_IRQCHIP_PIC_MASTER: |
293 | if (!pic_in_kernel(kvm)) | 297 | if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2) |
294 | goto out; | 298 | return -EINVAL; |
295 | |||
296 | e->set = kvm_set_pic_irq; | 299 | e->set = kvm_set_pic_irq; |
297 | max_pin = PIC_NUM_PINS; | ||
298 | break; | 300 | break; |
299 | case KVM_IRQCHIP_IOAPIC: | 301 | case KVM_IRQCHIP_IOAPIC: |
300 | if (!ioapic_in_kernel(kvm)) | 302 | if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS) |
301 | goto out; | 303 | return -EINVAL; |
302 | |||
303 | max_pin = KVM_IOAPIC_NUM_PINS; | ||
304 | e->set = kvm_set_ioapic_irq; | 304 | e->set = kvm_set_ioapic_irq; |
305 | break; | 305 | break; |
306 | default: | 306 | default: |
307 | goto out; | 307 | return -EINVAL; |
308 | } | 308 | } |
309 | e->irqchip.irqchip = ue->u.irqchip.irqchip; | 309 | e->irqchip.irqchip = ue->u.irqchip.irqchip; |
310 | e->irqchip.pin = ue->u.irqchip.pin + delta; | ||
311 | if (e->irqchip.pin >= max_pin) | ||
312 | goto out; | ||
313 | break; | 310 | break; |
314 | case KVM_IRQ_ROUTING_MSI: | 311 | case KVM_IRQ_ROUTING_MSI: |
315 | e->set = kvm_set_msi; | 312 | e->set = kvm_set_msi; |
@@ -318,7 +315,7 @@ int kvm_set_routing_entry(struct kvm *kvm, | |||
318 | e->msi.data = ue->u.msi.data; | 315 | e->msi.data = ue->u.msi.data; |
319 | 316 | ||
320 | if (kvm_msi_route_invalid(kvm, e)) | 317 | if (kvm_msi_route_invalid(kvm, e)) |
321 | goto out; | 318 | return -EINVAL; |
322 | break; | 319 | break; |
323 | case KVM_IRQ_ROUTING_HV_SINT: | 320 | case KVM_IRQ_ROUTING_HV_SINT: |
324 | e->set = kvm_hv_set_sint; | 321 | e->set = kvm_hv_set_sint; |
@@ -326,12 +323,10 @@ int kvm_set_routing_entry(struct kvm *kvm, | |||
326 | e->hv_sint.sint = ue->u.hv_sint.sint; | 323 | e->hv_sint.sint = ue->u.hv_sint.sint; |
327 | break; | 324 | break; |
328 | default: | 325 | default: |
329 | goto out; | 326 | return -EINVAL; |
330 | } | 327 | } |
331 | 328 | ||
332 | r = 0; | 329 | return 0; |
333 | out: | ||
334 | return r; | ||
335 | } | 330 | } |
336 | 331 | ||
337 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, | 332 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, |
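kvm_set_routing_entry() above now refuses IRQCHIP routes when no irqchip exists or the split irqchip is in use, stores slave-PIC pins in the upper half of the 16-pin space, and bounds-checks the user-supplied pin per chip. A compact sketch of just the PIC pin handling; route_pic_pin() and enum chip are illustrative names, not the kernel API.

#include <stdio.h>

#define PIC_NUM_PINS 16

enum chip { PIC_MASTER, PIC_SLAVE };

/* Each 8259 only has 8 pins; a slave pin is stored with an offset of
 * PIC_NUM_PINS / 2 so master and slave share one 16-entry pin space. */
static int route_pic_pin(enum chip chip, unsigned user_pin, unsigned *stored_pin)
{
        if (user_pin >= PIC_NUM_PINS / 2)
                return -1;
        *stored_pin = user_pin + (chip == PIC_SLAVE ? PIC_NUM_PINS / 2 : 0);
        return 0;
}

int main(void)
{
        unsigned pin;

        if (!route_pic_pin(PIC_SLAVE, 3, &pin))
                printf("slave pin 3 stored as %u\n", pin);      /* 11 */
        return 0;
}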
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ac7810513d0e..558676538fca 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -4340,7 +4340,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) | |||
4340 | } | 4340 | } |
4341 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); | 4341 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); |
4342 | 4342 | ||
4343 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) | 4343 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, |
4344 | bool accessed_dirty) | ||
4344 | { | 4345 | { |
4345 | struct kvm_mmu *context = &vcpu->arch.mmu; | 4346 | struct kvm_mmu *context = &vcpu->arch.mmu; |
4346 | 4347 | ||
@@ -4349,6 +4350,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) | |||
4349 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 4350 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
4350 | 4351 | ||
4351 | context->nx = true; | 4352 | context->nx = true; |
4353 | context->ept_ad = accessed_dirty; | ||
4352 | context->page_fault = ept_page_fault; | 4354 | context->page_fault = ept_page_fault; |
4353 | context->gva_to_gpa = ept_gva_to_gpa; | 4355 | context->gva_to_gpa = ept_gva_to_gpa; |
4354 | context->sync_page = ept_sync_page; | 4356 | context->sync_page = ept_sync_page; |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index ddc56e91f2e4..d8ccb32f7308 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -74,7 +74,8 @@ enum { | |||
74 | 74 | ||
75 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); | 75 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); |
76 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); | 76 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); |
77 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); | 77 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, |
78 | bool accessed_dirty); | ||
78 | 79 | ||
79 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 80 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
80 | { | 81 | { |
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 37942e419c32..60168cdd0546 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c | |||
@@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
160 | return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); | 160 | return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); |
161 | } | 161 | } |
162 | 162 | ||
163 | void kvm_page_track_cleanup(struct kvm *kvm) | ||
164 | { | ||
165 | struct kvm_page_track_notifier_head *head; | ||
166 | |||
167 | head = &kvm->arch.track_notifier_head; | ||
168 | cleanup_srcu_struct(&head->track_srcu); | ||
169 | } | ||
170 | |||
163 | void kvm_page_track_init(struct kvm *kvm) | 171 | void kvm_page_track_init(struct kvm *kvm) |
164 | { | 172 | { |
165 | struct kvm_page_track_notifier_head *head; | 173 | struct kvm_page_track_notifier_head *head; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a01105485315..314d2071b337 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -23,13 +23,6 @@ | |||
23 | * so the code in this file is compiled twice, once per pte size. | 23 | * so the code in this file is compiled twice, once per pte size. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | /* | ||
27 | * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro | ||
28 | * uses for EPT without A/D paging type. | ||
29 | */ | ||
30 | extern u64 __pure __using_nonexistent_pte_bit(void) | ||
31 | __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT"); | ||
32 | |||
33 | #if PTTYPE == 64 | 26 | #if PTTYPE == 64 |
34 | #define pt_element_t u64 | 27 | #define pt_element_t u64 |
35 | #define guest_walker guest_walker64 | 28 | #define guest_walker guest_walker64 |
@@ -39,10 +32,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void) | |||
39 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
40 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
41 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
42 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
43 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
44 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | 35 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT |
45 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | 36 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT |
37 | #define PT_HAVE_ACCESSED_DIRTY(mmu) true | ||
46 | #ifdef CONFIG_X86_64 | 38 | #ifdef CONFIG_X86_64 |
47 | #define PT_MAX_FULL_LEVELS 4 | 39 | #define PT_MAX_FULL_LEVELS 4 |
48 | #define CMPXCHG cmpxchg | 40 | #define CMPXCHG cmpxchg |
@@ -60,10 +52,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void) | |||
60 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 52 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
61 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 53 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
62 | #define PT_MAX_FULL_LEVELS 2 | 54 | #define PT_MAX_FULL_LEVELS 2 |
63 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
64 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
65 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | 55 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT |
66 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | 56 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT |
57 | #define PT_HAVE_ACCESSED_DIRTY(mmu) true | ||
67 | #define CMPXCHG cmpxchg | 58 | #define CMPXCHG cmpxchg |
68 | #elif PTTYPE == PTTYPE_EPT | 59 | #elif PTTYPE == PTTYPE_EPT |
69 | #define pt_element_t u64 | 60 | #define pt_element_t u64 |
@@ -74,16 +65,18 @@ extern u64 __pure __using_nonexistent_pte_bit(void) | |||
74 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 65 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
75 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 66 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
76 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 67 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
77 | #define PT_GUEST_ACCESSED_MASK 0 | 68 | #define PT_GUEST_DIRTY_SHIFT 9 |
78 | #define PT_GUEST_DIRTY_MASK 0 | 69 | #define PT_GUEST_ACCESSED_SHIFT 8 |
79 | #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit() | 70 | #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) |
80 | #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit() | ||
81 | #define CMPXCHG cmpxchg64 | 71 | #define CMPXCHG cmpxchg64 |
82 | #define PT_MAX_FULL_LEVELS 4 | 72 | #define PT_MAX_FULL_LEVELS 4 |
83 | #else | 73 | #else |
84 | #error Invalid PTTYPE value | 74 | #error Invalid PTTYPE value |
85 | #endif | 75 | #endif |
86 | 76 | ||
77 | #define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT) | ||
78 | #define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) | ||
79 | |||
87 | #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) | 80 | #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) |
88 | #define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) | 81 | #define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) |
89 | 82 | ||
@@ -111,12 +104,13 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | |||
111 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; | 104 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; |
112 | } | 105 | } |
113 | 106 | ||
114 | static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) | 107 | static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access, |
108 | unsigned gpte) | ||
115 | { | 109 | { |
116 | unsigned mask; | 110 | unsigned mask; |
117 | 111 | ||
118 | /* dirty bit is not supported, so no need to track it */ | 112 | /* dirty bit is not supported, so no need to track it */ |
119 | if (!PT_GUEST_DIRTY_MASK) | 113 | if (!PT_HAVE_ACCESSED_DIRTY(mmu)) |
120 | return; | 114 | return; |
121 | 115 | ||
122 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | 116 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); |
@@ -171,7 +165,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | |||
171 | goto no_present; | 165 | goto no_present; |
172 | 166 | ||
173 | /* if accessed bit is not supported prefetch non accessed gpte */ | 167 | /* if accessed bit is not supported prefetch non accessed gpte */ |
174 | if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK)) | 168 | if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK)) |
175 | goto no_present; | 169 | goto no_present; |
176 | 170 | ||
177 | return false; | 171 | return false; |
@@ -217,7 +211,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | |||
217 | int ret; | 211 | int ret; |
218 | 212 | ||
219 | /* dirty/accessed bits are not supported, so no need to update them */ | 213 | /* dirty/accessed bits are not supported, so no need to update them */ |
220 | if (!PT_GUEST_DIRTY_MASK) | 214 | if (!PT_HAVE_ACCESSED_DIRTY(mmu)) |
221 | return 0; | 215 | return 0; |
222 | 216 | ||
223 | for (level = walker->max_level; level >= walker->level; --level) { | 217 | for (level = walker->max_level; level >= walker->level; --level) { |
@@ -286,7 +280,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
286 | pt_element_t __user *uninitialized_var(ptep_user); | 280 | pt_element_t __user *uninitialized_var(ptep_user); |
287 | gfn_t table_gfn; | 281 | gfn_t table_gfn; |
288 | unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey; | 282 | unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey; |
283 | unsigned nested_access; | ||
289 | gpa_t pte_gpa; | 284 | gpa_t pte_gpa; |
285 | bool have_ad; | ||
290 | int offset; | 286 | int offset; |
291 | const int write_fault = access & PFERR_WRITE_MASK; | 287 | const int write_fault = access & PFERR_WRITE_MASK; |
292 | const int user_fault = access & PFERR_USER_MASK; | 288 | const int user_fault = access & PFERR_USER_MASK; |
@@ -299,6 +295,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
299 | retry_walk: | 295 | retry_walk: |
300 | walker->level = mmu->root_level; | 296 | walker->level = mmu->root_level; |
301 | pte = mmu->get_cr3(vcpu); | 297 | pte = mmu->get_cr3(vcpu); |
298 | have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); | ||
302 | 299 | ||
303 | #if PTTYPE == 64 | 300 | #if PTTYPE == 64 |
304 | if (walker->level == PT32E_ROOT_LEVEL) { | 301 | if (walker->level == PT32E_ROOT_LEVEL) { |
@@ -312,7 +309,15 @@ retry_walk: | |||
312 | walker->max_level = walker->level; | 309 | walker->max_level = walker->level; |
313 | ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); | 310 | ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); |
314 | 311 | ||
315 | accessed_dirty = PT_GUEST_ACCESSED_MASK; | 312 | accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0; |
313 | |||
314 | /* | ||
315 | * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging | ||
316 | * by the MOV to CR instruction are treated as reads and do not cause the | ||
317 | * processor to set the dirty flag in any EPT paging-structure entry. | ||
318 | */ | ||
319 | nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; | ||
320 | |||
316 | pt_access = pte_access = ACC_ALL; | 321 | pt_access = pte_access = ACC_ALL; |
317 | ++walker->level; | 322 | ++walker->level; |
318 | 323 | ||
@@ -332,7 +337,7 @@ retry_walk: | |||
332 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 337 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
333 | 338 | ||
334 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), | 339 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), |
335 | PFERR_USER_MASK|PFERR_WRITE_MASK, | 340 | nested_access, |
336 | &walker->fault); | 341 | &walker->fault); |
337 | 342 | ||
338 | /* | 343 | /* |
@@ -394,7 +399,7 @@ retry_walk: | |||
394 | walker->gfn = real_gpa >> PAGE_SHIFT; | 399 | walker->gfn = real_gpa >> PAGE_SHIFT; |
395 | 400 | ||
396 | if (!write_fault) | 401 | if (!write_fault) |
397 | FNAME(protect_clean_gpte)(&pte_access, pte); | 402 | FNAME(protect_clean_gpte)(mmu, &pte_access, pte); |
398 | else | 403 | else |
399 | /* | 404 | /* |
400 | * On a write fault, fold the dirty bit into accessed_dirty. | 405 | * On a write fault, fold the dirty bit into accessed_dirty. |
@@ -485,7 +490,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
485 | 490 | ||
486 | gfn = gpte_to_gfn(gpte); | 491 | gfn = gpte_to_gfn(gpte); |
487 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 492 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
488 | FNAME(protect_clean_gpte)(&pte_access, gpte); | 493 | FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); |
489 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, | 494 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, |
490 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); | 495 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); |
491 | if (is_error_pfn(pfn)) | 496 | if (is_error_pfn(pfn)) |
@@ -979,7 +984,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
979 | gfn = gpte_to_gfn(gpte); | 984 | gfn = gpte_to_gfn(gpte); |
980 | pte_access = sp->role.access; | 985 | pte_access = sp->role.access; |
981 | pte_access &= FNAME(gpte_access)(vcpu, gpte); | 986 | pte_access &= FNAME(gpte_access)(vcpu, gpte); |
982 | FNAME(protect_clean_gpte)(&pte_access, gpte); | 987 | FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); |
983 | 988 | ||
984 | if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, | 989 | if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, |
985 | &nr_present)) | 990 | &nr_present)) |
@@ -1025,3 +1030,4 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1025 | #undef PT_GUEST_DIRTY_MASK | 1030 | #undef PT_GUEST_DIRTY_MASK |
1026 | #undef PT_GUEST_DIRTY_SHIFT | 1031 | #undef PT_GUEST_DIRTY_SHIFT |
1027 | #undef PT_GUEST_ACCESSED_SHIFT | 1032 | #undef PT_GUEST_ACCESSED_SHIFT |
1033 | #undef PT_HAVE_ACCESSED_DIRTY | ||
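With the paging_tmpl.h changes above, the EPT walker gains real accessed/dirty bit positions (bits 8 and 9 of an EPT entry), derives the masks from those shifts, and gates their use on the per-MMU ept_ad flag through PT_HAVE_ACCESSED_DIRTY() instead of a compile-time constant. A small model of the prefetch check that flag controls; struct mmu_model and skip_non_accessed() are invented names.

#include <stdint.h>
#include <stdio.h>

#define PT_GUEST_ACCESSED_SHIFT 8
#define PT_GUEST_DIRTY_SHIFT    9
#define PT_GUEST_ACCESSED_MASK  (1u << PT_GUEST_ACCESSED_SHIFT)
#define PT_GUEST_DIRTY_MASK     (1u << PT_GUEST_DIRTY_SHIFT)

struct mmu_model { int ept_ad; };

/* Mirrors prefetch_invalid_gpte(): a non-accessed PTE is only skipped
 * when this MMU actually tracks accessed bits. */
static int skip_non_accessed(const struct mmu_model *mmu, uint64_t gpte)
{
        return mmu->ept_ad && !(gpte & PT_GUEST_ACCESSED_MASK);
}

int main(void)
{
        struct mmu_model ad_on = { 1 }, ad_off = { 0 };
        uint64_t pte = 0x7;     /* present, accessed/dirty bits clear */

        printf("A/D on: skip=%d, A/D off: skip=%d\n",
               skip_non_accessed(&ad_on, pte), skip_non_accessed(&ad_off, pte));
        return 0;
}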
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1efe2c62b3f..1b203abf76e1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm) | |||
1379 | unsigned long flags; | 1379 | unsigned long flags; |
1380 | struct kvm_arch *vm_data = &kvm->arch; | 1380 | struct kvm_arch *vm_data = &kvm->arch; |
1381 | 1381 | ||
1382 | if (!avic) | ||
1383 | return; | ||
1384 | |||
1382 | avic_free_vm_id(vm_data->avic_vm_id); | 1385 | avic_free_vm_id(vm_data->avic_vm_id); |
1383 | 1386 | ||
1384 | if (vm_data->avic_logical_id_table_page) | 1387 | if (vm_data->avic_logical_id_table_page) |
@@ -5253,6 +5256,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) | |||
5253 | avic_handle_ldr_update(vcpu); | 5256 | avic_handle_ldr_update(vcpu); |
5254 | } | 5257 | } |
5255 | 5258 | ||
5259 | static void svm_setup_mce(struct kvm_vcpu *vcpu) | ||
5260 | { | ||
5261 | /* [63:9] are reserved. */ | ||
5262 | vcpu->arch.mcg_cap &= 0x1ff; | ||
5263 | } | ||
5264 | |||
5256 | static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | 5265 | static struct kvm_x86_ops svm_x86_ops __ro_after_init = { |
5257 | .cpu_has_kvm_support = has_svm, | 5266 | .cpu_has_kvm_support = has_svm, |
5258 | .disabled_by_bios = is_disabled, | 5267 | .disabled_by_bios = is_disabled, |
@@ -5364,6 +5373,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
5364 | .pmu_ops = &amd_pmu_ops, | 5373 | .pmu_ops = &amd_pmu_ops, |
5365 | .deliver_posted_interrupt = svm_deliver_avic_intr, | 5374 | .deliver_posted_interrupt = svm_deliver_avic_intr, |
5366 | .update_pi_irte = svm_update_pi_irte, | 5375 | .update_pi_irte = svm_update_pi_irte, |
5376 | .setup_mce = svm_setup_mce, | ||
5367 | }; | 5377 | }; |
5368 | 5378 | ||
5369 | static int __init svm_init(void) | 5379 | static int __init svm_init(void) |
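The new svm_setup_mce() above clamps the MCE capability word userspace programs for the vCPU (via KVM_X86_SETUP_MCE). A one-line illustration of the effect, assuming the usual MCG_CAP layout in which the low 9 bits carry the bank count and MCG_CTL_P and everything above is reserved on AMD.

#include <stdint.h>
#include <stdio.h>

static uint64_t svm_clamp_mcg_cap(uint64_t mcg_cap)
{
        return mcg_cap & 0x1ff;         /* bits 63:9 are reserved */
}

int main(void)
{
        printf("0x%llx\n", (unsigned long long)svm_clamp_mcg_cap(0x10c0aULL));
        return 0;
}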
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e82ee1e699..c1a12b94e1fd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); | |||
84 | static bool __read_mostly emulate_invalid_guest_state = true; | 84 | static bool __read_mostly emulate_invalid_guest_state = true; |
85 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 85 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
86 | 86 | ||
87 | static bool __read_mostly vmm_exclusive = 1; | ||
88 | module_param(vmm_exclusive, bool, S_IRUGO); | ||
89 | |||
90 | static bool __read_mostly fasteoi = 1; | 87 | static bool __read_mostly fasteoi = 1; |
91 | module_param(fasteoi, bool, S_IRUGO); | 88 | module_param(fasteoi, bool, S_IRUGO); |
92 | 89 | ||
@@ -615,10 +612,6 @@ struct vcpu_vmx { | |||
615 | int vpid; | 612 | int vpid; |
616 | bool emulation_required; | 613 | bool emulation_required; |
617 | 614 | ||
618 | /* Support for vnmi-less CPUs */ | ||
619 | int soft_vnmi_blocked; | ||
620 | ktime_t entry_time; | ||
621 | s64 vnmi_blocked_time; | ||
622 | u32 exit_reason; | 615 | u32 exit_reason; |
623 | 616 | ||
624 | /* Posted interrupt descriptor */ | 617 | /* Posted interrupt descriptor */ |
@@ -914,8 +907,6 @@ static void nested_release_page_clean(struct page *page) | |||
914 | 907 | ||
915 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | 908 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); |
916 | static u64 construct_eptp(unsigned long root_hpa); | 909 | static u64 construct_eptp(unsigned long root_hpa); |
917 | static void kvm_cpu_vmxon(u64 addr); | ||
918 | static void kvm_cpu_vmxoff(void); | ||
919 | static bool vmx_xsaves_supported(void); | 910 | static bool vmx_xsaves_supported(void); |
920 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 911 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
921 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 912 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
@@ -1239,6 +1230,11 @@ static inline bool cpu_has_vmx_invvpid_global(void) | |||
1239 | return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; | 1230 | return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; |
1240 | } | 1231 | } |
1241 | 1232 | ||
1233 | static inline bool cpu_has_vmx_invvpid(void) | ||
1234 | { | ||
1235 | return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; | ||
1236 | } | ||
1237 | |||
1242 | static inline bool cpu_has_vmx_ept(void) | 1238 | static inline bool cpu_has_vmx_ept(void) |
1243 | { | 1239 | { |
1244 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 1240 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
@@ -1285,11 +1281,6 @@ static inline bool cpu_has_vmx_invpcid(void) | |||
1285 | SECONDARY_EXEC_ENABLE_INVPCID; | 1281 | SECONDARY_EXEC_ENABLE_INVPCID; |
1286 | } | 1282 | } |
1287 | 1283 | ||
1288 | static inline bool cpu_has_virtual_nmis(void) | ||
1289 | { | ||
1290 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | ||
1291 | } | ||
1292 | |||
1293 | static inline bool cpu_has_vmx_wbinvd_exit(void) | 1284 | static inline bool cpu_has_vmx_wbinvd_exit(void) |
1294 | { | 1285 | { |
1295 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 1286 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
@@ -2235,15 +2226,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx) | |||
2235 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 2226 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
2236 | { | 2227 | { |
2237 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2228 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2238 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | ||
2239 | bool already_loaded = vmx->loaded_vmcs->cpu == cpu; | 2229 | bool already_loaded = vmx->loaded_vmcs->cpu == cpu; |
2240 | 2230 | ||
2241 | if (!vmm_exclusive) | ||
2242 | kvm_cpu_vmxon(phys_addr); | ||
2243 | else if (!already_loaded) | ||
2244 | loaded_vmcs_clear(vmx->loaded_vmcs); | ||
2245 | |||
2246 | if (!already_loaded) { | 2231 | if (!already_loaded) { |
2232 | loaded_vmcs_clear(vmx->loaded_vmcs); | ||
2247 | local_irq_disable(); | 2233 | local_irq_disable(); |
2248 | crash_disable_local_vmclear(cpu); | 2234 | crash_disable_local_vmclear(cpu); |
2249 | 2235 | ||
@@ -2321,11 +2307,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | |||
2321 | vmx_vcpu_pi_put(vcpu); | 2307 | vmx_vcpu_pi_put(vcpu); |
2322 | 2308 | ||
2323 | __vmx_load_host_state(to_vmx(vcpu)); | 2309 | __vmx_load_host_state(to_vmx(vcpu)); |
2324 | if (!vmm_exclusive) { | ||
2325 | __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); | ||
2326 | vcpu->cpu = -1; | ||
2327 | kvm_cpu_vmxoff(); | ||
2328 | } | ||
2329 | } | 2310 | } |
2330 | 2311 | ||
2331 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | 2312 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); |
@@ -2749,11 +2730,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2749 | vmx->nested.nested_vmx_secondary_ctls_high); | 2730 | vmx->nested.nested_vmx_secondary_ctls_high); |
2750 | vmx->nested.nested_vmx_secondary_ctls_low = 0; | 2731 | vmx->nested.nested_vmx_secondary_ctls_low = 0; |
2751 | vmx->nested.nested_vmx_secondary_ctls_high &= | 2732 | vmx->nested.nested_vmx_secondary_ctls_high &= |
2733 | SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED | | ||
2752 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2734 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2753 | SECONDARY_EXEC_RDTSCP | | 2735 | SECONDARY_EXEC_RDTSCP | |
2754 | SECONDARY_EXEC_DESC | | 2736 | SECONDARY_EXEC_DESC | |
2755 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 2737 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
2756 | SECONDARY_EXEC_ENABLE_VPID | | ||
2757 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2738 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2758 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 2739 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
2759 | SECONDARY_EXEC_WBINVD_EXITING | | 2740 | SECONDARY_EXEC_WBINVD_EXITING | |
@@ -2764,14 +2745,16 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2764 | vmx->nested.nested_vmx_secondary_ctls_high |= | 2745 | vmx->nested.nested_vmx_secondary_ctls_high |= |
2765 | SECONDARY_EXEC_ENABLE_EPT; | 2746 | SECONDARY_EXEC_ENABLE_EPT; |
2766 | vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | | 2747 | vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | |
2767 | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | | 2748 | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; |
2768 | VMX_EPT_INVEPT_BIT; | ||
2769 | if (cpu_has_vmx_ept_execute_only()) | 2749 | if (cpu_has_vmx_ept_execute_only()) |
2770 | vmx->nested.nested_vmx_ept_caps |= | 2750 | vmx->nested.nested_vmx_ept_caps |= |
2771 | VMX_EPT_EXECUTE_ONLY_BIT; | 2751 | VMX_EPT_EXECUTE_ONLY_BIT; |
2772 | vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; | 2752 | vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; |
2773 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | | 2753 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | |
2774 | VMX_EPT_EXTENT_CONTEXT_BIT; | 2754 | VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | |
2755 | VMX_EPT_1GB_PAGE_BIT; | ||
2756 | if (enable_ept_ad_bits) | ||
2757 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; | ||
2775 | } else | 2758 | } else |
2776 | vmx->nested.nested_vmx_ept_caps = 0; | 2759 | vmx->nested.nested_vmx_ept_caps = 0; |
2777 | 2760 | ||
@@ -2781,10 +2764,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2781 | * though it is treated as global context. The alternative is | 2764 | * though it is treated as global context. The alternative is |
2782 | * not failing the single-context invvpid, and it is worse. | 2765 | * not failing the single-context invvpid, and it is worse. |
2783 | */ | 2766 | */ |
2784 | if (enable_vpid) | 2767 | if (enable_vpid) { |
2768 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
2769 | SECONDARY_EXEC_ENABLE_VPID; | ||
2785 | vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | | 2770 | vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | |
2786 | VMX_VPID_EXTENT_SUPPORTED_MASK; | 2771 | VMX_VPID_EXTENT_SUPPORTED_MASK; |
2787 | else | 2772 | } else |
2788 | vmx->nested.nested_vmx_vpid_caps = 0; | 2773 | vmx->nested.nested_vmx_vpid_caps = 0; |
2789 | 2774 | ||
2790 | if (enable_unrestricted_guest) | 2775 | if (enable_unrestricted_guest) |
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void) | |||
3416 | 3401 | ||
3417 | static void kvm_cpu_vmxon(u64 addr) | 3402 | static void kvm_cpu_vmxon(u64 addr) |
3418 | { | 3403 | { |
3404 | cr4_set_bits(X86_CR4_VMXE); | ||
3419 | intel_pt_handle_vmx(1); | 3405 | intel_pt_handle_vmx(1); |
3420 | 3406 | ||
3421 | asm volatile (ASM_VMX_VMXON_RAX | 3407 | asm volatile (ASM_VMX_VMXON_RAX |
@@ -3458,12 +3444,8 @@ static int hardware_enable(void) | |||
3458 | /* enable and lock */ | 3444 | /* enable and lock */ |
3459 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); | 3445 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
3460 | } | 3446 | } |
3461 | cr4_set_bits(X86_CR4_VMXE); | 3447 | kvm_cpu_vmxon(phys_addr); |
3462 | 3448 | ept_sync_global(); | |
3463 | if (vmm_exclusive) { | ||
3464 | kvm_cpu_vmxon(phys_addr); | ||
3465 | ept_sync_global(); | ||
3466 | } | ||
3467 | 3449 | ||
3468 | native_store_gdt(this_cpu_ptr(&host_gdt)); | 3450 | native_store_gdt(this_cpu_ptr(&host_gdt)); |
3469 | 3451 | ||
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void) | |||
3489 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | 3471 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
3490 | 3472 | ||
3491 | intel_pt_handle_vmx(0); | 3473 | intel_pt_handle_vmx(0); |
3474 | cr4_clear_bits(X86_CR4_VMXE); | ||
3492 | } | 3475 | } |
3493 | 3476 | ||
3494 | static void hardware_disable(void) | 3477 | static void hardware_disable(void) |
3495 | { | 3478 | { |
3496 | if (vmm_exclusive) { | 3479 | vmclear_local_loaded_vmcss(); |
3497 | vmclear_local_loaded_vmcss(); | 3480 | kvm_cpu_vmxoff(); |
3498 | kvm_cpu_vmxoff(); | ||
3499 | } | ||
3500 | cr4_clear_bits(X86_CR4_VMXE); | ||
3501 | } | 3481 | } |
3502 | 3482 | ||
3503 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 3483 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
@@ -3617,9 +3597,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
3617 | &_vmexit_control) < 0) | 3597 | &_vmexit_control) < 0) |
3618 | return -EIO; | 3598 | return -EIO; |
3619 | 3599 | ||
3620 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | 3600 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | |
3621 | opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | | 3601 | PIN_BASED_VIRTUAL_NMIS; |
3622 | PIN_BASED_VMX_PREEMPTION_TIMER; | 3602 | opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER; |
3623 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | 3603 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, |
3624 | &_pin_based_exec_control) < 0) | 3604 | &_pin_based_exec_control) < 0) |
3625 | return -EIO; | 3605 | return -EIO; |
@@ -4011,11 +3991,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
4011 | 3991 | ||
4012 | static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) | 3992 | static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) |
4013 | { | 3993 | { |
4014 | vpid_sync_context(vpid); | ||
4015 | if (enable_ept) { | 3994 | if (enable_ept) { |
4016 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3995 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
4017 | return; | 3996 | return; |
4018 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 3997 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
3998 | } else { | ||
3999 | vpid_sync_context(vpid); | ||
4019 | } | 4000 | } |
4020 | } | 4001 | } |
4021 | 4002 | ||
@@ -4024,6 +4005,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | |||
4024 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); | 4005 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); |
4025 | } | 4006 | } |
4026 | 4007 | ||
4008 | static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) | ||
4009 | { | ||
4010 | if (enable_ept) | ||
4011 | vmx_flush_tlb(vcpu); | ||
4012 | } | ||
4013 | |||
4027 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 4014 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
4028 | { | 4015 | { |
4029 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; | 4016 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; |
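
The hunk above inverts the old order in __vmx_flush_tlb(): with EPT enabled only the current EPT context is synced (and only once a root is loaded), while the VPID flush is reserved for the shadow-paging case, and the new vmx_flush_tlb_ept_only() helper flushes only when EPT is in use. A minimal stand-alone sketch of that decision, with hypothetical stubs in place of the real INVEPT/INVVPID primitives:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INVALID_PAGE ((uint64_t)-1)

/* Hypothetical stubs standing in for ept_sync_context() and vpid_sync_context(). */
static void ept_sync(uint64_t eptp) { printf("INVEPT  eptp=%#llx\n", (unsigned long long)eptp); }
static void vpid_sync(int vpid)     { printf("INVVPID vpid=%d\n", vpid); }

/* Mirrors the restructured __vmx_flush_tlb(): the EPT and VPID flushes are now exclusive. */
static void flush_tlb(bool enable_ept, uint64_t root_hpa, int vpid)
{
	if (enable_ept) {
		if (root_hpa == INVALID_PAGE)
			return;             /* no EPT root loaded yet, nothing to sync */
		ept_sync(root_hpa);         /* sync only this EPT context */
	} else {
		vpid_sync(vpid);            /* shadow paging: flush by VPID tag */
	}
}

int main(void)
{
	flush_tlb(true, 0x1000, 1);          /* EPT path */
	flush_tlb(false, INVALID_PAGE, 1);   /* VPID path */
	return 0;
}
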
@@ -5285,8 +5272,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
5285 | 5272 | ||
5286 | vmx->rmode.vm86_active = 0; | 5273 | vmx->rmode.vm86_active = 0; |
5287 | 5274 | ||
5288 | vmx->soft_vnmi_blocked = 0; | ||
5289 | |||
5290 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 5275 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
5291 | kvm_set_cr8(vcpu, 0); | 5276 | kvm_set_cr8(vcpu, 0); |
5292 | 5277 | ||
@@ -5406,8 +5391,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
5406 | 5391 | ||
5407 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 5392 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
5408 | { | 5393 | { |
5409 | if (!cpu_has_virtual_nmis() || | 5394 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { |
5410 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | ||
5411 | enable_irq_window(vcpu); | 5395 | enable_irq_window(vcpu); |
5412 | return; | 5396 | return; |
5413 | } | 5397 | } |
@@ -5448,19 +5432,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
5448 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5432 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5449 | 5433 | ||
5450 | if (!is_guest_mode(vcpu)) { | 5434 | if (!is_guest_mode(vcpu)) { |
5451 | if (!cpu_has_virtual_nmis()) { | ||
5452 | /* | ||
5453 | * Tracking the NMI-blocked state in software is built upon | ||
5454 | * finding the next open IRQ window. This, in turn, depends on | ||
5455 | * well-behaving guests: They have to keep IRQs disabled at | ||
5456 | * least as long as the NMI handler runs. Otherwise we may | ||
5457 | * cause NMI nesting, maybe breaking the guest. But as this is | ||
5458 | * highly unlikely, we can live with the residual risk. | ||
5459 | */ | ||
5460 | vmx->soft_vnmi_blocked = 1; | ||
5461 | vmx->vnmi_blocked_time = 0; | ||
5462 | } | ||
5463 | |||
5464 | ++vcpu->stat.nmi_injections; | 5435 | ++vcpu->stat.nmi_injections; |
5465 | vmx->nmi_known_unmasked = false; | 5436 | vmx->nmi_known_unmasked = false; |
5466 | } | 5437 | } |
@@ -5477,8 +5448,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
5477 | 5448 | ||
5478 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 5449 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
5479 | { | 5450 | { |
5480 | if (!cpu_has_virtual_nmis()) | ||
5481 | return to_vmx(vcpu)->soft_vnmi_blocked; | ||
5482 | if (to_vmx(vcpu)->nmi_known_unmasked) | 5451 | if (to_vmx(vcpu)->nmi_known_unmasked) |
5483 | return false; | 5452 | return false; |
5484 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; | 5453 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; |
@@ -5488,20 +5457,13 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
5488 | { | 5457 | { |
5489 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5458 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5490 | 5459 | ||
5491 | if (!cpu_has_virtual_nmis()) { | 5460 | vmx->nmi_known_unmasked = !masked; |
5492 | if (vmx->soft_vnmi_blocked != masked) { | 5461 | if (masked) |
5493 | vmx->soft_vnmi_blocked = masked; | 5462 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
5494 | vmx->vnmi_blocked_time = 0; | 5463 | GUEST_INTR_STATE_NMI); |
5495 | } | 5464 | else |
5496 | } else { | 5465 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, |
5497 | vmx->nmi_known_unmasked = !masked; | 5466 | GUEST_INTR_STATE_NMI); |
5498 | if (masked) | ||
5499 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
5500 | GUEST_INTR_STATE_NMI); | ||
5501 | else | ||
5502 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
5503 | GUEST_INTR_STATE_NMI); | ||
5504 | } | ||
5505 | } | 5467 | } |
5506 | 5468 | ||
5507 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 5469 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
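
With the soft-vNMI fallback removed, NMI masking is tracked solely through the guest interruptibility-state field. A small sketch of the simplified vmx_set_nmi_mask() logic, using a plain variable where the real code reads and writes the VMCS:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GUEST_INTR_STATE_NMI (1U << 3)   /* interruptibility state: blocking by NMI */

/*
 * Models the simplified vmx_set_nmi_mask(): the mask lives only in the
 * interruptibility field now, plus the nmi_known_unmasked fast-path hint.
 */
static void set_nmi_mask(bool masked, uint32_t *interruptibility, bool *nmi_known_unmasked)
{
	*nmi_known_unmasked = !masked;
	if (masked)
		*interruptibility |= GUEST_INTR_STATE_NMI;
	else
		*interruptibility &= ~GUEST_INTR_STATE_NMI;
}

int main(void)
{
	uint32_t intr = 0;
	bool known_unmasked;

	set_nmi_mask(true, &intr, &known_unmasked);
	printf("%#x %d\n", (unsigned)intr, known_unmasked);   /* 0x8 0 */
	set_nmi_mask(false, &intr, &known_unmasked);
	printf("%#x %d\n", (unsigned)intr, known_unmasked);   /* 0 1 */
	return 0;
}
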
@@ -5509,9 +5471,6 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
5509 | if (to_vmx(vcpu)->nested.nested_run_pending) | 5471 | if (to_vmx(vcpu)->nested.nested_run_pending) |
5510 | return 0; | 5472 | return 0; |
5511 | 5473 | ||
5512 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
5513 | return 0; | ||
5514 | |||
5515 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 5474 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
5516 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | 5475 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
5517 | | GUEST_INTR_STATE_NMI)); | 5476 | | GUEST_INTR_STATE_NMI)); |
@@ -6232,21 +6191,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
6232 | unsigned long exit_qualification; | 6191 | unsigned long exit_qualification; |
6233 | gpa_t gpa; | 6192 | gpa_t gpa; |
6234 | u32 error_code; | 6193 | u32 error_code; |
6235 | int gla_validity; | ||
6236 | 6194 | ||
6237 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6195 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
6238 | 6196 | ||
6239 | gla_validity = (exit_qualification >> 7) & 0x3; | 6197 | if (is_guest_mode(vcpu) |
6240 | if (gla_validity == 0x2) { | 6198 | && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) { |
6241 | printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); | 6199 | /* |
6242 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", | 6200 | * Fix up exit_qualification according to whether guest |
6243 | (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), | 6201 | * page table accesses are reads or writes. |
6244 | vmcs_readl(GUEST_LINEAR_ADDRESS)); | 6202 | */ |
6245 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 6203 | u64 eptp = nested_ept_get_cr3(vcpu); |
6246 | (long unsigned int)exit_qualification); | 6204 | if (!(eptp & VMX_EPT_AD_ENABLE_BIT)) |
6247 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; | 6205 | exit_qualification &= ~EPT_VIOLATION_ACC_WRITE; |
6248 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; | ||
6249 | return 0; | ||
6250 | } | 6206 | } |
6251 | 6207 | ||
6252 | /* | 6208 | /* |
@@ -6256,7 +6212,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
6256 | * AAK134, BY25. | 6212 | * AAK134, BY25. |
6257 | */ | 6213 | */ |
6258 | if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && | 6214 | if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && |
6259 | cpu_has_virtual_nmis() && | ||
6260 | (exit_qualification & INTR_INFO_UNBLOCK_NMI)) | 6215 | (exit_qualification & INTR_INFO_UNBLOCK_NMI)) |
6261 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); | 6216 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); |
6262 | 6217 | ||
@@ -6517,8 +6472,10 @@ static __init int hardware_setup(void) | |||
6517 | if (boot_cpu_has(X86_FEATURE_NX)) | 6472 | if (boot_cpu_has(X86_FEATURE_NX)) |
6518 | kvm_enable_efer_bits(EFER_NX); | 6473 | kvm_enable_efer_bits(EFER_NX); |
6519 | 6474 | ||
6520 | if (!cpu_has_vmx_vpid()) | 6475 | if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || |
6476 | !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) | ||
6521 | enable_vpid = 0; | 6477 | enable_vpid = 0; |
6478 | |||
6522 | if (!cpu_has_vmx_shadow_vmcs()) | 6479 | if (!cpu_has_vmx_shadow_vmcs()) |
6523 | enable_shadow_vmcs = 0; | 6480 | enable_shadow_vmcs = 0; |
6524 | if (enable_shadow_vmcs) | 6481 | if (enable_shadow_vmcs) |
@@ -7805,7 +7762,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) | |||
7805 | * "blocked by NMI" bit has to be set before next VM entry. | 7762 | * "blocked by NMI" bit has to be set before next VM entry. |
7806 | */ | 7763 | */ |
7807 | if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && | 7764 | if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && |
7808 | cpu_has_virtual_nmis() && | ||
7809 | (exit_qualification & INTR_INFO_UNBLOCK_NMI)) | 7765 | (exit_qualification & INTR_INFO_UNBLOCK_NMI)) |
7810 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 7766 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
7811 | GUEST_INTR_STATE_NMI); | 7767 | GUEST_INTR_STATE_NMI); |
@@ -8107,6 +8063,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
8107 | return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); | 8063 | return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); |
8108 | case EXIT_REASON_RDPMC: | 8064 | case EXIT_REASON_RDPMC: |
8109 | return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); | 8065 | return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); |
8066 | case EXIT_REASON_RDRAND: | ||
8067 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND); | ||
8068 | case EXIT_REASON_RDSEED: | ||
8069 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED); | ||
8110 | case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: | 8070 | case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: |
8111 | return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); | 8071 | return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); |
8112 | case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: | 8072 | case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: |
@@ -8477,31 +8437,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
8477 | return 0; | 8437 | return 0; |
8478 | } | 8438 | } |
8479 | 8439 | ||
8480 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && | ||
8481 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( | ||
8482 | get_vmcs12(vcpu))))) { | ||
8483 | if (vmx_interrupt_allowed(vcpu)) { | ||
8484 | vmx->soft_vnmi_blocked = 0; | ||
8485 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | ||
8486 | vcpu->arch.nmi_pending) { | ||
8487 | /* | ||
8488 | * This CPU don't support us in finding the end of an | ||
8489 | * NMI-blocked window if the guest runs with IRQs | ||
8490 | * disabled. So we pull the trigger after 1 s of | ||
8491 | * futile waiting, but inform the user about this. | ||
8492 | */ | ||
8493 | printk(KERN_WARNING "%s: Breaking out of NMI-blocked " | ||
8494 | "state on VCPU %d after 1 s timeout\n", | ||
8495 | __func__, vcpu->vcpu_id); | ||
8496 | vmx->soft_vnmi_blocked = 0; | ||
8497 | } | ||
8498 | } | ||
8499 | |||
8500 | if (exit_reason < kvm_vmx_max_exit_handlers | 8440 | if (exit_reason < kvm_vmx_max_exit_handlers |
8501 | && kvm_vmx_exit_handlers[exit_reason]) | 8441 | && kvm_vmx_exit_handlers[exit_reason]) |
8502 | return kvm_vmx_exit_handlers[exit_reason](vcpu); | 8442 | return kvm_vmx_exit_handlers[exit_reason](vcpu); |
8503 | else { | 8443 | else { |
8504 | WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); | 8444 | vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", |
8445 | exit_reason); | ||
8505 | kvm_queue_exception(vcpu, UD_VECTOR); | 8446 | kvm_queue_exception(vcpu, UD_VECTOR); |
8506 | return 1; | 8447 | return 1; |
8507 | } | 8448 | } |
@@ -8547,6 +8488,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | |||
8547 | } else { | 8488 | } else { |
8548 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 8489 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
8549 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 8490 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
8491 | vmx_flush_tlb_ept_only(vcpu); | ||
8550 | } | 8492 | } |
8551 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); | 8493 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); |
8552 | 8494 | ||
@@ -8572,8 +8514,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) | |||
8572 | */ | 8514 | */ |
8573 | if (!is_guest_mode(vcpu) || | 8515 | if (!is_guest_mode(vcpu) || |
8574 | !nested_cpu_has2(get_vmcs12(&vmx->vcpu), | 8516 | !nested_cpu_has2(get_vmcs12(&vmx->vcpu), |
8575 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | 8517 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { |
8576 | vmcs_write64(APIC_ACCESS_ADDR, hpa); | 8518 | vmcs_write64(APIC_ACCESS_ADDR, hpa); |
8519 | vmx_flush_tlb_ept_only(vcpu); | ||
8520 | } | ||
8577 | } | 8521 | } |
8578 | 8522 | ||
8579 | static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) | 8523 | static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) |
@@ -8768,37 +8712,33 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
8768 | 8712 | ||
8769 | idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 8713 | idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
8770 | 8714 | ||
8771 | if (cpu_has_virtual_nmis()) { | 8715 | if (vmx->nmi_known_unmasked) |
8772 | if (vmx->nmi_known_unmasked) | 8716 | return; |
8773 | return; | 8717 | /* |
8774 | /* | 8718 | * Can't use vmx->exit_intr_info since we're not sure what |
8775 | * Can't use vmx->exit_intr_info since we're not sure what | 8719 | * the exit reason is. |
8776 | * the exit reason is. | 8720 | */ |
8777 | */ | 8721 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
8778 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 8722 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
8779 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; | 8723 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
8780 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | 8724 | /* |
8781 | /* | 8725 | * SDM 3: 27.7.1.2 (September 2008) |
8782 | * SDM 3: 27.7.1.2 (September 2008) | 8726 | * Re-set bit "block by NMI" before VM entry if vmexit caused by |
8783 | * Re-set bit "block by NMI" before VM entry if vmexit caused by | 8727 | * a guest IRET fault. |
8784 | * a guest IRET fault. | 8728 | * SDM 3: 23.2.2 (September 2008) |
8785 | * SDM 3: 23.2.2 (September 2008) | 8729 | * Bit 12 is undefined in any of the following cases: |
8786 | * Bit 12 is undefined in any of the following cases: | 8730 | * If the VM exit sets the valid bit in the IDT-vectoring |
8787 | * If the VM exit sets the valid bit in the IDT-vectoring | 8731 | * information field. |
8788 | * information field. | 8732 | * If the VM exit is due to a double fault. |
8789 | * If the VM exit is due to a double fault. | 8733 | */ |
8790 | */ | 8734 | if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && |
8791 | if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && | 8735 | vector != DF_VECTOR && !idtv_info_valid) |
8792 | vector != DF_VECTOR && !idtv_info_valid) | 8736 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
8793 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 8737 | GUEST_INTR_STATE_NMI); |
8794 | GUEST_INTR_STATE_NMI); | 8738 | else |
8795 | else | 8739 | vmx->nmi_known_unmasked = |
8796 | vmx->nmi_known_unmasked = | 8740 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) |
8797 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | 8741 | & GUEST_INTR_STATE_NMI); |
8798 | & GUEST_INTR_STATE_NMI); | ||
8799 | } else if (unlikely(vmx->soft_vnmi_blocked)) | ||
8800 | vmx->vnmi_blocked_time += | ||
8801 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | ||
8802 | } | 8742 | } |
8803 | 8743 | ||
8804 | static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, | 8744 | static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, |
@@ -8915,10 +8855,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
8915 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8855 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8916 | unsigned long debugctlmsr, cr4; | 8856 | unsigned long debugctlmsr, cr4; |
8917 | 8857 | ||
8918 | /* Record the guest's net vcpu time for enforced NMI injections. */ | ||
8919 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | ||
8920 | vmx->entry_time = ktime_get(); | ||
8921 | |||
8922 | /* Don't enter VMX if guest state is invalid, let the exit handler | 8858 | /* Don't enter VMX if guest state is invalid, let the exit handler |
8923 | start emulation until we arrive back to a valid state */ | 8859 | start emulation until we arrive back to a valid state */ |
8924 | if (vmx->emulation_required) | 8860 | if (vmx->emulation_required) |
@@ -9126,16 +9062,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
9126 | vmx_complete_interrupts(vmx); | 9062 | vmx_complete_interrupts(vmx); |
9127 | } | 9063 | } |
9128 | 9064 | ||
9129 | static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) | 9065 | static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) |
9130 | { | 9066 | { |
9131 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9067 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
9132 | int cpu; | 9068 | int cpu; |
9133 | 9069 | ||
9134 | if (vmx->loaded_vmcs == &vmx->vmcs01) | 9070 | if (vmx->loaded_vmcs == vmcs) |
9135 | return; | 9071 | return; |
9136 | 9072 | ||
9137 | cpu = get_cpu(); | 9073 | cpu = get_cpu(); |
9138 | vmx->loaded_vmcs = &vmx->vmcs01; | 9074 | vmx->loaded_vmcs = vmcs; |
9139 | vmx_vcpu_put(vcpu); | 9075 | vmx_vcpu_put(vcpu); |
9140 | vmx_vcpu_load(vcpu, cpu); | 9076 | vmx_vcpu_load(vcpu, cpu); |
9141 | vcpu->cpu = cpu; | 9077 | vcpu->cpu = cpu; |
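
vmx_load_vmcs01() is generalized here into vmx_switch_vmcs(), which can activate any loaded VMCS and becomes a no-op when the target is already current; enter_vmx_non_root_mode() and nested_vmx_vmexit() then reuse it for vmcs02 and vmcs01. A toy model of the call shape (the real function also pins the CPU and replays vcpu_put()/vcpu_load()):

#include <stdio.h>

struct vmcs { const char *name; };

struct vcpu {
	struct vmcs *loaded;   /* currently active VMCS for this vCPU */
};

/* Sketch of vmx_switch_vmcs(): switch only when the target differs. */
static void switch_vmcs(struct vcpu *v, struct vmcs *target)
{
	if (v->loaded == target)
		return;
	v->loaded = target;          /* put old state, load the new VMCS */
	printf("switched to %s\n", target->name);
}

int main(void)
{
	struct vmcs vmcs01 = { "vmcs01" }, vmcs02 = { "vmcs02" };
	struct vcpu v = { &vmcs01 };

	switch_vmcs(&v, &vmcs02);    /* entering L2 */
	switch_vmcs(&v, &vmcs02);    /* already current: no-op */
	switch_vmcs(&v, &vmcs01);    /* nested vmexit back to L1 */
	return 0;
}
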
@@ -9153,7 +9089,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) | |||
9153 | 9089 | ||
9154 | r = vcpu_load(vcpu); | 9090 | r = vcpu_load(vcpu); |
9155 | BUG_ON(r); | 9091 | BUG_ON(r); |
9156 | vmx_load_vmcs01(vcpu); | 9092 | vmx_switch_vmcs(vcpu, &vmx->vmcs01); |
9157 | free_nested(vmx); | 9093 | free_nested(vmx); |
9158 | vcpu_put(vcpu); | 9094 | vcpu_put(vcpu); |
9159 | } | 9095 | } |
@@ -9214,11 +9150,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
9214 | vmx->loaded_vmcs->shadow_vmcs = NULL; | 9150 | vmx->loaded_vmcs->shadow_vmcs = NULL; |
9215 | if (!vmx->loaded_vmcs->vmcs) | 9151 | if (!vmx->loaded_vmcs->vmcs) |
9216 | goto free_msrs; | 9152 | goto free_msrs; |
9217 | if (!vmm_exclusive) | ||
9218 | kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); | ||
9219 | loaded_vmcs_init(vmx->loaded_vmcs); | 9153 | loaded_vmcs_init(vmx->loaded_vmcs); |
9220 | if (!vmm_exclusive) | ||
9221 | kvm_cpu_vmxoff(); | ||
9222 | 9154 | ||
9223 | cpu = get_cpu(); | 9155 | cpu = get_cpu(); |
9224 | vmx_vcpu_load(&vmx->vcpu, cpu); | 9156 | vmx_vcpu_load(&vmx->vcpu, cpu); |
@@ -9478,17 +9410,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | |||
9478 | return get_vmcs12(vcpu)->ept_pointer; | 9410 | return get_vmcs12(vcpu)->ept_pointer; |
9479 | } | 9411 | } |
9480 | 9412 | ||
9481 | static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | 9413 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) |
9482 | { | 9414 | { |
9415 | u64 eptp; | ||
9416 | |||
9483 | WARN_ON(mmu_is_nested(vcpu)); | 9417 | WARN_ON(mmu_is_nested(vcpu)); |
9418 | eptp = nested_ept_get_cr3(vcpu); | ||
9419 | if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits) | ||
9420 | return 1; | ||
9421 | |||
9422 | kvm_mmu_unload(vcpu); | ||
9484 | kvm_init_shadow_ept_mmu(vcpu, | 9423 | kvm_init_shadow_ept_mmu(vcpu, |
9485 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & | 9424 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & |
9486 | VMX_EPT_EXECUTE_ONLY_BIT); | 9425 | VMX_EPT_EXECUTE_ONLY_BIT, |
9426 | eptp & VMX_EPT_AD_ENABLE_BIT); | ||
9487 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | 9427 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; |
9488 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | 9428 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; |
9489 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | 9429 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; |
9490 | 9430 | ||
9491 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | 9431 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; |
9432 | return 0; | ||
9492 | } | 9433 | } |
9493 | 9434 | ||
9494 | static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) | 9435 | static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) |
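
nested_ept_init_mmu_context() now returns an error when L1 programs an EPTP with the A/D-enable bit while the host side cannot honour it, and it forwards the A/D setting into the shadow EPT MMU. A minimal model of that contract, assuming the caller treats a nonzero return as a VM-entry failure:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_AD_ENABLE_BIT (1ULL << 6)   /* EPTP bit 6: enable A/D flags */

/*
 * Sketch of the new contract: reject an L1 EPTP that asks for A/D bits
 * the host cannot provide, otherwise record whether the shadow EPT MMU
 * should emulate A/D updates for L1.
 */
static int init_nested_ept(uint64_t eptp12, bool host_has_ept_ad_bits,
			   bool *ad_enabled_for_l1)
{
	if ((eptp12 & VMX_EPT_AD_ENABLE_BIT) && !host_has_ept_ad_bits)
		return 1;                           /* VM entry must fail */
	*ad_enabled_for_l1 = eptp12 & VMX_EPT_AD_ENABLE_BIT;
	return 0;                                   /* shadow EPT MMU can be built */
}

int main(void)
{
	bool ad;
	printf("%d\n", init_nested_ept(VMX_EPT_AD_ENABLE_BIT, false, &ad)); /* 1 */
	printf("%d\n", init_nested_ept(VMX_EPT_AD_ENABLE_BIT, true,  &ad)); /* 0 */
	return 0;
}
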
@@ -9974,7 +9915,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
9974 | { | 9915 | { |
9975 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9916 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
9976 | u32 exec_control; | 9917 | u32 exec_control; |
9977 | bool nested_ept_enabled = false; | ||
9978 | 9918 | ||
9979 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); | 9919 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); |
9980 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); | 9920 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); |
@@ -10121,8 +10061,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10121 | vmcs12->guest_intr_status); | 10061 | vmcs12->guest_intr_status); |
10122 | } | 10062 | } |
10123 | 10063 | ||
10124 | nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; | ||
10125 | |||
10126 | /* | 10064 | /* |
10127 | * Write an illegal value to APIC_ACCESS_ADDR. Later, | 10065 | * Write an illegal value to APIC_ACCESS_ADDR. Later, |
10128 | * nested_get_vmcs12_pages will either fix it up or | 10066 | * nested_get_vmcs12_pages will either fix it up or |
@@ -10253,8 +10191,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10253 | } | 10191 | } |
10254 | 10192 | ||
10255 | if (nested_cpu_has_ept(vmcs12)) { | 10193 | if (nested_cpu_has_ept(vmcs12)) { |
10256 | kvm_mmu_unload(vcpu); | 10194 | if (nested_ept_init_mmu_context(vcpu)) { |
10257 | nested_ept_init_mmu_context(vcpu); | 10195 | *entry_failure_code = ENTRY_FAIL_DEFAULT; |
10196 | return 1; | ||
10197 | } | ||
10198 | } else if (nested_cpu_has2(vmcs12, | ||
10199 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | ||
10200 | vmx_flush_tlb_ept_only(vcpu); | ||
10258 | } | 10201 | } |
10259 | 10202 | ||
10260 | /* | 10203 | /* |
@@ -10282,12 +10225,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10282 | vmx_set_efer(vcpu, vcpu->arch.efer); | 10225 | vmx_set_efer(vcpu, vcpu->arch.efer); |
10283 | 10226 | ||
10284 | /* Shadow page tables on either EPT or shadow page tables. */ | 10227 | /* Shadow page tables on either EPT or shadow page tables. */ |
10285 | if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, | 10228 | if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), |
10286 | entry_failure_code)) | 10229 | entry_failure_code)) |
10287 | return 1; | 10230 | return 1; |
10288 | 10231 | ||
10289 | kvm_mmu_reset_context(vcpu); | ||
10290 | |||
10291 | if (!enable_ept) | 10232 | if (!enable_ept) |
10292 | vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; | 10233 | vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; |
10293 | 10234 | ||
@@ -10407,7 +10348,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
10407 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 10348 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
10408 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 10349 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
10409 | struct loaded_vmcs *vmcs02; | 10350 | struct loaded_vmcs *vmcs02; |
10410 | int cpu; | ||
10411 | u32 msr_entry_idx; | 10351 | u32 msr_entry_idx; |
10412 | u32 exit_qual; | 10352 | u32 exit_qual; |
10413 | 10353 | ||
@@ -10420,18 +10360,12 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
10420 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) | 10360 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) |
10421 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | 10361 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
10422 | 10362 | ||
10423 | cpu = get_cpu(); | 10363 | vmx_switch_vmcs(vcpu, vmcs02); |
10424 | vmx->loaded_vmcs = vmcs02; | ||
10425 | vmx_vcpu_put(vcpu); | ||
10426 | vmx_vcpu_load(vcpu, cpu); | ||
10427 | vcpu->cpu = cpu; | ||
10428 | put_cpu(); | ||
10429 | |||
10430 | vmx_segment_cache_clear(vmx); | 10364 | vmx_segment_cache_clear(vmx); |
10431 | 10365 | ||
10432 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { | 10366 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { |
10433 | leave_guest_mode(vcpu); | 10367 | leave_guest_mode(vcpu); |
10434 | vmx_load_vmcs01(vcpu); | 10368 | vmx_switch_vmcs(vcpu, &vmx->vmcs01); |
10435 | nested_vmx_entry_failure(vcpu, vmcs12, | 10369 | nested_vmx_entry_failure(vcpu, vmcs12, |
10436 | EXIT_REASON_INVALID_STATE, exit_qual); | 10370 | EXIT_REASON_INVALID_STATE, exit_qual); |
10437 | return 1; | 10371 | return 1; |
@@ -10444,7 +10378,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
10444 | vmcs12->vm_entry_msr_load_count); | 10378 | vmcs12->vm_entry_msr_load_count); |
10445 | if (msr_entry_idx) { | 10379 | if (msr_entry_idx) { |
10446 | leave_guest_mode(vcpu); | 10380 | leave_guest_mode(vcpu); |
10447 | vmx_load_vmcs01(vcpu); | 10381 | vmx_switch_vmcs(vcpu, &vmx->vmcs01); |
10448 | nested_vmx_entry_failure(vcpu, vmcs12, | 10382 | nested_vmx_entry_failure(vcpu, vmcs12, |
10449 | EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); | 10383 | EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); |
10450 | return 1; | 10384 | return 1; |
@@ -11012,7 +10946,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
11012 | if (unlikely(vmx->fail)) | 10946 | if (unlikely(vmx->fail)) |
11013 | vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR); | 10947 | vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR); |
11014 | 10948 | ||
11015 | vmx_load_vmcs01(vcpu); | 10949 | vmx_switch_vmcs(vcpu, &vmx->vmcs01); |
11016 | 10950 | ||
11017 | if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) | 10951 | if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) |
11018 | && nested_exit_intr_ack_set(vcpu)) { | 10952 | && nested_exit_intr_ack_set(vcpu)) { |
@@ -11056,6 +10990,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
11056 | vmx->nested.change_vmcs01_virtual_x2apic_mode = false; | 10990 | vmx->nested.change_vmcs01_virtual_x2apic_mode = false; |
11057 | vmx_set_virtual_x2apic_mode(vcpu, | 10991 | vmx_set_virtual_x2apic_mode(vcpu, |
11058 | vcpu->arch.apic_base & X2APIC_ENABLE); | 10992 | vcpu->arch.apic_base & X2APIC_ENABLE); |
10993 | } else if (!nested_cpu_has_ept(vmcs12) && | ||
10994 | nested_cpu_has2(vmcs12, | ||
10995 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | ||
10996 | vmx_flush_tlb_ept_only(vcpu); | ||
11059 | } | 10997 | } |
11060 | 10998 | ||
11061 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ | 10999 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1faf620a6fdc..34bf64fb4dea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include "kvm_cache_regs.h" | 27 | #include "kvm_cache_regs.h" |
28 | #include "x86.h" | 28 | #include "x86.h" |
29 | #include "cpuid.h" | 29 | #include "cpuid.h" |
30 | #include "assigned-dev.h" | ||
31 | #include "pmu.h" | 30 | #include "pmu.h" |
32 | #include "hyperv.h" | 31 | #include "hyperv.h" |
33 | 32 | ||
@@ -1444,10 +1443,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1444 | struct kvm *kvm = vcpu->kvm; | 1443 | struct kvm *kvm = vcpu->kvm; |
1445 | u64 offset, ns, elapsed; | 1444 | u64 offset, ns, elapsed; |
1446 | unsigned long flags; | 1445 | unsigned long flags; |
1447 | s64 usdiff; | ||
1448 | bool matched; | 1446 | bool matched; |
1449 | bool already_matched; | 1447 | bool already_matched; |
1450 | u64 data = msr->data; | 1448 | u64 data = msr->data; |
1449 | bool synchronizing = false; | ||
1451 | 1450 | ||
1452 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1451 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1453 | offset = kvm_compute_tsc_offset(vcpu, data); | 1452 | offset = kvm_compute_tsc_offset(vcpu, data); |
@@ -1455,51 +1454,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1455 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1454 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1456 | 1455 | ||
1457 | if (vcpu->arch.virtual_tsc_khz) { | 1456 | if (vcpu->arch.virtual_tsc_khz) { |
1458 | int faulted = 0; | 1457 | if (data == 0 && msr->host_initiated) { |
1459 | 1458 | /* | |
1460 | /* n.b - signed multiplication and division required */ | 1459 | * detection of vcpu initialization -- need to sync |
1461 | usdiff = data - kvm->arch.last_tsc_write; | 1460 | * with other vCPUs. This particularly helps to keep |
1462 | #ifdef CONFIG_X86_64 | 1461 | * kvm_clock stable after CPU hotplug |
1463 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; | 1462 | */ |
1464 | #else | 1463 | synchronizing = true; |
1465 | /* do_div() only does unsigned */ | 1464 | } else { |
1466 | asm("1: idivl %[divisor]\n" | 1465 | u64 tsc_exp = kvm->arch.last_tsc_write + |
1467 | "2: xor %%edx, %%edx\n" | 1466 | nsec_to_cycles(vcpu, elapsed); |
1468 | " movl $0, %[faulted]\n" | 1467 | u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL; |
1469 | "3:\n" | 1468 | /* |
1470 | ".section .fixup,\"ax\"\n" | 1469 | * Special case: TSC write with a small delta (1 second) |
1471 | "4: movl $1, %[faulted]\n" | 1470 | * of virtual cycle time against real time is |
1472 | " jmp 3b\n" | 1471 | * interpreted as an attempt to synchronize the CPU. |
1473 | ".previous\n" | 1472 | */ |
1474 | 1473 | synchronizing = data < tsc_exp + tsc_hz && | |
1475 | _ASM_EXTABLE(1b, 4b) | 1474 | data + tsc_hz > tsc_exp; |
1476 | 1475 | } | |
1477 | : "=A"(usdiff), [faulted] "=r" (faulted) | 1476 | } |
1478 | : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz)); | ||
1479 | |||
1480 | #endif | ||
1481 | do_div(elapsed, 1000); | ||
1482 | usdiff -= elapsed; | ||
1483 | if (usdiff < 0) | ||
1484 | usdiff = -usdiff; | ||
1485 | |||
1486 | /* idivl overflow => difference is larger than USEC_PER_SEC */ | ||
1487 | if (faulted) | ||
1488 | usdiff = USEC_PER_SEC; | ||
1489 | } else | ||
1490 | usdiff = USEC_PER_SEC; /* disable TSC match window below */ | ||
1491 | 1477 | ||
1492 | /* | 1478 | /* |
1493 | * Special case: TSC write with a small delta (1 second) of virtual | ||
1494 | * cycle time against real time is interpreted as an attempt to | ||
1495 | * synchronize the CPU. | ||
1496 | * | ||
1497 | * For a reliable TSC, we can match TSC offsets, and for an unstable | 1479 | * For a reliable TSC, we can match TSC offsets, and for an unstable |
1498 | * TSC, we add elapsed time in this computation. We could let the | 1480 | * TSC, we add elapsed time in this computation. We could let the |
1499 | * compensation code attempt to catch up if we fall behind, but | 1481 | * compensation code attempt to catch up if we fall behind, but |
1500 | * it's better to try to match offsets from the beginning. | 1482 | * it's better to try to match offsets from the beginning. |
1501 | */ | 1483 | */ |
1502 | if (usdiff < USEC_PER_SEC && | 1484 | if (synchronizing && |
1503 | vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { | 1485 | vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { |
1504 | if (!check_tsc_unstable()) { | 1486 | if (!check_tsc_unstable()) { |
1505 | offset = kvm->arch.cur_tsc_offset; | 1487 | offset = kvm->arch.cur_tsc_offset; |
@@ -2155,6 +2137,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2155 | case MSR_VM_HSAVE_PA: | 2137 | case MSR_VM_HSAVE_PA: |
2156 | case MSR_AMD64_PATCH_LOADER: | 2138 | case MSR_AMD64_PATCH_LOADER: |
2157 | case MSR_AMD64_BU_CFG2: | 2139 | case MSR_AMD64_BU_CFG2: |
2140 | case MSR_AMD64_DC_CFG: | ||
2158 | break; | 2141 | break; |
2159 | 2142 | ||
2160 | case MSR_EFER: | 2143 | case MSR_EFER: |
@@ -2417,6 +2400,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2417 | case MSR_FAM10H_MMIO_CONF_BASE: | 2400 | case MSR_FAM10H_MMIO_CONF_BASE: |
2418 | case MSR_AMD64_BU_CFG2: | 2401 | case MSR_AMD64_BU_CFG2: |
2419 | case MSR_IA32_PERF_CTL: | 2402 | case MSR_IA32_PERF_CTL: |
2403 | case MSR_AMD64_DC_CFG: | ||
2420 | msr_info->data = 0; | 2404 | msr_info->data = 0; |
2421 | break; | 2405 | break; |
2422 | case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: | 2406 | case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: |
@@ -2675,10 +2659,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2675 | case KVM_CAP_SET_BOOT_CPU_ID: | 2659 | case KVM_CAP_SET_BOOT_CPU_ID: |
2676 | case KVM_CAP_SPLIT_IRQCHIP: | 2660 | case KVM_CAP_SPLIT_IRQCHIP: |
2677 | case KVM_CAP_IMMEDIATE_EXIT: | 2661 | case KVM_CAP_IMMEDIATE_EXIT: |
2678 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
2679 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
2680 | case KVM_CAP_PCI_2_3: | ||
2681 | #endif | ||
2682 | r = 1; | 2662 | r = 1; |
2683 | break; | 2663 | break; |
2684 | case KVM_CAP_ADJUST_CLOCK: | 2664 | case KVM_CAP_ADJUST_CLOCK: |
@@ -2695,9 +2675,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2695 | */ | 2675 | */ |
2696 | r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); | 2676 | r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); |
2697 | break; | 2677 | break; |
2698 | case KVM_CAP_COALESCED_MMIO: | ||
2699 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | ||
2700 | break; | ||
2701 | case KVM_CAP_VAPIC: | 2678 | case KVM_CAP_VAPIC: |
2702 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); | 2679 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); |
2703 | break; | 2680 | break; |
@@ -2713,11 +2690,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2713 | case KVM_CAP_PV_MMU: /* obsolete */ | 2690 | case KVM_CAP_PV_MMU: /* obsolete */ |
2714 | r = 0; | 2691 | r = 0; |
2715 | break; | 2692 | break; |
2716 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
2717 | case KVM_CAP_IOMMU: | ||
2718 | r = iommu_present(&pci_bus_type); | ||
2719 | break; | ||
2720 | #endif | ||
2721 | case KVM_CAP_MCE: | 2693 | case KVM_CAP_MCE: |
2722 | r = KVM_MAX_MCE_BANKS; | 2694 | r = KVM_MAX_MCE_BANKS; |
2723 | break; | 2695 | break; |
@@ -3124,7 +3096,14 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
3124 | return -EINVAL; | 3096 | return -EINVAL; |
3125 | 3097 | ||
3126 | if (events->exception.injected && | 3098 | if (events->exception.injected && |
3127 | (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) | 3099 | (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || |
3100 | is_guest_mode(vcpu))) | ||
3101 | return -EINVAL; | ||
3102 | |||
3103 | /* INITs are latched while in SMM */ | ||
3104 | if (events->flags & KVM_VCPUEVENT_VALID_SMM && | ||
3105 | (events->smi.smm || events->smi.pending) && | ||
3106 | vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) | ||
3128 | return -EINVAL; | 3107 | return -EINVAL; |
3129 | 3108 | ||
3130 | process_nmi(vcpu); | 3109 | process_nmi(vcpu); |
@@ -3721,22 +3700,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
3721 | 3700 | ||
3722 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | 3701 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) |
3723 | { | 3702 | { |
3703 | struct kvm_pic *pic = kvm->arch.vpic; | ||
3724 | int r; | 3704 | int r; |
3725 | 3705 | ||
3726 | r = 0; | 3706 | r = 0; |
3727 | switch (chip->chip_id) { | 3707 | switch (chip->chip_id) { |
3728 | case KVM_IRQCHIP_PIC_MASTER: | 3708 | case KVM_IRQCHIP_PIC_MASTER: |
3729 | memcpy(&chip->chip.pic, | 3709 | memcpy(&chip->chip.pic, &pic->pics[0], |
3730 | &pic_irqchip(kvm)->pics[0], | ||
3731 | sizeof(struct kvm_pic_state)); | 3710 | sizeof(struct kvm_pic_state)); |
3732 | break; | 3711 | break; |
3733 | case KVM_IRQCHIP_PIC_SLAVE: | 3712 | case KVM_IRQCHIP_PIC_SLAVE: |
3734 | memcpy(&chip->chip.pic, | 3713 | memcpy(&chip->chip.pic, &pic->pics[1], |
3735 | &pic_irqchip(kvm)->pics[1], | ||
3736 | sizeof(struct kvm_pic_state)); | 3714 | sizeof(struct kvm_pic_state)); |
3737 | break; | 3715 | break; |
3738 | case KVM_IRQCHIP_IOAPIC: | 3716 | case KVM_IRQCHIP_IOAPIC: |
3739 | r = kvm_get_ioapic(kvm, &chip->chip.ioapic); | 3717 | kvm_get_ioapic(kvm, &chip->chip.ioapic); |
3740 | break; | 3718 | break; |
3741 | default: | 3719 | default: |
3742 | r = -EINVAL; | 3720 | r = -EINVAL; |
@@ -3747,32 +3725,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
3747 | 3725 | ||
3748 | static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | 3726 | static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) |
3749 | { | 3727 | { |
3728 | struct kvm_pic *pic = kvm->arch.vpic; | ||
3750 | int r; | 3729 | int r; |
3751 | 3730 | ||
3752 | r = 0; | 3731 | r = 0; |
3753 | switch (chip->chip_id) { | 3732 | switch (chip->chip_id) { |
3754 | case KVM_IRQCHIP_PIC_MASTER: | 3733 | case KVM_IRQCHIP_PIC_MASTER: |
3755 | spin_lock(&pic_irqchip(kvm)->lock); | 3734 | spin_lock(&pic->lock); |
3756 | memcpy(&pic_irqchip(kvm)->pics[0], | 3735 | memcpy(&pic->pics[0], &chip->chip.pic, |
3757 | &chip->chip.pic, | ||
3758 | sizeof(struct kvm_pic_state)); | 3736 | sizeof(struct kvm_pic_state)); |
3759 | spin_unlock(&pic_irqchip(kvm)->lock); | 3737 | spin_unlock(&pic->lock); |
3760 | break; | 3738 | break; |
3761 | case KVM_IRQCHIP_PIC_SLAVE: | 3739 | case KVM_IRQCHIP_PIC_SLAVE: |
3762 | spin_lock(&pic_irqchip(kvm)->lock); | 3740 | spin_lock(&pic->lock); |
3763 | memcpy(&pic_irqchip(kvm)->pics[1], | 3741 | memcpy(&pic->pics[1], &chip->chip.pic, |
3764 | &chip->chip.pic, | ||
3765 | sizeof(struct kvm_pic_state)); | 3742 | sizeof(struct kvm_pic_state)); |
3766 | spin_unlock(&pic_irqchip(kvm)->lock); | 3743 | spin_unlock(&pic->lock); |
3767 | break; | 3744 | break; |
3768 | case KVM_IRQCHIP_IOAPIC: | 3745 | case KVM_IRQCHIP_IOAPIC: |
3769 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); | 3746 | kvm_set_ioapic(kvm, &chip->chip.ioapic); |
3770 | break; | 3747 | break; |
3771 | default: | 3748 | default: |
3772 | r = -EINVAL; | 3749 | r = -EINVAL; |
3773 | break; | 3750 | break; |
3774 | } | 3751 | } |
3775 | kvm_pic_update_irq(pic_irqchip(kvm)); | 3752 | kvm_pic_update_irq(pic); |
3776 | return r; | 3753 | return r; |
3777 | } | 3754 | } |
3778 | 3755 | ||
@@ -3934,9 +3911,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, | |||
3934 | goto split_irqchip_unlock; | 3911 | goto split_irqchip_unlock; |
3935 | if (kvm->created_vcpus) | 3912 | if (kvm->created_vcpus) |
3936 | goto split_irqchip_unlock; | 3913 | goto split_irqchip_unlock; |
3914 | kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS; | ||
3937 | r = kvm_setup_empty_irq_routing(kvm); | 3915 | r = kvm_setup_empty_irq_routing(kvm); |
3938 | if (r) | 3916 | if (r) { |
3917 | kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE; | ||
3918 | /* Pairs with smp_rmb() when reading irqchip_mode */ | ||
3919 | smp_wmb(); | ||
3939 | goto split_irqchip_unlock; | 3920 | goto split_irqchip_unlock; |
3921 | } | ||
3940 | /* Pairs with irqchip_in_kernel. */ | 3922 | /* Pairs with irqchip_in_kernel. */ |
3941 | smp_wmb(); | 3923 | smp_wmb(); |
3942 | kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; | 3924 | kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; |
@@ -4018,20 +4000,18 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
4018 | 4000 | ||
4019 | r = kvm_ioapic_init(kvm); | 4001 | r = kvm_ioapic_init(kvm); |
4020 | if (r) { | 4002 | if (r) { |
4021 | mutex_lock(&kvm->slots_lock); | ||
4022 | kvm_pic_destroy(kvm); | 4003 | kvm_pic_destroy(kvm); |
4023 | mutex_unlock(&kvm->slots_lock); | ||
4024 | goto create_irqchip_unlock; | 4004 | goto create_irqchip_unlock; |
4025 | } | 4005 | } |
4026 | 4006 | ||
4007 | kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS; | ||
4027 | r = kvm_setup_default_irq_routing(kvm); | 4008 | r = kvm_setup_default_irq_routing(kvm); |
4028 | if (r) { | 4009 | if (r) { |
4029 | mutex_lock(&kvm->slots_lock); | 4010 | kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE; |
4030 | mutex_lock(&kvm->irq_lock); | 4011 | /* Pairs with smp_rmb() when reading irqchip_mode */ |
4012 | smp_wmb(); | ||
4031 | kvm_ioapic_destroy(kvm); | 4013 | kvm_ioapic_destroy(kvm); |
4032 | kvm_pic_destroy(kvm); | 4014 | kvm_pic_destroy(kvm); |
4033 | mutex_unlock(&kvm->irq_lock); | ||
4034 | mutex_unlock(&kvm->slots_lock); | ||
4035 | goto create_irqchip_unlock; | 4015 | goto create_irqchip_unlock; |
4036 | } | 4016 | } |
4037 | /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ | 4017 | /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ |
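
Both irqchip creation paths now publish kvm->arch.irqchip_mode in stages: INIT_IN_PROGRESS before routing setup, a rollback to NONE plus smp_wmb() on failure, and a final smp_wmb() before the SPLIT/KERNEL mode becomes visible, pairing with smp_rmb() on the readers. A user-space analogue of that publish pattern, using C11 release stores in place of the kernel barriers:

#include <stdatomic.h>
#include <stdio.h>

enum irqchip_mode { IRQCHIP_NONE, IRQCHIP_INIT_IN_PROGRESS, IRQCHIP_KERNEL };

/* Analogue of kvm->arch.irqchip_mode; release stores stand in for smp_wmb(). */
static _Atomic int irqchip_mode = IRQCHIP_NONE;

static int setup_routing(int fail) { return fail ? -12 /* -ENOMEM */ : 0; }

static int create_irqchip(int fail)
{
	int r;

	atomic_store_explicit(&irqchip_mode, IRQCHIP_INIT_IN_PROGRESS,
			      memory_order_relaxed);
	r = setup_routing(fail);
	if (r) {
		/* Roll back; the release makes the reset visible before any
		 * later stores, as the smp_wmb() does in the real code. */
		atomic_store_explicit(&irqchip_mode, IRQCHIP_NONE,
				      memory_order_release);
		return r;
	}
	/* Publish: routing tables are written before the mode flips. */
	atomic_store_explicit(&irqchip_mode, IRQCHIP_KERNEL, memory_order_release);
	return 0;
}

int main(void)
{
	printf("%d %d\n", create_irqchip(0),
	       atomic_load_explicit(&irqchip_mode, memory_order_acquire));
	return 0;
}
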
@@ -4230,7 +4210,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
4230 | break; | 4210 | break; |
4231 | } | 4211 | } |
4232 | default: | 4212 | default: |
4233 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | 4213 | r = -ENOTTY; |
4234 | } | 4214 | } |
4235 | out: | 4215 | out: |
4236 | return r; | 4216 | return r; |
@@ -7355,6 +7335,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
7355 | mp_state->mp_state != KVM_MP_STATE_RUNNABLE) | 7335 | mp_state->mp_state != KVM_MP_STATE_RUNNABLE) |
7356 | return -EINVAL; | 7336 | return -EINVAL; |
7357 | 7337 | ||
7338 | /* INITs are latched while in SMM */ | ||
7339 | if ((is_smm(vcpu) || vcpu->arch.smi_pending) && | ||
7340 | (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || | ||
7341 | mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) | ||
7342 | return -EINVAL; | ||
7343 | |||
7358 | if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { | 7344 | if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { |
7359 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 7345 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
7360 | set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); | 7346 | set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); |
@@ -8068,7 +8054,6 @@ void kvm_arch_sync_events(struct kvm *kvm) | |||
8068 | { | 8054 | { |
8069 | cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); | 8055 | cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); |
8070 | cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); | 8056 | cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); |
8071 | kvm_free_all_assigned_devices(kvm); | ||
8072 | kvm_free_pit(kvm); | 8057 | kvm_free_pit(kvm); |
8073 | } | 8058 | } |
8074 | 8059 | ||
@@ -8152,12 +8137,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
8152 | } | 8137 | } |
8153 | if (kvm_x86_ops->vm_destroy) | 8138 | if (kvm_x86_ops->vm_destroy) |
8154 | kvm_x86_ops->vm_destroy(kvm); | 8139 | kvm_x86_ops->vm_destroy(kvm); |
8155 | kvm_iommu_unmap_guest(kvm); | 8140 | kvm_pic_destroy(kvm); |
8156 | kfree(kvm->arch.vpic); | 8141 | kvm_ioapic_destroy(kvm); |
8157 | kfree(kvm->arch.vioapic); | ||
8158 | kvm_free_vcpus(kvm); | 8142 | kvm_free_vcpus(kvm); |
8159 | kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); | 8143 | kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); |
8160 | kvm_mmu_uninit_vm(kvm); | 8144 | kvm_mmu_uninit_vm(kvm); |
8145 | kvm_page_track_cleanup(kvm); | ||
8161 | } | 8146 | } |
8162 | 8147 | ||
8163 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | 8148 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, |
@@ -8566,11 +8551,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | |||
8566 | { | 8551 | { |
8567 | struct x86_exception fault; | 8552 | struct x86_exception fault; |
8568 | 8553 | ||
8569 | trace_kvm_async_pf_ready(work->arch.token, work->gva); | ||
8570 | if (work->wakeup_all) | 8554 | if (work->wakeup_all) |
8571 | work->arch.token = ~0; /* broadcast wakeup */ | 8555 | work->arch.token = ~0; /* broadcast wakeup */ |
8572 | else | 8556 | else |
8573 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); | 8557 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); |
8558 | trace_kvm_async_pf_ready(work->arch.token, work->gva); | ||
8574 | 8559 | ||
8575 | if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && | 8560 | if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && |
8576 | !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { | 8561 | !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { |
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index e7e7055a8658..69f0827d5f53 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile | |||
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \ | |||
16 | 16 | ||
17 | ifeq ($(CONFIG_X86_32),y) | 17 | ifeq ($(CONFIG_X86_32),y) |
18 | 18 | ||
19 | obj-y += checksum_32.o | 19 | obj-y += checksum_32.o syscalls_32.o |
20 | obj-$(CONFIG_ELF_CORE) += elfcore.o | 20 | obj-$(CONFIG_ELF_CORE) += elfcore.o |
21 | 21 | ||
22 | subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o | 22 | subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o |
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index e59eef20647b..b291ca5cf66b 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h | |||
@@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx, | |||
78 | return -ENOSYS; | 78 | return -ENOSYS; |
79 | } | 79 | } |
80 | 80 | ||
81 | extern long arch_prctl(struct task_struct *task, int code, | 81 | extern long arch_prctl(struct task_struct *task, int option, |
82 | unsigned long __user *addr); | 82 | unsigned long __user *addr); |
83 | 83 | ||
84 | #endif | 84 | #endif |
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c index 96eb2bd28832..8431e87ac333 100644 --- a/arch/x86/um/os-Linux/prctl.c +++ b/arch/x86/um/os-Linux/prctl.c | |||
@@ -6,7 +6,7 @@ | |||
6 | #include <sys/ptrace.h> | 6 | #include <sys/ptrace.h> |
7 | #include <asm/ptrace.h> | 7 | #include <asm/ptrace.h> |
8 | 8 | ||
9 | int os_arch_prctl(int pid, int code, unsigned long *addr) | 9 | int os_arch_prctl(int pid, int option, unsigned long *arg2) |
10 | { | 10 | { |
11 | return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); | 11 | return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option); |
12 | } | 12 | } |
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c new file mode 100644 index 000000000000..627d68836b16 --- /dev/null +++ b/arch/x86/um/syscalls_32.c | |||
@@ -0,0 +1,7 @@ | |||
1 | #include <linux/syscalls.h> | ||
2 | #include <os.h> | ||
3 | |||
4 | SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) | ||
5 | { | ||
6 | return -EINVAL; | ||
7 | } | ||
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index 10d907098c26..58f51667e2e4 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c | |||
@@ -7,13 +7,15 @@ | |||
7 | 7 | ||
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/sched/mm.h> | 9 | #include <linux/sched/mm.h> |
10 | #include <linux/syscalls.h> | ||
10 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
11 | #include <asm/prctl.h> /* XXX This should get the constants from libc */ | 12 | #include <asm/prctl.h> /* XXX This should get the constants from libc */ |
12 | #include <os.h> | 13 | #include <os.h> |
13 | 14 | ||
14 | long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) | 15 | long arch_prctl(struct task_struct *task, int option, |
16 | unsigned long __user *arg2) | ||
15 | { | 17 | { |
16 | unsigned long *ptr = addr, tmp; | 18 | unsigned long *ptr = arg2, tmp; |
17 | long ret; | 19 | long ret; |
18 | int pid = task->mm->context.id.u.pid; | 20 | int pid = task->mm->context.id.u.pid; |
19 | 21 | ||
@@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) | |||
30 | * arch_prctl is run on the host, then the registers are read | 32 | * arch_prctl is run on the host, then the registers are read |
31 | * back. | 33 | * back. |
32 | */ | 34 | */ |
33 | switch (code) { | 35 | switch (option) { |
34 | case ARCH_SET_FS: | 36 | case ARCH_SET_FS: |
35 | case ARCH_SET_GS: | 37 | case ARCH_SET_GS: |
36 | ret = restore_registers(pid, ¤t->thread.regs.regs); | 38 | ret = restore_registers(pid, ¤t->thread.regs.regs); |
@@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) | |||
50 | ptr = &tmp; | 52 | ptr = &tmp; |
51 | } | 53 | } |
52 | 54 | ||
53 | ret = os_arch_prctl(pid, code, ptr); | 55 | ret = os_arch_prctl(pid, option, ptr); |
54 | if (ret) | 56 | if (ret) |
55 | return ret; | 57 | return ret; |
56 | 58 | ||
57 | switch (code) { | 59 | switch (option) { |
58 | case ARCH_SET_FS: | 60 | case ARCH_SET_FS: |
59 | current->thread.arch.fs = (unsigned long) ptr; | 61 | current->thread.arch.fs = (unsigned long) ptr; |
60 | ret = save_registers(pid, ¤t->thread.regs.regs); | 62 | ret = save_registers(pid, ¤t->thread.regs.regs); |
@@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) | |||
63 | ret = save_registers(pid, ¤t->thread.regs.regs); | 65 | ret = save_registers(pid, ¤t->thread.regs.regs); |
64 | break; | 66 | break; |
65 | case ARCH_GET_FS: | 67 | case ARCH_GET_FS: |
66 | ret = put_user(tmp, addr); | 68 | ret = put_user(tmp, arg2); |
67 | break; | 69 | break; |
68 | case ARCH_GET_GS: | 70 | case ARCH_GET_GS: |
69 | ret = put_user(tmp, addr); | 71 | ret = put_user(tmp, arg2); |
70 | break; | 72 | break; |
71 | } | 73 | } |
72 | 74 | ||
73 | return ret; | 75 | return ret; |
74 | } | 76 | } |
75 | 77 | ||
76 | long sys_arch_prctl(int code, unsigned long addr) | 78 | SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2) |
77 | { | 79 | { |
78 | return arch_prctl(current, code, (unsigned long __user *) addr); | 80 | return arch_prctl(current, option, (unsigned long __user *) arg2); |
79 | } | 81 | } |
80 | 82 | ||
81 | void arch_switch_to(struct task_struct *to) | 83 | void arch_switch_to(struct task_struct *to) |
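
These UML hunks rename the arch_prctl arguments to option/arg2 and switch the entry point to SYSCALL_DEFINE2, with the new 32-bit stub simply returning -EINVAL. For reference, the user-space call shape against that (option, arg2) interface on an x86-64 host looks roughly like this usage sketch:

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <asm/prctl.h>      /* ARCH_GET_FS and friends */

/* Read back the FS base through the renamed (option, arg2) interface. */
int main(void)
{
	unsigned long fs_base = 0;

	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fs_base) != 0) {
		perror("arch_prctl");
		return 1;
	}
	printf("FS base: %#lx\n", fs_base);
	return 0;
}
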
diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 9e1a138fed53..16a8951b2bed 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c | |||
@@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev) | |||
96 | gpio->regmap = a10sr->regmap; | 96 | gpio->regmap = a10sr->regmap; |
97 | 97 | ||
98 | gpio->gp = altr_a10sr_gc; | 98 | gpio->gp = altr_a10sr_gc; |
99 | 99 | gpio->gp.parent = pdev->dev.parent; | |
100 | gpio->gp.of_node = pdev->dev.of_node; | 100 | gpio->gp.of_node = pdev->dev.of_node; |
101 | 101 | ||
102 | ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio); | 102 | ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio); |
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 5bddbd507ca9..3fe6a21e05a5 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c | |||
@@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d, | |||
90 | 90 | ||
91 | altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); | 91 | altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); |
92 | 92 | ||
93 | if (type == IRQ_TYPE_NONE) | 93 | if (type == IRQ_TYPE_NONE) { |
94 | irq_set_handler_locked(d, handle_bad_irq); | ||
94 | return 0; | 95 | return 0; |
95 | if (type == IRQ_TYPE_LEVEL_HIGH && | 96 | } |
96 | altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) | 97 | if (type == altera_gc->interrupt_trigger) { |
97 | return 0; | 98 | if (type == IRQ_TYPE_LEVEL_HIGH) |
98 | if (type == IRQ_TYPE_EDGE_RISING && | 99 | irq_set_handler_locked(d, handle_level_irq); |
99 | altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) | 100 | else |
100 | return 0; | 101 | irq_set_handler_locked(d, handle_simple_irq); |
101 | if (type == IRQ_TYPE_EDGE_FALLING && | ||
102 | altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING) | ||
103 | return 0; | ||
104 | if (type == IRQ_TYPE_EDGE_BOTH && | ||
105 | altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH) | ||
106 | return 0; | 102 | return 0; |
107 | 103 | } | |
104 | irq_set_handler_locked(d, handle_bad_irq); | ||
108 | return -EINVAL; | 105 | return -EINVAL; |
109 | } | 106 | } |
110 | 107 | ||
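
The reworked altera_gpio_irq_set_type() installs handle_bad_irq by default (also passed to gpiochip_irqchip_add() below) and only accepts a request that matches the trigger the IP was synthesized with, picking the level or simple flow handler accordingly. A condensed sketch of that selection, with strings standing in for the handler pointers:

#include <stdio.h>

/* Simplified stand-ins for the Linux IRQ trigger types. */
enum trig { TYPE_NONE, TYPE_EDGE_RISING, TYPE_EDGE_FALLING, TYPE_EDGE_BOTH, TYPE_LEVEL_HIGH };

/* Mirrors the reworked set_type(): mismatched requests get the bad handler. */
static const char *set_type(enum trig requested, enum trig configured, int *ret)
{
	if (requested == TYPE_NONE) {
		*ret = 0;
		return "handle_bad_irq";
	}
	if (requested == configured) {
		*ret = 0;
		return requested == TYPE_LEVEL_HIGH ? "handle_level_irq"
						    : "handle_simple_irq";
	}
	*ret = -22; /* -EINVAL */
	return "handle_bad_irq";
}

int main(void)
{
	int ret;
	printf("%s (%d)\n", set_type(TYPE_EDGE_RISING, TYPE_EDGE_RISING, &ret), ret);
	printf("%s (%d)\n", set_type(TYPE_LEVEL_HIGH, TYPE_EDGE_BOTH, &ret), ret);
	return 0;
}
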
@@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc) | |||
230 | chained_irq_exit(chip, desc); | 227 | chained_irq_exit(chip, desc); |
231 | } | 228 | } |
232 | 229 | ||
233 | |||
234 | static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) | 230 | static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) |
235 | { | 231 | { |
236 | struct altera_gpio_chip *altera_gc; | 232 | struct altera_gpio_chip *altera_gc; |
@@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev) | |||
310 | altera_gc->interrupt_trigger = reg; | 306 | altera_gc->interrupt_trigger = reg; |
311 | 307 | ||
312 | ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, | 308 | ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, |
313 | handle_simple_irq, IRQ_TYPE_NONE); | 309 | handle_bad_irq, IRQ_TYPE_NONE); |
314 | 310 | ||
315 | if (ret) { | 311 | if (ret) { |
316 | dev_err(&pdev->dev, "could not add irqchip\n"); | 312 | dev_err(&pdev->dev, "could not add irqchip\n"); |
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index bdb692345428..2a57d024481d 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c | |||
@@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value) | |||
270 | static irqreturn_t mcp23s08_irq(int irq, void *data) | 270 | static irqreturn_t mcp23s08_irq(int irq, void *data) |
271 | { | 271 | { |
272 | struct mcp23s08 *mcp = data; | 272 | struct mcp23s08 *mcp = data; |
273 | int intcap, intf, i; | 273 | int intcap, intf, i, gpio, gpio_orig, intcap_mask; |
274 | unsigned int child_irq; | 274 | unsigned int child_irq; |
275 | bool intf_set, intcap_changed, gpio_bit_changed, | ||
276 | defval_changed, gpio_set; | ||
275 | 277 | ||
276 | mutex_lock(&mcp->lock); | 278 | mutex_lock(&mcp->lock); |
277 | if (mcp_read(mcp, MCP_INTF, &intf) < 0) { | 279 | if (mcp_read(mcp, MCP_INTF, &intf) < 0) { |
@@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data) | |||
287 | } | 289 | } |
288 | 290 | ||
289 | mcp->cache[MCP_INTCAP] = intcap; | 291 | mcp->cache[MCP_INTCAP] = intcap; |
292 | |||
293 | /* This clears the interrupt(configurable on S18) */ | ||
294 | if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) { | ||
295 | mutex_unlock(&mcp->lock); | ||
296 | return IRQ_HANDLED; | ||
297 | } | ||
298 | gpio_orig = mcp->cache[MCP_GPIO]; | ||
299 | mcp->cache[MCP_GPIO] = gpio; | ||
290 | mutex_unlock(&mcp->lock); | 300 | mutex_unlock(&mcp->lock); |
291 | 301 | ||
302 | if (mcp->cache[MCP_INTF] == 0) { | ||
303 | /* There is no interrupt pending */ | ||
304 | return IRQ_HANDLED; | ||
305 | } | ||
306 | |||
307 | dev_dbg(mcp->chip.parent, | ||
308 | "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n", | ||
309 | intcap, intf, gpio_orig, gpio); | ||
292 | 310 | ||
293 | for (i = 0; i < mcp->chip.ngpio; i++) { | 311 | for (i = 0; i < mcp->chip.ngpio; i++) { |
294 | if ((BIT(i) & mcp->cache[MCP_INTF]) && | 312 | /* We must check all of the inputs on the chip, |
295 | ((BIT(i) & intcap & mcp->irq_rise) || | 313 | * otherwise we may not notice a change on >=2 pins. |
296 | (mcp->irq_fall & ~intcap & BIT(i)) || | 314 | * |
297 | (BIT(i) & mcp->cache[MCP_INTCON]))) { | 315 | * On at least the mcp23s17, INTCAP is only updated |
316 | * one byte at a time (INTCAPA and INTCAPB are | ||
317 | * not written to at the same time - only on a per-bank | ||
318 | * basis). | ||
319 | * | ||
320 | * INTF only contains the single bit that caused the | ||
321 | * interrupt per-bank. On the mcp23s17, there is | ||
322 | * INTFA and INTFB. If two pins are changed on the A | ||
323 | * side at the same time, INTF will only have one bit | ||
324 | * set. If one pin on the A side and one pin on the B | ||
325 | * side are changed at the same time, INTF will have | ||
326 | * two bits set. Thus, INTF can't be the only check | ||
327 | * to see if the input has changed. | ||
328 | */ | ||
329 | |||
330 | intf_set = BIT(i) & mcp->cache[MCP_INTF]; | ||
331 | if (i < 8 && intf_set) | ||
332 | intcap_mask = 0x00FF; | ||
333 | else if (i >= 8 && intf_set) | ||
334 | intcap_mask = 0xFF00; | ||
335 | else | ||
336 | intcap_mask = 0x00; | ||
337 | |||
338 | intcap_changed = (intcap_mask & | ||
339 | (BIT(i) & mcp->cache[MCP_INTCAP])) != | ||
340 | (intcap_mask & (BIT(i) & gpio_orig)); | ||
341 | gpio_set = BIT(i) & mcp->cache[MCP_GPIO]; | ||
342 | gpio_bit_changed = (BIT(i) & gpio_orig) != | ||
343 | (BIT(i) & mcp->cache[MCP_GPIO]); | ||
344 | defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) && | ||
345 | ((BIT(i) & mcp->cache[MCP_GPIO]) != | ||
346 | (BIT(i) & mcp->cache[MCP_DEFVAL])); | ||
347 | |||
348 | if (((gpio_bit_changed || intcap_changed) && | ||
349 | (BIT(i) & mcp->irq_rise) && gpio_set) || | ||
350 | ((gpio_bit_changed || intcap_changed) && | ||
351 | (BIT(i) & mcp->irq_fall) && !gpio_set) || | ||
352 | defval_changed) { | ||
298 | child_irq = irq_find_mapping(mcp->chip.irqdomain, i); | 353 | child_irq = irq_find_mapping(mcp->chip.irqdomain, i); |
299 | handle_nested_irq(child_irq); | 354 | handle_nested_irq(child_irq); |
300 | } | 355 | } |
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 06dac72cb69c..d99338689213 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c | |||
@@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file, | |||
197 | struct seq_file *sfile; | 197 | struct seq_file *sfile; |
198 | struct gpio_desc *desc; | 198 | struct gpio_desc *desc; |
199 | struct gpio_chip *gc; | 199 | struct gpio_chip *gc; |
200 | int status, val; | 200 | int val; |
201 | char buf; | 201 | char buf; |
202 | 202 | ||
203 | sfile = file->private_data; | 203 | sfile = file->private_data; |
@@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file, | |||
206 | chip = priv->chip; | 206 | chip = priv->chip; |
207 | gc = &chip->gc; | 207 | gc = &chip->gc; |
208 | 208 | ||
209 | status = copy_from_user(&buf, usr_buf, 1); | 209 | if (copy_from_user(&buf, usr_buf, 1)) |
210 | if (status) | 210 | return -EFAULT; |
211 | return status; | ||
212 | 211 | ||
213 | if (buf == '0') | 212 | if (buf == '0') |
214 | val = 0; | 213 | val = 0; |
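The gpio-mockup fix above follows the usual copy_from_user() idiom: the helper returns the number of bytes it could not copy rather than an errno, so returning that count to user space (as the old code did) reports a bogus positive value. A minimal sketch of the idiom, with a hypothetical write handler:

#include <linux/fs.h>
#include <linux/uaccess.h>

/* Hypothetical write handler used only to show the error-handling shape. */
static ssize_t example_write(struct file *file, const char __user *usr_buf,
			     size_t count, loff_t *ppos)
{
	char buf;

	if (!count)
		return 0;

	/* copy_from_user() returns the number of bytes NOT copied ... */
	if (copy_from_user(&buf, usr_buf, 1))
		return -EFAULT;		/* ... so map any shortfall to -EFAULT */

	/* ... interpret buf ... */
	return count;			/* bytes consumed on success */
}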
diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c index 40a8881c2ce8..f1c6ec17b90a 100644 --- a/drivers/gpio/gpio-xgene.c +++ b/drivers/gpio/gpio-xgene.c | |||
@@ -42,9 +42,7 @@ struct xgene_gpio { | |||
42 | struct gpio_chip chip; | 42 | struct gpio_chip chip; |
43 | void __iomem *base; | 43 | void __iomem *base; |
44 | spinlock_t lock; | 44 | spinlock_t lock; |
45 | #ifdef CONFIG_PM | ||
46 | u32 set_dr_val[XGENE_MAX_GPIO_BANKS]; | 45 | u32 set_dr_val[XGENE_MAX_GPIO_BANKS]; |
47 | #endif | ||
48 | }; | 46 | }; |
49 | 47 | ||
50 | static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset) | 48 | static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset) |
@@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc, | |||
138 | return 0; | 136 | return 0; |
139 | } | 137 | } |
140 | 138 | ||
141 | #ifdef CONFIG_PM | 139 | static __maybe_unused int xgene_gpio_suspend(struct device *dev) |
142 | static int xgene_gpio_suspend(struct device *dev) | ||
143 | { | 140 | { |
144 | struct xgene_gpio *gpio = dev_get_drvdata(dev); | 141 | struct xgene_gpio *gpio = dev_get_drvdata(dev); |
145 | unsigned long bank_offset; | 142 | unsigned long bank_offset; |
@@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev) | |||
152 | return 0; | 149 | return 0; |
153 | } | 150 | } |
154 | 151 | ||
155 | static int xgene_gpio_resume(struct device *dev) | 152 | static __maybe_unused int xgene_gpio_resume(struct device *dev) |
156 | { | 153 | { |
157 | struct xgene_gpio *gpio = dev_get_drvdata(dev); | 154 | struct xgene_gpio *gpio = dev_get_drvdata(dev); |
158 | unsigned long bank_offset; | 155 | unsigned long bank_offset; |
@@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev) | |||
166 | } | 163 | } |
167 | 164 | ||
168 | static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume); | 165 | static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume); |
169 | #define XGENE_GPIO_PM_OPS (&xgene_gpio_pm) | ||
170 | #else | ||
171 | #define XGENE_GPIO_PM_OPS NULL | ||
172 | #endif | ||
173 | 166 | ||
174 | static int xgene_gpio_probe(struct platform_device *pdev) | 167 | static int xgene_gpio_probe(struct platform_device *pdev) |
175 | { | 168 | { |
@@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = { | |||
241 | .name = "xgene-gpio", | 234 | .name = "xgene-gpio", |
242 | .of_match_table = xgene_gpio_of_match, | 235 | .of_match_table = xgene_gpio_of_match, |
243 | .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match), | 236 | .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match), |
244 | .pm = XGENE_GPIO_PM_OPS, | 237 | .pm = &xgene_gpio_pm, |
245 | }, | 238 | }, |
246 | .probe = xgene_gpio_probe, | 239 | .probe = xgene_gpio_probe, |
247 | }; | 240 | }; |
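The gpio-xgene cleanup above relies on SIMPLE_DEV_PM_OPS() compiling down to an empty ops table when CONFIG_PM_SLEEP is off, so the #ifdef CONFIG_PM scaffolding and the XGENE_GPIO_PM_OPS indirection can go; marking the callbacks __maybe_unused keeps that configuration warning-free. A sketch of the resulting shape, with illustrative names:

#include <linux/module.h>
#include <linux/pm.h>
#include <linux/platform_device.h>

static int __maybe_unused example_suspend(struct device *dev)
{
	/* save controller state */
	return 0;
}

static int __maybe_unused example_resume(struct device *dev)
{
	/* restore controller state */
	return 0;
}

/* Expands to an empty dev_pm_ops when CONFIG_PM_SLEEP is disabled. */
static SIMPLE_DEV_PM_OPS(example_pm, example_suspend, example_resume);

static struct platform_driver example_driver = {
	.driver = {
		.name	= "example-gpio",
		.pm	= &example_pm,	/* valid in every configuration */
	},
};
module_platform_driver(example_driver);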
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 1aeb80e52424..8c54cb8f5d6d 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig | |||
@@ -175,11 +175,11 @@ config HID_CHERRY | |||
175 | Support for Cherry Cymotion keyboard. | 175 | Support for Cherry Cymotion keyboard. |
176 | 176 | ||
177 | config HID_CHICONY | 177 | config HID_CHICONY |
178 | tristate "Chicony Tactical pad" | 178 | tristate "Chicony devices" |
179 | depends on HID | 179 | depends on HID |
180 | default !EXPERT | 180 | default !EXPERT |
181 | ---help--- | 181 | ---help--- |
182 | Support for Chicony Tactical pad. | 182 | Support for Chicony Tactical pad and special keys on Chicony keyboards. |
183 | 183 | ||
184 | config HID_CORSAIR | 184 | config HID_CORSAIR |
185 | tristate "Corsair devices" | 185 | tristate "Corsair devices" |
@@ -190,6 +190,7 @@ config HID_CORSAIR | |||
190 | 190 | ||
191 | Supported devices: | 191 | Supported devices: |
192 | - Vengeance K90 | 192 | - Vengeance K90 |
193 | - Scimitar PRO RGB | ||
193 | 194 | ||
194 | config HID_PRODIKEYS | 195 | config HID_PRODIKEYS |
195 | tristate "Prodikeys PC-MIDI Keyboard support" | 196 | tristate "Prodikeys PC-MIDI Keyboard support" |
diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index bc3cec199fee..f04ed9aabc3f 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c | |||
@@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = { | |||
86 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, | 86 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, |
87 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, | 87 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, |
88 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, | 88 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, |
89 | { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, | ||
89 | { } | 90 | { } |
90 | }; | 91 | }; |
91 | MODULE_DEVICE_TABLE(hid, ch_devices); | 92 | MODULE_DEVICE_TABLE(hid, ch_devices); |
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e9e87d337446..3ceb4a2af381 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c | |||
@@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = { | |||
1870 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, | 1870 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, |
1871 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, | 1871 | { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, |
1872 | { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, | 1872 | { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, |
1873 | { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, | ||
1873 | { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, | 1874 | { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, |
1874 | { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, | 1875 | { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, |
1875 | { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, | 1876 | { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, |
@@ -1910,6 +1911,7 @@ static const struct hid_device_id hid_have_special_driver[] = { | |||
1910 | { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, | 1911 | { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, |
1911 | { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, | 1912 | { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, |
1912 | { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, | 1913 | { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, |
1914 | { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, | ||
1913 | { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, | 1915 | { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, |
1914 | { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, | 1916 | { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, |
1915 | { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, | 1917 | { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, |
diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index c0303f61c26a..9ba5d98a1180 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c | |||
@@ -3,8 +3,10 @@ | |||
3 | * | 3 | * |
4 | * Supported devices: | 4 | * Supported devices: |
5 | * - Vengeance K90 Keyboard | 5 | * - Vengeance K90 Keyboard |
6 | * - Scimitar PRO RGB Gaming Mouse | ||
6 | * | 7 | * |
7 | * Copyright (c) 2015 Clement Vuchener | 8 | * Copyright (c) 2015 Clement Vuchener |
9 | * Copyright (c) 2017 Oscar Campos | ||
8 | */ | 10 | */ |
9 | 11 | ||
10 | /* | 12 | /* |
@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev, | |||
670 | return 0; | 672 | return 0; |
671 | } | 673 | } |
672 | 674 | ||
675 | /* | ||
676 | * The report descriptor of the Corsair Scimitar RGB Pro gaming mouse is | ||
677 | * not parseable as it defines two consecutive Logical Minimums for | ||
678 | * the Usage Page (Consumer) in rdesc bytes 75 and 77, byte 77 being 0x16 | ||
679 | * when it should obviously be 0x26 for a 16-bit Logical Maximum. This | ||
680 | * prevents proper parsing of the report descriptor due to the Logical | ||
681 | * Minimum being larger than the Logical Maximum. | ||
682 | * | ||
683 | * This driver fixes the report descriptor for: | ||
684 | * - USB ID 1b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse | ||
685 | */ | ||
686 | |||
687 | static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, | ||
688 | unsigned int *rsize) | ||
689 | { | ||
690 | struct usb_interface *intf = to_usb_interface(hdev->dev.parent); | ||
691 | |||
692 | if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { | ||
693 | /* | ||
694 | * Corsair Scimitar RGB Pro report descriptor is broken and | ||
695 | * defines two different Logical Minimums for the Consumer | ||
696 | * Application. Byte 77 should be a 0x26 defining a 16-bit | ||
697 | * integer for the Logical Maximum, but it is a 0x16 | ||
698 | * instead (Logical Minimum). | ||
699 | */ | ||
700 | switch (hdev->product) { | ||
701 | case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB: | ||
702 | if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16 | ||
703 | && rdesc[78] == 0xff && rdesc[79] == 0x0f) { | ||
704 | hid_info(hdev, "Fixing up report descriptor\n"); | ||
705 | rdesc[77] = 0x26; | ||
706 | } | ||
707 | break; | ||
708 | } | ||
709 | |||
710 | } | ||
711 | return rdesc; | ||
712 | } | ||
713 | |||
673 | static const struct hid_device_id corsair_devices[] = { | 714 | static const struct hid_device_id corsair_devices[] = { |
674 | { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), | 715 | { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), |
675 | .driver_data = CORSAIR_USE_K90_MACRO | | 716 | .driver_data = CORSAIR_USE_K90_MACRO | |
676 | CORSAIR_USE_K90_BACKLIGHT }, | 717 | CORSAIR_USE_K90_BACKLIGHT }, |
718 | { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, | ||
719 | USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, | ||
677 | {} | 720 | {} |
678 | }; | 721 | }; |
679 | 722 | ||
@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = { | |||
686 | .event = corsair_event, | 729 | .event = corsair_event, |
687 | .remove = corsair_remove, | 730 | .remove = corsair_remove, |
688 | .input_mapping = corsair_input_mapping, | 731 | .input_mapping = corsair_input_mapping, |
732 | .report_fixup = corsair_mouse_report_fixup, | ||
689 | }; | 733 | }; |
690 | 734 | ||
691 | module_hid_driver(corsair_driver); | 735 | module_hid_driver(corsair_driver); |
692 | 736 | ||
693 | MODULE_LICENSE("GPL"); | 737 | MODULE_LICENSE("GPL"); |
738 | /* Original K90 driver author */ | ||
694 | MODULE_AUTHOR("Clement Vuchener"); | 739 | MODULE_AUTHOR("Clement Vuchener"); |
740 | /* Scimitar PRO RGB driver author */ | ||
741 | MODULE_AUTHOR("Oscar Campos"); | ||
695 | MODULE_DESCRIPTION("HID driver for Corsair devices"); | 742 | MODULE_DESCRIPTION("HID driver for Corsair devices"); |
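The byte values the fixup above reasons about follow from the HID short-item encoding: bits 1:0 of the prefix give the data size, bits 3:2 the type and bits 7:4 the tag, so 0x16 decodes as a 2-byte Logical Minimum and 0x26 as the matching 2-byte Logical Maximum, which is why rewriting the single tag nibble repairs the descriptor. A small stand-alone decoder of that prefix layout (user-space C, assuming standard short items only):

#include <stdio.h>

/* HID short-item prefix: bSize (bits 1:0), bType (bits 3:2), bTag (bits 7:4). */
static void decode_prefix(unsigned char prefix)
{
	static const unsigned char size_map[4] = { 0, 1, 2, 4 };

	printf("prefix 0x%02x: tag 0x%x, type %u, %u data byte(s)\n",
	       prefix, prefix >> 4, (prefix >> 2) & 0x3, size_map[prefix & 0x3]);
}

int main(void)
{
	decode_prefix(0x16);	/* Global item, tag 1: Logical Minimum, 2 bytes */
	decode_prefix(0x26);	/* Global item, tag 2: Logical Maximum, 2 bytes */
	return 0;
}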
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 86c95d30ac80..0e2e7c571d22 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h | |||
@@ -278,6 +278,9 @@ | |||
278 | #define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13 | 278 | #define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13 |
279 | #define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15 | 279 | #define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15 |
280 | #define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17 | 280 | #define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17 |
281 | #define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38 | ||
282 | #define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39 | ||
283 | #define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e | ||
281 | 284 | ||
282 | #define USB_VENDOR_ID_CREATIVELABS 0x041e | 285 | #define USB_VENDOR_ID_CREATIVELABS 0x041e |
283 | #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c | 286 | #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c |
@@ -557,6 +560,7 @@ | |||
557 | 560 | ||
558 | #define USB_VENDOR_ID_JESS 0x0c45 | 561 | #define USB_VENDOR_ID_JESS 0x0c45 |
559 | #define USB_DEVICE_ID_JESS_YUREX 0x1010 | 562 | #define USB_DEVICE_ID_JESS_YUREX 0x1010 |
563 | #define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112 | ||
560 | 564 | ||
561 | #define USB_VENDOR_ID_JESS2 0x0f30 | 565 | #define USB_VENDOR_ID_JESS2 0x0f30 |
562 | #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111 | 566 | #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111 |
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index f405b07d0381..740996f9bdd4 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c | |||
@@ -2632,6 +2632,8 @@ err_stop: | |||
2632 | sony_leds_remove(sc); | 2632 | sony_leds_remove(sc); |
2633 | if (sc->quirks & SONY_BATTERY_SUPPORT) | 2633 | if (sc->quirks & SONY_BATTERY_SUPPORT) |
2634 | sony_battery_remove(sc); | 2634 | sony_battery_remove(sc); |
2635 | if (sc->touchpad) | ||
2636 | sony_unregister_touchpad(sc); | ||
2635 | sony_cancel_work_sync(sc); | 2637 | sony_cancel_work_sync(sc); |
2636 | kfree(sc->output_report_dmabuf); | 2638 | kfree(sc->output_report_dmabuf); |
2637 | sony_remove_dev_list(sc); | 2639 | sony_remove_dev_list(sc); |
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index d6847a664446..a69a3c88ab29 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c | |||
@@ -80,6 +80,9 @@ static const struct hid_blacklist { | |||
80 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS }, | 80 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS }, |
81 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS }, | 81 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS }, |
82 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, | 82 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, |
83 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, | ||
84 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, | ||
85 | { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, | ||
83 | { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, | 86 | { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, |
84 | { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, | 87 | { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, |
85 | { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, | 88 | { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, |
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index be8f7e2a026f..994bddc55b82 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c | |||
@@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev) | |||
2579 | 2579 | ||
2580 | /* make sure we don't trigger the LEDs */ | 2580 | /* make sure we don't trigger the LEDs */ |
2581 | wacom_led_groups_release(wacom); | 2581 | wacom_led_groups_release(wacom); |
2582 | wacom_release_resources(wacom); | 2582 | |
2583 | if (wacom->wacom_wac.features.type != REMOTE) | ||
2584 | wacom_release_resources(wacom); | ||
2583 | 2585 | ||
2584 | hid_set_drvdata(hdev, NULL); | 2586 | hid_set_drvdata(hdev, NULL); |
2585 | } | 2587 | } |
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4aa3de9f1163..94250c293be2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c | |||
@@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, | |||
1959 | input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH); | 1959 | input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH); |
1960 | input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL); | 1960 | input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL); |
1961 | input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH); | 1961 | input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH); |
1962 | input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); | 1962 | if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) { |
1963 | input_set_capability(input, EV_KEY, BTN_TOOL_LENS); | 1963 | input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); |
1964 | input_set_capability(input, EV_KEY, BTN_TOOL_LENS); | ||
1965 | } | ||
1964 | break; | 1966 | break; |
1965 | case WACOM_HID_WD_FINGERWHEEL: | 1967 | case WACOM_HID_WD_FINGERWHEEL: |
1966 | wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0); | 1968 | wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0); |
@@ -4197,10 +4199,10 @@ static const struct wacom_features wacom_features_0x343 = | |||
4197 | WACOM_DTU_OFFSET, WACOM_DTU_OFFSET }; | 4199 | WACOM_DTU_OFFSET, WACOM_DTU_OFFSET }; |
4198 | static const struct wacom_features wacom_features_0x360 = | 4200 | static const struct wacom_features wacom_features_0x360 = |
4199 | { "Wacom Intuos Pro M", 44800, 29600, 8191, 63, | 4201 | { "Wacom Intuos Pro M", 44800, 29600, 8191, 63, |
4200 | INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; | 4202 | INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; |
4201 | static const struct wacom_features wacom_features_0x361 = | 4203 | static const struct wacom_features wacom_features_0x361 = |
4202 | { "Wacom Intuos Pro L", 62200, 43200, 8191, 63, | 4204 | { "Wacom Intuos Pro L", 62200, 43200, 8191, 63, |
4203 | INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; | 4205 | INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; |
4204 | 4206 | ||
4205 | static const struct wacom_features wacom_features_HID_ANY_ID = | 4207 | static const struct wacom_features wacom_features_HID_ANY_ID = |
4206 | { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; | 4208 | { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; |
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c index 09b4df74291e..bb865695d7a6 100644 --- a/drivers/ptp/ptp_kvm.c +++ b/drivers/ptp/ptp_kvm.c | |||
@@ -193,10 +193,7 @@ static int __init ptp_kvm_init(void) | |||
193 | 193 | ||
194 | kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); | 194 | kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); |
195 | 195 | ||
196 | if (IS_ERR(kvm_ptp_clock.ptp_clock)) | 196 | return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock); |
197 | return PTR_ERR(kvm_ptp_clock.ptp_clock); | ||
198 | |||
199 | return 0; | ||
200 | } | 197 | } |
201 | 198 | ||
202 | module_init(ptp_kvm_init); | 199 | module_init(ptp_kvm_init); |
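The ptp_kvm cleanup above uses PTR_ERR_OR_ZERO(), which collapses the usual IS_ERR()/PTR_ERR() tail into a single expression: the encoded errno when the pointer is an error cookie, 0 otherwise. A minimal before/after sketch; the registration helper is hypothetical:

#include <linux/err.h>

struct example_clock;				/* opaque handle, illustrative */
struct example_clock *example_register(void);	/* hypothetical registration call */

/* Open-coded check */
static int init_open_coded(void)
{
	struct example_clock *clk = example_register();

	if (IS_ERR(clk))
		return PTR_ERR(clk);
	return 0;
}

/* Same behaviour with the helper */
static int init_compact(void)
{
	struct example_clock *clk = example_register();

	return PTR_ERR_OR_ZERO(clk);
}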
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 65f86bc24c07..1dc43fc5f65f 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig | |||
@@ -76,7 +76,7 @@ config QCOM_ADSP_PIL | |||
76 | depends on OF && ARCH_QCOM | 76 | depends on OF && ARCH_QCOM |
77 | depends on REMOTEPROC | 77 | depends on REMOTEPROC |
78 | depends on QCOM_SMEM | 78 | depends on QCOM_SMEM |
79 | depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) | 79 | depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) |
80 | select MFD_SYSCON | 80 | select MFD_SYSCON |
81 | select QCOM_MDT_LOADER | 81 | select QCOM_MDT_LOADER |
82 | select QCOM_RPROC_COMMON | 82 | select QCOM_RPROC_COMMON |
@@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL | |||
93 | depends on OF && ARCH_QCOM | 93 | depends on OF && ARCH_QCOM |
94 | depends on QCOM_SMEM | 94 | depends on QCOM_SMEM |
95 | depends on REMOTEPROC | 95 | depends on REMOTEPROC |
96 | depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) | 96 | depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) |
97 | select MFD_SYSCON | 97 | select MFD_SYSCON |
98 | select QCOM_RPROC_COMMON | 98 | select QCOM_RPROC_COMMON |
99 | select QCOM_SCM | 99 | select QCOM_SCM |
@@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL | |||
104 | config QCOM_WCNSS_PIL | 104 | config QCOM_WCNSS_PIL |
105 | tristate "Qualcomm WCNSS Peripheral Image Loader" | 105 | tristate "Qualcomm WCNSS Peripheral Image Loader" |
106 | depends on OF && ARCH_QCOM | 106 | depends on OF && ARCH_QCOM |
107 | depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) | 107 | depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) |
108 | depends on QCOM_SMEM | 108 | depends on QCOM_SMEM |
109 | depends on REMOTEPROC | 109 | depends on REMOTEPROC |
110 | select QCOM_MDT_LOADER | 110 | select QCOM_MDT_LOADER |
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 519ec1787117..efd84d1d178b 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c | |||
@@ -40,7 +40,8 @@ struct read_info_sccb { | |||
40 | u8 fac85; /* 85 */ | 40 | u8 fac85; /* 85 */ |
41 | u8 _pad_86[91 - 86]; /* 86-90 */ | 41 | u8 _pad_86[91 - 86]; /* 86-90 */ |
42 | u8 flags; /* 91 */ | 42 | u8 flags; /* 91 */ |
43 | u8 _pad_92[99 - 92]; /* 92-98 */ | 43 | u8 _pad_92[98 - 92]; /* 92-97 */ |
44 | u8 fac98; /* 98 */ | ||
44 | u8 hamaxpow; /* 99 */ | 45 | u8 hamaxpow; /* 99 */ |
45 | u32 rnsize2; /* 100-103 */ | 46 | u32 rnsize2; /* 100-103 */ |
46 | u64 rnmax2; /* 104-111 */ | 47 | u64 rnmax2; /* 104-111 */ |
@@ -99,6 +100,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) | |||
99 | sclp.has_pfmfi = !!(sccb->fac117 & 0x40); | 100 | sclp.has_pfmfi = !!(sccb->fac117 & 0x40); |
100 | sclp.has_ibs = !!(sccb->fac117 & 0x20); | 101 | sclp.has_ibs = !!(sccb->fac117 & 0x20); |
101 | sclp.has_hvs = !!(sccb->fac119 & 0x80); | 102 | sclp.has_hvs = !!(sccb->fac119 & 0x80); |
103 | sclp.has_kss = !!(sccb->fac98 & 0x01); | ||
102 | if (sccb->fac85 & 0x02) | 104 | if (sccb->fac85 & 0x02) |
103 | S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; | 105 | S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; |
104 | sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; | 106 | sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; |
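The sclp_early change above carves a named fac98 byte out of what used to be anonymous padding; shrinking _pad_92 to cover bytes 92-97 keeps every later field at its documented offset, and the new facility is then a single bit test. A cut-down sketch of the layout arithmetic, with an illustrative struct and helper:

#include <linux/types.h>

/* Offsets in the comments mirror the SCCB byte positions. */
struct example_sccb {
	u8 _pad_0[91];		/* 0-90 */
	u8 flags;		/* 91 */
	u8 _pad_92[98 - 92];	/* 92-97: six pad bytes */
	u8 fac98;		/* 98: newly named, offset unchanged */
	u8 hamaxpow;		/* 99 */
};

static inline bool example_has_kss(const struct example_sccb *sccb)
{
	return !!(sccb->fac98 & 0x01);	/* lowest bit of facility byte 98 */
}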
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4bf55b5d78be..3c52867dfe28 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig | |||
@@ -1253,20 +1253,6 @@ config SCSI_LPFC_DEBUG_FS | |||
1253 | This makes debugging information from the lpfc driver | 1253 | This makes debugging information from the lpfc driver |
1254 | available via the debugfs filesystem. | 1254 | available via the debugfs filesystem. |
1255 | 1255 | ||
1256 | config LPFC_NVME_INITIATOR | ||
1257 | bool "Emulex LightPulse Fibre Channel NVME Initiator Support" | ||
1258 | depends on SCSI_LPFC && NVME_FC | ||
1259 | ---help--- | ||
1260 | This enables NVME Initiator support in the Emulex lpfc driver. | ||
1261 | |||
1262 | config LPFC_NVME_TARGET | ||
1263 | bool "Emulex LightPulse Fibre Channel NVME Initiator Support" | ||
1264 | depends on SCSI_LPFC && NVME_TARGET_FC | ||
1265 | ---help--- | ||
1266 | This enables NVME Target support in the Emulex lpfc driver. | ||
1267 | Target enablement must still be enabled on a per adapter | ||
1268 | basis by module parameters. | ||
1269 | |||
1270 | config SCSI_SIM710 | 1256 | config SCSI_SIM710 |
1271 | tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" | 1257 | tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" |
1272 | depends on (EISA || MCA) && SCSI | 1258 | depends on (EISA || MCA) && SCSI |
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 524a0c755ed7..0d0be7754a65 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c | |||
@@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr, | |||
2956 | /* fill_cmd can't fail here, no data buffer to map. */ | 2956 | /* fill_cmd can't fail here, no data buffer to map. */ |
2957 | (void) fill_cmd(c, reset_type, h, NULL, 0, 0, | 2957 | (void) fill_cmd(c, reset_type, h, NULL, 0, 0, |
2958 | scsi3addr, TYPE_MSG); | 2958 | scsi3addr, TYPE_MSG); |
2959 | rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT); | 2959 | rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT); |
2960 | if (rc) { | 2960 | if (rc) { |
2961 | dev_warn(&h->pdev->dev, "Failed to send reset command\n"); | 2961 | dev_warn(&h->pdev->dev, "Failed to send reset command\n"); |
2962 | goto out; | 2962 | goto out; |
@@ -3714,7 +3714,7 @@ exit_failed: | |||
3714 | * # (integer code indicating one of several NOT READY states | 3714 | * # (integer code indicating one of several NOT READY states |
3715 | * describing why a volume is to be kept offline) | 3715 | * describing why a volume is to be kept offline) |
3716 | */ | 3716 | */ |
3717 | static int hpsa_volume_offline(struct ctlr_info *h, | 3717 | static unsigned char hpsa_volume_offline(struct ctlr_info *h, |
3718 | unsigned char scsi3addr[]) | 3718 | unsigned char scsi3addr[]) |
3719 | { | 3719 | { |
3720 | struct CommandList *c; | 3720 | struct CommandList *c; |
@@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, | |||
3735 | DEFAULT_TIMEOUT); | 3735 | DEFAULT_TIMEOUT); |
3736 | if (rc) { | 3736 | if (rc) { |
3737 | cmd_free(h, c); | 3737 | cmd_free(h, c); |
3738 | return 0; | 3738 | return HPSA_VPD_LV_STATUS_UNSUPPORTED; |
3739 | } | 3739 | } |
3740 | sense = c->err_info->SenseInfo; | 3740 | sense = c->err_info->SenseInfo; |
3741 | if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) | 3741 | if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) |
@@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h, | |||
3746 | cmd_status = c->err_info->CommandStatus; | 3746 | cmd_status = c->err_info->CommandStatus; |
3747 | scsi_status = c->err_info->ScsiStatus; | 3747 | scsi_status = c->err_info->ScsiStatus; |
3748 | cmd_free(h, c); | 3748 | cmd_free(h, c); |
3749 | /* Is the volume 'not ready'? */ | ||
3750 | if (cmd_status != CMD_TARGET_STATUS || | ||
3751 | scsi_status != SAM_STAT_CHECK_CONDITION || | ||
3752 | sense_key != NOT_READY || | ||
3753 | asc != ASC_LUN_NOT_READY) { | ||
3754 | return 0; | ||
3755 | } | ||
3756 | 3749 | ||
3757 | /* Determine the reason for not ready state */ | 3750 | /* Determine the reason for not ready state */ |
3758 | ldstat = hpsa_get_volume_status(h, scsi3addr); | 3751 | ldstat = hpsa_get_volume_status(h, scsi3addr); |
3759 | 3752 | ||
3760 | /* Keep volume offline in certain cases: */ | 3753 | /* Keep volume offline in certain cases: */ |
3761 | switch (ldstat) { | 3754 | switch (ldstat) { |
3755 | case HPSA_LV_FAILED: | ||
3762 | case HPSA_LV_UNDERGOING_ERASE: | 3756 | case HPSA_LV_UNDERGOING_ERASE: |
3763 | case HPSA_LV_NOT_AVAILABLE: | 3757 | case HPSA_LV_NOT_AVAILABLE: |
3764 | case HPSA_LV_UNDERGOING_RPI: | 3758 | case HPSA_LV_UNDERGOING_RPI: |
@@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, | |||
3780 | default: | 3774 | default: |
3781 | break; | 3775 | break; |
3782 | } | 3776 | } |
3783 | return 0; | 3777 | return HPSA_LV_OK; |
3784 | } | 3778 | } |
3785 | 3779 | ||
3786 | /* | 3780 | /* |
@@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h, | |||
3853 | /* Do an inquiry to the device to see what it is. */ | 3847 | /* Do an inquiry to the device to see what it is. */ |
3854 | if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, | 3848 | if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, |
3855 | (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { | 3849 | (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { |
3856 | /* Inquiry failed (msg printed already) */ | ||
3857 | dev_err(&h->pdev->dev, | 3850 | dev_err(&h->pdev->dev, |
3858 | "hpsa_update_device_info: inquiry failed\n"); | 3851 | "%s: inquiry failed, device will be skipped.\n", |
3859 | rc = -EIO; | 3852 | __func__); |
3853 | rc = HPSA_INQUIRY_FAILED; | ||
3860 | goto bail_out; | 3854 | goto bail_out; |
3861 | } | 3855 | } |
3862 | 3856 | ||
@@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h, | |||
3885 | if ((this_device->devtype == TYPE_DISK || | 3879 | if ((this_device->devtype == TYPE_DISK || |
3886 | this_device->devtype == TYPE_ZBC) && | 3880 | this_device->devtype == TYPE_ZBC) && |
3887 | is_logical_dev_addr_mode(scsi3addr)) { | 3881 | is_logical_dev_addr_mode(scsi3addr)) { |
3888 | int volume_offline; | 3882 | unsigned char volume_offline; |
3889 | 3883 | ||
3890 | hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); | 3884 | hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); |
3891 | if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) | 3885 | if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) |
3892 | hpsa_get_ioaccel_status(h, scsi3addr, this_device); | 3886 | hpsa_get_ioaccel_status(h, scsi3addr, this_device); |
3893 | volume_offline = hpsa_volume_offline(h, scsi3addr); | 3887 | volume_offline = hpsa_volume_offline(h, scsi3addr); |
3894 | if (volume_offline < 0 || volume_offline > 0xff) | 3888 | if (volume_offline == HPSA_LV_FAILED) { |
3895 | volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; | 3889 | rc = HPSA_LV_FAILED; |
3896 | this_device->volume_offline = volume_offline & 0xff; | 3890 | dev_err(&h->pdev->dev, |
3891 | "%s: LV failed, device will be skipped.\n", | ||
3892 | __func__); | ||
3893 | goto bail_out; | ||
3894 | } | ||
3897 | } else { | 3895 | } else { |
3898 | this_device->raid_level = RAID_UNKNOWN; | 3896 | this_device->raid_level = RAID_UNKNOWN; |
3899 | this_device->offload_config = 0; | 3897 | this_device->offload_config = 0; |
@@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) | |||
4379 | goto out; | 4377 | goto out; |
4380 | } | 4378 | } |
4381 | if (rc) { | 4379 | if (rc) { |
4382 | dev_warn(&h->pdev->dev, | 4380 | h->drv_req_rescan = 1; |
4383 | "Inquiry failed, skipping device.\n"); | ||
4384 | continue; | 4381 | continue; |
4385 | } | 4382 | } |
4386 | 4383 | ||
@@ -5558,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h) | |||
5558 | 5555 | ||
5559 | spin_lock_irqsave(&h->scan_lock, flags); | 5556 | spin_lock_irqsave(&h->scan_lock, flags); |
5560 | h->scan_finished = 1; | 5557 | h->scan_finished = 1; |
5561 | wake_up_all(&h->scan_wait_queue); | 5558 | wake_up(&h->scan_wait_queue); |
5562 | spin_unlock_irqrestore(&h->scan_lock, flags); | 5559 | spin_unlock_irqrestore(&h->scan_lock, flags); |
5563 | } | 5560 | } |
5564 | 5561 | ||
@@ -5576,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh) | |||
5576 | if (unlikely(lockup_detected(h))) | 5573 | if (unlikely(lockup_detected(h))) |
5577 | return hpsa_scan_complete(h); | 5574 | return hpsa_scan_complete(h); |
5578 | 5575 | ||
5576 | /* | ||
5577 | * If a scan is already waiting to run, no need to add another | ||
5578 | */ | ||
5579 | spin_lock_irqsave(&h->scan_lock, flags); | ||
5580 | if (h->scan_waiting) { | ||
5581 | spin_unlock_irqrestore(&h->scan_lock, flags); | ||
5582 | return; | ||
5583 | } | ||
5584 | |||
5585 | spin_unlock_irqrestore(&h->scan_lock, flags); | ||
5586 | |||
5579 | /* wait until any scan already in progress is finished. */ | 5587 | /* wait until any scan already in progress is finished. */ |
5580 | while (1) { | 5588 | while (1) { |
5581 | spin_lock_irqsave(&h->scan_lock, flags); | 5589 | spin_lock_irqsave(&h->scan_lock, flags); |
5582 | if (h->scan_finished) | 5590 | if (h->scan_finished) |
5583 | break; | 5591 | break; |
5592 | h->scan_waiting = 1; | ||
5584 | spin_unlock_irqrestore(&h->scan_lock, flags); | 5593 | spin_unlock_irqrestore(&h->scan_lock, flags); |
5585 | wait_event(h->scan_wait_queue, h->scan_finished); | 5594 | wait_event(h->scan_wait_queue, h->scan_finished); |
5586 | /* Note: We don't need to worry about a race between this | 5595 | /* Note: We don't need to worry about a race between this |
@@ -5590,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh) | |||
5590 | */ | 5599 | */ |
5591 | } | 5600 | } |
5592 | h->scan_finished = 0; /* mark scan as in progress */ | 5601 | h->scan_finished = 0; /* mark scan as in progress */ |
5602 | h->scan_waiting = 0; | ||
5593 | spin_unlock_irqrestore(&h->scan_lock, flags); | 5603 | spin_unlock_irqrestore(&h->scan_lock, flags); |
5594 | 5604 | ||
5595 | if (unlikely(lockup_detected(h))) | 5605 | if (unlikely(lockup_detected(h))) |
@@ -8792,6 +8802,7 @@ reinit_after_soft_reset: | |||
8792 | init_waitqueue_head(&h->event_sync_wait_queue); | 8802 | init_waitqueue_head(&h->event_sync_wait_queue); |
8793 | mutex_init(&h->reset_mutex); | 8803 | mutex_init(&h->reset_mutex); |
8794 | h->scan_finished = 1; /* no scan currently in progress */ | 8804 | h->scan_finished = 1; /* no scan currently in progress */ |
8805 | h->scan_waiting = 0; | ||
8795 | 8806 | ||
8796 | pci_set_drvdata(pdev, h); | 8807 | pci_set_drvdata(pdev, h); |
8797 | h->ndevices = 0; | 8808 | h->ndevices = 0; |
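The hpsa hunks above add a scan_waiting flag so that at most one rescan is ever queued behind the scan in flight; a request arriving while another is already waiting can return immediately, since the queued scan will observe the same device state. A condensed sketch of that coalescing; the structure and helper are illustrative, not the driver's:

#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/types.h>

struct example_host {
	spinlock_t		scan_lock;
	int			scan_finished;
	u8			scan_waiting : 1;
	wait_queue_head_t	scan_wait_queue;
};

static void example_scan_start(struct example_host *h)
{
	unsigned long flags;

	/* Someone is already queued; their scan will see the same state. */
	spin_lock_irqsave(&h->scan_lock, flags);
	if (h->scan_waiting) {
		spin_unlock_irqrestore(&h->scan_lock, flags);
		return;
	}
	spin_unlock_irqrestore(&h->scan_lock, flags);

	/* Otherwise wait for any scan in progress, advertising that we wait. */
	spin_lock_irqsave(&h->scan_lock, flags);
	while (!h->scan_finished) {
		h->scan_waiting = 1;
		spin_unlock_irqrestore(&h->scan_lock, flags);
		wait_event(h->scan_wait_queue, h->scan_finished);
		spin_lock_irqsave(&h->scan_lock, flags);
	}
	h->scan_finished = 0;	/* mark our scan as running */
	h->scan_waiting = 0;
	spin_unlock_irqrestore(&h->scan_lock, flags);

	/* ... rescan devices, then set scan_finished and wake the queue ... */
}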
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index bf6cdc106654..6f04f2ad4125 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h | |||
@@ -201,6 +201,7 @@ struct ctlr_info { | |||
201 | dma_addr_t errinfo_pool_dhandle; | 201 | dma_addr_t errinfo_pool_dhandle; |
202 | unsigned long *cmd_pool_bits; | 202 | unsigned long *cmd_pool_bits; |
203 | int scan_finished; | 203 | int scan_finished; |
204 | u8 scan_waiting : 1; | ||
204 | spinlock_t scan_lock; | 205 | spinlock_t scan_lock; |
205 | wait_queue_head_t scan_wait_queue; | 206 | wait_queue_head_t scan_wait_queue; |
206 | 207 | ||
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index a584cdf07058..5961705eef76 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h | |||
@@ -156,6 +156,7 @@ | |||
156 | #define CFGTBL_BusType_Fibre2G 0x00000200l | 156 | #define CFGTBL_BusType_Fibre2G 0x00000200l |
157 | 157 | ||
158 | /* VPD Inquiry types */ | 158 | /* VPD Inquiry types */ |
159 | #define HPSA_INQUIRY_FAILED 0x02 | ||
159 | #define HPSA_VPD_SUPPORTED_PAGES 0x00 | 160 | #define HPSA_VPD_SUPPORTED_PAGES 0x00 |
160 | #define HPSA_VPD_LV_DEVICE_ID 0x83 | 161 | #define HPSA_VPD_LV_DEVICE_ID 0x83 |
161 | #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 | 162 | #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 |
@@ -166,6 +167,7 @@ | |||
166 | /* Logical volume states */ | 167 | /* Logical volume states */ |
167 | #define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff | 168 | #define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff |
168 | #define HPSA_LV_OK 0x0 | 169 | #define HPSA_LV_OK 0x0 |
170 | #define HPSA_LV_FAILED 0x01 | ||
169 | #define HPSA_LV_NOT_AVAILABLE 0x0b | 171 | #define HPSA_LV_NOT_AVAILABLE 0x0b |
170 | #define HPSA_LV_UNDERGOING_ERASE 0x0F | 172 | #define HPSA_LV_UNDERGOING_ERASE 0x0F |
171 | #define HPSA_LV_UNDERGOING_RPI 0x12 | 173 | #define HPSA_LV_UNDERGOING_RPI 0x12 |
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 5c3be3e6f5e2..22819afbaef5 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c | |||
@@ -3315,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, | |||
3315 | * lpfc_enable_fc4_type: Defines what FC4 types are supported. | 3315 | * lpfc_enable_fc4_type: Defines what FC4 types are supported. |
3316 | * Supported Values: 1 - register just FCP | 3316 | * Supported Values: 1 - register just FCP |
3317 | * 3 - register both FCP and NVME | 3317 | * 3 - register both FCP and NVME |
3318 | * Supported values are [1,3]. Default value is 3 | 3318 | * Supported values are [1,3]. Default value is 1 |
3319 | */ | 3319 | */ |
3320 | LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, | 3320 | LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP, |
3321 | LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, | 3321 | LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, |
3322 | "Define fc4 type to register with fabric."); | 3322 | "Define fc4 type to register with fabric."); |
3323 | 3323 | ||
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 2697d49da4d7..6cc561b04211 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c | |||
@@ -5891,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) | |||
5891 | /* Check to see if it matches any module parameter */ | 5891 | /* Check to see if it matches any module parameter */ |
5892 | for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { | 5892 | for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { |
5893 | if (wwn == lpfc_enable_nvmet[i]) { | 5893 | if (wwn == lpfc_enable_nvmet[i]) { |
5894 | #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) | ||
5894 | lpfc_printf_log(phba, KERN_ERR, LOG_INIT, | 5895 | lpfc_printf_log(phba, KERN_ERR, LOG_INIT, |
5895 | "6017 NVME Target %016llx\n", | 5896 | "6017 NVME Target %016llx\n", |
5896 | wwn); | 5897 | wwn); |
5897 | phba->nvmet_support = 1; /* a match */ | 5898 | phba->nvmet_support = 1; /* a match */ |
5899 | #else | ||
5900 | lpfc_printf_log(phba, KERN_ERR, LOG_INIT, | ||
5901 | "6021 Can't enable NVME Target." | ||
5902 | " NVME_TARGET_FC infrastructure" | ||
5903 | " is not in kernel\n"); | ||
5904 | #endif | ||
5898 | } | 5905 | } |
5899 | } | 5906 | } |
5900 | } | 5907 | } |
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0a4c19081409..0024de1c6c1f 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c | |||
@@ -2149,7 +2149,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) | |||
2149 | /* localport is allocated from the stack, but the registration | 2149 | /* localport is allocated from the stack, but the registration |
2150 | * call allocates heap memory as well as the private area. | 2150 | * call allocates heap memory as well as the private area. |
2151 | */ | 2151 | */ |
2152 | #ifdef CONFIG_LPFC_NVME_INITIATOR | 2152 | #if (IS_ENABLED(CONFIG_NVME_FC)) |
2153 | ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, | 2153 | ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, |
2154 | &vport->phba->pcidev->dev, &localport); | 2154 | &vport->phba->pcidev->dev, &localport); |
2155 | #else | 2155 | #else |
@@ -2190,7 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) | |||
2190 | void | 2190 | void |
2191 | lpfc_nvme_destroy_localport(struct lpfc_vport *vport) | 2191 | lpfc_nvme_destroy_localport(struct lpfc_vport *vport) |
2192 | { | 2192 | { |
2193 | #ifdef CONFIG_LPFC_NVME_INITIATOR | 2193 | #if (IS_ENABLED(CONFIG_NVME_FC)) |
2194 | struct nvme_fc_local_port *localport; | 2194 | struct nvme_fc_local_port *localport; |
2195 | struct lpfc_nvme_lport *lport; | 2195 | struct lpfc_nvme_lport *lport; |
2196 | struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; | 2196 | struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; |
@@ -2274,7 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) | |||
2274 | int | 2274 | int |
2275 | lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) | 2275 | lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) |
2276 | { | 2276 | { |
2277 | #ifdef CONFIG_LPFC_NVME_INITIATOR | 2277 | #if (IS_ENABLED(CONFIG_NVME_FC)) |
2278 | int ret = 0; | 2278 | int ret = 0; |
2279 | struct nvme_fc_local_port *localport; | 2279 | struct nvme_fc_local_port *localport; |
2280 | struct lpfc_nvme_lport *lport; | 2280 | struct lpfc_nvme_lport *lport; |
@@ -2403,7 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) | |||
2403 | void | 2403 | void |
2404 | lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) | 2404 | lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) |
2405 | { | 2405 | { |
2406 | #ifdef CONFIG_LPFC_NVME_INITIATOR | 2406 | #if (IS_ENABLED(CONFIG_NVME_FC)) |
2407 | int ret; | 2407 | int ret; |
2408 | struct nvme_fc_local_port *localport; | 2408 | struct nvme_fc_local_port *localport; |
2409 | struct lpfc_nvme_lport *lport; | 2409 | struct lpfc_nvme_lport *lport; |
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index b7739a554fe0..7ca868f394da 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c | |||
@@ -671,7 +671,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) | |||
671 | lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | | 671 | lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | |
672 | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; | 672 | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; |
673 | 673 | ||
674 | #ifdef CONFIG_LPFC_NVME_TARGET | 674 | #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) |
675 | error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, | 675 | error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, |
676 | &phba->pcidev->dev, | 676 | &phba->pcidev->dev, |
677 | &phba->targetport); | 677 | &phba->targetport); |
@@ -756,7 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, | |||
756 | void | 756 | void |
757 | lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) | 757 | lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) |
758 | { | 758 | { |
759 | #ifdef CONFIG_LPFC_NVME_TARGET | 759 | #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) |
760 | struct lpfc_nvmet_tgtport *tgtp; | 760 | struct lpfc_nvmet_tgtport *tgtp; |
761 | 761 | ||
762 | if (phba->nvmet_support == 0) | 762 | if (phba->nvmet_support == 0) |
@@ -788,7 +788,7 @@ static void | |||
788 | lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, | 788 | lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, |
789 | struct hbq_dmabuf *nvmebuf) | 789 | struct hbq_dmabuf *nvmebuf) |
790 | { | 790 | { |
791 | #ifdef CONFIG_LPFC_NVME_TARGET | 791 | #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) |
792 | struct lpfc_nvmet_tgtport *tgtp; | 792 | struct lpfc_nvmet_tgtport *tgtp; |
793 | struct fc_frame_header *fc_hdr; | 793 | struct fc_frame_header *fc_hdr; |
794 | struct lpfc_nvmet_rcv_ctx *ctxp; | 794 | struct lpfc_nvmet_rcv_ctx *ctxp; |
@@ -891,7 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, | |||
891 | struct rqb_dmabuf *nvmebuf, | 891 | struct rqb_dmabuf *nvmebuf, |
892 | uint64_t isr_timestamp) | 892 | uint64_t isr_timestamp) |
893 | { | 893 | { |
894 | #ifdef CONFIG_LPFC_NVME_TARGET | 894 | #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) |
895 | struct lpfc_nvmet_rcv_ctx *ctxp; | 895 | struct lpfc_nvmet_rcv_ctx *ctxp; |
896 | struct lpfc_nvmet_tgtport *tgtp; | 896 | struct lpfc_nvmet_tgtport *tgtp; |
897 | struct fc_frame_header *fc_hdr; | 897 | struct fc_frame_header *fc_hdr; |
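The lpfc hunks above replace the driver-private CONFIG_LPFC_NVME_* switches with IS_ENABLED() tests on the transport symbols themselves; IS_ENABLED(CONFIG_FOO) is 1 for FOO=y or FOO=m, so one preprocessor test tracks the real NVME-FC configuration instead of a separate bool option. A minimal sketch of the pattern, with an illustrative function body:

#include <linux/kconfig.h>
#include <linux/errno.h>

#if IS_ENABLED(CONFIG_NVME_FC)
static int example_register_initiator(void)
{
	/* call into the NVME-FC transport here */
	return 0;
}
#else
static int example_register_initiator(void)
{
	return -ENXIO;	/* transport not built into this kernel */
}
#endif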
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index e7e5974e1a2c..2b209bbb4c91 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h | |||
@@ -35,8 +35,8 @@ | |||
35 | /* | 35 | /* |
36 | * MegaRAID SAS Driver meta data | 36 | * MegaRAID SAS Driver meta data |
37 | */ | 37 | */ |
38 | #define MEGASAS_VERSION "07.701.16.00-rc1" | 38 | #define MEGASAS_VERSION "07.701.17.00-rc1" |
39 | #define MEGASAS_RELDATE "February 2, 2017" | 39 | #define MEGASAS_RELDATE "March 2, 2017" |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * Device IDs | 42 | * Device IDs |
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 7ac9a9ee9bd4..0016f12cc563 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c | |||
@@ -1963,6 +1963,9 @@ scan_target: | |||
1963 | if (!mr_device_priv_data) | 1963 | if (!mr_device_priv_data) |
1964 | return -ENOMEM; | 1964 | return -ENOMEM; |
1965 | sdev->hostdata = mr_device_priv_data; | 1965 | sdev->hostdata = mr_device_priv_data; |
1966 | |||
1967 | atomic_set(&mr_device_priv_data->r1_ldio_hint, | ||
1968 | instance->r1_ldio_hint_default); | ||
1966 | return 0; | 1969 | return 0; |
1967 | } | 1970 | } |
1968 | 1971 | ||
@@ -5034,10 +5037,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe) | |||
5034 | &instance->irq_context[j]); | 5037 | &instance->irq_context[j]); |
5035 | /* Retry irq register for IO_APIC*/ | 5038 | /* Retry irq register for IO_APIC*/ |
5036 | instance->msix_vectors = 0; | 5039 | instance->msix_vectors = 0; |
5037 | if (is_probe) | 5040 | if (is_probe) { |
5041 | pci_free_irq_vectors(instance->pdev); | ||
5038 | return megasas_setup_irqs_ioapic(instance); | 5042 | return megasas_setup_irqs_ioapic(instance); |
5039 | else | 5043 | } else { |
5040 | return -1; | 5044 | return -1; |
5045 | } | ||
5041 | } | 5046 | } |
5042 | } | 5047 | } |
5043 | return 0; | 5048 | return 0; |
@@ -5277,9 +5282,11 @@ static int megasas_init_fw(struct megasas_instance *instance) | |||
5277 | MPI2_REPLY_POST_HOST_INDEX_OFFSET); | 5282 | MPI2_REPLY_POST_HOST_INDEX_OFFSET); |
5278 | } | 5283 | } |
5279 | 5284 | ||
5280 | i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); | 5285 | if (!instance->msix_vectors) { |
5281 | if (i < 0) | 5286 | i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); |
5282 | goto fail_setup_irqs; | 5287 | if (i < 0) |
5288 | goto fail_setup_irqs; | ||
5289 | } | ||
5283 | 5290 | ||
5284 | dev_info(&instance->pdev->dev, | 5291 | dev_info(&instance->pdev->dev, |
5285 | "firmware supports msix\t: (%d)", fw_msix_count); | 5292 | "firmware supports msix\t: (%d)", fw_msix_count); |
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 29650ba669da..f990ab4d45e1 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c | |||
@@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context, | |||
2159 | cpu_sel = MR_RAID_CTX_CPUSEL_1; | 2159 | cpu_sel = MR_RAID_CTX_CPUSEL_1; |
2160 | 2160 | ||
2161 | if (is_stream_detected(rctx_g35) && | 2161 | if (is_stream_detected(rctx_g35) && |
2162 | (raid->level == 5) && | 2162 | ((raid->level == 5) || (raid->level == 6)) && |
2163 | (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) && | 2163 | (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) && |
2164 | (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS)) | 2164 | (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS)) |
2165 | cpu_sel = MR_RAID_CTX_CPUSEL_0; | 2165 | cpu_sel = MR_RAID_CTX_CPUSEL_0; |
@@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, | |||
2338 | fp_possible = false; | 2338 | fp_possible = false; |
2339 | atomic_dec(&instance->fw_outstanding); | 2339 | atomic_dec(&instance->fw_outstanding); |
2340 | } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) || | 2340 | } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) || |
2341 | atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) { | 2341 | (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) { |
2342 | fp_possible = false; | 2342 | fp_possible = false; |
2343 | atomic_dec(&instance->fw_outstanding); | 2343 | atomic_dec(&instance->fw_outstanding); |
2344 | if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE) | 2344 | if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE) |
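The megaraid fix above hinges on atomic_dec_if_positive(): the helper only stores the decrement when the result stays non-negative, but it always returns the old value minus one, so the return can be negative and must be compared against zero explicitly rather than treated as a boolean. A small sketch of a credit counter using it, with illustrative names:

#include <linux/atomic.h>
#include <linux/types.h>

/* Returns true when a credit was actually consumed.  The raw return of
 * atomic_dec_if_positive() is old - 1: >= 0 means the decrement happened,
 * < 0 means the counter was already zero and was left untouched. */
static bool example_take_credit(atomic_t *credits)
{
	return atomic_dec_if_positive(credits) >= 0;
}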
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 1359913bf840..e8c26e6e6237 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c | |||
@@ -7642,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev, | |||
7642 | if (kstrtoul(buf, 0, &value)) | 7642 | if (kstrtoul(buf, 0, &value)) |
7643 | return -EINVAL; | 7643 | return -EINVAL; |
7644 | 7644 | ||
7645 | if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) | 7645 | if (value >= UFS_PM_LVL_MAX) |
7646 | return -EINVAL; | 7646 | return -EINVAL; |
7647 | 7647 | ||
7648 | spin_lock_irqsave(hba->host->host_lock, flags); | 7648 | spin_lock_irqsave(hba->host->host_lock, flags); |
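The ufshcd hunk above drops a lower-bound test that can never fire: the value comes from kstrtoul() and is unsigned, and UFS_PM_LVL_0 is the first enumerator, so (value < UFS_PM_LVL_0) is always false and only the upper bound matters. A sketch of the reduced check, using a stand-in limit rather than the driver's real enum:

#include <linux/errno.h>

enum { EXAMPLE_PM_LVL_MAX = 6 };	/* stand-in for UFS_PM_LVL_MAX */

static int example_validate_pm_level(unsigned long value)
{
	/* value is unsigned, so a "value < 0"-style lower bound is dead code */
	if (value >= EXAMPLE_PM_LVL_MAX)
		return -EINVAL;
	return 0;
}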
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index bcf1d33e6ffe..c334bcc59c64 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c | |||
@@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios, | |||
575 | pinctrl_select_state(ascport->pinctrl, | 575 | pinctrl_select_state(ascport->pinctrl, |
576 | ascport->states[NO_HW_FLOWCTRL]); | 576 | ascport->states[NO_HW_FLOWCTRL]); |
577 | 577 | ||
578 | gpiod = devm_get_gpiod_from_child(port->dev, "rts", | 578 | gpiod = devm_fwnode_get_gpiod_from_child(port->dev, |
579 | &np->fwnode); | 579 | "rts", |
580 | if (!IS_ERR(gpiod)) { | 580 | &np->fwnode, |
581 | gpiod_direction_output(gpiod, 0); | 581 | GPIOD_OUT_LOW, |
582 | np->name); | ||
583 | if (!IS_ERR(gpiod)) | ||
582 | ascport->rts = gpiod; | 584 | ascport->rts = gpiod; |
583 | } | ||
584 | } | 585 | } |
585 | } | 586 | } |
586 | 587 | ||
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index cf3de91fbfe7..8031d3a55a17 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c | |||
@@ -680,7 +680,7 @@ static void tce_iommu_free_table(struct tce_container *container, | |||
680 | unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; | 680 | unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; |
681 | 681 | ||
682 | tce_iommu_userspace_view_free(tbl, container->mm); | 682 | tce_iommu_userspace_view_free(tbl, container->mm); |
683 | tbl->it_ops->free(tbl); | 683 | iommu_tce_table_put(tbl); |
684 | decrement_locked_vm(container->mm, pages); | 684 | decrement_locked_vm(container->mm, pages); |
685 | } | 685 | } |
686 | 686 | ||
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1320 | else | 1320 | else |
1321 | set_dumpable(current->mm, suid_dumpable); | 1321 | set_dumpable(current->mm, suid_dumpable); |
1322 | 1322 | ||
1323 | arch_setup_new_exec(); | ||
1323 | perf_event_exec(); | 1324 | perf_event_exec(); |
1324 | __set_task_comm(current, kbasename(bprm->filename), true); | 1325 | __set_task_comm(current, kbasename(bprm->filename), true); |
1325 | 1326 | ||
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a77df377e2e8..ee2d0a485fc3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) | |||
196 | si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); | 196 | si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); |
197 | si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; | 197 | si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; |
198 | si->base_mem += NM_I(sbi)->nat_blocks / 8; | 198 | si->base_mem += NM_I(sbi)->nat_blocks / 8; |
199 | si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); | ||
199 | 200 | ||
200 | get_cache: | 201 | get_cache: |
201 | si->cache_mem = 0; | 202 | si->cache_mem = 0; |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4650c9b85de7..8d5c62b07b28 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
750 | dentry_blk = page_address(page); | 750 | dentry_blk = page_address(page); |
751 | bit_pos = dentry - dentry_blk->dentry; | 751 | bit_pos = dentry - dentry_blk->dentry; |
752 | for (i = 0; i < slots; i++) | 752 | for (i = 0; i < slots; i++) |
753 | clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); | 753 | __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); |
754 | 754 | ||
755 | /* Let's check and deallocate this dentry page */ | 755 | /* Let's check and deallocate this dentry page */ |
756 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | 756 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, |
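The f2fs hunks above (here and in node.c below) switch to the double-underscore bitop variants: __set_bit_le()/__clear_bit_le() are non-atomic, which is sufficient because the surrounding paths already serialize bitmap updates under f2fs's own locks, and it avoids a locked read-modify-write per dentry slot or nid. A minimal sketch of the distinction, assuming the caller holds whatever lock serializes the bitmap:

#include <linux/bitops.h>
#include <linux/types.h>

/* Non-atomic variants: correct only while the caller serializes updates. */
static void example_mark_slot(unsigned long *bitmap, unsigned int nr, bool set)
{
	if (set)
		__set_bit_le(nr, bitmap);
	else
		__clear_bit_le(nr, bitmap);
}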
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e849f83d6114..0a6e115562f6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -561,6 +561,8 @@ struct f2fs_nm_info { | |||
561 | struct mutex build_lock; /* lock for build free nids */ | 561 | struct mutex build_lock; /* lock for build free nids */ |
562 | unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; | 562 | unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; |
563 | unsigned char *nat_block_bitmap; | 563 | unsigned char *nat_block_bitmap; |
564 | unsigned short *free_nid_count; /* free nid count of NAT block */ | ||
565 | spinlock_t free_nid_lock; /* protect updating of nid count */ | ||
564 | 566 | ||
565 | /* for checkpoint */ | 567 | /* for checkpoint */ |
566 | char *nat_bitmap; /* NAT bitmap pointer */ | 568 | char *nat_bitmap; /* NAT bitmap pointer */ |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94967171dee8..481aa8dc79f4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |||
338 | set_nat_flag(e, IS_CHECKPOINTED, false); | 338 | set_nat_flag(e, IS_CHECKPOINTED, false); |
339 | __set_nat_cache_dirty(nm_i, e); | 339 | __set_nat_cache_dirty(nm_i, e); |
340 | 340 | ||
341 | if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) | ||
342 | clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); | ||
343 | |||
344 | /* update fsync_mark if its inode nat entry is still alive */ | 341 | /* update fsync_mark if its inode nat entry is still alive */ |
345 | if (ni->nid != ni->ino) | 342 | if (ni->nid != ni->ino) |
346 | e = __lookup_nat_cache(nm_i, ni->ino); | 343 | e = __lookup_nat_cache(nm_i, ni->ino); |
@@ -1823,7 +1820,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) | |||
1823 | kmem_cache_free(free_nid_slab, i); | 1820 | kmem_cache_free(free_nid_slab, i); |
1824 | } | 1821 | } |
1825 | 1822 | ||
1826 | void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set) | 1823 | static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, |
1824 | bool set, bool build, bool locked) | ||
1827 | { | 1825 | { |
1828 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1826 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
1829 | unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); | 1827 | unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); |
@@ -1833,9 +1831,18 @@ void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set) | |||
1833 | return; | 1831 | return; |
1834 | 1832 | ||
1835 | if (set) | 1833 | if (set) |
1836 | set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); | 1834 | __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); |
1837 | else | 1835 | else |
1838 | clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); | 1836 | __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); |
1837 | |||
1838 | if (!locked) | ||
1839 | spin_lock(&nm_i->free_nid_lock); | ||
1840 | if (set) | ||
1841 | nm_i->free_nid_count[nat_ofs]++; | ||
1842 | else if (!build) | ||
1843 | nm_i->free_nid_count[nat_ofs]--; | ||
1844 | if (!locked) | ||
1845 | spin_unlock(&nm_i->free_nid_lock); | ||
1839 | } | 1846 | } |
1840 | 1847 | ||
1841 | static void scan_nat_page(struct f2fs_sb_info *sbi, | 1848 | static void scan_nat_page(struct f2fs_sb_info *sbi, |
@@ -1847,7 +1854,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, | |||
1847 | unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); | 1854 | unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); |
1848 | int i; | 1855 | int i; |
1849 | 1856 | ||
1850 | set_bit_le(nat_ofs, nm_i->nat_block_bitmap); | 1857 | if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) |
1858 | return; | ||
1859 | |||
1860 | __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); | ||
1851 | 1861 | ||
1852 | i = start_nid % NAT_ENTRY_PER_BLOCK; | 1862 | i = start_nid % NAT_ENTRY_PER_BLOCK; |
1853 | 1863 | ||
@@ -1861,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, | |||
1861 | f2fs_bug_on(sbi, blk_addr == NEW_ADDR); | 1871 | f2fs_bug_on(sbi, blk_addr == NEW_ADDR); |
1862 | if (blk_addr == NULL_ADDR) | 1872 | if (blk_addr == NULL_ADDR) |
1863 | freed = add_free_nid(sbi, start_nid, true); | 1873 | freed = add_free_nid(sbi, start_nid, true); |
1864 | update_free_nid_bitmap(sbi, start_nid, freed); | 1874 | update_free_nid_bitmap(sbi, start_nid, freed, true, false); |
1865 | } | 1875 | } |
1866 | } | 1876 | } |
1867 | 1877 | ||
@@ -1877,6 +1887,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) | |||
1877 | for (i = 0; i < nm_i->nat_blocks; i++) { | 1887 | for (i = 0; i < nm_i->nat_blocks; i++) { |
1878 | if (!test_bit_le(i, nm_i->nat_block_bitmap)) | 1888 | if (!test_bit_le(i, nm_i->nat_block_bitmap)) |
1879 | continue; | 1889 | continue; |
1890 | if (!nm_i->free_nid_count[i]) | ||
1891 | continue; | ||
1880 | for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { | 1892 | for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { |
1881 | nid_t nid; | 1893 | nid_t nid; |
1882 | 1894 | ||
@@ -1907,58 +1919,6 @@ out: | |||
1907 | up_read(&nm_i->nat_tree_lock); | 1919 | up_read(&nm_i->nat_tree_lock); |
1908 | } | 1920 | } |
1909 | 1921 | ||
1910 | static int scan_nat_bits(struct f2fs_sb_info *sbi) | ||
1911 | { | ||
1912 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1913 | struct page *page; | ||
1914 | unsigned int i = 0; | ||
1915 | nid_t nid; | ||
1916 | |||
1917 | if (!enabled_nat_bits(sbi, NULL)) | ||
1918 | return -EAGAIN; | ||
1919 | |||
1920 | down_read(&nm_i->nat_tree_lock); | ||
1921 | check_empty: | ||
1922 | i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); | ||
1923 | if (i >= nm_i->nat_blocks) { | ||
1924 | i = 0; | ||
1925 | goto check_partial; | ||
1926 | } | ||
1927 | |||
1928 | for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK; | ||
1929 | nid++) { | ||
1930 | if (unlikely(nid >= nm_i->max_nid)) | ||
1931 | break; | ||
1932 | add_free_nid(sbi, nid, true); | ||
1933 | } | ||
1934 | |||
1935 | if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) | ||
1936 | goto out; | ||
1937 | i++; | ||
1938 | goto check_empty; | ||
1939 | |||
1940 | check_partial: | ||
1941 | i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); | ||
1942 | if (i >= nm_i->nat_blocks) { | ||
1943 | disable_nat_bits(sbi, true); | ||
1944 | up_read(&nm_i->nat_tree_lock); | ||
1945 | return -EINVAL; | ||
1946 | } | ||
1947 | |||
1948 | nid = i * NAT_ENTRY_PER_BLOCK; | ||
1949 | page = get_current_nat_page(sbi, nid); | ||
1950 | scan_nat_page(sbi, page, nid); | ||
1951 | f2fs_put_page(page, 1); | ||
1952 | |||
1953 | if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) { | ||
1954 | i++; | ||
1955 | goto check_partial; | ||
1956 | } | ||
1957 | out: | ||
1958 | up_read(&nm_i->nat_tree_lock); | ||
1959 | return 0; | ||
1960 | } | ||
1961 | |||
1962 | static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) | 1922 | static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) |
1963 | { | 1923 | { |
1964 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1924 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
@@ -1980,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) | |||
1980 | 1940 | ||
1981 | if (nm_i->nid_cnt[FREE_NID_LIST]) | 1941 | if (nm_i->nid_cnt[FREE_NID_LIST]) |
1982 | return; | 1942 | return; |
1983 | |||
1984 | /* try to find free nids with nat_bits */ | ||
1985 | if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST]) | ||
1986 | return; | ||
1987 | } | ||
1988 | |||
1989 | /* find next valid candidate */ | ||
1990 | if (enabled_nat_bits(sbi, NULL)) { | ||
1991 | int idx = find_next_zero_bit_le(nm_i->full_nat_bits, | ||
1992 | nm_i->nat_blocks, 0); | ||
1993 | |||
1994 | if (idx >= nm_i->nat_blocks) | ||
1995 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
1996 | else | ||
1997 | nid = idx * NAT_ENTRY_PER_BLOCK; | ||
1998 | } | 1943 | } |
1999 | 1944 | ||
2000 | /* readahead nat pages to be scanned */ | 1945 | /* readahead nat pages to be scanned */ |
@@ -2081,7 +2026,7 @@ retry: | |||
2081 | __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); | 2026 | __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); |
2082 | nm_i->available_nids--; | 2027 | nm_i->available_nids--; |
2083 | 2028 | ||
2084 | update_free_nid_bitmap(sbi, *nid, false); | 2029 | update_free_nid_bitmap(sbi, *nid, false, false, false); |
2085 | 2030 | ||
2086 | spin_unlock(&nm_i->nid_list_lock); | 2031 | spin_unlock(&nm_i->nid_list_lock); |
2087 | return true; | 2032 | return true; |
@@ -2137,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |||
2137 | 2082 | ||
2138 | nm_i->available_nids++; | 2083 | nm_i->available_nids++; |
2139 | 2084 | ||
2140 | update_free_nid_bitmap(sbi, nid, true); | 2085 | update_free_nid_bitmap(sbi, nid, true, false, false); |
2141 | 2086 | ||
2142 | spin_unlock(&nm_i->nid_list_lock); | 2087 | spin_unlock(&nm_i->nid_list_lock); |
2143 | 2088 | ||
@@ -2383,7 +2328,7 @@ add_out: | |||
2383 | list_add_tail(&nes->set_list, head); | 2328 | list_add_tail(&nes->set_list, head); |
2384 | } | 2329 | } |
2385 | 2330 | ||
2386 | void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, | 2331 | static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, |
2387 | struct page *page) | 2332 | struct page *page) |
2388 | { | 2333 | { |
2389 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 2334 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
@@ -2402,16 +2347,16 @@ void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, | |||
2402 | valid++; | 2347 | valid++; |
2403 | } | 2348 | } |
2404 | if (valid == 0) { | 2349 | if (valid == 0) { |
2405 | set_bit_le(nat_index, nm_i->empty_nat_bits); | 2350 | __set_bit_le(nat_index, nm_i->empty_nat_bits); |
2406 | clear_bit_le(nat_index, nm_i->full_nat_bits); | 2351 | __clear_bit_le(nat_index, nm_i->full_nat_bits); |
2407 | return; | 2352 | return; |
2408 | } | 2353 | } |
2409 | 2354 | ||
2410 | clear_bit_le(nat_index, nm_i->empty_nat_bits); | 2355 | __clear_bit_le(nat_index, nm_i->empty_nat_bits); |
2411 | if (valid == NAT_ENTRY_PER_BLOCK) | 2356 | if (valid == NAT_ENTRY_PER_BLOCK) |
2412 | set_bit_le(nat_index, nm_i->full_nat_bits); | 2357 | __set_bit_le(nat_index, nm_i->full_nat_bits); |
2413 | else | 2358 | else |
2414 | clear_bit_le(nat_index, nm_i->full_nat_bits); | 2359 | __clear_bit_le(nat_index, nm_i->full_nat_bits); |
2415 | } | 2360 | } |
2416 | 2361 | ||
2417 | static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, | 2362 | static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, |
@@ -2467,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, | |||
2467 | add_free_nid(sbi, nid, false); | 2412 | add_free_nid(sbi, nid, false); |
2468 | spin_lock(&NM_I(sbi)->nid_list_lock); | 2413 | spin_lock(&NM_I(sbi)->nid_list_lock); |
2469 | NM_I(sbi)->available_nids++; | 2414 | NM_I(sbi)->available_nids++; |
2470 | update_free_nid_bitmap(sbi, nid, true); | 2415 | update_free_nid_bitmap(sbi, nid, true, false, false); |
2471 | spin_unlock(&NM_I(sbi)->nid_list_lock); | 2416 | spin_unlock(&NM_I(sbi)->nid_list_lock); |
2472 | } else { | 2417 | } else { |
2473 | spin_lock(&NM_I(sbi)->nid_list_lock); | 2418 | spin_lock(&NM_I(sbi)->nid_list_lock); |
2474 | update_free_nid_bitmap(sbi, nid, false); | 2419 | update_free_nid_bitmap(sbi, nid, false, false, false); |
2475 | spin_unlock(&NM_I(sbi)->nid_list_lock); | 2420 | spin_unlock(&NM_I(sbi)->nid_list_lock); |
2476 | } | 2421 | } |
2477 | } | 2422 | } |
@@ -2577,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) | |||
2577 | return 0; | 2522 | return 0; |
2578 | } | 2523 | } |
2579 | 2524 | ||
2525 | inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) | ||
2526 | { | ||
2527 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
2528 | unsigned int i = 0; | ||
2529 | nid_t nid, last_nid; | ||
2530 | |||
2531 | if (!enabled_nat_bits(sbi, NULL)) | ||
2532 | return; | ||
2533 | |||
2534 | for (i = 0; i < nm_i->nat_blocks; i++) { | ||
2535 | i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); | ||
2536 | if (i >= nm_i->nat_blocks) | ||
2537 | break; | ||
2538 | |||
2539 | __set_bit_le(i, nm_i->nat_block_bitmap); | ||
2540 | |||
2541 | nid = i * NAT_ENTRY_PER_BLOCK; | ||
2542 | last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; | ||
2543 | |||
2544 | spin_lock(&nm_i->free_nid_lock); | ||
2545 | for (; nid < last_nid; nid++) | ||
2546 | update_free_nid_bitmap(sbi, nid, true, true, true); | ||
2547 | spin_unlock(&nm_i->free_nid_lock); | ||
2548 | } | ||
2549 | |||
2550 | for (i = 0; i < nm_i->nat_blocks; i++) { | ||
2551 | i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); | ||
2552 | if (i >= nm_i->nat_blocks) | ||
2553 | break; | ||
2554 | |||
2555 | __set_bit_le(i, nm_i->nat_block_bitmap); | ||
2556 | } | ||
2557 | } | ||
2558 | |||
2580 | static int init_node_manager(struct f2fs_sb_info *sbi) | 2559 | static int init_node_manager(struct f2fs_sb_info *sbi) |
2581 | { | 2560 | { |
2582 | struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); | 2561 | struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); |
@@ -2638,7 +2617,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) | |||
2638 | return 0; | 2617 | return 0; |
2639 | } | 2618 | } |
2640 | 2619 | ||
2641 | int init_free_nid_cache(struct f2fs_sb_info *sbi) | 2620 | static int init_free_nid_cache(struct f2fs_sb_info *sbi) |
2642 | { | 2621 | { |
2643 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 2622 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
2644 | 2623 | ||
@@ -2651,6 +2630,14 @@ int init_free_nid_cache(struct f2fs_sb_info *sbi) | |||
2651 | GFP_KERNEL); | 2630 | GFP_KERNEL); |
2652 | if (!nm_i->nat_block_bitmap) | 2631 | if (!nm_i->nat_block_bitmap) |
2653 | return -ENOMEM; | 2632 | return -ENOMEM; |
2633 | |||
2634 | nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks * | ||
2635 | sizeof(unsigned short), GFP_KERNEL); | ||
2636 | if (!nm_i->free_nid_count) | ||
2637 | return -ENOMEM; | ||
2638 | |||
2639 | spin_lock_init(&nm_i->free_nid_lock); | ||
2640 | |||
2654 | return 0; | 2641 | return 0; |
2655 | } | 2642 | } |
2656 | 2643 | ||
@@ -2670,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi) | |||
2670 | if (err) | 2657 | if (err) |
2671 | return err; | 2658 | return err; |
2672 | 2659 | ||
2660 | /* load free nid status from nat_bits table */ | ||
2661 | load_free_nid_bitmap(sbi); | ||
2662 | |||
2673 | build_free_nids(sbi, true, true); | 2663 | build_free_nids(sbi, true, true); |
2674 | return 0; | 2664 | return 0; |
2675 | } | 2665 | } |
@@ -2730,6 +2720,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2730 | 2720 | ||
2731 | kvfree(nm_i->nat_block_bitmap); | 2721 | kvfree(nm_i->nat_block_bitmap); |
2732 | kvfree(nm_i->free_nid_bitmap); | 2722 | kvfree(nm_i->free_nid_bitmap); |
2723 | kvfree(nm_i->free_nid_count); | ||
2733 | 2724 | ||
2734 | kfree(nm_i->nat_bitmap); | 2725 | kfree(nm_i->nat_bitmap); |
2735 | kfree(nm_i->nat_bits); | 2726 | kfree(nm_i->nat_bits); |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4bd7a8b19332..29ef7088c558 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |||
1163 | if (f2fs_discard_en(sbi) && | 1163 | if (f2fs_discard_en(sbi) && |
1164 | !f2fs_test_and_set_bit(offset, se->discard_map)) | 1164 | !f2fs_test_and_set_bit(offset, se->discard_map)) |
1165 | sbi->discard_blks--; | 1165 | sbi->discard_blks--; |
1166 | |||
1167 | /* don't overwrite by SSR to keep node chain */ | ||
1168 | if (se->type == CURSEG_WARM_NODE) { | ||
1169 | if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) | ||
1170 | se->ckpt_valid_blocks++; | ||
1171 | } | ||
1166 | } else { | 1172 | } else { |
1167 | if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { | 1173 | if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { |
1168 | #ifdef CONFIG_F2FS_CHECK_FS | 1174 | #ifdef CONFIG_F2FS_CHECK_FS |
diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1..af9dbc44fd92 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h | |||
@@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, | |||
723 | asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, | 723 | asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, |
724 | int, const char __user *); | 724 | int, const char __user *); |
725 | 725 | ||
726 | asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); | ||
727 | |||
726 | /* | 728 | /* |
727 | * For most but not all architectures, "am I in a compat syscall?" and | 729 | * For most but not all architectures, "am I in a compat syscall?" and |
728 | * "am I a compat task?" are the same question. For architectures on which | 730 | * "am I a compat task?" are the same question. For architectures on which |
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 2484b2fcc6eb..933d93656605 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h | |||
@@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, | |||
143 | struct fwnode_handle *child, | 143 | struct fwnode_handle *child, |
144 | enum gpiod_flags flags, | 144 | enum gpiod_flags flags, |
145 | const char *label); | 145 | const char *label); |
146 | /* FIXME: delete this helper when users are switched over */ | ||
147 | static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, | ||
148 | const char *con_id, struct fwnode_handle *child) | ||
149 | { | ||
150 | return devm_fwnode_get_index_gpiod_from_child(dev, con_id, | ||
151 | 0, child, | ||
152 | GPIOD_ASIS, | ||
153 | "?"); | ||
154 | } | ||
155 | 146 | ||
156 | #else /* CONFIG_GPIOLIB */ | 147 | #else /* CONFIG_GPIOLIB */ |
157 | 148 | ||
@@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, | |||
444 | return ERR_PTR(-ENOSYS); | 435 | return ERR_PTR(-ENOSYS); |
445 | } | 436 | } |
446 | 437 | ||
447 | /* FIXME: delete this when all users are switched over */ | ||
448 | static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, | ||
449 | const char *con_id, struct fwnode_handle *child) | ||
450 | { | ||
451 | return ERR_PTR(-ENOSYS); | ||
452 | } | ||
453 | |||
454 | #endif /* CONFIG_GPIOLIB */ | 438 | #endif /* CONFIG_GPIOLIB */ |
455 | 439 | ||
456 | static inline | 440 | static inline |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d1a6e554ee68..9de1d3ca83b2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
162 | int len, void *val); | 162 | int len, void *val); |
163 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 163 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
164 | int len, struct kvm_io_device *dev); | 164 | int len, struct kvm_io_device *dev); |
165 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 165 | void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
166 | struct kvm_io_device *dev); | 166 | struct kvm_io_device *dev); |
167 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 167 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
168 | gpa_t addr); | 168 | gpa_t addr); |
169 | 169 | ||
@@ -403,7 +403,7 @@ struct kvm { | |||
403 | struct kvm_vm_stat stat; | 403 | struct kvm_vm_stat stat; |
404 | struct kvm_arch arch; | 404 | struct kvm_arch arch; |
405 | refcount_t users_count; | 405 | refcount_t users_count; |
406 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 406 | #ifdef CONFIG_KVM_MMIO |
407 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; | 407 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; |
408 | spinlock_t ring_lock; | 408 | spinlock_t ring_lock; |
409 | struct list_head coalesced_zones; | 409 | struct list_head coalesced_zones; |
@@ -502,10 +502,10 @@ int __must_check vcpu_load(struct kvm_vcpu *vcpu); | |||
502 | void vcpu_put(struct kvm_vcpu *vcpu); | 502 | void vcpu_put(struct kvm_vcpu *vcpu); |
503 | 503 | ||
504 | #ifdef __KVM_HAVE_IOAPIC | 504 | #ifdef __KVM_HAVE_IOAPIC |
505 | void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); | 505 | void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); |
506 | void kvm_arch_post_irq_routing_update(struct kvm *kvm); | 506 | void kvm_arch_post_irq_routing_update(struct kvm *kvm); |
507 | #else | 507 | #else |
508 | static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) | 508 | static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) |
509 | { | 509 | { |
510 | } | 510 | } |
511 | static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) | 511 | static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) |
@@ -877,22 +877,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | |||
877 | int kvm_request_irq_source_id(struct kvm *kvm); | 877 | int kvm_request_irq_source_id(struct kvm *kvm); |
878 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); | 878 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); |
879 | 879 | ||
880 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
881 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); | ||
882 | void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); | ||
883 | #else | ||
884 | static inline int kvm_iommu_map_pages(struct kvm *kvm, | ||
885 | struct kvm_memory_slot *slot) | ||
886 | { | ||
887 | return 0; | ||
888 | } | ||
889 | |||
890 | static inline void kvm_iommu_unmap_pages(struct kvm *kvm, | ||
891 | struct kvm_memory_slot *slot) | ||
892 | { | ||
893 | } | ||
894 | #endif | ||
895 | |||
896 | /* | 880 | /* |
897 | * search_memslots() and __gfn_to_memslot() are here because they are | 881 | * search_memslots() and __gfn_to_memslot() are here because they are |
898 | * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. | 882 | * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. |
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 58373875e8ee..55125d674338 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h | |||
@@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n, | |||
101 | { } | 101 | { } |
102 | #endif /* CONFIG_HARDENED_USERCOPY */ | 102 | #endif /* CONFIG_HARDENED_USERCOPY */ |
103 | 103 | ||
104 | #ifndef arch_setup_new_exec | ||
105 | static inline void arch_setup_new_exec(void) { } | ||
106 | #endif | ||
107 | |||
104 | #endif /* __KERNEL__ */ | 108 | #endif /* __KERNEL__ */ |
105 | 109 | ||
106 | #endif /* _LINUX_THREAD_INFO_H */ | 110 | #endif /* _LINUX_THREAD_INFO_H */ |
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b59ee077a596..8c6d3bdb9a00 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h | |||
@@ -409,6 +409,7 @@ typedef struct elf64_shdr { | |||
409 | #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ | 409 | #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ |
410 | #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ | 410 | #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ |
411 | #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ | 411 | #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ |
412 | #define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ | ||
412 | #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ | 413 | #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ |
413 | #define NT_ARM_TLS 0x401 /* ARM TLS register */ | 414 | #define NT_ARM_TLS 0x401 /* ARM TLS register */ |
414 | #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ | 415 | #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f51d5082a377..3c168b6fd74b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -702,6 +702,10 @@ struct kvm_ppc_resize_hpt { | |||
702 | #define KVM_VM_PPC_HV 1 | 702 | #define KVM_VM_PPC_HV 1 |
703 | #define KVM_VM_PPC_PR 2 | 703 | #define KVM_VM_PPC_PR 2 |
704 | 704 | ||
705 | /* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ | ||
706 | #define KVM_VM_MIPS_TE 0 | ||
707 | #define KVM_VM_MIPS_VZ 1 | ||
708 | |||
705 | #define KVM_S390_SIE_PAGE_OFFSET 1 | 709 | #define KVM_S390_SIE_PAGE_OFFSET 1 |
706 | 710 | ||
707 | /* | 711 | /* |
@@ -883,6 +887,12 @@ struct kvm_ppc_resize_hpt { | |||
883 | #define KVM_CAP_PPC_MMU_RADIX 134 | 887 | #define KVM_CAP_PPC_MMU_RADIX 134 |
884 | #define KVM_CAP_PPC_MMU_HASH_V3 135 | 888 | #define KVM_CAP_PPC_MMU_HASH_V3 135 |
885 | #define KVM_CAP_IMMEDIATE_EXIT 136 | 889 | #define KVM_CAP_IMMEDIATE_EXIT 136 |
890 | #define KVM_CAP_MIPS_VZ 137 | ||
891 | #define KVM_CAP_MIPS_TE 138 | ||
892 | #define KVM_CAP_MIPS_64BIT 139 | ||
893 | #define KVM_CAP_S390_GS 140 | ||
894 | #define KVM_CAP_S390_AIS 141 | ||
895 | #define KVM_CAP_SPAPR_TCE_VFIO 142 | ||
886 | 896 | ||
887 | #ifdef KVM_CAP_IRQ_ROUTING | 897 | #ifdef KVM_CAP_IRQ_ROUTING |
888 | 898 | ||
@@ -1087,6 +1097,7 @@ struct kvm_device_attr { | |||
1087 | #define KVM_DEV_VFIO_GROUP 1 | 1097 | #define KVM_DEV_VFIO_GROUP 1 |
1088 | #define KVM_DEV_VFIO_GROUP_ADD 1 | 1098 | #define KVM_DEV_VFIO_GROUP_ADD 1 |
1089 | #define KVM_DEV_VFIO_GROUP_DEL 2 | 1099 | #define KVM_DEV_VFIO_GROUP_DEL 2 |
1100 | #define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 | ||
1090 | 1101 | ||
1091 | enum kvm_device_type { | 1102 | enum kvm_device_type { |
1092 | KVM_DEV_TYPE_FSL_MPIC_20 = 1, | 1103 | KVM_DEV_TYPE_FSL_MPIC_20 = 1, |
@@ -1108,6 +1119,11 @@ enum kvm_device_type { | |||
1108 | KVM_DEV_TYPE_MAX, | 1119 | KVM_DEV_TYPE_MAX, |
1109 | }; | 1120 | }; |
1110 | 1121 | ||
1122 | struct kvm_vfio_spapr_tce { | ||
1123 | __s32 groupfd; | ||
1124 | __s32 tablefd; | ||
1125 | }; | ||
1126 | |||
1111 | /* | 1127 | /* |
1112 | * ioctls for VM fds | 1128 | * ioctls for VM fds |
1113 | */ | 1129 | */ |
diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 7ebb23836f68..b1ccb58ad397 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c | |||
@@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry) | |||
267 | { | 267 | { |
268 | struct swap_slots_cache *cache; | 268 | struct swap_slots_cache *cache; |
269 | 269 | ||
270 | WARN_ON_ONCE(!swap_slot_cache_initialized); | ||
271 | |||
272 | cache = &get_cpu_var(swp_slots); | 270 | cache = &get_cpu_var(swp_slots); |
273 | if (use_swap_slot_cache && cache->slots_ret) { | 271 | if (use_swap_slot_cache && cache->slots_ret) { |
274 | spin_lock_irq(&cache->free_lock); | 272 | spin_lock_irq(&cache->free_lock); |
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index 2c9082ba6137..116b7735ee9f 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh | |||
@@ -148,6 +148,7 @@ cat << EOF | |||
148 | #define __IGNORE_sysfs | 148 | #define __IGNORE_sysfs |
149 | #define __IGNORE_uselib | 149 | #define __IGNORE_uselib |
150 | #define __IGNORE__sysctl | 150 | #define __IGNORE__sysctl |
151 | #define __IGNORE_arch_prctl | ||
151 | 152 | ||
152 | /* ... including the "new" 32-bit uid syscalls */ | 153 | /* ... including the "new" 32-bit uid syscalls */ |
153 | #define __IGNORE_lchown32 | 154 | #define __IGNORE_lchown32 |
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 581278c58488..8f74ed8e7237 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat | |||
@@ -30,8 +30,8 @@ import fcntl | |||
30 | import resource | 30 | import resource |
31 | import struct | 31 | import struct |
32 | import re | 32 | import re |
33 | import subprocess | ||
33 | from collections import defaultdict | 34 | from collections import defaultdict |
34 | from time import sleep | ||
35 | 35 | ||
36 | VMX_EXIT_REASONS = { | 36 | VMX_EXIT_REASONS = { |
37 | 'EXCEPTION_NMI': 0, | 37 | 'EXCEPTION_NMI': 0, |
@@ -225,6 +225,7 @@ IOCTL_NUMBERS = { | |||
225 | 'RESET': 0x00002403, | 225 | 'RESET': 0x00002403, |
226 | } | 226 | } |
227 | 227 | ||
228 | |||
228 | class Arch(object): | 229 | class Arch(object): |
229 | """Encapsulates global architecture specific data. | 230 | """Encapsulates global architecture specific data. |
230 | 231 | ||
@@ -255,12 +256,14 @@ class Arch(object): | |||
255 | return ArchX86(SVM_EXIT_REASONS) | 256 | return ArchX86(SVM_EXIT_REASONS) |
256 | return | 257 | return |
257 | 258 | ||
259 | |||
258 | class ArchX86(Arch): | 260 | class ArchX86(Arch): |
259 | def __init__(self, exit_reasons): | 261 | def __init__(self, exit_reasons): |
260 | self.sc_perf_evt_open = 298 | 262 | self.sc_perf_evt_open = 298 |
261 | self.ioctl_numbers = IOCTL_NUMBERS | 263 | self.ioctl_numbers = IOCTL_NUMBERS |
262 | self.exit_reasons = exit_reasons | 264 | self.exit_reasons = exit_reasons |
263 | 265 | ||
266 | |||
264 | class ArchPPC(Arch): | 267 | class ArchPPC(Arch): |
265 | def __init__(self): | 268 | def __init__(self): |
266 | self.sc_perf_evt_open = 319 | 269 | self.sc_perf_evt_open = 319 |
@@ -275,12 +278,14 @@ class ArchPPC(Arch): | |||
275 | self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 | 278 | self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 |
276 | self.exit_reasons = {} | 279 | self.exit_reasons = {} |
277 | 280 | ||
281 | |||
278 | class ArchA64(Arch): | 282 | class ArchA64(Arch): |
279 | def __init__(self): | 283 | def __init__(self): |
280 | self.sc_perf_evt_open = 241 | 284 | self.sc_perf_evt_open = 241 |
281 | self.ioctl_numbers = IOCTL_NUMBERS | 285 | self.ioctl_numbers = IOCTL_NUMBERS |
282 | self.exit_reasons = AARCH64_EXIT_REASONS | 286 | self.exit_reasons = AARCH64_EXIT_REASONS |
283 | 287 | ||
288 | |||
284 | class ArchS390(Arch): | 289 | class ArchS390(Arch): |
285 | def __init__(self): | 290 | def __init__(self): |
286 | self.sc_perf_evt_open = 331 | 291 | self.sc_perf_evt_open = 331 |
@@ -316,6 +321,61 @@ def parse_int_list(list_string): | |||
316 | return integers | 321 | return integers |
317 | 322 | ||
318 | 323 | ||
324 | def get_pid_from_gname(gname): | ||
325 | """Fuzzy function to convert guest name to QEMU process pid. | ||
326 | |||
327 | Returns a list of potential pids, can be empty if no match found. | ||
328 | Throws an exception on processing errors. | ||
329 | |||
330 | """ | ||
331 | pids = [] | ||
332 | try: | ||
333 | child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'], | ||
334 | stdout=subprocess.PIPE) | ||
335 | except: | ||
336 | raise Exception | ||
337 | for line in child.stdout: | ||
338 | line = line.lstrip().split(' ', 1) | ||
339 | # perform a sanity check before calling the more expensive | ||
340 | # function to possibly extract the guest name | ||
341 | if ' -name ' in line[1] and gname == get_gname_from_pid(line[0]): | ||
342 | pids.append(int(line[0])) | ||
343 | child.stdout.close() | ||
344 | |||
345 | return pids | ||
346 | |||
347 | |||
348 | def get_gname_from_pid(pid): | ||
349 | """Returns the guest name for a QEMU process pid. | ||
350 | |||
351 | Extracts the guest name from the QEMU command line by processing the '-name' | ||
352 | option. Will also handle names specified out of sequence. | ||
353 | |||
354 | """ | ||
355 | name = '' | ||
356 | try: | ||
357 | line = open('/proc/{}/cmdline'.format(pid), 'rb').read().split('\0') | ||
358 | parms = line[line.index('-name') + 1].split(',') | ||
359 | while '' in parms: | ||
360 | # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in | ||
361 | # ['foo', '', 'bar'], which we revert here | ||
362 | idx = parms.index('') | ||
363 | parms[idx - 1] += ',' + parms[idx + 1] | ||
364 | del parms[idx:idx+2] | ||
365 | # the '-name' switch allows for two ways to specify the guest name, | ||
366 | # where the plain name overrides the name specified via 'guest=' | ||
367 | for arg in parms: | ||
368 | if '=' not in arg: | ||
369 | name = arg | ||
370 | break | ||
371 | if arg[:6] == 'guest=': | ||
372 | name = arg[6:] | ||
373 | except (ValueError, IOError, IndexError): | ||
374 | pass | ||
375 | |||
376 | return name | ||
377 | |||
378 | |||
319 | def get_online_cpus(): | 379 | def get_online_cpus(): |
320 | """Returns a list of cpu id integers.""" | 380 | """Returns a list of cpu id integers.""" |
321 | with open('/sys/devices/system/cpu/online') as cpu_list: | 381 | with open('/sys/devices/system/cpu/online') as cpu_list: |
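The '-name' handling in get_gname_from_pid() above relies on QEMU's convention that a literal comma inside an option value is escaped by doubling it (',,'). A minimal stand-alone sketch of that unescaping loop follows; the helper name and the sample argument string are hypothetical and only illustrate the same split-and-rejoin idea, they are not part of the patch.

  def unescape_qemu_name(args_string):
      # Splitting on ',' turns every escaped ',,' into an empty element;
      # merge the neighbours of each empty element back together.
      parms = args_string.split(',')
      while '' in parms:
          idx = parms.index('')
          parms[idx - 1] += ',' + parms[idx + 1]
          del parms[idx:idx + 2]
      return parms

  # 'guest=foo,,bar,debug-threads=on' -> ['guest=foo,bar', 'debug-threads=on']
  print(unescape_qemu_name('guest=foo,,bar,debug-threads=on'))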
@@ -342,6 +402,7 @@ def get_filters(): | |||
342 | libc = ctypes.CDLL('libc.so.6', use_errno=True) | 402 | libc = ctypes.CDLL('libc.so.6', use_errno=True) |
343 | syscall = libc.syscall | 403 | syscall = libc.syscall |
344 | 404 | ||
405 | |||
345 | class perf_event_attr(ctypes.Structure): | 406 | class perf_event_attr(ctypes.Structure): |
346 | """Struct that holds the necessary data to set up a trace event. | 407 | """Struct that holds the necessary data to set up a trace event. |
347 | 408 | ||
@@ -370,6 +431,7 @@ class perf_event_attr(ctypes.Structure): | |||
370 | self.size = ctypes.sizeof(self) | 431 | self.size = ctypes.sizeof(self) |
371 | self.read_format = PERF_FORMAT_GROUP | 432 | self.read_format = PERF_FORMAT_GROUP |
372 | 433 | ||
434 | |||
373 | def perf_event_open(attr, pid, cpu, group_fd, flags): | 435 | def perf_event_open(attr, pid, cpu, group_fd, flags): |
374 | """Wrapper for the sys_perf_evt_open() syscall. | 436 | """Wrapper for the sys_perf_evt_open() syscall. |
375 | 437 | ||
@@ -395,6 +457,7 @@ PERF_FORMAT_GROUP = 1 << 3 | |||
395 | PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' | 457 | PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' |
396 | PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' | 458 | PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' |
397 | 459 | ||
460 | |||
398 | class Group(object): | 461 | class Group(object): |
399 | """Represents a perf event group.""" | 462 | """Represents a perf event group.""" |
400 | 463 | ||
@@ -427,6 +490,7 @@ class Group(object): | |||
427 | struct.unpack(read_format, | 490 | struct.unpack(read_format, |
428 | os.read(self.events[0].fd, length)))) | 491 | os.read(self.events[0].fd, length)))) |
429 | 492 | ||
493 | |||
430 | class Event(object): | 494 | class Event(object): |
431 | """Represents a performance event and manages its life cycle.""" | 495 | """Represents a performance event and manages its life cycle.""" |
432 | def __init__(self, name, group, trace_cpu, trace_pid, trace_point, | 496 | def __init__(self, name, group, trace_cpu, trace_pid, trace_point, |
@@ -510,6 +574,7 @@ class Event(object): | |||
510 | """Resets the count of the trace event in the kernel.""" | 574 | """Resets the count of the trace event in the kernel.""" |
511 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) | 575 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) |
512 | 576 | ||
577 | |||
513 | class TracepointProvider(object): | 578 | class TracepointProvider(object): |
514 | """Data provider for the stats class. | 579 | """Data provider for the stats class. |
515 | 580 | ||
@@ -551,6 +616,7 @@ class TracepointProvider(object): | |||
551 | def setup_traces(self): | 616 | def setup_traces(self): |
552 | """Creates all event and group objects needed to be able to retrieve | 617 | """Creates all event and group objects needed to be able to retrieve |
553 | data.""" | 618 | data.""" |
619 | fields = self.get_available_fields() | ||
554 | if self._pid > 0: | 620 | if self._pid > 0: |
555 | # Fetch list of all threads of the monitored pid, as qemu | 621 | # Fetch list of all threads of the monitored pid, as qemu |
556 | # starts a thread for each vcpu. | 622 | # starts a thread for each vcpu. |
@@ -561,7 +627,7 @@ class TracepointProvider(object): | |||
561 | 627 | ||
562 | # The constant is needed as a buffer for python libs, std | 628 | # The constant is needed as a buffer for python libs, std |
563 | # streams and other files that the script opens. | 629 | # streams and other files that the script opens. |
564 | newlim = len(groupids) * len(self._fields) + 50 | 630 | newlim = len(groupids) * len(fields) + 50 |
565 | try: | 631 | try: |
566 | softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) | 632 | softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) |
567 | 633 | ||
@@ -577,7 +643,7 @@ class TracepointProvider(object): | |||
577 | 643 | ||
578 | for groupid in groupids: | 644 | for groupid in groupids: |
579 | group = Group() | 645 | group = Group() |
580 | for name in self._fields: | 646 | for name in fields: |
581 | tracepoint = name | 647 | tracepoint = name |
582 | tracefilter = None | 648 | tracefilter = None |
583 | match = re.match(r'(.*)\((.*)\)', name) | 649 | match = re.match(r'(.*)\((.*)\)', name) |
@@ -650,13 +716,23 @@ class TracepointProvider(object): | |||
650 | ret[name] += val | 716 | ret[name] += val |
651 | return ret | 717 | return ret |
652 | 718 | ||
719 | def reset(self): | ||
720 | """Reset all field counters""" | ||
721 | for group in self.group_leaders: | ||
722 | for event in group.events: | ||
723 | event.reset() | ||
724 | |||
725 | |||
653 | class DebugfsProvider(object): | 726 | class DebugfsProvider(object): |
654 | """Provides data from the files that KVM creates in the kvm debugfs | 727 | """Provides data from the files that KVM creates in the kvm debugfs |
655 | folder.""" | 728 | folder.""" |
656 | def __init__(self): | 729 | def __init__(self): |
657 | self._fields = self.get_available_fields() | 730 | self._fields = self.get_available_fields() |
731 | self._baseline = {} | ||
658 | self._pid = 0 | 732 | self._pid = 0 |
659 | self.do_read = True | 733 | self.do_read = True |
734 | self.paths = [] | ||
735 | self.reset() | ||
660 | 736 | ||
661 | def get_available_fields(self): | 737 | def get_available_fields(self): |
662 | """"Returns a list of available fields. | 738 | """"Returns a list of available fields. |
@@ -673,6 +749,7 @@ class DebugfsProvider(object): | |||
673 | @fields.setter | 749 | @fields.setter |
674 | def fields(self, fields): | 750 | def fields(self, fields): |
675 | self._fields = fields | 751 | self._fields = fields |
752 | self.reset() | ||
676 | 753 | ||
677 | @property | 754 | @property |
678 | def pid(self): | 755 | def pid(self): |
@@ -690,10 +767,11 @@ class DebugfsProvider(object): | |||
690 | self.paths = filter(lambda x: "{}-".format(pid) in x, vms) | 767 | self.paths = filter(lambda x: "{}-".format(pid) in x, vms) |
691 | 768 | ||
692 | else: | 769 | else: |
693 | self.paths = [''] | 770 | self.paths = [] |
694 | self.do_read = True | 771 | self.do_read = True |
772 | self.reset() | ||
695 | 773 | ||
696 | def read(self): | 774 | def read(self, reset=0): |
697 | """Returns a dict with format:'file name / field -> current value'.""" | 775 | """Returns a dict with format:'file name / field -> current value'.""" |
698 | results = {} | 776 | results = {} |
699 | 777 | ||
@@ -701,10 +779,22 @@ class DebugfsProvider(object): | |||
701 | if not self.do_read: | 779 | if not self.do_read: |
702 | return results | 780 | return results |
703 | 781 | ||
704 | for path in self.paths: | 782 | paths = self.paths |
783 | if self._pid == 0: | ||
784 | paths = [] | ||
785 | for entry in os.walk(PATH_DEBUGFS_KVM): | ||
786 | for dir in entry[1]: | ||
787 | paths.append(dir) | ||
788 | for path in paths: | ||
705 | for field in self._fields: | 789 | for field in self._fields: |
706 | results[field] = results.get(field, 0) \ | 790 | value = self.read_field(field, path) |
707 | + self.read_field(field, path) | 791 | key = path + field |
792 | if reset: | ||
793 | self._baseline[key] = value | ||
794 | if self._baseline.get(key, -1) == -1: | ||
795 | self._baseline[key] = value | ||
796 | results[field] = (results.get(field, 0) + value - | ||
797 | self._baseline.get(key, 0)) | ||
708 | 798 | ||
709 | return results | 799 | return results |
710 | 800 | ||
@@ -718,6 +808,12 @@ class DebugfsProvider(object): | |||
718 | except IOError: | 808 | except IOError: |
719 | return 0 | 809 | return 0 |
720 | 810 | ||
811 | def reset(self): | ||
812 | """Reset field counters""" | ||
813 | self._baseline = {} | ||
814 | self.read(1) | ||
815 | |||
816 | |||
721 | class Stats(object): | 817 | class Stats(object): |
722 | """Manages the data providers and the data they provide. | 818 | """Manages the data providers and the data they provide. |
723 | 819 | ||
@@ -753,14 +849,20 @@ class Stats(object): | |||
753 | for provider in self.providers: | 849 | for provider in self.providers: |
754 | provider.pid = self._pid_filter | 850 | provider.pid = self._pid_filter |
755 | 851 | ||
852 | def reset(self): | ||
853 | self.values = {} | ||
854 | for provider in self.providers: | ||
855 | provider.reset() | ||
856 | |||
756 | @property | 857 | @property |
757 | def fields_filter(self): | 858 | def fields_filter(self): |
758 | return self._fields_filter | 859 | return self._fields_filter |
759 | 860 | ||
760 | @fields_filter.setter | 861 | @fields_filter.setter |
761 | def fields_filter(self, fields_filter): | 862 | def fields_filter(self, fields_filter): |
762 | self._fields_filter = fields_filter | 863 | if fields_filter != self._fields_filter: |
763 | self.update_provider_filters() | 864 | self._fields_filter = fields_filter |
865 | self.update_provider_filters() | ||
764 | 866 | ||
765 | @property | 867 | @property |
766 | def pid_filter(self): | 868 | def pid_filter(self): |
@@ -768,9 +870,10 @@ class Stats(object): | |||
768 | 870 | ||
769 | @pid_filter.setter | 871 | @pid_filter.setter |
770 | def pid_filter(self, pid): | 872 | def pid_filter(self, pid): |
771 | self._pid_filter = pid | 873 | if pid != self._pid_filter: |
772 | self.values = {} | 874 | self._pid_filter = pid |
773 | self.update_provider_pid() | 875 | self.values = {} |
876 | self.update_provider_pid() | ||
774 | 877 | ||
775 | def get(self): | 878 | def get(self): |
776 | """Returns a dict with field -> (value, delta to last value) of all | 879 | """Returns a dict with field -> (value, delta to last value) of all |
@@ -778,23 +881,26 @@ class Stats(object): | |||
778 | for provider in self.providers: | 881 | for provider in self.providers: |
779 | new = provider.read() | 882 | new = provider.read() |
780 | for key in provider.fields: | 883 | for key in provider.fields: |
781 | oldval = self.values.get(key, (0, 0)) | 884 | oldval = self.values.get(key, (0, 0))[0] |
782 | newval = new.get(key, 0) | 885 | newval = new.get(key, 0) |
783 | newdelta = None | 886 | newdelta = newval - oldval |
784 | if oldval is not None: | ||
785 | newdelta = newval - oldval[0] | ||
786 | self.values[key] = (newval, newdelta) | 887 | self.values[key] = (newval, newdelta) |
787 | return self.values | 888 | return self.values |
788 | 889 | ||
789 | LABEL_WIDTH = 40 | 890 | LABEL_WIDTH = 40 |
790 | NUMBER_WIDTH = 10 | 891 | NUMBER_WIDTH = 10 |
892 | DELAY_INITIAL = 0.25 | ||
893 | DELAY_REGULAR = 3.0 | ||
894 | MAX_GUEST_NAME_LEN = 48 | ||
895 | MAX_REGEX_LEN = 44 | ||
896 | DEFAULT_REGEX = r'^[^\(]*$' | ||
897 | |||
791 | 898 | ||
792 | class Tui(object): | 899 | class Tui(object): |
793 | """Instruments curses to draw a nice text ui.""" | 900 | """Instruments curses to draw a nice text ui.""" |
794 | def __init__(self, stats): | 901 | def __init__(self, stats): |
795 | self.stats = stats | 902 | self.stats = stats |
796 | self.screen = None | 903 | self.screen = None |
797 | self.drilldown = False | ||
798 | self.update_drilldown() | 904 | self.update_drilldown() |
799 | 905 | ||
800 | def __enter__(self): | 906 | def __enter__(self): |
@@ -809,7 +915,14 @@ class Tui(object): | |||
809 | # return from C start_color() is ignorable. | 915 | # return from C start_color() is ignorable. |
810 | try: | 916 | try: |
811 | curses.start_color() | 917 | curses.start_color() |
812 | except: | 918 | except curses.error: |
919 | pass | ||
920 | |||
921 | # Hide cursor in extra statement as some monochrome terminals | ||
922 | # might support hiding but not colors. | ||
923 | try: | ||
924 | curses.curs_set(0) | ||
925 | except curses.error: | ||
813 | pass | 926 | pass |
814 | 927 | ||
815 | curses.use_default_colors() | 928 | curses.use_default_colors() |
@@ -827,36 +940,60 @@ class Tui(object): | |||
827 | def update_drilldown(self): | 940 | def update_drilldown(self): |
828 | """Sets or removes a filter that only allows fields without braces.""" | 941 | """Sets or removes a filter that only allows fields without braces.""" |
829 | if not self.stats.fields_filter: | 942 | if not self.stats.fields_filter: |
830 | self.stats.fields_filter = r'^[^\(]*$' | 943 | self.stats.fields_filter = DEFAULT_REGEX |
831 | 944 | ||
832 | elif self.stats.fields_filter == r'^[^\(]*$': | 945 | elif self.stats.fields_filter == DEFAULT_REGEX: |
833 | self.stats.fields_filter = None | 946 | self.stats.fields_filter = None |
834 | 947 | ||
835 | def update_pid(self, pid): | 948 | def update_pid(self, pid): |
836 | """Propagates pid selection to stats object.""" | 949 | """Propagates pid selection to stats object.""" |
837 | self.stats.pid_filter = pid | 950 | self.stats.pid_filter = pid |
838 | 951 | ||
839 | def refresh(self, sleeptime): | 952 | def refresh_header(self, pid=None): |
840 | """Refreshes on-screen data.""" | 953 | """Refreshes the header.""" |
954 | if pid is None: | ||
955 | pid = self.stats.pid_filter | ||
841 | self.screen.erase() | 956 | self.screen.erase() |
842 | if self.stats.pid_filter > 0: | 957 | gname = get_gname_from_pid(pid) |
843 | self.screen.addstr(0, 0, 'kvm statistics - pid {0}' | 958 | if gname: |
844 | .format(self.stats.pid_filter), | 959 | gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...' |
845 | curses.A_BOLD) | 960 | if len(gname) > MAX_GUEST_NAME_LEN |
961 | else gname)) | ||
962 | if pid > 0: | ||
963 | self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}' | ||
964 | .format(pid, gname), curses.A_BOLD) | ||
846 | else: | 965 | else: |
847 | self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) | 966 | self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) |
967 | if self.stats.fields_filter and self.stats.fields_filter \ | ||
968 | != DEFAULT_REGEX: | ||
969 | regex = self.stats.fields_filter | ||
970 | if len(regex) > MAX_REGEX_LEN: | ||
971 | regex = regex[:MAX_REGEX_LEN] + '...' | ||
972 | self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex)) | ||
848 | self.screen.addstr(2, 1, 'Event') | 973 | self.screen.addstr(2, 1, 'Event') |
849 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - | 974 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - |
850 | len('Total'), 'Total') | 975 | len('Total'), 'Total') |
851 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - | 976 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 - |
977 | len('%Total'), '%Total') | ||
978 | self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 + 8 - | ||
852 | len('Current'), 'Current') | 979 | len('Current'), 'Current') |
980 | self.screen.addstr(4, 1, 'Collecting data...') | ||
981 | self.screen.refresh() | ||
982 | |||
983 | def refresh_body(self, sleeptime): | ||
853 | row = 3 | 984 | row = 3 |
985 | self.screen.move(row, 0) | ||
986 | self.screen.clrtobot() | ||
854 | stats = self.stats.get() | 987 | stats = self.stats.get() |
988 | |||
855 | def sortkey(x): | 989 | def sortkey(x): |
856 | if stats[x][1]: | 990 | if stats[x][1]: |
857 | return (-stats[x][1], -stats[x][0]) | 991 | return (-stats[x][1], -stats[x][0]) |
858 | else: | 992 | else: |
859 | return (0, -stats[x][0]) | 993 | return (0, -stats[x][0]) |
994 | total = 0. | ||
995 | for val in stats.values(): | ||
996 | total += val[0] | ||
860 | for key in sorted(stats.keys(), key=sortkey): | 997 | for key in sorted(stats.keys(), key=sortkey): |
861 | 998 | ||
862 | if row >= self.screen.getmaxyx()[0]: | 999 | if row >= self.screen.getmaxyx()[0]: |
@@ -869,6 +1006,8 @@ class Tui(object): | |||
869 | col += LABEL_WIDTH | 1006 | col += LABEL_WIDTH |
870 | self.screen.addstr(row, col, '%10d' % (values[0],)) | 1007 | self.screen.addstr(row, col, '%10d' % (values[0],)) |
871 | col += NUMBER_WIDTH | 1008 | col += NUMBER_WIDTH |
1009 | self.screen.addstr(row, col, '%7.1f' % (values[0] * 100 / total,)) | ||
1010 | col += 7 | ||
872 | if values[1] is not None: | 1011 | if values[1] is not None: |
873 | self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) | 1012 | self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) |
874 | row += 1 | 1013 | row += 1 |
@@ -893,20 +1032,24 @@ class Tui(object): | |||
893 | regex = self.screen.getstr() | 1032 | regex = self.screen.getstr() |
894 | curses.noecho() | 1033 | curses.noecho() |
895 | if len(regex) == 0: | 1034 | if len(regex) == 0: |
1035 | self.stats.fields_filter = DEFAULT_REGEX | ||
1036 | self.refresh_header() | ||
896 | return | 1037 | return |
897 | try: | 1038 | try: |
898 | re.compile(regex) | 1039 | re.compile(regex) |
899 | self.stats.fields_filter = regex | 1040 | self.stats.fields_filter = regex |
1041 | self.refresh_header() | ||
900 | return | 1042 | return |
901 | except re.error: | 1043 | except re.error: |
902 | continue | 1044 | continue |
903 | 1045 | ||
904 | def show_vm_selection(self): | 1046 | def show_vm_selection_by_pid(self): |
905 | """Draws PID selection mask. | 1047 | """Draws PID selection mask. |
906 | 1048 | ||
907 | Asks for a pid until a valid pid or 0 has been entered. | 1049 | Asks for a pid until a valid pid or 0 has been entered. |
908 | 1050 | ||
909 | """ | 1051 | """ |
1052 | msg = '' | ||
910 | while True: | 1053 | while True: |
911 | self.screen.erase() | 1054 | self.screen.erase() |
912 | self.screen.addstr(0, 0, | 1055 | self.screen.addstr(0, 0, |
@@ -915,6 +1058,7 @@ class Tui(object): | |||
915 | self.screen.addstr(1, 0, | 1058 | self.screen.addstr(1, 0, |
916 | 'This might limit the shown data to the trace ' | 1059 | 'This might limit the shown data to the trace ' |
917 | 'statistics.') | 1060 | 'statistics.') |
1061 | self.screen.addstr(5, 0, msg) | ||
918 | 1062 | ||
919 | curses.echo() | 1063 | curses.echo() |
920 | self.screen.addstr(3, 0, "Pid [0 or pid]: ") | 1064 | self.screen.addstr(3, 0, "Pid [0 or pid]: ") |
@@ -922,60 +1066,128 @@ class Tui(object): | |||
922 | curses.noecho() | 1066 | curses.noecho() |
923 | 1067 | ||
924 | try: | 1068 | try: |
925 | pid = int(pid) | 1069 | if len(pid) > 0: |
926 | 1070 | pid = int(pid) | |
927 | if pid == 0: | 1071 | if pid != 0 and not os.path.isdir(os.path.join('/proc/', |
928 | self.update_pid(pid) | 1072 | str(pid))): |
929 | break | 1073 | msg = '"' + str(pid) + '": Not a running process' |
930 | else: | ||
931 | if not os.path.isdir(os.path.join('/proc/', str(pid))): | ||
932 | continue | 1074 | continue |
933 | else: | 1075 | else: |
934 | self.update_pid(pid) | 1076 | pid = 0 |
935 | break | 1077 | self.refresh_header(pid) |
1078 | self.update_pid(pid) | ||
1079 | break | ||
936 | 1080 | ||
937 | except ValueError: | 1081 | except ValueError: |
1082 | msg = '"' + str(pid) + '": Not a valid pid' | ||
938 | continue | 1083 | continue |
939 | 1084 | ||
1085 | def show_vm_selection_by_guest_name(self): | ||
1086 | """Draws guest selection mask. | ||
1087 | |||
1088 | Asks for a guest name until a valid guest name or '' is entered. | ||
1089 | |||
1090 | """ | ||
1091 | msg = '' | ||
1092 | while True: | ||
1093 | self.screen.erase() | ||
1094 | self.screen.addstr(0, 0, | ||
1095 | 'Show statistics for specific guest.', | ||
1096 | curses.A_BOLD) | ||
1097 | self.screen.addstr(1, 0, | ||
1098 | 'This might limit the shown data to the trace ' | ||
1099 | 'statistics.') | ||
1100 | self.screen.addstr(5, 0, msg) | ||
1101 | curses.echo() | ||
1102 | self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") | ||
1103 | gname = self.screen.getstr() | ||
1104 | curses.noecho() | ||
1105 | |||
1106 | if not gname: | ||
1107 | self.refresh_header(0) | ||
1108 | self.update_pid(0) | ||
1109 | break | ||
1110 | else: | ||
1111 | pids = [] | ||
1112 | try: | ||
1113 | pids = get_pid_from_gname(gname) | ||
1114 | except: | ||
1115 | msg = '"' + gname + '": Internal error while searching, ' \ | ||
1116 | 'use pid filter instead' | ||
1117 | continue | ||
1118 | if len(pids) == 0: | ||
1119 | msg = '"' + gname + '": Not an active guest' | ||
1120 | continue | ||
1121 | if len(pids) > 1: | ||
1122 | msg = '"' + gname + '": Multiple matches found, use pid ' \ | ||
1123 | 'filter instead' | ||
1124 | continue | ||
1125 | self.refresh_header(pids[0]) | ||
1126 | self.update_pid(pids[0]) | ||
1127 | break | ||
1128 | |||
940 | def show_stats(self): | 1129 | def show_stats(self): |
941 | """Refreshes the screen and processes user input.""" | 1130 | """Refreshes the screen and processes user input.""" |
942 | sleeptime = 0.25 | 1131 | sleeptime = DELAY_INITIAL |
1132 | self.refresh_header() | ||
943 | while True: | 1133 | while True: |
944 | self.refresh(sleeptime) | 1134 | self.refresh_body(sleeptime) |
945 | curses.halfdelay(int(sleeptime * 10)) | 1135 | curses.halfdelay(int(sleeptime * 10)) |
946 | sleeptime = 3 | 1136 | sleeptime = DELAY_REGULAR |
947 | try: | 1137 | try: |
948 | char = self.screen.getkey() | 1138 | char = self.screen.getkey() |
949 | if char == 'x': | 1139 | if char == 'x': |
950 | self.drilldown = not self.drilldown | 1140 | self.refresh_header() |
951 | self.update_drilldown() | 1141 | self.update_drilldown() |
1142 | sleeptime = DELAY_INITIAL | ||
952 | if char == 'q': | 1143 | if char == 'q': |
953 | break | 1144 | break |
1145 | if char == 'c': | ||
1146 | self.stats.fields_filter = DEFAULT_REGEX | ||
1147 | self.refresh_header(0) | ||
1148 | self.update_pid(0) | ||
1149 | sleeptime = DELAY_INITIAL | ||
954 | if char == 'f': | 1150 | if char == 'f': |
955 | self.show_filter_selection() | 1151 | self.show_filter_selection() |
1152 | sleeptime = DELAY_INITIAL | ||
1153 | if char == 'g': | ||
1154 | self.show_vm_selection_by_guest_name() | ||
1155 | sleeptime = DELAY_INITIAL | ||
956 | if char == 'p': | 1156 | if char == 'p': |
957 | self.show_vm_selection() | 1157 | self.show_vm_selection_by_pid() |
1158 | sleeptime = DELAY_INITIAL | ||
1159 | if char == 'r': | ||
1160 | self.refresh_header() | ||
1161 | self.stats.reset() | ||
1162 | sleeptime = DELAY_INITIAL | ||
958 | except KeyboardInterrupt: | 1163 | except KeyboardInterrupt: |
959 | break | 1164 | break |
960 | except curses.error: | 1165 | except curses.error: |
961 | continue | 1166 | continue |
962 | 1167 | ||
1168 | |||
963 | def batch(stats): | 1169 | def batch(stats): |
964 | """Prints statistics in a key, value format.""" | 1170 | """Prints statistics in a key, value format.""" |
965 | s = stats.get() | 1171 | try: |
966 | time.sleep(1) | 1172 | s = stats.get() |
967 | s = stats.get() | 1173 | time.sleep(1) |
968 | for key in sorted(s.keys()): | 1174 | s = stats.get() |
969 | values = s[key] | 1175 | for key in sorted(s.keys()): |
970 | print '%-42s%10d%10d' % (key, values[0], values[1]) | 1176 | values = s[key] |
1177 | print '%-42s%10d%10d' % (key, values[0], values[1]) | ||
1178 | except KeyboardInterrupt: | ||
1179 | pass | ||
1180 | |||
971 | 1181 | ||
972 | def log(stats): | 1182 | def log(stats): |
973 | """Prints statistics as reiterating key block, multiple value blocks.""" | 1183 | """Prints statistics as reiterating key block, multiple value blocks.""" |
974 | keys = sorted(stats.get().iterkeys()) | 1184 | keys = sorted(stats.get().iterkeys()) |
1185 | |||
975 | def banner(): | 1186 | def banner(): |
976 | for k in keys: | 1187 | for k in keys: |
977 | print '%s' % k, | 1188 | print '%s' % k, |
978 | 1189 | ||
1190 | |||
979 | def statline(): | 1191 | def statline(): |
980 | s = stats.get() | 1192 | s = stats.get() |
981 | for k in keys: | 1193 | for k in keys: |
@@ -984,11 +1196,15 @@ def log(stats): | |||
984 | line = 0 | 1196 | line = 0 |
985 | banner_repeat = 20 | 1197 | banner_repeat = 20 |
986 | while True: | 1198 | while True: |
987 | time.sleep(1) | 1199 | try: |
988 | if line % banner_repeat == 0: | 1200 | time.sleep(1) |
989 | banner() | 1201 | if line % banner_repeat == 0: |
990 | statline() | 1202 | banner() |
991 | line += 1 | 1203 | statline() |
1204 | line += 1 | ||
1205 | except KeyboardInterrupt: | ||
1206 | break | ||
1207 | |||
992 | 1208 | ||
993 | def get_options(): | 1209 | def get_options(): |
994 | """Returns processed program arguments.""" | 1210 | """Returns processed program arguments.""" |
@@ -1009,6 +1225,16 @@ Requirements: | |||
1009 | CAP_SYS_ADMIN and perf events are used. | 1225 | CAP_SYS_ADMIN and perf events are used. |
1010 | - CAP_SYS_RESOURCE if the hard limit is not high enough to allow | 1226 | - CAP_SYS_RESOURCE if the hard limit is not high enough to allow |
1011 | the large number of files that are possibly opened. | 1227 | the large number of files that are possibly opened. |
1228 | |||
1229 | Interactive Commands: | ||
1230 | c clear filter | ||
1231 | f filter by regular expression | ||
1232 | g filter by guest name | ||
1233 | p filter by PID | ||
1234 | q quit | ||
1235 | x toggle reporting of stats for individual child trace events | ||
1236 | r reset stats | ||
1237 | Press any other key to refresh statistics immediately. | ||
1012 | """ | 1238 | """ |
1013 | 1239 | ||
1014 | class PlainHelpFormatter(optparse.IndentedHelpFormatter): | 1240 | class PlainHelpFormatter(optparse.IndentedHelpFormatter): |
@@ -1018,6 +1244,22 @@ Requirements: | |||
1018 | else: | 1244 | else: |
1019 | return "" | 1245 | return "" |
1020 | 1246 | ||
1247 | def cb_guest_to_pid(option, opt, val, parser): | ||
1248 | try: | ||
1249 | pids = get_pid_from_gname(val) | ||
1250 | except: | ||
1251 | raise optparse.OptionValueError('Error while searching for guest ' | ||
1252 | '"{}", use "-p" to specify a pid ' | ||
1253 | 'instead'.format(val)) | ||
1254 | if len(pids) == 0: | ||
1255 | raise optparse.OptionValueError('No guest by the name "{}" ' | ||
1256 | 'found'.format(val)) | ||
1257 | if len(pids) > 1: | ||
1258 | raise optparse.OptionValueError('Multiple processes found (pids: ' | ||
1259 | '{}) - use "-p" to specify a pid ' | ||
1260 | 'instead'.format(" ".join(pids))) | ||
1261 | parser.values.pid = pids[0] | ||
1262 | |||
1021 | optparser = optparse.OptionParser(description=description_text, | 1263 | optparser = optparse.OptionParser(description=description_text, |
1022 | formatter=PlainHelpFormatter()) | 1264 | formatter=PlainHelpFormatter()) |
1023 | optparser.add_option('-1', '--once', '--batch', | 1265 | optparser.add_option('-1', '--once', '--batch', |
@@ -1051,15 +1293,24 @@ Requirements: | |||
1051 | help='fields to display (regex)', | 1293 | help='fields to display (regex)', |
1052 | ) | 1294 | ) |
1053 | optparser.add_option('-p', '--pid', | 1295 | optparser.add_option('-p', '--pid', |
1054 | action='store', | 1296 | action='store', |
1055 | default=0, | 1297 | default=0, |
1056 | type=int, | 1298 | type='int', |
1057 | dest='pid', | 1299 | dest='pid', |
1058 | help='restrict statistics to pid', | 1300 | help='restrict statistics to pid', |
1059 | ) | 1301 | ) |
1302 | optparser.add_option('-g', '--guest', | ||
1303 | action='callback', | ||
1304 | type='string', | ||
1305 | dest='pid', | ||
1306 | metavar='GUEST', | ||
1307 | help='restrict statistics to guest by name', | ||
1308 | callback=cb_guest_to_pid, | ||
1309 | ) | ||
1060 | (options, _) = optparser.parse_args(sys.argv) | 1310 | (options, _) = optparser.parse_args(sys.argv) |
1061 | return options | 1311 | return options |
1062 | 1312 | ||
1313 | |||
1063 | def get_providers(options): | 1314 | def get_providers(options): |
1064 | """Returns a list of data providers depending on the passed options.""" | 1315 | """Returns a list of data providers depending on the passed options.""" |
1065 | providers = [] | 1316 | providers = [] |
@@ -1073,6 +1324,7 @@ def get_providers(options): | |||
1073 | 1324 | ||
1074 | return providers | 1325 | return providers |
1075 | 1326 | ||
1327 | |||
1076 | def check_access(options): | 1328 | def check_access(options): |
1077 | """Exits if the current user can't access all needed directories.""" | 1329 | """Exits if the current user can't access all needed directories.""" |
1078 | if not os.path.exists('/sys/kernel/debug'): | 1330 | if not os.path.exists('/sys/kernel/debug'): |
@@ -1086,8 +1338,8 @@ def check_access(options): | |||
1086 | "Also ensure, that the kvm modules are loaded.\n") | 1338 | "Also ensure, that the kvm modules are loaded.\n") |
1087 | sys.exit(1) | 1339 | sys.exit(1) |
1088 | 1340 | ||
1089 | if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints | 1341 | if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or |
1090 | or not options.debugfs): | 1342 | not options.debugfs): |
1091 | sys.stderr.write("Please enable CONFIG_TRACING in your kernel " | 1343 | sys.stderr.write("Please enable CONFIG_TRACING in your kernel " |
1092 | "when using the option -t (default).\n" | 1344 | "when using the option -t (default).\n" |
1093 | "If it is enabled, make {0} readable by the " | 1345 | "If it is enabled, make {0} readable by the " |
@@ -1098,10 +1350,11 @@ def check_access(options): | |||
1098 | 1350 | ||
1099 | sys.stderr.write("Falling back to debugfs statistics!\n") | 1351 | sys.stderr.write("Falling back to debugfs statistics!\n") |
1100 | options.debugfs = True | 1352 | options.debugfs = True |
1101 | sleep(5) | 1353 | time.sleep(5) |
1102 | 1354 | ||
1103 | return options | 1355 | return options |
1104 | 1356 | ||
1357 | |||
1105 | def main(): | 1358 | def main(): |
1106 | options = get_options() | 1359 | options = get_options() |
1107 | options = check_access(options) | 1360 | options = check_access(options) |
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index b92a153d7115..109431bdc63c 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt | |||
@@ -18,11 +18,33 @@ state transitions such as guest mode entry and exit. | |||
18 | This tool is useful for observing guest behavior from the host perspective. | 18 | This tool is useful for observing guest behavior from the host perspective. |
19 | Often conclusions about performance or buggy behavior can be drawn from the | 19 | Often conclusions about performance or buggy behavior can be drawn from the |
20 | output. | 20 | output. |
21 | While running in regular mode, use any of the keys listed in section | ||
22 | 'Interactive Commands' below. | ||
23 | Use batch and logging modes for scripting purposes. | ||
21 | 24 | ||
22 | The set of KVM kernel module trace events may be specific to the kernel version | 25 | The set of KVM kernel module trace events may be specific to the kernel version |
23 | or architecture. It is best to check the KVM kernel module source code for the | 26 | or architecture. It is best to check the KVM kernel module source code for the |
24 | meaning of events. | 27 | meaning of events. |
25 | 28 | ||
29 | INTERACTIVE COMMANDS | ||
30 | -------------------- | ||
31 | [horizontal] | ||
32 | *c*:: clear filter | ||
33 | |||
34 | *f*:: filter by regular expression | ||
35 | |||
36 | *g*:: filter by guest name | ||
37 | |||
38 | *p*:: filter by PID | ||
39 | |||
40 | *q*:: quit | ||
41 | |||
42 | *r*:: reset stats | ||
43 | |||
44 | *x*:: toggle reporting of stats for child trace events | ||
45 | |||
46 | Press any other key to refresh statistics immediately. | ||
47 | |||
26 | OPTIONS | 48 | OPTIONS |
27 | ------- | 49 | ------- |
28 | -1:: | 50 | -1:: |
@@ -46,6 +68,10 @@ OPTIONS | |||
46 | --pid=<pid>:: | 68 | --pid=<pid>:: |
47 | limit statistics to one virtual machine (pid) | 69 | limit statistics to one virtual machine (pid) |
48 | 70 | ||
71 | -g<guest>:: | ||
72 | --guest=<guest_name>:: | ||
73 | limit statistics to one virtual machine (guest name) | ||
74 | |||
49 | -f<fields>:: | 75 | -f<fields>:: |
50 | --fields=<fields>:: | 76 | --fields=<fields>:: |
51 | fields to display (regex) | 77 | fields to display (regex) |
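[Editor's note, not part of the patch] The new -g/--guest option documented above is resolved inside kvm_stat itself: the guest name is looked up among running qemu processes (get_pid_from_gname) and translated to a pid, so the two invocations below are equivalent (guest name and pid are purely illustrative):

    kvm_stat -g fedora25
    kvm_stat -p 12345

If the name matches no process, or more than one, the option callback refuses it and asks for -p instead.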
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a29786dd9522..a8d540398bbd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, | |||
490 | mutex_lock(&kvm->irq_lock); | 490 | mutex_lock(&kvm->irq_lock); |
491 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); | 491 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); |
492 | mutex_unlock(&kvm->irq_lock); | 492 | mutex_unlock(&kvm->irq_lock); |
493 | kvm_vcpu_request_scan_ioapic(kvm); | 493 | kvm_arch_post_irq_ack_notifier_list_update(kvm); |
494 | } | 494 | } |
495 | 495 | ||
496 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | 496 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, |
@@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | |||
500 | hlist_del_init_rcu(&kian->link); | 500 | hlist_del_init_rcu(&kian->link); |
501 | mutex_unlock(&kvm->irq_lock); | 501 | mutex_unlock(&kvm->irq_lock); |
502 | synchronize_srcu(&kvm->irq_srcu); | 502 | synchronize_srcu(&kvm->irq_srcu); |
503 | kvm_vcpu_request_scan_ioapic(kvm); | 503 | kvm_arch_post_irq_ack_notifier_list_update(kvm); |
504 | } | 504 | } |
505 | #endif | 505 | #endif |
506 | 506 | ||
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, | |||
870 | continue; | 870 | continue; |
871 | 871 | ||
872 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); | 872 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); |
873 | kvm->buses[bus_idx]->ioeventfd_count--; | 873 | if (kvm->buses[bus_idx]) |
874 | kvm->buses[bus_idx]->ioeventfd_count--; | ||
874 | ioeventfd_release(p); | 875 | ioeventfd_release(p); |
875 | ret = 0; | 876 | ret = 0; |
876 | break; | 877 | break; |
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 3bcc9990adf7..cc30d01a56be 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
@@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm, | |||
142 | struct kvm_kernel_irq_routing_entry *e, | 142 | struct kvm_kernel_irq_routing_entry *e, |
143 | const struct kvm_irq_routing_entry *ue) | 143 | const struct kvm_irq_routing_entry *ue) |
144 | { | 144 | { |
145 | int r = -EINVAL; | ||
146 | struct kvm_kernel_irq_routing_entry *ei; | 145 | struct kvm_kernel_irq_routing_entry *ei; |
146 | int r; | ||
147 | 147 | ||
148 | /* | 148 | /* |
149 | * Do not allow GSI to be mapped to the same irqchip more than once. | 149 | * Do not allow GSI to be mapped to the same irqchip more than once. |
@@ -153,20 +153,19 @@ static int setup_routing_entry(struct kvm *kvm, | |||
153 | if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || | 153 | if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || |
154 | ue->type != KVM_IRQ_ROUTING_IRQCHIP || | 154 | ue->type != KVM_IRQ_ROUTING_IRQCHIP || |
155 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | 155 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) |
156 | return r; | 156 | return -EINVAL; |
157 | 157 | ||
158 | e->gsi = ue->gsi; | 158 | e->gsi = ue->gsi; |
159 | e->type = ue->type; | 159 | e->type = ue->type; |
160 | r = kvm_set_routing_entry(kvm, e, ue); | 160 | r = kvm_set_routing_entry(kvm, e, ue); |
161 | if (r) | 161 | if (r) |
162 | goto out; | 162 | return r; |
163 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP) | 163 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP) |
164 | rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; | 164 | rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; |
165 | 165 | ||
166 | hlist_add_head(&e->link, &rt->map[e->gsi]); | 166 | hlist_add_head(&e->link, &rt->map[e->gsi]); |
167 | r = 0; | 167 | |
168 | out: | 168 | return 0; |
169 | return r; | ||
170 | } | 169 | } |
171 | 170 | ||
172 | void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) | 171 | void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) |
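[Editor's note, not part of the patch] setup_routing_entry() above is what consumes the table userspace installs with KVM_SET_GSI_ROUTING. A rough userspace sketch of building one such entry, assuming an x86 host where KVM_IRQCHIP_IOAPIC is defined by <linux/kvm.h>; the fds and the GSI/pin choice are purely illustrative:

    #include <linux/kvm.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    static int route_gsi_to_ioapic_pin(int vm_fd, unsigned int gsi,
                                       unsigned int pin)
    {
            struct kvm_irq_routing *r;
            size_t sz = sizeof(*r) + sizeof(struct kvm_irq_routing_entry);
            int ret;

            r = calloc(1, sz);
            if (!r)
                    return -1;

            r->nr = 1;
            r->entries[0].gsi = gsi;
            r->entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
            r->entries[0].u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC;
            r->entries[0].u.irqchip.pin = pin;

            /* setup_routing_entry() rejects mapping a GSI twice to one irqchip. */
            ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, r);
            free(r);
            return ret;
    }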
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1b0da5771f71..4e19bc812c29 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -727,8 +727,11 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
727 | list_del(&kvm->vm_list); | 727 | list_del(&kvm->vm_list); |
728 | spin_unlock(&kvm_lock); | 728 | spin_unlock(&kvm_lock); |
729 | kvm_free_irq_routing(kvm); | 729 | kvm_free_irq_routing(kvm); |
730 | for (i = 0; i < KVM_NR_BUSES; i++) | 730 | for (i = 0; i < KVM_NR_BUSES; i++) { |
731 | kvm_io_bus_destroy(kvm->buses[i]); | 731 | if (kvm->buses[i]) |
732 | kvm_io_bus_destroy(kvm->buses[i]); | ||
733 | kvm->buses[i] = NULL; | ||
734 | } | ||
732 | kvm_coalesced_mmio_free(kvm); | 735 | kvm_coalesced_mmio_free(kvm); |
733 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 736 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
734 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 737 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
@@ -1016,8 +1019,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1016 | 1019 | ||
1017 | old_memslots = install_new_memslots(kvm, as_id, slots); | 1020 | old_memslots = install_new_memslots(kvm, as_id, slots); |
1018 | 1021 | ||
1019 | /* slot was deleted or moved, clear iommu mapping */ | ||
1020 | kvm_iommu_unmap_pages(kvm, &old); | ||
1021 | /* From this point no new shadow pages pointing to a deleted, | 1022 | /* From this point no new shadow pages pointing to a deleted, |
1022 | * or moved, memslot will be created. | 1023 | * or moved, memslot will be created. |
1023 | * | 1024 | * |
@@ -1052,21 +1053,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1052 | 1053 | ||
1053 | kvm_free_memslot(kvm, &old, &new); | 1054 | kvm_free_memslot(kvm, &old, &new); |
1054 | kvfree(old_memslots); | 1055 | kvfree(old_memslots); |
1055 | |||
1056 | /* | ||
1057 | * IOMMU mapping: New slots need to be mapped. Old slots need to be | ||
1058 | * un-mapped and re-mapped if their base changes. Since base change | ||
1059 | * unmapping is handled above with slot deletion, mapping alone is | ||
1060 | * needed here. Anything else the iommu might care about for existing | ||
1061 | * slots (size changes, userspace addr changes and read-only flag | ||
1062 | * changes) is disallowed above, so any other attribute changes getting | ||
1063 | * here can be skipped. | ||
1064 | */ | ||
1065 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { | ||
1066 | r = kvm_iommu_map_pages(kvm, &new); | ||
1067 | return r; | ||
1068 | } | ||
1069 | |||
1070 | return 0; | 1056 | return 0; |
1071 | 1057 | ||
1072 | out_slots: | 1058 | out_slots: |
@@ -2363,7 +2349,7 @@ static int kvm_vcpu_fault(struct vm_fault *vmf) | |||
2363 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) | 2349 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) |
2364 | page = virt_to_page(vcpu->arch.pio_data); | 2350 | page = virt_to_page(vcpu->arch.pio_data); |
2365 | #endif | 2351 | #endif |
2366 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 2352 | #ifdef CONFIG_KVM_MMIO |
2367 | else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) | 2353 | else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) |
2368 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); | 2354 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); |
2369 | #endif | 2355 | #endif |
@@ -2928,6 +2914,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) | |||
2928 | case KVM_CAP_IOEVENTFD_ANY_LENGTH: | 2914 | case KVM_CAP_IOEVENTFD_ANY_LENGTH: |
2929 | case KVM_CAP_CHECK_EXTENSION_VM: | 2915 | case KVM_CAP_CHECK_EXTENSION_VM: |
2930 | return 1; | 2916 | return 1; |
2917 | #ifdef CONFIG_KVM_MMIO | ||
2918 | case KVM_CAP_COALESCED_MMIO: | ||
2919 | return KVM_COALESCED_MMIO_PAGE_OFFSET; | ||
2920 | #endif | ||
2931 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | 2921 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
2932 | case KVM_CAP_IRQ_ROUTING: | 2922 | case KVM_CAP_IRQ_ROUTING: |
2933 | return KVM_MAX_IRQ_ROUTES; | 2923 | return KVM_MAX_IRQ_ROUTES; |
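[Editor's note, not part of the patch] The hunk above lets generic code answer KVM_CAP_COALESCED_MMIO whenever CONFIG_KVM_MMIO is set, instead of each architecture doing it. Userspace usage is unchanged; as a rough sketch (illustrative fds, minimal error handling), the returned value is the page offset at which the ring can be mapped from the vcpu fd:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static struct kvm_coalesced_mmio_ring *map_coalesced_ring(int kvm_fd,
                                                               int vcpu_fd)
    {
            long pgsz = sysconf(_SC_PAGESIZE);
            int off = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
            void *p;

            if (off <= 0)
                    return NULL;    /* not supported on this build/arch */

            /* The ring lives at a fixed page offset inside the vcpu mmap area. */
            p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE, MAP_SHARED,
                     vcpu_fd, (off_t)off * pgsz);
            return p == MAP_FAILED ? NULL : p;
    }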
@@ -2977,7 +2967,7 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2977 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | 2967 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); |
2978 | break; | 2968 | break; |
2979 | } | 2969 | } |
2980 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 2970 | #ifdef CONFIG_KVM_MMIO |
2981 | case KVM_REGISTER_COALESCED_MMIO: { | 2971 | case KVM_REGISTER_COALESCED_MMIO: { |
2982 | struct kvm_coalesced_mmio_zone zone; | 2972 | struct kvm_coalesced_mmio_zone zone; |
2983 | 2973 | ||
@@ -3075,8 +3065,11 @@ static long kvm_vm_ioctl(struct file *filp, | |||
3075 | routing.nr * sizeof(*entries))) | 3065 | routing.nr * sizeof(*entries))) |
3076 | goto out_free_irq_routing; | 3066 | goto out_free_irq_routing; |
3077 | } | 3067 | } |
3068 | /* avoid races with KVM_CREATE_IRQCHIP on x86 */ | ||
3069 | mutex_lock(&kvm->lock); | ||
3078 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | 3070 | r = kvm_set_irq_routing(kvm, entries, routing.nr, |
3079 | routing.flags); | 3071 | routing.flags); |
3072 | mutex_unlock(&kvm->lock); | ||
3080 | out_free_irq_routing: | 3073 | out_free_irq_routing: |
3081 | vfree(entries); | 3074 | vfree(entries); |
3082 | break; | 3075 | break; |
@@ -3169,7 +3162,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) | |||
3169 | kvm = kvm_create_vm(type); | 3162 | kvm = kvm_create_vm(type); |
3170 | if (IS_ERR(kvm)) | 3163 | if (IS_ERR(kvm)) |
3171 | return PTR_ERR(kvm); | 3164 | return PTR_ERR(kvm); |
3172 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 3165 | #ifdef CONFIG_KVM_MMIO |
3173 | r = kvm_coalesced_mmio_init(kvm); | 3166 | r = kvm_coalesced_mmio_init(kvm); |
3174 | if (r < 0) { | 3167 | if (r < 0) { |
3175 | kvm_put_kvm(kvm); | 3168 | kvm_put_kvm(kvm); |
@@ -3222,7 +3215,7 @@ static long kvm_dev_ioctl(struct file *filp, | |||
3222 | #ifdef CONFIG_X86 | 3215 | #ifdef CONFIG_X86 |
3223 | r += PAGE_SIZE; /* pio data page */ | 3216 | r += PAGE_SIZE; /* pio data page */ |
3224 | #endif | 3217 | #endif |
3225 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 3218 | #ifdef CONFIG_KVM_MMIO |
3226 | r += PAGE_SIZE; /* coalesced mmio ring page */ | 3219 | r += PAGE_SIZE; /* coalesced mmio ring page */ |
3227 | #endif | 3220 | #endif |
3228 | break; | 3221 | break; |
@@ -3470,6 +3463,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
3470 | }; | 3463 | }; |
3471 | 3464 | ||
3472 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); | 3465 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); |
3466 | if (!bus) | ||
3467 | return -ENOMEM; | ||
3473 | r = __kvm_io_bus_write(vcpu, bus, &range, val); | 3468 | r = __kvm_io_bus_write(vcpu, bus, &range, val); |
3474 | return r < 0 ? r : 0; | 3469 | return r < 0 ? r : 0; |
3475 | } | 3470 | } |
@@ -3487,6 +3482,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, | |||
3487 | }; | 3482 | }; |
3488 | 3483 | ||
3489 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); | 3484 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); |
3485 | if (!bus) | ||
3486 | return -ENOMEM; | ||
3490 | 3487 | ||
3491 | /* First try the device referenced by cookie. */ | 3488 | /* First try the device referenced by cookie. */ |
3492 | if ((cookie >= 0) && (cookie < bus->dev_count) && | 3489 | if ((cookie >= 0) && (cookie < bus->dev_count) && |
@@ -3537,6 +3534,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
3537 | }; | 3534 | }; |
3538 | 3535 | ||
3539 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); | 3536 | bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); |
3537 | if (!bus) | ||
3538 | return -ENOMEM; | ||
3540 | r = __kvm_io_bus_read(vcpu, bus, &range, val); | 3539 | r = __kvm_io_bus_read(vcpu, bus, &range, val); |
3541 | return r < 0 ? r : 0; | 3540 | return r < 0 ? r : 0; |
3542 | } | 3541 | } |
@@ -3549,6 +3548,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
3549 | struct kvm_io_bus *new_bus, *bus; | 3548 | struct kvm_io_bus *new_bus, *bus; |
3550 | 3549 | ||
3551 | bus = kvm->buses[bus_idx]; | 3550 | bus = kvm->buses[bus_idx]; |
3551 | if (!bus) | ||
3552 | return -ENOMEM; | ||
3553 | |||
3552 | /* exclude ioeventfd which is limited by maximum fd */ | 3554 | /* exclude ioeventfd which is limited by maximum fd */ |
3553 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | 3555 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
3554 | return -ENOSPC; | 3556 | return -ENOSPC; |
@@ -3568,37 +3570,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
3568 | } | 3570 | } |
3569 | 3571 | ||
3570 | /* Caller must hold slots_lock. */ | 3572 | /* Caller must hold slots_lock. */ |
3571 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 3573 | void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
3572 | struct kvm_io_device *dev) | 3574 | struct kvm_io_device *dev) |
3573 | { | 3575 | { |
3574 | int i, r; | 3576 | int i; |
3575 | struct kvm_io_bus *new_bus, *bus; | 3577 | struct kvm_io_bus *new_bus, *bus; |
3576 | 3578 | ||
3577 | bus = kvm->buses[bus_idx]; | 3579 | bus = kvm->buses[bus_idx]; |
3578 | r = -ENOENT; | 3580 | if (!bus) |
3581 | return; | ||
3582 | |||
3579 | for (i = 0; i < bus->dev_count; i++) | 3583 | for (i = 0; i < bus->dev_count; i++) |
3580 | if (bus->range[i].dev == dev) { | 3584 | if (bus->range[i].dev == dev) { |
3581 | r = 0; | ||
3582 | break; | 3585 | break; |
3583 | } | 3586 | } |
3584 | 3587 | ||
3585 | if (r) | 3588 | if (i == bus->dev_count) |
3586 | return r; | 3589 | return; |
3587 | 3590 | ||
3588 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * | 3591 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * |
3589 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3592 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
3590 | if (!new_bus) | 3593 | if (!new_bus) { |
3591 | return -ENOMEM; | 3594 | pr_err("kvm: failed to shrink bus, removing it completely\n"); |
3595 | goto broken; | ||
3596 | } | ||
3592 | 3597 | ||
3593 | memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); | 3598 | memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); |
3594 | new_bus->dev_count--; | 3599 | new_bus->dev_count--; |
3595 | memcpy(new_bus->range + i, bus->range + i + 1, | 3600 | memcpy(new_bus->range + i, bus->range + i + 1, |
3596 | (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); | 3601 | (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); |
3597 | 3602 | ||
3603 | broken: | ||
3598 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 3604 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
3599 | synchronize_srcu_expedited(&kvm->srcu); | 3605 | synchronize_srcu_expedited(&kvm->srcu); |
3600 | kfree(bus); | 3606 | kfree(bus); |
3601 | return r; | 3607 | return; |
3602 | } | 3608 | } |
3603 | 3609 | ||
3604 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 3610 | struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
@@ -3611,6 +3617,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
3611 | srcu_idx = srcu_read_lock(&kvm->srcu); | 3617 | srcu_idx = srcu_read_lock(&kvm->srcu); |
3612 | 3618 | ||
3613 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 3619 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
3620 | if (!bus) | ||
3621 | goto out_unlock; | ||
3614 | 3622 | ||
3615 | dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); | 3623 | dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); |
3616 | if (dev_idx < 0) | 3624 | if (dev_idx < 0) |
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d32f239eb471..37d9118fd84b 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c | |||
@@ -20,6 +20,10 @@ | |||
20 | #include <linux/vfio.h> | 20 | #include <linux/vfio.h> |
21 | #include "vfio.h" | 21 | #include "vfio.h" |
22 | 22 | ||
23 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
24 | #include <asm/kvm_ppc.h> | ||
25 | #endif | ||
26 | |||
23 | struct kvm_vfio_group { | 27 | struct kvm_vfio_group { |
24 | struct list_head node; | 28 | struct list_head node; |
25 | struct vfio_group *vfio_group; | 29 | struct vfio_group *vfio_group; |
@@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group) | |||
89 | return ret > 0; | 93 | return ret > 0; |
90 | } | 94 | } |
91 | 95 | ||
96 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
97 | static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group) | ||
98 | { | ||
99 | int (*fn)(struct vfio_group *); | ||
100 | int ret = -EINVAL; | ||
101 | |||
102 | fn = symbol_get(vfio_external_user_iommu_id); | ||
103 | if (!fn) | ||
104 | return ret; | ||
105 | |||
106 | ret = fn(vfio_group); | ||
107 | |||
108 | symbol_put(vfio_external_user_iommu_id); | ||
109 | |||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | static struct iommu_group *kvm_vfio_group_get_iommu_group( | ||
114 | struct vfio_group *group) | ||
115 | { | ||
116 | int group_id = kvm_vfio_external_user_iommu_id(group); | ||
117 | |||
118 | if (group_id < 0) | ||
119 | return NULL; | ||
120 | |||
121 | return iommu_group_get_by_id(group_id); | ||
122 | } | ||
123 | |||
124 | static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm, | ||
125 | struct vfio_group *vfio_group) | ||
126 | { | ||
127 | struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group); | ||
128 | |||
129 | if (WARN_ON_ONCE(!grp)) | ||
130 | return; | ||
131 | |||
132 | kvm_spapr_tce_release_iommu_group(kvm, grp); | ||
133 | iommu_group_put(grp); | ||
134 | } | ||
135 | #endif | ||
136 | |||
92 | /* | 137 | /* |
93 | * Groups can use the same or different IOMMU domains. If the same then | 138 | * Groups can use the same or different IOMMU domains. If the same then |
94 | * adding a new group may change the coherency of groups we've previously | 139 | * adding a new group may change the coherency of groups we've previously |
@@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) | |||
211 | 256 | ||
212 | mutex_unlock(&kv->lock); | 257 | mutex_unlock(&kv->lock); |
213 | 258 | ||
259 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
260 | kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group); | ||
261 | #endif | ||
214 | kvm_vfio_group_set_kvm(vfio_group, NULL); | 262 | kvm_vfio_group_set_kvm(vfio_group, NULL); |
215 | 263 | ||
216 | kvm_vfio_group_put_external_user(vfio_group); | 264 | kvm_vfio_group_put_external_user(vfio_group); |
@@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) | |||
218 | kvm_vfio_update_coherency(dev); | 266 | kvm_vfio_update_coherency(dev); |
219 | 267 | ||
220 | return ret; | 268 | return ret; |
269 | |||
270 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
271 | case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: { | ||
272 | struct kvm_vfio_spapr_tce param; | ||
273 | struct kvm_vfio *kv = dev->private; | ||
274 | struct vfio_group *vfio_group; | ||
275 | struct kvm_vfio_group *kvg; | ||
276 | struct fd f; | ||
277 | struct iommu_group *grp; | ||
278 | |||
279 | if (copy_from_user(¶m, (void __user *)arg, | ||
280 | sizeof(struct kvm_vfio_spapr_tce))) | ||
281 | return -EFAULT; | ||
282 | |||
283 | f = fdget(param.groupfd); | ||
284 | if (!f.file) | ||
285 | return -EBADF; | ||
286 | |||
287 | vfio_group = kvm_vfio_group_get_external_user(f.file); | ||
288 | fdput(f); | ||
289 | |||
290 | if (IS_ERR(vfio_group)) | ||
291 | return PTR_ERR(vfio_group); | ||
292 | |||
293 | grp = kvm_vfio_group_get_iommu_group(vfio_group); | ||
294 | if (WARN_ON_ONCE(!grp)) { | ||
295 | kvm_vfio_group_put_external_user(vfio_group); | ||
296 | return -EIO; | ||
297 | } | ||
298 | |||
299 | ret = -ENOENT; | ||
300 | |||
301 | mutex_lock(&kv->lock); | ||
302 | |||
303 | list_for_each_entry(kvg, &kv->group_list, node) { | ||
304 | if (kvg->vfio_group != vfio_group) | ||
305 | continue; | ||
306 | |||
307 | ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, | ||
308 | param.tablefd, grp); | ||
309 | break; | ||
310 | } | ||
311 | |||
312 | mutex_unlock(&kv->lock); | ||
313 | |||
314 | iommu_group_put(grp); | ||
315 | kvm_vfio_group_put_external_user(vfio_group); | ||
316 | |||
317 | return ret; | ||
318 | } | ||
319 | #endif /* CONFIG_SPAPR_TCE_IOMMU */ | ||
221 | } | 320 | } |
222 | 321 | ||
223 | return -ENXIO; | 322 | return -ENXIO; |
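[Editor's note, not part of the patch] For orientation: userspace drives the new KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE attribute through the KVM-VFIO pseudo device with KVM_SET_DEVICE_ATTR. A rough sketch of the caller side, with fd names purely illustrative (the table fd comes from KVM_CREATE_SPAPR_TCE{,_64} on the VM, the group fd from /dev/vfio/<group>):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int spapr_tce_attach(int vfio_dev_fd, int group_fd, int table_fd)
    {
            struct kvm_vfio_spapr_tce param = {
                    .groupfd = group_fd,    /* fd of /dev/vfio/<group> */
                    .tablefd = table_fd,    /* fd from KVM_CREATE_SPAPR_TCE{,_64} */
            };
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_VFIO_GROUP,
                    .attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
                    .addr  = (__u64)(unsigned long)&param,
            };

            /* vfio_dev_fd was returned by KVM_CREATE_DEVICE(KVM_DEV_TYPE_VFIO). */
            return ioctl(vfio_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
    }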
@@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, | |||
242 | switch (attr->attr) { | 341 | switch (attr->attr) { |
243 | case KVM_DEV_VFIO_GROUP_ADD: | 342 | case KVM_DEV_VFIO_GROUP_ADD: |
244 | case KVM_DEV_VFIO_GROUP_DEL: | 343 | case KVM_DEV_VFIO_GROUP_DEL: |
344 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
345 | case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: | ||
346 | #endif | ||
245 | return 0; | 347 | return 0; |
246 | } | 348 | } |
247 | 349 | ||
@@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev) | |||
257 | struct kvm_vfio_group *kvg, *tmp; | 359 | struct kvm_vfio_group *kvg, *tmp; |
258 | 360 | ||
259 | list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { | 361 | list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { |
362 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
363 | kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group); | ||
364 | #endif | ||
260 | kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); | 365 | kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); |
261 | kvm_vfio_group_put_external_user(kvg->vfio_group); | 366 | kvm_vfio_group_put_external_user(kvg->vfio_group); |
262 | list_del(&kvg->node); | 367 | list_del(&kvg->node); |