-rw-r--r--  Documentation/virtual/kvm/api.txt | 374
-rw-r--r--  Documentation/virtual/kvm/devices/s390_flic.txt | 41
-rw-r--r--  Documentation/virtual/kvm/devices/vfio.txt | 18
-rw-r--r--  Documentation/virtual/kvm/hypercalls.txt | 5
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 1
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm/kvm/arm.c | 3
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 1
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/mips/Kconfig | 1
-rw-r--r--  arch/mips/include/asm/cpu-features.h | 10
-rw-r--r--  arch/mips/include/asm/cpu-info.h | 2
-rw-r--r--  arch/mips/include/asm/cpu.h | 1
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 468
-rw-r--r--  arch/mips/include/asm/maar.h | 10
-rw-r--r--  arch/mips/include/asm/mipsregs.h | 62
-rw-r--r--  arch/mips/include/asm/tlb.h | 6
-rw-r--r--  arch/mips/include/uapi/asm/inst.h | 2
-rw-r--r--  arch/mips/include/uapi/asm/kvm.h | 22
-rw-r--r--  arch/mips/kernel/cpu-probe.c | 13
-rw-r--r--  arch/mips/kernel/time.c | 1
-rw-r--r--  arch/mips/kvm/Kconfig | 27
-rw-r--r--  arch/mips/kvm/Makefile | 9
-rw-r--r--  arch/mips/kvm/emulate.c | 500
-rw-r--r--  arch/mips/kvm/entry.c | 132
-rw-r--r--  arch/mips/kvm/hypcall.c | 53
-rw-r--r--  arch/mips/kvm/interrupt.h | 5
-rw-r--r--  arch/mips/kvm/mips.c | 123
-rw-r--r--  arch/mips/kvm/mmu.c | 20
-rw-r--r--  arch/mips/kvm/tlb.c | 441
-rw-r--r--  arch/mips/kvm/trace.h | 74
-rw-r--r--  arch/mips/kvm/trap_emul.c | 73
-rw-r--r--  arch/mips/kvm/vz.c | 3223
-rw-r--r--  arch/mips/mm/cache.c | 1
-rw-r--r--  arch/mips/mm/init.c | 2
-rw-r--r--  arch/powerpc/include/asm/disassemble.h | 5
-rw-r--r--  arch/powerpc/include/asm/iommu.h | 32
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 35
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 22
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 4
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 58
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 3
-rw-r--r--  arch/powerpc/kernel/iommu.c | 91
-rw-r--r--  arch/powerpc/kvm/Kconfig | 1
-rw-r--r--  arch/powerpc/kvm/book3s.c | 18
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 7
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c | 315
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c | 303
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c | 34
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 10
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 12
-rw-r--r--  arch/powerpc/kvm/booke.c | 5
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c | 5
-rw-r--r--  arch/powerpc/kvm/emulate.c | 8
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c | 472
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 325
-rw-r--r--  arch/powerpc/mm/mmu_context_iommu.c | 39
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 46
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c | 2
-rw-r--r--  arch/s390/include/asm/elf.h | 1
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 42
-rw-r--r--  arch/s390/include/asm/lowcore.h | 9
-rw-r--r--  arch/s390/include/asm/nmi.h | 12
-rw-r--r--  arch/s390/include/asm/processor.h | 5
-rw-r--r--  arch/s390/include/asm/sclp.h | 1
-rw-r--r--  arch/s390/include/asm/setup.h | 2
-rw-r--r--  arch/s390/include/asm/switch_to.h | 3
-rw-r--r--  arch/s390/include/asm/thread_info.h | 12
-rw-r--r--  arch/s390/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/s390/include/uapi/asm/guarded_storage.h | 77
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 26
-rw-r--r--  arch/s390/include/uapi/asm/unistd.h | 2
-rw-r--r--  arch/s390/kernel/Makefile | 2
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 2
-rw-r--r--  arch/s390/kernel/compat_wrapper.c | 1
-rw-r--r--  arch/s390/kernel/early.c | 2
-rw-r--r--  arch/s390/kernel/entry.S | 26
-rw-r--r--  arch/s390/kernel/entry.h | 2
-rw-r--r--  arch/s390/kernel/guarded_storage.c | 128
-rw-r--r--  arch/s390/kernel/machine_kexec.c | 13
-rw-r--r--  arch/s390/kernel/nmi.c | 19
-rw-r--r--  arch/s390/kernel/process.c | 7
-rw-r--r--  arch/s390/kernel/processor.c | 2
-rw-r--r--  arch/s390/kernel/ptrace.c | 86
-rw-r--r--  arch/s390/kernel/setup.c | 18
-rw-r--r--  arch/s390/kernel/smp.c | 43
-rw-r--r--  arch/s390/kernel/syscalls.S | 2
-rw-r--r--  arch/s390/kvm/gaccess.c | 6
-rw-r--r--  arch/s390/kvm/intercept.c | 27
-rw-r--r--  arch/s390/kvm/interrupt.c | 137
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 135
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 4
-rw-r--r--  arch/s390/kvm/priv.c | 50
-rw-r--r--  arch/s390/kvm/sthyi.c | 3
-rw-r--r--  arch/s390/kvm/trace-s390.h | 52
-rw-r--r--  arch/s390/kvm/vsie.c | 78
-rw-r--r--  arch/um/include/shared/os.h | 4
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl | 1
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 8
-rw-r--r--  arch/x86/include/asm/kvm_page_track.h | 1
-rw-r--r--  arch/x86/include/asm/msr-index.h | 11
-rw-r--r--  arch/x86/include/asm/processor.h | 2
-rw-r--r--  arch/x86/include/asm/proto.h | 4
-rw-r--r--  arch/x86/include/asm/thread_info.h | 6
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 10
-rw-r--r--  arch/x86/include/asm/vmx.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 3
-rw-r--r--  arch/x86/include/uapi/asm/prctl.h | 11
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h | 25
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 40
-rw-r--r--  arch/x86/kernel/kvm.c | 4
-rw-r--r--  arch/x86/kernel/process.c | 151
-rw-r--r--  arch/x86/kernel/process_32.c | 7
-rw-r--r--  arch/x86/kernel/process_64.c | 48
-rw-r--r--  arch/x86/kernel/ptrace.c | 8
-rw-r--r--  arch/x86/kvm/Kconfig | 12
-rw-r--r--  arch/x86/kvm/Makefile | 2
-rw-r--r--  arch/x86/kvm/assigned-dev.c | 1058
-rw-r--r--  arch/x86/kvm/assigned-dev.h | 32
-rw-r--r--  arch/x86/kvm/i8259.c | 75
-rw-r--r--  arch/x86/kvm/ioapic.c | 31
-rw-r--r--  arch/x86/kvm/ioapic.h | 16
-rw-r--r--  arch/x86/kvm/iommu.c | 356
-rw-r--r--  arch/x86/kvm/irq.c | 2
-rw-r--r--  arch/x86/kvm/irq.h | 32
-rw-r--r--  arch/x86/kvm/irq_comm.c | 45
-rw-r--r--  arch/x86/kvm/mmu.c | 4
-rw-r--r--  arch/x86/kvm/mmu.h | 3
-rw-r--r--  arch/x86/kvm/page_track.c | 8
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 54
-rw-r--r--  arch/x86/kvm/svm.c | 10
-rw-r--r--  arch/x86/kvm/vmx.c | 304
-rw-r--r--  arch/x86/kvm/x86.c | 147
-rw-r--r--  arch/x86/um/Makefile | 2
-rw-r--r--  arch/x86/um/asm/ptrace.h | 2
-rw-r--r--  arch/x86/um/os-Linux/prctl.c | 4
-rw-r--r--  arch/x86/um/syscalls_32.c | 7
-rw-r--r--  arch/x86/um/syscalls_64.c | 20
-rw-r--r--  drivers/gpio/gpio-altera-a10sr.c | 2
-rw-r--r--  drivers/gpio/gpio-altera.c | 26
-rw-r--r--  drivers/gpio/gpio-mcp23s08.c | 65
-rw-r--r--  drivers/gpio/gpio-mockup.c | 7
-rw-r--r--  drivers/gpio/gpio-xgene.c | 13
-rw-r--r--  drivers/hid/Kconfig | 5
-rw-r--r--  drivers/hid/hid-chicony.c | 1
-rw-r--r--  drivers/hid/hid-core.c | 2
-rw-r--r--  drivers/hid/hid-corsair.c | 47
-rw-r--r--  drivers/hid/hid-ids.h | 4
-rw-r--r--  drivers/hid/hid-sony.c | 2
-rw-r--r--  drivers/hid/usbhid/hid-quirks.c | 3
-rw-r--r--  drivers/hid/wacom_sys.c | 4
-rw-r--r--  drivers/hid/wacom_wac.c | 10
-rw-r--r--  drivers/ptp/ptp_kvm.c | 5
-rw-r--r--  drivers/remoteproc/Kconfig | 6
-rw-r--r--  drivers/s390/char/sclp_early.c | 4
-rw-r--r--  drivers/scsi/Kconfig | 14
-rw-r--r--  drivers/scsi/hpsa.c | 53
-rw-r--r--  drivers/scsi/hpsa.h | 1
-rw-r--r--  drivers/scsi/hpsa_cmd.h | 2
-rw-r--r--  drivers/scsi/lpfc/lpfc_attr.c | 4
-rw-r--r--  drivers/scsi/lpfc/lpfc_init.c | 7
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvme.c | 8
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvmet.c | 8
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas.h | 4
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas_base.c | 17
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas_fusion.c | 4
-rw-r--r--  drivers/scsi/ufs/ufshcd.c | 2
-rw-r--r--  drivers/tty/serial/st-asc.c | 11
-rw-r--r--  drivers/vfio/vfio_iommu_spapr_tce.c | 2
-rw-r--r--  fs/exec.c | 1
-rw-r--r--  fs/f2fs/debug.c | 1
-rw-r--r--  fs/f2fs/dir.c | 2
-rw-r--r--  fs/f2fs/f2fs.h | 2
-rw-r--r--  fs/f2fs/node.c | 163
-rw-r--r--  fs/f2fs/segment.c | 6
-rw-r--r--  include/linux/compat.h | 2
-rw-r--r--  include/linux/gpio/consumer.h | 16
-rw-r--r--  include/linux/kvm_host.h | 26
-rw-r--r--  include/linux/thread_info.h | 4
-rw-r--r--  include/uapi/linux/elf.h | 1
-rw-r--r--  include/uapi/linux/kvm.h | 16
-rw-r--r--  mm/swap_slots.c | 2
-rwxr-xr-x  scripts/checksyscalls.sh | 1
-rwxr-xr-x  tools/kvm/kvm_stat/kvm_stat | 381
-rw-r--r--  tools/kvm/kvm_stat/kvm_stat.txt | 26
-rw-r--r--  virt/kvm/eventfd.c | 7
-rw-r--r--  virt/kvm/irqchip.c | 11
-rw-r--r--  virt/kvm/kvm_main.c | 74
-rw-r--r--  virt/kvm/vfio.c | 105
193 files changed, 9190 insertions, 3198 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3c248f772ae6..e60be91d8036 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -115,12 +115,17 @@ will access the virtual machine's physical address space; offset zero
 corresponds to guest physical address zero. Use of mmap() on a VM fd
 is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
 available.
-You most certainly want to use 0 as machine type.
+You probably want to use 0 as machine type.
 
 In order to create user controlled virtual machines on S390, check
 KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
 privileged user (CAP_SYS_ADMIN).
 
+To use hardware assisted virtualization on MIPS (VZ ASE) rather than
+the default trap & emulate implementation (which changes the virtual
+memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
+flag KVM_VM_MIPS_VZ.
+
 
 4.3 KVM_GET_MSR_INDEX_LIST
 
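A minimal userspace sketch of the machine-type selection described above. The
helper name and error handling are illustrative, not part of the patch, and
KVM_CAP_MIPS_VZ / KVM_VM_MIPS_VZ require uapi headers from this series.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int create_mips_vm(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	unsigned long type = 0;			/* default machine type */

	if (kvm < 0)
		return -1;

	/* Prefer hardware assisted virtualization; 1 == VZ ASE (see 8.5). */
	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ) == 1)
		type = KVM_VM_MIPS_VZ;

	/* The machine type is the argument to KVM_CREATE_VM. */
	return ioctl(kvm, KVM_CREATE_VM, type);
}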
@@ -1321,130 +1326,6 @@ The flags bitmap is defined as:
1321 /* the host supports the ePAPR idle hcall 1326 /* the host supports the ePAPR idle hcall
1322 #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) 1327 #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
1323 1328
13244.48 KVM_ASSIGN_PCI_DEVICE (deprecated)
1325
1326Capability: none
1327Architectures: x86
1328Type: vm ioctl
1329Parameters: struct kvm_assigned_pci_dev (in)
1330Returns: 0 on success, -1 on error
1331
1332Assigns a host PCI device to the VM.
1333
1334struct kvm_assigned_pci_dev {
1335 __u32 assigned_dev_id;
1336 __u32 busnr;
1337 __u32 devfn;
1338 __u32 flags;
1339 __u32 segnr;
1340 union {
1341 __u32 reserved[11];
1342 };
1343};
1344
1345The PCI device is specified by the triple segnr, busnr, and devfn.
1346Identification in succeeding service requests is done via assigned_dev_id. The
1347following flags are specified:
1348
1349/* Depends on KVM_CAP_IOMMU */
1350#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
1351/* The following two depend on KVM_CAP_PCI_2_3 */
1352#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
1353#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
1354
1355If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts
1356via the PCI-2.3-compliant device-level mask, thus enable IRQ sharing with other
1357assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the
1358guest's view on the INTx mask, see KVM_ASSIGN_SET_INTX_MASK for details.
1359
1360The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
1361isolation of the device. Usages not specifying this flag are deprecated.
1362
1363Only PCI header type 0 devices with PCI BAR resources are supported by
1364device assignment. The user requesting this ioctl must have read/write
1365access to the PCI sysfs resource files associated with the device.
1366
1367Errors:
1368 ENOTTY: kernel does not support this ioctl
1369
1370 Other error conditions may be defined by individual device types or
1371 have their standard meanings.
1372
1373
13744.49 KVM_DEASSIGN_PCI_DEVICE (deprecated)
1375
1376Capability: none
1377Architectures: x86
1378Type: vm ioctl
1379Parameters: struct kvm_assigned_pci_dev (in)
1380Returns: 0 on success, -1 on error
1381
1382Ends PCI device assignment, releasing all associated resources.
1383
1384See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
1385used in kvm_assigned_pci_dev to identify the device.
1386
1387Errors:
1388 ENOTTY: kernel does not support this ioctl
1389
1390 Other error conditions may be defined by individual device types or
1391 have their standard meanings.
1392
13934.50 KVM_ASSIGN_DEV_IRQ (deprecated)
1394
1395Capability: KVM_CAP_ASSIGN_DEV_IRQ
1396Architectures: x86
1397Type: vm ioctl
1398Parameters: struct kvm_assigned_irq (in)
1399Returns: 0 on success, -1 on error
1400
1401Assigns an IRQ to a passed-through device.
1402
1403struct kvm_assigned_irq {
1404 __u32 assigned_dev_id;
1405 __u32 host_irq; /* ignored (legacy field) */
1406 __u32 guest_irq;
1407 __u32 flags;
1408 union {
1409 __u32 reserved[12];
1410 };
1411};
1412
1413The following flags are defined:
1414
1415#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
1416#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
1417#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
1418
1419#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
1420#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
1421#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
1422
1423It is not valid to specify multiple types per host or guest IRQ. However, the
1424IRQ type of host and guest can differ or can even be null.
1425
1426Errors:
1427 ENOTTY: kernel does not support this ioctl
1428
1429 Other error conditions may be defined by individual device types or
1430 have their standard meanings.
1431
1432
14334.51 KVM_DEASSIGN_DEV_IRQ (deprecated)
1434
1435Capability: KVM_CAP_ASSIGN_DEV_IRQ
1436Architectures: x86
1437Type: vm ioctl
1438Parameters: struct kvm_assigned_irq (in)
1439Returns: 0 on success, -1 on error
1440
1441Ends an IRQ assignment to a passed-through device.
1442
1443See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
1444by assigned_dev_id, flags must correspond to the IRQ type specified on
1445KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
1446
1447
14484.52 KVM_SET_GSI_ROUTING 13294.52 KVM_SET_GSI_ROUTING
1449 1330
1450Capability: KVM_CAP_IRQ_ROUTING 1331Capability: KVM_CAP_IRQ_ROUTING
@@ -1531,52 +1412,6 @@ struct kvm_irq_routing_hv_sint {
1531 __u32 sint; 1412 __u32 sint;
1532}; 1413};
1533 1414
15344.53 KVM_ASSIGN_SET_MSIX_NR (deprecated)
1535
1536Capability: none
1537Architectures: x86
1538Type: vm ioctl
1539Parameters: struct kvm_assigned_msix_nr (in)
1540Returns: 0 on success, -1 on error
1541
1542Set the number of MSI-X interrupts for an assigned device. The number is
1543reset again by terminating the MSI-X assignment of the device via
1544KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier
1545point will fail.
1546
1547struct kvm_assigned_msix_nr {
1548 __u32 assigned_dev_id;
1549 __u16 entry_nr;
1550 __u16 padding;
1551};
1552
1553#define KVM_MAX_MSIX_PER_DEV 256
1554
1555
15564.54 KVM_ASSIGN_SET_MSIX_ENTRY (deprecated)
1557
1558Capability: none
1559Architectures: x86
1560Type: vm ioctl
1561Parameters: struct kvm_assigned_msix_entry (in)
1562Returns: 0 on success, -1 on error
1563
1564Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting
1565the GSI vector to zero means disabling the interrupt.
1566
1567struct kvm_assigned_msix_entry {
1568 __u32 assigned_dev_id;
1569 __u32 gsi;
1570 __u16 entry; /* The index of entry in the MSI-X table */
1571 __u16 padding[3];
1572};
1573
1574Errors:
1575 ENOTTY: kernel does not support this ioctl
1576
1577 Other error conditions may be defined by individual device types or
1578 have their standard meanings.
1579
1580 1415
15814.55 KVM_SET_TSC_KHZ 14164.55 KVM_SET_TSC_KHZ
1582 1417
@@ -1728,40 +1563,6 @@ should skip processing the bitmap and just invalidate everything. It must
1728be set to the number of set bits in the bitmap. 1563be set to the number of set bits in the bitmap.
1729 1564
1730 1565
17314.61 KVM_ASSIGN_SET_INTX_MASK (deprecated)
1732
1733Capability: KVM_CAP_PCI_2_3
1734Architectures: x86
1735Type: vm ioctl
1736Parameters: struct kvm_assigned_pci_dev (in)
1737Returns: 0 on success, -1 on error
1738
1739Allows userspace to mask PCI INTx interrupts from the assigned device. The
1740kernel will not deliver INTx interrupts to the guest between setting and
1741clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use of
1742and emulation of PCI 2.3 INTx disable command register behavior.
1743
1744This may be used for both PCI 2.3 devices supporting INTx disable natively and
1745older devices lacking this support. Userspace is responsible for emulating the
1746read value of the INTx disable bit in the guest visible PCI command register.
1747When modifying the INTx disable state, userspace should precede updating the
1748physical device command register by calling this ioctl to inform the kernel of
1749the new intended INTx mask state.
1750
1751Note that the kernel uses the device INTx disable bit to internally manage the
1752device interrupt state for PCI 2.3 devices. Reads of this register may
1753therefore not match the expected value. Writes should always use the guest
1754intended INTx disable value rather than attempting to read-copy-update the
1755current physical device state. Races between user and kernel updates to the
1756INTx disable bit are handled lazily in the kernel. It's possible the device
1757may generate unintended interrupts, but they will not be injected into the
1758guest.
1759
1760See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
1761by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is
1762evaluated.
1763
1764
17654.62 KVM_CREATE_SPAPR_TCE 15664.62 KVM_CREATE_SPAPR_TCE
1766 1567
1767Capability: KVM_CAP_SPAPR_TCE 1568Capability: KVM_CAP_SPAPR_TCE
@@ -2068,11 +1869,23 @@ registers, find a list below:
   MIPS  | KVM_REG_MIPS_CP0_ENTRYLO0     | 64
   MIPS  | KVM_REG_MIPS_CP0_ENTRYLO1     | 64
   MIPS  | KVM_REG_MIPS_CP0_CONTEXT      | 64
+  MIPS  | KVM_REG_MIPS_CP0_CONTEXTCONFIG| 32
   MIPS  | KVM_REG_MIPS_CP0_USERLOCAL    | 64
+  MIPS  | KVM_REG_MIPS_CP0_XCONTEXTCONFIG| 64
   MIPS  | KVM_REG_MIPS_CP0_PAGEMASK     | 32
+  MIPS  | KVM_REG_MIPS_CP0_PAGEGRAIN    | 32
+  MIPS  | KVM_REG_MIPS_CP0_SEGCTL0      | 64
+  MIPS  | KVM_REG_MIPS_CP0_SEGCTL1      | 64
+  MIPS  | KVM_REG_MIPS_CP0_SEGCTL2      | 64
+  MIPS  | KVM_REG_MIPS_CP0_PWBASE       | 64
+  MIPS  | KVM_REG_MIPS_CP0_PWFIELD      | 64
+  MIPS  | KVM_REG_MIPS_CP0_PWSIZE       | 64
   MIPS  | KVM_REG_MIPS_CP0_WIRED        | 32
+  MIPS  | KVM_REG_MIPS_CP0_PWCTL        | 32
   MIPS  | KVM_REG_MIPS_CP0_HWRENA       | 32
   MIPS  | KVM_REG_MIPS_CP0_BADVADDR     | 64
+  MIPS  | KVM_REG_MIPS_CP0_BADINSTR     | 32
+  MIPS  | KVM_REG_MIPS_CP0_BADINSTRP    | 32
   MIPS  | KVM_REG_MIPS_CP0_COUNT        | 32
   MIPS  | KVM_REG_MIPS_CP0_ENTRYHI      | 64
   MIPS  | KVM_REG_MIPS_CP0_COMPARE      | 32
@@ -2089,6 +1902,7 @@ registers, find a list below:
   MIPS  | KVM_REG_MIPS_CP0_CONFIG4      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG5      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG7      | 32
+  MIPS  | KVM_REG_MIPS_CP0_XCONTEXT     | 64
   MIPS  | KVM_REG_MIPS_CP0_ERROREPC     | 64
   MIPS  | KVM_REG_MIPS_CP0_KSCRATCH1    | 64
   MIPS  | KVM_REG_MIPS_CP0_KSCRATCH2    | 64
@@ -2096,6 +1910,7 @@ registers, find a list below:
   MIPS  | KVM_REG_MIPS_CP0_KSCRATCH4    | 64
   MIPS  | KVM_REG_MIPS_CP0_KSCRATCH5    | 64
   MIPS  | KVM_REG_MIPS_CP0_KSCRATCH6    | 64
+  MIPS  | KVM_REG_MIPS_CP0_MAAR(0..63)  | 64
   MIPS  | KVM_REG_MIPS_COUNT_CTL        | 64
   MIPS  | KVM_REG_MIPS_COUNT_RESUME     | 64
   MIPS  | KVM_REG_MIPS_COUNT_HZ         | 64
@@ -2162,6 +1977,10 @@ hardware, host kernel, guest, and whether XPA is present in the guest, i.e.
 with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and
 the PFNX field starting at bit 30.
 
+MIPS MAARs (see KVM_REG_MIPS_CP0_MAAR(*) above) have the following id bit
+patterns:
+  0x7030 0000 0001 01 <reg:8>
+
 MIPS KVM control registers (see above) have the following id bit patterns:
   0x7030 0000 0002 <reg:16>
 
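Following the id bit pattern above, MAAR n is register id 0x7030000000010100 | n.
A hedged sketch of reading MAAR 0 with KVM_GET_ONE_REG; the macro and helper
names below are illustrative (the uapi header provides KVM_REG_MIPS_CP0_MAAR(n)).

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Register id for guest MAAR n, built from the documented bit pattern. */
#define MIPS_CP0_MAAR_ID(n)	(0x7030000000010100ULL | (uint64_t)(n))

static int read_guest_maar0(int vcpu_fd, uint64_t *val)
{
	struct kvm_one_reg reg = {
		.id   = MIPS_CP0_MAAR_ID(0),
		.addr = (uintptr_t)val,	/* kernel writes the 64-bit value here */
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}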
@@ -3377,6 +3196,69 @@ struct kvm_ppc_resize_hpt {
 	__u32 pad;
 };
 
+4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: system ioctl
+Parameters: u64 mce_cap (out)
+Returns: 0 on success, -1 on error
+
+Returns supported MCE capabilities. The u64 mce_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register. Supported
+capabilities will have the corresponding bits set.
+
+4.105 KVM_X86_SETUP_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: u64 mcg_cap (in)
+Returns: 0 on success,
+        -EFAULT if u64 mcg_cap cannot be read,
+        -EINVAL if the requested number of banks is invalid,
+        -EINVAL if requested MCE capability is not supported.
+
+Initializes MCE support for use. The u64 mcg_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register and
+specifies which capabilities should be enabled. The maximum
+supported number of error-reporting banks can be retrieved when
+checking for KVM_CAP_MCE. The supported capabilities can be
+retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED.
+
+4.106 KVM_X86_SET_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_x86_mce (in)
+Returns: 0 on success,
+        -EFAULT if struct kvm_x86_mce cannot be read,
+        -EINVAL if the bank number is invalid,
+        -EINVAL if VAL bit is not set in status field.
+
+Inject a machine check error (MCE) into the guest. The input
+parameter is:
+
+struct kvm_x86_mce {
+	__u64 status;
+	__u64 addr;
+	__u64 misc;
+	__u64 mcg_status;
+	__u8 bank;
+	__u8 pad1[7];
+	__u64 pad2[3];
+};
+
+If the MCE being reported is an uncorrected error, KVM will
+inject it as an MCE exception into the guest. If the guest
+MCG_STATUS register reports that an MCE is in progress, KVM
+causes a KVM_EXIT_SHUTDOWN vmexit.
+
+Otherwise, if the MCE is a corrected error, KVM will just
+store it in the corresponding bank (provided this bank is
+not holding a previously reported uncorrected error).
+
 5. The kvm_run structure
 ------------------------
 
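Taken together, 4.104-4.106 above are typically used in this order; a hedged
sketch with an illustrative helper name, trimmed error handling, and assumed
MCA status bit positions (VAL = bit 63, UC = bit 61).

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int setup_and_inject_mce(int kvm_fd, int vcpu_fd)
{
	uint64_t mcg_cap;
	int banks = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
	struct kvm_x86_mce mce;

	/* System ioctl: which MCG_CAP bits the host can emulate. */
	if (banks <= 0 ||
	    ioctl(kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, &mcg_cap) < 0)
		return -1;

	/* Vcpu ioctl: enable MCE with the supported caps and bank count. */
	mcg_cap = (mcg_cap & ~0xffULL) | banks;
	if (ioctl(vcpu_fd, KVM_X86_SETUP_MCE, &mcg_cap) < 0)
		return -1;

	/* Inject an uncorrected error into bank 0; VAL must be set. */
	memset(&mce, 0, sizeof(mce));
	mce.status = (1ULL << 63) | (1ULL << 61);	/* assumed VAL | UC bits */
	mce.bank = 0;
	return ioctl(vcpu_fd, KVM_X86_SET_MCE, &mce);
}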
@@ -4101,6 +3983,23 @@ to take care of that.
 This capability can be enabled dynamically even if VCPUs were already
 created and are running.
 
+7.9 KVM_CAP_S390_GS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success; -EINVAL if the machine does not support
+         guarded storage; -EBUSY if a VCPU has already been created.
+
+Allows use of guarded storage for the KVM guest.
+
+7.10 KVM_CAP_S390_AIS
+
+Architectures: s390
+Parameters: none
+
+Allow use of adapter-interruption suppression.
+Returns: 0 on success; -EBUSY if a VCPU has already been created.
+
 8. Other capabilities.
 ----------------------
 
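Both are section 7 capabilities, i.e. they are enabled per VM with
KVM_ENABLE_CAP on the VM file descriptor before any VCPU exists; a minimal
sketch (the function name is illustrative).

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_s390_gs(int vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_S390_GS;	/* or KVM_CAP_S390_AIS */

	/* Fails with -EBUSY once a VCPU has been created (see above). */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}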
@@ -4147,3 +4046,68 @@ This capability, if KVM_CHECK_EXTENSION indicates that it is
 available, means that that the kernel can support guests using the
 hashed page table MMU defined in Power ISA V3.00 (as implemented in
 the POWER9 processor), including in-memory segment tables.
+
+8.5 KVM_CAP_MIPS_VZ
+
+Architectures: mips
+
+This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
+it is available, means that full hardware assisted virtualization capabilities
+of the hardware are available for use through KVM. An appropriate
+KVM_VM_MIPS_* type must be passed to KVM_CREATE_VM to create a VM which
+utilises it.
+
+If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
+available, it means that the VM is using full hardware assisted virtualization
+capabilities of the hardware. This is useful to check after creating a VM with
+KVM_VM_MIPS_DEFAULT.
+
+The value returned by KVM_CHECK_EXTENSION should be compared against known
+values (see below). All other values are reserved. This is to allow for the
+possibility of other hardware assisted virtualization implementations which
+may be incompatible with the MIPS VZ ASE.
+
+ 0: The trap & emulate implementation is in use to run guest code in user
+    mode. Guest virtual memory segments are rearranged to fit the guest in the
+    user mode address space.
+
+ 1: The MIPS VZ ASE is in use, providing full hardware assisted
+    virtualization, including standard guest virtual memory segments.
+
+8.6 KVM_CAP_MIPS_TE
+
+Architectures: mips
+
+This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
+it is available, means that the trap & emulate implementation is available to
+run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware
+assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed
+to KVM_CREATE_VM to create a VM which utilises it.
+
+If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
+available, it means that the VM is using trap & emulate.
+
+8.7 KVM_CAP_MIPS_64BIT
+
+Architectures: mips
+
+This capability indicates the supported architecture type of the guest, i.e. the
+supported register and address width.
+
+The values returned when this capability is checked by KVM_CHECK_EXTENSION on a
+kvm VM handle correspond roughly to the CP0_Config.AT register field, and should
+be checked specifically against known values (see below). All other values are
+reserved.
+
+ 0: MIPS32 or microMIPS32.
+    Both registers and addresses are 32-bits wide.
+    It will only be possible to run 32-bit guest code.
+
+ 1: MIPS64 or microMIPS64 with access only to 32-bit compatibility segments.
+    Registers are 64-bits wide, but addresses are 32-bits wide.
+    64-bit guest code may run but cannot access MIPS64 memory segments.
+    It will also be possible to run 32-bit guest code.
+
+ 2: MIPS64 or microMIPS64 with access to all address segments.
+    Both registers and addresses are 64-bits wide.
+    It will be possible to run 64-bit or 32-bit guest code.
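An illustrative way to consume 8.5-8.7 from userspace after KVM_CREATE_VM (the
function name is illustrative; the returned values are only compared against
the documented constants).

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void report_mips_vm(int vm_fd)
{
	int vz = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ);
	int at = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_64BIT);

	/* 1 means the VZ ASE backs this VM, 0 means trap & emulate. */
	printf("implementation: %s\n", vz == 1 ? "VZ ASE" : "trap & emulate");

	/* Roughly CP0_Config.AT: 0 = MIPS32, 1/2 = MIPS64 variants. */
	printf("guest type: %s\n",
	       at == 2 ? "MIPS64, all segments" :
	       at == 1 ? "MIPS64, compatibility segments only" : "MIPS32");
}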
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 6b0e115301c8..c2518cea8ab4 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -14,6 +14,8 @@ FLIC provides support to
 - purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ)
 - enable/disable for the guest transparent async page faults
 - register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
+- modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM)
+- inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT)
 
 Groups:
   KVM_DEV_FLIC_ENQUEUE
@@ -64,12 +66,18 @@ struct kvm_s390_io_adapter {
 	__u8 isc;
 	__u8 maskable;
 	__u8 swap;
-	__u8 pad;
+	__u8 flags;
 };
 
   id contains the unique id for the adapter, isc the I/O interruption subclass
-  to use, maskable whether this adapter may be masked (interrupts turned off)
-  and swap whether the indicators need to be byte swapped.
+  to use, maskable whether this adapter may be masked (interrupts turned off),
+  swap whether the indicators need to be byte swapped, and flags contains
+  further characteristics of the adapter.
+  Currently defined values for 'flags' are:
+  - KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
+    (adapter-interrupt-suppression) facility. This flag only has an effect if
+    the AIS capability is enabled.
+  Unknown flag values are ignored.
 
 
   KVM_DEV_FLIC_ADAPTER_MODIFY
@@ -101,6 +109,33 @@ struct kvm_s390_io_adapter_req {
   release a userspace page for the translated address specified in addr
   from the list of mappings
 
+  KVM_DEV_FLIC_AISM
+    modify the adapter-interruption-suppression mode for a given isc if the
+    AIS capability is enabled. Takes a kvm_s390_ais_req describing:
+
+struct kvm_s390_ais_req {
+	__u8 isc;
+	__u16 mode;
+};
+
+  isc contains the target I/O interruption subclass, mode the target
+  adapter-interruption-suppression mode. The following modes are
+  currently supported:
+  - KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
+    is always allowed;
+  - KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
+    injection is only allowed once and the following adapter interrupts
+    will be suppressed until the mode is set again to ALL-Interruptions
+    or SINGLE-Interruption mode.
+
+  KVM_DEV_FLIC_AIRQ_INJECT
+    Inject adapter interrupts on a specified adapter.
+    attr->attr contains the unique id for the adapter, which allows for
+    adapter-specific checks and actions.
+    For adapters subject to AIS, handle the airq injection suppression for
+    an isc according to the adapter-interruption-suppression mode on condition
+    that the AIS capability is enabled.
+
 Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
 FLIC with an unknown group or attribute gives the error code EINVAL (instead of
 ENXIO, as specified in the API documentation). It is not possible to conclude
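A hedged sketch of driving the new AISM group from userspace. The local names
are illustrative; KVM_DEV_FLIC_AISM, struct kvm_s390_ais_req and
KVM_S390_AIS_MODE_SINGLE come from the s390 uapi headers updated in this
series, and the call only succeeds once the AIS capability is enabled.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int flic_set_single_mode(int vm_fd, uint8_t isc)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC };
	struct kvm_s390_ais_req req = {
		.isc  = isc,
		.mode = KVM_S390_AIS_MODE_SINGLE,
	};
	struct kvm_device_attr attr = {
		.group = KVM_DEV_FLIC_AISM,
		.addr  = (uint64_t)(uintptr_t)&req,
	};

	/* The FLIC is a KVM device; cd.fd is filled in on success. */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}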
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
index ef51740c67ca..528c77c8022c 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -16,7 +16,21 @@ Groups:
 
 KVM_DEV_VFIO_GROUP attributes:
   KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+	kvm_device_attr.addr points to an int32_t file descriptor
+	for the VFIO group.
   KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+	kvm_device_attr.addr points to an int32_t file descriptor
+	for the VFIO group.
+  KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
+	allocated by sPAPR KVM.
+	kvm_device_attr.addr points to a struct:
 
-For each, kvm_device_attr.addr points to an int32_t file descriptor
-for the VFIO group.
+	struct kvm_vfio_spapr_tce {
+		__s32	groupfd;
+		__s32	tablefd;
+	};
+
+	where
+	@groupfd is a file descriptor for a VFIO group;
+	@tablefd is a file descriptor for a TCE table allocated via
+		KVM_CREATE_SPAPR_TCE.
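A hedged sketch combining the attributes above (the helper name is
illustrative; the group fd is the descriptor obtained by opening the VFIO
group, and the table fd comes from KVM_CREATE_SPAPR_TCE).

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int kvm_vfio_attach(int vm_fd, int32_t group_fd, int32_t table_fd)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
	struct kvm_device_attr add = {
		.group = KVM_DEV_VFIO_GROUP,
		.attr  = KVM_DEV_VFIO_GROUP_ADD,
		.addr  = (uint64_t)(uintptr_t)&group_fd,
	};
	struct kvm_vfio_spapr_tce tce = {
		.groupfd = group_fd,
		.tablefd = table_fd,
	};
	struct kvm_device_attr spapr = {
		.group = KVM_DEV_VFIO_GROUP,
		.attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
		.addr  = (uint64_t)(uintptr_t)&tce,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;
	if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &add) < 0)
		return -1;
	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &spapr);
}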
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index feaaa634f154..a890529c63ed 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -28,6 +28,11 @@ S390:
   property inside the device tree's /hypervisor node.
   For more information refer to Documentation/virtual/kvm/ppc-pv.txt
 
+MIPS:
+  KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
+  number in $2 (v0). Up to four arguments may be placed in $4-$7 (a0-a3) and
+  the return value is placed in $2 (v0).
+
 KVM Hypercalls Documentation
 ===========================
 The template for each hypercall is:
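A heavily hedged guest-side sketch of that calling convention: the wrapper
name is illustrative, the toolchain must support the VZ ASE for the HYPCALL
mnemonic to assemble, and real guests should prefer the kernel's own
kvm_para helpers.

static inline unsigned long mips_kvm_hypercall1(unsigned long num,
						unsigned long arg0)
{
	register unsigned long n  asm("$2") = num;	/* v0: hypercall number */
	register unsigned long a0 asm("$4") = arg0;	/* a0: first argument */

	asm volatile(".set push\n\t"
		     ".set virt\n\t"
		     "hypcall 0\n\t"	/* code 0 traps to the hypervisor */
		     ".set pop"
		     : "+r"(n)
		     : "r"(a0)
		     : "memory");

	return n;			/* v0: return value */
}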
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 31ee468ce667..de67ce647501 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,7 +30,6 @@
30#define __KVM_HAVE_ARCH_INTC_INITIALIZED 30#define __KVM_HAVE_ARCH_INTC_INITIALIZED
31 31
32#define KVM_USER_MEM_SLOTS 32 32#define KVM_USER_MEM_SLOTS 32
33#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
34#define KVM_HAVE_ONE_REG 33#define KVM_HAVE_ONE_REG
35#define KVM_HALT_POLL_NS_DEFAULT 500000 34#define KVM_HALT_POLL_NS_DEFAULT 500000
36 35
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 6ebd3e6a1fd1..254a38cace2a 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,8 @@
27#define __KVM_HAVE_IRQ_LINE 27#define __KVM_HAVE_IRQ_LINE
28#define __KVM_HAVE_READONLY_MEM 28#define __KVM_HAVE_READONLY_MEM
29 29
30#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
31
30#define KVM_REG_SIZE(id) \ 32#define KVM_REG_SIZE(id) \
31 (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) 33 (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
32 34
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 96dba7cd8be7..e3c8105ada65 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -209,9 +209,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
209 case KVM_CAP_IMMEDIATE_EXIT: 209 case KVM_CAP_IMMEDIATE_EXIT:
210 r = 1; 210 r = 1;
211 break; 211 break;
212 case KVM_CAP_COALESCED_MMIO:
213 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
214 break;
215 case KVM_CAP_ARM_SET_DEVICE_ADDR: 212 case KVM_CAP_ARM_SET_DEVICE_ADDR:
216 r = 1; 213 r = 1;
217 break; 214 break;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e7705e7bb07b..522e4f60976e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -31,7 +31,6 @@
31#define __KVM_HAVE_ARCH_INTC_INITIALIZED 31#define __KVM_HAVE_ARCH_INTC_INITIALIZED
32 32
33#define KVM_USER_MEM_SLOTS 512 33#define KVM_USER_MEM_SLOTS 512
34#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
35#define KVM_HALT_POLL_NS_DEFAULT 500000 34#define KVM_HALT_POLL_NS_DEFAULT 500000
36 35
37#include <kvm/arm_vgic.h> 36#include <kvm/arm_vgic.h>
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index c2860358ae3e..aa5ab69c1312 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,8 @@
39#define __KVM_HAVE_IRQ_LINE 39#define __KVM_HAVE_IRQ_LINE
40#define __KVM_HAVE_READONLY_MEM 40#define __KVM_HAVE_READONLY_MEM
41 41
42#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
43
42#define KVM_REG_SIZE(id) \ 44#define KVM_REG_SIZE(id) \
43 (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) 45 (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
44 46
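Userspace does not hard-code this define; it discovers the ring location via
KVM_CHECK_EXTENSION(KVM_CAP_COALESCED_MMIO), which returns the page offset it
names. A hedged sketch of mapping the ring (helper name illustrative, error
handling trimmed).

#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static struct kvm_coalesced_mmio_ring *map_coalesced_ring(int kvm_fd,
							   int vcpu_fd)
{
	long page = sysconf(_SC_PAGESIZE);
	long size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	int off = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
	void *run;

	if (size < 0 || off <= 0)
		return NULL;		/* coalesced MMIO not supported */

	/* The kvm_run area and the ring share one mapping of the vcpu fd. */
	run = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
	if (run == MAP_FAILED)
		return NULL;

	return (struct kvm_coalesced_mmio_ring *)((char *)run + off * page);
}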
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a008a9f03072..0a4adbc326e6 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1687,6 +1687,7 @@ config CPU_CAVIUM_OCTEON
1687 select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN 1687 select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
1688 select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN 1688 select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
1689 select MIPS_L1_CACHE_SHIFT_7 1689 select MIPS_L1_CACHE_SHIFT_7
1690 select HAVE_KVM
1690 help 1691 help
1691 The Cavium Octeon processor is a highly integrated chip containing 1692 The Cavium Octeon processor is a highly integrated chip containing
1692 many ethernet hardware widgets for networking tasks. The processor 1693 many ethernet hardware widgets for networking tasks. The processor
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index e961c8a7ea66..494d38274142 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -444,6 +444,10 @@
444# define cpu_has_msa 0 444# define cpu_has_msa 0
445#endif 445#endif
446 446
447#ifndef cpu_has_ufr
448# define cpu_has_ufr (cpu_data[0].options & MIPS_CPU_UFR)
449#endif
450
447#ifndef cpu_has_fre 451#ifndef cpu_has_fre
448# define cpu_has_fre (cpu_data[0].options & MIPS_CPU_FRE) 452# define cpu_has_fre (cpu_data[0].options & MIPS_CPU_FRE)
449#endif 453#endif
@@ -528,6 +532,9 @@
528#ifndef cpu_guest_has_htw 532#ifndef cpu_guest_has_htw
529#define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW) 533#define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW)
530#endif 534#endif
535#ifndef cpu_guest_has_mvh
536#define cpu_guest_has_mvh (cpu_data[0].guest.options & MIPS_CPU_MVH)
537#endif
531#ifndef cpu_guest_has_msa 538#ifndef cpu_guest_has_msa
532#define cpu_guest_has_msa (cpu_data[0].guest.ases & MIPS_ASE_MSA) 539#define cpu_guest_has_msa (cpu_data[0].guest.ases & MIPS_ASE_MSA)
533#endif 540#endif
@@ -543,6 +550,9 @@
543#ifndef cpu_guest_has_maar 550#ifndef cpu_guest_has_maar
544#define cpu_guest_has_maar (cpu_data[0].guest.options & MIPS_CPU_MAAR) 551#define cpu_guest_has_maar (cpu_data[0].guest.options & MIPS_CPU_MAAR)
545#endif 552#endif
553#ifndef cpu_guest_has_userlocal
554#define cpu_guest_has_userlocal (cpu_data[0].guest.options & MIPS_CPU_ULRI)
555#endif
546 556
547/* 557/*
548 * Guest dynamic capabilities 558 * Guest dynamic capabilities
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index edbe2734a1bf..be3b4c25f335 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -33,6 +33,7 @@ struct guest_info {
33 unsigned long ases_dyn; 33 unsigned long ases_dyn;
34 unsigned long long options; 34 unsigned long long options;
35 unsigned long long options_dyn; 35 unsigned long long options_dyn;
36 int tlbsize;
36 u8 conf; 37 u8 conf;
37 u8 kscratch_mask; 38 u8 kscratch_mask;
38}; 39};
@@ -109,6 +110,7 @@ struct cpuinfo_mips {
109 struct guest_info guest; 110 struct guest_info guest;
110 unsigned int gtoffset_mask; 111 unsigned int gtoffset_mask;
111 unsigned int guestid_mask; 112 unsigned int guestid_mask;
113 unsigned int guestid_cache;
112} __attribute__((aligned(SMP_CACHE_BYTES))); 114} __attribute__((aligned(SMP_CACHE_BYTES)));
113 115
114extern struct cpuinfo_mips cpu_data[]; 116extern struct cpuinfo_mips cpu_data[];
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 9a8372484edc..98f59307e6a3 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -415,6 +415,7 @@ enum cpu_type_enum {
415#define MIPS_CPU_GUESTCTL2 MBIT_ULL(50) /* CPU has VZ GuestCtl2 register */ 415#define MIPS_CPU_GUESTCTL2 MBIT_ULL(50) /* CPU has VZ GuestCtl2 register */
416#define MIPS_CPU_GUESTID MBIT_ULL(51) /* CPU uses VZ ASE GuestID feature */ 416#define MIPS_CPU_GUESTID MBIT_ULL(51) /* CPU uses VZ ASE GuestID feature */
417#define MIPS_CPU_DRG MBIT_ULL(52) /* CPU has VZ Direct Root to Guest (DRG) */ 417#define MIPS_CPU_DRG MBIT_ULL(52) /* CPU has VZ Direct Root to Guest (DRG) */
418#define MIPS_CPU_UFR MBIT_ULL(53) /* CPU supports User mode FR switching */
418 419
419/* 420/*
420 * CPU ASE encodings 421 * CPU ASE encodings
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 05e785fc061d..2998479fd4e8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -10,6 +10,7 @@
10#ifndef __MIPS_KVM_HOST_H__ 10#ifndef __MIPS_KVM_HOST_H__
11#define __MIPS_KVM_HOST_H__ 11#define __MIPS_KVM_HOST_H__
12 12
13#include <linux/cpumask.h>
13#include <linux/mutex.h> 14#include <linux/mutex.h>
14#include <linux/hrtimer.h> 15#include <linux/hrtimer.h>
15#include <linux/interrupt.h> 16#include <linux/interrupt.h>
@@ -33,12 +34,23 @@
33#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) 34#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
34#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0) 35#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0)
35#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0) 36#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0)
37#define KVM_REG_MIPS_CP0_CONTEXTCONFIG MIPS_CP0_32(4, 1)
36#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2) 38#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2)
39#define KVM_REG_MIPS_CP0_XCONTEXTCONFIG MIPS_CP0_64(4, 3)
37#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0) 40#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0)
38#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1) 41#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1)
42#define KVM_REG_MIPS_CP0_SEGCTL0 MIPS_CP0_64(5, 2)
43#define KVM_REG_MIPS_CP0_SEGCTL1 MIPS_CP0_64(5, 3)
44#define KVM_REG_MIPS_CP0_SEGCTL2 MIPS_CP0_64(5, 4)
45#define KVM_REG_MIPS_CP0_PWBASE MIPS_CP0_64(5, 5)
46#define KVM_REG_MIPS_CP0_PWFIELD MIPS_CP0_64(5, 6)
47#define KVM_REG_MIPS_CP0_PWSIZE MIPS_CP0_64(5, 7)
39#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0) 48#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0)
49#define KVM_REG_MIPS_CP0_PWCTL MIPS_CP0_32(6, 6)
40#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0) 50#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0)
41#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0) 51#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0)
52#define KVM_REG_MIPS_CP0_BADINSTR MIPS_CP0_32(8, 1)
53#define KVM_REG_MIPS_CP0_BADINSTRP MIPS_CP0_32(8, 2)
42#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0) 54#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0)
43#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) 55#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
44#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) 56#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
@@ -55,6 +67,7 @@
55#define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4) 67#define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4)
56#define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5) 68#define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5)
57#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) 69#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
70#define KVM_REG_MIPS_CP0_MAARI MIPS_CP0_64(17, 2)
58#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) 71#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
59#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) 72#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
60#define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2) 73#define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2)
@@ -70,9 +83,13 @@
70/* memory slots that does not exposed to userspace */ 83/* memory slots that does not exposed to userspace */
71#define KVM_PRIVATE_MEM_SLOTS 0 84#define KVM_PRIVATE_MEM_SLOTS 0
72 85
73#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
74#define KVM_HALT_POLL_NS_DEFAULT 500000 86#define KVM_HALT_POLL_NS_DEFAULT 500000
75 87
88#ifdef CONFIG_KVM_MIPS_VZ
89extern unsigned long GUESTID_MASK;
90extern unsigned long GUESTID_FIRST_VERSION;
91extern unsigned long GUESTID_VERSION_MASK;
92#endif
76 93
77 94
78/* 95/*
@@ -145,6 +162,16 @@ struct kvm_vcpu_stat {
145 u64 fpe_exits; 162 u64 fpe_exits;
146 u64 msa_disabled_exits; 163 u64 msa_disabled_exits;
147 u64 flush_dcache_exits; 164 u64 flush_dcache_exits;
165#ifdef CONFIG_KVM_MIPS_VZ
166 u64 vz_gpsi_exits;
167 u64 vz_gsfc_exits;
168 u64 vz_hc_exits;
169 u64 vz_grr_exits;
170 u64 vz_gva_exits;
171 u64 vz_ghfc_exits;
172 u64 vz_gpa_exits;
173 u64 vz_resvd_exits;
174#endif
148 u64 halt_successful_poll; 175 u64 halt_successful_poll;
149 u64 halt_attempted_poll; 176 u64 halt_attempted_poll;
150 u64 halt_poll_invalid; 177 u64 halt_poll_invalid;
@@ -157,6 +184,8 @@ struct kvm_arch_memory_slot {
157struct kvm_arch { 184struct kvm_arch {
158 /* Guest physical mm */ 185 /* Guest physical mm */
159 struct mm_struct gpa_mm; 186 struct mm_struct gpa_mm;
187 /* Mask of CPUs needing GPA ASID flush */
188 cpumask_t asid_flush_mask;
160}; 189};
161 190
162#define N_MIPS_COPROC_REGS 32 191#define N_MIPS_COPROC_REGS 32
@@ -214,6 +243,11 @@ struct mips_coproc {
214#define MIPS_CP0_CONFIG4_SEL 4 243#define MIPS_CP0_CONFIG4_SEL 4
215#define MIPS_CP0_CONFIG5_SEL 5 244#define MIPS_CP0_CONFIG5_SEL 5
216 245
246#define MIPS_CP0_GUESTCTL2 10
247#define MIPS_CP0_GUESTCTL2_SEL 5
248#define MIPS_CP0_GTOFFSET 12
249#define MIPS_CP0_GTOFFSET_SEL 7
250
217/* Resume Flags */ 251/* Resume Flags */
218#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ 252#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */
219#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ 253#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
@@ -229,6 +263,7 @@ enum emulation_result {
229 EMULATE_WAIT, /* WAIT instruction */ 263 EMULATE_WAIT, /* WAIT instruction */
230 EMULATE_PRIV_FAIL, 264 EMULATE_PRIV_FAIL,
231 EMULATE_EXCEPT, /* A guest exception has been generated */ 265 EMULATE_EXCEPT, /* A guest exception has been generated */
266 EMULATE_HYPERCALL, /* HYPCALL instruction */
232}; 267};
233 268
234#define mips3_paddr_to_tlbpfn(x) \ 269#define mips3_paddr_to_tlbpfn(x) \
@@ -276,13 +311,18 @@ struct kvm_mmu_memory_cache {
276struct kvm_vcpu_arch { 311struct kvm_vcpu_arch {
277 void *guest_ebase; 312 void *guest_ebase;
278 int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); 313 int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
314
315 /* Host registers preserved across guest mode execution */
279 unsigned long host_stack; 316 unsigned long host_stack;
280 unsigned long host_gp; 317 unsigned long host_gp;
318 unsigned long host_pgd;
319 unsigned long host_entryhi;
281 320
282 /* Host CP0 registers used when handling exits from guest */ 321 /* Host CP0 registers used when handling exits from guest */
283 unsigned long host_cp0_badvaddr; 322 unsigned long host_cp0_badvaddr;
284 unsigned long host_cp0_epc; 323 unsigned long host_cp0_epc;
285 u32 host_cp0_cause; 324 u32 host_cp0_cause;
325 u32 host_cp0_guestctl0;
286 u32 host_cp0_badinstr; 326 u32 host_cp0_badinstr;
287 u32 host_cp0_badinstrp; 327 u32 host_cp0_badinstrp;
288 328
@@ -340,7 +380,23 @@ struct kvm_vcpu_arch {
340 /* Cache some mmu pages needed inside spinlock regions */ 380 /* Cache some mmu pages needed inside spinlock regions */
341 struct kvm_mmu_memory_cache mmu_page_cache; 381 struct kvm_mmu_memory_cache mmu_page_cache;
342 382
383#ifdef CONFIG_KVM_MIPS_VZ
384 /* vcpu's vzguestid is different on each host cpu in an smp system */
385 u32 vzguestid[NR_CPUS];
386
387 /* wired guest TLB entries */
388 struct kvm_mips_tlb *wired_tlb;
389 unsigned int wired_tlb_limit;
390 unsigned int wired_tlb_used;
391
392 /* emulated guest MAAR registers */
393 unsigned long maar[6];
394#endif
395
396 /* Last CPU the VCPU state was loaded on */
343 int last_sched_cpu; 397 int last_sched_cpu;
398 /* Last CPU the VCPU actually executed guest code on */
399 int last_exec_cpu;
344 400
345 /* WAIT executed */ 401 /* WAIT executed */
346 int wait; 402 int wait;
@@ -349,78 +405,6 @@ struct kvm_vcpu_arch {
349 u8 msa_enabled; 405 u8 msa_enabled;
350}; 406};
351 407
352
353#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
354#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
355#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
356#define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val))
357#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
358#define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val))
359#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
360#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
361#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
362#define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val))
363#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
364#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
365#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
366#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
367#define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0])
368#define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val))
369#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0])
370#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
371#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0])
372#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val))
373#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0])
374#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
375#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0])
376#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val))
377#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0])
378#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val))
379#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1])
380#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val))
381#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0])
382#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val))
383#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0])
384#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
385#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0])
386#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val))
387#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1])
388#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val))
389#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0])
390#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1])
391#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2])
392#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3])
393#define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4])
394#define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5])
395#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7])
396#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
397#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
398#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
399#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
400#define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val))
401#define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val))
402#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
403#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
404#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
405#define kvm_read_c0_guest_kscratch1(cop0) (cop0->reg[MIPS_CP0_DESAVE][2])
406#define kvm_read_c0_guest_kscratch2(cop0) (cop0->reg[MIPS_CP0_DESAVE][3])
407#define kvm_read_c0_guest_kscratch3(cop0) (cop0->reg[MIPS_CP0_DESAVE][4])
408#define kvm_read_c0_guest_kscratch4(cop0) (cop0->reg[MIPS_CP0_DESAVE][5])
409#define kvm_read_c0_guest_kscratch5(cop0) (cop0->reg[MIPS_CP0_DESAVE][6])
410#define kvm_read_c0_guest_kscratch6(cop0) (cop0->reg[MIPS_CP0_DESAVE][7])
411#define kvm_write_c0_guest_kscratch1(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][2] = (val))
412#define kvm_write_c0_guest_kscratch2(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][3] = (val))
413#define kvm_write_c0_guest_kscratch3(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][4] = (val))
414#define kvm_write_c0_guest_kscratch4(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][5] = (val))
415#define kvm_write_c0_guest_kscratch5(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][6] = (val))
416#define kvm_write_c0_guest_kscratch6(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][7] = (val))
417
418/*
419 * Some of the guest registers may be modified asynchronously (e.g. from a
420 * hrtimer callback in hard irq context) and therefore need stronger atomicity
421 * guarantees than other registers.
422 */
423
424static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg, 408static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
425 unsigned long val) 409 unsigned long val)
426{ 410{
@@ -471,26 +455,286 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
471 } while (unlikely(!temp)); 455 } while (unlikely(!temp));
472} 456}
473 457
474#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) 458/* Guest register types, used in accessor build below */
475#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) 459#define __KVMT32 u32
460#define __KVMTl unsigned long
476 461
477/* Cause can be modified asynchronously from hardirq hrtimer callback */ 462/*
478#define kvm_set_c0_guest_cause(cop0, val) \ 463 * __BUILD_KVM_$ops_SAVED(): kvm_$op_sw_gc0_$reg()
479 _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) 464 * These operate on the saved guest C0 state in RAM.
480#define kvm_clear_c0_guest_cause(cop0, val) \ 465 */
481 _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) 466
482#define kvm_change_c0_guest_cause(cop0, change, val) \ 467/* Generate saved context simple accessors */
483 _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \ 468#define __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
484 change, val) 469static inline __KVMT##type kvm_read_sw_gc0_##name(struct mips_coproc *cop0) \
485 470{ \
486#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) 471 return cop0->reg[(_reg)][(sel)]; \
487#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) 472} \
488#define kvm_change_c0_guest_ebase(cop0, change, val) \ 473static inline void kvm_write_sw_gc0_##name(struct mips_coproc *cop0, \
474 __KVMT##type val) \
475{ \
476 cop0->reg[(_reg)][(sel)] = val; \
477}
478
479/* Generate saved context bitwise modifiers */
480#define __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
481static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \
482 __KVMT##type val) \
483{ \
484 cop0->reg[(_reg)][(sel)] |= val; \
485} \
486static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \
487 __KVMT##type val) \
488{ \
489 cop0->reg[(_reg)][(sel)] &= ~val; \
490} \
491static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \
492 __KVMT##type mask, \
493 __KVMT##type val) \
494{ \
495 unsigned long _mask = mask; \
496 cop0->reg[(_reg)][(sel)] &= ~_mask; \
497 cop0->reg[(_reg)][(sel)] |= val & _mask; \
498}
499
500/* Generate saved context atomic bitwise modifiers */
501#define __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \
502static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \
503 __KVMT##type val) \
504{ \
505 _kvm_atomic_set_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \
506} \
507static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \
508 __KVMT##type val) \
509{ \
510 _kvm_atomic_clear_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \
511} \
512static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \
513 __KVMT##type mask, \
514 __KVMT##type val) \
515{ \
516 _kvm_atomic_change_c0_guest_reg(&cop0->reg[(_reg)][(sel)], mask, \
517 val); \
518}
519
520/*
521 * __BUILD_KVM_$ops_VZ(): kvm_$op_vz_gc0_$reg()
522 * These operate on the VZ guest C0 context in hardware.
523 */
524
525/* Generate VZ guest context simple accessors */
526#define __BUILD_KVM_RW_VZ(name, type, _reg, sel) \
527static inline __KVMT##type kvm_read_vz_gc0_##name(struct mips_coproc *cop0) \
528{ \
529 return read_gc0_##name(); \
530} \
531static inline void kvm_write_vz_gc0_##name(struct mips_coproc *cop0, \
532 __KVMT##type val) \
533{ \
534 write_gc0_##name(val); \
535}
536
537/* Generate VZ guest context bitwise modifiers */
538#define __BUILD_KVM_SET_VZ(name, type, _reg, sel) \
539static inline void kvm_set_vz_gc0_##name(struct mips_coproc *cop0, \
540 __KVMT##type val) \
541{ \
542 set_gc0_##name(val); \
543} \
544static inline void kvm_clear_vz_gc0_##name(struct mips_coproc *cop0, \
545 __KVMT##type val) \
546{ \
547 clear_gc0_##name(val); \
548} \
549static inline void kvm_change_vz_gc0_##name(struct mips_coproc *cop0, \
550 __KVMT##type mask, \
551 __KVMT##type val) \
552{ \
553 change_gc0_##name(mask, val); \
554}
555
556/* Generate VZ guest context save/restore to/from saved context */
557#define __BUILD_KVM_SAVE_VZ(name, _reg, sel) \
558static inline void kvm_restore_gc0_##name(struct mips_coproc *cop0) \
559{ \
560 write_gc0_##name(cop0->reg[(_reg)][(sel)]); \
561} \
562static inline void kvm_save_gc0_##name(struct mips_coproc *cop0) \
563{ \
564 cop0->reg[(_reg)][(sel)] = read_gc0_##name(); \
565}
566
567/*
568 * __BUILD_KVM_$ops_WRAP(): kvm_$op_$name1() -> kvm_$op_$name2()
569 * These wrap a set of operations to provide them with a different name.
570 */
571
572/* Generate simple accessor wrapper */
573#define __BUILD_KVM_RW_WRAP(name1, name2, type) \
574static inline __KVMT##type kvm_read_##name1(struct mips_coproc *cop0) \
575{ \
576 return kvm_read_##name2(cop0); \
577} \
578static inline void kvm_write_##name1(struct mips_coproc *cop0, \
579 __KVMT##type val) \
580{ \
581 kvm_write_##name2(cop0, val); \
582}
583
584/* Generate bitwise modifier wrapper */
585#define __BUILD_KVM_SET_WRAP(name1, name2, type) \
586static inline void kvm_set_##name1(struct mips_coproc *cop0, \
587 __KVMT##type val) \
489{ \
490	kvm_clear_c0_guest_ebase(cop0, change); \
491	kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
588{ \
589	kvm_set_##name2(cop0, val); \
590} \
591static inline void kvm_clear_##name1(struct mips_coproc *cop0, \
592					__KVMT##type val) \
593{ \
594	kvm_clear_##name2(cop0, val); \
595} \
596static inline void kvm_change_##name1(struct mips_coproc *cop0, \
597					__KVMT##type mask, \
598					__KVMT##type val) \
599{ \
600	kvm_change_##name2(cop0, mask, val); \
492} 601}
493 602
603/*
604 * __BUILD_KVM_$ops_SW(): kvm_$op_c0_guest_$reg() -> kvm_$op_sw_gc0_$reg()
605 * These generate accessors operating on the saved context in RAM, and wrap them
606 * with the common guest C0 accessors (for use by common emulation code).
607 */
608
609#define __BUILD_KVM_RW_SW(name, type, _reg, sel) \
610 __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
611 __BUILD_KVM_RW_WRAP(c0_guest_##name, sw_gc0_##name, type)
612
613#define __BUILD_KVM_SET_SW(name, type, _reg, sel) \
614 __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
615 __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type)
616
617#define __BUILD_KVM_ATOMIC_SW(name, type, _reg, sel) \
618 __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \
619 __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type)
620
621#ifndef CONFIG_KVM_MIPS_VZ
622
623/*
624 * T&E (trap & emulate software based virtualisation)
625 * We generate the common accessors operating exclusively on the saved context
626 * in RAM.
627 */
628
629#define __BUILD_KVM_RW_HW __BUILD_KVM_RW_SW
630#define __BUILD_KVM_SET_HW __BUILD_KVM_SET_SW
631#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_ATOMIC_SW
632
633#else
634
635/*
636 * VZ (hardware assisted virtualisation)
637 * These macros use the active guest state in VZ mode (hardware registers),
638 */
639
640/*
641 * __BUILD_KVM_$ops_HW(): kvm_$op_c0_guest_$reg() -> kvm_$op_vz_gc0_$reg()
642 * These generate accessors operating on the VZ guest context in hardware, and
643 * wrap them with the common guest C0 accessors (for use by common emulation
644 * code).
645 *
646 * Accessors operating on the saved context in RAM are also generated to allow
647 * convenient explicit saving and restoring of the state.
648 */
649
650#define __BUILD_KVM_RW_HW(name, type, _reg, sel) \
651 __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
652 __BUILD_KVM_RW_VZ(name, type, _reg, sel) \
653 __BUILD_KVM_RW_WRAP(c0_guest_##name, vz_gc0_##name, type) \
654 __BUILD_KVM_SAVE_VZ(name, _reg, sel)
655
656#define __BUILD_KVM_SET_HW(name, type, _reg, sel) \
657 __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
658 __BUILD_KVM_SET_VZ(name, type, _reg, sel) \
659 __BUILD_KVM_SET_WRAP(c0_guest_##name, vz_gc0_##name, type)
660
661/*
662 * We can't do atomic modifications of COP0 state if hardware can modify it.
663 * Races must be handled explicitly.
664 */
665#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_SET_HW
666
667#endif
668
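/*
 * Editor's note (derived from the builders above, not literal patch code):
 * taking __BUILD_KVM_RW_HW(entryhi, l, MIPS_CP0_TLB_HI, 0) as an example,
 * the common accessor resolves differently per configuration:
 *
 *   T&E:  kvm_read_c0_guest_entryhi(cop0) -> kvm_read_sw_gc0_entryhi(cop0)
 *         (saved context in RAM)
 *
 *   VZ:   kvm_read_c0_guest_entryhi(cop0) -> kvm_read_vz_gc0_entryhi(cop0)
 *                                         -> read_gc0_entryhi()  (hardware)
 *         kvm_read_sw_gc0_entryhi(cop0) still reads the RAM copy, and
 *         kvm_save_gc0_entryhi()/kvm_restore_gc0_entryhi() copy between the
 *         hardware guest register and the RAM copy.
 */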
669/*
670 * Define accessors for CP0 registers that are accessible to the guest. These
671 * are primarily used by common emulation code, which may need to access the
672 * registers differently depending on the implementation.
673 *
674 * fns_hw/sw name type reg num select
675 */
676__BUILD_KVM_RW_HW(index, 32, MIPS_CP0_TLB_INDEX, 0)
677__BUILD_KVM_RW_HW(entrylo0, l, MIPS_CP0_TLB_LO0, 0)
678__BUILD_KVM_RW_HW(entrylo1, l, MIPS_CP0_TLB_LO1, 0)
679__BUILD_KVM_RW_HW(context, l, MIPS_CP0_TLB_CONTEXT, 0)
680__BUILD_KVM_RW_HW(contextconfig, 32, MIPS_CP0_TLB_CONTEXT, 1)
681__BUILD_KVM_RW_HW(userlocal, l, MIPS_CP0_TLB_CONTEXT, 2)
682__BUILD_KVM_RW_HW(xcontextconfig, l, MIPS_CP0_TLB_CONTEXT, 3)
683__BUILD_KVM_RW_HW(pagemask, l, MIPS_CP0_TLB_PG_MASK, 0)
684__BUILD_KVM_RW_HW(pagegrain, 32, MIPS_CP0_TLB_PG_MASK, 1)
685__BUILD_KVM_RW_HW(segctl0, l, MIPS_CP0_TLB_PG_MASK, 2)
686__BUILD_KVM_RW_HW(segctl1, l, MIPS_CP0_TLB_PG_MASK, 3)
687__BUILD_KVM_RW_HW(segctl2, l, MIPS_CP0_TLB_PG_MASK, 4)
688__BUILD_KVM_RW_HW(pwbase, l, MIPS_CP0_TLB_PG_MASK, 5)
689__BUILD_KVM_RW_HW(pwfield, l, MIPS_CP0_TLB_PG_MASK, 6)
690__BUILD_KVM_RW_HW(pwsize, l, MIPS_CP0_TLB_PG_MASK, 7)
691__BUILD_KVM_RW_HW(wired, 32, MIPS_CP0_TLB_WIRED, 0)
692__BUILD_KVM_RW_HW(pwctl, 32, MIPS_CP0_TLB_WIRED, 6)
693__BUILD_KVM_RW_HW(hwrena, 32, MIPS_CP0_HWRENA, 0)
694__BUILD_KVM_RW_HW(badvaddr, l, MIPS_CP0_BAD_VADDR, 0)
695__BUILD_KVM_RW_HW(badinstr, 32, MIPS_CP0_BAD_VADDR, 1)
696__BUILD_KVM_RW_HW(badinstrp, 32, MIPS_CP0_BAD_VADDR, 2)
697__BUILD_KVM_RW_SW(count, 32, MIPS_CP0_COUNT, 0)
698__BUILD_KVM_RW_HW(entryhi, l, MIPS_CP0_TLB_HI, 0)
699__BUILD_KVM_RW_HW(compare, 32, MIPS_CP0_COMPARE, 0)
700__BUILD_KVM_RW_HW(status, 32, MIPS_CP0_STATUS, 0)
701__BUILD_KVM_RW_HW(intctl, 32, MIPS_CP0_STATUS, 1)
702__BUILD_KVM_RW_HW(cause, 32, MIPS_CP0_CAUSE, 0)
703__BUILD_KVM_RW_HW(epc, l, MIPS_CP0_EXC_PC, 0)
704__BUILD_KVM_RW_SW(prid, 32, MIPS_CP0_PRID, 0)
705__BUILD_KVM_RW_HW(ebase, l, MIPS_CP0_PRID, 1)
706__BUILD_KVM_RW_HW(config, 32, MIPS_CP0_CONFIG, 0)
707__BUILD_KVM_RW_HW(config1, 32, MIPS_CP0_CONFIG, 1)
708__BUILD_KVM_RW_HW(config2, 32, MIPS_CP0_CONFIG, 2)
709__BUILD_KVM_RW_HW(config3, 32, MIPS_CP0_CONFIG, 3)
710__BUILD_KVM_RW_HW(config4, 32, MIPS_CP0_CONFIG, 4)
711__BUILD_KVM_RW_HW(config5, 32, MIPS_CP0_CONFIG, 5)
712__BUILD_KVM_RW_HW(config6, 32, MIPS_CP0_CONFIG, 6)
713__BUILD_KVM_RW_HW(config7, 32, MIPS_CP0_CONFIG, 7)
714__BUILD_KVM_RW_SW(maari, l, MIPS_CP0_LLADDR, 2)
715__BUILD_KVM_RW_HW(xcontext, l, MIPS_CP0_TLB_XCONTEXT, 0)
716__BUILD_KVM_RW_HW(errorepc, l, MIPS_CP0_ERROR_PC, 0)
717__BUILD_KVM_RW_HW(kscratch1, l, MIPS_CP0_DESAVE, 2)
718__BUILD_KVM_RW_HW(kscratch2, l, MIPS_CP0_DESAVE, 3)
719__BUILD_KVM_RW_HW(kscratch3, l, MIPS_CP0_DESAVE, 4)
720__BUILD_KVM_RW_HW(kscratch4, l, MIPS_CP0_DESAVE, 5)
721__BUILD_KVM_RW_HW(kscratch5, l, MIPS_CP0_DESAVE, 6)
722__BUILD_KVM_RW_HW(kscratch6, l, MIPS_CP0_DESAVE, 7)
723
724/* Bitwise operations (on HW state) */
725__BUILD_KVM_SET_HW(status, 32, MIPS_CP0_STATUS, 0)
726/* Cause can be modified asynchronously from hardirq hrtimer callback */
727__BUILD_KVM_ATOMIC_HW(cause, 32, MIPS_CP0_CAUSE, 0)
728__BUILD_KVM_SET_HW(ebase, l, MIPS_CP0_PRID, 1)
729
730/* Bitwise operations (on saved state) */
731__BUILD_KVM_SET_SAVED(config, 32, MIPS_CP0_CONFIG, 0)
732__BUILD_KVM_SET_SAVED(config1, 32, MIPS_CP0_CONFIG, 1)
733__BUILD_KVM_SET_SAVED(config2, 32, MIPS_CP0_CONFIG, 2)
734__BUILD_KVM_SET_SAVED(config3, 32, MIPS_CP0_CONFIG, 3)
735__BUILD_KVM_SET_SAVED(config4, 32, MIPS_CP0_CONFIG, 4)
736__BUILD_KVM_SET_SAVED(config5, 32, MIPS_CP0_CONFIG, 5)
737
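/*
 * Editor's sketch of typical use (hypothetical caller context, not patch
 * code): common emulation code sticks to the mode-independent names and lets
 * the builders above decide whether they touch RAM (T&E) or the VZ hardware
 * guest context, e.g.
 *
 *	struct mips_coproc *cop0 = vcpu->arch.cop0;
 *
 *	kvm_set_c0_guest_cause(cop0, CAUSEF_TI);	raise the timer interrupt
 *	kvm_clear_c0_guest_cause(cop0, CAUSEF_TI);	acknowledge it again
 */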
494/* Helpers */ 738/* Helpers */
495 739
496static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) 740static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
@@ -531,6 +775,10 @@ struct kvm_mips_callbacks {
531 int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); 775 int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
532 int (*handle_fpe)(struct kvm_vcpu *vcpu); 776 int (*handle_fpe)(struct kvm_vcpu *vcpu);
533 int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); 777 int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
778 int (*handle_guest_exit)(struct kvm_vcpu *vcpu);
779 int (*hardware_enable)(void);
780 void (*hardware_disable)(void);
781 int (*check_extension)(struct kvm *kvm, long ext);
534 int (*vcpu_init)(struct kvm_vcpu *vcpu); 782 int (*vcpu_init)(struct kvm_vcpu *vcpu);
535 void (*vcpu_uninit)(struct kvm_vcpu *vcpu); 783 void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
536 int (*vcpu_setup)(struct kvm_vcpu *vcpu); 784 int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -599,6 +847,10 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu);
599 847
600u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); 848u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu);
601 849
850#ifdef CONFIG_KVM_MIPS_VZ
851int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
852 struct kvm_vcpu *vcpu, bool write_fault);
853#endif
602extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, 854extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr,
603 struct kvm_vcpu *vcpu, 855 struct kvm_vcpu *vcpu,
604 bool write_fault); 856 bool write_fault);
@@ -625,6 +877,18 @@ extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi,
625extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, 877extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
626 unsigned long entryhi); 878 unsigned long entryhi);
627 879
880#ifdef CONFIG_KVM_MIPS_VZ
881int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
882int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva,
883 unsigned long *gpa);
884void kvm_vz_local_flush_roottlb_all_guests(void);
885void kvm_vz_local_flush_guesttlb_all(void);
886void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
887 unsigned int count);
888void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
889 unsigned int count);
890#endif
891
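/*
 * Editor's sketch (hypothetical VZ-only caller, not patch code): "buf" and
 * "num" are placeholder names.  Spilling the first num hardware guest TLB
 * entries and later putting them back would pair the helpers above as:
 *
 *	kvm_vz_save_guesttlb(buf, 0, num);
 *	...another guest may run and clobber the hardware guest TLB...
 *	kvm_vz_local_flush_guesttlb_all();
 *	kvm_vz_load_guesttlb(buf, 0, num);
 */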
628void kvm_mips_suspend_mm(int cpu); 892void kvm_mips_suspend_mm(int cpu);
629void kvm_mips_resume_mm(int cpu); 893void kvm_mips_resume_mm(int cpu);
630 894
@@ -795,7 +1059,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
795u32 kvm_mips_read_count(struct kvm_vcpu *vcpu); 1059u32 kvm_mips_read_count(struct kvm_vcpu *vcpu);
796void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count); 1060void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count);
797void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack); 1061void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack);
798void kvm_mips_init_count(struct kvm_vcpu *vcpu); 1062void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz);
799int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); 1063int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
800int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); 1064int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
801int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz); 1065int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz);
@@ -803,6 +1067,20 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu);
803void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu); 1067void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu);
804enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu); 1068enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu);
805 1069
1070/* fairly internal functions requiring some care to use */
1071int kvm_mips_count_disabled(struct kvm_vcpu *vcpu);
1072ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count);
1073int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before,
1074 u32 count, int min_drift);
1075
1076#ifdef CONFIG_KVM_MIPS_VZ
1077void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu);
1078void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu);
1079#else
1080static inline void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) {}
1081static inline void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) {}
1082#endif
1083
806enum emulation_result kvm_mips_check_privilege(u32 cause, 1084enum emulation_result kvm_mips_check_privilege(u32 cause,
807 u32 *opc, 1085 u32 *opc,
808 struct kvm_run *run, 1086 struct kvm_run *run,
@@ -827,11 +1105,20 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
827 struct kvm_run *run, 1105 struct kvm_run *run,
828 struct kvm_vcpu *vcpu); 1106 struct kvm_vcpu *vcpu);
829 1107
1108/* COP0 */
1109enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu);
1110
830unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu); 1111unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
831unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu); 1112unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
832unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu); 1113unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
833unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu); 1114unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
834 1115
1116/* Hypercalls (hypcall.c) */
1117
1118enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu,
1119 union mips_instruction inst);
1120int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu);
1121
835/* Dynamic binary translation */ 1122/* Dynamic binary translation */
836extern int kvm_mips_trans_cache_index(union mips_instruction inst, 1123extern int kvm_mips_trans_cache_index(union mips_instruction inst,
837 u32 *opc, struct kvm_vcpu *vcpu); 1124 u32 *opc, struct kvm_vcpu *vcpu);
@@ -846,7 +1133,6 @@ extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc,
846extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); 1133extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
847extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); 1134extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
848 1135
849static inline void kvm_arch_hardware_disable(void) {}
850static inline void kvm_arch_hardware_unsetup(void) {} 1136static inline void kvm_arch_hardware_unsetup(void) {}
851static inline void kvm_arch_sync_events(struct kvm *kvm) {} 1137static inline void kvm_arch_sync_events(struct kvm *kvm) {}
852static inline void kvm_arch_free_memslot(struct kvm *kvm, 1138static inline void kvm_arch_free_memslot(struct kvm *kvm,
diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h
index 21d9607c80d7..e10f78befbd9 100644
--- a/arch/mips/include/asm/maar.h
+++ b/arch/mips/include/asm/maar.h
@@ -36,7 +36,7 @@ unsigned platform_maar_init(unsigned num_pairs);
36 * @upper: The highest address that the MAAR pair will affect. Must be 36 * @upper: The highest address that the MAAR pair will affect. Must be
37 * aligned to one byte before a 2^16 byte boundary. 37 * aligned to one byte before a 2^16 byte boundary.
38 * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The 38 * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The
39 * MIPS_MAAR_V attribute will automatically be set. 39 * MIPS_MAAR_VL attribute will automatically be set.
40 * 40 *
41 * Program the pair of MAAR registers specified by idx to apply the attributes 41 * Program the pair of MAAR registers specified by idx to apply the attributes
42 * specified by attrs to the range of addresses from lower to higher. 42 * specified by attrs to the range of addresses from lower to higher.
@@ -49,10 +49,10 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower,
49 BUG_ON(((upper & 0xffff) != 0xffff) 49 BUG_ON(((upper & 0xffff) != 0xffff)
50 || ((upper & ~0xffffull) & ~(MIPS_MAAR_ADDR << 4))); 50 || ((upper & ~0xffffull) & ~(MIPS_MAAR_ADDR << 4)));
51 51
52 /* Automatically set MIPS_MAAR_V */ 52 /* Automatically set MIPS_MAAR_VL */
53 attrs |= MIPS_MAAR_V; 53 attrs |= MIPS_MAAR_VL;
54 54
55 /* Write the upper address & attributes (only MIPS_MAAR_V matters) */ 55 /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */
56 write_c0_maari(idx << 1); 56 write_c0_maari(idx << 1);
57 back_to_back_c0_hazard(); 57 back_to_back_c0_hazard();
58 write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs); 58 write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs);
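The alignment rules enforced here are easy to get wrong; a minimal standalone restatement (editor's sketch, helper name hypothetical, and the lower-bound rule taken from write_maar_pair()'s documented contract rather than the hunk shown):

#include <stdbool.h>
#include <stdint.h>

/* lower must be 64 KiB aligned; upper must sit one byte below a 64 KiB
 * boundary, mirroring the BUG_ON() checks in write_maar_pair(). */
static bool maar_range_aligned(uint64_t lower, uint64_t upper)
{
	return (lower & 0xffff) == 0 && (upper & 0xffff) == 0xffff;
}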
@@ -81,7 +81,7 @@ extern void maar_init(void);
81 * @upper: The highest address that the MAAR pair will affect. Must be 81 * @upper: The highest address that the MAAR pair will affect. Must be
82 * aligned to one byte before a 2^16 byte boundary. 82 * aligned to one byte before a 2^16 byte boundary.
83 * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The 83 * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The
84 * MIPS_MAAR_V attribute will automatically be set. 84 * MIPS_MAAR_VL attribute will automatically be set.
85 * 85 *
86 * Describes the configuration of a pair of Memory Accessibility Attribute 86 * Describes the configuration of a pair of Memory Accessibility Attribute
87 * Registers - applying attributes from attrs to the range of physical 87 * Registers - applying attributes from attrs to the range of physical
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index f8d1d2f1d80d..6875b69f59f7 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -34,8 +34,10 @@
34 */ 34 */
35#ifdef __ASSEMBLY__ 35#ifdef __ASSEMBLY__
36#define _ULCAST_ 36#define _ULCAST_
37#define _U64CAST_
37#else 38#else
38#define _ULCAST_ (unsigned long) 39#define _ULCAST_ (unsigned long)
40#define _U64CAST_ (u64)
39#endif 41#endif
40 42
41/* 43/*
@@ -217,8 +219,10 @@
217/* 219/*
218 * Wired register bits 220 * Wired register bits
219 */ 221 */
220#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << 16) 222#define MIPSR6_WIRED_LIMIT_SHIFT 16
221#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << 0) 223#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << MIPSR6_WIRED_LIMIT_SHIFT)
224#define MIPSR6_WIRED_WIRED_SHIFT 0
225#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << MIPSR6_WIRED_WIRED_SHIFT)
222 226
223/* 227/*
224 * Values used for computation of new tlb entries 228 * Values used for computation of new tlb entries
@@ -645,6 +649,7 @@
645#define MIPS_CONF5_LLB (_ULCAST_(1) << 4) 649#define MIPS_CONF5_LLB (_ULCAST_(1) << 4)
646#define MIPS_CONF5_MVH (_ULCAST_(1) << 5) 650#define MIPS_CONF5_MVH (_ULCAST_(1) << 5)
647#define MIPS_CONF5_VP (_ULCAST_(1) << 7) 651#define MIPS_CONF5_VP (_ULCAST_(1) << 7)
652#define MIPS_CONF5_SBRI (_ULCAST_(1) << 6)
648#define MIPS_CONF5_FRE (_ULCAST_(1) << 8) 653#define MIPS_CONF5_FRE (_ULCAST_(1) << 8)
649#define MIPS_CONF5_UFE (_ULCAST_(1) << 9) 654#define MIPS_CONF5_UFE (_ULCAST_(1) << 9)
650#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) 655#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27)
@@ -719,10 +724,14 @@
719#define XLR_PERFCTRL_ALLTHREADS (_ULCAST_(1) << 13) 724#define XLR_PERFCTRL_ALLTHREADS (_ULCAST_(1) << 13)
720 725
721/* MAAR bit definitions */ 726/* MAAR bit definitions */
727#define MIPS_MAAR_VH (_U64CAST_(1) << 63)
722#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) 728#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12)
723#define MIPS_MAAR_ADDR_SHIFT 12 729#define MIPS_MAAR_ADDR_SHIFT 12
724#define MIPS_MAAR_S (_ULCAST_(1) << 1) 730#define MIPS_MAAR_S (_ULCAST_(1) << 1)
725#define MIPS_MAAR_V (_ULCAST_(1) << 0) 731#define MIPS_MAAR_VL (_ULCAST_(1) << 0)
732
733/* MAARI bit definitions */
734#define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0)
726 735
727/* EBase bit definitions */ 736/* EBase bit definitions */
728#define MIPS_EBASE_CPUNUM_SHIFT 0 737#define MIPS_EBASE_CPUNUM_SHIFT 0
@@ -736,6 +745,10 @@
736#define MIPS_CMGCRB_BASE 11 745#define MIPS_CMGCRB_BASE 11
737#define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1)) 746#define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1))
738 747
748/* LLAddr bit definitions */
749#define MIPS_LLADDR_LLB_SHIFT 0
750#define MIPS_LLADDR_LLB (_ULCAST_(1) << MIPS_LLADDR_LLB_SHIFT)
751
739/* 752/*
740 * Bits in the MIPS32 Memory Segmentation registers. 753 * Bits in the MIPS32 Memory Segmentation registers.
741 */ 754 */
@@ -961,6 +974,22 @@
961/* Flush FTLB */ 974/* Flush FTLB */
962#define LOONGSON_DIAG_FTLB (_ULCAST_(1) << 13) 975#define LOONGSON_DIAG_FTLB (_ULCAST_(1) << 13)
963 976
977/* CvmCtl register field definitions */
978#define CVMCTL_IPPCI_SHIFT 7
979#define CVMCTL_IPPCI (_U64CAST_(0x7) << CVMCTL_IPPCI_SHIFT)
980#define CVMCTL_IPTI_SHIFT 4
981#define CVMCTL_IPTI (_U64CAST_(0x7) << CVMCTL_IPTI_SHIFT)
982
983/* CvmMemCtl2 register field definitions */
984#define CVMMEMCTL2_INHIBITTS (_U64CAST_(1) << 17)
985
986/* CvmVMConfig register field definitions */
987#define CVMVMCONF_DGHT (_U64CAST_(1) << 60)
988#define CVMVMCONF_MMUSIZEM1_S 12
989#define CVMVMCONF_MMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_MMUSIZEM1_S)
990#define CVMVMCONF_RMMUSIZEM1_S 0
991#define CVMVMCONF_RMMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_RMMUSIZEM1_S)
992
964/* 993/*
965 * Coprocessor 1 (FPU) register names 994 * Coprocessor 1 (FPU) register names
966 */ 995 */
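As a concrete reading of the CvmVMConfig fields above, a small standalone sketch (editor's illustration; the constants are repeated locally, and the "+ 1" assumes the usual "minus one" encoding implied by the M1 suffix):

#include <stdint.h>

#define CVMVMCONF_MMUSIZEM1_S	12
#define CVMVMCONF_MMUSIZEM1	(0xffull << CVMVMCONF_MMUSIZEM1_S)

/* Total guest MMU/TLB entries advertised by CvmVMConfig. */
static unsigned int cvmvmconf_guest_mmu_size(uint64_t cvmvmconf)
{
	return ((cvmvmconf & CVMVMCONF_MMUSIZEM1) >> CVMVMCONF_MMUSIZEM1_S) + 1;
}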
@@ -1720,6 +1749,13 @@ do { \
1720 1749
1721#define read_c0_cvmmemctl() __read_64bit_c0_register($11, 7) 1750#define read_c0_cvmmemctl() __read_64bit_c0_register($11, 7)
1722#define write_c0_cvmmemctl(val) __write_64bit_c0_register($11, 7, val) 1751#define write_c0_cvmmemctl(val) __write_64bit_c0_register($11, 7, val)
1752
1753#define read_c0_cvmmemctl2() __read_64bit_c0_register($16, 6)
1754#define write_c0_cvmmemctl2(val) __write_64bit_c0_register($16, 6, val)
1755
1756#define read_c0_cvmvmconfig() __read_64bit_c0_register($16, 7)
1757#define write_c0_cvmvmconfig(val) __write_64bit_c0_register($16, 7, val)
1758
1723/* 1759/*
1724 * The cacheerr registers are not standardized. On OCTEON, they are 1760 * The cacheerr registers are not standardized. On OCTEON, they are
1725 * 64 bits wide. 1761 * 64 bits wide.
@@ -1989,6 +2025,8 @@ do { \
1989#define read_gc0_epc() __read_ulong_gc0_register(14, 0) 2025#define read_gc0_epc() __read_ulong_gc0_register(14, 0)
1990#define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val) 2026#define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val)
1991 2027
2028#define read_gc0_prid() __read_32bit_gc0_register(15, 0)
2029
1992#define read_gc0_ebase() __read_32bit_gc0_register(15, 1) 2030#define read_gc0_ebase() __read_32bit_gc0_register(15, 1)
1993#define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val) 2031#define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val)
1994 2032
@@ -2012,6 +2050,9 @@ do { \
2012#define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val) 2050#define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val)
2013#define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val) 2051#define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val)
2014 2052
2053#define read_gc0_lladdr() __read_ulong_gc0_register(17, 0)
2054#define write_gc0_lladdr(val) __write_ulong_gc0_register(17, 0, val)
2055
2015#define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0) 2056#define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0)
2016#define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1) 2057#define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1)
2017#define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2) 2058#define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2)
@@ -2090,6 +2131,19 @@ do { \
2090#define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val) 2131#define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val)
2091#define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val) 2132#define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val)
2092 2133
2134/* Cavium OCTEON (cnMIPS) */
2135#define read_gc0_cvmcount() __read_ulong_gc0_register(9, 6)
2136#define write_gc0_cvmcount(val) __write_ulong_gc0_register(9, 6, val)
2137
2138#define read_gc0_cvmctl() __read_64bit_gc0_register(9, 7)
2139#define write_gc0_cvmctl(val) __write_64bit_gc0_register(9, 7, val)
2140
2141#define read_gc0_cvmmemctl() __read_64bit_gc0_register(11, 7)
2142#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register(11, 7, val)
2143
2144#define read_gc0_cvmmemctl2() __read_64bit_gc0_register(16, 6)
2145#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register(16, 6, val)
2146
2093/* 2147/*
2094 * Macros to access the floating point coprocessor control registers 2148 * Macros to access the floating point coprocessor control registers
2095 */ 2149 */
@@ -2696,9 +2750,11 @@ __BUILD_SET_C0(brcm_mode)
2696 */ 2750 */
2697#define __BUILD_SET_GC0(name) __BUILD_SET_COMMON(gc0_##name) 2751#define __BUILD_SET_GC0(name) __BUILD_SET_COMMON(gc0_##name)
2698 2752
2753__BUILD_SET_GC0(wired)
2699__BUILD_SET_GC0(status) 2754__BUILD_SET_GC0(status)
2700__BUILD_SET_GC0(cause) 2755__BUILD_SET_GC0(cause)
2701__BUILD_SET_GC0(ebase) 2756__BUILD_SET_GC0(ebase)
2757__BUILD_SET_GC0(config1)
2702 2758
2703/* 2759/*
2704 * Return low 10 bits of ebase. 2760 * Return low 10 bits of ebase.
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index dd179fd8acda..939734de4359 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -21,9 +21,11 @@
21 */ 21 */
22#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) 22#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
23 23
24#define UNIQUE_ENTRYHI(idx) \ 24#define _UNIQUE_ENTRYHI(base, idx) \
25 ((CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) | \ 25 (((base) + ((idx) << (PAGE_SHIFT + 1))) | \
26 (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0)) 26 (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0))
27#define UNIQUE_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG0, idx)
28#define UNIQUE_GUEST_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG1, idx)
27 29
28static inline unsigned int num_wired_entries(void) 30static inline unsigned int num_wired_entries(void)
29{ 31{
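A worked expansion may help (editor's example): with 4 KiB pages (PAGE_SHIFT = 12), the 32-bit CKSEG1 base 0xa0000000, and the EHINV term ignored,

	UNIQUE_GUEST_ENTRYHI(3) = (CKSEG1 + (3 << (PAGE_SHIFT + 1)))
	                        = 0xa0000000 + 0x6000
	                        = 0xa0006000

so each dummy guest EntryHi lands on a distinct CKSEG1 address, separate from the CKSEG0 values produced by UNIQUE_ENTRYHI().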
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 77429d1622b3..b5e46ae872d3 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -179,7 +179,7 @@ enum cop0_coi_func {
179 tlbr_op = 0x01, tlbwi_op = 0x02, 179 tlbr_op = 0x01, tlbwi_op = 0x02,
180 tlbwr_op = 0x06, tlbp_op = 0x08, 180 tlbwr_op = 0x06, tlbp_op = 0x08,
181 rfe_op = 0x10, eret_op = 0x18, 181 rfe_op = 0x10, eret_op = 0x18,
182 wait_op = 0x20, 182 wait_op = 0x20, hypcall_op = 0x28
183}; 183};
184 184
185/* 185/*
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index a8a0199bf760..0318c6b442ab 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -21,6 +21,8 @@
21 21
22#define __KVM_HAVE_READONLY_MEM 22#define __KVM_HAVE_READONLY_MEM
23 23
24#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
25
24/* 26/*
25 * for KVM_GET_REGS and KVM_SET_REGS 27 * for KVM_GET_REGS and KVM_SET_REGS
26 * 28 *
@@ -54,9 +56,14 @@ struct kvm_fpu {
54 * Register set = 0: GP registers from kvm_regs (see definitions below). 56 * Register set = 0: GP registers from kvm_regs (see definitions below).
55 * 57 *
56 * Register set = 1: CP0 registers. 58 * Register set = 1: CP0 registers.
57 * bits[15..8] - Must be zero. 59 * bits[15..8] - COP0 register set.
58 * bits[7..3] - Register 'rd' index. 60 *
59 * bits[2..0] - Register 'sel' index. 61 * COP0 register set = 0: Main CP0 registers.
62 * bits[7..3] - Register 'rd' index.
63 * bits[2..0] - Register 'sel' index.
64 *
65 * COP0 register set = 1: MAARs.
66 * bits[7..0] - MAAR index.
60 * 67 *
61 * Register set = 2: KVM specific registers (see definitions below). 68 * Register set = 2: KVM specific registers (see definitions below).
62 * 69 *
@@ -115,6 +122,15 @@ struct kvm_fpu {
115 122
116 123
117/* 124/*
125 * KVM_REG_MIPS_CP0 - Coprocessor 0 registers.
126 */
127
128#define KVM_REG_MIPS_MAAR (KVM_REG_MIPS_CP0 | (1 << 8))
129#define KVM_REG_MIPS_CP0_MAAR(n) (KVM_REG_MIPS_MAAR | \
130 KVM_REG_SIZE_U64 | (n))
131
132
133/*
118 * KVM_REG_MIPS_KVM - KVM specific control registers. 134 * KVM_REG_MIPS_KVM - KVM specific control registers.
119 */ 135 */
120 136
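A hedged userspace sketch of how the new MAAR register ids are meant to be consumed (editor's illustration; error handling elided, and vcpu_fd is assumed to be an already-open KVM vCPU file descriptor):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <asm/kvm.h>

/* Fetch guest MAAR register n using the id layout described above. */
static uint64_t read_guest_maar(int vcpu_fd, unsigned int n)
{
	uint64_t val = 0;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_MIPS_CP0_MAAR(n),
		.addr = (uintptr_t)&val,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	/* error handling elided */
	return val;
}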
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 07718bb5fc9d..c72a4cda389c 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -289,6 +289,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
289 MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { 289 MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
290 if (c->fpu_id & MIPS_FPIR_3D) 290 if (c->fpu_id & MIPS_FPIR_3D)
291 c->ases |= MIPS_ASE_MIPS3D; 291 c->ases |= MIPS_ASE_MIPS3D;
292 if (c->fpu_id & MIPS_FPIR_UFRP)
293 c->options |= MIPS_CPU_UFR;
292 if (c->fpu_id & MIPS_FPIR_FREP) 294 if (c->fpu_id & MIPS_FPIR_FREP)
293 c->options |= MIPS_CPU_FRE; 295 c->options |= MIPS_CPU_FRE;
294 } 296 }
@@ -1003,7 +1005,8 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c)
1003 unsigned int config3, config3_dyn; 1005 unsigned int config3, config3_dyn;
1004 1006
1005 probe_gc0_config_dyn(config3, config3, config3_dyn, 1007 probe_gc0_config_dyn(config3, config3, config3_dyn,
1006 MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_CTXTC); 1008 MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_ULRI |
1009 MIPS_CONF3_CTXTC);
1007 1010
1008 if (config3 & MIPS_CONF3_CTXTC) 1011 if (config3 & MIPS_CONF3_CTXTC)
1009 c->guest.options |= MIPS_CPU_CTXTC; 1012 c->guest.options |= MIPS_CPU_CTXTC;
@@ -1013,6 +1016,9 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c)
1013 if (config3 & MIPS_CONF3_PW) 1016 if (config3 & MIPS_CONF3_PW)
1014 c->guest.options |= MIPS_CPU_HTW; 1017 c->guest.options |= MIPS_CPU_HTW;
1015 1018
1019 if (config3 & MIPS_CONF3_ULRI)
1020 c->guest.options |= MIPS_CPU_ULRI;
1021
1016 if (config3 & MIPS_CONF3_SC) 1022 if (config3 & MIPS_CONF3_SC)
1017 c->guest.options |= MIPS_CPU_SEGMENTS; 1023 c->guest.options |= MIPS_CPU_SEGMENTS;
1018 1024
@@ -1051,7 +1057,7 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c)
1051 unsigned int config5, config5_dyn; 1057 unsigned int config5, config5_dyn;
1052 1058
1053 probe_gc0_config_dyn(config5, config5, config5_dyn, 1059 probe_gc0_config_dyn(config5, config5, config5_dyn,
1054 MIPS_CONF_M | MIPS_CONF5_MRP); 1060 MIPS_CONF_M | MIPS_CONF5_MVH | MIPS_CONF5_MRP);
1055 1061
1056 if (config5 & MIPS_CONF5_MRP) 1062 if (config5 & MIPS_CONF5_MRP)
1057 c->guest.options |= MIPS_CPU_MAAR; 1063 c->guest.options |= MIPS_CPU_MAAR;
@@ -1061,6 +1067,9 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c)
1061 if (config5 & MIPS_CONF5_LLB) 1067 if (config5 & MIPS_CONF5_LLB)
1062 c->guest.options |= MIPS_CPU_RW_LLB; 1068 c->guest.options |= MIPS_CPU_RW_LLB;
1063 1069
1070 if (config5 & MIPS_CONF5_MVH)
1071 c->guest.options |= MIPS_CPU_MVH;
1072
1064 if (config5 & MIPS_CONF_M) 1073 if (config5 & MIPS_CONF_M)
1065 c->guest.conf |= BIT(6); 1074 c->guest.conf |= BIT(6);
1066 return config5 & MIPS_CONF_M; 1075 return config5 & MIPS_CONF_M;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index a7f81261c781..c036157fb891 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -70,6 +70,7 @@ EXPORT_SYMBOL(perf_irq);
70 */ 70 */
71 71
72unsigned int mips_hpt_frequency; 72unsigned int mips_hpt_frequency;
73EXPORT_SYMBOL_GPL(mips_hpt_frequency);
73 74
74/* 75/*
75 * This function exists in order to cause an error due to a duplicate 76 * This function exists in order to cause an error due to a duplicate
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 65067327db12..50a722dfb236 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -26,11 +26,34 @@ config KVM
26 select SRCU 26 select SRCU
27 ---help--- 27 ---help---
28 Support for hosting Guest kernels. 28 Support for hosting Guest kernels.
29 Currently supported on MIPS32 processors. 29
30choice
31 prompt "Virtualization mode"
32 depends on KVM
33 default KVM_MIPS_TE
34
35config KVM_MIPS_TE
36 bool "Trap & Emulate"
37 ---help---
38 Use trap and emulate to virtualize 32-bit guests in user mode. This
39 does not require any special hardware Virtualization support beyond
40 standard MIPS32/64 r2 or later, but it does require the guest kernel
41 to be configured with CONFIG_KVM_GUEST=y so that it resides in the
42 user address segment.
43
44config KVM_MIPS_VZ
45 bool "MIPS Virtualization (VZ) ASE"
46 ---help---
47 Use the MIPS Virtualization (VZ) ASE to virtualize guests. This
48 supports running unmodified guest kernels (with CONFIG_KVM_GUEST=n),
49 but requires hardware support.
50
51endchoice
30 52
31config KVM_MIPS_DYN_TRANS 53config KVM_MIPS_DYN_TRANS
32 bool "KVM/MIPS: Dynamic binary translation to reduce traps" 54 bool "KVM/MIPS: Dynamic binary translation to reduce traps"
33 depends on KVM 55 depends on KVM_MIPS_TE
56 default y
34 ---help--- 57 ---help---
35 When running in Trap & Emulate mode patch privileged 58 When running in Trap & Emulate mode patch privileged
36 instructions to reduce the number of traps. 59 instructions to reduce the number of traps.
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 847429de780d..45d90f5d5177 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -9,8 +9,15 @@ common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
9 9
10kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \ 10kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
11 interrupt.o stats.o commpage.o \ 11 interrupt.o stats.o commpage.o \
12 dyntrans.o trap_emul.o fpu.o 12 fpu.o
13kvm-objs += hypcall.o
13kvm-objs += mmu.o 14kvm-objs += mmu.o
14 15
16ifdef CONFIG_KVM_MIPS_VZ
17kvm-objs += vz.o
18else
19kvm-objs += dyntrans.o
20kvm-objs += trap_emul.o
21endif
15obj-$(CONFIG_KVM) += kvm.o 22obj-$(CONFIG_KVM) += kvm.o
16obj-y += callback.o tlb.o 23obj-y += callback.o tlb.o
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index d40cfaad4529..34e78a3ee9d7 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -308,7 +308,7 @@ int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
308 * CP0_Cause.DC bit or the count_ctl.DC bit. 308 * CP0_Cause.DC bit or the count_ctl.DC bit.
309 * 0 otherwise (in which case CP0_Count timer is running). 309 * 0 otherwise (in which case CP0_Count timer is running).
310 */ 310 */
311static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) 311int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
312{ 312{
313 struct mips_coproc *cop0 = vcpu->arch.cop0; 313 struct mips_coproc *cop0 = vcpu->arch.cop0;
314 314
@@ -467,7 +467,7 @@ u32 kvm_mips_read_count(struct kvm_vcpu *vcpu)
467 * 467 *
468 * Returns: The ktime at the point of freeze. 468 * Returns: The ktime at the point of freeze.
469 */ 469 */
470static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) 470ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
471{ 471{
472 ktime_t now; 472 ktime_t now;
473 473
@@ -517,6 +517,82 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
517} 517}
518 518
519/** 519/**
520 * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry.
521 * @vcpu: Virtual CPU.
522 * @before: Time before Count was saved, lower bound of drift calculation.
523 * @count: CP0_Count at point of restore.
524 * @min_drift: Minimum amount of drift permitted before correction.
525 * Must be <= 0.
526 *
527 * Restores the timer from a particular @count, accounting for drift. This can
 528 * be used in conjunction with kvm_mips_freeze_hrtimer() when a hardware timer is
529 * to be used for a period of time, but the exact ktime corresponding to the
530 * final Count that must be restored is not known.
531 *
 532 * It is guaranteed that a timer interrupt immediately after restore will be
533 * handled, but not if CP0_Compare is exactly at @count. That case should
534 * already be handled when the hardware timer state is saved.
535 *
536 * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not
537 * stopped).
538 *
539 * Returns: Amount of correction to count_bias due to drift.
540 */
541int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before,
542 u32 count, int min_drift)
543{
544 ktime_t now, count_time;
545 u32 now_count, before_count;
546 u64 delta;
547 int drift, ret = 0;
548
549 /* Calculate expected count at before */
550 before_count = vcpu->arch.count_bias +
551 kvm_mips_ktime_to_count(vcpu, before);
552
553 /*
554 * Detect significantly negative drift, where count is lower than
555 * expected. Some negative drift is expected when hardware counter is
556 * set after kvm_mips_freeze_timer(), and it is harmless to allow the
557 * time to jump forwards a little, within reason. If the drift is too
558 * significant, adjust the bias to avoid a big Guest.CP0_Count jump.
559 */
560 drift = count - before_count;
561 if (drift < min_drift) {
562 count_time = before;
563 vcpu->arch.count_bias += drift;
564 ret = drift;
565 goto resume;
566 }
567
568 /* Calculate expected count right now */
569 now = ktime_get();
570 now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
571
572 /*
573 * Detect positive drift, where count is higher than expected, and
574 * adjust the bias to avoid guest time going backwards.
575 */
576 drift = count - now_count;
577 if (drift > 0) {
578 count_time = now;
579 vcpu->arch.count_bias += drift;
580 ret = drift;
581 goto resume;
582 }
583
584 /* Subtract nanosecond delta to find ktime when count was read */
585 delta = (u64)(u32)(now_count - count);
586 delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
587 count_time = ktime_sub_ns(now, delta);
588
589resume:
590 /* Resume using the calculated ktime */
591 kvm_mips_resume_hrtimer(vcpu, count_time, count);
592 return ret;
593}
594
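/*
 * Editor's sketch (not part of the patch): the intended pairing with
 * kvm_mips_freeze_hrtimer() when a hardware timer temporarily carries the
 * guest Count, roughly:
 *
 *	u32 count;
 *	ktime_t before = kvm_mips_freeze_hrtimer(vcpu, &count);
 *	...hardware timer advances the guest Count for a while...
 *	count = final hardware Count value;
 *	kvm_mips_restore_hrtimer(vcpu, before, count, -1000);
 *
 * where -1000 is a placeholder threshold: up to 1000 ticks of negative drift
 * are tolerated before the bias is corrected.
 */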
595/**
520 * kvm_mips_write_count() - Modify the count and update timer. 596 * kvm_mips_write_count() - Modify the count and update timer.
521 * @vcpu: Virtual CPU. 597 * @vcpu: Virtual CPU.
522 * @count: Guest CP0_Count value to set. 598 * @count: Guest CP0_Count value to set.
@@ -543,16 +619,15 @@ void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count)
543/** 619/**
544 * kvm_mips_init_count() - Initialise timer. 620 * kvm_mips_init_count() - Initialise timer.
545 * @vcpu: Virtual CPU. 621 * @vcpu: Virtual CPU.
622 * @count_hz: Frequency of timer.
546 * 623 *
547 * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set 624 * Initialise the timer to the specified frequency, zero it, and set it going if
548 * it going if it's enabled. 625 * it's enabled.
549 */ 626 */
550void kvm_mips_init_count(struct kvm_vcpu *vcpu) 627void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz)
551{ 628{
552 /* 100 MHz */ 629 vcpu->arch.count_hz = count_hz;
553 vcpu->arch.count_hz = 100*1000*1000; 630 vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz);
554 vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32,
555 vcpu->arch.count_hz);
556 vcpu->arch.count_dyn_bias = 0; 631 vcpu->arch.count_dyn_bias = 0;
557 632
558 /* Starting at 0 */ 633 /* Starting at 0 */
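For concreteness, count_period holds nanoseconds per guest Count tick in 32.32 fixed point; a standalone restatement (editor's sketch) of the expression above:

#include <stdint.h>

/* 32.32 fixed-point nanoseconds per guest Count tick, as stored by
 * kvm_mips_init_count(): (NSEC_PER_SEC << 32) / count_hz. */
static uint64_t count_period_for(uint64_t count_hz)
{
	return (1000000000ull << 32) / count_hz;
}

/* count_period_for(100000000) == 10ull << 32, i.e. 10 ns per tick,
 * matching the old hard-coded 100 MHz default. */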
@@ -622,7 +697,9 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
622 struct mips_coproc *cop0 = vcpu->arch.cop0; 697 struct mips_coproc *cop0 = vcpu->arch.cop0;
623 int dc; 698 int dc;
624 u32 old_compare = kvm_read_c0_guest_compare(cop0); 699 u32 old_compare = kvm_read_c0_guest_compare(cop0);
625 ktime_t now; 700 s32 delta = compare - old_compare;
701 u32 cause;
702 ktime_t now = ktime_set(0, 0); /* silence bogus GCC warning */
626 u32 count; 703 u32 count;
627 704
628 /* if unchanged, must just be an ack */ 705 /* if unchanged, must just be an ack */
@@ -634,6 +711,21 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
634 return; 711 return;
635 } 712 }
636 713
714 /*
715 * If guest CP0_Compare moves forward, CP0_GTOffset should be adjusted
716 * too to prevent guest CP0_Count hitting guest CP0_Compare.
717 *
718 * The new GTOffset corresponds to the new value of CP0_Compare, and is
719 * set prior to it being written into the guest context. We disable
720 * preemption until the new value is written to prevent restore of a
721 * GTOffset corresponding to the old CP0_Compare value.
722 */
723 if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta > 0) {
724 preempt_disable();
725 write_c0_gtoffset(compare - read_c0_count());
726 back_to_back_c0_hazard();
727 }
728
637 /* freeze_hrtimer() takes care of timer interrupts <= count */ 729 /* freeze_hrtimer() takes care of timer interrupts <= count */
638 dc = kvm_mips_count_disabled(vcpu); 730 dc = kvm_mips_count_disabled(vcpu);
639 if (!dc) 731 if (!dc)
@@ -641,12 +733,36 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
641 733
642 if (ack) 734 if (ack)
643 kvm_mips_callbacks->dequeue_timer_int(vcpu); 735 kvm_mips_callbacks->dequeue_timer_int(vcpu);
736 else if (IS_ENABLED(CONFIG_KVM_MIPS_VZ))
737 /*
738 * With VZ, writing CP0_Compare acks (clears) CP0_Cause.TI, so
739 * preserve guest CP0_Cause.TI if we don't want to ack it.
740 */
741 cause = kvm_read_c0_guest_cause(cop0);
644 742
645 kvm_write_c0_guest_compare(cop0, compare); 743 kvm_write_c0_guest_compare(cop0, compare);
646 744
745 if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
746 if (delta > 0)
747 preempt_enable();
748
749 back_to_back_c0_hazard();
750
751 if (!ack && cause & CAUSEF_TI)
752 kvm_write_c0_guest_cause(cop0, cause);
753 }
754
647 /* resume_hrtimer() takes care of timer interrupts > count */ 755 /* resume_hrtimer() takes care of timer interrupts > count */
648 if (!dc) 756 if (!dc)
649 kvm_mips_resume_hrtimer(vcpu, now, count); 757 kvm_mips_resume_hrtimer(vcpu, now, count);
758
759 /*
760 * If guest CP0_Compare is moving backward, we delay CP0_GTOffset change
761 * until after the new CP0_Compare is written, otherwise new guest
762 * CP0_Count could hit new guest CP0_Compare.
763 */
764 if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta <= 0)
765 write_c0_gtoffset(compare - read_c0_count());
650} 766}
651 767
652/** 768/**
@@ -857,6 +973,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
857 ++vcpu->stat.wait_exits; 973 ++vcpu->stat.wait_exits;
858 trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT); 974 trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT);
859 if (!vcpu->arch.pending_exceptions) { 975 if (!vcpu->arch.pending_exceptions) {
976 kvm_vz_lose_htimer(vcpu);
860 vcpu->arch.wait = 1; 977 vcpu->arch.wait = 1;
861 kvm_vcpu_block(vcpu); 978 kvm_vcpu_block(vcpu);
862 979
@@ -873,17 +990,62 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
873 return EMULATE_DONE; 990 return EMULATE_DONE;
874} 991}
875 992
876/* 993static void kvm_mips_change_entryhi(struct kvm_vcpu *vcpu,
877 * XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that 994 unsigned long entryhi)
878 * we can catch this, if things ever change 995{
879 */ 996 struct mips_coproc *cop0 = vcpu->arch.cop0;
997 struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
998 int cpu, i;
999 u32 nasid = entryhi & KVM_ENTRYHI_ASID;
1000
1001 if (((kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID) != nasid)) {
1002 trace_kvm_asid_change(vcpu, kvm_read_c0_guest_entryhi(cop0) &
1003 KVM_ENTRYHI_ASID, nasid);
1004
1005 /*
1006 * Flush entries from the GVA page tables.
1007 * Guest user page table will get flushed lazily on re-entry to
1008 * guest user if the guest ASID actually changes.
1009 */
1010 kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_KERN);
1011
1012 /*
1013 * Regenerate/invalidate kernel MMU context.
1014 * The user MMU context will be regenerated lazily on re-entry
1015 * to guest user if the guest ASID actually changes.
1016 */
1017 preempt_disable();
1018 cpu = smp_processor_id();
1019 get_new_mmu_context(kern_mm, cpu);
1020 for_each_possible_cpu(i)
1021 if (i != cpu)
1022 cpu_context(i, kern_mm) = 0;
1023 preempt_enable();
1024 }
1025 kvm_write_c0_guest_entryhi(cop0, entryhi);
1026}
1027
880enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) 1028enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu)
881{ 1029{
882 struct mips_coproc *cop0 = vcpu->arch.cop0; 1030 struct mips_coproc *cop0 = vcpu->arch.cop0;
1031 struct kvm_mips_tlb *tlb;
883 unsigned long pc = vcpu->arch.pc; 1032 unsigned long pc = vcpu->arch.pc;
1033 int index;
884 1034
885 kvm_err("[%#lx] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); 1035 index = kvm_read_c0_guest_index(cop0);
886 return EMULATE_FAIL; 1036 if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) {
1037 /* UNDEFINED */
1038 kvm_debug("[%#lx] TLBR Index %#x out of range\n", pc, index);
1039 index &= KVM_MIPS_GUEST_TLB_SIZE - 1;
1040 }
1041
1042 tlb = &vcpu->arch.guest_tlb[index];
1043 kvm_write_c0_guest_pagemask(cop0, tlb->tlb_mask);
1044 kvm_write_c0_guest_entrylo0(cop0, tlb->tlb_lo[0]);
1045 kvm_write_c0_guest_entrylo1(cop0, tlb->tlb_lo[1]);
1046 kvm_mips_change_entryhi(vcpu, tlb->tlb_hi);
1047
1048 return EMULATE_DONE;
887} 1049}
888 1050
889/** 1051/**
@@ -1105,11 +1267,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
1105 struct kvm_vcpu *vcpu) 1267 struct kvm_vcpu *vcpu)
1106{ 1268{
1107 struct mips_coproc *cop0 = vcpu->arch.cop0; 1269 struct mips_coproc *cop0 = vcpu->arch.cop0;
1108 struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
1109 enum emulation_result er = EMULATE_DONE; 1270 enum emulation_result er = EMULATE_DONE;
1110 u32 rt, rd, sel; 1271 u32 rt, rd, sel;
1111 unsigned long curr_pc; 1272 unsigned long curr_pc;
1112 int cpu, i;
1113 1273
1114 /* 1274 /*
1115 * Update PC and hold onto current PC in case there is 1275 * Update PC and hold onto current PC in case there is
@@ -1143,6 +1303,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
1143 case wait_op: 1303 case wait_op:
1144 er = kvm_mips_emul_wait(vcpu); 1304 er = kvm_mips_emul_wait(vcpu);
1145 break; 1305 break;
1306 case hypcall_op:
1307 er = kvm_mips_emul_hypcall(vcpu, inst);
1308 break;
1146 } 1309 }
1147 } else { 1310 } else {
1148 rt = inst.c0r_format.rt; 1311 rt = inst.c0r_format.rt;
@@ -1208,44 +1371,8 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
1208 kvm_change_c0_guest_ebase(cop0, 0x1ffff000, 1371 kvm_change_c0_guest_ebase(cop0, 0x1ffff000,
1209 vcpu->arch.gprs[rt]); 1372 vcpu->arch.gprs[rt]);
1210 } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { 1373 } else if (rd == MIPS_CP0_TLB_HI && sel == 0) {
1211 u32 nasid = 1374 kvm_mips_change_entryhi(vcpu,
1212 vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID; 1375 vcpu->arch.gprs[rt]);
1213 if (((kvm_read_c0_guest_entryhi(cop0) &
1214 KVM_ENTRYHI_ASID) != nasid)) {
1215 trace_kvm_asid_change(vcpu,
1216 kvm_read_c0_guest_entryhi(cop0)
1217 & KVM_ENTRYHI_ASID,
1218 nasid);
1219
1220 /*
1221 * Flush entries from the GVA page
1222 * tables.
1223 * Guest user page table will get
1224 * flushed lazily on re-entry to guest
1225 * user if the guest ASID actually
1226 * changes.
1227 */
1228 kvm_mips_flush_gva_pt(kern_mm->pgd,
1229 KMF_KERN);
1230
1231 /*
1232 * Regenerate/invalidate kernel MMU
1233 * context.
1234 * The user MMU context will be
1235 * regenerated lazily on re-entry to
1236 * guest user if the guest ASID actually
1237 * changes.
1238 */
1239 preempt_disable();
1240 cpu = smp_processor_id();
1241 get_new_mmu_context(kern_mm, cpu);
1242 for_each_possible_cpu(i)
1243 if (i != cpu)
1244 cpu_context(i, kern_mm) = 0;
1245 preempt_enable();
1246 }
1247 kvm_write_c0_guest_entryhi(cop0,
1248 vcpu->arch.gprs[rt]);
1249 } 1376 }
1250 /* Are we writing to COUNT */ 1377 /* Are we writing to COUNT */
1251 else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { 1378 else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
@@ -1474,9 +1601,8 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
1474 struct kvm_run *run, 1601 struct kvm_run *run,
1475 struct kvm_vcpu *vcpu) 1602 struct kvm_vcpu *vcpu)
1476{ 1603{
1477 enum emulation_result er = EMULATE_DO_MMIO; 1604 enum emulation_result er;
1478 u32 rt; 1605 u32 rt;
1479 u32 bytes;
1480 void *data = run->mmio.data; 1606 void *data = run->mmio.data;
1481 unsigned long curr_pc; 1607 unsigned long curr_pc;
1482 1608
@@ -1491,103 +1617,74 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
1491 1617
1492 rt = inst.i_format.rt; 1618 rt = inst.i_format.rt;
1493 1619
1620 run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
1621 vcpu->arch.host_cp0_badvaddr);
1622 if (run->mmio.phys_addr == KVM_INVALID_ADDR)
1623 goto out_fail;
1624
1494 switch (inst.i_format.opcode) { 1625 switch (inst.i_format.opcode) {
1495 case sb_op: 1626#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ)
1496 bytes = 1; 1627 case sd_op:
1497 if (bytes > sizeof(run->mmio.data)) { 1628 run->mmio.len = 8;
1498 kvm_err("%s: bad MMIO length: %d\n", __func__, 1629 *(u64 *)data = vcpu->arch.gprs[rt];
1499 run->mmio.len);
1500 }
1501 run->mmio.phys_addr =
1502 kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
1503 host_cp0_badvaddr);
1504 if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
1505 er = EMULATE_FAIL;
1506 break;
1507 }
1508 run->mmio.len = bytes;
1509 run->mmio.is_write = 1;
1510 vcpu->mmio_needed = 1;
1511 vcpu->mmio_is_write = 1;
1512 *(u8 *) data = vcpu->arch.gprs[rt];
1513 kvm_debug("OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n",
1514 vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt],
1515 *(u8 *) data);
1516 1630
1631 kvm_debug("[%#lx] OP_SD: eaddr: %#lx, gpr: %#lx, data: %#llx\n",
1632 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
1633 vcpu->arch.gprs[rt], *(u64 *)data);
1517 break; 1634 break;
1635#endif
1518 1636
1519 case sw_op: 1637 case sw_op:
1520 bytes = 4; 1638 run->mmio.len = 4;
1521 if (bytes > sizeof(run->mmio.data)) { 1639 *(u32 *)data = vcpu->arch.gprs[rt];
1522 kvm_err("%s: bad MMIO length: %d\n", __func__,
1523 run->mmio.len);
1524 }
1525 run->mmio.phys_addr =
1526 kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
1527 host_cp0_badvaddr);
1528 if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
1529 er = EMULATE_FAIL;
1530 break;
1531 }
1532
1533 run->mmio.len = bytes;
1534 run->mmio.is_write = 1;
1535 vcpu->mmio_needed = 1;
1536 vcpu->mmio_is_write = 1;
1537 *(u32 *) data = vcpu->arch.gprs[rt];
1538 1640
1539 kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n", 1641 kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n",
1540 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, 1642 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
1541 vcpu->arch.gprs[rt], *(u32 *) data); 1643 vcpu->arch.gprs[rt], *(u32 *)data);
1542 break; 1644 break;
1543 1645
1544 case sh_op: 1646 case sh_op:
1545 bytes = 2; 1647 run->mmio.len = 2;
1546 if (bytes > sizeof(run->mmio.data)) { 1648 *(u16 *)data = vcpu->arch.gprs[rt];
1547 kvm_err("%s: bad MMIO length: %d\n", __func__,
1548 run->mmio.len);
1549 }
1550 run->mmio.phys_addr =
1551 kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
1552 host_cp0_badvaddr);
1553 if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
1554 er = EMULATE_FAIL;
1555 break;
1556 }
1557
1558 run->mmio.len = bytes;
1559 run->mmio.is_write = 1;
1560 vcpu->mmio_needed = 1;
1561 vcpu->mmio_is_write = 1;
1562 *(u16 *) data = vcpu->arch.gprs[rt];
1563 1649
1564 kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n", 1650 kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n",
1565 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, 1651 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
1566 vcpu->arch.gprs[rt], *(u32 *) data); 1652 vcpu->arch.gprs[rt], *(u16 *)data);
1653 break;
1654
1655 case sb_op:
1656 run->mmio.len = 1;
1657 *(u8 *)data = vcpu->arch.gprs[rt];
1658
1659 kvm_debug("[%#lx] OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n",
1660 vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
1661 vcpu->arch.gprs[rt], *(u8 *)data);
1567 break; 1662 break;
1568 1663
1569 default: 1664 default:
1570 kvm_err("Store not yet supported (inst=0x%08x)\n", 1665 kvm_err("Store not yet supported (inst=0x%08x)\n",
1571 inst.word); 1666 inst.word);
1572 er = EMULATE_FAIL; 1667 goto out_fail;
1573 break;
1574 } 1668 }
1575 1669
1576 /* Rollback PC if emulation was unsuccessful */ 1670 run->mmio.is_write = 1;
1577 if (er == EMULATE_FAIL) 1671 vcpu->mmio_needed = 1;
1578 vcpu->arch.pc = curr_pc; 1672 vcpu->mmio_is_write = 1;
1673 return EMULATE_DO_MMIO;
1579 1674
1580 return er; 1675out_fail:
1676 /* Rollback PC if emulation was unsuccessful */
1677 vcpu->arch.pc = curr_pc;
1678 return EMULATE_FAIL;
1581} 1679}
1582 1680
 enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
 					    u32 cause, struct kvm_run *run,
 					    struct kvm_vcpu *vcpu)
 {
-	enum emulation_result er = EMULATE_DO_MMIO;
+	enum emulation_result er;
 	unsigned long curr_pc;
 	u32 op, rt;
-	u32 bytes;
 
 	rt = inst.i_format.rt;
 	op = inst.i_format.opcode;
@@ -1606,96 +1703,53 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
 
 	vcpu->arch.io_gpr = rt;
 
+	run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+						vcpu->arch.host_cp0_badvaddr);
+	if (run->mmio.phys_addr == KVM_INVALID_ADDR)
+		return EMULATE_FAIL;
+
+	vcpu->mmio_needed = 2;	/* signed */
 	switch (op) {
-	case lw_op:
-		bytes = 4;
-		if (bytes > sizeof(run->mmio.data)) {
-			kvm_err("%s: bad MMIO length: %d\n", __func__,
-				run->mmio.len);
-			er = EMULATE_FAIL;
-			break;
-		}
-		run->mmio.phys_addr =
-		    kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
-						   host_cp0_badvaddr);
-		if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
-			er = EMULATE_FAIL;
-			break;
-		}
-
-		run->mmio.len = bytes;
-		run->mmio.is_write = 0;
-		vcpu->mmio_needed = 1;
-		vcpu->mmio_is_write = 0;
-		break;
-
-	case lh_op:
-	case lhu_op:
-		bytes = 2;
-		if (bytes > sizeof(run->mmio.data)) {
-			kvm_err("%s: bad MMIO length: %d\n", __func__,
-				run->mmio.len);
-			er = EMULATE_FAIL;
-			break;
-		}
-		run->mmio.phys_addr =
-		    kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
-						   host_cp0_badvaddr);
-		if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
-			er = EMULATE_FAIL;
-			break;
-		}
-
-		run->mmio.len = bytes;
-		run->mmio.is_write = 0;
-		vcpu->mmio_needed = 1;
-		vcpu->mmio_is_write = 0;
-
-		if (op == lh_op)
-			vcpu->mmio_needed = 2;
-		else
-			vcpu->mmio_needed = 1;
-
-		break;
-
-	case lbu_op:
-	case lb_op:
-		bytes = 1;
-		if (bytes > sizeof(run->mmio.data)) {
-			kvm_err("%s: bad MMIO length: %d\n", __func__,
-				run->mmio.len);
-			er = EMULATE_FAIL;
-			break;
-		}
-		run->mmio.phys_addr =
-		    kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
-						   host_cp0_badvaddr);
-		if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
-			er = EMULATE_FAIL;
-			break;
-		}
-
-		run->mmio.len = bytes;
-		run->mmio.is_write = 0;
-		vcpu->mmio_is_write = 0;
-
-		if (op == lb_op)
-			vcpu->mmio_needed = 2;
-		else
-			vcpu->mmio_needed = 1;
-
-		break;
-
+#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ)
+	case ld_op:
+		run->mmio.len = 8;
+		break;
+
+	case lwu_op:
+		vcpu->mmio_needed = 1;	/* unsigned */
+		/* fall through */
+#endif
+	case lw_op:
+		run->mmio.len = 4;
+		break;
+
+	case lhu_op:
+		vcpu->mmio_needed = 1;	/* unsigned */
+		/* fall through */
+	case lh_op:
+		run->mmio.len = 2;
+		break;
+
+	case lbu_op:
+		vcpu->mmio_needed = 1;	/* unsigned */
+		/* fall through */
+	case lb_op:
+		run->mmio.len = 1;
+		break;
+
 	default:
 		kvm_err("Load not yet supported (inst=0x%08x)\n",
 			inst.word);
-		er = EMULATE_FAIL;
-		break;
+		vcpu->mmio_needed = 0;
+		return EMULATE_FAIL;
 	}
 
-	return er;
+	run->mmio.is_write = 0;
+	vcpu->mmio_is_write = 0;
+	return EMULATE_DO_MMIO;
 }
 
+#ifndef CONFIG_KVM_MIPS_VZ
 static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
 						     unsigned long curr_pc,
 						     unsigned long addr,
@@ -1786,11 +1840,35 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
1786 vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, 1840 vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base,
1787 arch->gprs[base], offset); 1841 arch->gprs[base], offset);
1788 1842
1789 if (cache == Cache_D) 1843 if (cache == Cache_D) {
1844#ifdef CONFIG_CPU_R4K_CACHE_TLB
1790 r4k_blast_dcache(); 1845 r4k_blast_dcache();
1791 else if (cache == Cache_I) 1846#else
1847 switch (boot_cpu_type()) {
1848 case CPU_CAVIUM_OCTEON3:
1849 /* locally flush icache */
1850 local_flush_icache_range(0, 0);
1851 break;
1852 default:
1853 __flush_cache_all();
1854 break;
1855 }
1856#endif
1857 } else if (cache == Cache_I) {
1858#ifdef CONFIG_CPU_R4K_CACHE_TLB
1792 r4k_blast_icache(); 1859 r4k_blast_icache();
1793 else { 1860#else
1861 switch (boot_cpu_type()) {
1862 case CPU_CAVIUM_OCTEON3:
1863 /* locally flush icache */
1864 local_flush_icache_range(0, 0);
1865 break;
1866 default:
1867 flush_icache_all();
1868 break;
1869 }
1870#endif
1871 } else {
1794 kvm_err("%s: unsupported CACHE INDEX operation\n", 1872 kvm_err("%s: unsupported CACHE INDEX operation\n",
1795 __func__); 1873 __func__);
1796 return EMULATE_FAIL; 1874 return EMULATE_FAIL;
@@ -1870,18 +1948,6 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
1870 case cop0_op: 1948 case cop0_op:
1871 er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu); 1949 er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu);
1872 break; 1950 break;
1873 case sb_op:
1874 case sh_op:
1875 case sw_op:
1876 er = kvm_mips_emulate_store(inst, cause, run, vcpu);
1877 break;
1878 case lb_op:
1879 case lbu_op:
1880 case lhu_op:
1881 case lh_op:
1882 case lw_op:
1883 er = kvm_mips_emulate_load(inst, cause, run, vcpu);
1884 break;
1885 1951
1886#ifndef CONFIG_CPU_MIPSR6 1952#ifndef CONFIG_CPU_MIPSR6
1887 case cache_op: 1953 case cache_op:
@@ -1915,6 +1981,7 @@ unknown:
1915 1981
1916 return er; 1982 return er;
1917} 1983}
1984#endif /* CONFIG_KVM_MIPS_VZ */
1918 1985
1919/** 1986/**
1920 * kvm_mips_guest_exception_base() - Find guest exception vector base address. 1987 * kvm_mips_guest_exception_base() - Find guest exception vector base address.
@@ -2524,8 +2591,15 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
2524 vcpu->arch.pc = vcpu->arch.io_pc; 2591 vcpu->arch.pc = vcpu->arch.io_pc;
2525 2592
2526 switch (run->mmio.len) { 2593 switch (run->mmio.len) {
2594 case 8:
2595 *gpr = *(s64 *)run->mmio.data;
2596 break;
2597
2527 case 4: 2598 case 4:
2528 *gpr = *(s32 *) run->mmio.data; 2599 if (vcpu->mmio_needed == 2)
2600 *gpr = *(s32 *)run->mmio.data;
2601 else
2602 *gpr = *(u32 *)run->mmio.data;
2529 break; 2603 break;
2530 2604
2531 case 2: 2605 case 2:
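
The two emulate.c hunks above move the MMIO length and sign handling into vcpu->mmio_needed (2 for sign-extending loads, 1 for zero-extending lwu/lhu/lbu), which kvm_mips_complete_mmio_load() then consults when widening the data into the 64-bit GPR. Below is a minimal user-space rehearsal of that convention, not kernel code; the 2- and 1-byte cases are assumed to follow the same pattern as the 4-byte case shown in the hunk.

#include <stdint.h>
#include <stdio.h>

/* mmio_needed: 2 = sign extend, 1 = zero extend, as set by the load emulation */
static int64_t complete_mmio_load(int mmio_needed, unsigned int len,
				  const void *data)
{
	switch (len) {
	case 8:
		return *(const int64_t *)data;
	case 4:
		return mmio_needed == 2 ? (int64_t)*(const int32_t *)data
					: (int64_t)*(const uint32_t *)data;
	case 2:
		return mmio_needed == 2 ? (int64_t)*(const int16_t *)data
					: (int64_t)*(const uint16_t *)data;
	case 1:
		return mmio_needed == 2 ? (int64_t)*(const int8_t *)data
					: (int64_t)*(const uint8_t *)data;
	default:
		return 0;
	}
}

int main(void)
{
	uint32_t word = 0x80000000;	/* top bit set so the difference is visible */

	printf("lw : %lld\n", (long long)complete_mmio_load(2, 4, &word)); /* -2147483648 */
	printf("lwu: %lld\n", (long long)complete_mmio_load(1, 4, &word)); /*  2147483648 */
	return 0;
}
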
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c
index c5b254c4d0da..16e1c93b484f 100644
--- a/arch/mips/kvm/entry.c
+++ b/arch/mips/kvm/entry.c
@@ -51,12 +51,15 @@
51#define RA 31 51#define RA 31
52 52
53/* Some CP0 registers */ 53/* Some CP0 registers */
54#define C0_PWBASE 5, 5
54#define C0_HWRENA 7, 0 55#define C0_HWRENA 7, 0
55#define C0_BADVADDR 8, 0 56#define C0_BADVADDR 8, 0
56#define C0_BADINSTR 8, 1 57#define C0_BADINSTR 8, 1
57#define C0_BADINSTRP 8, 2 58#define C0_BADINSTRP 8, 2
58#define C0_ENTRYHI 10, 0 59#define C0_ENTRYHI 10, 0
60#define C0_GUESTCTL1 10, 4
59#define C0_STATUS 12, 0 61#define C0_STATUS 12, 0
62#define C0_GUESTCTL0 12, 6
60#define C0_CAUSE 13, 0 63#define C0_CAUSE 13, 0
61#define C0_EPC 14, 0 64#define C0_EPC 14, 0
62#define C0_EBASE 15, 1 65#define C0_EBASE 15, 1
@@ -292,8 +295,8 @@ static void *kvm_mips_build_enter_guest(void *addr)
292 unsigned int i; 295 unsigned int i;
293 struct uasm_label labels[2]; 296 struct uasm_label labels[2];
294 struct uasm_reloc relocs[2]; 297 struct uasm_reloc relocs[2];
295 struct uasm_label *l = labels; 298 struct uasm_label __maybe_unused *l = labels;
296 struct uasm_reloc *r = relocs; 299 struct uasm_reloc __maybe_unused *r = relocs;
297 300
298 memset(labels, 0, sizeof(labels)); 301 memset(labels, 0, sizeof(labels));
299 memset(relocs, 0, sizeof(relocs)); 302 memset(relocs, 0, sizeof(relocs));
@@ -302,7 +305,67 @@ static void *kvm_mips_build_enter_guest(void *addr)
302 UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1); 305 UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1);
303 UASM_i_MTC0(&p, T0, C0_EPC); 306 UASM_i_MTC0(&p, T0, C0_EPC);
304 307
305 /* Set the ASID for the Guest Kernel */ 308#ifdef CONFIG_KVM_MIPS_VZ
309 /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */
310 UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg);
311 UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1);
312
313 /*
314 * Set up KVM GPA pgd.
315 * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
316 * - call tlbmiss_handler_setup_pgd(mm->pgd)
317 * - write mm->pgd into CP0_PWBase
318 *
319 * We keep S0 pointing at struct kvm so we can load the ASID below.
320 */
321 UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) -
322 (int)offsetof(struct kvm_vcpu, arch), K1);
323 UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0);
324 UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
325 uasm_i_jalr(&p, RA, T9);
326 /* delay slot */
327 if (cpu_has_htw)
328 UASM_i_MTC0(&p, A0, C0_PWBASE);
329 else
330 uasm_i_nop(&p);
331
332 /* Set GM bit to setup eret to VZ guest context */
333 uasm_i_addiu(&p, V1, ZERO, 1);
334 uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
335 uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1);
336 uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
337
338 if (cpu_has_guestid) {
339 /*
340 * Set root mode GuestID, so that root TLB refill handler can
341 * use the correct GuestID in the root TLB.
342 */
343
344 /* Get current GuestID */
345 uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
346 /* Set GuestCtl1.RID = GuestCtl1.ID */
347 uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT,
348 MIPS_GCTL1_ID_WIDTH);
349 uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT,
350 MIPS_GCTL1_RID_WIDTH);
351 uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
352
353 /* GuestID handles dealiasing so we don't need to touch ASID */
354 goto skip_asid_restore;
355 }
356
357 /* Root ASID Dealias (RAD) */
358
359 /* Save host ASID */
360 UASM_i_MFC0(&p, K0, C0_ENTRYHI);
361 UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
362 K1);
363
364 /* Set the root ASID for the Guest */
365 UASM_i_ADDIU(&p, T1, S0,
366 offsetof(struct kvm, arch.gpa_mm.context.asid));
367#else
368 /* Set the ASID for the Guest Kernel or User */
306 UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1); 369 UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1);
307 UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]), 370 UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]),
308 T0); 371 T0);
@@ -315,6 +378,7 @@ static void *kvm_mips_build_enter_guest(void *addr)
315 UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, 378 UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch,
316 guest_user_mm.context.asid)); 379 guest_user_mm.context.asid));
317 uasm_l_kernel_asid(&l, p); 380 uasm_l_kernel_asid(&l, p);
381#endif
318 382
319 /* t1: contains the base of the ASID array, need to get the cpu id */ 383 /* t1: contains the base of the ASID array, need to get the cpu id */
320 /* smp_processor_id */ 384 /* smp_processor_id */
@@ -339,6 +403,7 @@ static void *kvm_mips_build_enter_guest(void *addr)
339 uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); 403 uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
340#endif 404#endif
341 405
406#ifndef CONFIG_KVM_MIPS_VZ
342 /* 407 /*
343 * Set up KVM T&E GVA pgd. 408 * Set up KVM T&E GVA pgd.
344 * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): 409 * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
@@ -351,7 +416,11 @@ static void *kvm_mips_build_enter_guest(void *addr)
351 UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); 416 UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
352 uasm_i_jalr(&p, RA, T9); 417 uasm_i_jalr(&p, RA, T9);
353 uasm_i_mtc0(&p, K0, C0_ENTRYHI); 418 uasm_i_mtc0(&p, K0, C0_ENTRYHI);
354 419#else
420 /* Set up KVM VZ root ASID (!guestid) */
421 uasm_i_mtc0(&p, K0, C0_ENTRYHI);
422skip_asid_restore:
423#endif
355 uasm_i_ehb(&p); 424 uasm_i_ehb(&p);
356 425
357 /* Disable RDHWR access */ 426 /* Disable RDHWR access */
@@ -559,13 +628,10 @@ void *kvm_mips_build_exit(void *addr)
559 /* Now that context has been saved, we can use other registers */ 628 /* Now that context has been saved, we can use other registers */
560 629
561 /* Restore vcpu */ 630 /* Restore vcpu */
562 UASM_i_MFC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]); 631 UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
563 uasm_i_move(&p, S1, A1);
564 632
565 /* Restore run (vcpu->run) */ 633 /* Restore run (vcpu->run) */
566 UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1); 634 UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1);
567 /* Save pointer to run in s0, will be saved by the compiler */
568 uasm_i_move(&p, S0, A0);
569 635
570 /* 636 /*
571 * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process 637 * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
@@ -641,6 +707,52 @@ void *kvm_mips_build_exit(void *addr)
641 uasm_l_msa_1(&l, p); 707 uasm_l_msa_1(&l, p);
642 } 708 }
643 709
710#ifdef CONFIG_KVM_MIPS_VZ
711 /* Restore host ASID */
712 if (!cpu_has_guestid) {
713 UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
714 K1);
715 UASM_i_MTC0(&p, K0, C0_ENTRYHI);
716 }
717
718 /*
719 * Set up normal Linux process pgd.
720 * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
721 * - call tlbmiss_handler_setup_pgd(mm->pgd)
722 * - write mm->pgd into CP0_PWBase
723 */
724 UASM_i_LW(&p, A0,
725 offsetof(struct kvm_vcpu_arch, host_pgd), K1);
726 UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
727 uasm_i_jalr(&p, RA, T9);
728 /* delay slot */
729 if (cpu_has_htw)
730 UASM_i_MTC0(&p, A0, C0_PWBASE);
731 else
732 uasm_i_nop(&p);
733
734 /* Clear GM bit so we don't enter guest mode when EXL is cleared */
735 uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
736 uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1);
737 uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
738
739 /* Save GuestCtl0 so we can access GExcCode after CPU migration */
740 uasm_i_sw(&p, K0,
741 offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1);
742
743 if (cpu_has_guestid) {
744 /*
745 * Clear root mode GuestID, so that root TLB operations use the
746 * root GuestID in the root TLB.
747 */
748 uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
749 /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */
750 uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT,
751 MIPS_GCTL1_RID_WIDTH);
752 uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
753 }
754#endif
755
644 /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ 756 /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
645 uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE)); 757 uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE));
646 uasm_i_and(&p, V0, V0, AT); 758 uasm_i_and(&p, V0, V0, AT);
@@ -680,6 +792,8 @@ void *kvm_mips_build_exit(void *addr)
680 * Now jump to the kvm_mips_handle_exit() to see if we can deal 792 * Now jump to the kvm_mips_handle_exit() to see if we can deal
681 * with this in the kernel 793 * with this in the kernel
682 */ 794 */
795 uasm_i_move(&p, A0, S0);
796 uasm_i_move(&p, A1, S1);
683 UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit); 797 UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
684 uasm_i_jalr(&p, RA, T9); 798 uasm_i_jalr(&p, RA, T9);
685 UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ); 799 UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ);
diff --git a/arch/mips/kvm/hypcall.c b/arch/mips/kvm/hypcall.c
new file mode 100644
index 000000000000..83063435195f
--- /dev/null
+++ b/arch/mips/kvm/hypcall.c
@@ -0,0 +1,53 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * KVM/MIPS: Hypercall handling.
7 *
8 * Copyright (C) 2015 Imagination Technologies Ltd.
9 */
10
11#include <linux/kernel.h>
12#include <linux/kvm_host.h>
13#include <linux/kvm_para.h>
14
15#define MAX_HYPCALL_ARGS 4
16
17enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu,
18 union mips_instruction inst)
19{
20 unsigned int code = (inst.co_format.code >> 5) & 0x3ff;
21
22 kvm_debug("[%#lx] HYPCALL %#03x\n", vcpu->arch.pc, code);
23
24 switch (code) {
25 case 0:
26 return EMULATE_HYPERCALL;
27 default:
28 return EMULATE_FAIL;
 29 }
30}
31
32static int kvm_mips_hypercall(struct kvm_vcpu *vcpu, unsigned long num,
33 const unsigned long *args, unsigned long *hret)
34{
35 /* Report unimplemented hypercall to guest */
36 *hret = -KVM_ENOSYS;
37 return RESUME_GUEST;
38}
39
40int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu)
41{
42 unsigned long num, args[MAX_HYPCALL_ARGS];
43
44 /* read hypcall number and arguments */
45 num = vcpu->arch.gprs[2]; /* v0 */
46 args[0] = vcpu->arch.gprs[4]; /* a0 */
47 args[1] = vcpu->arch.gprs[5]; /* a1 */
48 args[2] = vcpu->arch.gprs[6]; /* a2 */
49 args[3] = vcpu->arch.gprs[7]; /* a3 */
50
51 return kvm_mips_hypercall(vcpu, num,
52 args, &vcpu->arch.gprs[2] /* v0 */);
53}
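
kvm_mips_handle_hypcall() above reads the hypercall number from v0 and up to four arguments from a0..a3, then writes the result back to v0. The following is a hypothetical guest-side helper matching that register convention, assuming the toolchain accepts the VZ "hypcall" mnemonic (older assemblers may need the raw opcode word instead); until kvm_mips_hypercall() implements something, the guest simply gets -KVM_ENOSYS back.

#include <linux/kvm_para.h>	/* KVM_ENOSYS */

/* Hypothetical guest-side wrapper; number in v0, argument in a0, result in v0. */
static inline unsigned long guest_hypcall1(unsigned long num, unsigned long arg0)
{
	register unsigned long v0 asm("$2") = num;
	register unsigned long a0 asm("$4") = arg0;

	asm volatile("hypcall"		/* VZ ASE instruction */
		     : "+r" (v0)
		     : "r" (a0)
		     : "memory");

	return v0;			/* currently always -KVM_ENOSYS */
}
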
diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h
index fb118a2c8379..3bf0a49725e8 100644
--- a/arch/mips/kvm/interrupt.h
+++ b/arch/mips/kvm/interrupt.h
@@ -30,8 +30,13 @@
30 30
31#define C_TI (_ULCAST_(1) << 30) 31#define C_TI (_ULCAST_(1) << 30)
32 32
33#ifdef CONFIG_KVM_MIPS_VZ
34#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (1)
35#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (1)
36#else
33#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) 37#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0)
34#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0) 38#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0)
39#endif
35 40
36void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority); 41void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
37void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority); 42void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
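
Under VZ the interrupt delivery callback only sets pending IP bits in the guest's Cause (or GuestCtl2), so the generic delivery loop can push every pending priority in one pass instead of one priority per exit, which is what the two ALL_AT_ONCE flags above express. A rough sketch of such a consumer loop follows; the callback name and the exact loop in interrupt.c are not quoted by this patch, so treat the details as illustrative.

/* Illustrative only: the real loop lives in interrupt.c. */
static void deliver_pending_sketch(struct kvm_vcpu *vcpu, u32 cause)
{
	unsigned long *pending = &vcpu->arch.pending_exceptions;
	unsigned int priority;

	for_each_set_bit(priority, pending, MIPS_EXC_MAX) {
		kvm_mips_callbacks->irq_deliver(vcpu, priority, cause);
		if (!KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE)
			break;	/* T&E: one faked exception entry per exit */
	}
}
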
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 15a1b1716c2e..d4b2ad18eef2 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -59,6 +59,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
59 { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, 59 { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU },
60 { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, 60 { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
61 { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, 61 { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
62#ifdef CONFIG_KVM_MIPS_VZ
63 { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU },
64 { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU },
65 { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU },
66 { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU },
67 { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU },
68 { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU },
69 { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU },
70 { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU },
71#endif
62 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, 72 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
63 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, 73 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU },
64 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, 74 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU },
@@ -66,6 +76,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
66 {NULL} 76 {NULL}
67}; 77};
68 78
79bool kvm_trace_guest_mode_change;
80
81int kvm_guest_mode_change_trace_reg(void)
82{
83 kvm_trace_guest_mode_change = 1;
84 return 0;
85}
86
87void kvm_guest_mode_change_trace_unreg(void)
88{
89 kvm_trace_guest_mode_change = 0;
90}
91
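
These two functions are wired up as the register/unregister callbacks of the kvm_guest_mode_change tracepoint added to trace.h further down, so kvm_trace_guest_mode_change is only true while somebody has that tracepoint enabled. A sketch of the intended call-site pattern (the real callers are in vz.c):

/* Sketch of a call site (the real ones are in vz.c, added by this series). */
static void note_guest_mode_change(struct kvm_vcpu *vcpu)
{
	/* Only pay for the guest CP0 reads while the tracepoint is enabled. */
	if (unlikely(kvm_trace_guest_mode_change))
		trace_kvm_guest_mode_change(vcpu);
}
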
69/* 92/*
70 * XXXKYMA: We are simulatoring a processor that has the WII bit set in 93 * XXXKYMA: We are simulatoring a processor that has the WII bit set in
71 * Config7, so we are "runnable" if interrupts are pending 94 * Config7, so we are "runnable" if interrupts are pending
@@ -82,7 +105,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
82 105
83int kvm_arch_hardware_enable(void) 106int kvm_arch_hardware_enable(void)
84{ 107{
85 return 0; 108 return kvm_mips_callbacks->hardware_enable();
109}
110
111void kvm_arch_hardware_disable(void)
112{
113 kvm_mips_callbacks->hardware_disable();
86} 114}
87 115
88int kvm_arch_hardware_setup(void) 116int kvm_arch_hardware_setup(void)
@@ -97,6 +125,18 @@ void kvm_arch_check_processor_compat(void *rtn)
97 125
98int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 126int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
99{ 127{
128 switch (type) {
129#ifdef CONFIG_KVM_MIPS_VZ
130 case KVM_VM_MIPS_VZ:
131#else
132 case KVM_VM_MIPS_TE:
133#endif
134 break;
135 default:
136 /* Unsupported KVM type */
137 return -EINVAL;
 138 }
139
100 /* Allocate page table to map GPA -> RPA */ 140 /* Allocate page table to map GPA -> RPA */
101 kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); 141 kvm->arch.gpa_mm.pgd = kvm_pgd_alloc();
102 if (!kvm->arch.gpa_mm.pgd) 142 if (!kvm->arch.gpa_mm.pgd)
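
The type check above corresponds to the machine type argument of KVM_CREATE_VM: a VZ-built kernel only accepts KVM_VM_MIPS_VZ, a trap-and-emulate kernel only KVM_VM_MIPS_TE. A hedged user-space sketch of how a VMM could pick the type, using KVM_CHECK_EXTENSION with the KVM_CAP_MIPS_VZ capability that this series also introduces (error handling trimmed):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: pick the MIPS VM type the running kernel supports. */
static int create_mips_vm(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	int type;

	if (kvm < 0)
		return -1;

	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ) > 0)
		type = KVM_VM_MIPS_VZ;
	else
		type = KVM_VM_MIPS_TE;

	return ioctl(kvm, KVM_CREATE_VM, type);	/* -EINVAL if the type is unsupported */
}
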
@@ -301,8 +341,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
301 /* Build guest exception vectors dynamically in unmapped memory */ 341 /* Build guest exception vectors dynamically in unmapped memory */
302 handler = gebase + 0x2000; 342 handler = gebase + 0x2000;
303 343
304 /* TLB refill */ 344 /* TLB refill (or XTLB refill on 64-bit VZ where KX=1) */
305 refill_start = gebase; 345 refill_start = gebase;
346 if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && IS_ENABLED(CONFIG_64BIT))
347 refill_start += 0x080;
306 refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); 348 refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler);
307 349
308 /* General Exception Entry point */ 350 /* General Exception Entry point */
@@ -353,9 +395,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
353 395
354 /* Init */ 396 /* Init */
355 vcpu->arch.last_sched_cpu = -1; 397 vcpu->arch.last_sched_cpu = -1;
356 398 vcpu->arch.last_exec_cpu = -1;
357 /* Start off the timer */
358 kvm_mips_init_count(vcpu);
359 399
360 return vcpu; 400 return vcpu;
361 401
@@ -1030,9 +1070,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
1030 case KVM_CAP_IMMEDIATE_EXIT: 1070 case KVM_CAP_IMMEDIATE_EXIT:
1031 r = 1; 1071 r = 1;
1032 break; 1072 break;
1033 case KVM_CAP_COALESCED_MMIO:
1034 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
1035 break;
1036 case KVM_CAP_NR_VCPUS: 1073 case KVM_CAP_NR_VCPUS:
1037 r = num_online_cpus(); 1074 r = num_online_cpus();
1038 break; 1075 break;
@@ -1059,7 +1096,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
1059 r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); 1096 r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
1060 break; 1097 break;
1061 default: 1098 default:
1062 r = 0; 1099 r = kvm_mips_callbacks->check_extension(kvm, ext);
1063 break; 1100 break;
1064 } 1101 }
1065 return r; 1102 return r;
@@ -1067,7 +1104,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
1067 1104
1068int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 1105int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1069{ 1106{
1070 return kvm_mips_pending_timer(vcpu); 1107 return kvm_mips_pending_timer(vcpu) ||
1108 kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI;
1071} 1109}
1072 1110
1073int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) 1111int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
@@ -1092,7 +1130,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
1092 kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); 1130 kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo);
1093 1131
1094 cop0 = vcpu->arch.cop0; 1132 cop0 = vcpu->arch.cop0;
1095 kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n", 1133 kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n",
1096 kvm_read_c0_guest_status(cop0), 1134 kvm_read_c0_guest_status(cop0),
1097 kvm_read_c0_guest_cause(cop0)); 1135 kvm_read_c0_guest_cause(cop0));
1098 1136
@@ -1208,7 +1246,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
1208 vcpu->mode = OUTSIDE_GUEST_MODE; 1246 vcpu->mode = OUTSIDE_GUEST_MODE;
1209 1247
1210 /* re-enable HTW before enabling interrupts */ 1248 /* re-enable HTW before enabling interrupts */
1211 htw_start(); 1249 if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ))
1250 htw_start();
1212 1251
1213 /* Set a default exit reason */ 1252 /* Set a default exit reason */
1214 run->exit_reason = KVM_EXIT_UNKNOWN; 1253 run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -1226,17 +1265,20 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
1226 cause, opc, run, vcpu); 1265 cause, opc, run, vcpu);
1227 trace_kvm_exit(vcpu, exccode); 1266 trace_kvm_exit(vcpu, exccode);
1228 1267
1229 /* 1268 if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
1230 * Do a privilege check, if in UM most of these exit conditions end up 1269 /*
1231 * causing an exception to be delivered to the Guest Kernel 1270 * Do a privilege check, if in UM most of these exit conditions
1232 */ 1271 * end up causing an exception to be delivered to the Guest
1233 er = kvm_mips_check_privilege(cause, opc, run, vcpu); 1272 * Kernel
1234 if (er == EMULATE_PRIV_FAIL) { 1273 */
1235 goto skip_emul; 1274 er = kvm_mips_check_privilege(cause, opc, run, vcpu);
1236 } else if (er == EMULATE_FAIL) { 1275 if (er == EMULATE_PRIV_FAIL) {
1237 run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1276 goto skip_emul;
1238 ret = RESUME_HOST; 1277 } else if (er == EMULATE_FAIL) {
1239 goto skip_emul; 1278 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1279 ret = RESUME_HOST;
1280 goto skip_emul;
1281 }
1240 } 1282 }
1241 1283
1242 switch (exccode) { 1284 switch (exccode) {
@@ -1267,7 +1309,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
1267 break; 1309 break;
1268 1310
1269 case EXCCODE_TLBS: 1311 case EXCCODE_TLBS:
1270 kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n", 1312 kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n",
1271 cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, 1313 cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
1272 badvaddr); 1314 badvaddr);
1273 1315
@@ -1328,12 +1370,17 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
1328 ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); 1370 ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
1329 break; 1371 break;
1330 1372
1373 case EXCCODE_GE:
1374 /* defer exit accounting to handler */
1375 ret = kvm_mips_callbacks->handle_guest_exit(vcpu);
1376 break;
1377
1331 default: 1378 default:
1332 if (cause & CAUSEF_BD) 1379 if (cause & CAUSEF_BD)
1333 opc += 1; 1380 opc += 1;
1334 inst = 0; 1381 inst = 0;
1335 kvm_get_badinstr(opc, vcpu, &inst); 1382 kvm_get_badinstr(opc, vcpu, &inst);
1336 kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", 1383 kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
1337 exccode, opc, inst, badvaddr, 1384 exccode, opc, inst, badvaddr,
1338 kvm_read_c0_guest_status(vcpu->arch.cop0)); 1385 kvm_read_c0_guest_status(vcpu->arch.cop0));
1339 kvm_arch_vcpu_dump_regs(vcpu); 1386 kvm_arch_vcpu_dump_regs(vcpu);
@@ -1346,6 +1393,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
1346skip_emul: 1393skip_emul:
1347 local_irq_disable(); 1394 local_irq_disable();
1348 1395
1396 if (ret == RESUME_GUEST)
1397 kvm_vz_acquire_htimer(vcpu);
1398
1349 if (er == EMULATE_DONE && !(ret & RESUME_HOST)) 1399 if (er == EMULATE_DONE && !(ret & RESUME_HOST))
1350 kvm_mips_deliver_interrupts(vcpu, cause); 1400 kvm_mips_deliver_interrupts(vcpu, cause);
1351 1401
@@ -1391,7 +1441,8 @@ skip_emul:
1391 } 1441 }
1392 1442
1393 /* Disable HTW before returning to guest or host */ 1443 /* Disable HTW before returning to guest or host */
1394 htw_stop(); 1444 if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ))
1445 htw_stop();
1395 1446
1396 return ret; 1447 return ret;
1397} 1448}
@@ -1527,16 +1578,18 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu)
1527void kvm_lose_fpu(struct kvm_vcpu *vcpu) 1578void kvm_lose_fpu(struct kvm_vcpu *vcpu)
1528{ 1579{
1529 /* 1580 /*
1530 * FPU & MSA get disabled in root context (hardware) when it is disabled 1581 * With T&E, FPU & MSA get disabled in root context (hardware) when it
1531 * in guest context (software), but the register state in the hardware 1582 * is disabled in guest context (software), but the register state in
1532 * may still be in use. This is why we explicitly re-enable the hardware 1583 * the hardware may still be in use.
1533 * before saving. 1584 * This is why we explicitly re-enable the hardware before saving.
1534 */ 1585 */
1535 1586
1536 preempt_disable(); 1587 preempt_disable();
1537 if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { 1588 if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
1538 set_c0_config5(MIPS_CONF5_MSAEN); 1589 if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
1539 enable_fpu_hazard(); 1590 set_c0_config5(MIPS_CONF5_MSAEN);
1591 enable_fpu_hazard();
1592 }
1540 1593
1541 __kvm_save_msa(&vcpu->arch); 1594 __kvm_save_msa(&vcpu->arch);
1542 trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA); 1595 trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA);
@@ -1549,8 +1602,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
1549 } 1602 }
1550 vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA); 1603 vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA);
1551 } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { 1604 } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) {
1552 set_c0_status(ST0_CU1); 1605 if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
1553 enable_fpu_hazard(); 1606 set_c0_status(ST0_CU1);
1607 enable_fpu_hazard();
1608 }
1554 1609
1555 __kvm_save_fpu(&vcpu->arch); 1610 __kvm_save_fpu(&vcpu->arch);
1556 vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; 1611 vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU;
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index cb0faade311e..ee64db032793 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -992,6 +992,22 @@ static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
992 return kvm_mips_gpa_pte_to_gva_unmapped(pte); 992 return kvm_mips_gpa_pte_to_gva_unmapped(pte);
993} 993}
994 994
995#ifdef CONFIG_KVM_MIPS_VZ
996int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
997 struct kvm_vcpu *vcpu,
998 bool write_fault)
999{
1000 int ret;
1001
1002 ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
1003 if (ret)
1004 return ret;
1005
1006 /* Invalidate this entry in the TLB */
1007 return kvm_vz_host_tlb_inv(vcpu, badvaddr);
1008}
1009#endif
1010
995/* XXXKYMA: Must be called with interrupts disabled */ 1011/* XXXKYMA: Must be called with interrupts disabled */
996int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, 1012int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
997 struct kvm_vcpu *vcpu, 1013 struct kvm_vcpu *vcpu,
@@ -1225,6 +1241,10 @@ int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
1225{ 1241{
1226 int err; 1242 int err;
1227 1243
1244 if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ),
1245 "Expect BadInstr/BadInstrP registers to be used with VZ\n"))
1246 return -EINVAL;
1247
1228retry: 1248retry:
1229 kvm_trap_emul_gva_lockless_begin(vcpu); 1249 kvm_trap_emul_gva_lockless_begin(vcpu);
1230 err = get_user(*out, opc); 1250 err = get_user(*out, opc);
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 2819eb793345..7c6336dd2638 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -33,6 +33,25 @@
33#define KVM_GUEST_PC_TLB 0 33#define KVM_GUEST_PC_TLB 0
34#define KVM_GUEST_SP_TLB 1 34#define KVM_GUEST_SP_TLB 1
35 35
36#ifdef CONFIG_KVM_MIPS_VZ
37unsigned long GUESTID_MASK;
38EXPORT_SYMBOL_GPL(GUESTID_MASK);
39unsigned long GUESTID_FIRST_VERSION;
40EXPORT_SYMBOL_GPL(GUESTID_FIRST_VERSION);
41unsigned long GUESTID_VERSION_MASK;
42EXPORT_SYMBOL_GPL(GUESTID_VERSION_MASK);
43
44static u32 kvm_mips_get_root_asid(struct kvm_vcpu *vcpu)
45{
46 struct mm_struct *gpa_mm = &vcpu->kvm->arch.gpa_mm;
47
48 if (cpu_has_guestid)
49 return 0;
50 else
51 return cpu_asid(smp_processor_id(), gpa_mm);
52}
53#endif
54
36static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) 55static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
37{ 56{
38 struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; 57 struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
@@ -166,6 +185,13 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
166 185
167 local_irq_restore(flags); 186 local_irq_restore(flags);
168 187
188 /*
189 * We don't want to get reserved instruction exceptions for missing tlb
190 * entries.
191 */
192 if (cpu_has_vtag_icache)
193 flush_icache_all();
194
169 if (user && idx_user >= 0) 195 if (user && idx_user >= 0)
170 kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n", 196 kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n",
171 __func__, (va & VPN2_MASK) | 197 __func__, (va & VPN2_MASK) |
@@ -179,6 +205,421 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
179} 205}
180EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); 206EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
181 207
208#ifdef CONFIG_KVM_MIPS_VZ
209
210/* GuestID management */
211
212/**
213 * clear_root_gid() - Set GuestCtl1.RID for normal root operation.
214 */
215static inline void clear_root_gid(void)
216{
217 if (cpu_has_guestid) {
218 clear_c0_guestctl1(MIPS_GCTL1_RID);
219 mtc0_tlbw_hazard();
220 }
221}
222
223/**
224 * set_root_gid_to_guest_gid() - Set GuestCtl1.RID to match GuestCtl1.ID.
225 *
226 * Sets the root GuestID to match the current guest GuestID, for TLB operation
227 * on the GPA->RPA mappings in the root TLB.
228 *
229 * The caller must be sure to disable HTW while the root GID is set, and
230 * possibly longer if TLB registers are modified.
231 */
232static inline void set_root_gid_to_guest_gid(void)
233{
234 unsigned int guestctl1;
235
236 if (cpu_has_guestid) {
237 back_to_back_c0_hazard();
238 guestctl1 = read_c0_guestctl1();
239 guestctl1 = (guestctl1 & ~MIPS_GCTL1_RID) |
240 ((guestctl1 & MIPS_GCTL1_ID) >> MIPS_GCTL1_ID_SHIFT)
241 << MIPS_GCTL1_RID_SHIFT;
242 write_c0_guestctl1(guestctl1);
243 mtc0_tlbw_hazard();
244 }
245}
246
247int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
248{
249 int idx;
250 unsigned long flags, old_entryhi;
251
252 local_irq_save(flags);
253 htw_stop();
254
255 /* Set root GuestID for root probe and write of guest TLB entry */
256 set_root_gid_to_guest_gid();
257
258 old_entryhi = read_c0_entryhi();
259
260 idx = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
261 kvm_mips_get_root_asid(vcpu));
262
263 write_c0_entryhi(old_entryhi);
264 clear_root_gid();
265 mtc0_tlbw_hazard();
266
267 htw_start();
268 local_irq_restore(flags);
269
270 /*
271 * We don't want to get reserved instruction exceptions for missing tlb
272 * entries.
273 */
274 if (cpu_has_vtag_icache)
275 flush_icache_all();
276
277 if (idx > 0)
278 kvm_debug("%s: Invalidated root entryhi %#lx @ idx %d\n",
279 __func__, (va & VPN2_MASK) |
280 kvm_mips_get_root_asid(vcpu), idx);
281
282 return 0;
283}
284EXPORT_SYMBOL_GPL(kvm_vz_host_tlb_inv);
285
286/**
287 * kvm_vz_guest_tlb_lookup() - Lookup a guest VZ TLB mapping.
288 * @vcpu: KVM VCPU pointer.
 289 * @gva: Guest virtual address in a TLB mapped guest segment.
 290 * @gpa: Pointer to output guest physical address it maps to.
291 *
292 * Converts a guest virtual address in a guest TLB mapped segment to a guest
293 * physical address, by probing the guest TLB.
294 *
295 * Returns: 0 if guest TLB mapping exists for @gva. *@gpa will have been
296 * written.
297 * -EFAULT if no guest TLB mapping exists for @gva. *@gpa may not
298 * have been written.
299 */
300int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva,
301 unsigned long *gpa)
302{
303 unsigned long o_entryhi, o_entrylo[2], o_pagemask;
304 unsigned int o_index;
305 unsigned long entrylo[2], pagemask, pagemaskbit, pa;
306 unsigned long flags;
307 int index;
308
309 /* Probe the guest TLB for a mapping */
310 local_irq_save(flags);
311 /* Set root GuestID for root probe of guest TLB entry */
312 htw_stop();
313 set_root_gid_to_guest_gid();
314
315 o_entryhi = read_gc0_entryhi();
316 o_index = read_gc0_index();
317
318 write_gc0_entryhi((o_entryhi & 0x3ff) | (gva & ~0xfffl));
319 mtc0_tlbw_hazard();
320 guest_tlb_probe();
321 tlb_probe_hazard();
322
323 index = read_gc0_index();
324 if (index < 0) {
325 /* No match, fail */
326 write_gc0_entryhi(o_entryhi);
327 write_gc0_index(o_index);
328
329 clear_root_gid();
330 htw_start();
331 local_irq_restore(flags);
332 return -EFAULT;
333 }
334
335 /* Match! read the TLB entry */
336 o_entrylo[0] = read_gc0_entrylo0();
337 o_entrylo[1] = read_gc0_entrylo1();
338 o_pagemask = read_gc0_pagemask();
339
340 mtc0_tlbr_hazard();
341 guest_tlb_read();
342 tlb_read_hazard();
343
344 entrylo[0] = read_gc0_entrylo0();
345 entrylo[1] = read_gc0_entrylo1();
346 pagemask = ~read_gc0_pagemask() & ~0x1fffl;
347
348 write_gc0_entryhi(o_entryhi);
349 write_gc0_index(o_index);
350 write_gc0_entrylo0(o_entrylo[0]);
351 write_gc0_entrylo1(o_entrylo[1]);
352 write_gc0_pagemask(o_pagemask);
353
354 clear_root_gid();
355 htw_start();
356 local_irq_restore(flags);
357
358 /* Select one of the EntryLo values and interpret the GPA */
359 pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1;
360 pa = entrylo[!!(gva & pagemaskbit)];
361
362 /*
363 * TLB entry may have become invalid since TLB probe if physical FTLB
364 * entries are shared between threads (e.g. I6400).
365 */
366 if (!(pa & ENTRYLO_V))
367 return -EFAULT;
368
369 /*
370 * Note, this doesn't take guest MIPS32 XPA into account, where PFN is
371 * split with XI/RI in the middle.
372 */
373 pa = (pa << 6) & ~0xfffl;
374 pa |= gva & ~(pagemask | pagemaskbit);
375
376 *gpa = pa;
377 return 0;
378}
379EXPORT_SYMBOL_GPL(kvm_vz_guest_tlb_lookup);
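
The EntryLo arithmetic at the end of kvm_vz_guest_tlb_lookup() is easy to misread, so here is a small user-space rehearsal of it with made-up numbers for a 4K page pair (guest PageMask of 0): the lowest bit of the inverted mask selects EntryLo0 or EntryLo1, the PFN is shifted from bit 6 up to bit 12, and the in-page offset comes from the GVA.

#include <stdio.h>

int main(void)
{
	unsigned long gva      = 0xc0001a30ul;
	unsigned long entrylo1 = 0x003c0007ul;		/* PFN 0x0f000, D|V|G set */
	unsigned long pagemask = ~0ul & ~0x1ffful;	/* ~GC0_PageMask & ~0x1fff */
	unsigned long pagemaskbit, pa;

	/* lowest set bit of pagemask, halved: selects the even/odd page of the pair */
	pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1;	/* 0x1000 */

	/* gva has bit 0x1000 set, so the odd page (EntryLo1) is the one used */
	pa  = (entrylo1 << 6) & ~0xffful;		/* PFN field -> 0x0f000000 */
	pa |= gva & ~(pagemask | pagemaskbit);		/* in-page offset -> 0xa30 */

	printf("GPA = 0x%lx\n", pa);			/* prints 0xf000a30 */
	return 0;
}
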
380
381/**
382 * kvm_vz_local_flush_roottlb_all_guests() - Flush all root TLB entries for
383 * guests.
384 *
385 * Invalidate all entries in root tlb which are GPA mappings.
386 */
387void kvm_vz_local_flush_roottlb_all_guests(void)
388{
389 unsigned long flags;
390 unsigned long old_entryhi, old_pagemask, old_guestctl1;
391 int entry;
392
393 if (WARN_ON(!cpu_has_guestid))
394 return;
395
396 local_irq_save(flags);
397 htw_stop();
398
399 /* TLBR may clobber EntryHi.ASID, PageMask, and GuestCtl1.RID */
400 old_entryhi = read_c0_entryhi();
401 old_pagemask = read_c0_pagemask();
402 old_guestctl1 = read_c0_guestctl1();
403
404 /*
405 * Invalidate guest entries in root TLB while leaving root entries
406 * intact when possible.
407 */
408 for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
409 write_c0_index(entry);
410 mtc0_tlbw_hazard();
411 tlb_read();
412 tlb_read_hazard();
413
414 /* Don't invalidate non-guest (RVA) mappings in the root TLB */
415 if (!(read_c0_guestctl1() & MIPS_GCTL1_RID))
416 continue;
417
418 /* Make sure all entries differ. */
419 write_c0_entryhi(UNIQUE_ENTRYHI(entry));
420 write_c0_entrylo0(0);
421 write_c0_entrylo1(0);
422 write_c0_guestctl1(0);
423 mtc0_tlbw_hazard();
424 tlb_write_indexed();
425 }
426
427 write_c0_entryhi(old_entryhi);
428 write_c0_pagemask(old_pagemask);
429 write_c0_guestctl1(old_guestctl1);
430 tlbw_use_hazard();
431
432 htw_start();
433 local_irq_restore(flags);
434}
435EXPORT_SYMBOL_GPL(kvm_vz_local_flush_roottlb_all_guests);
436
437/**
438 * kvm_vz_local_flush_guesttlb_all() - Flush all guest TLB entries.
439 *
440 * Invalidate all entries in guest tlb irrespective of guestid.
441 */
442void kvm_vz_local_flush_guesttlb_all(void)
443{
444 unsigned long flags;
445 unsigned long old_index;
446 unsigned long old_entryhi;
447 unsigned long old_entrylo[2];
448 unsigned long old_pagemask;
449 int entry;
450 u64 cvmmemctl2 = 0;
451
452 local_irq_save(flags);
453
454 /* Preserve all clobbered guest registers */
455 old_index = read_gc0_index();
456 old_entryhi = read_gc0_entryhi();
457 old_entrylo[0] = read_gc0_entrylo0();
458 old_entrylo[1] = read_gc0_entrylo1();
459 old_pagemask = read_gc0_pagemask();
460
461 switch (current_cpu_type()) {
462 case CPU_CAVIUM_OCTEON3:
463 /* Inhibit machine check due to multiple matching TLB entries */
464 cvmmemctl2 = read_c0_cvmmemctl2();
465 cvmmemctl2 |= CVMMEMCTL2_INHIBITTS;
466 write_c0_cvmmemctl2(cvmmemctl2);
467 break;
 468 }
469
470 /* Invalidate guest entries in guest TLB */
471 write_gc0_entrylo0(0);
472 write_gc0_entrylo1(0);
473 write_gc0_pagemask(0);
474 for (entry = 0; entry < current_cpu_data.guest.tlbsize; entry++) {
475 /* Make sure all entries differ. */
476 write_gc0_index(entry);
477 write_gc0_entryhi(UNIQUE_GUEST_ENTRYHI(entry));
478 mtc0_tlbw_hazard();
479 guest_tlb_write_indexed();
480 }
481
482 if (cvmmemctl2) {
483 cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS;
484 write_c0_cvmmemctl2(cvmmemctl2);
 485 }
486
487 write_gc0_index(old_index);
488 write_gc0_entryhi(old_entryhi);
489 write_gc0_entrylo0(old_entrylo[0]);
490 write_gc0_entrylo1(old_entrylo[1]);
491 write_gc0_pagemask(old_pagemask);
492 tlbw_use_hazard();
493
494 local_irq_restore(flags);
495}
496EXPORT_SYMBOL_GPL(kvm_vz_local_flush_guesttlb_all);
497
498/**
499 * kvm_vz_save_guesttlb() - Save a range of guest TLB entries.
500 * @buf: Buffer to write TLB entries into.
501 * @index: Start index.
502 * @count: Number of entries to save.
503 *
504 * Save a range of guest TLB entries. The caller must ensure interrupts are
505 * disabled.
506 */
507void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
508 unsigned int count)
509{
510 unsigned int end = index + count;
511 unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
512 unsigned int guestctl1 = 0;
513 int old_index, i;
514
515 /* Save registers we're about to clobber */
516 old_index = read_gc0_index();
517 old_entryhi = read_gc0_entryhi();
518 old_entrylo0 = read_gc0_entrylo0();
519 old_entrylo1 = read_gc0_entrylo1();
520 old_pagemask = read_gc0_pagemask();
521
522 /* Set root GuestID for root probe */
523 htw_stop();
524 set_root_gid_to_guest_gid();
525 if (cpu_has_guestid)
526 guestctl1 = read_c0_guestctl1();
527
528 /* Read each entry from guest TLB */
529 for (i = index; i < end; ++i, ++buf) {
530 write_gc0_index(i);
531
532 mtc0_tlbr_hazard();
533 guest_tlb_read();
534 tlb_read_hazard();
535
536 if (cpu_has_guestid &&
537 (read_c0_guestctl1() ^ guestctl1) & MIPS_GCTL1_RID) {
538 /* Entry invalid or belongs to another guest */
539 buf->tlb_hi = UNIQUE_GUEST_ENTRYHI(i);
540 buf->tlb_lo[0] = 0;
541 buf->tlb_lo[1] = 0;
542 buf->tlb_mask = 0;
543 } else {
544 /* Entry belongs to the right guest */
545 buf->tlb_hi = read_gc0_entryhi();
546 buf->tlb_lo[0] = read_gc0_entrylo0();
547 buf->tlb_lo[1] = read_gc0_entrylo1();
548 buf->tlb_mask = read_gc0_pagemask();
549 }
550 }
551
552 /* Clear root GuestID again */
553 clear_root_gid();
554 htw_start();
555
556 /* Restore clobbered registers */
557 write_gc0_index(old_index);
558 write_gc0_entryhi(old_entryhi);
559 write_gc0_entrylo0(old_entrylo0);
560 write_gc0_entrylo1(old_entrylo1);
561 write_gc0_pagemask(old_pagemask);
562
563 tlbw_use_hazard();
564}
565EXPORT_SYMBOL_GPL(kvm_vz_save_guesttlb);
566
567/**
 568 * kvm_vz_load_guesttlb() - Load a range of guest TLB entries.
569 * @buf: Buffer to read TLB entries from.
570 * @index: Start index.
571 * @count: Number of entries to load.
572 *
573 * Load a range of guest TLB entries. The caller must ensure interrupts are
574 * disabled.
575 */
576void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
577 unsigned int count)
578{
579 unsigned int end = index + count;
580 unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
581 int old_index, i;
582
583 /* Save registers we're about to clobber */
584 old_index = read_gc0_index();
585 old_entryhi = read_gc0_entryhi();
586 old_entrylo0 = read_gc0_entrylo0();
587 old_entrylo1 = read_gc0_entrylo1();
588 old_pagemask = read_gc0_pagemask();
589
590 /* Set root GuestID for root probe */
591 htw_stop();
592 set_root_gid_to_guest_gid();
593
594 /* Write each entry to guest TLB */
595 for (i = index; i < end; ++i, ++buf) {
596 write_gc0_index(i);
597 write_gc0_entryhi(buf->tlb_hi);
598 write_gc0_entrylo0(buf->tlb_lo[0]);
599 write_gc0_entrylo1(buf->tlb_lo[1]);
600 write_gc0_pagemask(buf->tlb_mask);
601
602 mtc0_tlbw_hazard();
603 guest_tlb_write_indexed();
604 }
605
606 /* Clear root GuestID again */
607 clear_root_gid();
608 htw_start();
609
610 /* Restore clobbered registers */
611 write_gc0_index(old_index);
612 write_gc0_entryhi(old_entryhi);
613 write_gc0_entrylo0(old_entrylo0);
614 write_gc0_entrylo1(old_entrylo1);
615 write_gc0_pagemask(old_pagemask);
616
617 tlbw_use_hazard();
618}
619EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb);
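
kvm_vz_save_guesttlb() and kvm_vz_load_guesttlb() are exported as a pair; a plausible caller (the real call sites live in vz.c, which this series adds but is not quoted here) preserves the guest's wired TLB entries across a vCPU being scheduled out, since the shared guest TLB may be clobbered by another VCPU in the meantime. A hypothetical sketch honouring the "interrupts disabled" requirement from the kerneldoc; the buffer and wired-count handling is illustrative only.

/* Hypothetical caller pairing the two helpers above. */
static void migrate_wired_entries(struct kvm_mips_tlb *buf, unsigned int nwired)
{
	unsigned long flags;

	local_irq_save(flags);			/* both helpers require IRQs off */
	kvm_vz_save_guesttlb(buf, 0, nwired);	/* e.g. when the vCPU is put */
	local_irq_restore(flags);

	/* ... later, possibly on another CPU ... */

	local_irq_save(flags);
	kvm_vz_load_guesttlb(buf, 0, nwired);	/* e.g. when the vCPU is loaded */
	local_irq_restore(flags);
}
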
620
621#endif
622
182/** 623/**
183 * kvm_mips_suspend_mm() - Suspend the active mm. 624 * kvm_mips_suspend_mm() - Suspend the active mm.
184 * @cpu The CPU we're running on. 625 * @cpu The CPU we're running on.
diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h
index c858cf168078..a8c7fd7bf6d2 100644
--- a/arch/mips/kvm/trace.h
+++ b/arch/mips/kvm/trace.h
@@ -18,6 +18,13 @@
18#define TRACE_INCLUDE_FILE trace 18#define TRACE_INCLUDE_FILE trace
19 19
20/* 20/*
21 * arch/mips/kvm/mips.c
22 */
23extern bool kvm_trace_guest_mode_change;
24int kvm_guest_mode_change_trace_reg(void);
25void kvm_guest_mode_change_trace_unreg(void);
26
27/*
21 * Tracepoints for VM enters 28 * Tracepoints for VM enters
22 */ 29 */
23DECLARE_EVENT_CLASS(kvm_transition, 30DECLARE_EVENT_CLASS(kvm_transition,
@@ -62,10 +69,20 @@ DEFINE_EVENT(kvm_transition, kvm_out,
62#define KVM_TRACE_EXIT_MSA_FPE 14 69#define KVM_TRACE_EXIT_MSA_FPE 14
63#define KVM_TRACE_EXIT_FPE 15 70#define KVM_TRACE_EXIT_FPE 15
64#define KVM_TRACE_EXIT_MSA_DISABLED 21 71#define KVM_TRACE_EXIT_MSA_DISABLED 21
72#define KVM_TRACE_EXIT_GUEST_EXIT 27
65/* Further exit reasons */ 73/* Further exit reasons */
66#define KVM_TRACE_EXIT_WAIT 32 74#define KVM_TRACE_EXIT_WAIT 32
67#define KVM_TRACE_EXIT_CACHE 33 75#define KVM_TRACE_EXIT_CACHE 33
68#define KVM_TRACE_EXIT_SIGNAL 34 76#define KVM_TRACE_EXIT_SIGNAL 34
77/* 32 exit reasons correspond to GuestCtl0.GExcCode (VZ) */
78#define KVM_TRACE_EXIT_GEXCCODE_BASE 64
79#define KVM_TRACE_EXIT_GPSI 64 /* 0 */
80#define KVM_TRACE_EXIT_GSFC 65 /* 1 */
81#define KVM_TRACE_EXIT_HC 66 /* 2 */
82#define KVM_TRACE_EXIT_GRR 67 /* 3 */
83#define KVM_TRACE_EXIT_GVA 72 /* 8 */
84#define KVM_TRACE_EXIT_GHFC 73 /* 9 */
85#define KVM_TRACE_EXIT_GPA 74 /* 10 */
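
Per the comment above, trace exit reasons from 64 upward map one-to-one onto GuestCtl0.GExcCode values (GPSI is GExcCode 0, GSFC is 1, and so on). A one-line helper showing the conversion; the helper name is hypothetical, not part of the patch.

/* Hypothetical helper: GuestCtl0.GExcCode -> trace code, e.g. HC (2) -> 66. */
static inline unsigned int trace_code_from_gexccode(unsigned int gexccode)
{
	return KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode;
}
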
69 86
70/* Tracepoints for VM exits */ 87/* Tracepoints for VM exits */
71#define kvm_trace_symbol_exit_types \ 88#define kvm_trace_symbol_exit_types \
@@ -83,9 +100,17 @@ DEFINE_EVENT(kvm_transition, kvm_out,
83 { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \ 100 { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \
84 { KVM_TRACE_EXIT_FPE, "FPE" }, \ 101 { KVM_TRACE_EXIT_FPE, "FPE" }, \
85 { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \ 102 { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \
103 { KVM_TRACE_EXIT_GUEST_EXIT, "Guest Exit" }, \
86 { KVM_TRACE_EXIT_WAIT, "WAIT" }, \ 104 { KVM_TRACE_EXIT_WAIT, "WAIT" }, \
87 { KVM_TRACE_EXIT_CACHE, "CACHE" }, \ 105 { KVM_TRACE_EXIT_CACHE, "CACHE" }, \
88 { KVM_TRACE_EXIT_SIGNAL, "Signal" } 106 { KVM_TRACE_EXIT_SIGNAL, "Signal" }, \
107 { KVM_TRACE_EXIT_GPSI, "GPSI" }, \
108 { KVM_TRACE_EXIT_GSFC, "GSFC" }, \
109 { KVM_TRACE_EXIT_HC, "HC" }, \
110 { KVM_TRACE_EXIT_GRR, "GRR" }, \
111 { KVM_TRACE_EXIT_GVA, "GVA" }, \
112 { KVM_TRACE_EXIT_GHFC, "GHFC" }, \
113 { KVM_TRACE_EXIT_GPA, "GPA" }
89 114
90TRACE_EVENT(kvm_exit, 115TRACE_EVENT(kvm_exit,
91 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), 116 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
@@ -158,6 +183,8 @@ TRACE_EVENT(kvm_exit,
158 { KVM_TRACE_COP0(16, 4), "Config4" }, \ 183 { KVM_TRACE_COP0(16, 4), "Config4" }, \
159 { KVM_TRACE_COP0(16, 5), "Config5" }, \ 184 { KVM_TRACE_COP0(16, 5), "Config5" }, \
160 { KVM_TRACE_COP0(16, 7), "Config7" }, \ 185 { KVM_TRACE_COP0(16, 7), "Config7" }, \
186 { KVM_TRACE_COP0(17, 1), "MAAR" }, \
187 { KVM_TRACE_COP0(17, 2), "MAARI" }, \
161 { KVM_TRACE_COP0(26, 0), "ECC" }, \ 188 { KVM_TRACE_COP0(26, 0), "ECC" }, \
162 { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \ 189 { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \
163 { KVM_TRACE_COP0(31, 2), "KScratch1" }, \ 190 { KVM_TRACE_COP0(31, 2), "KScratch1" }, \
@@ -268,6 +295,51 @@ TRACE_EVENT(kvm_asid_change,
268 __entry->new_asid) 295 __entry->new_asid)
269); 296);
270 297
298TRACE_EVENT(kvm_guestid_change,
299 TP_PROTO(struct kvm_vcpu *vcpu, unsigned int guestid),
300 TP_ARGS(vcpu, guestid),
301 TP_STRUCT__entry(
302 __field(unsigned int, guestid)
303 ),
304
305 TP_fast_assign(
306 __entry->guestid = guestid;
307 ),
308
309 TP_printk("GuestID: 0x%02x",
310 __entry->guestid)
311);
312
313TRACE_EVENT_FN(kvm_guest_mode_change,
314 TP_PROTO(struct kvm_vcpu *vcpu),
315 TP_ARGS(vcpu),
316 TP_STRUCT__entry(
317 __field(unsigned long, epc)
318 __field(unsigned long, pc)
319 __field(unsigned long, badvaddr)
320 __field(unsigned int, status)
321 __field(unsigned int, cause)
322 ),
323
324 TP_fast_assign(
325 __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0);
326 __entry->pc = vcpu->arch.pc;
327 __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0);
328 __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0);
329 __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0);
330 ),
331
332 TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx",
333 __entry->epc,
334 __entry->pc,
335 __entry->status,
336 __entry->cause,
337 __entry->badvaddr),
338
339 kvm_guest_mode_change_trace_reg,
340 kvm_guest_mode_change_trace_unreg
341);
342
271#endif /* _TRACE_KVM_H */ 343#endif /* _TRACE_KVM_H */
272 344
273/* This part must be outside protection */ 345/* This part must be outside protection */
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index b1fa53b252ea..a563759fd142 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -12,6 +12,7 @@
12#include <linux/errno.h> 12#include <linux/errno.h>
13#include <linux/err.h> 13#include <linux/err.h>
14#include <linux/kvm_host.h> 14#include <linux/kvm_host.h>
15#include <linux/log2.h>
15#include <linux/uaccess.h> 16#include <linux/uaccess.h>
16#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
17#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
@@ -40,6 +41,29 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
40 return gpa; 41 return gpa;
41} 42}
42 43
44static int kvm_trap_emul_no_handler(struct kvm_vcpu *vcpu)
45{
46 u32 __user *opc = (u32 __user *) vcpu->arch.pc;
47 u32 cause = vcpu->arch.host_cp0_cause;
48 u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
49 unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
50 u32 inst = 0;
51
52 /*
53 * Fetch the instruction.
54 */
55 if (cause & CAUSEF_BD)
56 opc += 1;
57 kvm_get_badinstr(opc, vcpu, &inst);
58
59 kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
60 exccode, opc, inst, badvaddr,
61 kvm_read_c0_guest_status(vcpu->arch.cop0));
62 kvm_arch_vcpu_dump_regs(vcpu);
63 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
64 return RESUME_HOST;
65}
66
43static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) 67static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
44{ 68{
45 struct mips_coproc *cop0 = vcpu->arch.cop0; 69 struct mips_coproc *cop0 = vcpu->arch.cop0;
@@ -82,6 +106,10 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
82 ret = RESUME_HOST; 106 ret = RESUME_HOST;
83 break; 107 break;
84 108
109 case EMULATE_HYPERCALL:
110 ret = kvm_mips_handle_hypcall(vcpu);
111 break;
112
85 default: 113 default:
86 BUG(); 114 BUG();
87 } 115 }
@@ -484,6 +512,31 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
484 return ret; 512 return ret;
485} 513}
486 514
515static int kvm_trap_emul_hardware_enable(void)
516{
517 return 0;
518}
519
520static void kvm_trap_emul_hardware_disable(void)
521{
522}
523
524static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext)
525{
526 int r;
527
528 switch (ext) {
529 case KVM_CAP_MIPS_TE:
530 r = 1;
531 break;
532 default:
533 r = 0;
534 break;
535 }
536
537 return r;
538}
539
487static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) 540static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
488{ 541{
489 struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; 542 struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
@@ -561,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
561 u32 config, config1; 614 u32 config, config1;
562 int vcpu_id = vcpu->vcpu_id; 615 int vcpu_id = vcpu->vcpu_id;
563 616
617 /* Start off the timer at 100 MHz */
618 kvm_mips_init_count(vcpu, 100*1000*1000);
619
564 /* 620 /*
565 * Arch specific stuff, set up config registers properly so that the 621 * Arch specific stuff, set up config registers properly so that the
566 * guest will come up as expected 622 * guest will come up as expected
@@ -589,6 +645,13 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
589 /* Read the cache characteristics from the host Config1 Register */ 645 /* Read the cache characteristics from the host Config1 Register */
590 config1 = (read_c0_config1() & ~0x7f); 646 config1 = (read_c0_config1() & ~0x7f);
591 647
648 /* DCache line size not correctly reported in Config1 on Octeon CPUs */
649 if (cpu_dcache_line_size()) {
650 config1 &= ~MIPS_CONF1_DL;
651 config1 |= ((ilog2(cpu_dcache_line_size()) - 1) <<
652 MIPS_CONF1_DL_SHF) & MIPS_CONF1_DL;
653 }
654
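
For reference, Config1.DL encodes the dcache line size as log2(line size) - 1, with 0 meaning no dcache, which is exactly what the ilog2() expression above computes. A short worked example, assuming a 128-byte Octeon line:

/*
 * Config1.DL worked example (DL = log2(line size) - 1, 0 = no dcache):
 *   cpu_dcache_line_size() = 128  ->  ilog2(128) - 1 = 6
 *   a guest decoding DL = 6 computes  2 << 6 = 128 bytes per line.
 */
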
592 /* Set up MMU size */ 655 /* Set up MMU size */
593 config1 &= ~(0x3f << 25); 656 config1 &= ~(0x3f << 25);
594 config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25); 657 config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25);
@@ -892,10 +955,12 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
892 if (v & CAUSEF_DC) { 955 if (v & CAUSEF_DC) {
893 /* disable timer first */ 956 /* disable timer first */
894 kvm_mips_count_disable_cause(vcpu); 957 kvm_mips_count_disable_cause(vcpu);
895 kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); 958 kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC,
959 v);
896 } else { 960 } else {
897 /* enable timer last */ 961 /* enable timer last */
898 kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); 962 kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC,
963 v);
899 kvm_mips_count_enable_cause(vcpu); 964 kvm_mips_count_enable_cause(vcpu);
900 } 965 }
901 } else { 966 } else {
@@ -1230,7 +1295,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
1230 .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe, 1295 .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
1231 .handle_fpe = kvm_trap_emul_handle_fpe, 1296 .handle_fpe = kvm_trap_emul_handle_fpe,
1232 .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, 1297 .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
1298 .handle_guest_exit = kvm_trap_emul_no_handler,
1233 1299
1300 .hardware_enable = kvm_trap_emul_hardware_enable,
1301 .hardware_disable = kvm_trap_emul_hardware_disable,
1302 .check_extension = kvm_trap_emul_check_extension,
1234 .vcpu_init = kvm_trap_emul_vcpu_init, 1303 .vcpu_init = kvm_trap_emul_vcpu_init,
1235 .vcpu_uninit = kvm_trap_emul_vcpu_uninit, 1304 .vcpu_uninit = kvm_trap_emul_vcpu_uninit,
1236 .vcpu_setup = kvm_trap_emul_vcpu_setup, 1305 .vcpu_setup = kvm_trap_emul_vcpu_setup,
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
new file mode 100644
index 000000000000..71d8856ade64
--- /dev/null
+++ b/arch/mips/kvm/vz.c
@@ -0,0 +1,3223 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * KVM/MIPS: Support for hardware virtualization extensions
7 *
8 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
9 * Authors: Yann Le Du <ledu@kymasys.com>
10 */
11
12#include <linux/errno.h>
13#include <linux/err.h>
14#include <linux/module.h>
15#include <linux/preempt.h>
16#include <linux/vmalloc.h>
17#include <asm/cacheflush.h>
18#include <asm/cacheops.h>
19#include <asm/cmpxchg.h>
20#include <asm/fpu.h>
21#include <asm/hazards.h>
22#include <asm/inst.h>
23#include <asm/mmu_context.h>
24#include <asm/r4kcache.h>
25#include <asm/time.h>
26#include <asm/tlb.h>
27#include <asm/tlbex.h>
28
29#include <linux/kvm_host.h>
30
31#include "interrupt.h"
32
33#include "trace.h"
34
35/* Pointers to last VCPU loaded on each physical CPU */
36static struct kvm_vcpu *last_vcpu[NR_CPUS];
37/* Pointers to last VCPU executed on each physical CPU */
38static struct kvm_vcpu *last_exec_vcpu[NR_CPUS];
39
40/*
41 * Number of guest VTLB entries to use, so we can catch inconsistency between
42 * CPUs.
43 */
44static unsigned int kvm_vz_guest_vtlb_size;
45
46static inline long kvm_vz_read_gc0_ebase(void)
47{
48 if (sizeof(long) == 8 && cpu_has_ebase_wg)
49 return read_gc0_ebase_64();
50 else
51 return read_gc0_ebase();
52}
53
54static inline void kvm_vz_write_gc0_ebase(long v)
55{
56 /*
57 * First write with WG=1 to write upper bits, then write again in case
58 * WG should be left at 0.
59 * write_gc0_ebase_64() is no longer UNDEFINED since R6.
60 */
61 if (sizeof(long) == 8 &&
62 (cpu_has_mips64r6 || cpu_has_ebase_wg)) {
63 write_gc0_ebase_64(v | MIPS_EBASE_WG);
64 write_gc0_ebase_64(v);
65 } else {
66 write_gc0_ebase(v | MIPS_EBASE_WG);
67 write_gc0_ebase(v);
68 }
69}
70
71/*
72 * These Config bits may be writable by the guest:
73 * Config: [K23, KU] (!TLB), K0
74 * Config1: (none)
75 * Config2: [TU, SU] (impl)
76 * Config3: ISAOnExc
77 * Config4: FTLBPageSize
78 * Config5: K, CV, MSAEn, UFE, FRE, SBRI, UFR
79 */
80
81static inline unsigned int kvm_vz_config_guest_wrmask(struct kvm_vcpu *vcpu)
82{
83 return CONF_CM_CMASK;
84}
85
86static inline unsigned int kvm_vz_config1_guest_wrmask(struct kvm_vcpu *vcpu)
87{
88 return 0;
89}
90
91static inline unsigned int kvm_vz_config2_guest_wrmask(struct kvm_vcpu *vcpu)
92{
93 return 0;
94}
95
96static inline unsigned int kvm_vz_config3_guest_wrmask(struct kvm_vcpu *vcpu)
97{
98 return MIPS_CONF3_ISA_OE;
99}
100
101static inline unsigned int kvm_vz_config4_guest_wrmask(struct kvm_vcpu *vcpu)
102{
103 /* no need to be exact */
104 return MIPS_CONF4_VFTLBPAGESIZE;
105}
106
107static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu)
108{
109 unsigned int mask = MIPS_CONF5_K | MIPS_CONF5_CV | MIPS_CONF5_SBRI;
110
111 /* Permit MSAEn changes if MSA supported and enabled */
112 if (kvm_mips_guest_has_msa(&vcpu->arch))
113 mask |= MIPS_CONF5_MSAEN;
114
115 /*
116 * Permit guest FPU mode changes if FPU is enabled and the relevant
117 * feature exists according to FIR register.
118 */
119 if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
120 if (cpu_has_ufr)
121 mask |= MIPS_CONF5_UFR;
122 if (cpu_has_fre)
123 mask |= MIPS_CONF5_FRE | MIPS_CONF5_UFE;
124 }
125
126 return mask;
127}
128
129/*
130 * VZ optionally allows these additional Config bits to be written by root:
131 * Config: M, [MT]
132 * Config1: M, [MMUSize-1, C2, MD, PC, WR, CA], FP
133 * Config2: M
134 * Config3: M, MSAP, [BPG], ULRI, [DSP2P, DSPP], CTXTC, [ITL, LPA, VEIC,
135 * VInt, SP, CDMM, MT, SM, TL]
136 * Config4: M, [VTLBSizeExt, MMUSizeExt]
137 * Config5: MRP
138 */
139
140static inline unsigned int kvm_vz_config_user_wrmask(struct kvm_vcpu *vcpu)
141{
142 return kvm_vz_config_guest_wrmask(vcpu) | MIPS_CONF_M;
143}
144
145static inline unsigned int kvm_vz_config1_user_wrmask(struct kvm_vcpu *vcpu)
146{
147 unsigned int mask = kvm_vz_config1_guest_wrmask(vcpu) | MIPS_CONF_M;
148
149 /* Permit FPU to be present if FPU is supported */
150 if (kvm_mips_guest_can_have_fpu(&vcpu->arch))
151 mask |= MIPS_CONF1_FP;
152
153 return mask;
154}
155
156static inline unsigned int kvm_vz_config2_user_wrmask(struct kvm_vcpu *vcpu)
157{
158 return kvm_vz_config2_guest_wrmask(vcpu) | MIPS_CONF_M;
159}
160
161static inline unsigned int kvm_vz_config3_user_wrmask(struct kvm_vcpu *vcpu)
162{
163 unsigned int mask = kvm_vz_config3_guest_wrmask(vcpu) | MIPS_CONF_M |
164 MIPS_CONF3_ULRI | MIPS_CONF3_CTXTC;
165
166 /* Permit MSA to be present if MSA is supported */
167 if (kvm_mips_guest_can_have_msa(&vcpu->arch))
168 mask |= MIPS_CONF3_MSA;
169
170 return mask;
171}
172
173static inline unsigned int kvm_vz_config4_user_wrmask(struct kvm_vcpu *vcpu)
174{
175 return kvm_vz_config4_guest_wrmask(vcpu) | MIPS_CONF_M;
176}
177
178static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu)
179{
180 return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP;
181}
182
183static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva)
184{
185 /* VZ guest has already converted gva to gpa */
186 return gva;
187}
188
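/*
 * Interrupt queueing below uses two bitmaps: pending_exceptions records IRQs
 * to assert and pending_exceptions_clr IRQs to deassert, each request
 * clearing the opposite bit so the most recent request wins. The actual
 * guest Cause/GuestCtl2 updates are deferred to the deliver/clear callbacks
 * run on the next guest entry.
 */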
189static void kvm_vz_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
190{
191 set_bit(priority, &vcpu->arch.pending_exceptions);
192 clear_bit(priority, &vcpu->arch.pending_exceptions_clr);
193}
194
195static void kvm_vz_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
196{
197 clear_bit(priority, &vcpu->arch.pending_exceptions);
198 set_bit(priority, &vcpu->arch.pending_exceptions_clr);
199}
200
201static void kvm_vz_queue_timer_int_cb(struct kvm_vcpu *vcpu)
202{
203 /*
204 * timer expiry is asynchronous to vcpu execution, therefore defer guest
205 * cp0 accesses
206 */
207 kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
208}
209
210static void kvm_vz_dequeue_timer_int_cb(struct kvm_vcpu *vcpu)
211{
212 /*
213 * timer expiry is asynchronous to vcpu execution, therefore defer guest
214 * cp0 accesses
215 */
216 kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER);
217}
218
219static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu,
220 struct kvm_mips_interrupt *irq)
221{
222 int intr = (int)irq->irq;
223
224 /*
225 * interrupts are asynchronous to vcpu execution, therefore defer guest
226 * cp0 accesses
227 */
228 switch (intr) {
229 case 2:
230 kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IO);
231 break;
232
233 case 3:
234 kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_1);
235 break;
236
237 case 4:
238 kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_2);
239 break;
240
241 default:
242 break;
243 }
244
245}
246
247static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
248 struct kvm_mips_interrupt *irq)
249{
250 int intr = (int)irq->irq;
251
252 /*
253 * interrupts are asynchronous to vcpu execution, therefore defer guest
254 * cp0 accesses
255 */
256 switch (intr) {
257 case -2:
258 kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IO);
259 break;
260
261 case -3:
262 kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1);
263 break;
264
265 case -4:
266 kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2);
267 break;
268
269 default:
270 break;
271 }
272
273}
274
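/*
 * Map KVM's internal interrupt priorities to root Cause.IP bits: C_IRQ0 is
 * hardware interrupt 0 (Cause.IP2), C_IRQ1/C_IRQ2 carry the IPIs on IP3/IP4,
 * and C_IRQ5 is the timer interrupt on IP7.
 */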
275static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = {
276 [MIPS_EXC_INT_TIMER] = C_IRQ5,
277 [MIPS_EXC_INT_IO] = C_IRQ0,
278 [MIPS_EXC_INT_IPI_1] = C_IRQ1,
279 [MIPS_EXC_INT_IPI_2] = C_IRQ2,
280};
281
282static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
283 u32 cause)
284{
285 u32 irq = (priority < MIPS_EXC_MAX) ?
286 kvm_vz_priority_to_irq[priority] : 0;
287
288 switch (priority) {
289 case MIPS_EXC_INT_TIMER:
290 set_gc0_cause(C_TI);
291 break;
292
293 case MIPS_EXC_INT_IO:
294 case MIPS_EXC_INT_IPI_1:
295 case MIPS_EXC_INT_IPI_2:
296 if (cpu_has_guestctl2)
297 set_c0_guestctl2(irq);
298 else
299 set_gc0_cause(irq);
300 break;
301
302 default:
303 break;
304 }
305
306 clear_bit(priority, &vcpu->arch.pending_exceptions);
307 return 1;
308}
309
310static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority,
311 u32 cause)
312{
313 u32 irq = (priority < MIPS_EXC_MAX) ?
314 kvm_vz_priority_to_irq[priority] : 0;
315
316 switch (priority) {
317 case MIPS_EXC_INT_TIMER:
318 /*
319 * Call to kvm_write_c0_guest_compare() clears Cause.TI in
320 * kvm_mips_emulate_CP0(). Explicitly clear irq associated with
321 * Cause.IP[IPTI] if GuestCtl2 virtual interrupt register not
322 * supported or if not using GuestCtl2 Hardware Clear.
323 */
324 if (cpu_has_guestctl2) {
325 if (!(read_c0_guestctl2() & (irq << 14)))
326 clear_c0_guestctl2(irq);
327 } else {
328 clear_gc0_cause(irq);
329 }
330 break;
331
332 case MIPS_EXC_INT_IO:
333 case MIPS_EXC_INT_IPI_1:
334 case MIPS_EXC_INT_IPI_2:
335 /* Clear GuestCtl2.VIP irq if not using Hardware Clear */
336 if (cpu_has_guestctl2) {
337 if (!(read_c0_guestctl2() & (irq << 14)))
338 clear_c0_guestctl2(irq);
339 } else {
340 clear_gc0_cause(irq);
341 }
342 break;
343
344 default:
345 break;
346 }
347
348 clear_bit(priority, &vcpu->arch.pending_exceptions_clr);
349 return 1;
350}
351
352/*
353 * VZ guest timer handling.
354 */
355
356/**
357 * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer.
358 * @vcpu: Virtual CPU.
359 *
360 * Returns: true if the VZ GTOffset & real guest CP0_Count should be used
361 * instead of software emulation of guest timer.
362 * false otherwise.
363 */
364static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu)
365{
366 if (kvm_mips_count_disabled(vcpu))
367 return false;
368
369 /* Chosen frequency must match real frequency */
370 if (mips_hpt_frequency != vcpu->arch.count_hz)
371 return false;
372
373 /* We don't support a CP0_GTOffset with fewer bits than CP0_Count */
374 if (current_cpu_data.gtoffset_mask != 0xffffffff)
375 return false;
376
377 return true;
378}
379
380/**
381 * _kvm_vz_restore_stimer() - Restore soft timer state.
382 * @vcpu: Virtual CPU.
383 * @compare: CP0_Compare register value, restored by caller.
384 * @cause: CP0_Cause register to restore.
385 *
386 * Restore VZ state relating to the soft timer. The hard timer can be enabled
387 * later.
388 */
389static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare,
390 u32 cause)
391{
392 /*
393 * Avoid spurious counter interrupts by setting Guest CP0_Count to just
394 * after Guest CP0_Compare.
395 */
396 write_c0_gtoffset(compare - read_c0_count());
397
398 back_to_back_c0_hazard();
399 write_gc0_cause(cause);
400}
401
402/**
403 * _kvm_vz_restore_htimer() - Restore hard timer state.
404 * @vcpu: Virtual CPU.
405 * @compare: CP0_Compare register value, restored by caller.
406 * @cause: CP0_Cause register to restore.
407 *
408 * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the
409 * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause.
410 */
411static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
412 u32 compare, u32 cause)
413{
414 u32 start_count, after_count;
415 ktime_t freeze_time;
416 unsigned long flags;
417
418 /*
419 * Freeze the soft-timer and sync the guest CP0_Count with it. We do
420 * this with interrupts disabled to avoid latency.
421 */
422 local_irq_save(flags);
423 freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count);
424 write_c0_gtoffset(start_count - read_c0_count());
425 local_irq_restore(flags);
426
427 /* restore guest CP0_Cause, as TI may already be set */
428 back_to_back_c0_hazard();
429 write_gc0_cause(cause);
430
431 /*
432 * The above sequence isn't atomic and would result in lost timer
433 * interrupts if we're not careful. Detect if a timer interrupt is due
434 * and assert it.
435 */
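 /*
 * The unsigned comparison below is effectively modular arithmetic on
 * CP0_Count: it is true exactly when Compare lies in the interval
 * (start_count, after_count], i.e. the timer target was crossed while
 * CP0_Cause was being restored above.
 */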
436 back_to_back_c0_hazard();
437 after_count = read_gc0_count();
438 if (after_count - start_count > compare - start_count - 1)
439 kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
440}
441
442/**
443 * kvm_vz_restore_timer() - Restore timer state.
444 * @vcpu: Virtual CPU.
445 *
446 * Restore soft timer state from saved context.
447 */
448static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu)
449{
450 struct mips_coproc *cop0 = vcpu->arch.cop0;
451 u32 cause, compare;
452
453 compare = kvm_read_sw_gc0_compare(cop0);
454 cause = kvm_read_sw_gc0_cause(cop0);
455
456 write_gc0_compare(compare);
457 _kvm_vz_restore_stimer(vcpu, compare, cause);
458}
459
460/**
461 * kvm_vz_acquire_htimer() - Switch to hard timer state.
462 * @vcpu: Virtual CPU.
463 *
464 * Restore hard timer state on top of existing soft timer state if possible.
465 *
466 * Since hard timer won't remain active over preemption, preemption should be
467 * disabled by the caller.
468 */
469void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu)
470{
471 u32 gctl0;
472
473 gctl0 = read_c0_guestctl0();
474 if (!(gctl0 & MIPS_GCTL0_GT) && kvm_vz_should_use_htimer(vcpu)) {
475 /* enable guest access to hard timer */
476 write_c0_guestctl0(gctl0 | MIPS_GCTL0_GT);
477
478 _kvm_vz_restore_htimer(vcpu, read_gc0_compare(),
479 read_gc0_cause());
480 }
481}
482
483/**
484 * _kvm_vz_save_htimer() - Switch to software emulation of guest timer.
485 * @vcpu: Virtual CPU.
486 * @compare: Pointer to write compare value to.
487 * @cause: Pointer to write cause value to.
488 *
489 * Save VZ guest timer state and switch to software emulation of guest CP0
490 * timer. The hard timer must already be in use, so preemption should be
491 * disabled.
492 */
493static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu,
494 u32 *out_compare, u32 *out_cause)
495{
496 u32 cause, compare, before_count, end_count;
497 ktime_t before_time;
498
499 compare = read_gc0_compare();
500 *out_compare = compare;
501
502 before_time = ktime_get();
503
504 /*
505 * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time
506 * at which no pending timer interrupt is missing.
507 */
508 before_count = read_gc0_count();
509 back_to_back_c0_hazard();
510 cause = read_gc0_cause();
511 *out_cause = cause;
512
513 /*
514 * Record a final CP0_Count which we will transfer to the soft-timer.
515 * This is recorded *after* saving CP0_Cause, so we don't get any timer
516 * interrupts from just after the final CP0_Count point.
517 */
518 back_to_back_c0_hazard();
519 end_count = read_gc0_count();
520
521 /*
522 * The above sequence isn't atomic, so we could miss a timer interrupt
523 * between reading CP0_Cause and end_count. Detect and record any timer
524 * interrupt due between before_count and end_count.
525 */
526 if (end_count - before_count > compare - before_count - 1)
527 kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
528
529 /*
530 * Restore soft-timer, ignoring a small amount of negative drift due to
531 * delay between freeze_hrtimer and setting CP0_GTOffset.
532 */
533 kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000);
534}
535
536/**
537 * kvm_vz_save_timer() - Save guest timer state.
538 * @vcpu: Virtual CPU.
539 *
540 * Save VZ guest timer state and switch to soft guest timer if hard timer was in
541 * use.
542 */
543static void kvm_vz_save_timer(struct kvm_vcpu *vcpu)
544{
545 struct mips_coproc *cop0 = vcpu->arch.cop0;
546 u32 gctl0, compare, cause;
547
548 gctl0 = read_c0_guestctl0();
549 if (gctl0 & MIPS_GCTL0_GT) {
550 /* disable guest use of hard timer */
551 write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT);
552
553 /* save hard timer state */
554 _kvm_vz_save_htimer(vcpu, &compare, &cause);
555 } else {
556 compare = read_gc0_compare();
557 cause = read_gc0_cause();
558 }
559
560 /* save timer-related state to VCPU context */
561 kvm_write_sw_gc0_cause(cop0, cause);
562 kvm_write_sw_gc0_compare(cop0, compare);
563}
564
565/**
566 * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use.
567 * @vcpu: Virtual CPU.
568 *
569 * Transfers the state of the hard guest timer to the soft guest timer, leaving
570 * guest state intact so it can continue to be used with the soft timer.
571 */
572void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu)
573{
574 u32 gctl0, compare, cause;
575
576 preempt_disable();
577 gctl0 = read_c0_guestctl0();
578 if (gctl0 & MIPS_GCTL0_GT) {
579 /* disable guest use of timer */
580 write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT);
581
582 /* switch to soft timer */
583 _kvm_vz_save_htimer(vcpu, &compare, &cause);
584
585 /* leave soft timer in usable state */
586 _kvm_vz_restore_stimer(vcpu, compare, cause);
587 }
588 preempt_enable();
589}
590
591/**
592 * is_eva_access() - Find whether an instruction is an EVA memory accessor.
593 * @inst: 32-bit instruction encoding.
594 *
595 * Finds whether @inst encodes an EVA memory access instruction, which would
596 * indicate that emulation of it should access the user mode address space
597 * instead of the kernel mode address space. This matters for MUSUK segments
598 * which are TLB mapped for user mode but unmapped for kernel mode.
599 *
600 * Returns: Whether @inst encodes an EVA accessor instruction.
601 */
602static bool is_eva_access(union mips_instruction inst)
603{
604 if (inst.spec3_format.opcode != spec3_op)
605 return false;
606
607 switch (inst.spec3_format.func) {
608 case lwle_op:
609 case lwre_op:
610 case cachee_op:
611 case sbe_op:
612 case she_op:
613 case sce_op:
614 case swe_op:
615 case swle_op:
616 case swre_op:
617 case prefe_op:
618 case lbue_op:
619 case lhue_op:
620 case lbe_op:
621 case lhe_op:
622 case lle_op:
623 case lwe_op:
624 return true;
625 default:
626 return false;
627 }
628}
629
630/**
631 * is_eva_am_mapped() - Find whether an access mode is mapped.
632 * @vcpu: KVM VCPU state.
633 * @am: 3-bit encoded access mode.
634 * @eu: Segment becomes unmapped and uncached when Status.ERL=1.
635 *
636 * Decode @am to find whether it encodes a mapped segment for the current VCPU
637 * state. Where necessary @eu and the actual instruction causing the fault are
638 * taken into account to make the decision.
639 *
640 * Returns: Whether the VCPU faulted on a TLB mapped address.
641 */
642static bool is_eva_am_mapped(struct kvm_vcpu *vcpu, unsigned int am, bool eu)
643{
644 u32 am_lookup;
645 int err;
646
647 /*
648 * Interpret access control mode. We assume address errors will already
649 * have been caught by the guest, leaving us with:
650 * AM UM SM KM 31..24 23..16
651 * UK 0 000 Unm 0 0
652 * MK 1 001 TLB 1
653 * MSK 2 010 TLB TLB 1
654 * MUSK 3 011 TLB TLB TLB 1
655 * MUSUK 4 100 TLB TLB Unm 0 1
656 * USK 5 101 Unm Unm 0 0
657 * - 6 110 0 0
658 * UUSK 7 111 Unm Unm Unm 0 0
659 *
660 * We shift a magic value by AM across the sign bit to find if always
661 * TLB mapped, and if not shift by 8 again to find if it depends on KM.
662 */
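 /*
 * Worked example: for AM=4 (MUSUK), 0x70080000 << 4 = 0x00800000 (sign
 * clear, so not unconditionally mapped), and a further << 8 gives
 * 0x80000000 (sign set), so the result depends on kernel/user mode and any
 * EVA accessor, handled below.
 */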
663 am_lookup = 0x70080000 << am;
664 if ((s32)am_lookup < 0) {
665 /*
666 * MK, MSK, MUSK
667 * Always TLB mapped, unless SegCtl.EU && ERL
668 */
669 if (!eu || !(read_gc0_status() & ST0_ERL))
670 return true;
671 } else {
672 am_lookup <<= 8;
673 if ((s32)am_lookup < 0) {
674 union mips_instruction inst;
675 unsigned int status;
676 u32 *opc;
677
678 /*
679 * MUSUK
680 * TLB mapped if not in kernel mode
681 */
682 status = read_gc0_status();
683 if (!(status & (ST0_EXL | ST0_ERL)) &&
684 (status & ST0_KSU))
685 return true;
686 /*
687 * EVA access instructions in kernel
688 * mode access user address space.
689 */
690 opc = (u32 *)vcpu->arch.pc;
691 if (vcpu->arch.host_cp0_cause & CAUSEF_BD)
692 opc += 1;
693 err = kvm_get_badinstr(opc, vcpu, &inst.word);
694 if (!err && is_eva_access(inst))
695 return true;
696 }
697 }
698
699 return false;
700}
701
702/**
703 * kvm_vz_gva_to_gpa() - Convert valid GVA to GPA.
704 * @vcpu: KVM VCPU state.
705 * @gva: Guest virtual address to convert.
706 * @gpa: Output guest physical address.
707 *
708 * Convert a guest virtual address (GVA) which is valid according to the guest
709 * context, to a guest physical address (GPA).
710 *
711 * Returns: 0 on success.
712 * -errno on failure.
713 */
714static int kvm_vz_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
715 unsigned long *gpa)
716{
717 u32 gva32 = gva;
718 unsigned long segctl;
719
720 if ((long)gva == (s32)gva32) {
721 /* Handle canonical 32-bit virtual address */
722 if (cpu_guest_has_segments) {
723 unsigned long mask, pa;
724
725 switch (gva32 >> 29) {
726 case 0:
727 case 1: /* CFG5 (1GB) */
728 segctl = read_gc0_segctl2() >> 16;
729 mask = (unsigned long)0xfc0000000ull;
730 break;
731 case 2:
732 case 3: /* CFG4 (1GB) */
733 segctl = read_gc0_segctl2();
734 mask = (unsigned long)0xfc0000000ull;
735 break;
736 case 4: /* CFG3 (512MB) */
737 segctl = read_gc0_segctl1() >> 16;
738 mask = (unsigned long)0xfe0000000ull;
739 break;
740 case 5: /* CFG2 (512MB) */
741 segctl = read_gc0_segctl1();
742 mask = (unsigned long)0xfe0000000ull;
743 break;
744 case 6: /* CFG1 (512MB) */
745 segctl = read_gc0_segctl0() >> 16;
746 mask = (unsigned long)0xfe0000000ull;
747 break;
748 case 7: /* CFG0 (512MB) */
749 segctl = read_gc0_segctl0();
750 mask = (unsigned long)0xfe0000000ull;
751 break;
752 default:
753 /*
754 * GCC 4.9 isn't smart enough to figure out that
755 * segctl and mask are always initialised.
756 */
757 unreachable();
758 }
759
760 if (is_eva_am_mapped(vcpu, (segctl >> 4) & 0x7,
761 segctl & 0x0008))
762 goto tlb_mapped;
763
764 /* Unmapped, find guest physical address */
765 pa = (segctl << 20) & mask;
766 pa |= gva32 & ~mask;
767 *gpa = pa;
768 return 0;
769 } else if ((s32)gva32 < (s32)0xc0000000) {
770 /* legacy unmapped KSeg0 or KSeg1 */
771 *gpa = gva32 & 0x1fffffff;
772 return 0;
773 }
774#ifdef CONFIG_64BIT
775 } else if ((gva & 0xc000000000000000) == 0x8000000000000000) {
776 /* XKPHYS */
777 if (cpu_guest_has_segments) {
778 /*
779 * Each of the 8 regions can be overridden by SegCtl2.XR
780 * to use SegCtl1.XAM.
781 */
782 segctl = read_gc0_segctl2();
783 if (segctl & (1ull << (56 + ((gva >> 59) & 0x7)))) {
784 segctl = read_gc0_segctl1();
785 if (is_eva_am_mapped(vcpu, (segctl >> 59) & 0x7,
786 0))
787 goto tlb_mapped;
788 }
789
790 }
791 /*
792 * Traditionally fully unmapped.
793 * Bits 61:59 specify the CCA, which we can just mask off here.
794 * Bits 58:PABITS should be zero, but we shouldn't have got here
795 * if they weren't.
796 */
797 *gpa = gva & 0x07ffffffffffffff;
798 return 0;
799#endif
800 }
801
802tlb_mapped:
803 return kvm_vz_guest_tlb_lookup(vcpu, gva, gpa);
804}
805
806/**
807 * kvm_vz_badvaddr_to_gpa() - Convert GVA BadVAddr from root exception to GPA.
808 * @vcpu: KVM VCPU state.
809 * @badvaddr: Root BadVAddr.
810 * @gpa: Output guest physical address.
811 *
812 * VZ implementations are permitted to report guest virtual addresses (GVA) in
813 * BadVAddr on a root exception during guest execution, instead of the more
814 * convenient guest physical addresses (GPA). When we get a GVA, this function
815 * converts it to a GPA, taking into account guest segmentation and guest TLB
816 * state.
817 *
818 * Returns: 0 on success.
819 * -errno on failure.
820 */
821static int kvm_vz_badvaddr_to_gpa(struct kvm_vcpu *vcpu, unsigned long badvaddr,
822 unsigned long *gpa)
823{
824 unsigned int gexccode = (vcpu->arch.host_cp0_guestctl0 &
825 MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT;
826
827 /* If BadVAddr is GPA, then all is well in the world */
828 if (likely(gexccode == MIPS_GCTL0_GEXC_GPA)) {
829 *gpa = badvaddr;
830 return 0;
831 }
832
833 /* Otherwise we'd expect it to be GVA ... */
834 if (WARN(gexccode != MIPS_GCTL0_GEXC_GVA,
835 "Unexpected gexccode %#x\n", gexccode))
836 return -EINVAL;
837
838 /* ... and we need to perform the GVA->GPA translation in software */
839 return kvm_vz_gva_to_gpa(vcpu, badvaddr, gpa);
840}
841
842static int kvm_trap_vz_no_handler(struct kvm_vcpu *vcpu)
843{
844 u32 *opc = (u32 *) vcpu->arch.pc;
845 u32 cause = vcpu->arch.host_cp0_cause;
846 u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
847 unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
848 u32 inst = 0;
849
850 /*
851 * Fetch the instruction.
852 */
853 if (cause & CAUSEF_BD)
854 opc += 1;
855 kvm_get_badinstr(opc, vcpu, &inst);
856
857 kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
858 exccode, opc, inst, badvaddr,
859 read_gc0_status());
860 kvm_arch_vcpu_dump_regs(vcpu);
861 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
862 return RESUME_HOST;
863}
864
865static unsigned long mips_process_maar(unsigned int op, unsigned long val)
866{
867 /* Mask off unused bits */
868 unsigned long mask = 0xfffff000 | MIPS_MAAR_S | MIPS_MAAR_VL;
869
870 if (read_gc0_pagegrain() & PG_ELPA)
871 mask |= 0x00ffffff00000000ull;
872 if (cpu_guest_has_mvh)
873 mask |= MIPS_MAAR_VH;
874
875 /* Set or clear VH */
876 if (op == mtc_op) {
877 /* clear VH */
878 val &= ~MIPS_MAAR_VH;
879 } else if (op == dmtc_op) {
880 /* set VH to match VL */
881 val &= ~MIPS_MAAR_VH;
882 if (val & MIPS_MAAR_VL)
883 val |= MIPS_MAAR_VH;
884 }
885
886 return val & mask;
887}
888
889static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val)
890{
891 struct mips_coproc *cop0 = vcpu->arch.cop0;
892
893 val &= MIPS_MAARI_INDEX;
894 if (val == MIPS_MAARI_INDEX)
895 kvm_write_sw_gc0_maari(cop0, ARRAY_SIZE(vcpu->arch.maar) - 1);
896 else if (val < ARRAY_SIZE(vcpu->arch.maar))
897 kvm_write_sw_gc0_maari(cop0, val);
898}
899
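/*
 * GPSI (Guest Privileged Sensitive Instruction) exits occur when the guest
 * executes a privileged instruction the hardware cannot complete on its own
 * (here WAIT, certain MFC0/MTC0 accesses and, on some cores, CACHE); the
 * handlers below emulate the instruction and advance the guest PC.
 */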
900static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
901 u32 *opc, u32 cause,
902 struct kvm_run *run,
903 struct kvm_vcpu *vcpu)
904{
905 struct mips_coproc *cop0 = vcpu->arch.cop0;
906 enum emulation_result er = EMULATE_DONE;
907 u32 rt, rd, sel;
908 unsigned long curr_pc;
909 unsigned long val;
910
911 /*
912 * Update PC and hold onto current PC in case there is
913 * an error and we want to rollback the PC
914 */
915 curr_pc = vcpu->arch.pc;
916 er = update_pc(vcpu, cause);
917 if (er == EMULATE_FAIL)
918 return er;
919
920 if (inst.co_format.co) {
921 switch (inst.co_format.func) {
922 case wait_op:
923 er = kvm_mips_emul_wait(vcpu);
924 break;
925 default:
926 er = EMULATE_FAIL;
927 }
928 } else {
929 rt = inst.c0r_format.rt;
930 rd = inst.c0r_format.rd;
931 sel = inst.c0r_format.sel;
932
933 switch (inst.c0r_format.rs) {
934 case dmfc_op:
935 case mfc_op:
936#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
937 cop0->stat[rd][sel]++;
938#endif
939 if (rd == MIPS_CP0_COUNT &&
940 sel == 0) { /* Count */
941 val = kvm_mips_read_count(vcpu);
942 } else if (rd == MIPS_CP0_COMPARE &&
943 sel == 0) { /* Compare */
944 val = read_gc0_compare();
945 } else if (rd == MIPS_CP0_LLADDR &&
946 sel == 0) { /* LLAddr */
947 if (cpu_guest_has_rw_llb)
948 val = read_gc0_lladdr() &
949 MIPS_LLADDR_LLB;
950 else
951 val = 0;
952 } else if (rd == MIPS_CP0_LLADDR &&
953 sel == 1 && /* MAAR */
954 cpu_guest_has_maar &&
955 !cpu_guest_has_dyn_maar) {
956 /* MAARI must be in range */
957 BUG_ON(kvm_read_sw_gc0_maari(cop0) >=
958 ARRAY_SIZE(vcpu->arch.maar));
959 val = vcpu->arch.maar[
960 kvm_read_sw_gc0_maari(cop0)];
961 } else if ((rd == MIPS_CP0_PRID &&
962 (sel == 0 || /* PRid */
963 sel == 2 || /* CDMMBase */
964 sel == 3)) || /* CMGCRBase */
965 (rd == MIPS_CP0_STATUS &&
966 (sel == 2 || /* SRSCtl */
967 sel == 3)) || /* SRSMap */
968 (rd == MIPS_CP0_CONFIG &&
969 (sel == 7)) || /* Config7 */
970 (rd == MIPS_CP0_LLADDR &&
971 (sel == 2) && /* MAARI */
972 cpu_guest_has_maar &&
973 !cpu_guest_has_dyn_maar) ||
974 (rd == MIPS_CP0_ERRCTL &&
975 (sel == 0))) { /* ErrCtl */
976 val = cop0->reg[rd][sel];
977 } else {
978 val = 0;
979 er = EMULATE_FAIL;
980 }
981
982 if (er != EMULATE_FAIL) {
983 /* Sign extend */
984 if (inst.c0r_format.rs == mfc_op)
985 val = (int)val;
986 vcpu->arch.gprs[rt] = val;
987 }
988
989 trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mfc_op) ?
990 KVM_TRACE_MFC0 : KVM_TRACE_DMFC0,
991 KVM_TRACE_COP0(rd, sel), val);
992 break;
993
994 case dmtc_op:
995 case mtc_op:
996#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
997 cop0->stat[rd][sel]++;
998#endif
999 val = vcpu->arch.gprs[rt];
1000 trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mtc_op) ?
1001 KVM_TRACE_MTC0 : KVM_TRACE_DMTC0,
1002 KVM_TRACE_COP0(rd, sel), val);
1003
1004 if (rd == MIPS_CP0_COUNT &&
1005 sel == 0) { /* Count */
1006 kvm_vz_lose_htimer(vcpu);
1007 kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
1008 } else if (rd == MIPS_CP0_COMPARE &&
1009 sel == 0) { /* Compare */
1010 kvm_mips_write_compare(vcpu,
1011 vcpu->arch.gprs[rt],
1012 true);
1013 } else if (rd == MIPS_CP0_LLADDR &&
1014 sel == 0) { /* LLAddr */
1015 /*
1016 * P5600 generates GPSI on guest MTC0 LLAddr.
1017 * Only allow the guest to clear LLB.
1018 */
1019 if (cpu_guest_has_rw_llb &&
1020 !(val & MIPS_LLADDR_LLB))
1021 write_gc0_lladdr(0);
1022 } else if (rd == MIPS_CP0_LLADDR &&
1023 sel == 1 && /* MAAR */
1024 cpu_guest_has_maar &&
1025 !cpu_guest_has_dyn_maar) {
1026 val = mips_process_maar(inst.c0r_format.rs,
1027 val);
1028
1029 /* MAARI must be in range */
1030 BUG_ON(kvm_read_sw_gc0_maari(cop0) >=
1031 ARRAY_SIZE(vcpu->arch.maar));
1032 vcpu->arch.maar[kvm_read_sw_gc0_maari(cop0)] =
1033 val;
1034 } else if (rd == MIPS_CP0_LLADDR &&
1035 (sel == 2) && /* MAARI */
1036 cpu_guest_has_maar &&
1037 !cpu_guest_has_dyn_maar) {
1038 kvm_write_maari(vcpu, val);
1039 } else if (rd == MIPS_CP0_ERRCTL &&
1040 (sel == 0)) { /* ErrCtl */
1041 /* ignore the written value */
1042 } else {
1043 er = EMULATE_FAIL;
1044 }
1045 break;
1046
1047 default:
1048 er = EMULATE_FAIL;
1049 break;
1050 }
1051 }
1052 /* Rollback PC only if emulation was unsuccessful */
1053 if (er == EMULATE_FAIL) {
1054 kvm_err("[%#lx]%s: unsupported cop0 instruction 0x%08x\n",
1055 curr_pc, __func__, inst.word);
1056
1057 vcpu->arch.pc = curr_pc;
1058 }
1059
1060 return er;
1061}
1062
1063static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
1064 u32 *opc, u32 cause,
1065 struct kvm_run *run,
1066 struct kvm_vcpu *vcpu)
1067{
1068 enum emulation_result er = EMULATE_DONE;
1069 u32 cache, op_inst, op, base;
1070 s16 offset;
1071 struct kvm_vcpu_arch *arch = &vcpu->arch;
1072 unsigned long va, curr_pc;
1073
1074 /*
1075 * Update PC and hold onto current PC in case there is
1076 * an error and we want to rollback the PC
1077 */
1078 curr_pc = vcpu->arch.pc;
1079 er = update_pc(vcpu, cause);
1080 if (er == EMULATE_FAIL)
1081 return er;
1082
1083 base = inst.i_format.rs;
1084 op_inst = inst.i_format.rt;
1085 if (cpu_has_mips_r6)
1086 offset = inst.spec3_format.simmediate;
1087 else
1088 offset = inst.i_format.simmediate;
1089 cache = op_inst & CacheOp_Cache;
1090 op = op_inst & CacheOp_Op;
1091
1092 va = arch->gprs[base] + offset;
1093
1094 kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
1095 cache, op, base, arch->gprs[base], offset);
1096
1097 /* Secondary or tertiary cache ops ignored */
1098 if (cache != Cache_I && cache != Cache_D)
1099 return EMULATE_DONE;
1100
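 /*
 * Index-type ops take a cache index rather than a translated address, so
 * the guest VA can be passed to the host flush routines directly. Hit-type
 * ops would need the GVA translated first; only Octeon III is handled, by
 * over-approximating with a full icache flush, and other platforms fall
 * through to EMULATE_FAIL below.
 */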
1101 switch (op_inst) {
1102 case Index_Invalidate_I:
1103 flush_icache_line_indexed(va);
1104 return EMULATE_DONE;
1105 case Index_Writeback_Inv_D:
1106 flush_dcache_line_indexed(va);
1107 return EMULATE_DONE;
1108 case Hit_Invalidate_I:
1109 case Hit_Invalidate_D:
1110 case Hit_Writeback_Inv_D:
1111 if (boot_cpu_type() == CPU_CAVIUM_OCTEON3) {
1112 /* We can just flush entire icache */
1113 local_flush_icache_range(0, 0);
1114 return EMULATE_DONE;
1115 }
1116
1117 /* So far, other platforms support guest hit cache ops */
1118 break;
1119 default:
1120 break;
1121 };
1122
1123 kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
1124 curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base],
1125 offset);
1126 /* Rollback PC */
1127 vcpu->arch.pc = curr_pc;
1128
1129 return EMULATE_FAIL;
1130}
1131
1132static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
1133 struct kvm_vcpu *vcpu)
1134{
1135 enum emulation_result er = EMULATE_DONE;
1136 struct kvm_vcpu_arch *arch = &vcpu->arch;
1137 struct kvm_run *run = vcpu->run;
1138 union mips_instruction inst;
1139 int rd, rt, sel;
1140 int err;
1141
1142 /*
1143 * Fetch the instruction.
1144 */
1145 if (cause & CAUSEF_BD)
1146 opc += 1;
1147 err = kvm_get_badinstr(opc, vcpu, &inst.word);
1148 if (err)
1149 return EMULATE_FAIL;
1150
1151 switch (inst.r_format.opcode) {
1152 case cop0_op:
1153 er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu);
1154 break;
1155#ifndef CONFIG_CPU_MIPSR6
1156 case cache_op:
1157 trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
1158 er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
1159 break;
1160#endif
1161 case spec3_op:
1162 switch (inst.spec3_format.func) {
1163#ifdef CONFIG_CPU_MIPSR6
1164 case cache6_op:
1165 trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
1166 er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
1167 break;
1168#endif
1169 case rdhwr_op:
1170 if (inst.r_format.rs || (inst.r_format.re >> 3))
1171 goto unknown;
1172
1173 rd = inst.r_format.rd;
1174 rt = inst.r_format.rt;
1175 sel = inst.r_format.re & 0x7;
1176
1177 switch (rd) {
1178 case MIPS_HWR_CC: /* Read count register */
1179 arch->gprs[rt] =
1180 (long)(int)kvm_mips_read_count(vcpu);
1181 break;
1182 default:
1183 trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
1184 KVM_TRACE_HWR(rd, sel), 0);
1185 goto unknown;
1186 };
1187
1188 trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
1189 KVM_TRACE_HWR(rd, sel), arch->gprs[rt]);
1190
1191 er = update_pc(vcpu, cause);
1192 break;
1193 default:
1194 goto unknown;
1195 };
1196 break;
1197unknown:
1198
1199 default:
1200 kvm_err("GPSI exception not supported (%p/%#x)\n",
1201 opc, inst.word);
1202 kvm_arch_vcpu_dump_regs(vcpu);
1203 er = EMULATE_FAIL;
1204 break;
1205 }
1206
1207 return er;
1208}
1209
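/*
 * GSFC (Guest Software Field Change) exits are raised when the guest writes
 * a CP0 field the hardware wants root to vet. The handler below completes
 * the MTC0 on the guest's behalf for Status, Cause, IntCtl and Config5,
 * filtering out bits the guest is not allowed to change.
 */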
1210static enum emulation_result kvm_trap_vz_handle_gsfc(u32 cause, u32 *opc,
1211 struct kvm_vcpu *vcpu)
1212{
1213 enum emulation_result er = EMULATE_DONE;
1214 struct kvm_vcpu_arch *arch = &vcpu->arch;
1215 union mips_instruction inst;
1216 int err;
1217
1218 /*
1219 * Fetch the instruction.
1220 */
1221 if (cause & CAUSEF_BD)
1222 opc += 1;
1223 err = kvm_get_badinstr(opc, vcpu, &inst.word);
1224 if (err)
1225 return EMULATE_FAIL;
1226
1227 /* complete MTC0 on behalf of guest and advance EPC */
1228 if (inst.c0r_format.opcode == cop0_op &&
1229 inst.c0r_format.rs == mtc_op &&
1230 inst.c0r_format.z == 0) {
1231 int rt = inst.c0r_format.rt;
1232 int rd = inst.c0r_format.rd;
1233 int sel = inst.c0r_format.sel;
1234 unsigned int val = arch->gprs[rt];
1235 unsigned int old_val, change;
1236
1237 trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, KVM_TRACE_COP0(rd, sel),
1238 val);
1239
1240 if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
1241 /* FR bit should read as zero if no FPU */
1242 if (!kvm_mips_guest_has_fpu(&vcpu->arch))
1243 val &= ~(ST0_CU1 | ST0_FR);
1244
1245 /*
1246 * Also don't allow FR to be set if host doesn't support
1247 * it.
1248 */
1249 if (!(boot_cpu_data.fpu_id & MIPS_FPIR_F64))
1250 val &= ~ST0_FR;
1251
1252 old_val = read_gc0_status();
1253 change = val ^ old_val;
1254
1255 if (change & ST0_FR) {
1256 /*
1257 * FPU and Vector register state is made
1258 * UNPREDICTABLE by a change of FR, so don't
1259 * even bother saving it.
1260 */
1261 kvm_drop_fpu(vcpu);
1262 }
1263
1264 /*
1265 * If MSA state is already live, it is undefined how it
1266 * interacts with FR=0 FPU state, and we don't want to
1267 * hit reserved instruction exceptions trying to save
1268 * the MSA state later when CU=1 && FR=1, so play it
1269 * safe and save it first.
1270 */
1271 if (change & ST0_CU1 && !(val & ST0_FR) &&
1272 vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA)
1273 kvm_lose_fpu(vcpu);
1274
1275 write_gc0_status(val);
1276 } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
1277 u32 old_cause = read_gc0_cause();
1278 u32 change = old_cause ^ val;
1279
1280 /* DC bit enabling/disabling timer? */
1281 if (change & CAUSEF_DC) {
1282 if (val & CAUSEF_DC) {
1283 kvm_vz_lose_htimer(vcpu);
1284 kvm_mips_count_disable_cause(vcpu);
1285 } else {
1286 kvm_mips_count_enable_cause(vcpu);
1287 }
1288 }
1289
1290 /* Only certain bits are RW to the guest */
1291 change &= (CAUSEF_DC | CAUSEF_IV | CAUSEF_WP |
1292 CAUSEF_IP0 | CAUSEF_IP1);
1293
1294 /* WP can only be cleared */
1295 change &= ~CAUSEF_WP | old_cause;
1296
1297 write_gc0_cause(old_cause ^ change);
1298 } else if ((rd == MIPS_CP0_STATUS) && (sel == 1)) { /* IntCtl */
1299 write_gc0_intctl(val);
1300 } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
1301 old_val = read_gc0_config5();
1302 change = val ^ old_val;
1303 /* Handle changes in FPU/MSA modes */
1304 preempt_disable();
1305
1306 /*
1307 * Propagate FRE changes immediately if the FPU
1308 * context is already loaded.
1309 */
1310 if (change & MIPS_CONF5_FRE &&
1311 vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)
1312 change_c0_config5(MIPS_CONF5_FRE, val);
1313
1314 preempt_enable();
1315
1316 val = old_val ^
1317 (change & kvm_vz_config5_guest_wrmask(vcpu));
1318 write_gc0_config5(val);
1319 } else {
1320 kvm_err("Handle GSFC, unsupported field change @ %p: %#x\n",
1321 opc, inst.word);
1322 er = EMULATE_FAIL;
1323 }
1324
1325 if (er != EMULATE_FAIL)
1326 er = update_pc(vcpu, cause);
1327 } else {
1328 kvm_err("Handle GSFC, unrecognized instruction @ %p: %#x\n",
1329 opc, inst.word);
1330 er = EMULATE_FAIL;
1331 }
1332
1333 return er;
1334}
1335
1336static enum emulation_result kvm_trap_vz_handle_ghfc(u32 cause, u32 *opc,
1337 struct kvm_vcpu *vcpu)
1338{
1339 /*
1340 * Presumably this is due to MC (guest mode change), so let's trace some
1341 * relevant info.
1342 */
1343 trace_kvm_guest_mode_change(vcpu);
1344
1345 return EMULATE_DONE;
1346}
1347
1348static enum emulation_result kvm_trap_vz_handle_hc(u32 cause, u32 *opc,
1349 struct kvm_vcpu *vcpu)
1350{
1351 enum emulation_result er;
1352 union mips_instruction inst;
1353 unsigned long curr_pc;
1354 int err;
1355
1356 if (cause & CAUSEF_BD)
1357 opc += 1;
1358 err = kvm_get_badinstr(opc, vcpu, &inst.word);
1359 if (err)
1360 return EMULATE_FAIL;
1361
1362 /*
1363 * Update PC and hold onto current PC in case there is
1364 * an error and we want to rollback the PC
1365 */
1366 curr_pc = vcpu->arch.pc;
1367 er = update_pc(vcpu, cause);
1368 if (er == EMULATE_FAIL)
1369 return er;
1370
1371 er = kvm_mips_emul_hypcall(vcpu, inst);
1372 if (er == EMULATE_FAIL)
1373 vcpu->arch.pc = curr_pc;
1374
1375 return er;
1376}
1377
1378static enum emulation_result kvm_trap_vz_no_handler_guest_exit(u32 gexccode,
1379 u32 cause,
1380 u32 *opc,
1381 struct kvm_vcpu *vcpu)
1382{
1383 u32 inst;
1384
1385 /*
1386 * Fetch the instruction.
1387 */
1388 if (cause & CAUSEF_BD)
1389 opc += 1;
1390 kvm_get_badinstr(opc, vcpu, &inst);
1391
1392 kvm_err("Guest Exception Code: %d not yet handled @ PC: %p, inst: 0x%08x Status: %#x\n",
1393 gexccode, opc, inst, read_gc0_status());
1394
1395 return EMULATE_FAIL;
1396}
1397
1398static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu)
1399{
1400 u32 *opc = (u32 *) vcpu->arch.pc;
1401 u32 cause = vcpu->arch.host_cp0_cause;
1402 enum emulation_result er = EMULATE_DONE;
1403 u32 gexccode = (vcpu->arch.host_cp0_guestctl0 &
1404 MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT;
1405 int ret = RESUME_GUEST;
1406
1407 trace_kvm_exit(vcpu, KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode);
1408 switch (gexccode) {
1409 case MIPS_GCTL0_GEXC_GPSI:
1410 ++vcpu->stat.vz_gpsi_exits;
1411 er = kvm_trap_vz_handle_gpsi(cause, opc, vcpu);
1412 break;
1413 case MIPS_GCTL0_GEXC_GSFC:
1414 ++vcpu->stat.vz_gsfc_exits;
1415 er = kvm_trap_vz_handle_gsfc(cause, opc, vcpu);
1416 break;
1417 case MIPS_GCTL0_GEXC_HC:
1418 ++vcpu->stat.vz_hc_exits;
1419 er = kvm_trap_vz_handle_hc(cause, opc, vcpu);
1420 break;
1421 case MIPS_GCTL0_GEXC_GRR:
1422 ++vcpu->stat.vz_grr_exits;
1423 er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
1424 vcpu);
1425 break;
1426 case MIPS_GCTL0_GEXC_GVA:
1427 ++vcpu->stat.vz_gva_exits;
1428 er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
1429 vcpu);
1430 break;
1431 case MIPS_GCTL0_GEXC_GHFC:
1432 ++vcpu->stat.vz_ghfc_exits;
1433 er = kvm_trap_vz_handle_ghfc(cause, opc, vcpu);
1434 break;
1435 case MIPS_GCTL0_GEXC_GPA:
1436 ++vcpu->stat.vz_gpa_exits;
1437 er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
1438 vcpu);
1439 break;
1440 default:
1441 ++vcpu->stat.vz_resvd_exits;
1442 er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
1443 vcpu);
1444 break;
1445
1446 }
1447
1448 if (er == EMULATE_DONE) {
1449 ret = RESUME_GUEST;
1450 } else if (er == EMULATE_HYPERCALL) {
1451 ret = kvm_mips_handle_hypcall(vcpu);
1452 } else {
1453 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1454 ret = RESUME_HOST;
1455 }
1456 return ret;
1457}
1458
1459/**
1460 * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor.
1461 * @vcpu: Virtual CPU context.
1462 *
1463 * Handle when the guest attempts to use a coprocessor which hasn't been allowed
1464 * by the root context.
1465 */
1466static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
1467{
1468 struct kvm_run *run = vcpu->run;
1469 u32 cause = vcpu->arch.host_cp0_cause;
1470 enum emulation_result er = EMULATE_FAIL;
1471 int ret = RESUME_GUEST;
1472
1473 if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
1474 /*
1475 * If guest FPU not present, the FPU operation should have been
1476 * treated as a reserved instruction!
1477 * If FPU already in use, we shouldn't get this at all.
1478 */
1479 if (WARN_ON(!kvm_mips_guest_has_fpu(&vcpu->arch) ||
1480 vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) {
1481 preempt_enable();
1482 return EMULATE_FAIL;
1483 }
1484
1485 kvm_own_fpu(vcpu);
1486 er = EMULATE_DONE;
1487 }
1488 /* other coprocessors not handled */
1489
1490 switch (er) {
1491 case EMULATE_DONE:
1492 ret = RESUME_GUEST;
1493 break;
1494
1495 case EMULATE_FAIL:
1496 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1497 ret = RESUME_HOST;
1498 break;
1499
1500 default:
1501 BUG();
1502 }
1503 return ret;
1504}
1505
1506/**
1507 * kvm_trap_vz_handle_msa_disabled() - Guest used MSA while disabled in root.
1508 * @vcpu: Virtual CPU context.
1509 *
1510 * Handle when the guest attempts to use MSA when it is disabled in the root
1511 * context.
1512 */
1513static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
1514{
1515 struct kvm_run *run = vcpu->run;
1516
1517 /*
1518 * If MSA not present or not exposed to guest or FR=0, the MSA operation
1519 * should have been treated as a reserved instruction!
1520 * Same if CU1=1, FR=0.
1521 * If MSA already in use, we shouldn't get this at all.
1522 */
1523 if (!kvm_mips_guest_has_msa(&vcpu->arch) ||
1524 (read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 ||
1525 !(read_gc0_config5() & MIPS_CONF5_MSAEN) ||
1526 vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
1527 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1528 return RESUME_HOST;
1529 }
1530
1531 kvm_own_msa(vcpu);
1532
1533 return RESUME_GUEST;
1534}
1535
1536static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
1537{
1538 struct kvm_run *run = vcpu->run;
1539 u32 *opc = (u32 *) vcpu->arch.pc;
1540 u32 cause = vcpu->arch.host_cp0_cause;
1541 ulong badvaddr = vcpu->arch.host_cp0_badvaddr;
1542 union mips_instruction inst;
1543 enum emulation_result er = EMULATE_DONE;
1544 int err, ret = RESUME_GUEST;
1545
1546 if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, false)) {
1547 /* A code fetch fault doesn't count as an MMIO */
1548 if (kvm_is_ifetch_fault(&vcpu->arch)) {
1549 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1550 return RESUME_HOST;
1551 }
1552
1553 /* Fetch the instruction */
1554 if (cause & CAUSEF_BD)
1555 opc += 1;
1556 err = kvm_get_badinstr(opc, vcpu, &inst.word);
1557 if (err) {
1558 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1559 return RESUME_HOST;
1560 }
1561
1562 /* Treat as MMIO */
1563 er = kvm_mips_emulate_load(inst, cause, run, vcpu);
1564 if (er == EMULATE_FAIL) {
1565 kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n",
1566 opc, badvaddr);
1567 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1568 }
1569 }
1570
1571 if (er == EMULATE_DONE) {
1572 ret = RESUME_GUEST;
1573 } else if (er == EMULATE_DO_MMIO) {
1574 run->exit_reason = KVM_EXIT_MMIO;
1575 ret = RESUME_HOST;
1576 } else {
1577 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1578 ret = RESUME_HOST;
1579 }
1580 return ret;
1581}
1582
1583static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
1584{
1585 struct kvm_run *run = vcpu->run;
1586 u32 *opc = (u32 *) vcpu->arch.pc;
1587 u32 cause = vcpu->arch.host_cp0_cause;
1588 ulong badvaddr = vcpu->arch.host_cp0_badvaddr;
1589 union mips_instruction inst;
1590 enum emulation_result er = EMULATE_DONE;
1591 int err;
1592 int ret = RESUME_GUEST;
1593
1594 /* Just try the access again if we couldn't do the translation */
1595 if (kvm_vz_badvaddr_to_gpa(vcpu, badvaddr, &badvaddr))
1596 return RESUME_GUEST;
1597 vcpu->arch.host_cp0_badvaddr = badvaddr;
1598
1599 if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, true)) {
1600 /* Fetch the instruction */
1601 if (cause & CAUSEF_BD)
1602 opc += 1;
1603 err = kvm_get_badinstr(opc, vcpu, &inst.word);
1604 if (err) {
1605 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1606 return RESUME_HOST;
1607 }
1608
1609 /* Treat as MMIO */
1610 er = kvm_mips_emulate_store(inst, cause, run, vcpu);
1611 if (er == EMULATE_FAIL) {
1612 kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n",
1613 opc, badvaddr);
1614 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1615 }
1616 }
1617
1618 if (er == EMULATE_DONE) {
1619 ret = RESUME_GUEST;
1620 } else if (er == EMULATE_DO_MMIO) {
1621 run->exit_reason = KVM_EXIT_MMIO;
1622 ret = RESUME_HOST;
1623 } else {
1624 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1625 ret = RESUME_HOST;
1626 }
1627 return ret;
1628}
1629
1630static u64 kvm_vz_get_one_regs[] = {
1631 KVM_REG_MIPS_CP0_INDEX,
1632 KVM_REG_MIPS_CP0_ENTRYLO0,
1633 KVM_REG_MIPS_CP0_ENTRYLO1,
1634 KVM_REG_MIPS_CP0_CONTEXT,
1635 KVM_REG_MIPS_CP0_PAGEMASK,
1636 KVM_REG_MIPS_CP0_PAGEGRAIN,
1637 KVM_REG_MIPS_CP0_WIRED,
1638 KVM_REG_MIPS_CP0_HWRENA,
1639 KVM_REG_MIPS_CP0_BADVADDR,
1640 KVM_REG_MIPS_CP0_COUNT,
1641 KVM_REG_MIPS_CP0_ENTRYHI,
1642 KVM_REG_MIPS_CP0_COMPARE,
1643 KVM_REG_MIPS_CP0_STATUS,
1644 KVM_REG_MIPS_CP0_INTCTL,
1645 KVM_REG_MIPS_CP0_CAUSE,
1646 KVM_REG_MIPS_CP0_EPC,
1647 KVM_REG_MIPS_CP0_PRID,
1648 KVM_REG_MIPS_CP0_EBASE,
1649 KVM_REG_MIPS_CP0_CONFIG,
1650 KVM_REG_MIPS_CP0_CONFIG1,
1651 KVM_REG_MIPS_CP0_CONFIG2,
1652 KVM_REG_MIPS_CP0_CONFIG3,
1653 KVM_REG_MIPS_CP0_CONFIG4,
1654 KVM_REG_MIPS_CP0_CONFIG5,
1655#ifdef CONFIG_64BIT
1656 KVM_REG_MIPS_CP0_XCONTEXT,
1657#endif
1658 KVM_REG_MIPS_CP0_ERROREPC,
1659
1660 KVM_REG_MIPS_COUNT_CTL,
1661 KVM_REG_MIPS_COUNT_RESUME,
1662 KVM_REG_MIPS_COUNT_HZ,
1663};
1664
1665static u64 kvm_vz_get_one_regs_contextconfig[] = {
1666 KVM_REG_MIPS_CP0_CONTEXTCONFIG,
1667#ifdef CONFIG_64BIT
1668 KVM_REG_MIPS_CP0_XCONTEXTCONFIG,
1669#endif
1670};
1671
1672static u64 kvm_vz_get_one_regs_segments[] = {
1673 KVM_REG_MIPS_CP0_SEGCTL0,
1674 KVM_REG_MIPS_CP0_SEGCTL1,
1675 KVM_REG_MIPS_CP0_SEGCTL2,
1676};
1677
1678static u64 kvm_vz_get_one_regs_htw[] = {
1679 KVM_REG_MIPS_CP0_PWBASE,
1680 KVM_REG_MIPS_CP0_PWFIELD,
1681 KVM_REG_MIPS_CP0_PWSIZE,
1682 KVM_REG_MIPS_CP0_PWCTL,
1683};
1684
1685static u64 kvm_vz_get_one_regs_kscratch[] = {
1686 KVM_REG_MIPS_CP0_KSCRATCH1,
1687 KVM_REG_MIPS_CP0_KSCRATCH2,
1688 KVM_REG_MIPS_CP0_KSCRATCH3,
1689 KVM_REG_MIPS_CP0_KSCRATCH4,
1690 KVM_REG_MIPS_CP0_KSCRATCH5,
1691 KVM_REG_MIPS_CP0_KSCRATCH6,
1692};
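/*
 * KScratch1..6 live in CP0 register 31, selects 2..7; the one_reg get/set
 * handlers below recover that select as idx = (id - KSCRATCH1) + 2 and
 * check it against cpu_guest_has_kscr() before accessing the register.
 */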
1693
1694static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu)
1695{
1696 unsigned long ret;
1697
1698 ret = ARRAY_SIZE(kvm_vz_get_one_regs);
1699 if (cpu_guest_has_userlocal)
1700 ++ret;
1701 if (cpu_guest_has_badinstr)
1702 ++ret;
1703 if (cpu_guest_has_badinstrp)
1704 ++ret;
1705 if (cpu_guest_has_contextconfig)
1706 ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig);
1707 if (cpu_guest_has_segments)
1708 ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
1709 if (cpu_guest_has_htw)
1710 ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw);
1711 if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar)
1712 ret += 1 + ARRAY_SIZE(vcpu->arch.maar);
1713 ret += __arch_hweight8(cpu_data[0].guest.kscratch_mask);
1714
1715 return ret;
1716}
1717
1718static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
1719{
1720 u64 index;
1721 unsigned int i;
1722
1723 if (copy_to_user(indices, kvm_vz_get_one_regs,
1724 sizeof(kvm_vz_get_one_regs)))
1725 return -EFAULT;
1726 indices += ARRAY_SIZE(kvm_vz_get_one_regs);
1727
1728 if (cpu_guest_has_userlocal) {
1729 index = KVM_REG_MIPS_CP0_USERLOCAL;
1730 if (copy_to_user(indices, &index, sizeof(index)))
1731 return -EFAULT;
1732 ++indices;
1733 }
1734 if (cpu_guest_has_badinstr) {
1735 index = KVM_REG_MIPS_CP0_BADINSTR;
1736 if (copy_to_user(indices, &index, sizeof(index)))
1737 return -EFAULT;
1738 ++indices;
1739 }
1740 if (cpu_guest_has_badinstrp) {
1741 index = KVM_REG_MIPS_CP0_BADINSTRP;
1742 if (copy_to_user(indices, &index, sizeof(index)))
1743 return -EFAULT;
1744 ++indices;
1745 }
1746 if (cpu_guest_has_contextconfig) {
1747 if (copy_to_user(indices, kvm_vz_get_one_regs_contextconfig,
1748 sizeof(kvm_vz_get_one_regs_contextconfig)))
1749 return -EFAULT;
1750 indices += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig);
1751 }
1752 if (cpu_guest_has_segments) {
1753 if (copy_to_user(indices, kvm_vz_get_one_regs_segments,
1754 sizeof(kvm_vz_get_one_regs_segments)))
1755 return -EFAULT;
1756 indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
1757 }
1758 if (cpu_guest_has_htw) {
1759 if (copy_to_user(indices, kvm_vz_get_one_regs_htw,
1760 sizeof(kvm_vz_get_one_regs_htw)))
1761 return -EFAULT;
1762 indices += ARRAY_SIZE(kvm_vz_get_one_regs_htw);
1763 }
1764 if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) {
1765 for (i = 0; i < ARRAY_SIZE(vcpu->arch.maar); ++i) {
1766 index = KVM_REG_MIPS_CP0_MAAR(i);
1767 if (copy_to_user(indices, &index, sizeof(index)))
1768 return -EFAULT;
1769 ++indices;
1770 }
1771
1772 index = KVM_REG_MIPS_CP0_MAARI;
1773 if (copy_to_user(indices, &index, sizeof(index)))
1774 return -EFAULT;
1775 ++indices;
1776 }
1777 for (i = 0; i < 6; ++i) {
1778 if (!cpu_guest_has_kscr(i + 2))
1779 continue;
1780
1781 if (copy_to_user(indices, &kvm_vz_get_one_regs_kscratch[i],
1782 sizeof(kvm_vz_get_one_regs_kscratch[i])))
1783 return -EFAULT;
1784 ++indices;
1785 }
1786
1787 return 0;
1788}
1789
1790static inline s64 entrylo_kvm_to_user(unsigned long v)
1791{
1792 s64 mask, ret = v;
1793
1794 if (BITS_PER_LONG == 32) {
1795 /*
1796 * KVM API exposes 64-bit version of the register, so move the
1797 * RI/XI bits up into place.
1798 */
1799 mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI;
1800 ret &= ~mask;
1801 ret |= ((s64)v & mask) << 32;
1802 }
1803 return ret;
1804}
1805
1806static inline unsigned long entrylo_user_to_kvm(s64 v)
1807{
1808 unsigned long mask, ret = v;
1809
1810 if (BITS_PER_LONG == 32) {
1811 /*
1812 * KVM API exposes 64-bit version of the register, so move the
1813 * RI/XI bits down into place.
1814 */
1815 mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI;
1816 ret &= ~mask;
1817 ret |= (v >> 32) & mask;
1818 }
1819 return ret;
1820}
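
/*
 * Illustrative example, assuming a 32-bit host where MIPS_ENTRYLO_RI/XI are
 * bits 31/30: an EntryLo value with RI set is reported to userspace with
 * bit 63 set, and a userspace value with bit 63 set is written back with
 * bit 31 set, since the 64-bit one_reg API keeps RI/XI in the upper word.
 */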
1821
1822static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
1823 const struct kvm_one_reg *reg,
1824 s64 *v)
1825{
1826 struct mips_coproc *cop0 = vcpu->arch.cop0;
1827 unsigned int idx;
1828
1829 switch (reg->id) {
1830 case KVM_REG_MIPS_CP0_INDEX:
1831 *v = (long)read_gc0_index();
1832 break;
1833 case KVM_REG_MIPS_CP0_ENTRYLO0:
1834 *v = entrylo_kvm_to_user(read_gc0_entrylo0());
1835 break;
1836 case KVM_REG_MIPS_CP0_ENTRYLO1:
1837 *v = entrylo_kvm_to_user(read_gc0_entrylo1());
1838 break;
1839 case KVM_REG_MIPS_CP0_CONTEXT:
1840 *v = (long)read_gc0_context();
1841 break;
1842 case KVM_REG_MIPS_CP0_CONTEXTCONFIG:
1843 if (!cpu_guest_has_contextconfig)
1844 return -EINVAL;
1845 *v = read_gc0_contextconfig();
1846 break;
1847 case KVM_REG_MIPS_CP0_USERLOCAL:
1848 if (!cpu_guest_has_userlocal)
1849 return -EINVAL;
1850 *v = read_gc0_userlocal();
1851 break;
1852#ifdef CONFIG_64BIT
1853 case KVM_REG_MIPS_CP0_XCONTEXTCONFIG:
1854 if (!cpu_guest_has_contextconfig)
1855 return -EINVAL;
1856 *v = read_gc0_xcontextconfig();
1857 break;
1858#endif
1859 case KVM_REG_MIPS_CP0_PAGEMASK:
1860 *v = (long)read_gc0_pagemask();
1861 break;
1862 case KVM_REG_MIPS_CP0_PAGEGRAIN:
1863 *v = (long)read_gc0_pagegrain();
1864 break;
1865 case KVM_REG_MIPS_CP0_SEGCTL0:
1866 if (!cpu_guest_has_segments)
1867 return -EINVAL;
1868 *v = read_gc0_segctl0();
1869 break;
1870 case KVM_REG_MIPS_CP0_SEGCTL1:
1871 if (!cpu_guest_has_segments)
1872 return -EINVAL;
1873 *v = read_gc0_segctl1();
1874 break;
1875 case KVM_REG_MIPS_CP0_SEGCTL2:
1876 if (!cpu_guest_has_segments)
1877 return -EINVAL;
1878 *v = read_gc0_segctl2();
1879 break;
1880 case KVM_REG_MIPS_CP0_PWBASE:
1881 if (!cpu_guest_has_htw)
1882 return -EINVAL;
1883 *v = read_gc0_pwbase();
1884 break;
1885 case KVM_REG_MIPS_CP0_PWFIELD:
1886 if (!cpu_guest_has_htw)
1887 return -EINVAL;
1888 *v = read_gc0_pwfield();
1889 break;
1890 case KVM_REG_MIPS_CP0_PWSIZE:
1891 if (!cpu_guest_has_htw)
1892 return -EINVAL;
1893 *v = read_gc0_pwsize();
1894 break;
1895 case KVM_REG_MIPS_CP0_WIRED:
1896 *v = (long)read_gc0_wired();
1897 break;
1898 case KVM_REG_MIPS_CP0_PWCTL:
1899 if (!cpu_guest_has_htw)
1900 return -EINVAL;
1901 *v = read_gc0_pwctl();
1902 break;
1903 case KVM_REG_MIPS_CP0_HWRENA:
1904 *v = (long)read_gc0_hwrena();
1905 break;
1906 case KVM_REG_MIPS_CP0_BADVADDR:
1907 *v = (long)read_gc0_badvaddr();
1908 break;
1909 case KVM_REG_MIPS_CP0_BADINSTR:
1910 if (!cpu_guest_has_badinstr)
1911 return -EINVAL;
1912 *v = read_gc0_badinstr();
1913 break;
1914 case KVM_REG_MIPS_CP0_BADINSTRP:
1915 if (!cpu_guest_has_badinstrp)
1916 return -EINVAL;
1917 *v = read_gc0_badinstrp();
1918 break;
1919 case KVM_REG_MIPS_CP0_COUNT:
1920 *v = kvm_mips_read_count(vcpu);
1921 break;
1922 case KVM_REG_MIPS_CP0_ENTRYHI:
1923 *v = (long)read_gc0_entryhi();
1924 break;
1925 case KVM_REG_MIPS_CP0_COMPARE:
1926 *v = (long)read_gc0_compare();
1927 break;
1928 case KVM_REG_MIPS_CP0_STATUS:
1929 *v = (long)read_gc0_status();
1930 break;
1931 case KVM_REG_MIPS_CP0_INTCTL:
1932 *v = read_gc0_intctl();
1933 break;
1934 case KVM_REG_MIPS_CP0_CAUSE:
1935 *v = (long)read_gc0_cause();
1936 break;
1937 case KVM_REG_MIPS_CP0_EPC:
1938 *v = (long)read_gc0_epc();
1939 break;
1940 case KVM_REG_MIPS_CP0_PRID:
1941 switch (boot_cpu_type()) {
1942 case CPU_CAVIUM_OCTEON3:
1943 /* Octeon III has a read-only guest.PRid */
1944 *v = read_gc0_prid();
1945 break;
1946 default:
1947 *v = (long)kvm_read_c0_guest_prid(cop0);
1948 break;
1949 };
1950 break;
1951 case KVM_REG_MIPS_CP0_EBASE:
1952 *v = kvm_vz_read_gc0_ebase();
1953 break;
1954 case KVM_REG_MIPS_CP0_CONFIG:
1955 *v = read_gc0_config();
1956 break;
1957 case KVM_REG_MIPS_CP0_CONFIG1:
1958 if (!cpu_guest_has_conf1)
1959 return -EINVAL;
1960 *v = read_gc0_config1();
1961 break;
1962 case KVM_REG_MIPS_CP0_CONFIG2:
1963 if (!cpu_guest_has_conf2)
1964 return -EINVAL;
1965 *v = read_gc0_config2();
1966 break;
1967 case KVM_REG_MIPS_CP0_CONFIG3:
1968 if (!cpu_guest_has_conf3)
1969 return -EINVAL;
1970 *v = read_gc0_config3();
1971 break;
1972 case KVM_REG_MIPS_CP0_CONFIG4:
1973 if (!cpu_guest_has_conf4)
1974 return -EINVAL;
1975 *v = read_gc0_config4();
1976 break;
1977 case KVM_REG_MIPS_CP0_CONFIG5:
1978 if (!cpu_guest_has_conf5)
1979 return -EINVAL;
1980 *v = read_gc0_config5();
1981 break;
1982 case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
1983 if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
1984 return -EINVAL;
1985 idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0);
1986 if (idx >= ARRAY_SIZE(vcpu->arch.maar))
1987 return -EINVAL;
1988 *v = vcpu->arch.maar[idx];
1989 break;
1990 case KVM_REG_MIPS_CP0_MAARI:
1991 if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
1992 return -EINVAL;
1993 *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0);
1994 break;
1995#ifdef CONFIG_64BIT
1996 case KVM_REG_MIPS_CP0_XCONTEXT:
1997 *v = read_gc0_xcontext();
1998 break;
1999#endif
2000 case KVM_REG_MIPS_CP0_ERROREPC:
2001 *v = (long)read_gc0_errorepc();
2002 break;
2003 case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
2004 idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
2005 if (!cpu_guest_has_kscr(idx))
2006 return -EINVAL;
2007 switch (idx) {
2008 case 2:
2009 *v = (long)read_gc0_kscratch1();
2010 break;
2011 case 3:
2012 *v = (long)read_gc0_kscratch2();
2013 break;
2014 case 4:
2015 *v = (long)read_gc0_kscratch3();
2016 break;
2017 case 5:
2018 *v = (long)read_gc0_kscratch4();
2019 break;
2020 case 6:
2021 *v = (long)read_gc0_kscratch5();
2022 break;
2023 case 7:
2024 *v = (long)read_gc0_kscratch6();
2025 break;
2026 }
2027 break;
2028 case KVM_REG_MIPS_COUNT_CTL:
2029 *v = vcpu->arch.count_ctl;
2030 break;
2031 case KVM_REG_MIPS_COUNT_RESUME:
2032 *v = ktime_to_ns(vcpu->arch.count_resume);
2033 break;
2034 case KVM_REG_MIPS_COUNT_HZ:
2035 *v = vcpu->arch.count_hz;
2036 break;
2037 default:
2038 return -EINVAL;
2039 }
2040 return 0;
2041}
2042
2043static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
2044 const struct kvm_one_reg *reg,
2045 s64 v)
2046{
2047 struct mips_coproc *cop0 = vcpu->arch.cop0;
2048 unsigned int idx;
2049 int ret = 0;
2050 unsigned int cur, change;
2051
2052 switch (reg->id) {
2053 case KVM_REG_MIPS_CP0_INDEX:
2054 write_gc0_index(v);
2055 break;
2056 case KVM_REG_MIPS_CP0_ENTRYLO0:
2057 write_gc0_entrylo0(entrylo_user_to_kvm(v));
2058 break;
2059 case KVM_REG_MIPS_CP0_ENTRYLO1:
2060 write_gc0_entrylo1(entrylo_user_to_kvm(v));
2061 break;
2062 case KVM_REG_MIPS_CP0_CONTEXT:
2063 write_gc0_context(v);
2064 break;
2065 case KVM_REG_MIPS_CP0_CONTEXTCONFIG:
2066 if (!cpu_guest_has_contextconfig)
2067 return -EINVAL;
2068 write_gc0_contextconfig(v);
2069 break;
2070 case KVM_REG_MIPS_CP0_USERLOCAL:
2071 if (!cpu_guest_has_userlocal)
2072 return -EINVAL;
2073 write_gc0_userlocal(v);
2074 break;
2075#ifdef CONFIG_64BIT
2076 case KVM_REG_MIPS_CP0_XCONTEXTCONFIG:
2077 if (!cpu_guest_has_contextconfig)
2078 return -EINVAL;
2079 write_gc0_xcontextconfig(v);
2080 break;
2081#endif
2082 case KVM_REG_MIPS_CP0_PAGEMASK:
2083 write_gc0_pagemask(v);
2084 break;
2085 case KVM_REG_MIPS_CP0_PAGEGRAIN:
2086 write_gc0_pagegrain(v);
2087 break;
2088 case KVM_REG_MIPS_CP0_SEGCTL0:
2089 if (!cpu_guest_has_segments)
2090 return -EINVAL;
2091 write_gc0_segctl0(v);
2092 break;
2093 case KVM_REG_MIPS_CP0_SEGCTL1:
2094 if (!cpu_guest_has_segments)
2095 return -EINVAL;
2096 write_gc0_segctl1(v);
2097 break;
2098 case KVM_REG_MIPS_CP0_SEGCTL2:
2099 if (!cpu_guest_has_segments)
2100 return -EINVAL;
2101 write_gc0_segctl2(v);
2102 break;
2103 case KVM_REG_MIPS_CP0_PWBASE:
2104 if (!cpu_guest_has_htw)
2105 return -EINVAL;
2106 write_gc0_pwbase(v);
2107 break;
2108 case KVM_REG_MIPS_CP0_PWFIELD:
2109 if (!cpu_guest_has_htw)
2110 return -EINVAL;
2111 write_gc0_pwfield(v);
2112 break;
2113 case KVM_REG_MIPS_CP0_PWSIZE:
2114 if (!cpu_guest_has_htw)
2115 return -EINVAL;
2116 write_gc0_pwsize(v);
2117 break;
2118 case KVM_REG_MIPS_CP0_WIRED:
2119 change_gc0_wired(MIPSR6_WIRED_WIRED, v);
2120 break;
2121 case KVM_REG_MIPS_CP0_PWCTL:
2122 if (!cpu_guest_has_htw)
2123 return -EINVAL;
2124 write_gc0_pwctl(v);
2125 break;
2126 case KVM_REG_MIPS_CP0_HWRENA:
2127 write_gc0_hwrena(v);
2128 break;
2129 case KVM_REG_MIPS_CP0_BADVADDR:
2130 write_gc0_badvaddr(v);
2131 break;
2132 case KVM_REG_MIPS_CP0_BADINSTR:
2133 if (!cpu_guest_has_badinstr)
2134 return -EINVAL;
2135 write_gc0_badinstr(v);
2136 break;
2137 case KVM_REG_MIPS_CP0_BADINSTRP:
2138 if (!cpu_guest_has_badinstrp)
2139 return -EINVAL;
2140 write_gc0_badinstrp(v);
2141 break;
2142 case KVM_REG_MIPS_CP0_COUNT:
2143 kvm_mips_write_count(vcpu, v);
2144 break;
2145 case KVM_REG_MIPS_CP0_ENTRYHI:
2146 write_gc0_entryhi(v);
2147 break;
2148 case KVM_REG_MIPS_CP0_COMPARE:
2149 kvm_mips_write_compare(vcpu, v, false);
2150 break;
2151 case KVM_REG_MIPS_CP0_STATUS:
2152 write_gc0_status(v);
2153 break;
2154 case KVM_REG_MIPS_CP0_INTCTL:
2155 write_gc0_intctl(v);
2156 break;
2157 case KVM_REG_MIPS_CP0_CAUSE:
2158 /*
2159 * If the timer is stopped or started (DC bit) it must look
2160 * atomic with changes to the timer interrupt pending bit (TI).
2161 * A timer interrupt should not happen in between.
2162 */
2163 if ((read_gc0_cause() ^ v) & CAUSEF_DC) {
2164 if (v & CAUSEF_DC) {
2165 /* disable timer first */
2166 kvm_mips_count_disable_cause(vcpu);
2167 change_gc0_cause((u32)~CAUSEF_DC, v);
2168 } else {
2169 /* enable timer last */
2170 change_gc0_cause((u32)~CAUSEF_DC, v);
2171 kvm_mips_count_enable_cause(vcpu);
2172 }
2173 } else {
2174 write_gc0_cause(v);
2175 }
2176 break;
2177 case KVM_REG_MIPS_CP0_EPC:
2178 write_gc0_epc(v);
2179 break;
2180 case KVM_REG_MIPS_CP0_PRID:
2181 switch (boot_cpu_type()) {
2182 case CPU_CAVIUM_OCTEON3:
2183 /* Octeon III has a guest.PRid, but it's read-only */
2184 break;
2185 default:
2186 kvm_write_c0_guest_prid(cop0, v);
2187 break;
2188 };
2189 break;
2190 case KVM_REG_MIPS_CP0_EBASE:
2191 kvm_vz_write_gc0_ebase(v);
2192 break;
2193 case KVM_REG_MIPS_CP0_CONFIG:
2194 cur = read_gc0_config();
2195 change = (cur ^ v) & kvm_vz_config_user_wrmask(vcpu);
2196 if (change) {
2197 v = cur ^ change;
2198 write_gc0_config(v);
2199 }
2200 break;
2201 case KVM_REG_MIPS_CP0_CONFIG1:
2202 if (!cpu_guest_has_conf1)
2203 break;
2204 cur = read_gc0_config1();
2205 change = (cur ^ v) & kvm_vz_config1_user_wrmask(vcpu);
2206 if (change) {
2207 v = cur ^ change;
2208 write_gc0_config1(v);
2209 }
2210 break;
2211 case KVM_REG_MIPS_CP0_CONFIG2:
2212 if (!cpu_guest_has_conf2)
2213 break;
2214 cur = read_gc0_config2();
2215 change = (cur ^ v) & kvm_vz_config2_user_wrmask(vcpu);
2216 if (change) {
2217 v = cur ^ change;
2218 write_gc0_config2(v);
2219 }
2220 break;
2221 case KVM_REG_MIPS_CP0_CONFIG3:
2222 if (!cpu_guest_has_conf3)
2223 break;
2224 cur = read_gc0_config3();
2225 change = (cur ^ v) & kvm_vz_config3_user_wrmask(vcpu);
2226 if (change) {
2227 v = cur ^ change;
2228 write_gc0_config3(v);
2229 }
2230 break;
2231 case KVM_REG_MIPS_CP0_CONFIG4:
2232 if (!cpu_guest_has_conf4)
2233 break;
2234 cur = read_gc0_config4();
2235 change = (cur ^ v) & kvm_vz_config4_user_wrmask(vcpu);
2236 if (change) {
2237 v = cur ^ change;
2238 write_gc0_config4(v);
2239 }
2240 break;
2241 case KVM_REG_MIPS_CP0_CONFIG5:
2242 if (!cpu_guest_has_conf5)
2243 break;
2244 cur = read_gc0_config5();
2245 change = (cur ^ v) & kvm_vz_config5_user_wrmask(vcpu);
2246 if (change) {
2247 v = cur ^ change;
2248 write_gc0_config5(v);
2249 }
2250 break;
2251 case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
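	/*
	 * The MAAR register ids form a contiguous range, so the array index
	 * is just the offset from MAAR(0). The value is sanitised by
	 * mips_process_maar() as for a guest dmtc0 write before being stored
	 * in the software copy.
	 */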
2252 if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
2253 return -EINVAL;
2254 idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0);
2255 if (idx >= ARRAY_SIZE(vcpu->arch.maar))
2256 return -EINVAL;
2257 vcpu->arch.maar[idx] = mips_process_maar(dmtc_op, v);
2258 break;
2259 case KVM_REG_MIPS_CP0_MAARI:
2260 if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
2261 return -EINVAL;
2262 kvm_write_maari(vcpu, v);
2263 break;
2264#ifdef CONFIG_64BIT
2265 case KVM_REG_MIPS_CP0_XCONTEXT:
2266 write_gc0_xcontext(v);
2267 break;
2268#endif
2269 case KVM_REG_MIPS_CP0_ERROREPC:
2270 write_gc0_errorepc(v);
2271 break;
2272 case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
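	/*
	 * KScratch1..KScratch6 are CP0 register 31, selects 2..7, hence the
	 * offset of 2 added to the index below.
	 */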
2273 idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
2274 if (!cpu_guest_has_kscr(idx))
2275 return -EINVAL;
2276 switch (idx) {
2277 case 2:
2278 write_gc0_kscratch1(v);
2279 break;
2280 case 3:
2281 write_gc0_kscratch2(v);
2282 break;
2283 case 4:
2284 write_gc0_kscratch3(v);
2285 break;
2286 case 5:
2287 write_gc0_kscratch4(v);
2288 break;
2289 case 6:
2290 write_gc0_kscratch5(v);
2291 break;
2292 case 7:
2293 write_gc0_kscratch6(v);
2294 break;
2295 }
2296 break;
2297 case KVM_REG_MIPS_COUNT_CTL:
2298 ret = kvm_mips_set_count_ctl(vcpu, v);
2299 break;
2300 case KVM_REG_MIPS_COUNT_RESUME:
2301 ret = kvm_mips_set_count_resume(vcpu, v);
2302 break;
2303 case KVM_REG_MIPS_COUNT_HZ:
2304 ret = kvm_mips_set_count_hz(vcpu, v);
2305 break;
2306 default:
2307 return -EINVAL;
2308 }
2309 return ret;
2310}
2311
2312#define guestid_cache(cpu) (cpu_data[cpu].guestid_cache)
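/*
 * GuestID allocation works like the ASID allocator: the per-CPU counter
 * carries a version number in the bits above GUESTID_MASK. When the masked
 * ID wraps around, the guest TLB (and root TLB entries tagged with old
 * GuestIDs) is flushed and a new version begins, invalidating every
 * previously assigned GuestID in one go.
 */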
2313static void kvm_vz_get_new_guestid(unsigned long cpu, struct kvm_vcpu *vcpu)
2314{
2315 unsigned long guestid = guestid_cache(cpu);
2316
2317 if (!(++guestid & GUESTID_MASK)) {
2318 if (cpu_has_vtag_icache)
2319 flush_icache_all();
2320
2321 if (!guestid) /* fix version if needed */
2322 guestid = GUESTID_FIRST_VERSION;
2323
2324 ++guestid; /* guestid 0 reserved for root */
2325
2326 /* start new guestid cycle */
2327 kvm_vz_local_flush_roottlb_all_guests();
2328 kvm_vz_local_flush_guesttlb_all();
2329 }
2330
2331 guestid_cache(cpu) = guestid;
2332}
2333
2334/* Returns 1 if the guest TLB may be clobbered */
2335static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu)
2336{
2337 int ret = 0;
2338 int i;
2339
2340 if (!vcpu->requests)
2341 return 0;
2342
2343 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2344 if (cpu_has_guestid) {
2345 /* Drop all GuestIDs for this VCPU */
2346 for_each_possible_cpu(i)
2347 vcpu->arch.vzguestid[i] = 0;
2348 /* This will clobber guest TLB contents too */
2349 ret = 1;
2350 }
2351 /*
2352 * For Root ASID Dealias (RAD) we don't do anything here, but we
2353 * still need the request to ensure we recheck asid_flush_mask.
2354 * We can still return 0 as only the root TLB will be affected
2355 * by a root ASID flush.
2356 */
2357 }
2358
2359 return ret;
2360}
2361
2362static void kvm_vz_vcpu_save_wired(struct kvm_vcpu *vcpu)
2363{
2364 unsigned int wired = read_gc0_wired();
2365 struct kvm_mips_tlb *tlbs;
2366 int i;
2367
2368 /* Expand the wired TLB array if necessary */
2369 wired &= MIPSR6_WIRED_WIRED;
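	/*
	 * MIPSR6_WIRED_WIRED above keeps only the wired entry count; on r6
	 * the same register also carries a Limit field in its upper bits.
	 */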
2370 if (wired > vcpu->arch.wired_tlb_limit) {
2371 tlbs = krealloc(vcpu->arch.wired_tlb, wired *
2372 sizeof(*vcpu->arch.wired_tlb), GFP_ATOMIC);
2373 if (WARN_ON(!tlbs)) {
2374 /* Save whatever we can */
2375 wired = vcpu->arch.wired_tlb_limit;
2376 } else {
2377 vcpu->arch.wired_tlb = tlbs;
2378 vcpu->arch.wired_tlb_limit = wired;
2379 }
2380 }
2381
2382 if (wired)
2383 /* Save wired entries from the guest TLB */
2384 kvm_vz_save_guesttlb(vcpu->arch.wired_tlb, 0, wired);
2385 /* Invalidate any dropped entries since last time */
2386 for (i = wired; i < vcpu->arch.wired_tlb_used; ++i) {
2387 vcpu->arch.wired_tlb[i].tlb_hi = UNIQUE_GUEST_ENTRYHI(i);
2388 vcpu->arch.wired_tlb[i].tlb_lo[0] = 0;
2389 vcpu->arch.wired_tlb[i].tlb_lo[1] = 0;
2390 vcpu->arch.wired_tlb[i].tlb_mask = 0;
2391 }
2392 vcpu->arch.wired_tlb_used = wired;
2393}
2394
2395static void kvm_vz_vcpu_load_wired(struct kvm_vcpu *vcpu)
2396{
2397 /* Load wired entries into the guest TLB */
2398 if (vcpu->arch.wired_tlb)
2399 kvm_vz_load_guesttlb(vcpu->arch.wired_tlb, 0,
2400 vcpu->arch.wired_tlb_used);
2401}
2402
2403static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu)
2404{
2405 struct kvm *kvm = vcpu->kvm;
2406 struct mm_struct *gpa_mm = &kvm->arch.gpa_mm;
2407 bool migrated;
2408
2409 /*
2410 * Are we entering guest context on a different CPU to last time?
2411 * If so, the VCPU's guest TLB state on this CPU may be stale.
2412 */
2413 migrated = (vcpu->arch.last_exec_cpu != cpu);
2414 vcpu->arch.last_exec_cpu = cpu;
2415
2416 /*
2417 * A vcpu's GuestID is set in GuestCtl1.ID when the vcpu is loaded and
2418 * remains set until another vcpu is loaded in. As a rule GuestRID
2419 * remains zeroed when in root context unless the kernel is busy
2420 * manipulating guest tlb entries.
2421 */
2422 if (cpu_has_guestid) {
2423 /*
2424 * Check if our GuestID is of an older version and thus invalid.
2425 *
2426 * We also discard the stored GuestID if we've executed on
2427 * another CPU, as the guest mappings may have changed without
2428 * hypervisor knowledge.
2429 */
2430 if (migrated ||
2431 (vcpu->arch.vzguestid[cpu] ^ guestid_cache(cpu)) &
2432 GUESTID_VERSION_MASK) {
2433 kvm_vz_get_new_guestid(cpu, vcpu);
2434 vcpu->arch.vzguestid[cpu] = guestid_cache(cpu);
2435 trace_kvm_guestid_change(vcpu,
2436 vcpu->arch.vzguestid[cpu]);
2437 }
2438
2439 /* Restore GuestID */
2440 change_c0_guestctl1(GUESTID_MASK, vcpu->arch.vzguestid[cpu]);
2441 } else {
2442 /*
2443 * The Guest TLB only stores a single guest's TLB state, so
2444 * flush it if another VCPU has executed on this CPU.
2445 *
2446 * We also flush if we've executed on another CPU, as the guest
2447 * mappings may have changed without hypervisor knowledge.
2448 */
2449 if (migrated || last_exec_vcpu[cpu] != vcpu)
2450 kvm_vz_local_flush_guesttlb_all();
2451 last_exec_vcpu[cpu] = vcpu;
2452
2453 /*
2454 * Root ASID dealiases guest GPA mappings in the root TLB.
2455 * Allocate new root ASID if needed.
2456 */
2457 if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.asid_flush_mask)
2458 || (cpu_context(cpu, gpa_mm) ^ asid_cache(cpu)) &
2459 asid_version_mask(cpu))
2460 get_new_mmu_context(gpa_mm, cpu);
2461 }
2462}
2463
2464static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2465{
2466 struct mips_coproc *cop0 = vcpu->arch.cop0;
2467 bool migrated, all;
2468
2469 /*
2470 * Have we migrated to a different CPU?
2471 * If so, any old guest TLB state may be stale.
2472 */
2473 migrated = (vcpu->arch.last_sched_cpu != cpu);
2474
2475 /*
2476 * Was this the last VCPU to run on this CPU?
2477 * If not, any old guest state from this VCPU will have been clobbered.
2478 */
2479 all = migrated || (last_vcpu[cpu] != vcpu);
2480 last_vcpu[cpu] = vcpu;
2481
2482 /*
2483 * Restore CP0_Wired unconditionally as we clear it after use, and
2484 * restore wired guest TLB entries (while in guest context).
2485 */
2486 kvm_restore_gc0_wired(cop0);
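	/*
	 * PF_VCPU is set while the KVM_RUN ioctl is executing the guest, so
	 * the guest TLB state only needs reloading here when we are being
	 * scheduled back in mid-run; other ioctls can skip it.
	 */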
2487 if (current->flags & PF_VCPU) {
2488 tlbw_use_hazard();
2489 kvm_vz_vcpu_load_tlb(vcpu, cpu);
2490 kvm_vz_vcpu_load_wired(vcpu);
2491 }
2492
2493 /*
2494 * Restore timer state regardless of whether this was the last VCPU to
2495 * run here, as e.g. Cause.TI can change over time if left unmaintained.
2496 */
2497 kvm_vz_restore_timer(vcpu);
2498
2499 /* Set MC bit if we want to trace guest mode changes */
2500 if (kvm_trace_guest_mode_change)
2501 set_c0_guestctl0(MIPS_GCTL0_MC);
2502 else
2503 clear_c0_guestctl0(MIPS_GCTL0_MC);
2504
2505 /* Don't bother restoring registers multiple times unless necessary */
2506 if (!all)
2507 return 0;
2508
2509 /*
2510 * Restore config registers first, as some implementations restrict
2511 * writes to other registers when the corresponding feature bits aren't
2512 * set. For example Status.CU1 cannot be set unless Config1.FP is set.
2513 */
2514 kvm_restore_gc0_config(cop0);
2515 if (cpu_guest_has_conf1)
2516 kvm_restore_gc0_config1(cop0);
2517 if (cpu_guest_has_conf2)
2518 kvm_restore_gc0_config2(cop0);
2519 if (cpu_guest_has_conf3)
2520 kvm_restore_gc0_config3(cop0);
2521 if (cpu_guest_has_conf4)
2522 kvm_restore_gc0_config4(cop0);
2523 if (cpu_guest_has_conf5)
2524 kvm_restore_gc0_config5(cop0);
2525 if (cpu_guest_has_conf6)
2526 kvm_restore_gc0_config6(cop0);
2527 if (cpu_guest_has_conf7)
2528 kvm_restore_gc0_config7(cop0);
2529
2530 kvm_restore_gc0_index(cop0);
2531 kvm_restore_gc0_entrylo0(cop0);
2532 kvm_restore_gc0_entrylo1(cop0);
2533 kvm_restore_gc0_context(cop0);
2534 if (cpu_guest_has_contextconfig)
2535 kvm_restore_gc0_contextconfig(cop0);
2536#ifdef CONFIG_64BIT
2537 kvm_restore_gc0_xcontext(cop0);
2538 if (cpu_guest_has_contextconfig)
2539 kvm_restore_gc0_xcontextconfig(cop0);
2540#endif
2541 kvm_restore_gc0_pagemask(cop0);
2542 kvm_restore_gc0_pagegrain(cop0);
2543 kvm_restore_gc0_hwrena(cop0);
2544 kvm_restore_gc0_badvaddr(cop0);
2545 kvm_restore_gc0_entryhi(cop0);
2546 kvm_restore_gc0_status(cop0);
2547 kvm_restore_gc0_intctl(cop0);
2548 kvm_restore_gc0_epc(cop0);
2549 kvm_vz_write_gc0_ebase(kvm_read_sw_gc0_ebase(cop0));
2550 if (cpu_guest_has_userlocal)
2551 kvm_restore_gc0_userlocal(cop0);
2552
2553 kvm_restore_gc0_errorepc(cop0);
2554
2555 /* restore KScratch registers if enabled in guest */
2556 if (cpu_guest_has_conf4) {
2557 if (cpu_guest_has_kscr(2))
2558 kvm_restore_gc0_kscratch1(cop0);
2559 if (cpu_guest_has_kscr(3))
2560 kvm_restore_gc0_kscratch2(cop0);
2561 if (cpu_guest_has_kscr(4))
2562 kvm_restore_gc0_kscratch3(cop0);
2563 if (cpu_guest_has_kscr(5))
2564 kvm_restore_gc0_kscratch4(cop0);
2565 if (cpu_guest_has_kscr(6))
2566 kvm_restore_gc0_kscratch5(cop0);
2567 if (cpu_guest_has_kscr(7))
2568 kvm_restore_gc0_kscratch6(cop0);
2569 }
2570
2571 if (cpu_guest_has_badinstr)
2572 kvm_restore_gc0_badinstr(cop0);
2573 if (cpu_guest_has_badinstrp)
2574 kvm_restore_gc0_badinstrp(cop0);
2575
2576 if (cpu_guest_has_segments) {
2577 kvm_restore_gc0_segctl0(cop0);
2578 kvm_restore_gc0_segctl1(cop0);
2579 kvm_restore_gc0_segctl2(cop0);
2580 }
2581
2582 /* restore HTW registers */
2583 if (cpu_guest_has_htw) {
2584 kvm_restore_gc0_pwbase(cop0);
2585 kvm_restore_gc0_pwfield(cop0);
2586 kvm_restore_gc0_pwsize(cop0);
2587 kvm_restore_gc0_pwctl(cop0);
2588 }
2589
2590 /* restore Root.GuestCtl2 from unused Guest guestctl2 register */
2591 if (cpu_has_guestctl2)
2592 write_c0_guestctl2(
2593 cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL]);
2594
2595 /*
2596 * Clear the linked load bit to break interrupted atomics. This
2597 * prevents an SC on the next VCPU from succeeding by matching an LL
2598 * on the previous VCPU.
2599 */
2600 if (cpu_guest_has_rw_llb)
2601 write_gc0_lladdr(0);
2602
2603 return 0;
2604}
2605
2606static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
2607{
2608 struct mips_coproc *cop0 = vcpu->arch.cop0;
2609
2610 if (current->flags & PF_VCPU)
2611 kvm_vz_vcpu_save_wired(vcpu);
2612
2613 kvm_lose_fpu(vcpu);
2614
2615 kvm_save_gc0_index(cop0);
2616 kvm_save_gc0_entrylo0(cop0);
2617 kvm_save_gc0_entrylo1(cop0);
2618 kvm_save_gc0_context(cop0);
2619 if (cpu_guest_has_contextconfig)
2620 kvm_save_gc0_contextconfig(cop0);
2621#ifdef CONFIG_64BIT
2622 kvm_save_gc0_xcontext(cop0);
2623 if (cpu_guest_has_contextconfig)
2624 kvm_save_gc0_xcontextconfig(cop0);
2625#endif
2626 kvm_save_gc0_pagemask(cop0);
2627 kvm_save_gc0_pagegrain(cop0);
2628 kvm_save_gc0_wired(cop0);
2629 /* allow wired TLB entries to be overwritten */
2630 clear_gc0_wired(MIPSR6_WIRED_WIRED);
2631 kvm_save_gc0_hwrena(cop0);
2632 kvm_save_gc0_badvaddr(cop0);
2633 kvm_save_gc0_entryhi(cop0);
2634 kvm_save_gc0_status(cop0);
2635 kvm_save_gc0_intctl(cop0);
2636 kvm_save_gc0_epc(cop0);
2637 kvm_write_sw_gc0_ebase(cop0, kvm_vz_read_gc0_ebase());
2638 if (cpu_guest_has_userlocal)
2639 kvm_save_gc0_userlocal(cop0);
2640
2641 /* only save implemented config registers */
2642 kvm_save_gc0_config(cop0);
2643 if (cpu_guest_has_conf1)
2644 kvm_save_gc0_config1(cop0);
2645 if (cpu_guest_has_conf2)
2646 kvm_save_gc0_config2(cop0);
2647 if (cpu_guest_has_conf3)
2648 kvm_save_gc0_config3(cop0);
2649 if (cpu_guest_has_conf4)
2650 kvm_save_gc0_config4(cop0);
2651 if (cpu_guest_has_conf5)
2652 kvm_save_gc0_config5(cop0);
2653 if (cpu_guest_has_conf6)
2654 kvm_save_gc0_config6(cop0);
2655 if (cpu_guest_has_conf7)
2656 kvm_save_gc0_config7(cop0);
2657
2658 kvm_save_gc0_errorepc(cop0);
2659
2660 /* save KScratch registers if enabled in guest */
2661 if (cpu_guest_has_conf4) {
2662 if (cpu_guest_has_kscr(2))
2663 kvm_save_gc0_kscratch1(cop0);
2664 if (cpu_guest_has_kscr(3))
2665 kvm_save_gc0_kscratch2(cop0);
2666 if (cpu_guest_has_kscr(4))
2667 kvm_save_gc0_kscratch3(cop0);
2668 if (cpu_guest_has_kscr(5))
2669 kvm_save_gc0_kscratch4(cop0);
2670 if (cpu_guest_has_kscr(6))
2671 kvm_save_gc0_kscratch5(cop0);
2672 if (cpu_guest_has_kscr(7))
2673 kvm_save_gc0_kscratch6(cop0);
2674 }
2675
2676 if (cpu_guest_has_badinstr)
2677 kvm_save_gc0_badinstr(cop0);
2678 if (cpu_guest_has_badinstrp)
2679 kvm_save_gc0_badinstrp(cop0);
2680
2681 if (cpu_guest_has_segments) {
2682 kvm_save_gc0_segctl0(cop0);
2683 kvm_save_gc0_segctl1(cop0);
2684 kvm_save_gc0_segctl2(cop0);
2685 }
2686
2687 /* save HTW registers if enabled in guest */
2688 if (cpu_guest_has_htw &&
2689 kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) {
2690 kvm_save_gc0_pwbase(cop0);
2691 kvm_save_gc0_pwfield(cop0);
2692 kvm_save_gc0_pwsize(cop0);
2693 kvm_save_gc0_pwctl(cop0);
2694 }
2695
2696 kvm_vz_save_timer(vcpu);
2697
2698 /* save Root.GuestCtl2 in unused Guest guestctl2 register */
2699 if (cpu_has_guestctl2)
2700 cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] =
2701 read_c0_guestctl2();
2702
2703 return 0;
2704}
2705
2706/**
2707 * kvm_vz_resize_guest_vtlb() - Attempt to resize guest VTLB.
2708 * @size: Number of guest VTLB entries (0 < @size <= root VTLB entries).
2709 *
2710 * Attempt to resize the guest VTLB by writing guest Config registers. This is
2711 * necessary for cores with a shared root/guest TLB to avoid overlap with wired
2712 * entries in the root VTLB.
2713 *
2714 * Returns: The resulting guest VTLB size.
2715 */
2716static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size)
2717{
2718 unsigned int config4 = 0, ret = 0, limit;
2719
2720 /* Write MMUSize - 1 into guest Config registers */
2721 if (cpu_guest_has_conf1)
2722 change_gc0_config1(MIPS_CONF1_TLBS,
2723 (size - 1) << MIPS_CONF1_TLBS_SHIFT);
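	/*
	 * Config1.MMUSize-1 is only a 6-bit field, so guest VTLB sizes above
	 * 64 entries spill into the Config4 extension fields written below
	 * (VTLBSizeExt or MMUSizeExt, depending on MMUExtDef).
	 */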
2724 if (cpu_guest_has_conf4) {
2725 config4 = read_gc0_config4();
2726 if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) ==
2727 MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) {
2728 config4 &= ~MIPS_CONF4_VTLBSIZEEXT;
2729 config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) <<
2730 MIPS_CONF4_VTLBSIZEEXT_SHIFT;
2731 } else if ((config4 & MIPS_CONF4_MMUEXTDEF) ==
2732 MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) {
2733 config4 &= ~MIPS_CONF4_MMUSIZEEXT;
2734 config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) <<
2735 MIPS_CONF4_MMUSIZEEXT_SHIFT;
2736 }
2737 write_gc0_config4(config4);
2738 }
2739
2740 /*
2741 * Set Guest.Wired.Limit = 0 (no limit, up to Guest.MMUSize-1), unless
2742 * that would exceed Root.Wired.Limit. Guest.Wired.Wired is cleared by
2743 * the same write so that the write is not dropped.
2744 */
2745 if (cpu_has_mips_r6) {
2746 limit = (read_c0_wired() & MIPSR6_WIRED_LIMIT) >>
2747 MIPSR6_WIRED_LIMIT_SHIFT;
2748 if (size - 1 <= limit)
2749 limit = 0;
2750 write_gc0_wired(limit << MIPSR6_WIRED_LIMIT_SHIFT);
2751 }
2752
2753 /* Read back MMUSize - 1 */
2754 back_to_back_c0_hazard();
2755 if (cpu_guest_has_conf1)
2756 ret = (read_gc0_config1() & MIPS_CONF1_TLBS) >>
2757 MIPS_CONF1_TLBS_SHIFT;
2758 if (config4) {
2759 if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) ==
2760 MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT)
2761 ret |= ((config4 & MIPS_CONF4_VTLBSIZEEXT) >>
2762 MIPS_CONF4_VTLBSIZEEXT_SHIFT) <<
2763 MIPS_CONF1_TLBS_SIZE;
2764 else if ((config4 & MIPS_CONF4_MMUEXTDEF) ==
2765 MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT)
2766 ret |= ((config4 & MIPS_CONF4_MMUSIZEEXT) >>
2767 MIPS_CONF4_MMUSIZEEXT_SHIFT) <<
2768 MIPS_CONF1_TLBS_SIZE;
2769 }
2770 return ret + 1;
2771}
2772
2773static int kvm_vz_hardware_enable(void)
2774{
2775 unsigned int mmu_size, guest_mmu_size, ftlb_size;
2776 u64 guest_cvmctl, cvmvmconfig;
2777
2778 switch (current_cpu_type()) {
2779 case CPU_CAVIUM_OCTEON3:
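		/*
		 * Octeon III partitions its physical TLB between root and
		 * guest via CvmVMConfig, so the guest gets its own share of
		 * entries rather than a resized shared VTLB.
		 */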
2780 /* Set up guest timer/perfcount IRQ lines */
2781 guest_cvmctl = read_gc0_cvmctl();
2782 guest_cvmctl &= ~CVMCTL_IPTI;
2783 guest_cvmctl |= 7ull << CVMCTL_IPTI_SHIFT;
2784 guest_cvmctl &= ~CVMCTL_IPPCI;
2785 guest_cvmctl |= 6ull << CVMCTL_IPPCI_SHIFT;
2786 write_gc0_cvmctl(guest_cvmctl);
2787
2788 cvmvmconfig = read_c0_cvmvmconfig();
2789 /* No I/O hole translation. */
2790 cvmvmconfig |= CVMVMCONF_DGHT;
2791 /* Halve the root MMU size */
2792 mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1)
2793 >> CVMVMCONF_MMUSIZEM1_S) + 1;
2794 guest_mmu_size = mmu_size / 2;
2795 mmu_size -= guest_mmu_size;
2796 cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1;
2797 cvmvmconfig |= mmu_size - 1;
2798 write_c0_cvmvmconfig(cvmvmconfig);
2799
2800 /* Update our records */
2801 current_cpu_data.tlbsize = mmu_size;
2802 current_cpu_data.tlbsizevtlb = mmu_size;
2803 current_cpu_data.guest.tlbsize = guest_mmu_size;
2804
2805 /* Flush moved entries in new (guest) context */
2806 kvm_vz_local_flush_guesttlb_all();
2807 break;
2808 default:
2809 /*
2810 * ImgTec cores tend to use a shared root/guest TLB. To avoid
2811 * overlap of root wired and guest entries, the guest TLB may
2812 * need resizing.
2813 */
2814 mmu_size = current_cpu_data.tlbsizevtlb;
2815 ftlb_size = current_cpu_data.tlbsize - mmu_size;
2816
2817 /* Try switching to maximum guest VTLB size for flush */
2818 guest_mmu_size = kvm_vz_resize_guest_vtlb(mmu_size);
2819 current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size;
2820 kvm_vz_local_flush_guesttlb_all();
2821
2822 /*
2823 * Reduce to make space for root wired entries and at least 2
2824 * root non-wired entries. This does assume that long-term wired
2825 * entries won't be added later.
2826 */
2827 guest_mmu_size = mmu_size - num_wired_entries() - 2;
2828 guest_mmu_size = kvm_vz_resize_guest_vtlb(guest_mmu_size);
2829 current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size;
2830
2831 /*
2832 * Write the VTLB size, but if another CPU has already written,
2833 * check it matches or we won't provide a consistent view to the
2834 * guest. If this ever happens it suggests an asymmetric number
2835 * of wired entries.
2836 */
2837 if (cmpxchg(&kvm_vz_guest_vtlb_size, 0, guest_mmu_size) &&
2838 WARN(guest_mmu_size != kvm_vz_guest_vtlb_size,
2839 "Available guest VTLB size mismatch"))
2840 return -EINVAL;
2841 break;
2842 }
2843
2844 /*
2845 * Enable virtualization features granting guest direct control of
2846 * certain features:
2847 * CP0=1: Guest coprocessor 0 context.
2848 * AT=Guest: Guest MMU.
2849 * CG=1: Hit (virtual address) CACHE operations (optional).
2850 * CF=1: Guest Config registers.
2851 * CGI=1: Indexed flush CACHE operations (optional).
2852 */
2853 write_c0_guestctl0(MIPS_GCTL0_CP0 |
2854 (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) |
2855 MIPS_GCTL0_CG | MIPS_GCTL0_CF);
2856 if (cpu_has_guestctl0ext)
2857 set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI);
2858
2859 if (cpu_has_guestid) {
2860 write_c0_guestctl1(0);
2861 kvm_vz_local_flush_roottlb_all_guests();
2862
2863 GUESTID_MASK = current_cpu_data.guestid_mask;
2864 GUESTID_FIRST_VERSION = GUESTID_MASK + 1;
2865 GUESTID_VERSION_MASK = ~GUESTID_MASK;
2866
2867 current_cpu_data.guestid_cache = GUESTID_FIRST_VERSION;
2868 }
2869
2870 /* clear any pending injected virtual guest interrupts */
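	/* (the 0x3f << 10 mask covers the six virtual interrupt pending bits) */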
2871 if (cpu_has_guestctl2)
2872 clear_c0_guestctl2(0x3f << 10);
2873
2874 return 0;
2875}
2876
2877static void kvm_vz_hardware_disable(void)
2878{
2879 u64 cvmvmconfig;
2880 unsigned int mmu_size;
2881
2882 /* Flush any remaining guest TLB entries */
2883 kvm_vz_local_flush_guesttlb_all();
2884
2885 switch (current_cpu_type()) {
2886 case CPU_CAVIUM_OCTEON3:
2887 /*
2888 * Allocate the whole TLB for root. Existing guest TLB entries will
2889 * change ownership to the root TLB. We should be safe though, as they
2890 * have already been flushed above while still in the guest TLB.
2891 */
2892 cvmvmconfig = read_c0_cvmvmconfig();
2893 mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1)
2894 >> CVMVMCONF_MMUSIZEM1_S) + 1;
2895 cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1;
2896 cvmvmconfig |= mmu_size - 1;
2897 write_c0_cvmvmconfig(cvmvmconfig);
2898
2899 /* Update our records */
2900 current_cpu_data.tlbsize = mmu_size;
2901 current_cpu_data.tlbsizevtlb = mmu_size;
2902 current_cpu_data.guest.tlbsize = 0;
2903
2904 /* Flush moved entries in new (root) context */
2905 local_flush_tlb_all();
2906 break;
2907 }
2908
2909 if (cpu_has_guestid) {
2910 write_c0_guestctl1(0);
2911 kvm_vz_local_flush_roottlb_all_guests();
2912 }
2913}
2914
2915static int kvm_vz_check_extension(struct kvm *kvm, long ext)
2916{
2917 int r;
2918
2919 switch (ext) {
2920 case KVM_CAP_MIPS_VZ:
2921 /* we wouldn't be here unless cpu_has_vz */
2922 r = 1;
2923 break;
2924#ifdef CONFIG_64BIT
2925 case KVM_CAP_MIPS_64BIT:
2926 /* We support 64-bit registers/operations and addresses */
2927 r = 2;
2928 break;
2929#endif
2930 default:
2931 r = 0;
2932 break;
2933 }
2934
2935 return r;
2936}
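
/*
 * Illustrative sketch only: userspace would probe the capabilities reported
 * above before asking for a VZ guest, roughly along the lines of
 *
 *	int kvm = open("/dev/kvm", O_RDWR);
 *	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_VZ) > 0)
 *		vm = ioctl(kvm, KVM_CREATE_VM, KVM_VM_MIPS_VZ);
 *
 * KVM_CAP_MIPS_64BIT returning 2 here advertises 64-bit guest support.
 */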
2937
2938static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu)
2939{
2940 int i;
2941
2942 for_each_possible_cpu(i)
2943 vcpu->arch.vzguestid[i] = 0;
2944
2945 return 0;
2946}
2947
2948static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu)
2949{
2950 int cpu;
2951
2952 /*
2953 * If the VCPU is freed and reused as another VCPU, we don't want the
2954 * matching pointer wrongly hanging around in last_vcpu[] or
2955 * last_exec_vcpu[].
2956 */
2957 for_each_possible_cpu(cpu) {
2958 if (last_vcpu[cpu] == vcpu)
2959 last_vcpu[cpu] = NULL;
2960 if (last_exec_vcpu[cpu] == vcpu)
2961 last_exec_vcpu[cpu] = NULL;
2962 }
2963}
2964
2965static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu)
2966{
2967 struct mips_coproc *cop0 = vcpu->arch.cop0;
2968 unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */
2969
2970 /*
2971 * Start off the timer at the same frequency as the host timer, but the
2972 * soft timer doesn't handle frequencies greater than 1GHz yet.
2973 */
2974 if (mips_hpt_frequency && mips_hpt_frequency <= NSEC_PER_SEC)
2975 count_hz = mips_hpt_frequency;
2976 kvm_mips_init_count(vcpu, count_hz);
2977
2978 /*
2979 * Initialize guest register state to valid architectural reset state.
2980 */
2981
2982 /* PageGrain */
2983 if (cpu_has_mips_r6)
2984 kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC);
2985 /* Wired */
2986 if (cpu_has_mips_r6)
2987 kvm_write_sw_gc0_wired(cop0,
2988 read_gc0_wired() & MIPSR6_WIRED_LIMIT);
2989 /* Status */
2990 kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL);
2991 if (cpu_has_mips_r6)
2992 kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status());
2993 /* IntCtl */
2994 kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() &
2995 (INTCTLF_IPFDC | INTCTLF_IPPCI | INTCTLF_IPTI));
2996 /* PRId */
2997 kvm_write_sw_gc0_prid(cop0, boot_cpu_data.processor_id);
2998 /* EBase */
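	/* the low bits supply the guest's read-only EBase.CPUNum field */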
2999 kvm_write_sw_gc0_ebase(cop0, (s32)0x80000000 | vcpu->vcpu_id);
3000 /* Config */
3001 kvm_save_gc0_config(cop0);
3002 /* architecturally writable (e.g. from guest) */
3003 kvm_change_sw_gc0_config(cop0, CONF_CM_CMASK,
3004 _page_cachable_default >> _CACHE_SHIFT);
3005 /* architecturally read only, but maybe writable from root */
3006 kvm_change_sw_gc0_config(cop0, MIPS_CONF_MT, read_c0_config());
3007 if (cpu_guest_has_conf1) {
3008 kvm_set_sw_gc0_config(cop0, MIPS_CONF_M);
3009 /* Config1 */
3010 kvm_save_gc0_config1(cop0);
3011 /* architecturally read only, but maybe writable from root */
3012 kvm_clear_sw_gc0_config1(cop0, MIPS_CONF1_C2 |
3013 MIPS_CONF1_MD |
3014 MIPS_CONF1_PC |
3015 MIPS_CONF1_WR |
3016 MIPS_CONF1_CA |
3017 MIPS_CONF1_FP);
3018 }
3019 if (cpu_guest_has_conf2) {
3020 kvm_set_sw_gc0_config1(cop0, MIPS_CONF_M);
3021 /* Config2 */
3022 kvm_save_gc0_config2(cop0);
3023 }
3024 if (cpu_guest_has_conf3) {
3025 kvm_set_sw_gc0_config2(cop0, MIPS_CONF_M);
3026 /* Config3 */
3027 kvm_save_gc0_config3(cop0);
3028 /* architecturally writable (e.g. from guest) */
3029 kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_ISA_OE);
3030 /* architecturally read only, but maybe writable from root */
3031 kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_MSA |
3032 MIPS_CONF3_BPG |
3033 MIPS_CONF3_ULRI |
3034 MIPS_CONF3_DSP |
3035 MIPS_CONF3_CTXTC |
3036 MIPS_CONF3_ITL |
3037 MIPS_CONF3_LPA |
3038 MIPS_CONF3_VEIC |
3039 MIPS_CONF3_VINT |
3040 MIPS_CONF3_SP |
3041 MIPS_CONF3_CDMM |
3042 MIPS_CONF3_MT |
3043 MIPS_CONF3_SM |
3044 MIPS_CONF3_TL);
3045 }
3046 if (cpu_guest_has_conf4) {
3047 kvm_set_sw_gc0_config3(cop0, MIPS_CONF_M);
3048 /* Config4 */
3049 kvm_save_gc0_config4(cop0);
3050 }
3051 if (cpu_guest_has_conf5) {
3052 kvm_set_sw_gc0_config4(cop0, MIPS_CONF_M);
3053 /* Config5 */
3054 kvm_save_gc0_config5(cop0);
3055 /* architecturally writable (e.g. from guest) */
3056 kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_K |
3057 MIPS_CONF5_CV |
3058 MIPS_CONF5_MSAEN |
3059 MIPS_CONF5_UFE |
3060 MIPS_CONF5_FRE |
3061 MIPS_CONF5_SBRI |
3062 MIPS_CONF5_UFR);
3063 /* architecturally read only, but maybe writable from root */
3064 kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_MRP);
3065 }
3066
3067 if (cpu_guest_has_contextconfig) {
3068 /* ContextConfig */
3069 kvm_write_sw_gc0_contextconfig(cop0, 0x007ffff0);
3070#ifdef CONFIG_64BIT
3071 /* XContextConfig */
3072 /* bits (SEGBITS-13+3):4 set, i.e. SEGBITS-13 one bits starting at bit 4 */
3073 kvm_write_sw_gc0_xcontextconfig(cop0,
3074 ((1ull << (cpu_vmbits - 13)) - 1) << 4);
3075#endif
3076 }
3077
3078 /* Implementation dependent, use the legacy layout */
3079 if (cpu_guest_has_segments) {
3080 /* SegCtl0, SegCtl1, SegCtl2 */
3081 kvm_write_sw_gc0_segctl0(cop0, 0x00200010);
3082 kvm_write_sw_gc0_segctl1(cop0, 0x00000002 |
3083 (_page_cachable_default >> _CACHE_SHIFT) <<
3084 (16 + MIPS_SEGCFG_C_SHIFT));
3085 kvm_write_sw_gc0_segctl2(cop0, 0x00380438);
3086 }
3087
3088 /* reset HTW registers */
3089 if (cpu_guest_has_htw && cpu_has_mips_r6) {
3090 /* PWField */
3091 kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302);
3092 /* PWSize */
3093 kvm_write_sw_gc0_pwsize(cop0, 1 << MIPS_PWSIZE_PTW_SHIFT);
3094 }
3095
3096 /* start with no pending virtual guest interrupts */
3097 if (cpu_has_guestctl2)
3098 cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = 0;
3099
3100 /* Put PC at reset vector */
3101 vcpu->arch.pc = CKSEG1ADDR(0x1fc00000);
3102
3103 return 0;
3104}
3105
3106static void kvm_vz_flush_shadow_all(struct kvm *kvm)
3107{
3108 if (cpu_has_guestid) {
3109 /* Flush GuestID for each VCPU individually */
3110 kvm_flush_remote_tlbs(kvm);
3111 } else {
3112 /*
3113 * For each CPU there is a single GPA ASID used by all VCPUs in
3114 * the VM, so it doesn't make sense for the VCPUs to handle
3115 * invalidation of these ASIDs individually.
3116 *
3117 * Instead mark all CPUs as needing ASID invalidation in
3118 * asid_flush_mask, and just use kvm_flush_remote_tlbs(kvm) to
3119 * kick any running VCPUs so they check asid_flush_mask.
3120 */
3121 cpumask_setall(&kvm->arch.asid_flush_mask);
3122 kvm_flush_remote_tlbs(kvm);
3123 }
3124}
3125
3126static void kvm_vz_flush_shadow_memslot(struct kvm *kvm,
3127 const struct kvm_memory_slot *slot)
3128{
3129 kvm_vz_flush_shadow_all(kvm);
3130}
3131
3132static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu)
3133{
3134 int cpu = smp_processor_id();
3135 int preserve_guest_tlb;
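	/*
	 * If pending requests will clobber the guest TLB (e.g. dropping
	 * GuestIDs forces a flush), save the wired guest entries first and
	 * restore them after the TLB state has been reloaded below.
	 */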
3136
3137 preserve_guest_tlb = kvm_vz_check_requests(vcpu, cpu);
3138
3139 if (preserve_guest_tlb)
3140 kvm_vz_vcpu_save_wired(vcpu);
3141
3142 kvm_vz_vcpu_load_tlb(vcpu, cpu);
3143
3144 if (preserve_guest_tlb)
3145 kvm_vz_vcpu_load_wired(vcpu);
3146}
3147
3148static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
3149{
3150 int cpu = smp_processor_id();
3151 int r;
3152
3153 kvm_vz_acquire_htimer(vcpu);
3154 /* Check if we have any exceptions/interrupts pending */
3155 kvm_mips_deliver_interrupts(vcpu, read_gc0_cause());
3156
3157 kvm_vz_check_requests(vcpu, cpu);
3158 kvm_vz_vcpu_load_tlb(vcpu, cpu);
3159 kvm_vz_vcpu_load_wired(vcpu);
3160
3161 r = vcpu->arch.vcpu_run(run, vcpu);
3162
3163 kvm_vz_vcpu_save_wired(vcpu);
3164
3165 return r;
3166}
3167
3168static struct kvm_mips_callbacks kvm_vz_callbacks = {
3169 .handle_cop_unusable = kvm_trap_vz_handle_cop_unusable,
3170 .handle_tlb_mod = kvm_trap_vz_handle_tlb_st_miss,
3171 .handle_tlb_ld_miss = kvm_trap_vz_handle_tlb_ld_miss,
3172 .handle_tlb_st_miss = kvm_trap_vz_handle_tlb_st_miss,
3173 .handle_addr_err_st = kvm_trap_vz_no_handler,
3174 .handle_addr_err_ld = kvm_trap_vz_no_handler,
3175 .handle_syscall = kvm_trap_vz_no_handler,
3176 .handle_res_inst = kvm_trap_vz_no_handler,
3177 .handle_break = kvm_trap_vz_no_handler,
3178 .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled,
3179 .handle_guest_exit = kvm_trap_vz_handle_guest_exit,
3180
3181 .hardware_enable = kvm_vz_hardware_enable,
3182 .hardware_disable = kvm_vz_hardware_disable,
3183 .check_extension = kvm_vz_check_extension,
3184 .vcpu_init = kvm_vz_vcpu_init,
3185 .vcpu_uninit = kvm_vz_vcpu_uninit,
3186 .vcpu_setup = kvm_vz_vcpu_setup,
3187 .flush_shadow_all = kvm_vz_flush_shadow_all,
3188 .flush_shadow_memslot = kvm_vz_flush_shadow_memslot,
3189 .gva_to_gpa = kvm_vz_gva_to_gpa_cb,
3190 .queue_timer_int = kvm_vz_queue_timer_int_cb,
3191 .dequeue_timer_int = kvm_vz_dequeue_timer_int_cb,
3192 .queue_io_int = kvm_vz_queue_io_int_cb,
3193 .dequeue_io_int = kvm_vz_dequeue_io_int_cb,
3194 .irq_deliver = kvm_vz_irq_deliver_cb,
3195 .irq_clear = kvm_vz_irq_clear_cb,
3196 .num_regs = kvm_vz_num_regs,
3197 .copy_reg_indices = kvm_vz_copy_reg_indices,
3198 .get_one_reg = kvm_vz_get_one_reg,
3199 .set_one_reg = kvm_vz_set_one_reg,
3200 .vcpu_load = kvm_vz_vcpu_load,
3201 .vcpu_put = kvm_vz_vcpu_put,
3202 .vcpu_run = kvm_vz_vcpu_run,
3203 .vcpu_reenter = kvm_vz_vcpu_reenter,
3204};
3205
3206int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
3207{
3208 if (!cpu_has_vz)
3209 return -ENODEV;
3210
3211 /*
3212 * VZ requires at least 2 KScratch registers, so it should have been
3213 * possible to allocate pgd_reg.
3214 */
3215 if (WARN(pgd_reg == -1,
3216 "pgd_reg not allocated even though cpu_has_vz\n"))
3217 return -ENODEV;
3218
3219 pr_info("Starting KVM with MIPS VZ extensions\n");
3220
3221 *install_callbacks = &kvm_vz_callbacks;
3222 return 0;
3223}
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 6db341347202..899e46279902 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -24,6 +24,7 @@
 /* Cache operations. */
 void (*flush_cache_all)(void);
 void (*__flush_cache_all)(void);
+EXPORT_SYMBOL_GPL(__flush_cache_all);
 void (*flush_cache_mm)(struct mm_struct *mm);
 void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index aa75849c36bc..3ca20283b31e 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -348,7 +348,7 @@ void maar_init(void)
 		upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff;
 
 		pr_info(" [%d]: ", i / 2);
-		if (!(attr & MIPS_MAAR_V)) {
+		if (!(attr & MIPS_MAAR_VL)) {
 			pr_cont("disabled\n");
 			continue;
 		}
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 4852e849128b..c0a55050f70f 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -87,6 +87,11 @@ static inline unsigned int get_oc(u32 inst)
 	return (inst >> 11) & 0x7fff;
 }
 
+static inline unsigned int get_tx_or_sx(u32 inst)
+{
+	return (inst) & 0x1;
+}
+
 #define IS_XFORM(inst)	(get_op(inst) == 31)
 #define IS_DSFORM(inst)	(get_op(inst) >= 56)
 
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 2c1d50792944..8a8ce220d7d0 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -64,6 +64,11 @@ struct iommu_table_ops {
 			long index,
 			unsigned long *hpa,
 			enum dma_data_direction *direction);
+	/* Real mode */
+	int (*exchange_rm)(struct iommu_table *tbl,
+			long index,
+			unsigned long *hpa,
+			enum dma_data_direction *direction);
 #endif
 	void (*clear)(struct iommu_table *tbl,
 			long index, long npages);
@@ -114,6 +119,7 @@ struct iommu_table {
 	struct list_head it_group_list;/* List of iommu_table_group_link */
 	unsigned long *it_userspace; /* userspace view of the table */
 	struct iommu_table_ops *it_ops;
+	struct kref it_kref;
 };
 
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
@@ -146,8 +152,8 @@ static inline void *get_iommu_table_base(struct device *dev)
 
 extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
 
-/* Frees table for an individual device node */
-extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
+extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
+extern int iommu_tce_table_put(struct iommu_table *tbl);
 
 /* Initializes an iommu_table based in values set in the passed-in
  * structure
@@ -208,6 +214,8 @@ extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
 extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 		unsigned long *hpa, enum dma_data_direction *direction);
+extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+		unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
 		int pci_domain_number,
@@ -288,11 +296,21 @@ static inline void iommu_restore(void)
 #endif
 
 /* The API to support IOMMU operations for VFIO */
-extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
-		unsigned long ioba, unsigned long tce_value,
-		unsigned long npages);
-extern int iommu_tce_put_param_check(struct iommu_table *tbl,
-		unsigned long ioba, unsigned long tce);
+extern int iommu_tce_check_ioba(unsigned long page_shift,
+		unsigned long offset, unsigned long size,
+		unsigned long ioba, unsigned long npages);
+extern int iommu_tce_check_gpa(unsigned long page_shift,
+		unsigned long gpa);
+
+#define iommu_tce_clear_param_check(tbl, ioba, tce_value, npages) \
+		(iommu_tce_check_ioba((tbl)->it_page_shift, \
+				(tbl)->it_offset, (tbl)->it_size, \
+				(ioba), (npages)) || (tce_value))
+#define iommu_tce_put_param_check(tbl, ioba, gpa) \
+		(iommu_tce_check_ioba((tbl)->it_page_shift, \
+				(tbl)->it_offset, (tbl)->it_size, \
+				(ioba), 1) || \
+		iommu_tce_check_gpa((tbl)->it_page_shift, (gpa)))
 
 extern void iommu_flush_tce(struct iommu_table *tbl);
 extern int iommu_take_ownership(struct iommu_table *tbl);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 5a8ab4a758f1..9c51ac4b8f36 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -45,9 +45,6 @@
45 45
46#define __KVM_HAVE_ARCH_INTC_INITIALIZED 46#define __KVM_HAVE_ARCH_INTC_INITIALIZED
47 47
48#ifdef CONFIG_KVM_MMIO
49#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
50#endif
51#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */ 48#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
52 49
53/* These values are internal and can be increased later */ 50/* These values are internal and can be increased later */
@@ -191,6 +188,13 @@ struct kvmppc_pginfo {
191 atomic_t refcnt; 188 atomic_t refcnt;
192}; 189};
193 190
191struct kvmppc_spapr_tce_iommu_table {
192 struct rcu_head rcu;
193 struct list_head next;
194 struct iommu_table *tbl;
195 struct kref kref;
196};
197
194struct kvmppc_spapr_tce_table { 198struct kvmppc_spapr_tce_table {
195 struct list_head list; 199 struct list_head list;
196 struct kvm *kvm; 200 struct kvm *kvm;
@@ -199,6 +203,7 @@ struct kvmppc_spapr_tce_table {
199 u32 page_shift; 203 u32 page_shift;
200 u64 offset; /* in pages */ 204 u64 offset; /* in pages */
201 u64 size; /* window size in pages */ 205 u64 size; /* window size in pages */
206 struct list_head iommu_tables;
202 struct page *pages[0]; 207 struct page *pages[0];
203}; 208};
204 209
@@ -352,6 +357,7 @@ struct kvmppc_pte {
352 bool may_read : 1; 357 bool may_read : 1;
353 bool may_write : 1; 358 bool may_write : 1;
354 bool may_execute : 1; 359 bool may_execute : 1;
360 unsigned long wimg;
355 u8 page_size; /* MMU_PAGE_xxx */ 361 u8 page_size; /* MMU_PAGE_xxx */
356}; 362};
357 363
@@ -448,6 +454,11 @@ struct mmio_hpte_cache {
448 unsigned int index; 454 unsigned int index;
449}; 455};
450 456
457#define KVMPPC_VSX_COPY_NONE 0
458#define KVMPPC_VSX_COPY_WORD 1
459#define KVMPPC_VSX_COPY_DWORD 2
460#define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3
461
451struct openpic; 462struct openpic;
452 463
453/* W0 and W1 of a XIVE thread management context */ 464/* W0 and W1 of a XIVE thread management context */
@@ -666,6 +677,21 @@ struct kvm_vcpu_arch {
666 u8 io_gpr; /* GPR used as IO source/target */ 677 u8 io_gpr; /* GPR used as IO source/target */
667 u8 mmio_host_swabbed; 678 u8 mmio_host_swabbed;
668 u8 mmio_sign_extend; 679 u8 mmio_sign_extend;
680 /* conversion between single and double precision */
681 u8 mmio_sp64_extend;
682 /*
683 * Number of simulations for vsx.
684 * If we use 2*8bytes to simulate 1*16bytes,
685 * then the number should be 2 and
686 * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD.
687 * If we use 4*4bytes to simulate 1*16bytes,
688 * the number should be 4 and
689 * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD.
690 */
691 u8 mmio_vsx_copy_nums;
692 u8 mmio_vsx_offset;
693 u8 mmio_vsx_copy_type;
694 u8 mmio_vsx_tx_sx_enabled;
669 u8 osi_needed; 695 u8 osi_needed;
670 u8 osi_enabled; 696 u8 osi_enabled;
671 u8 papr_enabled; 697 u8 papr_enabled;
@@ -758,6 +784,8 @@ struct kvm_vcpu_arch {
758}; 784};
759 785
760#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] 786#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
787#define VCPU_VSX_FPR(vcpu, i, j) ((vcpu)->arch.fp.fpr[i][j])
788#define VCPU_VSX_VR(vcpu, i) ((vcpu)->arch.vr.vr[i])
761 789
762/* Values for vcpu->arch.state */ 790/* Values for vcpu->arch.state */
763#define KVMPPC_VCPU_NOTREADY 0 791#define KVMPPC_VCPU_NOTREADY 0
@@ -771,6 +799,7 @@ struct kvm_vcpu_arch {
771#define KVM_MMIO_REG_FPR 0x0020 799#define KVM_MMIO_REG_FPR 0x0020
772#define KVM_MMIO_REG_QPR 0x0040 800#define KVM_MMIO_REG_QPR 0x0040
773#define KVM_MMIO_REG_FQPR 0x0060 801#define KVM_MMIO_REG_FQPR 0x0060
802#define KVM_MMIO_REG_VSX 0x0080
774 803
775#define __KVM_HAVE_ARCH_WQP 804#define __KVM_HAVE_ARCH_WQP
776#define __KVM_HAVE_CREATE_DEVICE 805#define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ed52b13d9ffb..e0d88c38602b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -78,9 +78,15 @@ extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		unsigned int rt, unsigned int bytes,
 		int is_default_endian);
+extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		unsigned int rt, unsigned int bytes,
+		int is_default_endian, int mmio_sign_extend);
 extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		u64 val, unsigned int bytes,
 		int is_default_endian);
+extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+		int rs, unsigned int bytes,
+		int is_default_endian);
 
 extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
 		enum instruction_type type, u32 *inst);
@@ -132,6 +138,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
@@ -164,13 +173,19 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
 		struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+		struct iommu_group *grp);
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+		struct iommu_group *grp);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 		struct kvm_create_spapr_tce_64 *args);
 extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
-		struct kvm_vcpu *vcpu, unsigned long liobn);
-extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
-		unsigned long ioba, unsigned long npages);
+		struct kvm *kvm, unsigned long liobn);
+#define kvmppc_ioba_validate(stt, ioba, npages) \
+		(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
+				(stt)->size, (ioba), (npages)) ? \
+				H_PARAMETER : H_SUCCESS)
 extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
 		unsigned long tce);
 extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
@@ -241,6 +256,7 @@ union kvmppc_one_reg {
 	u64	dval;
 	vector128 vval;
 	u64	vsxval[2];
+	u32	vsx32val[4];
 	struct {
 		u64	addr;
 		u64	length;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b9e3f0aca261..c70c8272523d 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -29,10 +29,14 @@ extern void mm_iommu_init(struct mm_struct *mm);
 extern void mm_iommu_cleanup(struct mm_struct *mm);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 		unsigned long ua, unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
+		struct mm_struct *mm, unsigned long ua, unsigned long size);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries);
 extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned long *hpa);
+extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+		unsigned long ua, unsigned long *hpa);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index e7d6d86563ee..73f06f4dddc7 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -86,32 +86,79 @@
86#define OP_TRAP_64 2 86#define OP_TRAP_64 2
87 87
88#define OP_31_XOP_TRAP 4 88#define OP_31_XOP_TRAP 4
89#define OP_31_XOP_LDX 21
89#define OP_31_XOP_LWZX 23 90#define OP_31_XOP_LWZX 23
91#define OP_31_XOP_LDUX 53
90#define OP_31_XOP_DCBST 54 92#define OP_31_XOP_DCBST 54
91#define OP_31_XOP_LWZUX 55 93#define OP_31_XOP_LWZUX 55
92#define OP_31_XOP_TRAP_64 68 94#define OP_31_XOP_TRAP_64 68
93#define OP_31_XOP_DCBF 86 95#define OP_31_XOP_DCBF 86
94#define OP_31_XOP_LBZX 87 96#define OP_31_XOP_LBZX 87
97#define OP_31_XOP_STDX 149
95#define OP_31_XOP_STWX 151 98#define OP_31_XOP_STWX 151
99#define OP_31_XOP_STDUX 181
100#define OP_31_XOP_STWUX 183
96#define OP_31_XOP_STBX 215 101#define OP_31_XOP_STBX 215
97#define OP_31_XOP_LBZUX 119 102#define OP_31_XOP_LBZUX 119
98#define OP_31_XOP_STBUX 247 103#define OP_31_XOP_STBUX 247
99#define OP_31_XOP_LHZX 279 104#define OP_31_XOP_LHZX 279
100#define OP_31_XOP_LHZUX 311 105#define OP_31_XOP_LHZUX 311
101#define OP_31_XOP_MFSPR 339 106#define OP_31_XOP_MFSPR 339
107#define OP_31_XOP_LWAX 341
102#define OP_31_XOP_LHAX 343 108#define OP_31_XOP_LHAX 343
109#define OP_31_XOP_LWAUX 373
103#define OP_31_XOP_LHAUX 375 110#define OP_31_XOP_LHAUX 375
104#define OP_31_XOP_STHX 407 111#define OP_31_XOP_STHX 407
105#define OP_31_XOP_STHUX 439 112#define OP_31_XOP_STHUX 439
106#define OP_31_XOP_MTSPR 467 113#define OP_31_XOP_MTSPR 467
107#define OP_31_XOP_DCBI 470 114#define OP_31_XOP_DCBI 470
115#define OP_31_XOP_LDBRX 532
108#define OP_31_XOP_LWBRX 534 116#define OP_31_XOP_LWBRX 534
109#define OP_31_XOP_TLBSYNC 566 117#define OP_31_XOP_TLBSYNC 566
118#define OP_31_XOP_STDBRX 660
110#define OP_31_XOP_STWBRX 662 119#define OP_31_XOP_STWBRX 662
120#define OP_31_XOP_STFSX 663
121#define OP_31_XOP_STFSUX 695
122#define OP_31_XOP_STFDX 727
123#define OP_31_XOP_STFDUX 759
111#define OP_31_XOP_LHBRX 790 124#define OP_31_XOP_LHBRX 790
125#define OP_31_XOP_LFIWAX 855
126#define OP_31_XOP_LFIWZX 887
112#define OP_31_XOP_STHBRX 918 127#define OP_31_XOP_STHBRX 918
128#define OP_31_XOP_STFIWX 983
129
130/* VSX Scalar Load Instructions */
131#define OP_31_XOP_LXSDX 588
132#define OP_31_XOP_LXSSPX 524
133#define OP_31_XOP_LXSIWAX 76
134#define OP_31_XOP_LXSIWZX 12
135
136/* VSX Scalar Store Instructions */
137#define OP_31_XOP_STXSDX 716
138#define OP_31_XOP_STXSSPX 652
139#define OP_31_XOP_STXSIWX 140
140
141/* VSX Vector Load Instructions */
142#define OP_31_XOP_LXVD2X 844
143#define OP_31_XOP_LXVW4X 780
144
145/* VSX Vector Load and Splat Instruction */
146#define OP_31_XOP_LXVDSX 332
147
148/* VSX Vector Store Instructions */
149#define OP_31_XOP_STXVD2X 972
150#define OP_31_XOP_STXVW4X 908
151
152#define OP_31_XOP_LFSX 535
153#define OP_31_XOP_LFSUX 567
154#define OP_31_XOP_LFDX 599
155#define OP_31_XOP_LFDUX 631
113 156
114#define OP_LWZ 32 157#define OP_LWZ 32
158#define OP_STFS 52
159#define OP_STFSU 53
160#define OP_STFD 54
161#define OP_STFDU 55
115#define OP_LD 58 162#define OP_LD 58
116#define OP_LWZU 33 163#define OP_LWZU 33
117#define OP_LBZ 34 164#define OP_LBZ 34
@@ -127,6 +174,17 @@
127#define OP_LHAU 43 174#define OP_LHAU 43
128#define OP_STH 44 175#define OP_STH 44
129#define OP_STHU 45 176#define OP_STHU 45
177#define OP_LMW 46
178#define OP_STMW 47
179#define OP_LFS 48
180#define OP_LFSU 49
181#define OP_LFD 50
182#define OP_LFDU 51
183#define OP_STFS 52
184#define OP_STFSU 53
185#define OP_STFD 54
186#define OP_STFDU 55
187#define OP_LQ 56
130 188
131/* sorted alphabetically */ 189/* sorted alphabetically */
132#define PPC_INST_BHRBE 0x7c00025c 190#define PPC_INST_BHRBE 0x7c00025c
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 4edbe4bb0e8b..07fbeb927834 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -29,6 +29,9 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_GUEST_DEBUG
 
+/* Not always available, but if it is, this is the correct offset. */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
 struct kvm_regs {
 	__u64 pc;
 	__u64 cr;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5f202a566ec5..f2b724cd9e64 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
711 return tbl; 711 return tbl;
712} 712}
713 713
714void iommu_free_table(struct iommu_table *tbl, const char *node_name) 714static void iommu_table_free(struct kref *kref)
715{ 715{
716 unsigned long bitmap_sz; 716 unsigned long bitmap_sz;
717 unsigned int order; 717 unsigned int order;
718 struct iommu_table *tbl;
718 719
719 if (!tbl) 720 tbl = container_of(kref, struct iommu_table, it_kref);
720 return; 721
722 if (tbl->it_ops->free)
723 tbl->it_ops->free(tbl);
721 724
722 if (!tbl->it_map) { 725 if (!tbl->it_map) {
723 kfree(tbl); 726 kfree(tbl);
@@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
733 736
734 /* verify that table contains no entries */ 737 /* verify that table contains no entries */
735 if (!bitmap_empty(tbl->it_map, tbl->it_size)) 738 if (!bitmap_empty(tbl->it_map, tbl->it_size))
736 pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); 739 pr_warn("%s: Unexpected TCEs\n", __func__);
737 740
738 /* calculate bitmap size in bytes */ 741 /* calculate bitmap size in bytes */
739 bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); 742 bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
746 kfree(tbl); 749 kfree(tbl);
747} 750}
748 751
752struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
753{
754 if (kref_get_unless_zero(&tbl->it_kref))
755 return tbl;
756
757 return NULL;
758}
759EXPORT_SYMBOL_GPL(iommu_tce_table_get);
760
761int iommu_tce_table_put(struct iommu_table *tbl)
762{
763 if (WARN_ON(!tbl))
764 return 0;
765
766 return kref_put(&tbl->it_kref, iommu_table_free);
767}
768EXPORT_SYMBOL_GPL(iommu_tce_table_put);
769
749/* Creates TCEs for a user provided buffer. The user buffer must be 770/* Creates TCEs for a user provided buffer. The user buffer must be
750 * contiguous real kernel storage (not vmalloc). The address passed here 771 * contiguous real kernel storage (not vmalloc). The address passed here
751 * comprises a page address and offset into that page. The dma_addr_t 772 * comprises a page address and offset into that page. The dma_addr_t
@@ -942,47 +963,36 @@ void iommu_flush_tce(struct iommu_table *tbl)
942} 963}
943EXPORT_SYMBOL_GPL(iommu_flush_tce); 964EXPORT_SYMBOL_GPL(iommu_flush_tce);
944 965
945int iommu_tce_clear_param_check(struct iommu_table *tbl, 966int iommu_tce_check_ioba(unsigned long page_shift,
946 unsigned long ioba, unsigned long tce_value, 967 unsigned long offset, unsigned long size,
947 unsigned long npages) 968 unsigned long ioba, unsigned long npages)
948{ 969{
949 /* tbl->it_ops->clear() does not support any value but 0 */ 970 unsigned long mask = (1UL << page_shift) - 1;
950 if (tce_value)
951 return -EINVAL;
952 971
953 if (ioba & ~IOMMU_PAGE_MASK(tbl)) 972 if (ioba & mask)
954 return -EINVAL; 973 return -EINVAL;
955 974
956 ioba >>= tbl->it_page_shift; 975 ioba >>= page_shift;
957 if (ioba < tbl->it_offset) 976 if (ioba < offset)
958 return -EINVAL; 977 return -EINVAL;
959 978
960 if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) 979 if ((ioba + 1) > (offset + size))
961 return -EINVAL; 980 return -EINVAL;
962 981
963 return 0; 982 return 0;
964} 983}
965EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); 984EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);
966 985
967int iommu_tce_put_param_check(struct iommu_table *tbl, 986int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
968 unsigned long ioba, unsigned long tce)
969{ 987{
970 if (tce & ~IOMMU_PAGE_MASK(tbl)) 988 unsigned long mask = (1UL << page_shift) - 1;
971 return -EINVAL;
972
973 if (ioba & ~IOMMU_PAGE_MASK(tbl))
974 return -EINVAL;
975 989
976 ioba >>= tbl->it_page_shift; 990 if (gpa & mask)
977 if (ioba < tbl->it_offset)
978 return -EINVAL;
979
980 if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
981 return -EINVAL; 991 return -EINVAL;
982 992
983 return 0; 993 return 0;
984} 994}
985EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); 995EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
986 996
987long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, 997long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
988 unsigned long *hpa, enum dma_data_direction *direction) 998 unsigned long *hpa, enum dma_data_direction *direction)
@@ -1004,6 +1014,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
1004} 1014}
1005EXPORT_SYMBOL_GPL(iommu_tce_xchg); 1015EXPORT_SYMBOL_GPL(iommu_tce_xchg);
1006 1016
1017#ifdef CONFIG_PPC_BOOK3S_64
1018long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
1019 unsigned long *hpa, enum dma_data_direction *direction)
1020{
1021 long ret;
1022
1023 ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
1024
1025 if (!ret && ((*direction == DMA_FROM_DEVICE) ||
1026 (*direction == DMA_BIDIRECTIONAL))) {
1027 struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
1028
1029 if (likely(pg)) {
1030 SetPageDirty(pg);
1031 } else {
1032 tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
1033 ret = -EFAULT;
1034 }
1035 }
1036
1037 return ret;
1038}
1039EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
1040#endif
1041
1007int iommu_take_ownership(struct iommu_table *tbl) 1042int iommu_take_ownership(struct iommu_table *tbl)
1008{ 1043{
1009 unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; 1044 unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
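
The iommu_tce_check_ioba()/iommu_tce_check_gpa() helpers added above take the page shift and window geometry as plain arguments, so the same checks can be applied to a guest-visible TCE table as well as to a host iommu_table. Below is a minimal standalone sketch of that arithmetic, not kernel code; names and values are illustrative.

#include <stdio.h>

/*
 * Same alignment and window arithmetic as iommu_tce_check_ioba() and
 * iommu_tce_check_gpa() in the hunk above.  Note that, as in the hunk,
 * only the first page of the range is checked against the window end.
 */
static int check_ioba(unsigned long page_shift, unsigned long offset,
                      unsigned long size, unsigned long ioba,
                      unsigned long npages)
{
        unsigned long mask = (1UL << page_shift) - 1;

        if (ioba & mask)                        /* not IOMMU-page aligned */
                return -1;

        ioba >>= page_shift;
        if (ioba < offset)                      /* below the DMA window */
                return -1;

        if ((ioba + 1) > (offset + size))       /* past the end of the window */
                return -1;

        (void)npages;                           /* mirrored from the kernel signature */
        return 0;
}

static int check_gpa(unsigned long page_shift, unsigned long gpa)
{
        return (gpa & ((1UL << page_shift) - 1)) ? -1 : 0;
}

int main(void)
{
        /* hypothetical 64K-page window of 512 pages starting at page 0 */
        printf("%d %d %d\n",
               check_ioba(16, 0, 512, 0x30000, 1),      /* 0: aligned, in range */
               check_ioba(16, 0, 512, 0x12345, 1),      /* -1: not page aligned */
               check_gpa(16, 0x20000));                 /* 0: aligned gpa       */
        return 0;
}
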
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index b9d66e53b773..24de532c1736 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -67,6 +67,7 @@ config KVM_BOOK3S_64
67 select KVM_BOOK3S_64_HANDLER 67 select KVM_BOOK3S_64_HANDLER
68 select KVM 68 select KVM
69 select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE 69 select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
70 select SPAPR_TCE_IOMMU if IOMMU_SUPPORT
70 ---help--- 71 ---help---
71 Support running unmodified book3s_64 and book3s_32 guest kernels 72 Support running unmodified book3s_64 and book3s_32 guest kernels
72 in virtual machines on book3s_64 host processors. 73 in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index cb8009cd688d..72d977e30952 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -198,6 +198,24 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
198} 198}
199EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); 199EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
200 200
201void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
202{
203 /* might as well deliver this straight away */
204 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, 0);
205}
206
207void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
208{
209 /* might as well deliver this straight away */
210 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, 0);
211}
212
213void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu)
214{
215 /* might as well deliver this straight away */
216 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, 0);
217}
218
201void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 219void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
202{ 220{
203 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 221 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 70153578131a..29ebe2fd5867 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -319,6 +319,7 @@ do_second:
319 gpte->may_execute = true; 319 gpte->may_execute = true;
320 gpte->may_read = false; 320 gpte->may_read = false;
321 gpte->may_write = false; 321 gpte->may_write = false;
322 gpte->wimg = r & HPTE_R_WIMG;
322 323
323 switch (pp) { 324 switch (pp) {
324 case 0: 325 case 0:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f4fd26..145a61892c48 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
145 else 145 else
146 kvmppc_mmu_flush_icache(pfn); 146 kvmppc_mmu_flush_icache(pfn);
147 147
148 rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
149
148 /* 150 /*
149 * Use 64K pages if possible; otherwise, on 64K page kernels, 151 * Use 64K pages if possible; otherwise, on 64K page kernels,
150 * we need to transfer 4 more bits from guest real to host real addr. 152 * we need to transfer 4 more bits from guest real to host real addr.
@@ -177,12 +179,15 @@ map_again:
177 ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, 179 ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
178 hpsize, hpsize, MMU_SEGSIZE_256M); 180 hpsize, hpsize, MMU_SEGSIZE_256M);
179 181
180 if (ret < 0) { 182 if (ret == -1) {
181 /* If we couldn't map a primary PTE, try a secondary */ 183 /* If we couldn't map a primary PTE, try a secondary */
182 hash = ~hash; 184 hash = ~hash;
183 vflags ^= HPTE_V_SECONDARY; 185 vflags ^= HPTE_V_SECONDARY;
184 attempt++; 186 attempt++;
185 goto map_again; 187 goto map_again;
188 } else if (ret < 0) {
189 r = -EIO;
190 goto out_unlock;
186 } else { 191 } else {
187 trace_kvm_book3s_64_mmu_map(rflags, hpteg, 192 trace_kvm_book3s_64_mmu_map(rflags, hpteg,
188 vpn, hpaddr, orig_pte); 193 vpn, hpaddr, orig_pte);
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 3e26cd4979f9..a160c14304eb 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -28,6 +28,8 @@
28#include <linux/hugetlb.h> 28#include <linux/hugetlb.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/anon_inodes.h> 30#include <linux/anon_inodes.h>
31#include <linux/iommu.h>
32#include <linux/file.h>
31 33
32#include <asm/tlbflush.h> 34#include <asm/tlbflush.h>
33#include <asm/kvm_ppc.h> 35#include <asm/kvm_ppc.h>
@@ -40,6 +42,7 @@
40#include <asm/udbg.h> 42#include <asm/udbg.h>
41#include <asm/iommu.h> 43#include <asm/iommu.h>
42#include <asm/tce.h> 44#include <asm/tce.h>
45#include <asm/mmu_context.h>
43 46
44static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) 47static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
45{ 48{
@@ -91,6 +94,137 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
91 return ret; 94 return ret;
92} 95}
93 96
97static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
98{
99 struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
100 struct kvmppc_spapr_tce_iommu_table, rcu);
101
102 iommu_tce_table_put(stit->tbl);
103
104 kfree(stit);
105}
106
107static void kvm_spapr_tce_liobn_put(struct kref *kref)
108{
109 struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
110 struct kvmppc_spapr_tce_iommu_table, kref);
111
112 list_del_rcu(&stit->next);
113
114 call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
115}
116
117extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
118 struct iommu_group *grp)
119{
120 int i;
121 struct kvmppc_spapr_tce_table *stt;
122 struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
123 struct iommu_table_group *table_group = NULL;
124
125 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
126
127 table_group = iommu_group_get_iommudata(grp);
128 if (WARN_ON(!table_group))
129 continue;
130
131 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
132 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
133 if (table_group->tables[i] != stit->tbl)
134 continue;
135
136 kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
137 return;
138 }
139 }
140 }
141}
142
143extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
144 struct iommu_group *grp)
145{
146 struct kvmppc_spapr_tce_table *stt = NULL;
147 bool found = false;
148 struct iommu_table *tbl = NULL;
149 struct iommu_table_group *table_group;
150 long i;
151 struct kvmppc_spapr_tce_iommu_table *stit;
152 struct fd f;
153
154 f = fdget(tablefd);
155 if (!f.file)
156 return -EBADF;
157
158 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
159 if (stt == f.file->private_data) {
160 found = true;
161 break;
162 }
163 }
164
165 fdput(f);
166
167 if (!found)
168 return -EINVAL;
169
170 table_group = iommu_group_get_iommudata(grp);
171 if (WARN_ON(!table_group))
172 return -EFAULT;
173
174 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
175 struct iommu_table *tbltmp = table_group->tables[i];
176
177 if (!tbltmp)
178 continue;
179 /*
180 * Make sure hardware table parameters are exactly the same;
181 * this is used in the TCE handlers where boundary checks
182 * use only the first attached table.
183 */
184 if ((tbltmp->it_page_shift == stt->page_shift) &&
185 (tbltmp->it_offset == stt->offset) &&
186 (tbltmp->it_size == stt->size)) {
187 /*
188 * Reference the table to avoid races with
189 * add/remove DMA windows.
190 */
191 tbl = iommu_tce_table_get(tbltmp);
192 break;
193 }
194 }
195 if (!tbl)
196 return -EINVAL;
197
198 list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
199 if (tbl != stit->tbl)
200 continue;
201
202 if (!kref_get_unless_zero(&stit->kref)) {
203 /* stit is being destroyed */
204 iommu_tce_table_put(tbl);
205 return -ENOTTY;
206 }
207 /*
 208 * The table is already known to this KVM; we just increased
 209 * its KVM reference counter, so we can return.
210 */
211 return 0;
212 }
213
214 stit = kzalloc(sizeof(*stit), GFP_KERNEL);
215 if (!stit) {
216 iommu_tce_table_put(tbl);
217 return -ENOMEM;
218 }
219
220 stit->tbl = tbl;
221 kref_init(&stit->kref);
222
223 list_add_rcu(&stit->next, &stt->iommu_tables);
224
225 return 0;
226}
227
94static void release_spapr_tce_table(struct rcu_head *head) 228static void release_spapr_tce_table(struct rcu_head *head)
95{ 229{
96 struct kvmppc_spapr_tce_table *stt = container_of(head, 230 struct kvmppc_spapr_tce_table *stt = container_of(head,
@@ -130,9 +264,18 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
130static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) 264static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
131{ 265{
132 struct kvmppc_spapr_tce_table *stt = filp->private_data; 266 struct kvmppc_spapr_tce_table *stt = filp->private_data;
267 struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
133 268
134 list_del_rcu(&stt->list); 269 list_del_rcu(&stt->list);
135 270
271 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
272 WARN_ON(!kref_read(&stit->kref));
273 while (1) {
274 if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
275 break;
276 }
277 }
278
136 kvm_put_kvm(stt->kvm); 279 kvm_put_kvm(stt->kvm);
137 280
138 kvmppc_account_memlimit( 281 kvmppc_account_memlimit(
@@ -164,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
164 return -EBUSY; 307 return -EBUSY;
165 } 308 }
166 309
167 size = args->size; 310 size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
168 npages = kvmppc_tce_pages(size); 311 npages = kvmppc_tce_pages(size);
169 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); 312 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
170 if (ret) { 313 if (ret) {
@@ -183,6 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
183 stt->offset = args->offset; 326 stt->offset = args->offset;
184 stt->size = size; 327 stt->size = size;
185 stt->kvm = kvm; 328 stt->kvm = kvm;
329 INIT_LIST_HEAD_RCU(&stt->iommu_tables);
186 330
187 for (i = 0; i < npages; i++) { 331 for (i = 0; i < npages; i++) {
188 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); 332 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -211,15 +355,106 @@ fail:
211 return ret; 355 return ret;
212} 356}
213 357
358static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
359{
360 unsigned long hpa = 0;
361 enum dma_data_direction dir = DMA_NONE;
362
363 iommu_tce_xchg(tbl, entry, &hpa, &dir);
364}
365
366static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
367 struct iommu_table *tbl, unsigned long entry)
368{
369 struct mm_iommu_table_group_mem_t *mem = NULL;
370 const unsigned long pgsize = 1ULL << tbl->it_page_shift;
371 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
372
373 if (!pua)
374 /* it_userspace allocation might be delayed */
375 return H_TOO_HARD;
376
377 mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
378 if (!mem)
379 return H_TOO_HARD;
380
381 mm_iommu_mapped_dec(mem);
382
383 *pua = 0;
384
385 return H_SUCCESS;
386}
387
388static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
389 struct iommu_table *tbl, unsigned long entry)
390{
391 enum dma_data_direction dir = DMA_NONE;
392 unsigned long hpa = 0;
393 long ret;
394
395 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
396 return H_HARDWARE;
397
398 if (dir == DMA_NONE)
399 return H_SUCCESS;
400
401 ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
402 if (ret != H_SUCCESS)
403 iommu_tce_xchg(tbl, entry, &hpa, &dir);
404
405 return ret;
406}
407
408long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
409 unsigned long entry, unsigned long ua,
410 enum dma_data_direction dir)
411{
412 long ret;
413 unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
414 struct mm_iommu_table_group_mem_t *mem;
415
416 if (!pua)
417 /* it_userspace allocation might be delayed */
418 return H_TOO_HARD;
419
420 mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
421 if (!mem)
 422 /* This only handles v2 IOMMU type; v1 is handled via ioctl() */
423 return H_TOO_HARD;
424
425 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
426 return H_HARDWARE;
427
428 if (mm_iommu_mapped_inc(mem))
429 return H_CLOSED;
430
431 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
432 if (WARN_ON_ONCE(ret)) {
433 mm_iommu_mapped_dec(mem);
434 return H_HARDWARE;
435 }
436
437 if (dir != DMA_NONE)
438 kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
439
440 *pua = ua;
441
442 return 0;
443}
444
214long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 445long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
215 unsigned long ioba, unsigned long tce) 446 unsigned long ioba, unsigned long tce)
216{ 447{
217 struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); 448 struct kvmppc_spapr_tce_table *stt;
218 long ret; 449 long ret, idx;
450 struct kvmppc_spapr_tce_iommu_table *stit;
451 unsigned long entry, ua = 0;
452 enum dma_data_direction dir;
219 453
220 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 454 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
221 /* liobn, ioba, tce); */ 455 /* liobn, ioba, tce); */
222 456
457 stt = kvmppc_find_table(vcpu->kvm, liobn);
223 if (!stt) 458 if (!stt)
224 return H_TOO_HARD; 459 return H_TOO_HARD;
225 460
@@ -231,7 +466,35 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
231 if (ret != H_SUCCESS) 466 if (ret != H_SUCCESS)
232 return ret; 467 return ret;
233 468
234 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); 469 dir = iommu_tce_direction(tce);
470 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
471 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
472 return H_PARAMETER;
473
474 entry = ioba >> stt->page_shift;
475
476 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
477 if (dir == DMA_NONE) {
478 ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
479 stit->tbl, entry);
480 } else {
481 idx = srcu_read_lock(&vcpu->kvm->srcu);
482 ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
483 entry, ua, dir);
484 srcu_read_unlock(&vcpu->kvm->srcu, idx);
485 }
486
487 if (ret == H_SUCCESS)
488 continue;
489
490 if (ret == H_TOO_HARD)
491 return ret;
492
493 WARN_ON_ONCE(1);
494 kvmppc_clear_tce(stit->tbl, entry);
495 }
496
497 kvmppc_tce_put(stt, entry, tce);
235 498
236 return H_SUCCESS; 499 return H_SUCCESS;
237} 500}
@@ -246,8 +509,9 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
246 unsigned long entry, ua = 0; 509 unsigned long entry, ua = 0;
247 u64 __user *tces; 510 u64 __user *tces;
248 u64 tce; 511 u64 tce;
512 struct kvmppc_spapr_tce_iommu_table *stit;
249 513
250 stt = kvmppc_find_table(vcpu, liobn); 514 stt = kvmppc_find_table(vcpu->kvm, liobn);
251 if (!stt) 515 if (!stt)
252 return H_TOO_HARD; 516 return H_TOO_HARD;
253 517
@@ -284,6 +548,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
284 if (ret != H_SUCCESS) 548 if (ret != H_SUCCESS)
285 goto unlock_exit; 549 goto unlock_exit;
286 550
551 if (kvmppc_gpa_to_ua(vcpu->kvm,
552 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
553 &ua, NULL))
554 return H_PARAMETER;
555
556 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
557 ret = kvmppc_tce_iommu_map(vcpu->kvm,
558 stit->tbl, entry + i, ua,
559 iommu_tce_direction(tce));
560
561 if (ret == H_SUCCESS)
562 continue;
563
564 if (ret == H_TOO_HARD)
565 goto unlock_exit;
566
567 WARN_ON_ONCE(1);
568 kvmppc_clear_tce(stit->tbl, entry);
569 }
570
287 kvmppc_tce_put(stt, entry + i, tce); 571 kvmppc_tce_put(stt, entry + i, tce);
288 } 572 }
289 573
@@ -300,8 +584,9 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
300{ 584{
301 struct kvmppc_spapr_tce_table *stt; 585 struct kvmppc_spapr_tce_table *stt;
302 long i, ret; 586 long i, ret;
587 struct kvmppc_spapr_tce_iommu_table *stit;
303 588
304 stt = kvmppc_find_table(vcpu, liobn); 589 stt = kvmppc_find_table(vcpu->kvm, liobn);
305 if (!stt) 590 if (!stt)
306 return H_TOO_HARD; 591 return H_TOO_HARD;
307 592
@@ -313,6 +598,24 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
313 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 598 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
314 return H_PARAMETER; 599 return H_PARAMETER;
315 600
601 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
602 unsigned long entry = ioba >> stit->tbl->it_page_shift;
603
604 for (i = 0; i < npages; ++i) {
605 ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
606 stit->tbl, entry + i);
607
608 if (ret == H_SUCCESS)
609 continue;
610
611 if (ret == H_TOO_HARD)
612 return ret;
613
614 WARN_ON_ONCE(1);
615 kvmppc_clear_tce(stit->tbl, entry);
616 }
617 }
618
316 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 619 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
317 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); 620 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
318 621
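
The per-LIOBN list of attached hardware tables above is only protected by RCU on the read side, so a lookup must not take a reference on an entry whose count has already dropped to zero; that is why iommu_tce_table_get() and the attach path use kref_get_unless_zero(). The following is a standalone sketch of that idiom using C11 atomics instead of the kernel's kref, purely for illustration:

#include <stdatomic.h>
#include <stdio.h>

/* Illustration of the kref_get_unless_zero() pattern: a lookup may only
 * take a reference if the object has not already started dying. */
struct obj {
        atomic_uint refs;
};

static int get_unless_zero(struct obj *o)
{
        unsigned int old = atomic_load(&o->refs);

        while (old != 0) {
                if (atomic_compare_exchange_weak(&o->refs, &old, old + 1))
                        return 1;       /* reference taken */
        }
        return 0;                       /* object already dying, do not touch */
}

static void put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refs, 1) == 1)
                printf("last reference dropped, release object\n");
}

int main(void)
{
        struct obj o = { .refs = 1 };

        if (get_unless_zero(&o))
                put(&o);                        /* back to 1 */
        put(&o);                                /* 1 -> 0, released */
        printf("%d\n", get_unless_zero(&o));    /* 0: too late to take a ref */
        return 0;
}
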
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index e4c4ea973e57..eda0a8f6fae8 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -40,6 +40,31 @@
40#include <asm/iommu.h> 40#include <asm/iommu.h>
41#include <asm/tce.h> 41#include <asm/tce.h>
42 42
43#ifdef CONFIG_BUG
44
45#define WARN_ON_ONCE_RM(condition) ({ \
46 static bool __section(.data.unlikely) __warned; \
47 int __ret_warn_once = !!(condition); \
48 \
49 if (unlikely(__ret_warn_once && !__warned)) { \
50 __warned = true; \
51 pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n", \
52 __stringify(condition), \
53 __func__, __LINE__); \
54 dump_stack(); \
55 } \
56 unlikely(__ret_warn_once); \
57})
58
59#else
60
61#define WARN_ON_ONCE_RM(condition) ({ \
62 int __ret_warn_on = !!(condition); \
63 unlikely(__ret_warn_on); \
64})
65
66#endif
67
43#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) 68#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
44 69
45/* 70/*
@@ -48,10 +73,9 @@
48 * WARNING: This will be called in real or virtual mode on HV KVM and virtual 73 * WARNING: This will be called in real or virtual mode on HV KVM and virtual
49 * mode on PR KVM 74 * mode on PR KVM
50 */ 75 */
51struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, 76struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
52 unsigned long liobn) 77 unsigned long liobn)
53{ 78{
54 struct kvm *kvm = vcpu->kvm;
55 struct kvmppc_spapr_tce_table *stt; 79 struct kvmppc_spapr_tce_table *stt;
56 80
57 list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list) 81 list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
@@ -63,27 +87,6 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
63EXPORT_SYMBOL_GPL(kvmppc_find_table); 87EXPORT_SYMBOL_GPL(kvmppc_find_table);
64 88
65/* 89/*
66 * Validates IO address.
67 *
68 * WARNING: This will be called in real-mode on HV KVM and virtual
69 * mode on PR KVM
70 */
71long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
72 unsigned long ioba, unsigned long npages)
73{
74 unsigned long mask = (1ULL << stt->page_shift) - 1;
75 unsigned long idx = ioba >> stt->page_shift;
76
77 if ((ioba & mask) || (idx < stt->offset) ||
78 (idx - stt->offset + npages > stt->size) ||
79 (idx + npages < idx))
80 return H_PARAMETER;
81
82 return H_SUCCESS;
83}
84EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
85
86/*
87 * Validates TCE address. 90 * Validates TCE address.
88 * At the moment flags and page mask are validated. 91 * At the moment flags and page mask are validated.
89 * As the host kernel does not access those addresses (just puts them 92 * As the host kernel does not access those addresses (just puts them
@@ -96,10 +99,14 @@ EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
96 */ 99 */
97long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) 100long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
98{ 101{
99 unsigned long page_mask = ~((1ULL << stt->page_shift) - 1); 102 unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
100 unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ); 103 enum dma_data_direction dir = iommu_tce_direction(tce);
104
105 /* Allow userspace to poison TCE table */
106 if (dir == DMA_NONE)
107 return H_SUCCESS;
101 108
102 if (tce & mask) 109 if (iommu_tce_check_gpa(stt->page_shift, gpa))
103 return H_PARAMETER; 110 return H_PARAMETER;
104 111
105 return H_SUCCESS; 112 return H_SUCCESS;
@@ -179,15 +186,122 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
179EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); 186EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
180 187
181#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 188#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
189static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
190{
191 unsigned long hpa = 0;
192 enum dma_data_direction dir = DMA_NONE;
193
194 iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
195}
196
197static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
198 struct iommu_table *tbl, unsigned long entry)
199{
200 struct mm_iommu_table_group_mem_t *mem = NULL;
201 const unsigned long pgsize = 1ULL << tbl->it_page_shift;
202 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
203
204 if (!pua)
205 /* it_userspace allocation might be delayed */
206 return H_TOO_HARD;
207
208 pua = (void *) vmalloc_to_phys(pua);
209 if (WARN_ON_ONCE_RM(!pua))
210 return H_HARDWARE;
211
212 mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
213 if (!mem)
214 return H_TOO_HARD;
215
216 mm_iommu_mapped_dec(mem);
217
218 *pua = 0;
219
220 return H_SUCCESS;
221}
222
223static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
224 struct iommu_table *tbl, unsigned long entry)
225{
226 enum dma_data_direction dir = DMA_NONE;
227 unsigned long hpa = 0;
228 long ret;
229
230 if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
231 /*
232 * real mode xchg can fail if struct page crosses
233 * a page boundary
234 */
235 return H_TOO_HARD;
236
237 if (dir == DMA_NONE)
238 return H_SUCCESS;
239
240 ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
241 if (ret)
242 iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
243
244 return ret;
245}
246
247static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
248 unsigned long entry, unsigned long ua,
249 enum dma_data_direction dir)
250{
251 long ret;
252 unsigned long hpa = 0;
253 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
254 struct mm_iommu_table_group_mem_t *mem;
255
256 if (!pua)
257 /* it_userspace allocation might be delayed */
258 return H_TOO_HARD;
259
260 mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift);
261 if (!mem)
262 return H_TOO_HARD;
263
264 if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
265 return H_HARDWARE;
266
267 pua = (void *) vmalloc_to_phys(pua);
268 if (WARN_ON_ONCE_RM(!pua))
269 return H_HARDWARE;
270
271 if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
272 return H_CLOSED;
273
274 ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
275 if (ret) {
276 mm_iommu_mapped_dec(mem);
277 /*
278 * real mode xchg can fail if struct page crosses
279 * a page boundary
280 */
281 return H_TOO_HARD;
282 }
283
284 if (dir != DMA_NONE)
285 kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
286
287 *pua = ua;
288
289 return 0;
290}
291
182long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 292long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
183 unsigned long ioba, unsigned long tce) 293 unsigned long ioba, unsigned long tce)
184{ 294{
185 struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); 295 struct kvmppc_spapr_tce_table *stt;
186 long ret; 296 long ret;
297 struct kvmppc_spapr_tce_iommu_table *stit;
298 unsigned long entry, ua = 0;
299 enum dma_data_direction dir;
187 300
188 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 301 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
189 /* liobn, ioba, tce); */ 302 /* liobn, ioba, tce); */
190 303
304 stt = kvmppc_find_table(vcpu->kvm, liobn);
191 if (!stt) 305 if (!stt)
192 return H_TOO_HARD; 306 return H_TOO_HARD;
193 307
@@ -199,7 +313,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
199 if (ret != H_SUCCESS) 313 if (ret != H_SUCCESS)
200 return ret; 314 return ret;
201 315
202 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); 316 dir = iommu_tce_direction(tce);
317 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
318 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
319 return H_PARAMETER;
320
321 entry = ioba >> stt->page_shift;
322
323 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
324 if (dir == DMA_NONE)
325 ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
326 stit->tbl, entry);
327 else
328 ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
329 stit->tbl, entry, ua, dir);
330
331 if (ret == H_SUCCESS)
332 continue;
333
334 if (ret == H_TOO_HARD)
335 return ret;
336
337 WARN_ON_ONCE_RM(1);
338 kvmppc_rm_clear_tce(stit->tbl, entry);
339 }
340
341 kvmppc_tce_put(stt, entry, tce);
203 342
204 return H_SUCCESS; 343 return H_SUCCESS;
205} 344}
@@ -239,8 +378,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
239 long i, ret = H_SUCCESS; 378 long i, ret = H_SUCCESS;
240 unsigned long tces, entry, ua = 0; 379 unsigned long tces, entry, ua = 0;
241 unsigned long *rmap = NULL; 380 unsigned long *rmap = NULL;
381 bool prereg = false;
382 struct kvmppc_spapr_tce_iommu_table *stit;
242 383
243 stt = kvmppc_find_table(vcpu, liobn); 384 stt = kvmppc_find_table(vcpu->kvm, liobn);
244 if (!stt) 385 if (!stt)
245 return H_TOO_HARD; 386 return H_TOO_HARD;
246 387
@@ -259,23 +400,49 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
259 if (ret != H_SUCCESS) 400 if (ret != H_SUCCESS)
260 return ret; 401 return ret;
261 402
262 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) 403 if (mm_iommu_preregistered(vcpu->kvm->mm)) {
263 return H_TOO_HARD; 404 /*
 405 * We get here if guest memory was pre-registered, which
 406 * is normally the VFIO case, and the gpa->hpa translation
 407 * does not depend on the HPT.
408 */
409 struct mm_iommu_table_group_mem_t *mem;
264 410
265 rmap = (void *) vmalloc_to_phys(rmap); 411 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
412 return H_TOO_HARD;
266 413
267 /* 414 mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
268 * Synchronize with the MMU notifier callbacks in 415 if (mem)
269 * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). 416 prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
270 * While we have the rmap lock, code running on other CPUs 417 }
271 * cannot finish unmapping the host real page that backs 418
272 * this guest real page, so we are OK to access the host 419 if (!prereg) {
273 * real page. 420 /*
274 */ 421 * This is usually a case of a guest with emulated devices only
275 lock_rmap(rmap); 422 * when TCE list is not in preregistered memory.
276 if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { 423 * We do not require memory to be preregistered in this case
277 ret = H_TOO_HARD; 424 * so lock rmap and do __find_linux_pte_or_hugepte().
278 goto unlock_exit; 425 */
426 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
427 return H_TOO_HARD;
428
429 rmap = (void *) vmalloc_to_phys(rmap);
430 if (WARN_ON_ONCE_RM(!rmap))
431 return H_HARDWARE;
432
433 /*
434 * Synchronize with the MMU notifier callbacks in
435 * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
436 * While we have the rmap lock, code running on other CPUs
437 * cannot finish unmapping the host real page that backs
438 * this guest real page, so we are OK to access the host
439 * real page.
440 */
441 lock_rmap(rmap);
442 if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
443 ret = H_TOO_HARD;
444 goto unlock_exit;
445 }
279 } 446 }
280 447
281 for (i = 0; i < npages; ++i) { 448 for (i = 0; i < npages; ++i) {
@@ -285,11 +452,33 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
285 if (ret != H_SUCCESS) 452 if (ret != H_SUCCESS)
286 goto unlock_exit; 453 goto unlock_exit;
287 454
455 ua = 0;
456 if (kvmppc_gpa_to_ua(vcpu->kvm,
457 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
458 &ua, NULL))
459 return H_PARAMETER;
460
461 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
462 ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
463 stit->tbl, entry + i, ua,
464 iommu_tce_direction(tce));
465
466 if (ret == H_SUCCESS)
467 continue;
468
469 if (ret == H_TOO_HARD)
470 goto unlock_exit;
471
472 WARN_ON_ONCE_RM(1);
473 kvmppc_rm_clear_tce(stit->tbl, entry);
474 }
475
288 kvmppc_tce_put(stt, entry + i, tce); 476 kvmppc_tce_put(stt, entry + i, tce);
289 } 477 }
290 478
291unlock_exit: 479unlock_exit:
292 unlock_rmap(rmap); 480 if (rmap)
481 unlock_rmap(rmap);
293 482
294 return ret; 483 return ret;
295} 484}
@@ -300,8 +489,9 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
300{ 489{
301 struct kvmppc_spapr_tce_table *stt; 490 struct kvmppc_spapr_tce_table *stt;
302 long i, ret; 491 long i, ret;
492 struct kvmppc_spapr_tce_iommu_table *stit;
303 493
304 stt = kvmppc_find_table(vcpu, liobn); 494 stt = kvmppc_find_table(vcpu->kvm, liobn);
305 if (!stt) 495 if (!stt)
306 return H_TOO_HARD; 496 return H_TOO_HARD;
307 497
@@ -313,6 +503,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
313 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 503 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
314 return H_PARAMETER; 504 return H_PARAMETER;
315 505
506 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
507 unsigned long entry = ioba >> stit->tbl->it_page_shift;
508
509 for (i = 0; i < npages; ++i) {
510 ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
511 stit->tbl, entry + i);
512
513 if (ret == H_SUCCESS)
514 continue;
515
516 if (ret == H_TOO_HARD)
517 return ret;
518
519 WARN_ON_ONCE_RM(1);
520 kvmppc_rm_clear_tce(stit->tbl, entry);
521 }
522 }
523
316 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 524 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
317 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); 525 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
318 526
@@ -322,12 +530,13 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
322long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 530long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
323 unsigned long ioba) 531 unsigned long ioba)
324{ 532{
325 struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); 533 struct kvmppc_spapr_tce_table *stt;
326 long ret; 534 long ret;
327 unsigned long idx; 535 unsigned long idx;
328 struct page *page; 536 struct page *page;
329 u64 *tbl; 537 u64 *tbl;
330 538
539 stt = kvmppc_find_table(vcpu->kvm, liobn);
331 if (!stt) 540 if (!stt)
332 return H_TOO_HARD; 541 return H_TOO_HARD;
333 542
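
Several of the real-mode helpers above give up with H_TOO_HARD rather than fail hard, for example when the TCE list is not in pre-registered memory or a real-mode exchange cannot be done safely. A hypothetical sketch of that convention follows; the handler names and the dispatch wiring are illustrative, only the H_SUCCESS/H_TOO_HARD meaning is taken from the code above.

#include <stdio.h>

enum {
        H_SUCCESS  = 0,
        H_TOO_HARD = 9999,      /* treated here as "defer to a slower path" */
};

static long h_put_tce_realmode(unsigned long tce)
{
        /* e.g. the userspace TCE list cannot be reached from real mode */
        return (tce & 1) ? H_TOO_HARD : H_SUCCESS;
}

static long h_put_tce_virtmode(unsigned long tce)
{
        (void)tce;
        return H_SUCCESS;       /* may sleep, take SRCU, touch userspace */
}

static long do_h_put_tce(unsigned long tce)
{
        long ret = h_put_tce_realmode(tce);

        if (ret == H_TOO_HARD)          /* re-drive through virtual mode */
                ret = h_put_tce_virtmode(tce);
        return ret;
}

int main(void)
{
        printf("%ld %ld\n", do_h_put_tce(0x2000), do_h_put_tce(0x2001));
        return 0;
}
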
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 8359752b3efc..68d68983948e 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -503,10 +503,18 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
503 break; 503 break;
504unprivileged: 504unprivileged:
505 default: 505 default:
506 printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); 506 pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn);
507#ifndef DEBUG_SPR 507 if (sprn & 0x10) {
508 emulated = EMULATE_FAIL; 508 if (kvmppc_get_msr(vcpu) & MSR_PR) {
509#endif 509 kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
510 emulated = EMULATE_AGAIN;
511 }
512 } else {
513 if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) {
514 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
515 emulated = EMULATE_AGAIN;
516 }
517 }
510 break; 518 break;
511 } 519 }
512 520
@@ -648,10 +656,20 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
648 break; 656 break;
649 default: 657 default:
650unprivileged: 658unprivileged:
651 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); 659 pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn);
652#ifndef DEBUG_SPR 660 if (sprn & 0x10) {
653 emulated = EMULATE_FAIL; 661 if (kvmppc_get_msr(vcpu) & MSR_PR) {
654#endif 662 kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
663 emulated = EMULATE_AGAIN;
664 }
665 } else {
666 if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 ||
667 sprn == 4 || sprn == 5 || sprn == 6) {
668 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
669 emulated = EMULATE_AGAIN;
670 }
671 }
672
655 break; 673 break;
656 } 674 }
657 675
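
The new SPR fault selection above follows the architecture's numbering convention: an SPR whose number has the 0x10 bit set is privileged, so a problem-state access takes a privileged-instruction program check, while other unimplemented SPRs take an illegal-instruction program check. A small standalone sketch of that decision, mirroring the mtspr case only (illustrative, not the KVM code):

#include <stdio.h>

static const char *fault_for_mtspr(unsigned int sprn, int problem_state)
{
        if (sprn & 0x10)
                return problem_state ? "privileged program check"
                                     : "no interrupt queued";
        return (problem_state || sprn == 0) ? "illegal program check"
                                            : "no interrupt queued";
}

int main(void)
{
        printf("%s\n", fault_for_mtspr(0x1d0, 1));      /* privileged SPR number */
        printf("%s\n", fault_for_mtspr(0x003, 1));      /* non-privileged SPR    */
        return 0;
}
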
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 128efb42ec4e..42b7a4fd57d9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3648,11 +3648,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3648 return -EIO; 3648 return -EIO;
3649 3649
3650 mutex_lock(&kvm->lock); 3650 mutex_lock(&kvm->lock);
3651 if (!kvm->arch.pimap)
3652 goto unlock;
3651 3653
3652 if (kvm->arch.pimap == NULL) {
3653 mutex_unlock(&kvm->lock);
3654 return 0;
3655 }
3656 pimap = kvm->arch.pimap; 3654 pimap = kvm->arch.pimap;
3657 3655
3658 for (i = 0; i < pimap->n_mapped; i++) { 3656 for (i = 0; i < pimap->n_mapped; i++) {
@@ -3677,7 +3675,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3677 * We don't free this structure even when the count goes to 3675 * We don't free this structure even when the count goes to
3678 * zero. The structure is freed when we destroy the VM. 3676 * zero. The structure is freed when we destroy the VM.
3679 */ 3677 */
3680 3678 unlock:
3681 mutex_unlock(&kvm->lock); 3679 mutex_unlock(&kvm->lock);
3682 return rc; 3680 return rc;
3683} 3681}
@@ -3957,7 +3955,7 @@ static int kvmppc_book3s_init_hv(void)
3957 * indirectly, via OPAL. 3955 * indirectly, via OPAL.
3958 */ 3956 */
3959#ifdef CONFIG_SMP 3957#ifdef CONFIG_SMP
3960 if (!xive_enabled() && !get_paca()->kvm_hstate.xics_phys) { 3958 if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
3961 struct device_node *np; 3959 struct device_node *np;
3962 3960
3963 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); 3961 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d4dfc0ca2a44..f026b062c0ed 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -537,8 +537,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
537 int r = RESUME_GUEST; 537 int r = RESUME_GUEST;
538 int relocated; 538 int relocated;
539 int page_found = 0; 539 int page_found = 0;
540 struct kvmppc_pte pte; 540 struct kvmppc_pte pte = { 0 };
541 bool is_mmio = false;
542 bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false; 541 bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
543 bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false; 542 bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
544 u64 vsid; 543 u64 vsid;
@@ -616,8 +615,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
616 /* Page not found in guest SLB */ 615 /* Page not found in guest SLB */
617 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); 616 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
618 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 617 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
619 } else if (!is_mmio && 618 } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
620 kvmppc_visible_gpa(vcpu, pte.raddr)) {
621 if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { 619 if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
622 /* 620 /*
623 * There is already a host HPTE there, presumably 621 * There is already a host HPTE there, presumably
@@ -627,7 +625,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
627 kvmppc_mmu_unmap_page(vcpu, &pte); 625 kvmppc_mmu_unmap_page(vcpu, &pte);
628 } 626 }
629 /* The guest's PTE is not mapped yet. Map on the host */ 627 /* The guest's PTE is not mapped yet. Map on the host */
630 kvmppc_mmu_map_page(vcpu, &pte, iswrite); 628 if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
629 /* Exit KVM if mapping failed */
630 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
631 return RESUME_HOST;
632 }
631 if (data) 633 if (data)
632 vcpu->stat.sp_storage++; 634 vcpu->stat.sp_storage++;
633 else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 635 else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0514cbd4e533..3c296c2eacf8 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -300,6 +300,11 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
300 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 300 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
301} 301}
302 302
303void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
304{
305 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
306}
307
303void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 308void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
304{ 309{
305 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); 310 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 0fda4230f6c0..77fd043b3ecc 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -797,9 +797,8 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
797 host_tlb_params[0].sets = 797 host_tlb_params[0].sets =
798 host_tlb_params[0].entries / host_tlb_params[0].ways; 798 host_tlb_params[0].entries / host_tlb_params[0].ways;
799 host_tlb_params[1].sets = 1; 799 host_tlb_params[1].sets = 1;
800 800 vcpu_e500->h2g_tlb1_rmap = kcalloc(host_tlb_params[1].entries,
801 vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * 801 sizeof(*vcpu_e500->h2g_tlb1_rmap),
802 host_tlb_params[1].entries,
803 GFP_KERNEL); 802 GFP_KERNEL);
804 if (!vcpu_e500->h2g_tlb1_rmap) 803 if (!vcpu_e500->h2g_tlb1_rmap)
805 return -EINVAL; 804 return -EINVAL;
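
The kcalloc() conversion above also replaces the open-coded size multiplication, which can overflow. A standalone illustration of the failure mode that kcalloc() guards against (the numbers are contrived):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        size_t n = (SIZE_MAX / 4) + 2;  /* bug- or attacker-controlled count */
        size_t size = 4;                /* element size */
        size_t bytes = n * size;        /* wraps around */

        printf("requested %zu elements of %zu bytes -> %zu bytes allocated\n",
               n, size, bytes);
        /* kcalloc() detects this overflow and returns NULL instead. */
        return 0;
}
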
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index b379146de55b..c873ffe55362 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -259,10 +259,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
259 259
260 case OP_31_XOP_MFSPR: 260 case OP_31_XOP_MFSPR:
261 emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); 261 emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
262 if (emulated == EMULATE_AGAIN) {
263 emulated = EMULATE_DONE;
264 advance = 0;
265 }
262 break; 266 break;
263 267
264 case OP_31_XOP_MTSPR: 268 case OP_31_XOP_MTSPR:
265 emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); 269 emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
270 if (emulated == EMULATE_AGAIN) {
271 emulated = EMULATE_DONE;
272 advance = 0;
273 }
266 break; 274 break;
267 275
268 case OP_31_XOP_TLBSYNC: 276 case OP_31_XOP_TLBSYNC:
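
With the change above, an SPR access that queued a program check is reported as EMULATE_DONE but with advance = 0, so the guest PC stays on the faulting mfspr/mtspr and the interrupt's SRR0 points at it, as hardware would. A standalone sketch of the effect (the enum values and PC handling here are illustrative):

#include <stdio.h>

enum { EMULATE_DONE, EMULATE_AGAIN, EMULATE_FAIL };

int main(void)
{
        unsigned long pc = 0x700;       /* address of the faulting mfspr */
        int emulated = EMULATE_AGAIN;   /* handler queued a program check */
        int advance = 1;

        if (emulated == EMULATE_AGAIN) {
                emulated = EMULATE_DONE;
                advance = 0;            /* leave the PC on the instruction */
        }
        if (emulated == EMULATE_DONE && advance)
                pc += 4;

        printf("pc = 0x%lx\n", pc);     /* 0x700: SRR0 will point here */
        return 0;
}
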
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 6d3c0ee1d744..af833531af31 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -34,18 +34,38 @@
34#include "timing.h" 34#include "timing.h"
35#include "trace.h" 35#include "trace.h"
36 36
37/* XXX to do: 37#ifdef CONFIG_PPC_FPU
38 * lhax 38static bool kvmppc_check_fp_disabled(struct kvm_vcpu *vcpu)
39 * lhaux 39{
40 * lswx 40 if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
41 * lswi 41 kvmppc_core_queue_fpunavail(vcpu);
42 * stswx 42 return true;
43 * stswi 43 }
44 * lha 44
45 * lhau 45 return false;
46 * lmw 46}
47 * stmw 47#endif /* CONFIG_PPC_FPU */
48
49#ifdef CONFIG_VSX
50static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
51{
52 if (!(kvmppc_get_msr(vcpu) & MSR_VSX)) {
53 kvmppc_core_queue_vsx_unavail(vcpu);
54 return true;
55 }
56
57 return false;
58}
59#endif /* CONFIG_VSX */
60
61/*
62 * XXX to do:
63 * lfiwax, lfiwzx
64 * vector loads and stores
48 * 65 *
66 * Instructions that trap when used on cache-inhibited mappings
67 * are not emulated here: multiple and string instructions,
68 * lq/stq, and the load-reserve/store-conditional instructions.
49 */ 69 */
50int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) 70int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
51{ 71{
@@ -66,6 +86,19 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
66 rs = get_rs(inst); 86 rs = get_rs(inst);
67 rt = get_rt(inst); 87 rt = get_rt(inst);
68 88
89 /*
90 * if mmio_vsx_tx_sx_enabled == 0, copy data between
91 * VSR[0..31] and memory
92 * if mmio_vsx_tx_sx_enabled == 1, copy data between
93 * VSR[32..63] and memory
94 */
95 vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
96 vcpu->arch.mmio_vsx_copy_nums = 0;
97 vcpu->arch.mmio_vsx_offset = 0;
98 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
99 vcpu->arch.mmio_sp64_extend = 0;
100 vcpu->arch.mmio_sign_extend = 0;
101
69 switch (get_op(inst)) { 102 switch (get_op(inst)) {
70 case 31: 103 case 31:
71 switch (get_xop(inst)) { 104 switch (get_xop(inst)) {
@@ -73,6 +106,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
73 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 106 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
74 break; 107 break;
75 108
109 case OP_31_XOP_LWZUX:
110 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
111 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
112 break;
113
76 case OP_31_XOP_LBZX: 114 case OP_31_XOP_LBZX:
77 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 115 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
78 break; 116 break;
@@ -82,22 +120,36 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
82 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 120 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
83 break; 121 break;
84 122
123 case OP_31_XOP_STDX:
124 emulated = kvmppc_handle_store(run, vcpu,
125 kvmppc_get_gpr(vcpu, rs), 8, 1);
126 break;
127
128 case OP_31_XOP_STDUX:
129 emulated = kvmppc_handle_store(run, vcpu,
130 kvmppc_get_gpr(vcpu, rs), 8, 1);
131 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
132 break;
133
85 case OP_31_XOP_STWX: 134 case OP_31_XOP_STWX:
86 emulated = kvmppc_handle_store(run, vcpu, 135 emulated = kvmppc_handle_store(run, vcpu,
87 kvmppc_get_gpr(vcpu, rs), 136 kvmppc_get_gpr(vcpu, rs), 4, 1);
88 4, 1); 137 break;
138
139 case OP_31_XOP_STWUX:
140 emulated = kvmppc_handle_store(run, vcpu,
141 kvmppc_get_gpr(vcpu, rs), 4, 1);
142 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
89 break; 143 break;
90 144
91 case OP_31_XOP_STBX: 145 case OP_31_XOP_STBX:
92 emulated = kvmppc_handle_store(run, vcpu, 146 emulated = kvmppc_handle_store(run, vcpu,
93 kvmppc_get_gpr(vcpu, rs), 147 kvmppc_get_gpr(vcpu, rs), 1, 1);
94 1, 1);
95 break; 148 break;
96 149
97 case OP_31_XOP_STBUX: 150 case OP_31_XOP_STBUX:
98 emulated = kvmppc_handle_store(run, vcpu, 151 emulated = kvmppc_handle_store(run, vcpu,
99 kvmppc_get_gpr(vcpu, rs), 152 kvmppc_get_gpr(vcpu, rs), 1, 1);
100 1, 1);
101 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 153 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
102 break; 154 break;
103 155
@@ -105,6 +157,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
105 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); 157 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
106 break; 158 break;
107 159
160 case OP_31_XOP_LHAUX:
161 emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
162 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
163 break;
164
108 case OP_31_XOP_LHZX: 165 case OP_31_XOP_LHZX:
109 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 166 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
110 break; 167 break;
@@ -116,14 +173,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
116 173
117 case OP_31_XOP_STHX: 174 case OP_31_XOP_STHX:
118 emulated = kvmppc_handle_store(run, vcpu, 175 emulated = kvmppc_handle_store(run, vcpu,
119 kvmppc_get_gpr(vcpu, rs), 176 kvmppc_get_gpr(vcpu, rs), 2, 1);
120 2, 1);
121 break; 177 break;
122 178
123 case OP_31_XOP_STHUX: 179 case OP_31_XOP_STHUX:
124 emulated = kvmppc_handle_store(run, vcpu, 180 emulated = kvmppc_handle_store(run, vcpu,
125 kvmppc_get_gpr(vcpu, rs), 181 kvmppc_get_gpr(vcpu, rs), 2, 1);
126 2, 1);
127 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 182 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
128 break; 183 break;
129 184
@@ -143,8 +198,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
143 198
144 case OP_31_XOP_STWBRX: 199 case OP_31_XOP_STWBRX:
145 emulated = kvmppc_handle_store(run, vcpu, 200 emulated = kvmppc_handle_store(run, vcpu,
146 kvmppc_get_gpr(vcpu, rs), 201 kvmppc_get_gpr(vcpu, rs), 4, 0);
147 4, 0);
148 break; 202 break;
149 203
150 case OP_31_XOP_LHBRX: 204 case OP_31_XOP_LHBRX:
@@ -153,10 +207,258 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
153 207
154 case OP_31_XOP_STHBRX: 208 case OP_31_XOP_STHBRX:
155 emulated = kvmppc_handle_store(run, vcpu, 209 emulated = kvmppc_handle_store(run, vcpu,
156 kvmppc_get_gpr(vcpu, rs), 210 kvmppc_get_gpr(vcpu, rs), 2, 0);
157 2, 0); 211 break;
212
213 case OP_31_XOP_LDBRX:
214 emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0);
215 break;
216
217 case OP_31_XOP_STDBRX:
218 emulated = kvmppc_handle_store(run, vcpu,
219 kvmppc_get_gpr(vcpu, rs), 8, 0);
220 break;
221
222 case OP_31_XOP_LDX:
223 emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
224 break;
225
226 case OP_31_XOP_LDUX:
227 emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
228 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
229 break;
230
231 case OP_31_XOP_LWAX:
232 emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
233 break;
234
235 case OP_31_XOP_LWAUX:
236 emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
237 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
238 break;
239
240#ifdef CONFIG_PPC_FPU
241 case OP_31_XOP_LFSX:
242 if (kvmppc_check_fp_disabled(vcpu))
243 return EMULATE_DONE;
244 vcpu->arch.mmio_sp64_extend = 1;
245 emulated = kvmppc_handle_load(run, vcpu,
246 KVM_MMIO_REG_FPR|rt, 4, 1);
247 break;
248
249 case OP_31_XOP_LFSUX:
250 if (kvmppc_check_fp_disabled(vcpu))
251 return EMULATE_DONE;
252 vcpu->arch.mmio_sp64_extend = 1;
253 emulated = kvmppc_handle_load(run, vcpu,
254 KVM_MMIO_REG_FPR|rt, 4, 1);
255 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
256 break;
257
258 case OP_31_XOP_LFDX:
259 if (kvmppc_check_fp_disabled(vcpu))
260 return EMULATE_DONE;
261 emulated = kvmppc_handle_load(run, vcpu,
262 KVM_MMIO_REG_FPR|rt, 8, 1);
263 break;
264
265 case OP_31_XOP_LFDUX:
266 if (kvmppc_check_fp_disabled(vcpu))
267 return EMULATE_DONE;
268 emulated = kvmppc_handle_load(run, vcpu,
269 KVM_MMIO_REG_FPR|rt, 8, 1);
270 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
271 break;
272
273 case OP_31_XOP_LFIWAX:
274 if (kvmppc_check_fp_disabled(vcpu))
275 return EMULATE_DONE;
276 emulated = kvmppc_handle_loads(run, vcpu,
277 KVM_MMIO_REG_FPR|rt, 4, 1);
278 break;
279
280 case OP_31_XOP_LFIWZX:
281 if (kvmppc_check_fp_disabled(vcpu))
282 return EMULATE_DONE;
283 emulated = kvmppc_handle_load(run, vcpu,
284 KVM_MMIO_REG_FPR|rt, 4, 1);
285 break;
286
287 case OP_31_XOP_STFSX:
288 if (kvmppc_check_fp_disabled(vcpu))
289 return EMULATE_DONE;
290 vcpu->arch.mmio_sp64_extend = 1;
291 emulated = kvmppc_handle_store(run, vcpu,
292 VCPU_FPR(vcpu, rs), 4, 1);
293 break;
294
295 case OP_31_XOP_STFSUX:
296 if (kvmppc_check_fp_disabled(vcpu))
297 return EMULATE_DONE;
298 vcpu->arch.mmio_sp64_extend = 1;
299 emulated = kvmppc_handle_store(run, vcpu,
300 VCPU_FPR(vcpu, rs), 4, 1);
301 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
302 break;
303
304 case OP_31_XOP_STFDX:
305 if (kvmppc_check_fp_disabled(vcpu))
306 return EMULATE_DONE;
307 emulated = kvmppc_handle_store(run, vcpu,
308 VCPU_FPR(vcpu, rs), 8, 1);
309 break;
310
311 case OP_31_XOP_STFDUX:
312 if (kvmppc_check_fp_disabled(vcpu))
313 return EMULATE_DONE;
314 emulated = kvmppc_handle_store(run, vcpu,
315 VCPU_FPR(vcpu, rs), 8, 1);
316 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
317 break;
318
319 case OP_31_XOP_STFIWX:
320 if (kvmppc_check_fp_disabled(vcpu))
321 return EMULATE_DONE;
322 emulated = kvmppc_handle_store(run, vcpu,
323 VCPU_FPR(vcpu, rs), 4, 1);
324 break;
325#endif
326
327#ifdef CONFIG_VSX
328 case OP_31_XOP_LXSDX:
329 if (kvmppc_check_vsx_disabled(vcpu))
330 return EMULATE_DONE;
331 vcpu->arch.mmio_vsx_copy_nums = 1;
332 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
333 emulated = kvmppc_handle_vsx_load(run, vcpu,
334 KVM_MMIO_REG_VSX|rt, 8, 1, 0);
335 break;
336
337 case OP_31_XOP_LXSSPX:
338 if (kvmppc_check_vsx_disabled(vcpu))
339 return EMULATE_DONE;
340 vcpu->arch.mmio_vsx_copy_nums = 1;
341 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
342 vcpu->arch.mmio_sp64_extend = 1;
343 emulated = kvmppc_handle_vsx_load(run, vcpu,
344 KVM_MMIO_REG_VSX|rt, 4, 1, 0);
345 break;
346
347 case OP_31_XOP_LXSIWAX:
348 if (kvmppc_check_vsx_disabled(vcpu))
349 return EMULATE_DONE;
350 vcpu->arch.mmio_vsx_copy_nums = 1;
351 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
352 emulated = kvmppc_handle_vsx_load(run, vcpu,
353 KVM_MMIO_REG_VSX|rt, 4, 1, 1);
354 break;
355
356 case OP_31_XOP_LXSIWZX:
357 if (kvmppc_check_vsx_disabled(vcpu))
358 return EMULATE_DONE;
359 vcpu->arch.mmio_vsx_copy_nums = 1;
360 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
361 emulated = kvmppc_handle_vsx_load(run, vcpu,
362 KVM_MMIO_REG_VSX|rt, 4, 1, 0);
363 break;
364
365 case OP_31_XOP_LXVD2X:
366 /*
 367 * In this case the load/store sequence is as follows:
 368 * Step 1: exit from the VM via the page-fault handler; KVM saves the VSRs.
 369 * See guest_exit_cont->store_fp_state->SAVE_32VSRS
 370 * for reference.
 371 *
 372 * Step 2: copy the data between memory and the VCPU.
 373 * Note: for LXVD2X/STXVD2X/LXVW4X/STXVW4X we use
 374 * two 8-byte copies or four 4-byte copies
 375 * to emulate one 16-byte copy.
 376 * There is also an endianness issue here: we must respect the
 377 * layout of memory.
 378 * See the LXVD2X_ROT/STXVD2X_ROT macros for reference.
 379 * If the host is little-endian, KVM calls XXSWAPD for
 380 * LXVD2X_ROT/STXVD2X_ROT,
 381 * so on a little-endian host
 382 * the position in memory has to be swapped.
 383 *
 384 * Step 3: return to the guest; KVM restores the registers.
 385 * See kvmppc_hv_entry->load_fp_state->REST_32VSRS
 386 * for reference.
387 */
388 if (kvmppc_check_vsx_disabled(vcpu))
389 return EMULATE_DONE;
390 vcpu->arch.mmio_vsx_copy_nums = 2;
391 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
392 emulated = kvmppc_handle_vsx_load(run, vcpu,
393 KVM_MMIO_REG_VSX|rt, 8, 1, 0);
394 break;
395
396 case OP_31_XOP_LXVW4X:
397 if (kvmppc_check_vsx_disabled(vcpu))
398 return EMULATE_DONE;
399 vcpu->arch.mmio_vsx_copy_nums = 4;
400 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
401 emulated = kvmppc_handle_vsx_load(run, vcpu,
402 KVM_MMIO_REG_VSX|rt, 4, 1, 0);
403 break;
404
405 case OP_31_XOP_LXVDSX:
406 if (kvmppc_check_vsx_disabled(vcpu))
407 return EMULATE_DONE;
408 vcpu->arch.mmio_vsx_copy_nums = 1;
409 vcpu->arch.mmio_vsx_copy_type =
410 KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
411 emulated = kvmppc_handle_vsx_load(run, vcpu,
412 KVM_MMIO_REG_VSX|rt, 8, 1, 0);
413 break;
414
415 case OP_31_XOP_STXSDX:
416 if (kvmppc_check_vsx_disabled(vcpu))
417 return EMULATE_DONE;
418 vcpu->arch.mmio_vsx_copy_nums = 1;
419 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
420 emulated = kvmppc_handle_vsx_store(run, vcpu,
421 rs, 8, 1);
158 break; 422 break;
159 423
424 case OP_31_XOP_STXSSPX:
425 if (kvmppc_check_vsx_disabled(vcpu))
426 return EMULATE_DONE;
427 vcpu->arch.mmio_vsx_copy_nums = 1;
428 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
429 vcpu->arch.mmio_sp64_extend = 1;
430 emulated = kvmppc_handle_vsx_store(run, vcpu,
431 rs, 4, 1);
432 break;
433
434 case OP_31_XOP_STXSIWX:
435 if (kvmppc_check_vsx_disabled(vcpu))
436 return EMULATE_DONE;
437 vcpu->arch.mmio_vsx_offset = 1;
438 vcpu->arch.mmio_vsx_copy_nums = 1;
439 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
440 emulated = kvmppc_handle_vsx_store(run, vcpu,
441 rs, 4, 1);
442 break;
443
444 case OP_31_XOP_STXVD2X:
445 if (kvmppc_check_vsx_disabled(vcpu))
446 return EMULATE_DONE;
447 vcpu->arch.mmio_vsx_copy_nums = 2;
448 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
449 emulated = kvmppc_handle_vsx_store(run, vcpu,
450 rs, 8, 1);
451 break;
452
453 case OP_31_XOP_STXVW4X:
454 if (kvmppc_check_vsx_disabled(vcpu))
455 return EMULATE_DONE;
456 vcpu->arch.mmio_vsx_copy_nums = 4;
457 vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
458 emulated = kvmppc_handle_vsx_store(run, vcpu,
459 rs, 4, 1);
460 break;
461#endif /* CONFIG_VSX */
160 default: 462 default:
161 emulated = EMULATE_FAIL; 463 emulated = EMULATE_FAIL;
162 break; 464 break;
@@ -167,10 +469,60 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
167 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 469 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
168 break; 470 break;
169 471
170 /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */ 472#ifdef CONFIG_PPC_FPU
473 case OP_STFS:
474 if (kvmppc_check_fp_disabled(vcpu))
475 return EMULATE_DONE;
476 vcpu->arch.mmio_sp64_extend = 1;
477 emulated = kvmppc_handle_store(run, vcpu,
478 VCPU_FPR(vcpu, rs),
479 4, 1);
480 break;
481
482 case OP_STFSU:
483 if (kvmppc_check_fp_disabled(vcpu))
484 return EMULATE_DONE;
485 vcpu->arch.mmio_sp64_extend = 1;
486 emulated = kvmppc_handle_store(run, vcpu,
487 VCPU_FPR(vcpu, rs),
488 4, 1);
489 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
490 break;
491
492 case OP_STFD:
493 if (kvmppc_check_fp_disabled(vcpu))
494 return EMULATE_DONE;
495 emulated = kvmppc_handle_store(run, vcpu,
496 VCPU_FPR(vcpu, rs),
497 8, 1);
498 break;
499
500 case OP_STFDU:
501 if (kvmppc_check_fp_disabled(vcpu))
502 return EMULATE_DONE;
503 emulated = kvmppc_handle_store(run, vcpu,
504 VCPU_FPR(vcpu, rs),
505 8, 1);
506 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
507 break;
508#endif
509
171 case OP_LD: 510 case OP_LD:
172 rt = get_rt(inst); 511 rt = get_rt(inst);
173 emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); 512 switch (inst & 3) {
513 case 0: /* ld */
514 emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
515 break;
516 case 1: /* ldu */
517 emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
518 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
519 break;
520 case 2: /* lwa */
521 emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
522 break;
523 default:
524 emulated = EMULATE_FAIL;
525 }
174 break; 526 break;
175 527
176 case OP_LWZU: 528 case OP_LWZU:
@@ -193,31 +545,37 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
193 4, 1); 545 4, 1);
194 break; 546 break;
195 547
196 /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
197 case OP_STD: 548 case OP_STD:
198 rs = get_rs(inst); 549 rs = get_rs(inst);
199 emulated = kvmppc_handle_store(run, vcpu, 550 switch (inst & 3) {
200 kvmppc_get_gpr(vcpu, rs), 551 case 0: /* std */
201 8, 1); 552 emulated = kvmppc_handle_store(run, vcpu,
553 kvmppc_get_gpr(vcpu, rs), 8, 1);
554 break;
555 case 1: /* stdu */
556 emulated = kvmppc_handle_store(run, vcpu,
557 kvmppc_get_gpr(vcpu, rs), 8, 1);
558 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
559 break;
560 default:
561 emulated = EMULATE_FAIL;
562 }
202 break; 563 break;
203 564
204 case OP_STWU: 565 case OP_STWU:
205 emulated = kvmppc_handle_store(run, vcpu, 566 emulated = kvmppc_handle_store(run, vcpu,
206 kvmppc_get_gpr(vcpu, rs), 567 kvmppc_get_gpr(vcpu, rs), 4, 1);
207 4, 1);
208 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 568 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
209 break; 569 break;
210 570
211 case OP_STB: 571 case OP_STB:
212 emulated = kvmppc_handle_store(run, vcpu, 572 emulated = kvmppc_handle_store(run, vcpu,
213 kvmppc_get_gpr(vcpu, rs), 573 kvmppc_get_gpr(vcpu, rs), 1, 1);
214 1, 1);
215 break; 574 break;
216 575
217 case OP_STBU: 576 case OP_STBU:
218 emulated = kvmppc_handle_store(run, vcpu, 577 emulated = kvmppc_handle_store(run, vcpu,
219 kvmppc_get_gpr(vcpu, rs), 578 kvmppc_get_gpr(vcpu, rs), 1, 1);
220 1, 1);
221 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 579 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
222 break; 580 break;
223 581
@@ -241,16 +599,48 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
241 599
242 case OP_STH: 600 case OP_STH:
243 emulated = kvmppc_handle_store(run, vcpu, 601 emulated = kvmppc_handle_store(run, vcpu,
244 kvmppc_get_gpr(vcpu, rs), 602 kvmppc_get_gpr(vcpu, rs), 2, 1);
245 2, 1);
246 break; 603 break;
247 604
248 case OP_STHU: 605 case OP_STHU:
249 emulated = kvmppc_handle_store(run, vcpu, 606 emulated = kvmppc_handle_store(run, vcpu,
250 kvmppc_get_gpr(vcpu, rs), 607 kvmppc_get_gpr(vcpu, rs), 2, 1);
251 2, 1); 608 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
609 break;
610
611#ifdef CONFIG_PPC_FPU
612 case OP_LFS:
613 if (kvmppc_check_fp_disabled(vcpu))
614 return EMULATE_DONE;
615 vcpu->arch.mmio_sp64_extend = 1;
616 emulated = kvmppc_handle_load(run, vcpu,
617 KVM_MMIO_REG_FPR|rt, 4, 1);
618 break;
619
620 case OP_LFSU:
621 if (kvmppc_check_fp_disabled(vcpu))
622 return EMULATE_DONE;
623 vcpu->arch.mmio_sp64_extend = 1;
624 emulated = kvmppc_handle_load(run, vcpu,
625 KVM_MMIO_REG_FPR|rt, 4, 1);
626 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
627 break;
628
629 case OP_LFD:
630 if (kvmppc_check_fp_disabled(vcpu))
631 return EMULATE_DONE;
632 emulated = kvmppc_handle_load(run, vcpu,
633 KVM_MMIO_REG_FPR|rt, 8, 1);
634 break;
635
636 case OP_LFDU:
637 if (kvmppc_check_fp_disabled(vcpu))
638 return EMULATE_DONE;
639 emulated = kvmppc_handle_load(run, vcpu,
640 KVM_MMIO_REG_FPR|rt, 8, 1);
252 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); 641 kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
253 break; 642 break;
643#endif
254 644
255 default: 645 default:
256 emulated = EMULATE_FAIL; 646 emulated = EMULATE_FAIL;
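
The OP_LD and OP_STD cases above multiplex several instructions behind one primary opcode, so the emulation now peels the DS-form minor opcode out of the low two bits of the instruction word. A minimal standalone sketch of that decode, assuming the standard Power ISA primary opcodes 58 (ld family) and 62 (std family); the helper names are illustrative, not KVM's:

    #include <stdint.h>
    #include <stdio.h>

    /* Decode the DS-form minor opcode the same way the emulation
     * switch does (inst & 3), assuming a raw 32-bit instruction word. */
    static const char *decode_ds_form(uint32_t inst)
    {
            uint32_t primary = inst >> 26;  /* primary opcode, bits 0-5 */
            uint32_t xo = inst & 3;         /* DS-form minor opcode */

            if (primary == 58)              /* OP_LD family */
                    return xo == 0 ? "ld" : xo == 1 ? "ldu" :
                           xo == 2 ? "lwa" : "reserved";
            if (primary == 62)              /* OP_STD family */
                    return xo == 0 ? "std" : xo == 1 ? "stdu" : "reserved";
            return "not DS-form";
    }

    int main(void)
    {
            /* ld r3,0(r4) encodes as 0xe8640000 */
            printf("%s\n", decode_ds_form(0xe8640000));
            return 0;
    }
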
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index de79bd721ec7..e4b58f2e335e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -37,6 +37,7 @@
37#include <asm/cputhreads.h> 37#include <asm/cputhreads.h>
38#include <asm/irqflags.h> 38#include <asm/irqflags.h>
39#include <asm/iommu.h> 39#include <asm/iommu.h>
40#include <asm/switch_to.h>
40#include <asm/xive.h> 41#include <asm/xive.h>
41 42
42#include "timing.h" 43#include "timing.h"
@@ -526,11 +527,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
526 /* We support this only for PR */ 527 /* We support this only for PR */
527 r = !hv_enabled; 528 r = !hv_enabled;
528 break; 529 break;
529#ifdef CONFIG_KVM_MMIO
530 case KVM_CAP_COALESCED_MMIO:
531 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
532 break;
533#endif
534#ifdef CONFIG_KVM_MPIC 530#ifdef CONFIG_KVM_MPIC
535 case KVM_CAP_IRQ_MPIC: 531 case KVM_CAP_IRQ_MPIC:
536 r = 1; 532 r = 1;
@@ -540,6 +536,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
540#ifdef CONFIG_PPC_BOOK3S_64 536#ifdef CONFIG_PPC_BOOK3S_64
541 case KVM_CAP_SPAPR_TCE: 537 case KVM_CAP_SPAPR_TCE:
542 case KVM_CAP_SPAPR_TCE_64: 538 case KVM_CAP_SPAPR_TCE_64:
539 /* fallthrough */
540 case KVM_CAP_SPAPR_TCE_VFIO:
543 case KVM_CAP_PPC_RTAS: 541 case KVM_CAP_PPC_RTAS:
544 case KVM_CAP_PPC_FIXUP_HCALL: 542 case KVM_CAP_PPC_FIXUP_HCALL:
545 case KVM_CAP_PPC_ENABLE_HCALL: 543 case KVM_CAP_PPC_ENABLE_HCALL:
@@ -811,6 +809,129 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
811 kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod); 809 kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
812} 810}
813 811
812#ifdef CONFIG_VSX
813static inline int kvmppc_get_vsr_dword_offset(int index)
814{
815 int offset;
816
817 if ((index != 0) && (index != 1))
818 return -1;
819
820#ifdef __BIG_ENDIAN
821 offset = index;
822#else
823 offset = 1 - index;
824#endif
825
826 return offset;
827}
828
829static inline int kvmppc_get_vsr_word_offset(int index)
830{
831 int offset;
832
833 if ((index > 3) || (index < 0))
834 return -1;
835
836#ifdef __BIG_ENDIAN
837 offset = index;
838#else
839 offset = 3 - index;
840#endif
841 return offset;
842}
843
844static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
845 u64 gpr)
846{
847 union kvmppc_one_reg val;
848 int offset = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
849 int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
850
851 if (offset == -1)
852 return;
853
854 if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
855 val.vval = VCPU_VSX_VR(vcpu, index);
856 val.vsxval[offset] = gpr;
857 VCPU_VSX_VR(vcpu, index) = val.vval;
858 } else {
859 VCPU_VSX_FPR(vcpu, index, offset) = gpr;
860 }
861}
862
863static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
864 u64 gpr)
865{
866 union kvmppc_one_reg val;
867 int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
868
869 if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
870 val.vval = VCPU_VSX_VR(vcpu, index);
871 val.vsxval[0] = gpr;
872 val.vsxval[1] = gpr;
873 VCPU_VSX_VR(vcpu, index) = val.vval;
874 } else {
875 VCPU_VSX_FPR(vcpu, index, 0) = gpr;
876 VCPU_VSX_FPR(vcpu, index, 1) = gpr;
877 }
878}
879
880static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
881 u32 gpr32)
882{
883 union kvmppc_one_reg val;
884 int offset = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
885 int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
886 int dword_offset, word_offset;
887
888 if (offset == -1)
889 return;
890
891 if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
892 val.vval = VCPU_VSX_VR(vcpu, index);
893 val.vsx32val[offset] = gpr32;
894 VCPU_VSX_VR(vcpu, index) = val.vval;
895 } else {
896 dword_offset = offset / 2;
897 word_offset = offset % 2;
898 val.vsxval[0] = VCPU_VSX_FPR(vcpu, index, dword_offset);
899 val.vsx32val[word_offset] = gpr32;
900 VCPU_VSX_FPR(vcpu, index, dword_offset) = val.vsxval[0];
901 }
902}
903#endif /* CONFIG_VSX */
904
905#ifdef CONFIG_PPC_FPU
906static inline u64 sp_to_dp(u32 fprs)
907{
908 u64 fprd;
909
910 preempt_disable();
911 enable_kernel_fp();
912 asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs)
913 : "fr0");
914 preempt_enable();
915 return fprd;
916}
917
918static inline u32 dp_to_sp(u64 fprd)
919{
920 u32 fprs;
921
922 preempt_disable();
923 enable_kernel_fp();
924 asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd)
925 : "fr0");
926 preempt_enable();
927 return fprs;
928}
929
930#else
931#define sp_to_dp(x) (x)
932#define dp_to_sp(x) (x)
933#endif /* CONFIG_PPC_FPU */
934
814static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 935static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
815 struct kvm_run *run) 936 struct kvm_run *run)
816{ 937{
@@ -837,6 +958,10 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
837 } 958 }
838 } 959 }
839 960
961 /* conversion between single and double precision */
962 if ((vcpu->arch.mmio_sp64_extend) && (run->mmio.len == 4))
963 gpr = sp_to_dp(gpr);
964
840 if (vcpu->arch.mmio_sign_extend) { 965 if (vcpu->arch.mmio_sign_extend) {
841 switch (run->mmio.len) { 966 switch (run->mmio.len) {
842#ifdef CONFIG_PPC64 967#ifdef CONFIG_PPC64
@@ -853,8 +978,6 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
853 } 978 }
854 } 979 }
855 980
856 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
857
858 switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { 981 switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
859 case KVM_MMIO_REG_GPR: 982 case KVM_MMIO_REG_GPR:
860 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 983 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
@@ -871,6 +994,17 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
871 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; 994 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
872 break; 995 break;
873#endif 996#endif
997#ifdef CONFIG_VSX
998 case KVM_MMIO_REG_VSX:
999 if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD)
1000 kvmppc_set_vsr_dword(vcpu, gpr);
1001 else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD)
1002 kvmppc_set_vsr_word(vcpu, gpr);
1003 else if (vcpu->arch.mmio_vsx_copy_type ==
1004 KVMPPC_VSX_COPY_DWORD_LOAD_DUMP)
1005 kvmppc_set_vsr_dword_dump(vcpu, gpr);
1006 break;
1007#endif
874 default: 1008 default:
875 BUG(); 1009 BUG();
876 } 1010 }
@@ -937,6 +1071,35 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
937 return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1); 1071 return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1);
938} 1072}
939 1073
1074#ifdef CONFIG_VSX
1075int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
1076 unsigned int rt, unsigned int bytes,
1077 int is_default_endian, int mmio_sign_extend)
1078{
1079 enum emulation_result emulated = EMULATE_DONE;
1080
1081 /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
1082 if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
1083 (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
1084 return EMULATE_FAIL;
1085 }
1086
1087 while (vcpu->arch.mmio_vsx_copy_nums) {
1088 emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
1089 is_default_endian, mmio_sign_extend);
1090
1091 if (emulated != EMULATE_DONE)
1092 break;
1093
1094 vcpu->arch.paddr_accessed += run->mmio.len;
1095
1096 vcpu->arch.mmio_vsx_copy_nums--;
1097 vcpu->arch.mmio_vsx_offset++;
1098 }
1099 return emulated;
1100}
1101#endif /* CONFIG_VSX */
1102
940int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 1103int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
941 u64 val, unsigned int bytes, int is_default_endian) 1104 u64 val, unsigned int bytes, int is_default_endian)
942{ 1105{
@@ -962,6 +1125,9 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
962 vcpu->mmio_needed = 1; 1125 vcpu->mmio_needed = 1;
963 vcpu->mmio_is_write = 1; 1126 vcpu->mmio_is_write = 1;
964 1127
1128 if ((vcpu->arch.mmio_sp64_extend) && (bytes == 4))
1129 val = dp_to_sp(val);
1130
965 /* Store the value at the lowest bytes in 'data'. */ 1131 /* Store the value at the lowest bytes in 'data'. */
966 if (!host_swabbed) { 1132 if (!host_swabbed) {
967 switch (bytes) { 1133 switch (bytes) {
@@ -995,6 +1161,129 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
995} 1161}
996EXPORT_SYMBOL_GPL(kvmppc_handle_store); 1162EXPORT_SYMBOL_GPL(kvmppc_handle_store);
997 1163
1164#ifdef CONFIG_VSX
1165static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
1166{
1167 u32 dword_offset, word_offset;
1168 union kvmppc_one_reg reg;
1169 int vsx_offset = 0;
1170 int copy_type = vcpu->arch.mmio_vsx_copy_type;
1171 int result = 0;
1172
1173 switch (copy_type) {
1174 case KVMPPC_VSX_COPY_DWORD:
1175 vsx_offset =
1176 kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
1177
1178 if (vsx_offset == -1) {
1179 result = -1;
1180 break;
1181 }
1182
1183 if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
1184 *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
1185 } else {
1186 reg.vval = VCPU_VSX_VR(vcpu, rs);
1187 *val = reg.vsxval[vsx_offset];
1188 }
1189 break;
1190
1191 case KVMPPC_VSX_COPY_WORD:
1192 vsx_offset =
1193 kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
1194
1195 if (vsx_offset == -1) {
1196 result = -1;
1197 break;
1198 }
1199
1200 if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
1201 dword_offset = vsx_offset / 2;
1202 word_offset = vsx_offset % 2;
1203 reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
1204 *val = reg.vsx32val[word_offset];
1205 } else {
1206 reg.vval = VCPU_VSX_VR(vcpu, rs);
1207 *val = reg.vsx32val[vsx_offset];
1208 }
1209 break;
1210
1211 default:
1212 result = -1;
1213 break;
1214 }
1215
1216 return result;
1217}
1218
1219int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
1220 int rs, unsigned int bytes, int is_default_endian)
1221{
1222 u64 val;
1223 enum emulation_result emulated = EMULATE_DONE;
1224
1225 vcpu->arch.io_gpr = rs;
1226
1227 /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
1228 if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
1229 (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
1230 return EMULATE_FAIL;
1231 }
1232
1233 while (vcpu->arch.mmio_vsx_copy_nums) {
1234 if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
1235 return EMULATE_FAIL;
1236
1237 emulated = kvmppc_handle_store(run, vcpu,
1238 val, bytes, is_default_endian);
1239
1240 if (emulated != EMULATE_DONE)
1241 break;
1242
1243 vcpu->arch.paddr_accessed += run->mmio.len;
1244
1245 vcpu->arch.mmio_vsx_copy_nums--;
1246 vcpu->arch.mmio_vsx_offset++;
1247 }
1248
1249 return emulated;
1250}
1251
1252static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
1253 struct kvm_run *run)
1254{
1255 enum emulation_result emulated = EMULATE_FAIL;
1256 int r;
1257
1258 vcpu->arch.paddr_accessed += run->mmio.len;
1259
1260 if (!vcpu->mmio_is_write) {
1261 emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr,
1262 run->mmio.len, 1, vcpu->arch.mmio_sign_extend);
1263 } else {
1264 emulated = kvmppc_handle_vsx_store(run, vcpu,
1265 vcpu->arch.io_gpr, run->mmio.len, 1);
1266 }
1267
1268 switch (emulated) {
1269 case EMULATE_DO_MMIO:
1270 run->exit_reason = KVM_EXIT_MMIO;
1271 r = RESUME_HOST;
1272 break;
1273 case EMULATE_FAIL:
1274 pr_info("KVM: MMIO emulation failed (VSX repeat)\n");
1275 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1276 run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
1277 r = RESUME_HOST;
1278 break;
1279 default:
1280 r = RESUME_GUEST;
1281 break;
1282 }
1283 return r;
1284}
1285#endif /* CONFIG_VSX */
1286
998int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) 1287int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
999{ 1288{
1000 int r = 0; 1289 int r = 0;
@@ -1097,13 +1386,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
1097 int r; 1386 int r;
1098 sigset_t sigsaved; 1387 sigset_t sigsaved;
1099 1388
1100 if (vcpu->sigset_active)
1101 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1102
1103 if (vcpu->mmio_needed) { 1389 if (vcpu->mmio_needed) {
1390 vcpu->mmio_needed = 0;
1104 if (!vcpu->mmio_is_write) 1391 if (!vcpu->mmio_is_write)
1105 kvmppc_complete_mmio_load(vcpu, run); 1392 kvmppc_complete_mmio_load(vcpu, run);
1106 vcpu->mmio_needed = 0; 1393#ifdef CONFIG_VSX
1394 if (vcpu->arch.mmio_vsx_copy_nums > 0) {
1395 vcpu->arch.mmio_vsx_copy_nums--;
1396 vcpu->arch.mmio_vsx_offset++;
1397 }
1398
1399 if (vcpu->arch.mmio_vsx_copy_nums > 0) {
1400 r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
1401 if (r == RESUME_HOST) {
1402 vcpu->mmio_needed = 1;
1403 return r;
1404 }
1405 }
1406#endif
1107 } else if (vcpu->arch.osi_needed) { 1407 } else if (vcpu->arch.osi_needed) {
1108 u64 *gprs = run->osi.gprs; 1408 u64 *gprs = run->osi.gprs;
1109 int i; 1409 int i;
@@ -1125,6 +1425,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
1125#endif 1425#endif
1126 } 1426 }
1127 1427
1428 if (vcpu->sigset_active)
1429 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1430
1128 if (run->immediate_exit) 1431 if (run->immediate_exit)
1129 r = -EINTR; 1432 r = -EINTR;
1130 else 1433 else
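
The VSX MMIO path above splits one vector access into up to four MMIO transactions and steers each piece into the right doubleword or word slot of the target VSR, mirroring the element order on little-endian hosts. A small standalone sketch of just that offset arithmetic, with LITTLE_ENDIAN_HOST standing in for the kernel's __BIG_ENDIAN check and all names being illustrative:

    #include <stdio.h>

    /* A VSX register holds two doublewords (or four words); the
     * in-memory element index has to be mirrored on LE hosts. */
    #define LITTLE_ENDIAN_HOST 1

    static int vsr_dword_offset(int index)
    {
            if (index != 0 && index != 1)
                    return -1;
            return LITTLE_ENDIAN_HOST ? 1 - index : index;
    }

    static int vsr_word_offset(int index)
    {
            if (index < 0 || index > 3)
                    return -1;
            return LITTLE_ENDIAN_HOST ? 3 - index : index;
    }

    int main(void)
    {
            int i;

            /* A 16-byte access is split into two 8-byte MMIO loads;
             * mmio_vsx_offset counts up while copy_nums counts down. */
            for (i = 0; i < 2; i++)
                    printf("copy %d -> dword slot %d\n", i, vsr_dword_offset(i));
            for (i = 0; i < 4; i++)
                    printf("copy %d -> word slot %d\n", i, vsr_word_offset(i));
            return 0;
    }
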
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 497130c5c742..fc67bd766eaf 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -314,6 +314,25 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
314} 314}
315EXPORT_SYMBOL_GPL(mm_iommu_lookup); 315EXPORT_SYMBOL_GPL(mm_iommu_lookup);
316 316
317struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
318 unsigned long ua, unsigned long size)
319{
320 struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
321
322 list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
323 next) {
324 if ((mem->ua <= ua) &&
325 (ua + size <= mem->ua +
326 (mem->entries << PAGE_SHIFT))) {
327 ret = mem;
328 break;
329 }
330 }
331
332 return ret;
333}
334EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
335
317struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, 336struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
318 unsigned long ua, unsigned long entries) 337 unsigned long ua, unsigned long entries)
319{ 338{
@@ -345,6 +364,26 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
345} 364}
346EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); 365EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
347 366
367long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
368 unsigned long ua, unsigned long *hpa)
369{
370 const long entry = (ua - mem->ua) >> PAGE_SHIFT;
371 void *va = &mem->hpas[entry];
372 unsigned long *pa;
373
374 if (entry >= mem->entries)
375 return -EFAULT;
376
377 pa = (void *) vmalloc_to_phys(va);
378 if (!pa)
379 return -EFAULT;
380
381 *hpa = *pa | (ua & ~PAGE_MASK);
382
383 return 0;
384}
385EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
386
348long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) 387long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
349{ 388{
350 if (atomic64_inc_not_zero(&mem->mapped)) 389 if (atomic64_inc_not_zero(&mem->mapped))
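
mm_iommu_ua_to_hpa_rm() above resolves a userspace address against a pre-registered region by indexing the saved page-address array and splicing the in-page offset back in. A self-contained sketch of that arithmetic, assuming 64K pages and an already-populated page array (both assumptions for illustration, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT      16      /* 64K pages, typical for ppc64 server configs */
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define PAGE_MASK       (~(PAGE_SIZE - 1))

    /* 'hpas' stands in for the preregistered-memory descriptor's page array. */
    static int ua_to_hpa(uint64_t mem_ua, uint64_t entries, const uint64_t *hpas,
                         uint64_t ua, uint64_t *hpa)
    {
            uint64_t entry = (ua - mem_ua) >> PAGE_SHIFT;

            if (entry >= entries)
                    return -1;
            *hpa = hpas[entry] | (ua & ~PAGE_MASK);
            return 0;
    }

    int main(void)
    {
            uint64_t hpas[2] = { 0x200000000UL, 0x200010000UL };
            uint64_t hpa;

            if (!ua_to_hpa(0x7fff00000000UL, 2, hpas, 0x7fff00011234UL, &hpa))
                    printf("hpa = 0x%llx\n", (unsigned long long)hpa);
            return 0;
    }
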
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index e36738291c32..ee4cdb5b893f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1424,8 +1424,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
1424 iommu_group_put(pe->table_group.group); 1424 iommu_group_put(pe->table_group.group);
1425 BUG_ON(pe->table_group.group); 1425 BUG_ON(pe->table_group.group);
1426 } 1426 }
1427 pnv_pci_ioda2_table_free_pages(tbl); 1427 iommu_tce_table_put(tbl);
1428 iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
1429} 1428}
1430 1429
1431static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) 1430static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
@@ -1860,6 +1859,17 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
1860 1859
1861 return ret; 1860 return ret;
1862} 1861}
1862
1863static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
1864 unsigned long *hpa, enum dma_data_direction *direction)
1865{
1866 long ret = pnv_tce_xchg(tbl, index, hpa, direction);
1867
1868 if (!ret)
1869 pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
1870
1871 return ret;
1872}
1863#endif 1873#endif
1864 1874
1865static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, 1875static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
@@ -1874,6 +1884,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
1874 .set = pnv_ioda1_tce_build, 1884 .set = pnv_ioda1_tce_build,
1875#ifdef CONFIG_IOMMU_API 1885#ifdef CONFIG_IOMMU_API
1876 .exchange = pnv_ioda1_tce_xchg, 1886 .exchange = pnv_ioda1_tce_xchg,
1887 .exchange_rm = pnv_ioda1_tce_xchg_rm,
1877#endif 1888#endif
1878 .clear = pnv_ioda1_tce_free, 1889 .clear = pnv_ioda1_tce_free,
1879 .get = pnv_tce_get, 1890 .get = pnv_tce_get,
@@ -1948,7 +1959,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
1948{ 1959{
1949 struct iommu_table_group_link *tgl; 1960 struct iommu_table_group_link *tgl;
1950 1961
1951 list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { 1962 list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) {
1952 struct pnv_ioda_pe *pe = container_of(tgl->table_group, 1963 struct pnv_ioda_pe *pe = container_of(tgl->table_group,
1953 struct pnv_ioda_pe, table_group); 1964 struct pnv_ioda_pe, table_group);
1954 struct pnv_phb *phb = pe->phb; 1965 struct pnv_phb *phb = pe->phb;
@@ -2004,6 +2015,17 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
2004 2015
2005 return ret; 2016 return ret;
2006} 2017}
2018
2019static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
2020 unsigned long *hpa, enum dma_data_direction *direction)
2021{
2022 long ret = pnv_tce_xchg(tbl, index, hpa, direction);
2023
2024 if (!ret)
2025 pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
2026
2027 return ret;
2028}
2007#endif 2029#endif
2008 2030
2009static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, 2031static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
@@ -2017,13 +2039,13 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
2017static void pnv_ioda2_table_free(struct iommu_table *tbl) 2039static void pnv_ioda2_table_free(struct iommu_table *tbl)
2018{ 2040{
2019 pnv_pci_ioda2_table_free_pages(tbl); 2041 pnv_pci_ioda2_table_free_pages(tbl);
2020 iommu_free_table(tbl, "pnv");
2021} 2042}
2022 2043
2023static struct iommu_table_ops pnv_ioda2_iommu_ops = { 2044static struct iommu_table_ops pnv_ioda2_iommu_ops = {
2024 .set = pnv_ioda2_tce_build, 2045 .set = pnv_ioda2_tce_build,
2025#ifdef CONFIG_IOMMU_API 2046#ifdef CONFIG_IOMMU_API
2026 .exchange = pnv_ioda2_tce_xchg, 2047 .exchange = pnv_ioda2_tce_xchg,
2048 .exchange_rm = pnv_ioda2_tce_xchg_rm,
2027#endif 2049#endif
2028 .clear = pnv_ioda2_tce_free, 2050 .clear = pnv_ioda2_tce_free,
2029 .get = pnv_tce_get, 2051 .get = pnv_tce_get,
@@ -2203,7 +2225,7 @@ found:
2203 __free_pages(tce_mem, get_order(tce32_segsz * segs)); 2225 __free_pages(tce_mem, get_order(tce32_segsz * segs));
2204 if (tbl) { 2226 if (tbl) {
2205 pnv_pci_unlink_table_and_group(tbl, &pe->table_group); 2227 pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
2206 iommu_free_table(tbl, "pnv"); 2228 iommu_tce_table_put(tbl);
2207 } 2229 }
2208} 2230}
2209 2231
@@ -2293,16 +2315,16 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
2293 if (!tbl) 2315 if (!tbl)
2294 return -ENOMEM; 2316 return -ENOMEM;
2295 2317
2318 tbl->it_ops = &pnv_ioda2_iommu_ops;
2319
2296 ret = pnv_pci_ioda2_table_alloc_pages(nid, 2320 ret = pnv_pci_ioda2_table_alloc_pages(nid,
2297 bus_offset, page_shift, window_size, 2321 bus_offset, page_shift, window_size,
2298 levels, tbl); 2322 levels, tbl);
2299 if (ret) { 2323 if (ret) {
2300 iommu_free_table(tbl, "pnv"); 2324 iommu_tce_table_put(tbl);
2301 return ret; 2325 return ret;
2302 } 2326 }
2303 2327
2304 tbl->it_ops = &pnv_ioda2_iommu_ops;
2305
2306 *ptbl = tbl; 2328 *ptbl = tbl;
2307 2329
2308 return 0; 2330 return 0;
@@ -2343,7 +2365,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
2343 if (rc) { 2365 if (rc) {
2344 pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", 2366 pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
2345 rc); 2367 rc);
2346 pnv_ioda2_table_free(tbl); 2368 iommu_tce_table_put(tbl);
2347 return rc; 2369 return rc;
2348 } 2370 }
2349 2371
@@ -2431,7 +2453,7 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
2431 pnv_pci_ioda2_unset_window(&pe->table_group, 0); 2453 pnv_pci_ioda2_unset_window(&pe->table_group, 0);
2432 if (pe->pbus) 2454 if (pe->pbus)
2433 pnv_ioda_setup_bus_dma(pe, pe->pbus, false); 2455 pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
2434 pnv_ioda2_table_free(tbl); 2456 iommu_tce_table_put(tbl);
2435} 2457}
2436 2458
2437static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) 2459static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
@@ -3406,7 +3428,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
3406 } 3428 }
3407 3429
3408 free_pages(tbl->it_base, get_order(tbl->it_size << 3)); 3430 free_pages(tbl->it_base, get_order(tbl->it_size << 3));
3409 iommu_free_table(tbl, "pnv"); 3431 iommu_tce_table_put(tbl);
3410} 3432}
3411 3433
3412static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) 3434static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
@@ -3433,7 +3455,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
3433 } 3455 }
3434 3456
3435 pnv_pci_ioda2_table_free_pages(tbl); 3457 pnv_pci_ioda2_table_free_pages(tbl);
3436 iommu_free_table(tbl, "pnv"); 3458 iommu_tce_table_put(tbl);
3437} 3459}
3438 3460
3439static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, 3461static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
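
The pattern in this file is a lifetime change: direct iommu_free_table() calls become iommu_tce_table_put(), paired with the kref_init() added to the table allocators, so a TCE table only goes away when its last user drops it. A plain-C sketch of that reference-counting shape; table_get() stands in for the matching get helper, which is not shown in these hunks and is assumed here:

    #include <stdio.h>
    #include <stdlib.h>

    struct table {
            int refcount;
            /* ... TCE pages, group list, ... */
    };

    static struct table *table_alloc(void)
    {
            struct table *tbl = calloc(1, sizeof(*tbl));

            if (tbl)
                    tbl->refcount = 1;      /* kref_init() */
            return tbl;
    }

    static void table_get(struct table *tbl)
    {
            tbl->refcount++;                /* another user, e.g. a VFIO container */
    }

    static void table_put(struct table *tbl)
    {
            if (--tbl->refcount == 0) {     /* iommu_tce_table_put() */
                    printf("last reference gone, freeing table pages\n");
                    free(tbl);
            }
    }

    int main(void)
    {
            struct table *tbl = table_alloc();

            table_get(tbl);         /* container takes a reference */
            table_put(tbl);         /* PE teardown */
            table_put(tbl);         /* container detach: actually frees */
            return 0;
    }
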
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index eb835e977e33..204a829ff506 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -767,6 +767,7 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
767 767
768 tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); 768 tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
769 INIT_LIST_HEAD_RCU(&tbl->it_group_list); 769 INIT_LIST_HEAD_RCU(&tbl->it_group_list);
770 kref_init(&tbl->it_kref);
770 771
771 return tbl; 772 return tbl;
772} 773}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 4d757eaa46bf..7ce5db209abf 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -74,6 +74,7 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
74 goto fail_exit; 74 goto fail_exit;
75 75
76 INIT_LIST_HEAD_RCU(&tbl->it_group_list); 76 INIT_LIST_HEAD_RCU(&tbl->it_group_list);
77 kref_init(&tbl->it_kref);
77 tgl->table_group = table_group; 78 tgl->table_group = table_group;
78 list_add_rcu(&tgl->next, &tbl->it_group_list); 79 list_add_rcu(&tgl->next, &tbl->it_group_list);
79 80
@@ -115,7 +116,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group,
115 BUG_ON(table_group->group); 116 BUG_ON(table_group->group);
116 } 117 }
117#endif 118#endif
118 iommu_free_table(tbl, node_name); 119 iommu_tce_table_put(tbl);
119 120
120 kfree(table_group); 121 kfree(table_group);
121} 122}
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 720493932486..28b09fd797ec 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1318,7 +1318,7 @@ static void vio_dev_release(struct device *dev)
1318 struct iommu_table *tbl = get_iommu_table_base(dev); 1318 struct iommu_table *tbl = get_iommu_table_base(dev);
1319 1319
1320 if (tbl) 1320 if (tbl)
1321 iommu_free_table(tbl, of_node_full_name(dev->of_node)); 1321 iommu_tce_table_put(tbl);
1322 of_node_put(dev->of_node); 1322 of_node_put(dev->of_node);
1323 kfree(to_vio_dev(dev)); 1323 kfree(to_vio_dev(dev));
1324} 1324}
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1d48880b3cc1..e8f623041769 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -105,6 +105,7 @@
105#define HWCAP_S390_VXRS 2048 105#define HWCAP_S390_VXRS 2048
106#define HWCAP_S390_VXRS_BCD 4096 106#define HWCAP_S390_VXRS_BCD 4096
107#define HWCAP_S390_VXRS_EXT 8192 107#define HWCAP_S390_VXRS_EXT 8192
108#define HWCAP_S390_GS 16384
108 109
109/* Internal bits, not exposed via elf */ 110/* Internal bits, not exposed via elf */
110#define HWCAP_INT_SIE 1UL 111#define HWCAP_INT_SIE 1UL
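
With HWCAP_S390_GS exported through the ELF auxiliary vector, userspace can probe for guarded storage before trying the new syscall. A hedged sketch; the constant is redefined locally in case the toolchain headers predate this change:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_S390_GS
    #define HWCAP_S390_GS 16384
    #endif

    int main(void)
    {
            unsigned long hwcap = getauxval(AT_HWCAP);

            printf("guarded storage %ssupported\n",
                   (hwcap & HWCAP_S390_GS) ? "" : "not ");
            return 0;
    }
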
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a41faf34b034..426614a882a9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
25#include <asm/cpu.h> 25#include <asm/cpu.h>
26#include <asm/fpu/api.h> 26#include <asm/fpu/api.h>
27#include <asm/isc.h> 27#include <asm/isc.h>
28#include <asm/guarded_storage.h>
28 29
29#define KVM_S390_BSCA_CPU_SLOTS 64 30#define KVM_S390_BSCA_CPU_SLOTS 64
30#define KVM_S390_ESCA_CPU_SLOTS 248 31#define KVM_S390_ESCA_CPU_SLOTS 248
@@ -121,6 +122,7 @@ struct esca_block {
121#define CPUSTAT_SLSR 0x00002000 122#define CPUSTAT_SLSR 0x00002000
122#define CPUSTAT_ZARCH 0x00000800 123#define CPUSTAT_ZARCH 0x00000800
123#define CPUSTAT_MCDS 0x00000100 124#define CPUSTAT_MCDS 0x00000100
125#define CPUSTAT_KSS 0x00000200
124#define CPUSTAT_SM 0x00000080 126#define CPUSTAT_SM 0x00000080
125#define CPUSTAT_IBS 0x00000040 127#define CPUSTAT_IBS 0x00000040
126#define CPUSTAT_GED2 0x00000010 128#define CPUSTAT_GED2 0x00000010
@@ -164,16 +166,27 @@ struct kvm_s390_sie_block {
164#define ICTL_RRBE 0x00001000 166#define ICTL_RRBE 0x00001000
165#define ICTL_TPROT 0x00000200 167#define ICTL_TPROT 0x00000200
166 __u32 ictl; /* 0x0048 */ 168 __u32 ictl; /* 0x0048 */
169#define ECA_CEI 0x80000000
170#define ECA_IB 0x40000000
171#define ECA_SIGPI 0x10000000
172#define ECA_MVPGI 0x01000000
173#define ECA_VX 0x00020000
174#define ECA_PROTEXCI 0x00002000
175#define ECA_SII 0x00000001
167 __u32 eca; /* 0x004c */ 176 __u32 eca; /* 0x004c */
168#define ICPT_INST 0x04 177#define ICPT_INST 0x04
169#define ICPT_PROGI 0x08 178#define ICPT_PROGI 0x08
170#define ICPT_INSTPROGI 0x0C 179#define ICPT_INSTPROGI 0x0C
180#define ICPT_EXTREQ 0x10
171#define ICPT_EXTINT 0x14 181#define ICPT_EXTINT 0x14
182#define ICPT_IOREQ 0x18
183#define ICPT_WAIT 0x1c
172#define ICPT_VALIDITY 0x20 184#define ICPT_VALIDITY 0x20
173#define ICPT_STOP 0x28 185#define ICPT_STOP 0x28
174#define ICPT_OPEREXC 0x2C 186#define ICPT_OPEREXC 0x2C
175#define ICPT_PARTEXEC 0x38 187#define ICPT_PARTEXEC 0x38
176#define ICPT_IOINST 0x40 188#define ICPT_IOINST 0x40
189#define ICPT_KSS 0x5c
177 __u8 icptcode; /* 0x0050 */ 190 __u8 icptcode; /* 0x0050 */
178 __u8 icptstatus; /* 0x0051 */ 191 __u8 icptstatus; /* 0x0051 */
179 __u16 ihcpu; /* 0x0052 */ 192 __u16 ihcpu; /* 0x0052 */
@@ -182,10 +195,19 @@ struct kvm_s390_sie_block {
182 __u32 ipb; /* 0x0058 */ 195 __u32 ipb; /* 0x0058 */
183 __u32 scaoh; /* 0x005c */ 196 __u32 scaoh; /* 0x005c */
184 __u8 reserved60; /* 0x0060 */ 197 __u8 reserved60; /* 0x0060 */
198#define ECB_GS 0x40
199#define ECB_TE 0x10
200#define ECB_SRSI 0x04
201#define ECB_HOSTPROTINT 0x02
185 __u8 ecb; /* 0x0061 */ 202 __u8 ecb; /* 0x0061 */
203#define ECB2_CMMA 0x80
204#define ECB2_IEP 0x20
205#define ECB2_PFMFI 0x08
206#define ECB2_ESCA 0x04
186 __u8 ecb2; /* 0x0062 */ 207 __u8 ecb2; /* 0x0062 */
187#define ECB3_AES 0x04
188#define ECB3_DEA 0x08 208#define ECB3_DEA 0x08
209#define ECB3_AES 0x04
210#define ECB3_RI 0x01
189 __u8 ecb3; /* 0x0063 */ 211 __u8 ecb3; /* 0x0063 */
190 __u32 scaol; /* 0x0064 */ 212 __u32 scaol; /* 0x0064 */
191 __u8 reserved68[4]; /* 0x0068 */ 213 __u8 reserved68[4]; /* 0x0068 */
@@ -219,11 +241,14 @@ struct kvm_s390_sie_block {
219 __u32 crycbd; /* 0x00fc */ 241 __u32 crycbd; /* 0x00fc */
220 __u64 gcr[16]; /* 0x0100 */ 242 __u64 gcr[16]; /* 0x0100 */
221 __u64 gbea; /* 0x0180 */ 243 __u64 gbea; /* 0x0180 */
222 __u8 reserved188[24]; /* 0x0188 */ 244 __u8 reserved188[8]; /* 0x0188 */
245 __u64 sdnxo; /* 0x0190 */
246 __u8 reserved198[8]; /* 0x0198 */
223 __u32 fac; /* 0x01a0 */ 247 __u32 fac; /* 0x01a0 */
224 __u8 reserved1a4[20]; /* 0x01a4 */ 248 __u8 reserved1a4[20]; /* 0x01a4 */
225 __u64 cbrlo; /* 0x01b8 */ 249 __u64 cbrlo; /* 0x01b8 */
226 __u8 reserved1c0[8]; /* 0x01c0 */ 250 __u8 reserved1c0[8]; /* 0x01c0 */
251#define ECD_HOSTREGMGMT 0x20000000
227 __u32 ecd; /* 0x01c8 */ 252 __u32 ecd; /* 0x01c8 */
228 __u8 reserved1cc[18]; /* 0x01cc */ 253 __u8 reserved1cc[18]; /* 0x01cc */
229 __u64 pp; /* 0x01de */ 254 __u64 pp; /* 0x01de */
@@ -498,6 +523,12 @@ struct kvm_s390_local_interrupt {
498#define FIRQ_CNTR_PFAULT 3 523#define FIRQ_CNTR_PFAULT 3
499#define FIRQ_MAX_COUNT 4 524#define FIRQ_MAX_COUNT 4
500 525
526/* mask the AIS mode for a given ISC */
527#define AIS_MODE_MASK(isc) (0x80 >> isc)
528
529#define KVM_S390_AIS_MODE_ALL 0
530#define KVM_S390_AIS_MODE_SINGLE 1
531
501struct kvm_s390_float_interrupt { 532struct kvm_s390_float_interrupt {
502 unsigned long pending_irqs; 533 unsigned long pending_irqs;
503 spinlock_t lock; 534 spinlock_t lock;
@@ -507,6 +538,10 @@ struct kvm_s390_float_interrupt {
507 struct kvm_s390_ext_info srv_signal; 538 struct kvm_s390_ext_info srv_signal;
508 int next_rr_cpu; 539 int next_rr_cpu;
509 unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; 540 unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
541 struct mutex ais_lock;
542 u8 simm;
543 u8 nimm;
544 int ais_enabled;
510}; 545};
511 546
512struct kvm_hw_wp_info_arch { 547struct kvm_hw_wp_info_arch {
@@ -554,6 +589,7 @@ struct kvm_vcpu_arch {
554 /* if vsie is active, currently executed shadow sie control block */ 589 /* if vsie is active, currently executed shadow sie control block */
555 struct kvm_s390_sie_block *vsie_block; 590 struct kvm_s390_sie_block *vsie_block;
556 unsigned int host_acrs[NUM_ACRS]; 591 unsigned int host_acrs[NUM_ACRS];
592 struct gs_cb *host_gscb;
557 struct fpu host_fpregs; 593 struct fpu host_fpregs;
558 struct kvm_s390_local_interrupt local_int; 594 struct kvm_s390_local_interrupt local_int;
559 struct hrtimer ckc_timer; 595 struct hrtimer ckc_timer;
@@ -574,6 +610,7 @@ struct kvm_vcpu_arch {
574 */ 610 */
575 seqcount_t cputm_seqcount; 611 seqcount_t cputm_seqcount;
576 __u64 cputm_start; 612 __u64 cputm_start;
613 bool gs_enabled;
577}; 614};
578 615
579struct kvm_vm_stat { 616struct kvm_vm_stat {
@@ -596,6 +633,7 @@ struct s390_io_adapter {
596 bool maskable; 633 bool maskable;
597 bool masked; 634 bool masked;
598 bool swap; 635 bool swap;
636 bool suppressible;
599 struct rw_semaphore maps_lock; 637 struct rw_semaphore maps_lock;
600 struct list_head maps; 638 struct list_head maps;
601 atomic_t nr_maps; 639 atomic_t nr_maps;
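
AIS_MODE_MASK() maps an interruption subclass to a bit in the new simm/nimm bytes, most significant bit first. A trivial sketch of the mapping:

    #include <stdio.h>

    /* ISCs 0-7 map to the MSB-first bits of the simm/nimm bytes. */
    #define AIS_MODE_MASK(isc) (0x80 >> (isc))

    int main(void)
    {
            int isc;

            for (isc = 0; isc < 8; isc++)
                    printf("ISC %d -> mask 0x%02x\n", isc, AIS_MODE_MASK(isc));
            return 0;
    }
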
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 61261e0e95c0..8a5b082797f8 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,8 +157,8 @@ struct lowcore {
157 __u64 stfle_fac_list[32]; /* 0x0f00 */ 157 __u64 stfle_fac_list[32]; /* 0x0f00 */
158 __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ 158 __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
159 159
160 /* Pointer to vector register save area */ 160 /* Pointer to the machine check extended save area */
161 __u64 vector_save_area_addr; /* 0x11b0 */ 161 __u64 mcesad; /* 0x11b0 */
162 162
163 /* 64 bit extparam used for pfault/diag 250: defined by architecture */ 163 /* 64 bit extparam used for pfault/diag 250: defined by architecture */
164 __u64 ext_params2; /* 0x11B8 */ 164 __u64 ext_params2; /* 0x11B8 */
@@ -182,10 +182,7 @@ struct lowcore {
182 182
183 /* Transaction abort diagnostic block */ 183 /* Transaction abort diagnostic block */
184 __u8 pgm_tdb[256]; /* 0x1800 */ 184 __u8 pgm_tdb[256]; /* 0x1800 */
185 __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ 185 __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
186
187 /* Software defined save area for vector registers */
188 __u8 vector_save_area[1024]; /* 0x1c00 */
189} __packed; 186} __packed;
190 187
191#define S390_lowcore (*((struct lowcore *) 0)) 188#define S390_lowcore (*((struct lowcore *) 0))
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b75fd910386a..e3e8895f5d3e 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -58,7 +58,9 @@ union mci {
58 u64 ie : 1; /* 32 indirect storage error */ 58 u64 ie : 1; /* 32 indirect storage error */
59 u64 ar : 1; /* 33 access register validity */ 59 u64 ar : 1; /* 33 access register validity */
60 u64 da : 1; /* 34 delayed access exception */ 60 u64 da : 1; /* 34 delayed access exception */
61 u64 : 7; /* 35-41 */ 61 u64 : 1; /* 35 */
62 u64 gs : 1; /* 36 guarded storage registers */
63 u64 : 5; /* 37-41 */
62 u64 pr : 1; /* 42 tod programmable register validity */ 64 u64 pr : 1; /* 42 tod programmable register validity */
63 u64 fc : 1; /* 43 fp control register validity */ 65 u64 fc : 1; /* 43 fp control register validity */
64 u64 ap : 1; /* 44 ancillary report */ 66 u64 ap : 1; /* 44 ancillary report */
@@ -69,6 +71,14 @@ union mci {
69 }; 71 };
70}; 72};
71 73
74#define MCESA_ORIGIN_MASK (~0x3ffUL)
75#define MCESA_LC_MASK (0xfUL)
76
77struct mcesa {
78 u8 vector_save_area[1024];
79 u8 guarded_storage_save_area[32];
80};
81
72struct pt_regs; 82struct pt_regs;
73 83
74extern void s390_handle_mcck(void); 84extern void s390_handle_mcck(void);
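
The mcesad lowcore field introduced just above is expected to carry the extended save area origin in the upper bits and a length characteristic in the low nibble, which is what MCESA_ORIGIN_MASK and MCESA_LC_MASK split apart. A sketch of that split; the example value and the exact field layout are assumptions for illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define MCESA_ORIGIN_MASK       (~0x3ffUL)
    #define MCESA_LC_MASK           (0xfUL)

    int main(void)
    {
            uint64_t mcesad = 0x12345c00UL | 0xa;   /* made-up example value */

            printf("origin = 0x%llx, lc = %llu\n",
                   (unsigned long long)(mcesad & MCESA_ORIGIN_MASK),
                   (unsigned long long)(mcesad & MCESA_LC_MASK));
            return 0;
    }
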
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e4988710aa86..cc101f9371cb 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -135,6 +135,8 @@ struct thread_struct {
135 struct list_head list; 135 struct list_head list;
136 /* cpu runtime instrumentation */ 136 /* cpu runtime instrumentation */
137 struct runtime_instr_cb *ri_cb; 137 struct runtime_instr_cb *ri_cb;
138 struct gs_cb *gs_cb; /* Current guarded storage cb */
139 struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
138 unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ 140 unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
139 /* 141 /*
140 * Warning: 'fpu' is dynamically-sized. It *MUST* be at 142 * Warning: 'fpu' is dynamically-sized. It *MUST* be at
@@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m);
215/* Free all resources held by a thread. */ 217/* Free all resources held by a thread. */
216extern void release_thread(struct task_struct *); 218extern void release_thread(struct task_struct *);
217 219
220/* Free guarded storage control block for current */
221void exit_thread_gs(void);
222
218/* 223/*
219 * Return saved PC of a blocked thread. 224 * Return saved PC of a blocked thread.
220 */ 225 */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index ace3bd315438..6f5167bc1928 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -75,6 +75,7 @@ struct sclp_info {
75 unsigned char has_pfmfi : 1; 75 unsigned char has_pfmfi : 1;
76 unsigned char has_ibs : 1; 76 unsigned char has_ibs : 1;
77 unsigned char has_skey : 1; 77 unsigned char has_skey : 1;
78 unsigned char has_kss : 1;
78 unsigned int ibc; 79 unsigned int ibc;
79 unsigned int mtid; 80 unsigned int mtid;
80 unsigned int mtid_cp; 81 unsigned int mtid_cp;
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 30bdb5a027f3..383bd8358a8c 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -31,6 +31,7 @@
31#define MACHINE_FLAG_VX _BITUL(13) 31#define MACHINE_FLAG_VX _BITUL(13)
32#define MACHINE_FLAG_CAD _BITUL(14) 32#define MACHINE_FLAG_CAD _BITUL(14)
33#define MACHINE_FLAG_NX _BITUL(15) 33#define MACHINE_FLAG_NX _BITUL(15)
34#define MACHINE_FLAG_GS _BITUL(16)
34 35
35#define LPP_MAGIC _BITUL(31) 36#define LPP_MAGIC _BITUL(31)
36#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) 37#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -70,6 +71,7 @@ extern void detect_memory_memblock(void);
70#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) 71#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
71#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) 72#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
72#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) 73#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
74#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
73 75
74/* 76/*
75 * Console mode. Override with conmode= 77 * Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 12d45f0cfdd9..f6c2b5814ab0 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -10,6 +10,7 @@
10#include <linux/thread_info.h> 10#include <linux/thread_info.h>
11#include <asm/fpu/api.h> 11#include <asm/fpu/api.h>
12#include <asm/ptrace.h> 12#include <asm/ptrace.h>
13#include <asm/guarded_storage.h>
13 14
14extern struct task_struct *__switch_to(void *, void *); 15extern struct task_struct *__switch_to(void *, void *);
15extern void update_cr_regs(struct task_struct *task); 16extern void update_cr_regs(struct task_struct *task);
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs)
33 save_fpu_regs(); \ 34 save_fpu_regs(); \
34 save_access_regs(&prev->thread.acrs[0]); \ 35 save_access_regs(&prev->thread.acrs[0]); \
35 save_ri_cb(prev->thread.ri_cb); \ 36 save_ri_cb(prev->thread.ri_cb); \
37 save_gs_cb(prev->thread.gs_cb); \
36 } \ 38 } \
37 if (next->mm) { \ 39 if (next->mm) { \
38 update_cr_regs(next); \ 40 update_cr_regs(next); \
39 set_cpu_flag(CIF_FPU); \ 41 set_cpu_flag(CIF_FPU); \
40 restore_access_regs(&next->thread.acrs[0]); \ 42 restore_access_regs(&next->thread.acrs[0]); \
41 restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ 43 restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
44 restore_gs_cb(next->thread.gs_cb); \
42 } \ 45 } \
43 prev = __switch_to(prev,next); \ 46 prev = __switch_to(prev,next); \
44} while (0) 47} while (0)
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a5b54a445eb8..f36e6e2b73f0 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
54#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ 54#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
55#define TIF_SIGPENDING 1 /* signal pending */ 55#define TIF_SIGPENDING 1 /* signal pending */
56#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ 56#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
57#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ 57#define TIF_UPROBE 3 /* breakpointed or single-stepping */
58#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ 58#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
59#define TIF_SECCOMP 5 /* secure computing */ 59#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
60#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ 60#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
61#define TIF_UPROBE 7 /* breakpointed or single-stepping */ 61#define TIF_SECCOMP 10 /* secure computing */
62#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
62#define TIF_31BIT 16 /* 32bit process */ 63#define TIF_31BIT 16 /* 32bit process */
63#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ 64#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
64#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ 65#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
76#define _TIF_UPROBE _BITUL(TIF_UPROBE) 77#define _TIF_UPROBE _BITUL(TIF_UPROBE)
77#define _TIF_31BIT _BITUL(TIF_31BIT) 78#define _TIF_31BIT _BITUL(TIF_31BIT)
78#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) 79#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
80#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
79 81
80#endif /* _ASM_THREAD_INFO_H */ 82#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 6848ba5c1454..86b761e583e3 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -12,6 +12,7 @@ header-y += dasd.h
12header-y += debug.h 12header-y += debug.h
13header-y += errno.h 13header-y += errno.h
14header-y += fcntl.h 14header-y += fcntl.h
15header-y += guarded_storage.h
15header-y += hypfs.h 16header-y += hypfs.h
16header-y += ioctl.h 17header-y += ioctl.h
17header-y += ioctls.h 18header-y += ioctls.h
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 000000000000..852850e8e17e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,77 @@
1#ifndef _GUARDED_STORAGE_H
2#define _GUARDED_STORAGE_H
3
4#include <linux/types.h>
5
6struct gs_cb {
7 __u64 reserved;
8 __u64 gsd;
9 __u64 gssm;
10 __u64 gs_epl_a;
11};
12
13struct gs_epl {
14 __u8 pad1;
15 union {
16 __u8 gs_eam;
17 struct {
18 __u8 : 6;
19 __u8 e : 1;
20 __u8 b : 1;
21 };
22 };
23 union {
24 __u8 gs_eci;
25 struct {
26 __u8 tx : 1;
27 __u8 cx : 1;
28 __u8 : 5;
29 __u8 in : 1;
30 };
31 };
32 union {
33 __u8 gs_eai;
34 struct {
35 __u8 : 1;
36 __u8 t : 1;
37 __u8 as : 2;
38 __u8 ar : 4;
39 };
40 };
41 __u32 pad2;
42 __u64 gs_eha;
43 __u64 gs_eia;
44 __u64 gs_eoa;
45 __u64 gs_eir;
46 __u64 gs_era;
47};
48
49#define GS_ENABLE 0
50#define GS_DISABLE 1
51#define GS_SET_BC_CB 2
52#define GS_CLEAR_BC_CB 3
53#define GS_BROADCAST 4
54
55static inline void load_gs_cb(struct gs_cb *gs_cb)
56{
57 asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
58}
59
60static inline void store_gs_cb(struct gs_cb *gs_cb)
61{
62 asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
63}
64
65static inline void save_gs_cb(struct gs_cb *gs_cb)
66{
67 if (gs_cb)
68 store_gs_cb(gs_cb);
69}
70
71static inline void restore_gs_cb(struct gs_cb *gs_cb)
72{
73 if (gs_cb)
74 load_gs_cb(gs_cb);
75}
76
77#endif /* _GUARDED_STORAGE_H */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index a2ffec4139ad..bf9267930939 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -26,6 +26,8 @@
26#define KVM_DEV_FLIC_ADAPTER_REGISTER 6 26#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
27#define KVM_DEV_FLIC_ADAPTER_MODIFY 7 27#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
28#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 28#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
29#define KVM_DEV_FLIC_AISM 9
30#define KVM_DEV_FLIC_AIRQ_INJECT 10
29/* 31/*
30 * We can have up to 4*64k pending subchannels + 8 adapter interrupts, 32 * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
31 * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. 33 * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -41,7 +43,14 @@ struct kvm_s390_io_adapter {
41 __u8 isc; 43 __u8 isc;
42 __u8 maskable; 44 __u8 maskable;
43 __u8 swap; 45 __u8 swap;
44 __u8 pad; 46 __u8 flags;
47};
48
49#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
50
51struct kvm_s390_ais_req {
52 __u8 isc;
53 __u16 mode;
45}; 54};
46 55
47#define KVM_S390_IO_ADAPTER_MASK 1 56#define KVM_S390_IO_ADAPTER_MASK 1
@@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine {
110#define KVM_S390_VM_CPU_FEAT_CMMA 10 119#define KVM_S390_VM_CPU_FEAT_CMMA 10
111#define KVM_S390_VM_CPU_FEAT_PFMFI 11 120#define KVM_S390_VM_CPU_FEAT_PFMFI 11
112#define KVM_S390_VM_CPU_FEAT_SIGPIF 12 121#define KVM_S390_VM_CPU_FEAT_SIGPIF 12
122#define KVM_S390_VM_CPU_FEAT_KSS 13
113struct kvm_s390_vm_cpu_feat { 123struct kvm_s390_vm_cpu_feat {
114 __u64 feat[16]; 124 __u64 feat[16];
115}; 125};
@@ -197,6 +207,10 @@ struct kvm_guest_debug_arch {
197#define KVM_SYNC_VRS (1UL << 6) 207#define KVM_SYNC_VRS (1UL << 6)
198#define KVM_SYNC_RICCB (1UL << 7) 208#define KVM_SYNC_RICCB (1UL << 7)
199#define KVM_SYNC_FPRS (1UL << 8) 209#define KVM_SYNC_FPRS (1UL << 8)
210#define KVM_SYNC_GSCB (1UL << 9)
211/* length and alignment of the sdnx as a power of two */
212#define SDNXC 8
213#define SDNXL (1UL << SDNXC)
200/* definition of registers in kvm_run */ 214/* definition of registers in kvm_run */
201struct kvm_sync_regs { 215struct kvm_sync_regs {
202 __u64 prefix; /* prefix register */ 216 __u64 prefix; /* prefix register */
@@ -217,8 +231,16 @@ struct kvm_sync_regs {
217 }; 231 };
218 __u8 reserved[512]; /* for future vector expansion */ 232 __u8 reserved[512]; /* for future vector expansion */
219 __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ 233 __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
220 __u8 padding[52]; /* riccb needs to be 64byte aligned */ 234 __u8 padding1[52]; /* riccb needs to be 64byte aligned */
221 __u8 riccb[64]; /* runtime instrumentation controls block */ 235 __u8 riccb[64]; /* runtime instrumentation controls block */
236 __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
237 union {
238 __u8 sdnx[SDNXL]; /* state description annex */
239 struct {
240 __u64 reserved1[2];
241 __u64 gscb[4];
242 };
243 };
222}; 244};
223 245
224#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) 246#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
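
The new tail of kvm_sync_regs packs the guarded storage control block into a 256-byte state description annex (sdnx). A small re-declaration of just that union, to show where gscb lands within it; the surrounding kvm_sync_regs fields are omitted, so absolute offsets inside kvm_run are not reproduced here:

    #include <stdio.h>
    #include <stddef.h>

    #define SDNXC 8
    #define SDNXL (1UL << SDNXC)

    union sdnx_tail {
            unsigned char sdnx[SDNXL];
            struct {
                    unsigned long long reserved1[2];
                    unsigned long long gscb[4];
            };
    };

    int main(void)
    {
            printf("sdnx is %lu bytes, gscb at offset %zu within it\n",
                   SDNXL, offsetof(union sdnx_tail, gscb));
            return 0;
    }
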
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 152de9b796e1..ea42290e7d51 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,7 @@
313#define __NR_copy_file_range 375 313#define __NR_copy_file_range 375
314#define __NR_preadv2 376 314#define __NR_preadv2 376
315#define __NR_pwritev2 377 315#define __NR_pwritev2 377
316/* Number 378 is reserved for guarded storage */ 316#define __NR_s390_guarded_storage 378
317#define __NR_statx 379 317#define __NR_statx 379
318#define NR_syscalls 380 318#define NR_syscalls 380
319 319
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 060ce548fe8b..aa5adbdaf200 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
57obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o 57obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
58obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o 58obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
59obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o 59obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
60obj-y += runtime_instr.o cache.o fpu.o dumpstack.o 60obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
61obj-y += entry.o reipl.o relocate_kernel.o 61obj-y += entry.o reipl.o relocate_kernel.o
62 62
63extra-y += head.o head64.o vmlinux.lds 63extra-y += head.o head64.o vmlinux.lds
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c4b3570ded5b..6bb29633e1f1 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,7 +175,7 @@ int main(void)
175 /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ 175 /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
176 OFFSET(__LC_DUMP_REIPL, lowcore, ipib); 176 OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
177 /* hardware defined lowcore locations 0x1000 - 0x18ff */ 177 /* hardware defined lowcore locations 0x1000 - 0x18ff */
178 OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); 178 OFFSET(__LC_MCESAD, lowcore, mcesad);
179 OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); 179 OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
180 OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); 180 OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
181 OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); 181 OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index e89cc2e71db1..986642a3543b 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
178COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); 178COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
179COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); 179COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
180COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); 180COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
181COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
181COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); 182COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 4e65c79cc5f2..95298a41076f 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void)
358 S390_lowcore.machine_flags |= MACHINE_FLAG_NX; 358 S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
359 __ctl_set_bit(0, 20); 359 __ctl_set_bit(0, 20);
360 } 360 }
361 if (test_facility(133))
362 S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
361} 363}
362 364
363static inline void save_vector_registers(void) 365static inline void save_vector_registers(void)
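
test_facility(133) keys the new machine flag off the guarded-storage facility bit in the STFLE list. Facility bits are numbered from the leftmost bit of that list, so a standalone check over a raw STFLE buffer looks roughly like this (the buffer contents below are made up for illustration):

    #include <stdio.h>

    /* Facility N sits in byte N/8 of the STFLE result, counted from the
     * most significant bit of that byte. */
    static int facility_set(const unsigned char *facilities, int nr)
    {
            return (facilities[nr >> 3] & (0x80 >> (nr & 7))) != 0;
    }

    int main(void)
    {
            unsigned char facilities[32] = { 0 };

            facilities[133 >> 3] |= 0x80 >> (133 & 7);  /* pretend facility 133 is present */
            printf("guarded storage facility: %d\n", facility_set(facilities, 133));
            return 0;
    }
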
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6a7d737d514c..fa8b8f28e08b 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT
47STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE 47STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
48 48
49_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ 49_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
50 _TIF_UPROBE) 50 _TIF_UPROBE | _TIF_GUARDED_STORAGE)
51_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ 51_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
52 _TIF_SYSCALL_TRACEPOINT) 52 _TIF_SYSCALL_TRACEPOINT)
53_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ 53_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -332,6 +332,8 @@ ENTRY(system_call)
332 TSTMSK __TI_flags(%r12),_TIF_UPROBE 332 TSTMSK __TI_flags(%r12),_TIF_UPROBE
333 jo .Lsysc_uprobe_notify 333 jo .Lsysc_uprobe_notify
334#endif 334#endif
335 TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
336 jo .Lsysc_guarded_storage
335 TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP 337 TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
336 jo .Lsysc_singlestep 338 jo .Lsysc_singlestep
337 TSTMSK __TI_flags(%r12),_TIF_SIGPENDING 339 TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
@@ -409,6 +411,14 @@ ENTRY(system_call)
409#endif 411#endif
410 412
411# 413#
414# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
415#
416.Lsysc_guarded_storage:
417 lgr %r2,%r11 # pass pointer to pt_regs
418 larl %r14,.Lsysc_return
419 jg gs_load_bc_cb
420
421#
412# _PIF_PER_TRAP is set, call do_per_trap 422# _PIF_PER_TRAP is set, call do_per_trap
413# 423#
414.Lsysc_singlestep: 424.Lsysc_singlestep:
@@ -663,6 +673,8 @@ ENTRY(io_int_handler)
663 jo .Lio_sigpending 673 jo .Lio_sigpending
664 TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME 674 TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
665 jo .Lio_notify_resume 675 jo .Lio_notify_resume
676 TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
677 jo .Lio_guarded_storage
666 TSTMSK __LC_CPU_FLAGS,_CIF_FPU 678 TSTMSK __LC_CPU_FLAGS,_CIF_FPU
667 jo .Lio_vxrs 679 jo .Lio_vxrs
668 TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) 680 TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
@@ -697,6 +709,18 @@ ENTRY(io_int_handler)
697 jg load_fpu_regs 709 jg load_fpu_regs
698 710
699# 711#
712# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
713#
714.Lio_guarded_storage:
715 # TRACE_IRQS_ON already done at .Lio_return
716 ssm __LC_SVC_NEW_PSW # reenable interrupts
717 lgr %r2,%r11 # pass pointer to pt_regs
718 brasl %r14,gs_load_bc_cb
719 ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
720 TRACE_IRQS_OFF
721 j .Lio_return
722
723#
700# _TIF_NEED_RESCHED is set, call schedule 724# _TIF_NEED_RESCHED is set, call schedule
701# 725#
702.Lio_reschedule: 726.Lio_reschedule:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 33f901865326..dbf5f7e18246 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,12 +74,14 @@ long sys_sigreturn(void);
74 74
75long sys_s390_personality(unsigned int personality); 75long sys_s390_personality(unsigned int personality);
76long sys_s390_runtime_instr(int command, int signum); 76long sys_s390_runtime_instr(int command, int signum);
77long sys_s390_guarded_storage(int command, struct gs_cb __user *);
77long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); 78long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
78long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); 79long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
79 80
80DECLARE_PER_CPU(u64, mt_cycles[8]); 81DECLARE_PER_CPU(u64, mt_cycles[8]);
81 82
82void verify_facilities(void); 83void verify_facilities(void);
84void gs_load_bc_cb(struct pt_regs *regs);
83void set_fs_fixup(void); 85void set_fs_fixup(void);
84 86
85#endif /* _ENTRY_H */ 87#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 000000000000..6f064745c3b1
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
1/*
2 * Copyright IBM Corp. 2016
3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
4 */
5
6#include <linux/kernel.h>
7#include <linux/syscalls.h>
8#include <linux/signal.h>
9#include <linux/mm.h>
10#include <linux/slab.h>
11#include <asm/guarded_storage.h>
12#include "entry.h"
13
14void exit_thread_gs(void)
15{
16 kfree(current->thread.gs_cb);
17 kfree(current->thread.gs_bc_cb);
18 current->thread.gs_cb = current->thread.gs_bc_cb = NULL;
19}
20
21static int gs_enable(void)
22{
23 struct gs_cb *gs_cb;
24
25 if (!current->thread.gs_cb) {
26 gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
27 if (!gs_cb)
28 return -ENOMEM;
29 gs_cb->gsd = 25;
30 preempt_disable();
31 __ctl_set_bit(2, 4);
32 load_gs_cb(gs_cb);
33 current->thread.gs_cb = gs_cb;
34 preempt_enable();
35 }
36 return 0;
37}
38
39static int gs_disable(void)
40{
41 if (current->thread.gs_cb) {
42 preempt_disable();
43 kfree(current->thread.gs_cb);
44 current->thread.gs_cb = NULL;
45 __ctl_clear_bit(2, 4);
46 preempt_enable();
47 }
48 return 0;
49}
50
51static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
52{
53 struct gs_cb *gs_cb;
54
55 gs_cb = current->thread.gs_bc_cb;
56 if (!gs_cb) {
57 gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
58 if (!gs_cb)
59 return -ENOMEM;
60 current->thread.gs_bc_cb = gs_cb;
61 }
62 if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
63 return -EFAULT;
64 return 0;
65}
66
67static int gs_clear_bc_cb(void)
68{
69 struct gs_cb *gs_cb;
70
71 gs_cb = current->thread.gs_bc_cb;
72 current->thread.gs_bc_cb = NULL;
73 kfree(gs_cb);
74 return 0;
75}
76
77void gs_load_bc_cb(struct pt_regs *regs)
78{
79 struct gs_cb *gs_cb;
80
81 preempt_disable();
82 clear_thread_flag(TIF_GUARDED_STORAGE);
83 gs_cb = current->thread.gs_bc_cb;
84 if (gs_cb) {
85 kfree(current->thread.gs_cb);
86 current->thread.gs_bc_cb = NULL;
87 __ctl_set_bit(2, 4);
88 load_gs_cb(gs_cb);
89 current->thread.gs_cb = gs_cb;
90 }
91 preempt_enable();
92}
93
94static int gs_broadcast(void)
95{
96 struct task_struct *sibling;
97
98 read_lock(&tasklist_lock);
99 for_each_thread(current, sibling) {
100 if (!sibling->thread.gs_bc_cb)
101 continue;
102 if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
103 kick_process(sibling);
104 }
105 read_unlock(&tasklist_lock);
106 return 0;
107}
108
109SYSCALL_DEFINE2(s390_guarded_storage, int, command,
110 struct gs_cb __user *, gs_cb)
111{
112 if (!MACHINE_HAS_GS)
113 return -EOPNOTSUPP;
114 switch (command) {
115 case GS_ENABLE:
116 return gs_enable();
117 case GS_DISABLE:
118 return gs_disable();
119 case GS_SET_BC_CB:
120 return gs_set_bc_cb(gs_cb);
121 case GS_CLEAR_BC_CB:
122 return gs_clear_bc_cb();
123 case GS_BROADCAST:
124 return gs_broadcast();
125 default:
126 return -EINVAL;
127 }
128}
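
A minimal user-space sketch of driving the new interface (illustrative only, not part of the patch): it assumes struct gs_cb and the GS_* command values come from the uapi <asm/guarded_storage.h> header added by this series, and it uses the raw syscall number 378 assigned in the syscalls.S hunk further down.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/guarded_storage.h>	/* struct gs_cb, GS_* commands (assumed uapi header) */

int main(void)
{
	struct gs_cb cb;

	/* Allocate and load a guarded-storage control block for this thread. */
	if (syscall(378, GS_ENABLE, NULL)) {
		perror("GS_ENABLE");
		return 1;
	}

	/* Register a broadcast control block; gs_broadcast() above then flags
	 * sibling threads that also registered one, so gs_load_bc_cb() reloads
	 * it on their next return to user space.
	 */
	memset(&cb, 0, sizeof(cb));
	if (syscall(378, GS_SET_BC_CB, &cb) || syscall(378, GS_BROADCAST, NULL))
		perror("GS_SET_BC_CB/GS_BROADCAST");

	/* Drop guarded storage for the current thread again. */
	syscall(378, GS_DISABLE, NULL);
	return 0;
}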
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3074c1d83829..db5658daf994 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
27#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
28#include <asm/os_info.h> 28#include <asm/os_info.h>
29#include <asm/switch_to.h> 29#include <asm/switch_to.h>
30#include <asm/nmi.h>
30 31
31typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); 32typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
32 33
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image)
102 */ 103 */
103static noinline void __machine_kdump(void *image) 104static noinline void __machine_kdump(void *image)
104{ 105{
106 struct mcesa *mcesa;
107 unsigned long cr2_old, cr2_new;
105 int this_cpu, cpu; 108 int this_cpu, cpu;
106 109
107 lgr_info_log(); 110 lgr_info_log();
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image)
114 continue; 117 continue;
115 } 118 }
116 /* Store status of the boot CPU */ 119 /* Store status of the boot CPU */
120 mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
117 if (MACHINE_HAS_VX) 121 if (MACHINE_HAS_VX)
118 save_vx_regs((void *) &S390_lowcore.vector_save_area); 122 save_vx_regs((__vector128 *) mcesa->vector_save_area);
123 if (MACHINE_HAS_GS) {
124 __ctl_store(cr2_old, 2, 2);
125 cr2_new = cr2_old | (1UL << 4);
126 __ctl_load(cr2_new, 2, 2);
127 save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
128 __ctl_load(cr2_old, 2, 2);
129 }
119 /* 130 /*
120 * To create a good backchain for this CPU in the dump store_status 131 * To create a good backchain for this CPU in the dump store_status
121 * is passed the address of a function. The address is saved into 132 * is passed the address of a function. The address is saved into
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 9bf8327154ee..985589523970 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
106 int kill_task; 106 int kill_task;
107 u64 zero; 107 u64 zero;
108 void *fpt_save_area; 108 void *fpt_save_area;
109 struct mcesa *mcesa;
109 110
110 kill_task = 0; 111 kill_task = 0;
111 zero = 0; 112 zero = 0;
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
165 : : "Q" (S390_lowcore.fpt_creg_save_area)); 166 : : "Q" (S390_lowcore.fpt_creg_save_area));
166 } 167 }
167 168
169 mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
168 if (!MACHINE_HAS_VX) { 170 if (!MACHINE_HAS_VX) {
169 /* Validate floating point registers */ 171 /* Validate floating point registers */
170 asm volatile( 172 asm volatile(
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode)
209 " la 1,%0\n" 211 " la 1,%0\n"
210 " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ 212 " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
211 " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ 213 " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
212 : : "Q" (*(struct vx_array *) 214 : : "Q" (*(struct vx_array *) mcesa->vector_save_area)
213 &S390_lowcore.vector_save_area) : "1"); 215 : "1");
214 __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); 216 __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
215 } 217 }
216 /* Validate access registers */ 218 /* Validate access registers */
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
224 */ 226 */
225 kill_task = 1; 227 kill_task = 1;
226 } 228 }
229 /* Validate guarded storage registers */
230 if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) {
231 if (!mci.gs)
232 /*
233 * Guarded storage register can't be restored and
234 * the current process uses guarded storage.
235 * It has to be terminated.
236 */
237 kill_task = 1;
238 else
239 load_gs_cb((struct gs_cb *)
240 mcesa->guarded_storage_save_area);
241 }
227 /* 242 /*
228 * We don't even try to validate the TOD register, since we simply 243 * We don't even try to validate the TOD register, since we simply
229 * can't write something sensible into that register. 244 * can't write something sensible into that register.
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f29e41c5e2ec..999d7154bbdc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void);
73 */ 73 */
74void exit_thread(struct task_struct *tsk) 74void exit_thread(struct task_struct *tsk)
75{ 75{
76 if (tsk == current) 76 if (tsk == current) {
77 exit_thread_runtime_instr(); 77 exit_thread_runtime_instr();
78 exit_thread_gs();
79 }
78} 80}
79 81
80void flush_thread(void) 82void flush_thread(void)
@@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
159 /* Don't copy runtime instrumentation info */ 161 /* Don't copy runtime instrumentation info */
160 p->thread.ri_cb = NULL; 162 p->thread.ri_cb = NULL;
161 frame->childregs.psw.mask &= ~PSW_MASK_RI; 163 frame->childregs.psw.mask &= ~PSW_MASK_RI;
164 /* Don't copy guarded storage control block */
165 p->thread.gs_cb = NULL;
166 p->thread.gs_bc_cb = NULL;
162 167
163 /* Set a new TLS ? */ 168 /* Set a new TLS ? */
164 if (clone_flags & CLONE_SETTLS) { 169 if (clone_flags & CLONE_SETTLS) {
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 928b929a6261..c73709869447 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
95{ 95{
96 static const char *hwcap_str[] = { 96 static const char *hwcap_str[] = {
97 "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", 97 "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
98 "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" 98 "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
99 }; 99 };
100 static const char * const int_hwcap_str[] = { 100 static const char * const int_hwcap_str[] = {
101 "sie" 101 "sie"
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c14df0a1ec3c..c933e255b5d5 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task)
44 struct pt_regs *regs = task_pt_regs(task); 44 struct pt_regs *regs = task_pt_regs(task);
45 struct thread_struct *thread = &task->thread; 45 struct thread_struct *thread = &task->thread;
46 struct per_regs old, new; 46 struct per_regs old, new;
47 47 unsigned long cr0_old, cr0_new;
48 unsigned long cr2_old, cr2_new;
49 int cr0_changed, cr2_changed;
50
51 __ctl_store(cr0_old, 0, 0);
52 __ctl_store(cr2_old, 2, 2);
53 cr0_new = cr0_old;
54 cr2_new = cr2_old;
48 /* Take care of the enable/disable of transactional execution. */ 55 /* Take care of the enable/disable of transactional execution. */
49 if (MACHINE_HAS_TE) { 56 if (MACHINE_HAS_TE) {
50 unsigned long cr, cr_new;
51
52 __ctl_store(cr, 0, 0);
53 /* Set or clear transaction execution TXC bit 8. */ 57 /* Set or clear transaction execution TXC bit 8. */
54 cr_new = cr | (1UL << 55); 58 cr0_new |= (1UL << 55);
55 if (task->thread.per_flags & PER_FLAG_NO_TE) 59 if (task->thread.per_flags & PER_FLAG_NO_TE)
56 cr_new &= ~(1UL << 55); 60 cr0_new &= ~(1UL << 55);
57 if (cr_new != cr)
58 __ctl_load(cr_new, 0, 0);
59 /* Set or clear transaction execution TDC bits 62 and 63. */ 61 /* Set or clear transaction execution TDC bits 62 and 63. */
60 __ctl_store(cr, 2, 2); 62 cr2_new &= ~3UL;
61 cr_new = cr & ~3UL;
62 if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { 63 if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
63 if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) 64 if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
64 cr_new |= 1UL; 65 cr2_new |= 1UL;
65 else 66 else
66 cr_new |= 2UL; 67 cr2_new |= 2UL;
67 } 68 }
68 if (cr_new != cr)
69 __ctl_load(cr_new, 2, 2);
70 } 69 }
70 /* Take care of enable/disable of guarded storage. */
71 if (MACHINE_HAS_GS) {
72 cr2_new &= ~(1UL << 4);
73 if (task->thread.gs_cb)
74 cr2_new |= (1UL << 4);
75 }
76 /* Load control register 0/2 iff changed */
77 cr0_changed = cr0_new != cr0_old;
78 cr2_changed = cr2_new != cr2_old;
79 if (cr0_changed)
80 __ctl_load(cr0_new, 0, 0);
81 if (cr2_changed)
82 __ctl_load(cr2_new, 2, 2);
71 /* Copy user specified PER registers */ 83 /* Copy user specified PER registers */
72 new.control = thread->per_user.control; 84 new.control = thread->per_user.control;
73 new.start = thread->per_user.start; 85 new.start = thread->per_user.start;
@@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target,
1137 data, 0, sizeof(unsigned int)); 1149 data, 0, sizeof(unsigned int));
1138} 1150}
1139 1151
1152static int s390_gs_cb_get(struct task_struct *target,
1153 const struct user_regset *regset,
1154 unsigned int pos, unsigned int count,
1155 void *kbuf, void __user *ubuf)
1156{
1157 struct gs_cb *data = target->thread.gs_cb;
1158
1159 if (!MACHINE_HAS_GS)
1160 return -ENODEV;
1161 if (!data)
1162 return -ENODATA;
1163 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
1164 data, 0, sizeof(struct gs_cb));
1165}
1166
1167static int s390_gs_cb_set(struct task_struct *target,
1168 const struct user_regset *regset,
1169 unsigned int pos, unsigned int count,
1170 const void *kbuf, const void __user *ubuf)
1171{
1172 struct gs_cb *data = target->thread.gs_cb;
1173
1174 if (!MACHINE_HAS_GS)
1175 return -ENODEV;
1176 if (!data)
1177 return -ENODATA;
1178 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
1179 data, 0, sizeof(struct gs_cb));
1180}
1181
1140static const struct user_regset s390_regsets[] = { 1182static const struct user_regset s390_regsets[] = {
1141 { 1183 {
1142 .core_note_type = NT_PRSTATUS, 1184 .core_note_type = NT_PRSTATUS,
@@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = {
1194 .get = s390_vxrs_high_get, 1236 .get = s390_vxrs_high_get,
1195 .set = s390_vxrs_high_set, 1237 .set = s390_vxrs_high_set,
1196 }, 1238 },
1239 {
1240 .core_note_type = NT_S390_GS_CB,
1241 .n = sizeof(struct gs_cb) / sizeof(__u64),
1242 .size = sizeof(__u64),
1243 .align = sizeof(__u64),
1244 .get = s390_gs_cb_get,
1245 .set = s390_gs_cb_set,
1246 },
1197}; 1247};
1198 1248
1199static const struct user_regset_view user_s390_view = { 1249static const struct user_regset_view user_s390_view = {
@@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = {
1422 .get = s390_compat_regs_high_get, 1472 .get = s390_compat_regs_high_get,
1423 .set = s390_compat_regs_high_set, 1473 .set = s390_compat_regs_high_set,
1424 }, 1474 },
1475 {
1476 .core_note_type = NT_S390_GS_CB,
1477 .n = sizeof(struct gs_cb) / sizeof(__u64),
1478 .size = sizeof(__u64),
1479 .align = sizeof(__u64),
1480 .get = s390_gs_cb_get,
1481 .set = s390_gs_cb_set,
1482 },
1425}; 1483};
1426 1484
1427static const struct user_regset_view user_s390_compat_view = { 1485static const struct user_regset_view user_s390_compat_view = {
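
A hedged sketch of how a tracer might read the new regset (not part of the patch); it assumes NT_S390_GS_CB is exported through <linux/elf.h> and struct gs_cb through the uapi <asm/guarded_storage.h> header added by this series.

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <linux/elf.h>			/* NT_S390_GS_CB (assumed to be exported here) */
#include <asm/guarded_storage.h>	/* struct gs_cb */

/* Hypothetical helper: read the tracee's guarded-storage control block via
 * the new regset.  The tracee must already be attached and stopped.  ptrace()
 * fails with errno == ENODEV on machines without GS and ENODATA if the thread
 * never enabled guarded storage, mirroring s390_gs_cb_get() above.
 */
long read_gs_cb(pid_t pid, struct gs_cb *cb)
{
	struct iovec iov = { .iov_base = cb, .iov_len = sizeof(*cb) };

	return ptrace(PTRACE_GETREGSET, pid, (void *) NT_S390_GS_CB, &iov);
}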
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 911dc0b49be0..3ae756c0db3d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void)
339 lc->stfl_fac_list = S390_lowcore.stfl_fac_list; 339 lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
340 memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, 340 memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
341 MAX_FACILITY_BIT/8); 341 MAX_FACILITY_BIT/8);
342 if (MACHINE_HAS_VX) 342 if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
343 lc->vector_save_area_addr = 343 unsigned long bits, size;
344 (unsigned long) &lc->vector_save_area; 344
345 bits = MACHINE_HAS_GS ? 11 : 10;
346 size = 1UL << bits;
347 lc->mcesad = (__u64) memblock_virt_alloc(size, size);
348 if (MACHINE_HAS_GS)
349 lc->mcesad |= bits;
350 }
345 lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; 351 lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
346 lc->sync_enter_timer = S390_lowcore.sync_enter_timer; 352 lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
347 lc->async_enter_timer = S390_lowcore.async_enter_timer; 353 lc->async_enter_timer = S390_lowcore.async_enter_timer;
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void)
779 elf_hwcap |= HWCAP_S390_VXRS_BCD; 785 elf_hwcap |= HWCAP_S390_VXRS_BCD;
780 } 786 }
781 787
788 /*
789 * Guarded storage support HWCAP_S390_GS is bit 12.
790 */
791 if (MACHINE_HAS_GS)
792 elf_hwcap |= HWCAP_S390_GS;
793
782 get_cpu_id(&cpu_id); 794 get_cpu_id(&cpu_id);
783 add_device_randomness(&cpu_id, sizeof(cpu_id)); 795 add_device_randomness(&cpu_id, sizeof(cpu_id));
784 switch (cpu_id.machine) { 796 switch (cpu_id.machine) {
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 47a973b5b4f1..286bcee800f4 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -51,6 +51,7 @@
51#include <asm/os_info.h> 51#include <asm/os_info.h>
52#include <asm/sigp.h> 52#include <asm/sigp.h>
53#include <asm/idle.h> 53#include <asm/idle.h>
54#include <asm/nmi.h>
54#include "entry.h" 55#include "entry.h"
55 56
56enum { 57enum {
@@ -78,6 +79,8 @@ struct pcpu {
78static u8 boot_core_type; 79static u8 boot_core_type;
79static struct pcpu pcpu_devices[NR_CPUS]; 80static struct pcpu pcpu_devices[NR_CPUS];
80 81
82static struct kmem_cache *pcpu_mcesa_cache;
83
81unsigned int smp_cpu_mt_shift; 84unsigned int smp_cpu_mt_shift;
82EXPORT_SYMBOL(smp_cpu_mt_shift); 85EXPORT_SYMBOL(smp_cpu_mt_shift);
83 86
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
188static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) 191static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
189{ 192{
190 unsigned long async_stack, panic_stack; 193 unsigned long async_stack, panic_stack;
194 unsigned long mcesa_origin, mcesa_bits;
191 struct lowcore *lc; 195 struct lowcore *lc;
192 196
197 mcesa_origin = mcesa_bits = 0;
193 if (pcpu != &pcpu_devices[0]) { 198 if (pcpu != &pcpu_devices[0]) {
194 pcpu->lowcore = (struct lowcore *) 199 pcpu->lowcore = (struct lowcore *)
195 __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); 200 __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
197 panic_stack = __get_free_page(GFP_KERNEL); 202 panic_stack = __get_free_page(GFP_KERNEL);
198 if (!pcpu->lowcore || !panic_stack || !async_stack) 203 if (!pcpu->lowcore || !panic_stack || !async_stack)
199 goto out; 204 goto out;
205 if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
206 mcesa_origin = (unsigned long)
207 kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
208 if (!mcesa_origin)
209 goto out;
210 mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
211 }
200 } else { 212 } else {
201 async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; 213 async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
202 panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; 214 panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
215 mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
216 mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK;
203 } 217 }
204 lc = pcpu->lowcore; 218 lc = pcpu->lowcore;
205 memcpy(lc, &S390_lowcore, 512); 219 memcpy(lc, &S390_lowcore, 512);
206 memset((char *) lc + 512, 0, sizeof(*lc) - 512); 220 memset((char *) lc + 512, 0, sizeof(*lc) - 512);
207 lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; 221 lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
208 lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; 222 lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
223 lc->mcesad = mcesa_origin | mcesa_bits;
209 lc->cpu_nr = cpu; 224 lc->cpu_nr = cpu;
210 lc->spinlock_lockval = arch_spin_lockval(cpu); 225 lc->spinlock_lockval = arch_spin_lockval(cpu);
211 if (MACHINE_HAS_VX)
212 lc->vector_save_area_addr =
213 (unsigned long) &lc->vector_save_area;
214 if (vdso_alloc_per_cpu(lc)) 226 if (vdso_alloc_per_cpu(lc))
215 goto out; 227 goto out;
216 lowcore_ptr[cpu] = lc; 228 lowcore_ptr[cpu] = lc;
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
218 return 0; 230 return 0;
219out: 231out:
220 if (pcpu != &pcpu_devices[0]) { 232 if (pcpu != &pcpu_devices[0]) {
233 if (mcesa_origin)
234 kmem_cache_free(pcpu_mcesa_cache,
235 (void *) mcesa_origin);
221 free_page(panic_stack); 236 free_page(panic_stack);
222 free_pages(async_stack, ASYNC_ORDER); 237 free_pages(async_stack, ASYNC_ORDER);
223 free_pages((unsigned long) pcpu->lowcore, LC_ORDER); 238 free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -229,11 +244,17 @@ out:
229 244
230static void pcpu_free_lowcore(struct pcpu *pcpu) 245static void pcpu_free_lowcore(struct pcpu *pcpu)
231{ 246{
247 unsigned long mcesa_origin;
248
232 pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); 249 pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
233 lowcore_ptr[pcpu - pcpu_devices] = NULL; 250 lowcore_ptr[pcpu - pcpu_devices] = NULL;
234 vdso_free_per_cpu(pcpu->lowcore); 251 vdso_free_per_cpu(pcpu->lowcore);
235 if (pcpu == &pcpu_devices[0]) 252 if (pcpu == &pcpu_devices[0])
236 return; 253 return;
254 if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
255 mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
256 kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin);
257 }
237 free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); 258 free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
238 free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); 259 free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
239 free_pages((unsigned long) pcpu->lowcore, LC_ORDER); 260 free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -550,9 +571,11 @@ int smp_store_status(int cpu)
550 if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, 571 if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
551 pa) != SIGP_CC_ORDER_CODE_ACCEPTED) 572 pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
552 return -EIO; 573 return -EIO;
553 if (!MACHINE_HAS_VX) 574 if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
554 return 0; 575 return 0;
555 pa = __pa(pcpu->lowcore->vector_save_area_addr); 576 pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
577 if (MACHINE_HAS_GS)
578 pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
556 if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, 579 if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
557 pa) != SIGP_CC_ORDER_CODE_ACCEPTED) 580 pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
558 return -EIO; 581 return -EIO;
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void)
897 920
898void __init smp_prepare_cpus(unsigned int max_cpus) 921void __init smp_prepare_cpus(unsigned int max_cpus)
899{ 922{
923 unsigned long size;
924
900 /* request the 0x1201 emergency signal external interrupt */ 925 /* request the 0x1201 emergency signal external interrupt */
901 if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) 926 if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
902 panic("Couldn't request external interrupt 0x1201"); 927 panic("Couldn't request external interrupt 0x1201");
903 /* request the 0x1202 external call external interrupt */ 928 /* request the 0x1202 external call external interrupt */
904 if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) 929 if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
905 panic("Couldn't request external interrupt 0x1202"); 930 panic("Couldn't request external interrupt 0x1202");
931 /* create slab cache for the machine-check-extended-save-areas */
932 if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
933 size = 1UL << (MACHINE_HAS_GS ? 11 : 10);
934 pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas",
935 size, size, 0, NULL);
936 if (!pcpu_mcesa_cache)
937 panic("Couldn't create nmi save area cache");
938 }
906} 939}
907 940
908void __init smp_prepare_boot_cpu(void) 941void __init smp_prepare_boot_cpu(void)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2659b5cfeddb..54fce7b065de 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
386SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ 386SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
387SYSCALL(sys_preadv2,compat_sys_preadv2) 387SYSCALL(sys_preadv2,compat_sys_preadv2)
388SYSCALL(sys_pwritev2,compat_sys_pwritev2) 388SYSCALL(sys_pwritev2,compat_sys_pwritev2)
389NI_SYSCALL 389SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
390SYSCALL(sys_statx,compat_sys_statx) 390SYSCALL(sys_statx,compat_sys_statx)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index d55c829a5944..709aca9ceb05 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -262,7 +262,7 @@ struct aste {
262 262
263int ipte_lock_held(struct kvm_vcpu *vcpu) 263int ipte_lock_held(struct kvm_vcpu *vcpu)
264{ 264{
265 if (vcpu->arch.sie_block->eca & 1) { 265 if (vcpu->arch.sie_block->eca & ECA_SII) {
266 int rc; 266 int rc;
267 267
268 read_lock(&vcpu->kvm->arch.sca_lock); 268 read_lock(&vcpu->kvm->arch.sca_lock);
@@ -361,7 +361,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
361 361
362void ipte_lock(struct kvm_vcpu *vcpu) 362void ipte_lock(struct kvm_vcpu *vcpu)
363{ 363{
364 if (vcpu->arch.sie_block->eca & 1) 364 if (vcpu->arch.sie_block->eca & ECA_SII)
365 ipte_lock_siif(vcpu); 365 ipte_lock_siif(vcpu);
366 else 366 else
367 ipte_lock_simple(vcpu); 367 ipte_lock_simple(vcpu);
@@ -369,7 +369,7 @@ void ipte_lock(struct kvm_vcpu *vcpu)
369 369
370void ipte_unlock(struct kvm_vcpu *vcpu) 370void ipte_unlock(struct kvm_vcpu *vcpu)
371{ 371{
372 if (vcpu->arch.sie_block->eca & 1) 372 if (vcpu->arch.sie_block->eca & ECA_SII)
373 ipte_unlock_siif(vcpu); 373 ipte_unlock_siif(vcpu);
374 else 374 else
375 ipte_unlock_simple(vcpu); 375 ipte_unlock_simple(vcpu);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 59920f96ebc0..a4752bf6b526 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -35,6 +35,7 @@ static const intercept_handler_t instruction_handlers[256] = {
35 [0xb6] = kvm_s390_handle_stctl, 35 [0xb6] = kvm_s390_handle_stctl,
36 [0xb7] = kvm_s390_handle_lctl, 36 [0xb7] = kvm_s390_handle_lctl,
37 [0xb9] = kvm_s390_handle_b9, 37 [0xb9] = kvm_s390_handle_b9,
38 [0xe3] = kvm_s390_handle_e3,
38 [0xe5] = kvm_s390_handle_e5, 39 [0xe5] = kvm_s390_handle_e5,
39 [0xeb] = kvm_s390_handle_eb, 40 [0xeb] = kvm_s390_handle_eb,
40}; 41};
@@ -368,8 +369,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
368 trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, 369 trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
369 vcpu->arch.sie_block->ipb); 370 vcpu->arch.sie_block->ipb);
370 371
371 if (vcpu->arch.sie_block->ipa == 0xb256 && 372 if (vcpu->arch.sie_block->ipa == 0xb256)
372 test_kvm_facility(vcpu->kvm, 74))
373 return handle_sthyi(vcpu); 373 return handle_sthyi(vcpu);
374 374
375 if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) 375 if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
@@ -404,28 +404,31 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
404 return -EOPNOTSUPP; 404 return -EOPNOTSUPP;
405 405
406 switch (vcpu->arch.sie_block->icptcode) { 406 switch (vcpu->arch.sie_block->icptcode) {
407 case 0x10: 407 case ICPT_EXTREQ:
408 case 0x18: 408 case ICPT_IOREQ:
409 return handle_noop(vcpu); 409 return handle_noop(vcpu);
410 case 0x04: 410 case ICPT_INST:
411 rc = handle_instruction(vcpu); 411 rc = handle_instruction(vcpu);
412 break; 412 break;
413 case 0x08: 413 case ICPT_PROGI:
414 return handle_prog(vcpu); 414 return handle_prog(vcpu);
415 case 0x14: 415 case ICPT_EXTINT:
416 return handle_external_interrupt(vcpu); 416 return handle_external_interrupt(vcpu);
417 case 0x1c: 417 case ICPT_WAIT:
418 return kvm_s390_handle_wait(vcpu); 418 return kvm_s390_handle_wait(vcpu);
419 case 0x20: 419 case ICPT_VALIDITY:
420 return handle_validity(vcpu); 420 return handle_validity(vcpu);
421 case 0x28: 421 case ICPT_STOP:
422 return handle_stop(vcpu); 422 return handle_stop(vcpu);
423 case 0x2c: 423 case ICPT_OPEREXC:
424 rc = handle_operexc(vcpu); 424 rc = handle_operexc(vcpu);
425 break; 425 break;
426 case 0x38: 426 case ICPT_PARTEXEC:
427 rc = handle_partial_execution(vcpu); 427 rc = handle_partial_execution(vcpu);
428 break; 428 break;
429 case ICPT_KSS:
430 rc = kvm_s390_skey_check_enable(vcpu);
431 break;
429 default: 432 default:
430 return -EOPNOTSUPP; 433 return -EOPNOTSUPP;
431 } 434 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0f8f14199734..caf15c8a8948 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -410,6 +410,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
410 struct kvm_s390_mchk_info *mchk) 410 struct kvm_s390_mchk_info *mchk)
411{ 411{
412 unsigned long ext_sa_addr; 412 unsigned long ext_sa_addr;
413 unsigned long lc;
413 freg_t fprs[NUM_FPRS]; 414 freg_t fprs[NUM_FPRS];
414 union mci mci; 415 union mci mci;
415 int rc; 416 int rc;
@@ -418,12 +419,34 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
418 /* take care of lazy register loading */ 419 /* take care of lazy register loading */
419 save_fpu_regs(); 420 save_fpu_regs();
420 save_access_regs(vcpu->run->s.regs.acrs); 421 save_access_regs(vcpu->run->s.regs.acrs);
422 if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
423 save_gs_cb(current->thread.gs_cb);
421 424
422 /* Extended save area */ 425 /* Extended save area */
423 rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr, 426 rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
424 sizeof(unsigned long)); 427 sizeof(unsigned long));
425 /* Only bits 0-53 are used for address formation */ 428 /* Only bits 0 through 63-LC are used for address formation */
426 ext_sa_addr &= ~0x3ffUL; 429 lc = ext_sa_addr & MCESA_LC_MASK;
430 if (test_kvm_facility(vcpu->kvm, 133)) {
431 switch (lc) {
432 case 0:
433 case 10:
434 ext_sa_addr &= ~0x3ffUL;
435 break;
436 case 11:
437 ext_sa_addr &= ~0x7ffUL;
438 break;
439 case 12:
440 ext_sa_addr &= ~0xfffUL;
441 break;
442 default:
443 ext_sa_addr = 0;
444 break;
445 }
446 } else {
447 ext_sa_addr &= ~0x3ffUL;
448 }
449
427 if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { 450 if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
428 if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs, 451 if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs,
429 512)) 452 512))
@@ -431,6 +454,14 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
431 } else { 454 } else {
432 mci.vr = 0; 455 mci.vr = 0;
433 } 456 }
457 if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133)
458 && (lc == 11 || lc == 12)) {
459 if (write_guest_abs(vcpu, ext_sa_addr + 1024,
460 &vcpu->run->s.regs.gscb, 32))
461 mci.gs = 0;
462 } else {
463 mci.gs = 0;
464 }
434 465
435 /* General interruption information */ 466 /* General interruption information */
436 rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID); 467 rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID);
@@ -1968,6 +1999,8 @@ static int register_io_adapter(struct kvm_device *dev,
1968 adapter->maskable = adapter_info.maskable; 1999 adapter->maskable = adapter_info.maskable;
1969 adapter->masked = false; 2000 adapter->masked = false;
1970 adapter->swap = adapter_info.swap; 2001 adapter->swap = adapter_info.swap;
2002 adapter->suppressible = (adapter_info.flags) &
2003 KVM_S390_ADAPTER_SUPPRESSIBLE;
1971 dev->kvm->arch.adapters[adapter->id] = adapter; 2004 dev->kvm->arch.adapters[adapter->id] = adapter;
1972 2005
1973 return 0; 2006 return 0;
@@ -2121,6 +2154,87 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
2121 return 0; 2154 return 0;
2122} 2155}
2123 2156
2157static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
2158{
2159 struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
2160 struct kvm_s390_ais_req req;
2161 int ret = 0;
2162
2163 if (!fi->ais_enabled)
2164 return -ENOTSUPP;
2165
2166 if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
2167 return -EFAULT;
2168
2169 if (req.isc > MAX_ISC)
2170 return -EINVAL;
2171
2172 trace_kvm_s390_modify_ais_mode(req.isc,
2173 (fi->simm & AIS_MODE_MASK(req.isc)) ?
2174 (fi->nimm & AIS_MODE_MASK(req.isc)) ?
2175 2 : KVM_S390_AIS_MODE_SINGLE :
2176 KVM_S390_AIS_MODE_ALL, req.mode);
2177
2178 mutex_lock(&fi->ais_lock);
2179 switch (req.mode) {
2180 case KVM_S390_AIS_MODE_ALL:
2181 fi->simm &= ~AIS_MODE_MASK(req.isc);
2182 fi->nimm &= ~AIS_MODE_MASK(req.isc);
2183 break;
2184 case KVM_S390_AIS_MODE_SINGLE:
2185 fi->simm |= AIS_MODE_MASK(req.isc);
2186 fi->nimm &= ~AIS_MODE_MASK(req.isc);
2187 break;
2188 default:
2189 ret = -EINVAL;
2190 }
2191 mutex_unlock(&fi->ais_lock);
2192
2193 return ret;
2194}
2195
2196static int kvm_s390_inject_airq(struct kvm *kvm,
2197 struct s390_io_adapter *adapter)
2198{
2199 struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
2200 struct kvm_s390_interrupt s390int = {
2201 .type = KVM_S390_INT_IO(1, 0, 0, 0),
2202 .parm = 0,
2203 .parm64 = (adapter->isc << 27) | 0x80000000,
2204 };
2205 int ret = 0;
2206
2207 if (!fi->ais_enabled || !adapter->suppressible)
2208 return kvm_s390_inject_vm(kvm, &s390int);
2209
2210 mutex_lock(&fi->ais_lock);
2211 if (fi->nimm & AIS_MODE_MASK(adapter->isc)) {
2212 trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc);
2213 goto out;
2214 }
2215
2216 ret = kvm_s390_inject_vm(kvm, &s390int);
2217 if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) {
2218 fi->nimm |= AIS_MODE_MASK(adapter->isc);
2219 trace_kvm_s390_modify_ais_mode(adapter->isc,
2220 KVM_S390_AIS_MODE_SINGLE, 2);
2221 }
2222out:
2223 mutex_unlock(&fi->ais_lock);
2224 return ret;
2225}
2226
2227static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
2228{
2229 unsigned int id = attr->attr;
2230 struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
2231
2232 if (!adapter)
2233 return -EINVAL;
2234
2235 return kvm_s390_inject_airq(kvm, adapter);
2236}
2237
2124static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 2238static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2125{ 2239{
2126 int r = 0; 2240 int r = 0;
@@ -2157,6 +2271,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2157 case KVM_DEV_FLIC_CLEAR_IO_IRQ: 2271 case KVM_DEV_FLIC_CLEAR_IO_IRQ:
2158 r = clear_io_irq(dev->kvm, attr); 2272 r = clear_io_irq(dev->kvm, attr);
2159 break; 2273 break;
2274 case KVM_DEV_FLIC_AISM:
2275 r = modify_ais_mode(dev->kvm, attr);
2276 break;
2277 case KVM_DEV_FLIC_AIRQ_INJECT:
2278 r = flic_inject_airq(dev->kvm, attr);
2279 break;
2160 default: 2280 default:
2161 r = -EINVAL; 2281 r = -EINVAL;
2162 } 2282 }
@@ -2176,6 +2296,8 @@ static int flic_has_attr(struct kvm_device *dev,
2176 case KVM_DEV_FLIC_ADAPTER_REGISTER: 2296 case KVM_DEV_FLIC_ADAPTER_REGISTER:
2177 case KVM_DEV_FLIC_ADAPTER_MODIFY: 2297 case KVM_DEV_FLIC_ADAPTER_MODIFY:
2178 case KVM_DEV_FLIC_CLEAR_IO_IRQ: 2298 case KVM_DEV_FLIC_CLEAR_IO_IRQ:
2299 case KVM_DEV_FLIC_AISM:
2300 case KVM_DEV_FLIC_AIRQ_INJECT:
2179 return 0; 2301 return 0;
2180 } 2302 }
2181 return -ENXIO; 2303 return -ENXIO;
@@ -2286,12 +2408,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
2286 ret = adapter_indicators_set(kvm, adapter, &e->adapter); 2408 ret = adapter_indicators_set(kvm, adapter, &e->adapter);
2287 up_read(&adapter->maps_lock); 2409 up_read(&adapter->maps_lock);
2288 if ((ret > 0) && !adapter->masked) { 2410 if ((ret > 0) && !adapter->masked) {
2289 struct kvm_s390_interrupt s390int = { 2411 ret = kvm_s390_inject_airq(kvm, adapter);
2290 .type = KVM_S390_INT_IO(1, 0, 0, 0),
2291 .parm = 0,
2292 .parm64 = (adapter->isc << 27) | 0x80000000,
2293 };
2294 ret = kvm_s390_inject_vm(kvm, &s390int);
2295 if (ret == 0) 2412 if (ret == 0)
2296 ret = 1; 2413 ret = 1;
2297 } 2414 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd6cd05bb6a7..8771fef112a1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -300,6 +300,8 @@ static void kvm_s390_cpu_feat_init(void)
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
301 if (sclp.has_ibs) 301 if (sclp.has_ibs)
302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
303 if (sclp.has_kss)
304 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
303 /* 305 /*
304 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 306 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
305 * all skey handling functions read/set the skey from the PGSTE 307 * all skey handling functions read/set the skey from the PGSTE
@@ -380,6 +382,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
380 case KVM_CAP_S390_SKEYS: 382 case KVM_CAP_S390_SKEYS:
381 case KVM_CAP_S390_IRQ_STATE: 383 case KVM_CAP_S390_IRQ_STATE:
382 case KVM_CAP_S390_USER_INSTR0: 384 case KVM_CAP_S390_USER_INSTR0:
385 case KVM_CAP_S390_AIS:
383 r = 1; 386 r = 1;
384 break; 387 break;
385 case KVM_CAP_S390_MEM_OP: 388 case KVM_CAP_S390_MEM_OP:
@@ -405,6 +408,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
405 case KVM_CAP_S390_RI: 408 case KVM_CAP_S390_RI:
406 r = test_facility(64); 409 r = test_facility(64);
407 break; 410 break;
411 case KVM_CAP_S390_GS:
412 r = test_facility(133);
413 break;
408 default: 414 default:
409 r = 0; 415 r = 0;
410 } 416 }
@@ -541,6 +547,34 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
541 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 547 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
542 r ? "(not available)" : "(success)"); 548 r ? "(not available)" : "(success)");
543 break; 549 break;
550 case KVM_CAP_S390_AIS:
551 mutex_lock(&kvm->lock);
552 if (kvm->created_vcpus) {
553 r = -EBUSY;
554 } else {
555 set_kvm_facility(kvm->arch.model.fac_mask, 72);
556 set_kvm_facility(kvm->arch.model.fac_list, 72);
557 kvm->arch.float_int.ais_enabled = 1;
558 r = 0;
559 }
560 mutex_unlock(&kvm->lock);
561 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
562 r ? "(not available)" : "(success)");
563 break;
564 case KVM_CAP_S390_GS:
565 r = -EINVAL;
566 mutex_lock(&kvm->lock);
567 if (atomic_read(&kvm->online_vcpus)) {
568 r = -EBUSY;
569 } else if (test_facility(133)) {
570 set_kvm_facility(kvm->arch.model.fac_mask, 133);
571 set_kvm_facility(kvm->arch.model.fac_list, 133);
572 r = 0;
573 }
574 mutex_unlock(&kvm->lock);
575 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
576 r ? "(not available)" : "(success)");
577 break;
544 case KVM_CAP_S390_USER_STSI: 578 case KVM_CAP_S390_USER_STSI:
545 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 579 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
546 kvm->arch.user_stsi = 1; 580 kvm->arch.user_stsi = 1;
@@ -1498,6 +1532,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1498 1532
1499 kvm_s390_crypto_init(kvm); 1533 kvm_s390_crypto_init(kvm);
1500 1534
1535 mutex_init(&kvm->arch.float_int.ais_lock);
1536 kvm->arch.float_int.simm = 0;
1537 kvm->arch.float_int.nimm = 0;
1538 kvm->arch.float_int.ais_enabled = 0;
1501 spin_lock_init(&kvm->arch.float_int.lock); 1539 spin_lock_init(&kvm->arch.float_int.lock);
1502 for (i = 0; i < FIRQ_LIST_COUNT; i++) 1540 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1503 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 1541 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
@@ -1646,7 +1684,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1646 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 1684 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1647 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 1685 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1648 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 1686 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1649 vcpu->arch.sie_block->ecb2 |= 0x04U; 1687 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1650 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 1688 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1651 } else { 1689 } else {
1652 struct bsca_block *sca = vcpu->kvm->arch.sca; 1690 struct bsca_block *sca = vcpu->kvm->arch.sca;
@@ -1700,7 +1738,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
1700 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 1738 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1701 vcpu->arch.sie_block->scaoh = scaoh; 1739 vcpu->arch.sie_block->scaoh = scaoh;
1702 vcpu->arch.sie_block->scaol = scaol; 1740 vcpu->arch.sie_block->scaol = scaol;
1703 vcpu->arch.sie_block->ecb2 |= 0x04U; 1741 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1704 } 1742 }
1705 kvm->arch.sca = new_sca; 1743 kvm->arch.sca = new_sca;
1706 kvm->arch.use_esca = 1; 1744 kvm->arch.use_esca = 1;
@@ -1749,6 +1787,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1749 kvm_s390_set_prefix(vcpu, 0); 1787 kvm_s390_set_prefix(vcpu, 0);
1750 if (test_kvm_facility(vcpu->kvm, 64)) 1788 if (test_kvm_facility(vcpu->kvm, 64))
1751 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 1789 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1790 if (test_kvm_facility(vcpu->kvm, 133))
1791 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
1752 /* fprs can be synchronized via vrs, even if the guest has no vx. With 1792 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1753 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 1793 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1754 */ 1794 */
@@ -1939,8 +1979,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1939 if (!vcpu->arch.sie_block->cbrlo) 1979 if (!vcpu->arch.sie_block->cbrlo)
1940 return -ENOMEM; 1980 return -ENOMEM;
1941 1981
1942 vcpu->arch.sie_block->ecb2 |= 0x80; 1982 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
1943 vcpu->arch.sie_block->ecb2 &= ~0x08; 1983 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
1944 return 0; 1984 return 0;
1945} 1985}
1946 1986
@@ -1970,31 +2010,37 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1970 2010
1971 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 2011 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1972 if (MACHINE_HAS_ESOP) 2012 if (MACHINE_HAS_ESOP)
1973 vcpu->arch.sie_block->ecb |= 0x02; 2013 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
1974 if (test_kvm_facility(vcpu->kvm, 9)) 2014 if (test_kvm_facility(vcpu->kvm, 9))
1975 vcpu->arch.sie_block->ecb |= 0x04; 2015 vcpu->arch.sie_block->ecb |= ECB_SRSI;
1976 if (test_kvm_facility(vcpu->kvm, 73)) 2016 if (test_kvm_facility(vcpu->kvm, 73))
1977 vcpu->arch.sie_block->ecb |= 0x10; 2017 vcpu->arch.sie_block->ecb |= ECB_TE;
1978 2018
1979 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) 2019 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1980 vcpu->arch.sie_block->ecb2 |= 0x08; 2020 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
1981 if (test_kvm_facility(vcpu->kvm, 130)) 2021 if (test_kvm_facility(vcpu->kvm, 130))
1982 vcpu->arch.sie_block->ecb2 |= 0x20; 2022 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
1983 vcpu->arch.sie_block->eca = 0x1002000U; 2023 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
1984 if (sclp.has_cei) 2024 if (sclp.has_cei)
1985 vcpu->arch.sie_block->eca |= 0x80000000U; 2025 vcpu->arch.sie_block->eca |= ECA_CEI;
1986 if (sclp.has_ib) 2026 if (sclp.has_ib)
1987 vcpu->arch.sie_block->eca |= 0x40000000U; 2027 vcpu->arch.sie_block->eca |= ECA_IB;
1988 if (sclp.has_siif) 2028 if (sclp.has_siif)
1989 vcpu->arch.sie_block->eca |= 1; 2029 vcpu->arch.sie_block->eca |= ECA_SII;
1990 if (sclp.has_sigpif) 2030 if (sclp.has_sigpif)
1991 vcpu->arch.sie_block->eca |= 0x10000000U; 2031 vcpu->arch.sie_block->eca |= ECA_SIGPI;
1992 if (test_kvm_facility(vcpu->kvm, 129)) { 2032 if (test_kvm_facility(vcpu->kvm, 129)) {
1993 vcpu->arch.sie_block->eca |= 0x00020000; 2033 vcpu->arch.sie_block->eca |= ECA_VX;
1994 vcpu->arch.sie_block->ecd |= 0x20000000; 2034 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
1995 } 2035 }
2036 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2037 | SDNXC;
1996 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 2038 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1997 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 2039
2040 if (sclp.has_kss)
2041 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2042 else
2043 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1998 2044
1999 if (vcpu->kvm->arch.use_cmma) { 2045 if (vcpu->kvm->arch.use_cmma) {
2000 rc = kvm_s390_vcpu_setup_cmma(vcpu); 2046 rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -2719,6 +2765,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
2719 2765
2720static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2766static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2721{ 2767{
2768 struct runtime_instr_cb *riccb;
2769 struct gs_cb *gscb;
2770
2771 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2772 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
2722 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 2773 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2723 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 2774 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2724 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 2775 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
@@ -2747,12 +2798,24 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2747 * we should enable RI here instead of doing the lazy enablement. 2798 * we should enable RI here instead of doing the lazy enablement.
2748 */ 2799 */
2749 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 2800 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2750 test_kvm_facility(vcpu->kvm, 64)) { 2801 test_kvm_facility(vcpu->kvm, 64) &&
2751 struct runtime_instr_cb *riccb = 2802 riccb->valid &&
2752 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 2803 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
2753 2804 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
2754 if (riccb->valid) 2805 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
2755 vcpu->arch.sie_block->ecb3 |= 0x01; 2806 }
2807 /*
2808 * If userspace sets the gscb (e.g. after migration) to non-zero,
2809 * we should enable GS here instead of doing the lazy enablement.
2810 */
2811 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
2812 test_kvm_facility(vcpu->kvm, 133) &&
2813 gscb->gssm &&
2814 !vcpu->arch.gs_enabled) {
2815 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
2816 vcpu->arch.sie_block->ecb |= ECB_GS;
2817 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2818 vcpu->arch.gs_enabled = 1;
2756 } 2819 }
2757 save_access_regs(vcpu->arch.host_acrs); 2820 save_access_regs(vcpu->arch.host_acrs);
2758 restore_access_regs(vcpu->run->s.regs.acrs); 2821 restore_access_regs(vcpu->run->s.regs.acrs);
@@ -2768,6 +2831,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2768 if (test_fp_ctl(current->thread.fpu.fpc)) 2831 if (test_fp_ctl(current->thread.fpu.fpc))
2769 /* User space provided an invalid FPC, let's clear it */ 2832 /* User space provided an invalid FPC, let's clear it */
2770 current->thread.fpu.fpc = 0; 2833 current->thread.fpu.fpc = 0;
2834 if (MACHINE_HAS_GS) {
2835 preempt_disable();
2836 __ctl_set_bit(2, 4);
2837 if (current->thread.gs_cb) {
2838 vcpu->arch.host_gscb = current->thread.gs_cb;
2839 save_gs_cb(vcpu->arch.host_gscb);
2840 }
2841 if (vcpu->arch.gs_enabled) {
2842 current->thread.gs_cb = (struct gs_cb *)
2843 &vcpu->run->s.regs.gscb;
2844 restore_gs_cb(current->thread.gs_cb);
2845 }
2846 preempt_enable();
2847 }
2771 2848
2772 kvm_run->kvm_dirty_regs = 0; 2849 kvm_run->kvm_dirty_regs = 0;
2773} 2850}
@@ -2794,6 +2871,18 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2794 /* Restore will be done lazily at return */ 2871 /* Restore will be done lazily at return */
2795 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 2872 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2796 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 2873 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2874 if (MACHINE_HAS_GS) {
2875 __ctl_set_bit(2, 4);
2876 if (vcpu->arch.gs_enabled)
2877 save_gs_cb(current->thread.gs_cb);
2878 preempt_disable();
2879 current->thread.gs_cb = vcpu->arch.host_gscb;
2880 restore_gs_cb(vcpu->arch.host_gscb);
2881 preempt_enable();
2882 if (!vcpu->arch.host_gscb)
2883 __ctl_clear_bit(2, 4);
2884 vcpu->arch.host_gscb = NULL;
2885 }
2797 2886
2798} 2887}
2799 2888
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index af9fa91a0c91..55f5c8457d6d 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -25,7 +25,7 @@
25typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); 25typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
26 26
27/* Transactional Memory Execution related macros */ 27/* Transactional Memory Execution related macros */
28#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) 28#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
29#define TDB_FORMAT1 1 29#define TDB_FORMAT1 1
30#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) 30#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
31 31
@@ -246,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
246int is_valid_psw(psw_t *psw); 246int is_valid_psw(psw_t *psw);
247int kvm_s390_handle_aa(struct kvm_vcpu *vcpu); 247int kvm_s390_handle_aa(struct kvm_vcpu *vcpu);
248int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); 248int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
249int kvm_s390_handle_e3(struct kvm_vcpu *vcpu);
249int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); 250int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
250int kvm_s390_handle_01(struct kvm_vcpu *vcpu); 251int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
251int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); 252int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
@@ -253,6 +254,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
253int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); 254int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
254int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); 255int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
255int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); 256int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
257int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu);
256 258
257/* implemented in vsie.c */ 259/* implemented in vsie.c */
258int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu); 260int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 64b6a309f2c4..c03106c428cf 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -37,7 +37,8 @@
37static int handle_ri(struct kvm_vcpu *vcpu) 37static int handle_ri(struct kvm_vcpu *vcpu)
38{ 38{
39 if (test_kvm_facility(vcpu->kvm, 64)) { 39 if (test_kvm_facility(vcpu->kvm, 64)) {
40 vcpu->arch.sie_block->ecb3 |= 0x01; 40 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
41 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
41 kvm_s390_retry_instr(vcpu); 42 kvm_s390_retry_instr(vcpu);
42 return 0; 43 return 0;
43 } else 44 } else
@@ -52,6 +53,33 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
52 return -EOPNOTSUPP; 53 return -EOPNOTSUPP;
53} 54}
54 55
56static int handle_gs(struct kvm_vcpu *vcpu)
57{
58 if (test_kvm_facility(vcpu->kvm, 133)) {
59 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
60 preempt_disable();
61 __ctl_set_bit(2, 4);
62 current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb;
63 restore_gs_cb(current->thread.gs_cb);
64 preempt_enable();
65 vcpu->arch.sie_block->ecb |= ECB_GS;
66 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
67 vcpu->arch.gs_enabled = 1;
68 kvm_s390_retry_instr(vcpu);
69 return 0;
70 } else
71 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
72}
73
74int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
75{
76 int code = vcpu->arch.sie_block->ipb & 0xff;
77
78 if (code == 0x49 || code == 0x4d)
79 return handle_gs(vcpu);
80 else
81 return -EOPNOTSUPP;
82}
55/* Handle SCK (SET CLOCK) interception */ 83/* Handle SCK (SET CLOCK) interception */
56static int handle_set_clock(struct kvm_vcpu *vcpu) 84static int handle_set_clock(struct kvm_vcpu *vcpu)
57{ 85{
@@ -170,18 +198,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
170 return 0; 198 return 0;
171} 199}
172 200
173static int __skey_check_enable(struct kvm_vcpu *vcpu) 201int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
174{ 202{
175 int rc = 0; 203 int rc = 0;
204 struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
176 205
177 trace_kvm_s390_skey_related_inst(vcpu); 206 trace_kvm_s390_skey_related_inst(vcpu);
178 if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) 207 if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
208 !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS))
179 return rc; 209 return rc;
180 210
181 rc = s390_enable_skey(); 211 rc = s390_enable_skey();
182 VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); 212 VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
183 if (!rc) 213 if (!rc) {
184 vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); 214 if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)
215 atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags);
216 else
217 sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
218 ICTL_RRBE);
219 }
185 return rc; 220 return rc;
186} 221}
187 222
@@ -190,7 +225,7 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
190 int rc; 225 int rc;
191 226
192 vcpu->stat.instruction_storage_key++; 227 vcpu->stat.instruction_storage_key++;
193 rc = __skey_check_enable(vcpu); 228 rc = kvm_s390_skey_check_enable(vcpu);
194 if (rc) 229 if (rc)
195 return rc; 230 return rc;
196 if (sclp.has_skey) { 231 if (sclp.has_skey) {
@@ -759,6 +794,7 @@ static const intercept_handler_t b2_handlers[256] = {
759 [0x3b] = handle_io_inst, 794 [0x3b] = handle_io_inst,
760 [0x3c] = handle_io_inst, 795 [0x3c] = handle_io_inst,
761 [0x50] = handle_ipte_interlock, 796 [0x50] = handle_ipte_interlock,
797 [0x56] = handle_sthyi,
762 [0x5f] = handle_io_inst, 798 [0x5f] = handle_io_inst,
763 [0x74] = handle_io_inst, 799 [0x74] = handle_io_inst,
764 [0x76] = handle_io_inst, 800 [0x76] = handle_io_inst,
@@ -887,7 +923,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
887 } 923 }
888 924
889 if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { 925 if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
890 int rc = __skey_check_enable(vcpu); 926 int rc = kvm_s390_skey_check_enable(vcpu);
891 927
892 if (rc) 928 if (rc)
893 return rc; 929 return rc;
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index 05c98bb853cf..926b5244263e 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -404,6 +404,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
404 u64 code, addr, cc = 0; 404 u64 code, addr, cc = 0;
405 struct sthyi_sctns *sctns = NULL; 405 struct sthyi_sctns *sctns = NULL;
406 406
407 if (!test_kvm_facility(vcpu->kvm, 74))
408 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
409
407 /* 410 /*
408 * STHYI requires extensive locking in the higher hypervisors 411 * STHYI requires extensive locking in the higher hypervisors
409 * and is very compute- and memory-intensive. Therefore we 412 * and is very compute- and memory-intensive. Therefore we
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 396485bca191..78b7e847984a 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -280,6 +280,58 @@ TRACE_EVENT(kvm_s390_enable_disable_ibs,
280 __entry->state ? "enabling" : "disabling", __entry->id) 280 __entry->state ? "enabling" : "disabling", __entry->id)
281 ); 281 );
282 282
283/*
284 * Trace point for modifying ais mode for a given isc.
285 */
286TRACE_EVENT(kvm_s390_modify_ais_mode,
287 TP_PROTO(__u8 isc, __u16 from, __u16 to),
288 TP_ARGS(isc, from, to),
289
290 TP_STRUCT__entry(
291 __field(__u8, isc)
292 __field(__u16, from)
293 __field(__u16, to)
294 ),
295
296 TP_fast_assign(
297 __entry->isc = isc;
298 __entry->from = from;
299 __entry->to = to;
300 ),
301
302 TP_printk("for isc %x, modifying interruption mode from %s to %s",
303 __entry->isc,
304 (__entry->from == KVM_S390_AIS_MODE_ALL) ?
305 "ALL-Interruptions Mode" :
306 (__entry->from == KVM_S390_AIS_MODE_SINGLE) ?
307 "Single-Interruption Mode" : "No-Interruptions Mode",
308 (__entry->to == KVM_S390_AIS_MODE_ALL) ?
309 "ALL-Interruptions Mode" :
310 (__entry->to == KVM_S390_AIS_MODE_SINGLE) ?
311 "Single-Interruption Mode" : "No-Interruptions Mode")
312 );
313
314/*
315 * Trace point for suppressed adapter I/O interrupt.
316 */
317TRACE_EVENT(kvm_s390_airq_suppressed,
318 TP_PROTO(__u32 id, __u8 isc),
319 TP_ARGS(id, isc),
320
321 TP_STRUCT__entry(
322 __field(__u32, id)
323 __field(__u8, isc)
324 ),
325
326 TP_fast_assign(
327 __entry->id = id;
328 __entry->isc = isc;
329 ),
330
331 TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)",
332 __entry->id, __entry->isc)
333 );
334
283 335
284#endif /* _TRACE_KVMS390_H */ 336#endif /* _TRACE_KVMS390_H */
285 337
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 5491be39776b..4719ecb9ab42 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -117,6 +117,8 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
117 newflags |= cpuflags & CPUSTAT_SM; 117 newflags |= cpuflags & CPUSTAT_SM;
118 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS)) 118 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
119 newflags |= cpuflags & CPUSTAT_IBS; 119 newflags |= cpuflags & CPUSTAT_IBS;
120 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
121 newflags |= cpuflags & CPUSTAT_KSS;
120 122
121 atomic_set(&scb_s->cpuflags, newflags); 123 atomic_set(&scb_s->cpuflags, newflags);
122 return 0; 124 return 0;
@@ -249,7 +251,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
249{ 251{
250 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; 252 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
251 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; 253 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
252 bool had_tx = scb_s->ecb & 0x10U; 254 bool had_tx = scb_s->ecb & ECB_TE;
253 unsigned long new_mso = 0; 255 unsigned long new_mso = 0;
254 int rc; 256 int rc;
255 257
@@ -289,7 +291,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
289 * bits. Therefore we cannot provide interpretation and would later 291 * bits. Therefore we cannot provide interpretation and would later
290 * have to provide own emulation handlers. 292 * have to provide own emulation handlers.
291 */ 293 */
292 scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 294 if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
295 scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
296
293 scb_s->icpua = scb_o->icpua; 297 scb_s->icpua = scb_o->icpua;
294 298
295 if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) 299 if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
@@ -307,34 +311,39 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
307 scb_s->ihcpu = scb_o->ihcpu; 311 scb_s->ihcpu = scb_o->ihcpu;
308 312
309 /* MVPG and Protection Exception Interpretation are always available */ 313 /* MVPG and Protection Exception Interpretation are always available */
310 scb_s->eca |= scb_o->eca & 0x01002000U; 314 scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
311 /* Host-protection-interruption introduced with ESOP */ 315 /* Host-protection-interruption introduced with ESOP */
312 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) 316 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
313 scb_s->ecb |= scb_o->ecb & 0x02U; 317 scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
314 /* transactional execution */ 318 /* transactional execution */
315 if (test_kvm_facility(vcpu->kvm, 73)) { 319 if (test_kvm_facility(vcpu->kvm, 73)) {
316 /* remap the prefix is tx is toggled on */ 320 /* remap the prefix is tx is toggled on */
317 if ((scb_o->ecb & 0x10U) && !had_tx) 321 if ((scb_o->ecb & ECB_TE) && !had_tx)
318 prefix_unmapped(vsie_page); 322 prefix_unmapped(vsie_page);
319 scb_s->ecb |= scb_o->ecb & 0x10U; 323 scb_s->ecb |= scb_o->ecb & ECB_TE;
320 } 324 }
321 /* SIMD */ 325 /* SIMD */
322 if (test_kvm_facility(vcpu->kvm, 129)) { 326 if (test_kvm_facility(vcpu->kvm, 129)) {
323 scb_s->eca |= scb_o->eca & 0x00020000U; 327 scb_s->eca |= scb_o->eca & ECA_VX;
324 scb_s->ecd |= scb_o->ecd & 0x20000000U; 328 scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
325 } 329 }
326 /* Run-time-Instrumentation */ 330 /* Run-time-Instrumentation */
327 if (test_kvm_facility(vcpu->kvm, 64)) 331 if (test_kvm_facility(vcpu->kvm, 64))
328 scb_s->ecb3 |= scb_o->ecb3 & 0x01U; 332 scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
329 /* Instruction Execution Prevention */ 333 /* Instruction Execution Prevention */
330 if (test_kvm_facility(vcpu->kvm, 130)) 334 if (test_kvm_facility(vcpu->kvm, 130))
331 scb_s->ecb2 |= scb_o->ecb2 & 0x20U; 335 scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
336 /* Guarded Storage */
337 if (test_kvm_facility(vcpu->kvm, 133)) {
338 scb_s->ecb |= scb_o->ecb & ECB_GS;
339 scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
340 }
332 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) 341 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
333 scb_s->eca |= scb_o->eca & 0x00000001U; 342 scb_s->eca |= scb_o->eca & ECA_SII;
334 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) 343 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
335 scb_s->eca |= scb_o->eca & 0x40000000U; 344 scb_s->eca |= scb_o->eca & ECA_IB;
336 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) 345 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
337 scb_s->eca |= scb_o->eca & 0x80000000U; 346 scb_s->eca |= scb_o->eca & ECA_CEI;
338 347
339 prepare_ibc(vcpu, vsie_page); 348 prepare_ibc(vcpu, vsie_page);
340 rc = shadow_crycb(vcpu, vsie_page); 349 rc = shadow_crycb(vcpu, vsie_page);
@@ -406,7 +415,7 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
406 prefix += scb_s->mso; 415 prefix += scb_s->mso;
407 416
408 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); 417 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
409 if (!rc && (scb_s->ecb & 0x10U)) 418 if (!rc && (scb_s->ecb & ECB_TE))
410 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, 419 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
411 prefix + PAGE_SIZE); 420 prefix + PAGE_SIZE);
412 /* 421 /*
@@ -496,6 +505,13 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
496 unpin_guest_page(vcpu->kvm, gpa, hpa); 505 unpin_guest_page(vcpu->kvm, gpa, hpa);
497 scb_s->riccbd = 0; 506 scb_s->riccbd = 0;
498 } 507 }
508
509 hpa = scb_s->sdnxo;
510 if (hpa) {
511 gpa = scb_o->sdnxo;
512 unpin_guest_page(vcpu->kvm, gpa, hpa);
513 scb_s->sdnxo = 0;
514 }
499} 515}
500 516
501/* 517/*
@@ -543,7 +559,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
543 } 559 }
544 560
545 gpa = scb_o->itdba & ~0xffUL; 561 gpa = scb_o->itdba & ~0xffUL;
546 if (gpa && (scb_s->ecb & 0x10U)) { 562 if (gpa && (scb_s->ecb & ECB_TE)) {
547 if (!(gpa & ~0x1fffU)) { 563 if (!(gpa & ~0x1fffU)) {
548 rc = set_validity_icpt(scb_s, 0x0080U); 564 rc = set_validity_icpt(scb_s, 0x0080U);
549 goto unpin; 565 goto unpin;
@@ -558,8 +574,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
558 } 574 }
559 575
560 gpa = scb_o->gvrd & ~0x1ffUL; 576 gpa = scb_o->gvrd & ~0x1ffUL;
561 if (gpa && (scb_s->eca & 0x00020000U) && 577 if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
562 !(scb_s->ecd & 0x20000000U)) {
563 if (!(gpa & ~0x1fffUL)) { 578 if (!(gpa & ~0x1fffUL)) {
564 rc = set_validity_icpt(scb_s, 0x1310U); 579 rc = set_validity_icpt(scb_s, 0x1310U);
565 goto unpin; 580 goto unpin;
@@ -577,7 +592,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
577 } 592 }
578 593
579 gpa = scb_o->riccbd & ~0x3fUL; 594 gpa = scb_o->riccbd & ~0x3fUL;
580 if (gpa && (scb_s->ecb3 & 0x01U)) { 595 if (gpa && (scb_s->ecb3 & ECB3_RI)) {
581 if (!(gpa & ~0x1fffUL)) { 596 if (!(gpa & ~0x1fffUL)) {
582 rc = set_validity_icpt(scb_s, 0x0043U); 597 rc = set_validity_icpt(scb_s, 0x0043U);
583 goto unpin; 598 goto unpin;
@@ -591,6 +606,33 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
591 goto unpin; 606 goto unpin;
592 scb_s->riccbd = hpa; 607 scb_s->riccbd = hpa;
593 } 608 }
609 if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
610 unsigned long sdnxc;
611
612 gpa = scb_o->sdnxo & ~0xfUL;
613 sdnxc = scb_o->sdnxo & 0xfUL;
614 if (!gpa || !(gpa & ~0x1fffUL)) {
615 rc = set_validity_icpt(scb_s, 0x10b0U);
616 goto unpin;
617 }
618 if (sdnxc < 6 || sdnxc > 12) {
619 rc = set_validity_icpt(scb_s, 0x10b1U);
620 goto unpin;
621 }
622 if (gpa & ((1 << sdnxc) - 1)) {
623 rc = set_validity_icpt(scb_s, 0x10b2U);
624 goto unpin;
625 }
626 /* Due to alignment rules (checked above) this cannot
627 * cross page boundaries
628 */
629 rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
630 if (rc == -EINVAL)
631 rc = set_validity_icpt(scb_s, 0x10b0U);
632 if (rc)
633 goto unpin;
634 scb_s->sdnxo = hpa | sdnxc;
635 }
594 return 0; 636 return 0;
595unpin: 637unpin:
596 unpin_blocks(vcpu, vsie_page); 638 unpin_blocks(vcpu, vsie_page);
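The new SDNX pinning code in pin_blocks() validates the guarded-storage satellite block before shadowing it: sdnxc (the low four bits of sdnxo) must lie between 6 and 12, and the origin must be aligned to 2^sdnxc bytes, which also guarantees the block cannot cross a 4K page. A minimal sketch of just those validity checks, with the field layout and validity codes taken from the hunk above (the actual function additionally pins the guest page):

#include <stdint.h>
#include <stdio.h>

/* Sketch of the SDNX validity checks from pin_blocks() above.
 * Returns the validity interception code that would be set, or 0 if OK. */
static unsigned int check_sdnx(uint64_t sdnxo)
{
	uint64_t gpa = sdnxo & ~0xfULL;      /* SDNX origin */
	unsigned int sdnxc = sdnxo & 0xfULL; /* log2 of the block size */

	if (!gpa || !(gpa & ~0x1fffULL))
		return 0x10b0;               /* origin missing or in low core */
	if (sdnxc < 6 || sdnxc > 12)
		return 0x10b1;               /* size out of range (64B..4KB) */
	if (gpa & ((1ULL << sdnxc) - 1))
		return 0x10b2;               /* origin not size-aligned */
	return 0;                            /* valid: block fits in one page */
}

int main(void)
{
	printf("0x%x\n", check_sdnx(0x200008)); /* 256-byte block at 0x200000 -> 0 */
	return 0;
}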
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index de5d572225f3..cd1fa97776c3 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
302extern void maybe_sigio_broken(int fd, int read); 302extern void maybe_sigio_broken(int fd, int read);
303extern void sigio_broken(int fd, int read); 303extern void sigio_broken(int fd, int read);
304 304
305/* sys-x86_64/prctl.c */ 305/* prctl.c */
306extern int os_arch_prctl(int pid, int code, unsigned long *addr); 306extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
307 307
308/* tty.c */ 308/* tty.c */
309extern int get_pty(void); 309extern int get_pty(void);
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9ba050fe47f3..0af59fa789ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -390,3 +390,4 @@
390381 i386 pkey_alloc sys_pkey_alloc 390381 i386 pkey_alloc sys_pkey_alloc
391382 i386 pkey_free sys_pkey_free 391382 i386 pkey_free sys_pkey_free
392383 i386 statx sys_statx 392383 i386 statx sys_statx
393384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b04bb6dfed7f..0fe00446f9ca 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -187,6 +187,7 @@
187 * Reuse free bits when adding new feature flags! 187 * Reuse free bits when adding new feature flags!
188 */ 188 */
189#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ 189#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
190#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
190#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ 191#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
191#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ 192#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
192#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 193#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74ef58c8ff53..2cc5ec7cc6f5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -43,8 +43,6 @@
43#define KVM_PRIVATE_MEM_SLOTS 3 43#define KVM_PRIVATE_MEM_SLOTS 3
44#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) 44#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
45 45
46#define KVM_PIO_PAGE_OFFSET 1
47#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
48#define KVM_HALT_POLL_NS_DEFAULT 400000 46#define KVM_HALT_POLL_NS_DEFAULT 400000
49 47
50#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS 48#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
@@ -343,9 +341,10 @@ struct kvm_mmu {
343 void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 341 void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
344 u64 *spte, const void *pte); 342 u64 *spte, const void *pte);
345 hpa_t root_hpa; 343 hpa_t root_hpa;
346 int root_level;
347 int shadow_root_level;
348 union kvm_mmu_page_role base_role; 344 union kvm_mmu_page_role base_role;
345 u8 root_level;
346 u8 shadow_root_level;
347 u8 ept_ad;
349 bool direct_map; 348 bool direct_map;
350 349
351 /* 350 /*
@@ -727,6 +726,7 @@ struct kvm_hv {
727 726
728enum kvm_irqchip_mode { 727enum kvm_irqchip_mode {
729 KVM_IRQCHIP_NONE, 728 KVM_IRQCHIP_NONE,
729 KVM_IRQCHIP_INIT_IN_PROGRESS, /* temporarily set during creation */
730 KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ 730 KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
731 KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ 731 KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
732}; 732};
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index d74747b031ec..c4eda791f877 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node {
46}; 46};
47 47
48void kvm_page_track_init(struct kvm *kvm); 48void kvm_page_track_init(struct kvm *kvm);
49void kvm_page_track_cleanup(struct kvm *kvm);
49 50
50void kvm_page_track_free_memslot(struct kvm_memory_slot *free, 51void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
51 struct kvm_memory_slot *dont); 52 struct kvm_memory_slot *dont);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d8b5f8ab8ef9..673f9ac50f6d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -45,6 +45,8 @@
45#define MSR_IA32_PERFCTR1 0x000000c2 45#define MSR_IA32_PERFCTR1 0x000000c2
46#define MSR_FSB_FREQ 0x000000cd 46#define MSR_FSB_FREQ 0x000000cd
47#define MSR_PLATFORM_INFO 0x000000ce 47#define MSR_PLATFORM_INFO 0x000000ce
48#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
49#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
48 50
49#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 51#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
50#define NHM_C3_AUTO_DEMOTE (1UL << 25) 52#define NHM_C3_AUTO_DEMOTE (1UL << 25)
@@ -127,6 +129,7 @@
127 129
128/* DEBUGCTLMSR bits (others vary by model): */ 130/* DEBUGCTLMSR bits (others vary by model): */
129#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ 131#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
132#define DEBUGCTLMSR_BTF_SHIFT 1
130#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ 133#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
131#define DEBUGCTLMSR_TR (1UL << 6) 134#define DEBUGCTLMSR_TR (1UL << 6)
132#define DEBUGCTLMSR_BTS (1UL << 7) 135#define DEBUGCTLMSR_BTS (1UL << 7)
@@ -552,10 +555,12 @@
552#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 555#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
553#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) 556#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
554 557
555/* MISC_FEATURE_ENABLES non-architectural features */ 558/* MISC_FEATURES_ENABLES non-architectural features */
556#define MSR_MISC_FEATURE_ENABLES 0x00000140 559#define MSR_MISC_FEATURES_ENABLES 0x00000140
557 560
558#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1 561#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
562#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
563#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
559 564
560#define MSR_IA32_TSC_DEADLINE 0x000006E0 565#define MSR_IA32_TSC_DEADLINE 0x000006E0
561 566
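Bit 31 of MSR_PLATFORM_INFO (0xce) advertises CPUID-faulting support, and bit 0 of the renamed MSR_MISC_FEATURES_ENABLES (0x140) switches it on. As a hedged illustration only, userspace could probe the platform bit through the msr character device; the /dev/cpu path, the msr driver, and root privileges are assumptions about the test environment, not part of this patch (the kernel itself uses rdmsrl_safe(), as in the intel.c hunk below).

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_PLATFORM_INFO             0xce
#define MSR_PLATFORM_INFO_CPUID_FAULT (1ULL << 31)

/* Illustrative probe via the msr character device (needs the msr driver
 * loaded and CAP_SYS_RAWIO). */
int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), MSR_PLATFORM_INFO) != sizeof(val)) {
		perror("rdmsr");
		return 1;
	}
	printf("CPUID faulting %ssupported\n",
	       (val & MSR_PLATFORM_INFO_CPUID_FAULT) ? "" : "not ");
	close(fd);
	return 0;
}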
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..a80c1b3997ed 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
884extern int get_tsc_mode(unsigned long adr); 884extern int get_tsc_mode(unsigned long adr);
885extern int set_tsc_mode(unsigned int val); 885extern int set_tsc_mode(unsigned int val);
886 886
887DECLARE_PER_CPU(u64, msr_misc_features_shadow);
888
887/* Register/unregister a process' MPX related resource */ 889/* Register/unregister a process' MPX related resource */
888#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() 890#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
889#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() 891#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b19441..8d3964fc5f91 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -9,6 +9,7 @@ void syscall_init(void);
9 9
10#ifdef CONFIG_X86_64 10#ifdef CONFIG_X86_64
11void entry_SYSCALL_64(void); 11void entry_SYSCALL_64(void);
12long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
12#endif 13#endif
13 14
14#ifdef CONFIG_X86_32 15#ifdef CONFIG_X86_32
@@ -30,6 +31,7 @@ void x86_report_nx(void);
30 31
31extern int reboot_force; 32extern int reboot_force;
32 33
33long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); 34long do_arch_prctl_common(struct task_struct *task, int option,
35 unsigned long cpuid_enabled);
34 36
35#endif /* _ASM_X86_PROTO_H */ 37#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6f5eb07a95..9fc44b95f7cb 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -87,6 +87,7 @@ struct thread_info {
87#define TIF_SECCOMP 8 /* secure computing */ 87#define TIF_SECCOMP 8 /* secure computing */
88#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ 88#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
89#define TIF_UPROBE 12 /* breakpointed or singlestepping */ 89#define TIF_UPROBE 12 /* breakpointed or singlestepping */
90#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
90#define TIF_NOTSC 16 /* TSC is not accessible in userland */ 91#define TIF_NOTSC 16 /* TSC is not accessible in userland */
91#define TIF_IA32 17 /* IA32 compatibility process */ 92#define TIF_IA32 17 /* IA32 compatibility process */
92#define TIF_NOHZ 19 /* in adaptive nohz mode */ 93#define TIF_NOHZ 19 /* in adaptive nohz mode */
@@ -110,6 +111,7 @@ struct thread_info {
110#define _TIF_SECCOMP (1 << TIF_SECCOMP) 111#define _TIF_SECCOMP (1 << TIF_SECCOMP)
111#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) 112#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
112#define _TIF_UPROBE (1 << TIF_UPROBE) 113#define _TIF_UPROBE (1 << TIF_UPROBE)
114#define _TIF_NOCPUID (1 << TIF_NOCPUID)
113#define _TIF_NOTSC (1 << TIF_NOTSC) 115#define _TIF_NOTSC (1 << TIF_NOTSC)
114#define _TIF_IA32 (1 << TIF_IA32) 116#define _TIF_IA32 (1 << TIF_IA32)
115#define _TIF_NOHZ (1 << TIF_NOHZ) 117#define _TIF_NOHZ (1 << TIF_NOHZ)
@@ -138,7 +140,7 @@ struct thread_info {
138 140
139/* flags to check in __switch_to() */ 141/* flags to check in __switch_to() */
140#define _TIF_WORK_CTXSW \ 142#define _TIF_WORK_CTXSW \
141 (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) 143 (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
142 144
143#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) 145#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
144#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) 146#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
239extern void arch_task_cache_init(void); 241extern void arch_task_cache_init(void);
240extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); 242extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
241extern void arch_release_task_struct(struct task_struct *tsk); 243extern void arch_release_task_struct(struct task_struct *tsk);
244extern void arch_setup_new_exec(void);
245#define arch_setup_new_exec arch_setup_new_exec
242#endif /* !__ASSEMBLY__ */ 246#endif /* !__ASSEMBLY__ */
243 247
244#endif /* _ASM_X86_THREAD_INFO_H */ 248#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fc5abff9b7fd..75d002bdb3f3 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
110 } 110 }
111} 111}
112 112
113static inline void cr4_toggle_bits(unsigned long mask)
114{
115 unsigned long cr4;
116
117 cr4 = this_cpu_read(cpu_tlbstate.cr4);
118 cr4 ^= mask;
119 this_cpu_write(cpu_tlbstate.cr4, cr4);
120 __write_cr4(cr4);
121}
122
113/* Read the CR4 shadow. */ 123/* Read the CR4 shadow. */
114static inline unsigned long cr4_read_shadow(void) 124static inline unsigned long cr4_read_shadow(void)
115{ 125{
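cr4_toggle_bits() exists so that the context-switch path later in this series can flip CR4.TSD with a single XOR whenever the TIF_NOTSC bits of the outgoing and incoming task differ, instead of branching on set versus clear. A minimal sketch of that idiom, with a plain variable standing in for the per-CPU cr4 shadow:

#include <stdio.h>

#define TIF_NOTSC   16
#define _TIF_NOTSC  (1UL << TIF_NOTSC)
#define X86_CR4_TSD (1UL << 2)

static unsigned long cr4 = X86_CR4_TSD;  /* prev task ran with TSC disabled */

/* Same XOR idiom as the new helper; the real one also updates the
 * per-CPU shadow and writes the register with __write_cr4(). */
static void cr4_toggle_bits(unsigned long mask)
{
	cr4 ^= mask;
}

int main(void)
{
	unsigned long tifp = _TIF_NOTSC;  /* flags of the outgoing task */
	unsigned long tifn = 0;           /* flags of the incoming task */

	if ((tifp ^ tifn) & _TIF_NOTSC)   /* only touch CR4 when they differ */
		cr4_toggle_bits(X86_CR4_TSD);

	printf("CR4.TSD is now %s\n", (cr4 & X86_CR4_TSD) ? "set" : "clear");
	return 0;
}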
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cc54b7026567..35cd06f636ab 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -70,8 +70,10 @@
70#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 70#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
71#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 71#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
72#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 72#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
73#define SECONDARY_EXEC_RDRAND 0x00000800
73#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 74#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
74#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 75#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
76#define SECONDARY_EXEC_RDSEED 0x00010000
75#define SECONDARY_EXEC_ENABLE_PML 0x00020000 77#define SECONDARY_EXEC_ENABLE_PML 0x00020000
76#define SECONDARY_EXEC_XSAVES 0x00100000 78#define SECONDARY_EXEC_XSAVES 0x00100000
77#define SECONDARY_EXEC_TSC_SCALING 0x02000000 79#define SECONDARY_EXEC_TSC_SCALING 0x02000000
@@ -516,12 +518,14 @@ struct vmx_msr_entry {
516#define EPT_VIOLATION_READABLE_BIT 3 518#define EPT_VIOLATION_READABLE_BIT 3
517#define EPT_VIOLATION_WRITABLE_BIT 4 519#define EPT_VIOLATION_WRITABLE_BIT 4
518#define EPT_VIOLATION_EXECUTABLE_BIT 5 520#define EPT_VIOLATION_EXECUTABLE_BIT 5
521#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
519#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) 522#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
520#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) 523#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
521#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) 524#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
522#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) 525#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
523#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) 526#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
524#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) 527#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT)
528#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
525 529
526/* 530/*
527 * VM-instruction error numbers 531 * VM-instruction error numbers
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 739c0c594022..c2824d02ba37 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -9,6 +9,9 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/ioctl.h> 10#include <linux/ioctl.h>
11 11
12#define KVM_PIO_PAGE_OFFSET 1
13#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
14
12#define DE_VECTOR 0 15#define DE_VECTOR 0
13#define DB_VECTOR 1 16#define DB_VECTOR 1
14#define BP_VECTOR 3 17#define BP_VECTOR 3
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 835aa51c7f6e..c45765517092 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -1,10 +1,13 @@
1#ifndef _ASM_X86_PRCTL_H 1#ifndef _ASM_X86_PRCTL_H
2#define _ASM_X86_PRCTL_H 2#define _ASM_X86_PRCTL_H
3 3
4#define ARCH_SET_GS 0x1001 4#define ARCH_SET_GS 0x1001
5#define ARCH_SET_FS 0x1002 5#define ARCH_SET_FS 0x1002
6#define ARCH_GET_FS 0x1003 6#define ARCH_GET_FS 0x1003
7#define ARCH_GET_GS 0x1004 7#define ARCH_GET_GS 0x1004
8
9#define ARCH_GET_CPUID 0x1011
10#define ARCH_SET_CPUID 0x1012
8 11
9#define ARCH_MAP_VDSO_X32 0x2001 12#define ARCH_MAP_VDSO_X32 0x2001
10#define ARCH_MAP_VDSO_32 0x2002 13#define ARCH_MAP_VDSO_32 0x2002
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 14458658e988..690a2dcf4078 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -76,7 +76,11 @@
76#define EXIT_REASON_WBINVD 54 76#define EXIT_REASON_WBINVD 54
77#define EXIT_REASON_XSETBV 55 77#define EXIT_REASON_XSETBV 55
78#define EXIT_REASON_APIC_WRITE 56 78#define EXIT_REASON_APIC_WRITE 56
79#define EXIT_REASON_RDRAND 57
79#define EXIT_REASON_INVPCID 58 80#define EXIT_REASON_INVPCID 58
81#define EXIT_REASON_VMFUNC 59
82#define EXIT_REASON_ENCLS 60
83#define EXIT_REASON_RDSEED 61
80#define EXIT_REASON_PML_FULL 62 84#define EXIT_REASON_PML_FULL 62
81#define EXIT_REASON_XSAVES 63 85#define EXIT_REASON_XSAVES 63
82#define EXIT_REASON_XRSTORS 64 86#define EXIT_REASON_XRSTORS 64
@@ -90,6 +94,7 @@
90 { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ 94 { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
91 { EXIT_REASON_CPUID, "CPUID" }, \ 95 { EXIT_REASON_CPUID, "CPUID" }, \
92 { EXIT_REASON_HLT, "HLT" }, \ 96 { EXIT_REASON_HLT, "HLT" }, \
97 { EXIT_REASON_INVD, "INVD" }, \
93 { EXIT_REASON_INVLPG, "INVLPG" }, \ 98 { EXIT_REASON_INVLPG, "INVLPG" }, \
94 { EXIT_REASON_RDPMC, "RDPMC" }, \ 99 { EXIT_REASON_RDPMC, "RDPMC" }, \
95 { EXIT_REASON_RDTSC, "RDTSC" }, \ 100 { EXIT_REASON_RDTSC, "RDTSC" }, \
@@ -108,6 +113,8 @@
108 { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ 113 { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
109 { EXIT_REASON_MSR_READ, "MSR_READ" }, \ 114 { EXIT_REASON_MSR_READ, "MSR_READ" }, \
110 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ 115 { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
116 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
117 { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
111 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ 118 { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
112 { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ 119 { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \
113 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ 120 { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
@@ -115,20 +122,24 @@
115 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ 122 { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
116 { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ 123 { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
117 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ 124 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
118 { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ 125 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
119 { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ 126 { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
127 { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
120 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ 128 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
121 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ 129 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
122 { EXIT_REASON_INVEPT, "INVEPT" }, \ 130 { EXIT_REASON_INVEPT, "INVEPT" }, \
131 { EXIT_REASON_RDTSCP, "RDTSCP" }, \
123 { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ 132 { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \
133 { EXIT_REASON_INVVPID, "INVVPID" }, \
124 { EXIT_REASON_WBINVD, "WBINVD" }, \ 134 { EXIT_REASON_WBINVD, "WBINVD" }, \
135 { EXIT_REASON_XSETBV, "XSETBV" }, \
125 { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ 136 { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
126 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ 137 { EXIT_REASON_RDRAND, "RDRAND" }, \
127 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
128 { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
129 { EXIT_REASON_INVD, "INVD" }, \
130 { EXIT_REASON_INVVPID, "INVVPID" }, \
131 { EXIT_REASON_INVPCID, "INVPCID" }, \ 138 { EXIT_REASON_INVPCID, "INVPCID" }, \
139 { EXIT_REASON_VMFUNC, "VMFUNC" }, \
140 { EXIT_REASON_ENCLS, "ENCLS" }, \
141 { EXIT_REASON_RDSEED, "RDSEED" }, \
142 { EXIT_REASON_PML_FULL, "PML_FULL" }, \
132 { EXIT_REASON_XSAVES, "XSAVES" }, \ 143 { EXIT_REASON_XSAVES, "XSAVES" }, \
133 { EXIT_REASON_XRSTORS, "XRSTORS" } 144 { EXIT_REASON_XRSTORS, "XRSTORS" }
134 145
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 063197771b8d..dfa90a3a5145 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
90 return; 90 return;
91 } 91 }
92 92
93 if (ring3mwait_disabled) { 93 if (ring3mwait_disabled)
94 msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
95 MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
96 return; 94 return;
97 }
98
99 msr_set_bit(MSR_MISC_FEATURE_ENABLES,
100 MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
101 95
102 set_cpu_cap(c, X86_FEATURE_RING3MWAIT); 96 set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
97 this_cpu_or(msr_misc_features_shadow,
98 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
103 99
104 if (c == &boot_cpu_data) 100 if (c == &boot_cpu_data)
105 ELF_HWCAP2 |= HWCAP2_RING3MWAIT; 101 ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
488 init_intel_energy_perf(c); 484 init_intel_energy_perf(c);
489} 485}
490 486
487static void init_cpuid_fault(struct cpuinfo_x86 *c)
488{
489 u64 msr;
490
491 if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
492 if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
493 set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
494 }
495}
496
497static void init_intel_misc_features(struct cpuinfo_x86 *c)
498{
499 u64 msr;
500
501 if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
502 return;
503
504 /* Clear all MISC features */
505 this_cpu_write(msr_misc_features_shadow, 0);
506
507 /* Check features and update capabilities and shadow control bits */
508 init_cpuid_fault(c);
509 probe_xeon_phi_r3mwait(c);
510
511 msr = this_cpu_read(msr_misc_features_shadow);
512 wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
513}
514
491static void init_intel(struct cpuinfo_x86 *c) 515static void init_intel(struct cpuinfo_x86 *c)
492{ 516{
493 unsigned int l2 = 0; 517 unsigned int l2 = 0;
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
602 626
603 init_intel_energy_perf(c); 627 init_intel_energy_perf(c);
604 628
605 probe_xeon_phi_r3mwait(c); 629 init_intel_misc_features(c);
606} 630}
607 631
608#ifdef CONFIG_X86_32 632#ifdef CONFIG_X86_32
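init_intel_misc_features() establishes a per-CPU shadow of MSR_MISC_FEATURES_ENABLES so that later toggles (CPUID faulting at context switch, ring-3 MWAIT at probe time) can read-modify-write the cached value and only then push it to the hardware MSR. A standalone sketch of that shadow pattern, with printf() standing in for wrmsrl() and a plain variable standing in for the per-CPU shadow:

#include <stdint.h>
#include <stdio.h>

#define CPUID_FAULT_BIT 0
#define RING3MWAIT_BIT  1

/* Stands in for the per-CPU msr_misc_features_shadow variable. */
static uint64_t msr_misc_features_shadow;

static void fake_wrmsrl(uint64_t val)
{
	printf("wrmsr MISC_FEATURES_ENABLES <- %#llx\n", (unsigned long long)val);
}

/* Read-modify-write the shadow, then push the result to the MSR,
 * mirroring set_cpuid_faulting() in the process.c hunk below. */
static void set_bit_shadowed(int bit, int on)
{
	uint64_t v = msr_misc_features_shadow;

	v &= ~(1ULL << bit);
	v |= (uint64_t)on << bit;
	msr_misc_features_shadow = v;   /* keep the cache in sync ... */
	fake_wrmsrl(v);                 /* ... then update the hardware */
}

int main(void)
{
	set_bit_shadowed(RING3MWAIT_BIT, 1);   /* boot-time probe */
	set_bit_shadowed(CPUID_FAULT_BIT, 1);  /* switch to a NOCPUID task */
	set_bit_shadowed(CPUID_FAULT_BIT, 0);  /* switch back */
	return 0;
}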
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 14f65a5f938e..da5c09789984 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu)
396 src = &per_cpu(steal_time, cpu); 396 src = &per_cpu(steal_time, cpu);
397 do { 397 do {
398 version = src->version; 398 version = src->version;
399 rmb(); 399 virt_rmb();
400 steal = src->steal; 400 steal = src->steal;
401 rmb(); 401 virt_rmb();
402 } while ((version & 1) || (version != src->version)); 402 } while ((version & 1) || (version != src->version));
403 403
404 return steal; 404 return steal;
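Switching from rmb() to virt_rmb() keeps the ordering the hypervisor-shared steal_time structure needs without paying for a hardware fence on x86, where the paravirt barrier reduces to a compiler barrier. The surrounding loop is a version-based (seqcount-style) read: retry while the version is odd (update in progress) or changed under us. A minimal single-threaded sketch of that pattern, with a compiler barrier standing in for virt_rmb():

#include <stdint.h>
#include <stdio.h>

/* Compiler barrier standing in for virt_rmb(). */
#define read_barrier() __asm__ __volatile__("" ::: "memory")

struct steal_time { volatile uint32_t version; volatile uint64_t steal; };

static uint64_t read_steal(struct steal_time *src)
{
	uint32_t version;
	uint64_t steal;

	do {
		version = src->version;
		read_barrier();
		steal = src->steal;
		read_barrier();
		/* odd version: writer mid-update; changed version: torn read */
	} while ((version & 1) || (version != src->version));

	return steal;
}

int main(void)
{
	struct steal_time st = { .version = 2, .steal = 12345 };

	printf("steal = %llu ns\n", (unsigned long long)read_steal(&st));
	return 0;
}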
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f67591561711..0bb88428cbf2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -37,6 +37,7 @@
37#include <asm/vm86.h> 37#include <asm/vm86.h>
38#include <asm/switch_to.h> 38#include <asm/switch_to.h>
39#include <asm/desc.h> 39#include <asm/desc.h>
40#include <asm/prctl.h>
40 41
41/* 42/*
42 * per-CPU TSS segments. Threads are completely 'soft' on Linux, 43 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -124,11 +125,6 @@ void flush_thread(void)
124 fpu__clear(&tsk->thread.fpu); 125 fpu__clear(&tsk->thread.fpu);
125} 126}
126 127
127static void hard_disable_TSC(void)
128{
129 cr4_set_bits(X86_CR4_TSD);
130}
131
132void disable_TSC(void) 128void disable_TSC(void)
133{ 129{
134 preempt_disable(); 130 preempt_disable();
@@ -137,15 +133,10 @@ void disable_TSC(void)
137 * Must flip the CPU state synchronously with 133 * Must flip the CPU state synchronously with
138 * TIF_NOTSC in the current running context. 134 * TIF_NOTSC in the current running context.
139 */ 135 */
140 hard_disable_TSC(); 136 cr4_set_bits(X86_CR4_TSD);
141 preempt_enable(); 137 preempt_enable();
142} 138}
143 139
144static void hard_enable_TSC(void)
145{
146 cr4_clear_bits(X86_CR4_TSD);
147}
148
149static void enable_TSC(void) 140static void enable_TSC(void)
150{ 141{
151 preempt_disable(); 142 preempt_disable();
@@ -154,7 +145,7 @@ static void enable_TSC(void)
154 * Must flip the CPU state synchronously with 145 * Must flip the CPU state synchronously with
155 * TIF_NOTSC in the current running context. 146 * TIF_NOTSC in the current running context.
156 */ 147 */
157 hard_enable_TSC(); 148 cr4_clear_bits(X86_CR4_TSD);
158 preempt_enable(); 149 preempt_enable();
159} 150}
160 151
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
182 return 0; 173 return 0;
183} 174}
184 175
185void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 176DEFINE_PER_CPU(u64, msr_misc_features_shadow);
186 struct tss_struct *tss)
187{
188 struct thread_struct *prev, *next;
189
190 prev = &prev_p->thread;
191 next = &next_p->thread;
192 177
193 if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ 178static void set_cpuid_faulting(bool on)
194 test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { 179{
195 unsigned long debugctl = get_debugctlmsr(); 180 u64 msrval;
196 181
197 debugctl &= ~DEBUGCTLMSR_BTF; 182 msrval = this_cpu_read(msr_misc_features_shadow);
198 if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) 183 msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
199 debugctl |= DEBUGCTLMSR_BTF; 184 msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
185 this_cpu_write(msr_misc_features_shadow, msrval);
186 wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
187}
200 188
201 update_debugctlmsr(debugctl); 189static void disable_cpuid(void)
190{
191 preempt_disable();
192 if (!test_and_set_thread_flag(TIF_NOCPUID)) {
193 /*
194 * Must flip the CPU state synchronously with
195 * TIF_NOCPUID in the current running context.
196 */
197 set_cpuid_faulting(true);
202 } 198 }
199 preempt_enable();
200}
203 201
204 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 202static void enable_cpuid(void)
205 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 203{
206 /* prev and next are different */ 204 preempt_disable();
207 if (test_tsk_thread_flag(next_p, TIF_NOTSC)) 205 if (test_and_clear_thread_flag(TIF_NOCPUID)) {
208 hard_disable_TSC(); 206 /*
209 else 207 * Must flip the CPU state synchronously with
210 hard_enable_TSC(); 208 * TIF_NOCPUID in the current running context.
209 */
210 set_cpuid_faulting(false);
211 } 211 }
212 preempt_enable();
213}
214
215static int get_cpuid_mode(void)
216{
217 return !test_thread_flag(TIF_NOCPUID);
218}
219
220static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
221{
222 if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
223 return -ENODEV;
224
225 if (cpuid_enabled)
226 enable_cpuid();
227 else
228 disable_cpuid();
229
230 return 0;
231}
232
233/*
234 * Called immediately after a successful exec.
235 */
236void arch_setup_new_exec(void)
237{
238 /* If cpuid was previously disabled for this task, re-enable it. */
239 if (test_thread_flag(TIF_NOCPUID))
240 enable_cpuid();
241}
212 242
213 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 243static inline void switch_to_bitmap(struct tss_struct *tss,
244 struct thread_struct *prev,
245 struct thread_struct *next,
246 unsigned long tifp, unsigned long tifn)
247{
248 if (tifn & _TIF_IO_BITMAP) {
214 /* 249 /*
215 * Copy the relevant range of the IO bitmap. 250 * Copy the relevant range of the IO bitmap.
216 * Normally this is 128 bytes or less: 251 * Normally this is 128 bytes or less:
217 */ 252 */
218 memcpy(tss->io_bitmap, next->io_bitmap_ptr, 253 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
219 max(prev->io_bitmap_max, next->io_bitmap_max)); 254 max(prev->io_bitmap_max, next->io_bitmap_max));
220
221 /* 255 /*
222 * Make sure that the TSS limit is correct for the CPU 256 * Make sure that the TSS limit is correct for the CPU
223 * to notice the IO bitmap. 257 * to notice the IO bitmap.
224 */ 258 */
225 refresh_tss_limit(); 259 refresh_tss_limit();
226 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { 260 } else if (tifp & _TIF_IO_BITMAP) {
227 /* 261 /*
228 * Clear any possible leftover bits: 262 * Clear any possible leftover bits:
229 */ 263 */
230 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 264 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
231 } 265 }
266}
267
268void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
269 struct tss_struct *tss)
270{
271 struct thread_struct *prev, *next;
272 unsigned long tifp, tifn;
273
274 prev = &prev_p->thread;
275 next = &next_p->thread;
276
277 tifn = READ_ONCE(task_thread_info(next_p)->flags);
278 tifp = READ_ONCE(task_thread_info(prev_p)->flags);
279 switch_to_bitmap(tss, prev, next, tifp, tifn);
280
232 propagate_user_return_notify(prev_p, next_p); 281 propagate_user_return_notify(prev_p, next_p);
282
283 if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
284 arch_has_block_step()) {
285 unsigned long debugctl, msk;
286
287 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
288 debugctl &= ~DEBUGCTLMSR_BTF;
289 msk = tifn & _TIF_BLOCKSTEP;
290 debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
291 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
292 }
293
294 if ((tifp ^ tifn) & _TIF_NOTSC)
295 cr4_toggle_bits(X86_CR4_TSD);
296
297 if ((tifp ^ tifn) & _TIF_NOCPUID)
298 set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
233} 299}
234 300
235/* 301/*
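The rewritten __switch_to_xtra() above snapshots both tasks' thread flags once (tifp/tifn) and derives every transition from them; DEBUGCTLMSR.BTF in particular is updated branch-free by shifting the next task's TIF_BLOCKSTEP bit down to bit 0 and back up to the BTF position. A standalone sketch of that shift trick; DEBUGCTLMSR_BTF_SHIFT comes from the msr-index.h hunk above, while the TIF_BLOCKSTEP position (25 here) is an assumption taken from thread_info.h, not from these hunks:

#include <stdio.h>

#define TIF_BLOCKSTEP         25       /* assumed bit position */
#define _TIF_BLOCKSTEP        (1UL << TIF_BLOCKSTEP)
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF       (1UL << DEBUGCTLMSR_BTF_SHIFT)

/* Branch-free propagation of the next task's TIF_BLOCKSTEP into
 * DEBUGCTLMSR.BTF, as done in __switch_to_xtra() above. */
static unsigned long update_debugctl(unsigned long debugctl, unsigned long tifn)
{
	unsigned long msk = tifn & _TIF_BLOCKSTEP;

	debugctl &= ~DEBUGCTLMSR_BTF;
	debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
	return debugctl;
}

int main(void)
{
	printf("next blockstep on : debugctl=%#lx\n",
	       update_debugctl(0, _TIF_BLOCKSTEP));
	printf("next blockstep off: debugctl=%#lx\n",
	       update_debugctl(DEBUGCTLMSR_BTF, 0));
	return 0;
}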
@@ -550,3 +616,16 @@ out:
550 put_task_stack(p); 616 put_task_stack(p);
551 return ret; 617 return ret;
552} 618}
619
620long do_arch_prctl_common(struct task_struct *task, int option,
621 unsigned long cpuid_enabled)
622{
623 switch (option) {
624 case ARCH_GET_CPUID:
625 return get_cpuid_mode();
626 case ARCH_SET_CPUID:
627 return set_cpuid_mode(task, cpuid_enabled);
628 }
629
630 return -EINVAL;
631}
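With ARCH_GET_CPUID/ARCH_SET_CPUID routed through do_arch_prctl_common() (and, on 64-bit, reached via the do_arch_prctl_64() fallback shown in process_64.c below), a task can ask the kernel to fault its own CPUID instructions. A hedged userspace sketch on a kernel that carries this series; the option values 0x1011/0x1012 come from the uapi/asm/prctl.h hunk above, and the exact failure mode of a faulted CPUID (expected to be SIGSEGV) is not shown in these hunks:

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012

int main(void)
{
	/* Query current mode: 1 = CPUID allowed, 0 = CPUID will fault.
	 * On kernels without this series the call fails with EINVAL. */
	long mode = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);
	printf("CPUID currently %s\n", mode == 1 ? "enabled" : "disabled");

	/* Ask for CPUID faulting; fails with ENODEV if the CPU or kernel
	 * lacks X86_FEATURE_CPUID_FAULT. */
	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) != 0)
		perror("ARCH_SET_CPUID");
	else
		printf("CPUID disabled for this task (re-enabled on exec)\n");

	return 0;
}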
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4c818f8bc135..ff40e74c9181 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -37,6 +37,7 @@
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38#include <linux/io.h> 38#include <linux/io.h>
39#include <linux/kdebug.h> 39#include <linux/kdebug.h>
40#include <linux/syscalls.h>
40 41
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
42#include <asm/ldt.h> 43#include <asm/ldt.h>
@@ -56,6 +57,7 @@
56#include <asm/switch_to.h> 57#include <asm/switch_to.h>
57#include <asm/vm86.h> 58#include <asm/vm86.h>
58#include <asm/intel_rdt.h> 59#include <asm/intel_rdt.h>
60#include <asm/proto.h>
59 61
60void __show_regs(struct pt_regs *regs, int all) 62void __show_regs(struct pt_regs *regs, int all)
61{ 63{
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
304 306
305 return prev_p; 307 return prev_p;
306} 308}
309
310SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
311{
312 return do_arch_prctl_common(current, option, arg2);
313}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index d6b784a5520d..ea1a6180bf39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38#include <linux/io.h> 38#include <linux/io.h>
39#include <linux/ftrace.h> 39#include <linux/ftrace.h>
40#include <linux/syscalls.h>
40 41
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
42#include <asm/processor.h> 43#include <asm/processor.h>
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
204 (struct user_desc __user *)tls, 0); 205 (struct user_desc __user *)tls, 0);
205 else 206 else
206#endif 207#endif
207 err = do_arch_prctl(p, ARCH_SET_FS, tls); 208 err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
208 if (err) 209 if (err)
209 goto out; 210 goto out;
210 } 211 }
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
547} 548}
548#endif 549#endif
549 550
550long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) 551long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
551{ 552{
552 int ret = 0; 553 int ret = 0;
553 int doit = task == current; 554 int doit = task == current;
554 int cpu; 555 int cpu;
555 556
556 switch (code) { 557 switch (option) {
557 case ARCH_SET_GS: 558 case ARCH_SET_GS:
558 if (addr >= TASK_SIZE_MAX) 559 if (arg2 >= TASK_SIZE_MAX)
559 return -EPERM; 560 return -EPERM;
560 cpu = get_cpu(); 561 cpu = get_cpu();
561 task->thread.gsindex = 0; 562 task->thread.gsindex = 0;
562 task->thread.gsbase = addr; 563 task->thread.gsbase = arg2;
563 if (doit) { 564 if (doit) {
564 load_gs_index(0); 565 load_gs_index(0);
565 ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); 566 ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
566 } 567 }
567 put_cpu(); 568 put_cpu();
568 break; 569 break;
569 case ARCH_SET_FS: 570 case ARCH_SET_FS:
570 /* Not strictly needed for fs, but do it for symmetry 571 /* Not strictly needed for fs, but do it for symmetry
571 with gs */ 572 with gs */
572 if (addr >= TASK_SIZE_MAX) 573 if (arg2 >= TASK_SIZE_MAX)
573 return -EPERM; 574 return -EPERM;
574 cpu = get_cpu(); 575 cpu = get_cpu();
575 task->thread.fsindex = 0; 576 task->thread.fsindex = 0;
576 task->thread.fsbase = addr; 577 task->thread.fsbase = arg2;
577 if (doit) { 578 if (doit) {
578 /* set the selector to 0 to not confuse __switch_to */ 579 /* set the selector to 0 to not confuse __switch_to */
579 loadsegment(fs, 0); 580 loadsegment(fs, 0);
580 ret = wrmsrl_safe(MSR_FS_BASE, addr); 581 ret = wrmsrl_safe(MSR_FS_BASE, arg2);
581 } 582 }
582 put_cpu(); 583 put_cpu();
583 break; 584 break;
584 case ARCH_GET_FS: { 585 case ARCH_GET_FS: {
585 unsigned long base; 586 unsigned long base;
587
586 if (doit) 588 if (doit)
587 rdmsrl(MSR_FS_BASE, base); 589 rdmsrl(MSR_FS_BASE, base);
588 else 590 else
589 base = task->thread.fsbase; 591 base = task->thread.fsbase;
590 ret = put_user(base, (unsigned long __user *)addr); 592 ret = put_user(base, (unsigned long __user *)arg2);
591 break; 593 break;
592 } 594 }
593 case ARCH_GET_GS: { 595 case ARCH_GET_GS: {
594 unsigned long base; 596 unsigned long base;
597
595 if (doit) 598 if (doit)
596 rdmsrl(MSR_KERNEL_GS_BASE, base); 599 rdmsrl(MSR_KERNEL_GS_BASE, base);
597 else 600 else
598 base = task->thread.gsbase; 601 base = task->thread.gsbase;
599 ret = put_user(base, (unsigned long __user *)addr); 602 ret = put_user(base, (unsigned long __user *)arg2);
600 break; 603 break;
601 } 604 }
602 605
603#ifdef CONFIG_CHECKPOINT_RESTORE 606#ifdef CONFIG_CHECKPOINT_RESTORE
604# ifdef CONFIG_X86_X32_ABI 607# ifdef CONFIG_X86_X32_ABI
605 case ARCH_MAP_VDSO_X32: 608 case ARCH_MAP_VDSO_X32:
606 return prctl_map_vdso(&vdso_image_x32, addr); 609 return prctl_map_vdso(&vdso_image_x32, arg2);
607# endif 610# endif
608# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 611# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
609 case ARCH_MAP_VDSO_32: 612 case ARCH_MAP_VDSO_32:
610 return prctl_map_vdso(&vdso_image_32, addr); 613 return prctl_map_vdso(&vdso_image_32, arg2);
611# endif 614# endif
612 case ARCH_MAP_VDSO_64: 615 case ARCH_MAP_VDSO_64:
613 return prctl_map_vdso(&vdso_image_64, addr); 616 return prctl_map_vdso(&vdso_image_64, arg2);
614#endif 617#endif
615 618
616 default: 619 default:
@@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
621 return ret; 624 return ret;
622} 625}
623 626
624long sys_arch_prctl(int code, unsigned long addr) 627SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
628{
629 long ret;
630
631 ret = do_arch_prctl_64(current, option, arg2);
632 if (ret == -EINVAL)
633 ret = do_arch_prctl_common(current, option, arg2);
634
635 return ret;
636}
637
638#ifdef CONFIG_IA32_EMULATION
639COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
625{ 640{
626 return do_arch_prctl(current, code, addr); 641 return do_arch_prctl_common(current, option, arg2);
627} 642}
643#endif
628 644
629unsigned long KSTK_ESP(struct task_struct *task) 645unsigned long KSTK_ESP(struct task_struct *task)
630{ 646{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 2364b23ea3e5..f37d18124648 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
396 if (value >= TASK_SIZE_MAX) 396 if (value >= TASK_SIZE_MAX)
397 return -EIO; 397 return -EIO;
398 /* 398 /*
399 * When changing the segment base, use do_arch_prctl 399 * When changing the segment base, use do_arch_prctl_64
400 * to set either thread.fs or thread.fsindex and the 400 * to set either thread.fs or thread.fsindex and the
401 * corresponding GDT slot. 401 * corresponding GDT slot.
402 */ 402 */
403 if (child->thread.fsbase != value) 403 if (child->thread.fsbase != value)
404 return do_arch_prctl(child, ARCH_SET_FS, value); 404 return do_arch_prctl_64(child, ARCH_SET_FS, value);
405 return 0; 405 return 0;
406 case offsetof(struct user_regs_struct,gs_base): 406 case offsetof(struct user_regs_struct,gs_base):
407 /* 407 /*
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
410 if (value >= TASK_SIZE_MAX) 410 if (value >= TASK_SIZE_MAX)
411 return -EIO; 411 return -EIO;
412 if (child->thread.gsbase != value) 412 if (child->thread.gsbase != value)
413 return do_arch_prctl(child, ARCH_SET_GS, value); 413 return do_arch_prctl_64(child, ARCH_SET_GS, value);
414 return 0; 414 return 0;
415#endif 415#endif
416 } 416 }
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
869 Works just like arch_prctl, except that the arguments 869 Works just like arch_prctl, except that the arguments
870 are reversed. */ 870 are reversed. */
871 case PTRACE_ARCH_PRCTL: 871 case PTRACE_ARCH_PRCTL:
872 ret = do_arch_prctl(child, data, addr); 872 ret = do_arch_prctl_64(child, data, addr);
873 break; 873 break;
874#endif 874#endif
875 875
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ab8e32f7b9a8..760433b2574a 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -86,18 +86,6 @@ config KVM_MMU_AUDIT
86 This option adds a R/W kVM module parameter 'mmu_audit', which allows 86 This option adds a R/W kVM module parameter 'mmu_audit', which allows
87 auditing of KVM MMU events at runtime. 87 auditing of KVM MMU events at runtime.
88 88
89config KVM_DEVICE_ASSIGNMENT
90 bool "KVM legacy PCI device assignment support (DEPRECATED)"
91 depends on KVM && PCI && IOMMU_API
92 default n
93 ---help---
94 Provide support for legacy PCI device assignment through KVM. The
95 kernel now also supports a full featured userspace device driver
96 framework through VFIO, which supersedes this support and provides
97 better security.
98
99 If unsure, say N.
100
101# OK, it's a little counter-intuitive to do this, but it puts it neatly under 89# OK, it's a little counter-intuitive to do this, but it puts it neatly under
102# the virtualization menu. 90# the virtualization menu.
103source drivers/vhost/Kconfig 91source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3bff20710471..09d4b17be022 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -15,8 +15,6 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
15 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ 15 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
16 hyperv.o page_track.o debugfs.o 16 hyperv.o page_track.o debugfs.o
17 17
18kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
19
20kvm-intel-y += vmx.o pmu_intel.o 18kvm-intel-y += vmx.o pmu_intel.o
21kvm-amd-y += svm.o pmu_amd.o 19kvm-amd-y += svm.o pmu_amd.o
22 20
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
deleted file mode 100644
index 308b8597c691..000000000000
--- a/arch/x86/kvm/assigned-dev.c
+++ /dev/null
@@ -1,1058 +0,0 @@
1/*
2 * Kernel-based Virtual Machine - device assignment support
3 *
4 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2. See
7 * the COPYING file in the top-level directory.
8 *
9 */
10
11#include <linux/kvm_host.h>
12#include <linux/kvm.h>
13#include <linux/uaccess.h>
14#include <linux/vmalloc.h>
15#include <linux/errno.h>
16#include <linux/spinlock.h>
17#include <linux/pci.h>
18#include <linux/interrupt.h>
19#include <linux/slab.h>
20#include <linux/namei.h>
21#include <linux/fs.h>
22#include "irq.h"
23#include "assigned-dev.h"
24#include "trace/events/kvm.h"
25
26struct kvm_assigned_dev_kernel {
27 struct kvm_irq_ack_notifier ack_notifier;
28 struct list_head list;
29 int assigned_dev_id;
30 int host_segnr;
31 int host_busnr;
32 int host_devfn;
33 unsigned int entries_nr;
34 int host_irq;
35 bool host_irq_disabled;
36 bool pci_2_3;
37 struct msix_entry *host_msix_entries;
38 int guest_irq;
39 struct msix_entry *guest_msix_entries;
40 unsigned long irq_requested_type;
41 int irq_source_id;
42 int flags;
43 struct pci_dev *dev;
44 struct kvm *kvm;
45 spinlock_t intx_lock;
46 spinlock_t intx_mask_lock;
47 char irq_name[32];
48 struct pci_saved_state *pci_saved_state;
49};
50
51static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
52 int assigned_dev_id)
53{
54 struct kvm_assigned_dev_kernel *match;
55
56 list_for_each_entry(match, head, list) {
57 if (match->assigned_dev_id == assigned_dev_id)
58 return match;
59 }
60 return NULL;
61}
62
63static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
64 *assigned_dev, int irq)
65{
66 int i, index;
67 struct msix_entry *host_msix_entries;
68
69 host_msix_entries = assigned_dev->host_msix_entries;
70
71 index = -1;
72 for (i = 0; i < assigned_dev->entries_nr; i++)
73 if (irq == host_msix_entries[i].vector) {
74 index = i;
75 break;
76 }
77 if (index < 0)
78 printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
79
80 return index;
81}
82
83static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
84{
85 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
86 int ret;
87
88 spin_lock(&assigned_dev->intx_lock);
89 if (pci_check_and_mask_intx(assigned_dev->dev)) {
90 assigned_dev->host_irq_disabled = true;
91 ret = IRQ_WAKE_THREAD;
92 } else
93 ret = IRQ_NONE;
94 spin_unlock(&assigned_dev->intx_lock);
95
96 return ret;
97}
98
99static void
100kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
101 int vector)
102{
103 if (unlikely(assigned_dev->irq_requested_type &
104 KVM_DEV_IRQ_GUEST_INTX)) {
105 spin_lock(&assigned_dev->intx_mask_lock);
106 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
107 kvm_set_irq(assigned_dev->kvm,
108 assigned_dev->irq_source_id, vector, 1,
109 false);
110 spin_unlock(&assigned_dev->intx_mask_lock);
111 } else
112 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
113 vector, 1, false);
114}
115
116static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
117{
118 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
119
120 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
121 spin_lock_irq(&assigned_dev->intx_lock);
122 disable_irq_nosync(irq);
123 assigned_dev->host_irq_disabled = true;
124 spin_unlock_irq(&assigned_dev->intx_lock);
125 }
126
127 kvm_assigned_dev_raise_guest_irq(assigned_dev,
128 assigned_dev->guest_irq);
129
130 return IRQ_HANDLED;
131}
132
133/*
134 * Deliver an IRQ in an atomic context if we can, or return a failure,
135 * user can retry in a process context.
136 * Return value:
137 * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
138 * Other values - No need to retry.
139 */
140static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
141 int level)
142{
143 struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
144 struct kvm_kernel_irq_routing_entry *e;
145 int ret = -EINVAL;
146 int idx;
147
148 trace_kvm_set_irq(irq, level, irq_source_id);
149
150 /*
151 * Injection into either PIC or IOAPIC might need to scan all CPUs,
152 * which would need to be retried from thread context; when same GSI
153 * is connected to both PIC and IOAPIC, we'd have to report a
154 * partial failure here.
155 * Since there's no easy way to do this, we only support injecting MSI
156 * which is limited to 1:1 GSI mapping.
157 */
158 idx = srcu_read_lock(&kvm->irq_srcu);
159 if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
160 e = &entries[0];
161 ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
162 irq, level);
163 }
164 srcu_read_unlock(&kvm->irq_srcu, idx);
165 return ret;
166}
167
168
169static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
170{
171 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
172 int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
173 assigned_dev->irq_source_id,
174 assigned_dev->guest_irq, 1);
175 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
176}
177
178static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
179{
180 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
181
182 kvm_assigned_dev_raise_guest_irq(assigned_dev,
183 assigned_dev->guest_irq);
184
185 return IRQ_HANDLED;
186}
187
188static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
189{
190 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
191 int index = find_index_from_host_irq(assigned_dev, irq);
192 u32 vector;
193 int ret = 0;
194
195 if (index >= 0) {
196 vector = assigned_dev->guest_msix_entries[index].vector;
197 ret = kvm_set_irq_inatomic(assigned_dev->kvm,
198 assigned_dev->irq_source_id,
199 vector, 1);
200 }
201
202 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
203}
204
205static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
206{
207 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
208 int index = find_index_from_host_irq(assigned_dev, irq);
209 u32 vector;
210
211 if (index >= 0) {
212 vector = assigned_dev->guest_msix_entries[index].vector;
213 kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
214 }
215
216 return IRQ_HANDLED;
217}
218
219/* Ack the irq line for an assigned device */
220static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
221{
222 struct kvm_assigned_dev_kernel *dev =
223 container_of(kian, struct kvm_assigned_dev_kernel,
224 ack_notifier);
225
226 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
227
228 spin_lock(&dev->intx_mask_lock);
229
230 if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
231 bool reassert = false;
232
233 spin_lock_irq(&dev->intx_lock);
234 /*
235 * The guest IRQ may be shared so this ack can come from an
236 * IRQ for another guest device.
237 */
238 if (dev->host_irq_disabled) {
239 if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
240 enable_irq(dev->host_irq);
241 else if (!pci_check_and_unmask_intx(dev->dev))
242 reassert = true;
243 dev->host_irq_disabled = reassert;
244 }
245 spin_unlock_irq(&dev->intx_lock);
246
247 if (reassert)
248 kvm_set_irq(dev->kvm, dev->irq_source_id,
249 dev->guest_irq, 1, false);
250 }
251
252 spin_unlock(&dev->intx_mask_lock);
253}
254
255static void deassign_guest_irq(struct kvm *kvm,
256 struct kvm_assigned_dev_kernel *assigned_dev)
257{
258 if (assigned_dev->ack_notifier.gsi != -1)
259 kvm_unregister_irq_ack_notifier(kvm,
260 &assigned_dev->ack_notifier);
261
262 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
263 assigned_dev->guest_irq, 0, false);
264
265 if (assigned_dev->irq_source_id != -1)
266 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
267 assigned_dev->irq_source_id = -1;
268 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
269}
270
271/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
272static void deassign_host_irq(struct kvm *kvm,
273 struct kvm_assigned_dev_kernel *assigned_dev)
274{
275 /*
276 * We disable irq here to prevent further events.
277 *
278 * Notice this maybe result in nested disable if the interrupt type is
279 * INTx, but it's OK for we are going to free it.
280 *
281 * If this function is a part of VM destroy, please ensure that till
282 * now, the kvm state is still legal for probably we also have to wait
283 * on a currently running IRQ handler.
284 */
285 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
286 int i;
287 for (i = 0; i < assigned_dev->entries_nr; i++)
288 disable_irq(assigned_dev->host_msix_entries[i].vector);
289
290 for (i = 0; i < assigned_dev->entries_nr; i++)
291 free_irq(assigned_dev->host_msix_entries[i].vector,
292 assigned_dev);
293
294 assigned_dev->entries_nr = 0;
295 kfree(assigned_dev->host_msix_entries);
296 kfree(assigned_dev->guest_msix_entries);
297 pci_disable_msix(assigned_dev->dev);
298 } else {
299 /* Deal with MSI and INTx */
300 if ((assigned_dev->irq_requested_type &
301 KVM_DEV_IRQ_HOST_INTX) &&
302 (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
303 spin_lock_irq(&assigned_dev->intx_lock);
304 pci_intx(assigned_dev->dev, false);
305 spin_unlock_irq(&assigned_dev->intx_lock);
306 synchronize_irq(assigned_dev->host_irq);
307 } else
308 disable_irq(assigned_dev->host_irq);
309
310 free_irq(assigned_dev->host_irq, assigned_dev);
311
312 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
313 pci_disable_msi(assigned_dev->dev);
314 }
315
316 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
317}
318
319static int kvm_deassign_irq(struct kvm *kvm,
320 struct kvm_assigned_dev_kernel *assigned_dev,
321 unsigned long irq_requested_type)
322{
323 unsigned long guest_irq_type, host_irq_type;
324
325 if (!irqchip_in_kernel(kvm))
326 return -EINVAL;
327 /* no irq assignment to deassign */
328 if (!assigned_dev->irq_requested_type)
329 return -ENXIO;
330
331 host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
332 guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
333
334 if (host_irq_type)
335 deassign_host_irq(kvm, assigned_dev);
336 if (guest_irq_type)
337 deassign_guest_irq(kvm, assigned_dev);
338
339 return 0;
340}
341
342static void kvm_free_assigned_irq(struct kvm *kvm,
343 struct kvm_assigned_dev_kernel *assigned_dev)
344{
345 kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
346}
347
348static void kvm_free_assigned_device(struct kvm *kvm,
349 struct kvm_assigned_dev_kernel
350 *assigned_dev)
351{
352 kvm_free_assigned_irq(kvm, assigned_dev);
353
354 pci_reset_function(assigned_dev->dev);
355 if (pci_load_and_free_saved_state(assigned_dev->dev,
356 &assigned_dev->pci_saved_state))
357 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
358 __func__, dev_name(&assigned_dev->dev->dev));
359 else
360 pci_restore_state(assigned_dev->dev);
361
362 pci_clear_dev_assigned(assigned_dev->dev);
363
364 pci_release_regions(assigned_dev->dev);
365 pci_disable_device(assigned_dev->dev);
366 pci_dev_put(assigned_dev->dev);
367
368 list_del(&assigned_dev->list);
369 kfree(assigned_dev);
370}
371
372void kvm_free_all_assigned_devices(struct kvm *kvm)
373{
374 struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
375
376 list_for_each_entry_safe(assigned_dev, tmp,
377 &kvm->arch.assigned_dev_head, list) {
378 kvm_free_assigned_device(kvm, assigned_dev);
379 }
380}
381
382static int assigned_device_enable_host_intx(struct kvm *kvm,
383 struct kvm_assigned_dev_kernel *dev)
384{
385 irq_handler_t irq_handler;
386 unsigned long flags;
387
388 dev->host_irq = dev->dev->irq;
389
390 /*
391 * We can only share the IRQ line with other host devices if we are
392 * able to disable the IRQ source at device-level - independently of
393 * the guest driver. Otherwise host devices may suffer from unbounded
394 * IRQ latencies when the guest keeps the line asserted.
395 */
396 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
397 irq_handler = kvm_assigned_dev_intx;
398 flags = IRQF_SHARED;
399 } else {
400 irq_handler = NULL;
401 flags = IRQF_ONESHOT;
402 }
403 if (request_threaded_irq(dev->host_irq, irq_handler,
404 kvm_assigned_dev_thread_intx, flags,
405 dev->irq_name, dev))
406 return -EIO;
407
408 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
409 spin_lock_irq(&dev->intx_lock);
410 pci_intx(dev->dev, true);
411 spin_unlock_irq(&dev->intx_lock);
412 }
413 return 0;
414}
415
416static int assigned_device_enable_host_msi(struct kvm *kvm,
417 struct kvm_assigned_dev_kernel *dev)
418{
419 int r;
420
421 if (!dev->dev->msi_enabled) {
422 r = pci_enable_msi(dev->dev);
423 if (r)
424 return r;
425 }
426
427 dev->host_irq = dev->dev->irq;
428 if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
429 kvm_assigned_dev_thread_msi, 0,
430 dev->irq_name, dev)) {
431 pci_disable_msi(dev->dev);
432 return -EIO;
433 }
434
435 return 0;
436}
437
438static int assigned_device_enable_host_msix(struct kvm *kvm,
439 struct kvm_assigned_dev_kernel *dev)
440{
441 int i, r = -EINVAL;
442
443 /* host_msix_entries and guest_msix_entries should have been
444 * initialized */
445 if (dev->entries_nr == 0)
446 return r;
447
448 r = pci_enable_msix_exact(dev->dev,
449 dev->host_msix_entries, dev->entries_nr);
450 if (r)
451 return r;
452
453 for (i = 0; i < dev->entries_nr; i++) {
454 r = request_threaded_irq(dev->host_msix_entries[i].vector,
455 kvm_assigned_dev_msix,
456 kvm_assigned_dev_thread_msix,
457 0, dev->irq_name, dev);
458 if (r)
459 goto err;
460 }
461
462 return 0;
463err:
464 for (i -= 1; i >= 0; i--)
465 free_irq(dev->host_msix_entries[i].vector, dev);
466 pci_disable_msix(dev->dev);
467 return r;
468}
469
470static int assigned_device_enable_guest_intx(struct kvm *kvm,
471 struct kvm_assigned_dev_kernel *dev,
472 struct kvm_assigned_irq *irq)
473{
474 dev->guest_irq = irq->guest_irq;
475 dev->ack_notifier.gsi = irq->guest_irq;
476 return 0;
477}
478
479static int assigned_device_enable_guest_msi(struct kvm *kvm,
480 struct kvm_assigned_dev_kernel *dev,
481 struct kvm_assigned_irq *irq)
482{
483 dev->guest_irq = irq->guest_irq;
484 dev->ack_notifier.gsi = -1;
485 return 0;
486}
487
488static int assigned_device_enable_guest_msix(struct kvm *kvm,
489 struct kvm_assigned_dev_kernel *dev,
490 struct kvm_assigned_irq *irq)
491{
492 dev->guest_irq = irq->guest_irq;
493 dev->ack_notifier.gsi = -1;
494 return 0;
495}
496
497static int assign_host_irq(struct kvm *kvm,
498 struct kvm_assigned_dev_kernel *dev,
499 __u32 host_irq_type)
500{
501 int r = -EEXIST;
502
503 if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
504 return r;
505
506 snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
507 pci_name(dev->dev));
508
509 switch (host_irq_type) {
510 case KVM_DEV_IRQ_HOST_INTX:
511 r = assigned_device_enable_host_intx(kvm, dev);
512 break;
513 case KVM_DEV_IRQ_HOST_MSI:
514 r = assigned_device_enable_host_msi(kvm, dev);
515 break;
516 case KVM_DEV_IRQ_HOST_MSIX:
517 r = assigned_device_enable_host_msix(kvm, dev);
518 break;
519 default:
520 r = -EINVAL;
521 }
522 dev->host_irq_disabled = false;
523
524 if (!r)
525 dev->irq_requested_type |= host_irq_type;
526
527 return r;
528}
529
530static int assign_guest_irq(struct kvm *kvm,
531 struct kvm_assigned_dev_kernel *dev,
532 struct kvm_assigned_irq *irq,
533 unsigned long guest_irq_type)
534{
535 int id;
536 int r = -EEXIST;
537
538 if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
539 return r;
540
541 id = kvm_request_irq_source_id(kvm);
542 if (id < 0)
543 return id;
544
545 dev->irq_source_id = id;
546
547 switch (guest_irq_type) {
548 case KVM_DEV_IRQ_GUEST_INTX:
549 r = assigned_device_enable_guest_intx(kvm, dev, irq);
550 break;
551 case KVM_DEV_IRQ_GUEST_MSI:
552 r = assigned_device_enable_guest_msi(kvm, dev, irq);
553 break;
554 case KVM_DEV_IRQ_GUEST_MSIX:
555 r = assigned_device_enable_guest_msix(kvm, dev, irq);
556 break;
557 default:
558 r = -EINVAL;
559 }
560
561 if (!r) {
562 dev->irq_requested_type |= guest_irq_type;
563 if (dev->ack_notifier.gsi != -1)
564 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
565 } else {
566 kvm_free_irq_source_id(kvm, dev->irq_source_id);
567 dev->irq_source_id = -1;
568 }
569
570 return r;
571}
572
573/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
574static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
575 struct kvm_assigned_irq *assigned_irq)
576{
577 int r = -EINVAL;
578 struct kvm_assigned_dev_kernel *match;
579 unsigned long host_irq_type, guest_irq_type;
580
581 if (!irqchip_in_kernel(kvm))
582 return r;
583
584 mutex_lock(&kvm->lock);
585 r = -ENODEV;
586 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
587 assigned_irq->assigned_dev_id);
588 if (!match)
589 goto out;
590
591 host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
592 guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
593
594 r = -EINVAL;
595 /* can only assign one type at a time */
596 if (hweight_long(host_irq_type) > 1)
597 goto out;
598 if (hweight_long(guest_irq_type) > 1)
599 goto out;
600 if (host_irq_type == 0 && guest_irq_type == 0)
601 goto out;
602
603 r = 0;
604 if (host_irq_type)
605 r = assign_host_irq(kvm, match, host_irq_type);
606 if (r)
607 goto out;
608
609 if (guest_irq_type)
610 r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
611out:
612 mutex_unlock(&kvm->lock);
613 return r;
614}
615
616static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
617 struct kvm_assigned_irq
618 *assigned_irq)
619{
620 int r = -ENODEV;
621 struct kvm_assigned_dev_kernel *match;
622 unsigned long irq_type;
623
624 mutex_lock(&kvm->lock);
625
626 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
627 assigned_irq->assigned_dev_id);
628 if (!match)
629 goto out;
630
631 irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
632 KVM_DEV_IRQ_GUEST_MASK);
633 r = kvm_deassign_irq(kvm, match, irq_type);
634out:
635 mutex_unlock(&kvm->lock);
636 return r;
637}
638
639/*
640 * We want to test whether the caller has been granted permissions to
641 * use this device. To be able to configure and control the device,
642 * the user needs access to PCI configuration space and BAR resources.
643 * These are accessed through PCI sysfs. PCI config space is often
644 * passed to the process calling this ioctl via file descriptor, so we
645 * can't rely on access to that file. We can check for permissions
646 * on each of the BAR resource files, which is a pretty clear
647 * indicator that the user has been granted access to the device.
648 */
649static int probe_sysfs_permissions(struct pci_dev *dev)
650{
651#ifdef CONFIG_SYSFS
652 int i;
653 bool bar_found = false;
654
655 for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
656 char *kpath, *syspath;
657 struct path path;
658 struct inode *inode;
659 int r;
660
661 if (!pci_resource_len(dev, i))
662 continue;
663
664 kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
665 if (!kpath)
666 return -ENOMEM;
667
668 /* Per sysfs-rules, sysfs is always at /sys */
669 syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
670 kfree(kpath);
671 if (!syspath)
672 return -ENOMEM;
673
674 r = kern_path(syspath, LOOKUP_FOLLOW, &path);
675 kfree(syspath);
676 if (r)
677 return r;
678
679 inode = d_backing_inode(path.dentry);
680
681 r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
682 path_put(&path);
683 if (r)
684 return r;
685
686 bar_found = true;
687 }
688
689 /* If no resources, probably something special */
690 if (!bar_found)
691 return -EPERM;
692
693 return 0;
694#else
695 return -EINVAL; /* No way to control the device without sysfs */
696#endif
697}
698
699static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
700 struct kvm_assigned_pci_dev *assigned_dev)
701{
702 int r = 0, idx;
703 struct kvm_assigned_dev_kernel *match;
704 struct pci_dev *dev;
705
706 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
707 return -EINVAL;
708
709 mutex_lock(&kvm->lock);
710 idx = srcu_read_lock(&kvm->srcu);
711
712 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
713 assigned_dev->assigned_dev_id);
714 if (match) {
715 /* device already assigned */
716 r = -EEXIST;
717 goto out;
718 }
719
720 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
721 if (match == NULL) {
722 printk(KERN_INFO "%s: Couldn't allocate memory\n",
723 __func__);
724 r = -ENOMEM;
725 goto out;
726 }
727 dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
728 assigned_dev->busnr,
729 assigned_dev->devfn);
730 if (!dev) {
731 printk(KERN_INFO "%s: host device not found\n", __func__);
732 r = -EINVAL;
733 goto out_free;
734 }
735
736 /* Don't allow bridges to be assigned */
737 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
738 r = -EPERM;
739 goto out_put;
740 }
741
742 r = probe_sysfs_permissions(dev);
743 if (r)
744 goto out_put;
745
746 if (pci_enable_device(dev)) {
747 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
748 r = -EBUSY;
749 goto out_put;
750 }
751 r = pci_request_regions(dev, "kvm_assigned_device");
752 if (r) {
753 printk(KERN_INFO "%s: Could not get access to device regions\n",
754 __func__);
755 goto out_disable;
756 }
757
758 pci_reset_function(dev);
759 pci_save_state(dev);
760 match->pci_saved_state = pci_store_saved_state(dev);
761 if (!match->pci_saved_state)
762 printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
763 __func__, dev_name(&dev->dev));
764
765 if (!pci_intx_mask_supported(dev))
766 assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
767
768 match->assigned_dev_id = assigned_dev->assigned_dev_id;
769 match->host_segnr = assigned_dev->segnr;
770 match->host_busnr = assigned_dev->busnr;
771 match->host_devfn = assigned_dev->devfn;
772 match->flags = assigned_dev->flags;
773 match->dev = dev;
774 spin_lock_init(&match->intx_lock);
775 spin_lock_init(&match->intx_mask_lock);
776 match->irq_source_id = -1;
777 match->kvm = kvm;
778 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
779
780 list_add(&match->list, &kvm->arch.assigned_dev_head);
781
782 if (!kvm->arch.iommu_domain) {
783 r = kvm_iommu_map_guest(kvm);
784 if (r)
785 goto out_list_del;
786 }
787 r = kvm_assign_device(kvm, match->dev);
788 if (r)
789 goto out_list_del;
790
791out:
792 srcu_read_unlock(&kvm->srcu, idx);
793 mutex_unlock(&kvm->lock);
794 return r;
795out_list_del:
796 if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
797 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
798 __func__, dev_name(&dev->dev));
799 list_del(&match->list);
800 pci_release_regions(dev);
801out_disable:
802 pci_disable_device(dev);
803out_put:
804 pci_dev_put(dev);
805out_free:
806 kfree(match);
807 srcu_read_unlock(&kvm->srcu, idx);
808 mutex_unlock(&kvm->lock);
809 return r;
810}
811
812static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
813 struct kvm_assigned_pci_dev *assigned_dev)
814{
815 int r = 0;
816 struct kvm_assigned_dev_kernel *match;
817
818 mutex_lock(&kvm->lock);
819
820 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
821 assigned_dev->assigned_dev_id);
822 if (!match) {
823 printk(KERN_INFO "%s: device hasn't been assigned before, "
824 "so cannot be deassigned\n", __func__);
825 r = -EINVAL;
826 goto out;
827 }
828
829 kvm_deassign_device(kvm, match->dev);
830
831 kvm_free_assigned_device(kvm, match);
832
833out:
834 mutex_unlock(&kvm->lock);
835 return r;
836}
837
838
839static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
840 struct kvm_assigned_msix_nr *entry_nr)
841{
842 int r = 0;
843 struct kvm_assigned_dev_kernel *adev;
844
845 mutex_lock(&kvm->lock);
846
847 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
848 entry_nr->assigned_dev_id);
849 if (!adev) {
850 r = -EINVAL;
851 goto msix_nr_out;
852 }
853
854 if (adev->entries_nr == 0) {
855 adev->entries_nr = entry_nr->entry_nr;
856 if (adev->entries_nr == 0 ||
857 adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
858 r = -EINVAL;
859 goto msix_nr_out;
860 }
861
862 adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
863 entry_nr->entry_nr,
864 GFP_KERNEL);
865 if (!adev->host_msix_entries) {
866 r = -ENOMEM;
867 goto msix_nr_out;
868 }
869 adev->guest_msix_entries =
870 kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
871 GFP_KERNEL);
872 if (!adev->guest_msix_entries) {
873 kfree(adev->host_msix_entries);
874 r = -ENOMEM;
875 goto msix_nr_out;
876 }
877	} else /* Not allowed to set the MSI-X number twice */
878 r = -EINVAL;
879msix_nr_out:
880 mutex_unlock(&kvm->lock);
881 return r;
882}
883
884static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
885 struct kvm_assigned_msix_entry *entry)
886{
887 int r = 0, i;
888 struct kvm_assigned_dev_kernel *adev;
889
890 mutex_lock(&kvm->lock);
891
892 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
893 entry->assigned_dev_id);
894
895 if (!adev) {
896 r = -EINVAL;
897 goto msix_entry_out;
898 }
899
900 for (i = 0; i < adev->entries_nr; i++)
901 if (adev->guest_msix_entries[i].vector == 0 ||
902 adev->guest_msix_entries[i].entry == entry->entry) {
903 adev->guest_msix_entries[i].entry = entry->entry;
904 adev->guest_msix_entries[i].vector = entry->gsi;
905 adev->host_msix_entries[i].entry = entry->entry;
906 break;
907 }
908 if (i == adev->entries_nr) {
909 r = -ENOSPC;
910 goto msix_entry_out;
911 }
912
913msix_entry_out:
914 mutex_unlock(&kvm->lock);
915
916 return r;
917}
918
919static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
920 struct kvm_assigned_pci_dev *assigned_dev)
921{
922 int r = 0;
923 struct kvm_assigned_dev_kernel *match;
924
925 mutex_lock(&kvm->lock);
926
927 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
928 assigned_dev->assigned_dev_id);
929 if (!match) {
930 r = -ENODEV;
931 goto out;
932 }
933
934 spin_lock(&match->intx_mask_lock);
935
936 match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
937 match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
938
939 if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
940 if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
941 kvm_set_irq(match->kvm, match->irq_source_id,
942 match->guest_irq, 0, false);
943 /*
944 * Masking at hardware-level is performed on demand,
945 * i.e. when an IRQ actually arrives at the host.
946 */
947 } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
948 /*
949 * Unmask the IRQ line if required. Unmasking at
950 * device level will be performed by user space.
951 */
952 spin_lock_irq(&match->intx_lock);
953 if (match->host_irq_disabled) {
954 enable_irq(match->host_irq);
955 match->host_irq_disabled = false;
956 }
957 spin_unlock_irq(&match->intx_lock);
958 }
959 }
960
961 spin_unlock(&match->intx_mask_lock);
962
963out:
964 mutex_unlock(&kvm->lock);
965 return r;
966}
967
968long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
969 unsigned long arg)
970{
971 void __user *argp = (void __user *)arg;
972 int r;
973
974 switch (ioctl) {
975 case KVM_ASSIGN_PCI_DEVICE: {
976 struct kvm_assigned_pci_dev assigned_dev;
977
978 r = -EFAULT;
979 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
980 goto out;
981 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
982 if (r)
983 goto out;
984 break;
985 }
986 case KVM_ASSIGN_IRQ: {
987 r = -EOPNOTSUPP;
988 break;
989 }
990 case KVM_ASSIGN_DEV_IRQ: {
991 struct kvm_assigned_irq assigned_irq;
992
993 r = -EFAULT;
994 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
995 goto out;
996 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
997 if (r)
998 goto out;
999 break;
1000 }
1001 case KVM_DEASSIGN_DEV_IRQ: {
1002 struct kvm_assigned_irq assigned_irq;
1003
1004 r = -EFAULT;
1005 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
1006 goto out;
1007 r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
1008 if (r)
1009 goto out;
1010 break;
1011 }
1012 case KVM_DEASSIGN_PCI_DEVICE: {
1013 struct kvm_assigned_pci_dev assigned_dev;
1014
1015 r = -EFAULT;
1016 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1017 goto out;
1018 r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
1019 if (r)
1020 goto out;
1021 break;
1022 }
1023 case KVM_ASSIGN_SET_MSIX_NR: {
1024 struct kvm_assigned_msix_nr entry_nr;
1025 r = -EFAULT;
1026 if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
1027 goto out;
1028 r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
1029 if (r)
1030 goto out;
1031 break;
1032 }
1033 case KVM_ASSIGN_SET_MSIX_ENTRY: {
1034 struct kvm_assigned_msix_entry entry;
1035 r = -EFAULT;
1036 if (copy_from_user(&entry, argp, sizeof entry))
1037 goto out;
1038 r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
1039 if (r)
1040 goto out;
1041 break;
1042 }
1043 case KVM_ASSIGN_SET_INTX_MASK: {
1044 struct kvm_assigned_pci_dev assigned_dev;
1045
1046 r = -EFAULT;
1047 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1048 goto out;
1049 r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
1050 break;
1051 }
1052 default:
1053 r = -ENOTTY;
1054 break;
1055 }
1056out:
1057 return r;
1058}
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
deleted file mode 100644
index a428c1a211b2..000000000000
--- a/arch/x86/kvm/assigned-dev.h
+++ /dev/null
@@ -1,32 +0,0 @@
1#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
2#define ARCH_X86_KVM_ASSIGNED_DEV_H
3
4#include <linux/kvm_host.h>
5
6#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
7int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
8int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
9
10int kvm_iommu_map_guest(struct kvm *kvm);
11int kvm_iommu_unmap_guest(struct kvm *kvm);
12
13long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
14 unsigned long arg);
15
16void kvm_free_all_assigned_devices(struct kvm *kvm);
17#else
18static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
19{
20 return 0;
21}
22
23static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
24 unsigned long arg)
25{
26 return -ENOTTY;
27}
28
29static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
30#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
31
32#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 73ea24d4f119..bdcd4139eca9 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s)
49 __releases(&s->lock) 49 __releases(&s->lock)
50{ 50{
51 bool wakeup = s->wakeup_needed; 51 bool wakeup = s->wakeup_needed;
52 struct kvm_vcpu *vcpu, *found = NULL; 52 struct kvm_vcpu *vcpu;
53 int i; 53 int i;
54 54
55 s->wakeup_needed = false; 55 s->wakeup_needed = false;
@@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s)
59 if (wakeup) { 59 if (wakeup) {
60 kvm_for_each_vcpu(i, vcpu, s->kvm) { 60 kvm_for_each_vcpu(i, vcpu, s->kvm) {
61 if (kvm_apic_accept_pic_intr(vcpu)) { 61 if (kvm_apic_accept_pic_intr(vcpu)) {
62 found = vcpu; 62 kvm_make_request(KVM_REQ_EVENT, vcpu);
63 break; 63 kvm_vcpu_kick(vcpu);
64 return;
64 } 65 }
65 } 66 }
66
67 if (!found)
68 return;
69
70 kvm_make_request(KVM_REQ_EVENT, found);
71 kvm_vcpu_kick(found);
72 } 67 }
73} 68}
74 69
@@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
239int kvm_pic_read_irq(struct kvm *kvm) 234int kvm_pic_read_irq(struct kvm *kvm)
240{ 235{
241 int irq, irq2, intno; 236 int irq, irq2, intno;
242 struct kvm_pic *s = pic_irqchip(kvm); 237 struct kvm_pic *s = kvm->arch.vpic;
243 238
244 s->output = 0; 239 s->output = 0;
245 240
@@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
273 return intno; 268 return intno;
274} 269}
275 270
276void kvm_pic_reset(struct kvm_kpic_state *s) 271static void kvm_pic_reset(struct kvm_kpic_state *s)
277{ 272{
278 int irq, i; 273 int irq, i;
279 struct kvm_vcpu *vcpu; 274 struct kvm_vcpu *vcpu;
@@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
422 return ret; 417 return ret;
423} 418}
424 419
425static u32 pic_ioport_read(void *opaque, u32 addr1) 420static u32 pic_ioport_read(void *opaque, u32 addr)
426{ 421{
427 struct kvm_kpic_state *s = opaque; 422 struct kvm_kpic_state *s = opaque;
428 unsigned int addr;
429 int ret; 423 int ret;
430 424
431 addr = addr1;
432 addr &= 1;
433 if (s->poll) { 425 if (s->poll) {
434 ret = pic_poll_read(s, addr1); 426 ret = pic_poll_read(s, addr);
435 s->poll = 0; 427 s->poll = 0;
436 } else 428 } else
437 if (addr == 0) 429 if ((addr & 1) == 0)
438 if (s->read_reg_select) 430 if (s->read_reg_select)
439 ret = s->isr; 431 ret = s->isr;
440 else 432 else
@@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
456 return s->elcr; 448 return s->elcr;
457} 449}
458 450
459static int picdev_in_range(gpa_t addr)
460{
461 switch (addr) {
462 case 0x20:
463 case 0x21:
464 case 0xa0:
465 case 0xa1:
466 case 0x4d0:
467 case 0x4d1:
468 return 1;
469 default:
470 return 0;
471 }
472}
473
474static int picdev_write(struct kvm_pic *s, 451static int picdev_write(struct kvm_pic *s,
475 gpa_t addr, int len, const void *val) 452 gpa_t addr, int len, const void *val)
476{ 453{
477 unsigned char data = *(unsigned char *)val; 454 unsigned char data = *(unsigned char *)val;
478 if (!picdev_in_range(addr))
479 return -EOPNOTSUPP;
480 455
481 if (len != 1) { 456 if (len != 1) {
482 pr_pic_unimpl("non byte write\n"); 457 pr_pic_unimpl("non byte write\n");
483 return 0; 458 return 0;
484 } 459 }
485 pic_lock(s);
486 switch (addr) { 460 switch (addr) {
487 case 0x20: 461 case 0x20:
488 case 0x21: 462 case 0x21:
489 case 0xa0: 463 case 0xa0:
490 case 0xa1: 464 case 0xa1:
465 pic_lock(s);
491 pic_ioport_write(&s->pics[addr >> 7], addr, data); 466 pic_ioport_write(&s->pics[addr >> 7], addr, data);
467 pic_unlock(s);
492 break; 468 break;
493 case 0x4d0: 469 case 0x4d0:
494 case 0x4d1: 470 case 0x4d1:
471 pic_lock(s);
495 elcr_ioport_write(&s->pics[addr & 1], addr, data); 472 elcr_ioport_write(&s->pics[addr & 1], addr, data);
473 pic_unlock(s);
496 break; 474 break;
475 default:
476 return -EOPNOTSUPP;
497 } 477 }
498 pic_unlock(s);
499 return 0; 478 return 0;
500} 479}
501 480
502static int picdev_read(struct kvm_pic *s, 481static int picdev_read(struct kvm_pic *s,
503 gpa_t addr, int len, void *val) 482 gpa_t addr, int len, void *val)
504{ 483{
505 unsigned char data = 0; 484 unsigned char *data = (unsigned char *)val;
506 if (!picdev_in_range(addr))
507 return -EOPNOTSUPP;
508 485
509 if (len != 1) { 486 if (len != 1) {
510 memset(val, 0, len); 487 memset(val, 0, len);
511 pr_pic_unimpl("non byte read\n"); 488 pr_pic_unimpl("non byte read\n");
512 return 0; 489 return 0;
513 } 490 }
514 pic_lock(s);
515 switch (addr) { 491 switch (addr) {
516 case 0x20: 492 case 0x20:
517 case 0x21: 493 case 0x21:
518 case 0xa0: 494 case 0xa0:
519 case 0xa1: 495 case 0xa1:
520 data = pic_ioport_read(&s->pics[addr >> 7], addr); 496 pic_lock(s);
497 *data = pic_ioport_read(&s->pics[addr >> 7], addr);
498 pic_unlock(s);
521 break; 499 break;
522 case 0x4d0: 500 case 0x4d0:
523 case 0x4d1: 501 case 0x4d1:
524 data = elcr_ioport_read(&s->pics[addr & 1], addr); 502 pic_lock(s);
503 *data = elcr_ioport_read(&s->pics[addr & 1], addr);
504 pic_unlock(s);
525 break; 505 break;
506 default:
507 return -EOPNOTSUPP;
526 } 508 }
527 *(unsigned char *)val = data;
528 pic_unlock(s);
529 return 0; 509 return 0;
530} 510}
531 511
@@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
576 */ 556 */
577static void pic_irq_request(struct kvm *kvm, int level) 557static void pic_irq_request(struct kvm *kvm, int level)
578{ 558{
579 struct kvm_pic *s = pic_irqchip(kvm); 559 struct kvm_pic *s = kvm->arch.vpic;
580 560
581 if (!s->output) 561 if (!s->output)
582 s->wakeup_needed = true; 562 s->wakeup_needed = true;
@@ -657,9 +637,14 @@ void kvm_pic_destroy(struct kvm *kvm)
657{ 637{
658 struct kvm_pic *vpic = kvm->arch.vpic; 638 struct kvm_pic *vpic = kvm->arch.vpic;
659 639
640 if (!vpic)
641 return;
642
643 mutex_lock(&kvm->slots_lock);
660 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); 644 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
661 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); 645 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
662 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); 646 kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
647 mutex_unlock(&kvm->slots_lock);
663 648
664 kvm->arch.vpic = NULL; 649 kvm->arch.vpic = NULL;
665 kfree(vpic); 650 kfree(vpic);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 6e219e5c07d2..bdff437acbcb 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
266 spin_unlock(&ioapic->lock); 266 spin_unlock(&ioapic->lock);
267} 267}
268 268
269void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) 269void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
270{ 270{
271 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 271 if (!ioapic_in_kernel(kvm))
272
273 if (!ioapic)
274 return; 272 return;
275 kvm_make_scan_ioapic_request(kvm); 273 kvm_make_scan_ioapic_request(kvm);
276} 274}
@@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
315 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG 313 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
316 && ioapic->irr & (1 << index)) 314 && ioapic->irr & (1 << index))
317 ioapic_service(ioapic, index, false); 315 ioapic_service(ioapic, index, false);
318 kvm_vcpu_request_scan_ioapic(ioapic->kvm); 316 kvm_make_scan_ioapic_request(ioapic->kvm);
319 break; 317 break;
320 } 318 }
321} 319}
@@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm)
624 if (ret < 0) { 622 if (ret < 0) {
625 kvm->arch.vioapic = NULL; 623 kvm->arch.vioapic = NULL;
626 kfree(ioapic); 624 kfree(ioapic);
627 return ret;
628 } 625 }
629 626
630 kvm_vcpu_request_scan_ioapic(kvm);
631 return ret; 627 return ret;
632} 628}
633 629
@@ -635,37 +631,36 @@ void kvm_ioapic_destroy(struct kvm *kvm)
635{ 631{
636 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 632 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
637 633
634 if (!ioapic)
635 return;
636
638 cancel_delayed_work_sync(&ioapic->eoi_inject); 637 cancel_delayed_work_sync(&ioapic->eoi_inject);
638 mutex_lock(&kvm->slots_lock);
639 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); 639 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
640 mutex_unlock(&kvm->slots_lock);
640 kvm->arch.vioapic = NULL; 641 kvm->arch.vioapic = NULL;
641 kfree(ioapic); 642 kfree(ioapic);
642} 643}
643 644
644int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) 645void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
645{ 646{
646 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); 647 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
647 if (!ioapic)
648 return -EINVAL;
649 648
650 spin_lock(&ioapic->lock); 649 spin_lock(&ioapic->lock);
651 memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); 650 memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
652 state->irr &= ~ioapic->irr_delivered; 651 state->irr &= ~ioapic->irr_delivered;
653 spin_unlock(&ioapic->lock); 652 spin_unlock(&ioapic->lock);
654 return 0;
655} 653}
656 654
657int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) 655void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
658{ 656{
659 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); 657 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
660 if (!ioapic)
661 return -EINVAL;
662 658
663 spin_lock(&ioapic->lock); 659 spin_lock(&ioapic->lock);
664 memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); 660 memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
665 ioapic->irr = 0; 661 ioapic->irr = 0;
666 ioapic->irr_delivered = 0; 662 ioapic->irr_delivered = 0;
667 kvm_vcpu_request_scan_ioapic(kvm); 663 kvm_make_scan_ioapic_request(kvm);
668 kvm_ioapic_inject_all(ioapic, state->irr); 664 kvm_ioapic_inject_all(ioapic, state->irr);
669 spin_unlock(&ioapic->lock); 665 spin_unlock(&ioapic->lock);
670 return 0;
671} 666}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 1cc6e54436db..29ce19732ccf 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -105,17 +105,13 @@ do { \
105#define ASSERT(x) do { } while (0) 105#define ASSERT(x) do { } while (0)
106#endif 106#endif
107 107
108static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
109{
110 return kvm->arch.vioapic;
111}
112
113static inline int ioapic_in_kernel(struct kvm *kvm) 108static inline int ioapic_in_kernel(struct kvm *kvm)
114{ 109{
115 int ret; 110 int mode = kvm->arch.irqchip_mode;
116 111
117 ret = (ioapic_irqchip(kvm) != NULL); 112 /* Matches smp_wmb() when setting irqchip_mode */
118 return ret; 113 smp_rmb();
114 return mode == KVM_IRQCHIP_KERNEL;
119} 115}
120 116
121void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); 117void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
132int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 128int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
133 struct kvm_lapic_irq *irq, 129 struct kvm_lapic_irq *irq,
134 struct dest_map *dest_map); 130 struct dest_map *dest_map);
135int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 131void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
136int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 132void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
137void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, 133void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
138 ulong *ioapic_handled_vectors); 134 ulong *ioapic_handled_vectors);
139void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, 135void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
deleted file mode 100644
index b181426f67b4..000000000000
--- a/arch/x86/kvm/iommu.c
+++ /dev/null
@@ -1,356 +0,0 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Copyright IBM Corporation, 2008
19 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
20 *
21 * Author: Allen M. Kay <allen.m.kay@intel.com>
22 * Author: Weidong Han <weidong.han@intel.com>
23 * Author: Ben-Ami Yassour <benami@il.ibm.com>
24 */
25
26#include <linux/list.h>
27#include <linux/kvm_host.h>
28#include <linux/moduleparam.h>
29#include <linux/pci.h>
30#include <linux/stat.h>
31#include <linux/iommu.h>
32#include "assigned-dev.h"
33
34static bool allow_unsafe_assigned_interrupts;
35module_param_named(allow_unsafe_assigned_interrupts,
36 allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
37MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
38 "Enable device assignment on platforms without interrupt remapping support.");
39
40static int kvm_iommu_unmap_memslots(struct kvm *kvm);
41static void kvm_iommu_put_pages(struct kvm *kvm,
42 gfn_t base_gfn, unsigned long npages);
43
44static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
45 unsigned long npages)
46{
47 gfn_t end_gfn;
48 kvm_pfn_t pfn;
49
50 pfn = gfn_to_pfn_memslot(slot, gfn);
51 end_gfn = gfn + npages;
52 gfn += 1;
53
54 if (is_error_noslot_pfn(pfn))
55 return pfn;
56
57 while (gfn < end_gfn)
58 gfn_to_pfn_memslot(slot, gfn++);
59
60 return pfn;
61}
62
63static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn,
64 unsigned long npages)
65{
66 unsigned long i;
67
68 for (i = 0; i < npages; ++i)
69 kvm_release_pfn_clean(pfn + i);
70}
71
72int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
73{
74 gfn_t gfn, end_gfn;
75 kvm_pfn_t pfn;
76 int r = 0;
77 struct iommu_domain *domain = kvm->arch.iommu_domain;
78 int flags;
79
80 /* check if iommu exists and in use */
81 if (!domain)
82 return 0;
83
84 gfn = slot->base_gfn;
85 end_gfn = gfn + slot->npages;
86
87 flags = IOMMU_READ;
88 if (!(slot->flags & KVM_MEM_READONLY))
89 flags |= IOMMU_WRITE;
90 if (!kvm->arch.iommu_noncoherent)
91 flags |= IOMMU_CACHE;
92
93
94 while (gfn < end_gfn) {
95 unsigned long page_size;
96
97 /* Check if already mapped */
98 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
99 gfn += 1;
100 continue;
101 }
102
103 /* Get the page size we could use to map */
104 page_size = kvm_host_page_size(kvm, gfn);
105
106 /* Make sure the page_size does not exceed the memslot */
107 while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
108 page_size >>= 1;
109
110 /* Make sure gfn is aligned to the page size we want to map */
111 while ((gfn << PAGE_SHIFT) & (page_size - 1))
112 page_size >>= 1;
113
114 /* Make sure hva is aligned to the page size we want to map */
115 while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
116 page_size >>= 1;
117
118 /*
119 * Pin all pages we are about to map in memory. This is
120 * important because we unmap and unpin in 4kb steps later.
121 */
122 pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
123 if (is_error_noslot_pfn(pfn)) {
124 gfn += 1;
125 continue;
126 }
127
128 /* Map into IO address space */
129 r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
130 page_size, flags);
131 if (r) {
132 printk(KERN_ERR "kvm_iommu_map_address:"
133 "iommu failed to map pfn=%llx\n", pfn);
134 kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
135 goto unmap_pages;
136 }
137
138 gfn += page_size >> PAGE_SHIFT;
139
140 cond_resched();
141 }
142
143 return 0;
144
145unmap_pages:
146 kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
147 return r;
148}
149
150static int kvm_iommu_map_memslots(struct kvm *kvm)
151{
152 int idx, r = 0;
153 struct kvm_memslots *slots;
154 struct kvm_memory_slot *memslot;
155
156 if (kvm->arch.iommu_noncoherent)
157 kvm_arch_register_noncoherent_dma(kvm);
158
159 idx = srcu_read_lock(&kvm->srcu);
160 slots = kvm_memslots(kvm);
161
162 kvm_for_each_memslot(memslot, slots) {
163 r = kvm_iommu_map_pages(kvm, memslot);
164 if (r)
165 break;
166 }
167 srcu_read_unlock(&kvm->srcu, idx);
168
169 return r;
170}
171
172int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
173{
174 struct iommu_domain *domain = kvm->arch.iommu_domain;
175 int r;
176 bool noncoherent;
177
178 /* check if iommu exists and in use */
179 if (!domain)
180 return 0;
181
182 if (pdev == NULL)
183 return -ENODEV;
184
185 r = iommu_attach_device(domain, &pdev->dev);
186 if (r) {
187 dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
188 return r;
189 }
190
191 noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
192
193	/* Check if we need to update the IOMMU page table for guest memory */
194 if (noncoherent != kvm->arch.iommu_noncoherent) {
195 kvm_iommu_unmap_memslots(kvm);
196 kvm->arch.iommu_noncoherent = noncoherent;
197 r = kvm_iommu_map_memslots(kvm);
198 if (r)
199 goto out_unmap;
200 }
201
202 kvm_arch_start_assignment(kvm);
203 pci_set_dev_assigned(pdev);
204
205 dev_info(&pdev->dev, "kvm assign device\n");
206
207 return 0;
208out_unmap:
209 kvm_iommu_unmap_memslots(kvm);
210 return r;
211}
212
213int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
214{
215 struct iommu_domain *domain = kvm->arch.iommu_domain;
216
217 /* check if iommu exists and in use */
218 if (!domain)
219 return 0;
220
221 if (pdev == NULL)
222 return -ENODEV;
223
224 iommu_detach_device(domain, &pdev->dev);
225
226 pci_clear_dev_assigned(pdev);
227 kvm_arch_end_assignment(kvm);
228
229 dev_info(&pdev->dev, "kvm deassign device\n");
230
231 return 0;
232}
233
234int kvm_iommu_map_guest(struct kvm *kvm)
235{
236 int r;
237
238 if (!iommu_present(&pci_bus_type)) {
239 printk(KERN_ERR "%s: iommu not found\n", __func__);
240 return -ENODEV;
241 }
242
243 mutex_lock(&kvm->slots_lock);
244
245 kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
246 if (!kvm->arch.iommu_domain) {
247 r = -ENOMEM;
248 goto out_unlock;
249 }
250
251 if (!allow_unsafe_assigned_interrupts &&
252 !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
253 printk(KERN_WARNING "%s: No interrupt remapping support,"
254 " disallowing device assignment."
255 " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
256 " module option.\n", __func__);
257 iommu_domain_free(kvm->arch.iommu_domain);
258 kvm->arch.iommu_domain = NULL;
259 r = -EPERM;
260 goto out_unlock;
261 }
262
263 r = kvm_iommu_map_memslots(kvm);
264 if (r)
265 kvm_iommu_unmap_memslots(kvm);
266
267out_unlock:
268 mutex_unlock(&kvm->slots_lock);
269 return r;
270}
271
272static void kvm_iommu_put_pages(struct kvm *kvm,
273 gfn_t base_gfn, unsigned long npages)
274{
275 struct iommu_domain *domain;
276 gfn_t end_gfn, gfn;
277 kvm_pfn_t pfn;
278 u64 phys;
279
280 domain = kvm->arch.iommu_domain;
281 end_gfn = base_gfn + npages;
282 gfn = base_gfn;
283
284	/* check if the iommu exists and is in use */
285 if (!domain)
286 return;
287
288 while (gfn < end_gfn) {
289 unsigned long unmap_pages;
290 size_t size;
291
292 /* Get physical address */
293 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
294
295 if (!phys) {
296 gfn++;
297 continue;
298 }
299
300 pfn = phys >> PAGE_SHIFT;
301
302 /* Unmap address from IO address space */
303 size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
304 unmap_pages = 1ULL << get_order(size);
305
306 /* Unpin all pages we just unmapped to not leak any memory */
307 kvm_unpin_pages(kvm, pfn, unmap_pages);
308
309 gfn += unmap_pages;
310
311 cond_resched();
312 }
313}
314
315void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
316{
317 kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
318}
319
320static int kvm_iommu_unmap_memslots(struct kvm *kvm)
321{
322 int idx;
323 struct kvm_memslots *slots;
324 struct kvm_memory_slot *memslot;
325
326 idx = srcu_read_lock(&kvm->srcu);
327 slots = kvm_memslots(kvm);
328
329 kvm_for_each_memslot(memslot, slots)
330 kvm_iommu_unmap_pages(kvm, memslot);
331
332 srcu_read_unlock(&kvm->srcu, idx);
333
334 if (kvm->arch.iommu_noncoherent)
335 kvm_arch_unregister_noncoherent_dma(kvm);
336
337 return 0;
338}
339
340int kvm_iommu_unmap_guest(struct kvm *kvm)
341{
342 struct iommu_domain *domain = kvm->arch.iommu_domain;
343
344 /* check if iommu exists and in use */
345 if (!domain)
346 return 0;
347
348 mutex_lock(&kvm->slots_lock);
349 kvm_iommu_unmap_memslots(kvm);
350 kvm->arch.iommu_domain = NULL;
351 kvm->arch.iommu_noncoherent = false;
352 mutex_unlock(&kvm->slots_lock);
353
354 iommu_domain_free(domain);
355 return 0;
356}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 60d91c9d160c..5c24811e8b0b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
60 if (irqchip_split(v->kvm)) 60 if (irqchip_split(v->kvm))
61 return pending_userspace_extint(v); 61 return pending_userspace_extint(v);
62 else 62 else
63 return pic_irqchip(v->kvm)->output; 63 return v->kvm->arch.vpic->output;
64 } else 64 } else
65 return 0; 65 return 0;
66} 66}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 40d5b2cf6061..0edd22c3344c 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm);
78int kvm_pic_read_irq(struct kvm *kvm); 78int kvm_pic_read_irq(struct kvm *kvm);
79void kvm_pic_update_irq(struct kvm_pic *s); 79void kvm_pic_update_irq(struct kvm_pic *s);
80 80
81static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
82{
83 return kvm->arch.vpic;
84}
85
86static inline int pic_in_kernel(struct kvm *kvm) 81static inline int pic_in_kernel(struct kvm *kvm)
87{ 82{
88 int ret; 83 int mode = kvm->arch.irqchip_mode;
89 84
90 ret = (pic_irqchip(kvm) != NULL); 85 /* Matches smp_wmb() when setting irqchip_mode */
91 return ret; 86 smp_rmb();
87 return mode == KVM_IRQCHIP_KERNEL;
92} 88}
93 89
94static inline int irqchip_split(struct kvm *kvm) 90static inline int irqchip_split(struct kvm *kvm)
95{ 91{
96 return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT; 92 int mode = kvm->arch.irqchip_mode;
93
94 /* Matches smp_wmb() when setting irqchip_mode */
95 smp_rmb();
96 return mode == KVM_IRQCHIP_SPLIT;
97} 97}
98 98
99static inline int irqchip_kernel(struct kvm *kvm) 99static inline int irqchip_kernel(struct kvm *kvm)
100{ 100{
101 return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL; 101 int mode = kvm->arch.irqchip_mode;
102
103 /* Matches smp_wmb() when setting irqchip_mode */
104 smp_rmb();
105 return mode == KVM_IRQCHIP_KERNEL;
102} 106}
103 107
104static inline int irqchip_in_kernel(struct kvm *kvm) 108static inline int irqchip_in_kernel(struct kvm *kvm)
105{ 109{
106 bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE; 110 int mode = kvm->arch.irqchip_mode;
107 111
108 /* Matches with wmb after initializing kvm->irq_routing. */ 112 /* Matches smp_wmb() when setting irqchip_mode */
109 smp_rmb(); 113 smp_rmb();
110 return ret; 114 return mode > KVM_IRQCHIP_INIT_IN_PROGRESS;
111} 115}
112 116
113void kvm_pic_reset(struct kvm_kpic_state *s);
114
115void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 117void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
116void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 118void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
117void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); 119void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 6825cd36d13b..4517a4c2ac3a 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
42 struct kvm *kvm, int irq_source_id, int level, 42 struct kvm *kvm, int irq_source_id, int level,
43 bool line_status) 43 bool line_status)
44{ 44{
45 struct kvm_pic *pic = pic_irqchip(kvm); 45 struct kvm_pic *pic = kvm->arch.vpic;
46 return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); 46 return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
47} 47}
48 48
@@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
232 goto unlock; 232 goto unlock;
233 } 233 }
234 clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); 234 clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
235 if (!ioapic_in_kernel(kvm)) 235 if (!irqchip_kernel(kvm))
236 goto unlock; 236 goto unlock;
237 237
238 kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); 238 kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
239 kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); 239 kvm_pic_clear_all(kvm->arch.vpic, irq_source_id);
240unlock: 240unlock:
241 mutex_unlock(&kvm->irq_lock); 241 mutex_unlock(&kvm->irq_lock);
242} 242}
@@ -278,38 +278,35 @@ int kvm_set_routing_entry(struct kvm *kvm,
278 struct kvm_kernel_irq_routing_entry *e, 278 struct kvm_kernel_irq_routing_entry *e,
279 const struct kvm_irq_routing_entry *ue) 279 const struct kvm_irq_routing_entry *ue)
280{ 280{
281 int r = -EINVAL; 281 /* also allow creation of routes during KVM_IRQCHIP_INIT_IN_PROGRESS */
282 int delta; 282 if (kvm->arch.irqchip_mode == KVM_IRQCHIP_NONE)
283 unsigned max_pin; 283 return -EINVAL;
284 284
285 /* Matches smp_wmb() when setting irqchip_mode */
286 smp_rmb();
285 switch (ue->type) { 287 switch (ue->type) {
286 case KVM_IRQ_ROUTING_IRQCHIP: 288 case KVM_IRQ_ROUTING_IRQCHIP:
287 delta = 0; 289 if (irqchip_split(kvm))
290 return -EINVAL;
291 e->irqchip.pin = ue->u.irqchip.pin;
288 switch (ue->u.irqchip.irqchip) { 292 switch (ue->u.irqchip.irqchip) {
289 case KVM_IRQCHIP_PIC_SLAVE: 293 case KVM_IRQCHIP_PIC_SLAVE:
290 delta = 8; 294 e->irqchip.pin += PIC_NUM_PINS / 2;
291 /* fall through */ 295 /* fall through */
292 case KVM_IRQCHIP_PIC_MASTER: 296 case KVM_IRQCHIP_PIC_MASTER:
293 if (!pic_in_kernel(kvm)) 297 if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
294 goto out; 298 return -EINVAL;
295
296 e->set = kvm_set_pic_irq; 299 e->set = kvm_set_pic_irq;
297 max_pin = PIC_NUM_PINS;
298 break; 300 break;
299 case KVM_IRQCHIP_IOAPIC: 301 case KVM_IRQCHIP_IOAPIC:
300 if (!ioapic_in_kernel(kvm)) 302 if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
301 goto out; 303 return -EINVAL;
302
303 max_pin = KVM_IOAPIC_NUM_PINS;
304 e->set = kvm_set_ioapic_irq; 304 e->set = kvm_set_ioapic_irq;
305 break; 305 break;
306 default: 306 default:
307 goto out; 307 return -EINVAL;
308 } 308 }
309 e->irqchip.irqchip = ue->u.irqchip.irqchip; 309 e->irqchip.irqchip = ue->u.irqchip.irqchip;
310 e->irqchip.pin = ue->u.irqchip.pin + delta;
311 if (e->irqchip.pin >= max_pin)
312 goto out;
313 break; 310 break;
314 case KVM_IRQ_ROUTING_MSI: 311 case KVM_IRQ_ROUTING_MSI:
315 e->set = kvm_set_msi; 312 e->set = kvm_set_msi;
@@ -318,7 +315,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
318 e->msi.data = ue->u.msi.data; 315 e->msi.data = ue->u.msi.data;
319 316
320 if (kvm_msi_route_invalid(kvm, e)) 317 if (kvm_msi_route_invalid(kvm, e))
321 goto out; 318 return -EINVAL;
322 break; 319 break;
323 case KVM_IRQ_ROUTING_HV_SINT: 320 case KVM_IRQ_ROUTING_HV_SINT:
324 e->set = kvm_hv_set_sint; 321 e->set = kvm_hv_set_sint;
@@ -326,12 +323,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
326 e->hv_sint.sint = ue->u.hv_sint.sint; 323 e->hv_sint.sint = ue->u.hv_sint.sint;
327 break; 324 break;
328 default: 325 default:
329 goto out; 326 return -EINVAL;
330 } 327 }
331 328
332 r = 0; 329 return 0;
333out:
334 return r;
335} 330}
336 331
337bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, 332bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac7810513d0e..558676538fca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4340,7 +4340,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
4340} 4340}
4341EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 4341EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
4342 4342
4343void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) 4343void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
4344 bool accessed_dirty)
4344{ 4345{
4345 struct kvm_mmu *context = &vcpu->arch.mmu; 4346 struct kvm_mmu *context = &vcpu->arch.mmu;
4346 4347
@@ -4349,6 +4350,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
4349 context->shadow_root_level = kvm_x86_ops->get_tdp_level(); 4350 context->shadow_root_level = kvm_x86_ops->get_tdp_level();
4350 4351
4351 context->nx = true; 4352 context->nx = true;
4353 context->ept_ad = accessed_dirty;
4352 context->page_fault = ept_page_fault; 4354 context->page_fault = ept_page_fault;
4353 context->gva_to_gpa = ept_gva_to_gpa; 4355 context->gva_to_gpa = ept_gva_to_gpa;
4354 context->sync_page = ept_sync_page; 4356 context->sync_page = ept_sync_page;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddc56e91f2e4..d8ccb32f7308 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -74,7 +74,8 @@ enum {
74 74
75int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); 75int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
76void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); 76void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
77void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); 77void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
78 bool accessed_dirty);
78 79
79static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) 80static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
80{ 81{
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 37942e419c32..60168cdd0546 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
160 return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); 160 return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
161} 161}
162 162
163void kvm_page_track_cleanup(struct kvm *kvm)
164{
165 struct kvm_page_track_notifier_head *head;
166
167 head = &kvm->arch.track_notifier_head;
168 cleanup_srcu_struct(&head->track_srcu);
169}
170
163void kvm_page_track_init(struct kvm *kvm) 171void kvm_page_track_init(struct kvm *kvm)
164{ 172{
165 struct kvm_page_track_notifier_head *head; 173 struct kvm_page_track_notifier_head *head;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a01105485315..314d2071b337 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -23,13 +23,6 @@
23 * so the code in this file is compiled twice, once per pte size. 23 * so the code in this file is compiled twice, once per pte size.
24 */ 24 */
25 25
26/*
27 * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro
28 * uses for EPT without A/D paging type.
29 */
30extern u64 __pure __using_nonexistent_pte_bit(void)
31 __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT");
32
33#if PTTYPE == 64 26#if PTTYPE == 64
34 #define pt_element_t u64 27 #define pt_element_t u64
35 #define guest_walker guest_walker64 28 #define guest_walker guest_walker64
@@ -39,10 +32,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
39 #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) 32 #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
40 #define PT_INDEX(addr, level) PT64_INDEX(addr, level) 33 #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
41 #define PT_LEVEL_BITS PT64_LEVEL_BITS 34 #define PT_LEVEL_BITS PT64_LEVEL_BITS
42 #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
43 #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
44 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT 35 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
45 #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT 36 #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
37 #define PT_HAVE_ACCESSED_DIRTY(mmu) true
46 #ifdef CONFIG_X86_64 38 #ifdef CONFIG_X86_64
47 #define PT_MAX_FULL_LEVELS 4 39 #define PT_MAX_FULL_LEVELS 4
48 #define CMPXCHG cmpxchg 40 #define CMPXCHG cmpxchg
@@ -60,10 +52,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
60 #define PT_INDEX(addr, level) PT32_INDEX(addr, level) 52 #define PT_INDEX(addr, level) PT32_INDEX(addr, level)
61 #define PT_LEVEL_BITS PT32_LEVEL_BITS 53 #define PT_LEVEL_BITS PT32_LEVEL_BITS
62 #define PT_MAX_FULL_LEVELS 2 54 #define PT_MAX_FULL_LEVELS 2
63 #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
64 #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
65 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT 55 #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
66 #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT 56 #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
57 #define PT_HAVE_ACCESSED_DIRTY(mmu) true
67 #define CMPXCHG cmpxchg 58 #define CMPXCHG cmpxchg
68#elif PTTYPE == PTTYPE_EPT 59#elif PTTYPE == PTTYPE_EPT
69 #define pt_element_t u64 60 #define pt_element_t u64
@@ -74,16 +65,18 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
74 #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) 65 #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
75 #define PT_INDEX(addr, level) PT64_INDEX(addr, level) 66 #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
76 #define PT_LEVEL_BITS PT64_LEVEL_BITS 67 #define PT_LEVEL_BITS PT64_LEVEL_BITS
77 #define PT_GUEST_ACCESSED_MASK 0 68 #define PT_GUEST_DIRTY_SHIFT 9
78 #define PT_GUEST_DIRTY_MASK 0 69 #define PT_GUEST_ACCESSED_SHIFT 8
79 #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit() 70 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
80 #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
81 #define CMPXCHG cmpxchg64 71 #define CMPXCHG cmpxchg64
82 #define PT_MAX_FULL_LEVELS 4 72 #define PT_MAX_FULL_LEVELS 4
83#else 73#else
84 #error Invalid PTTYPE value 74 #error Invalid PTTYPE value
85#endif 75#endif
86 76
77#define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT)
78#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)
79
87#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) 80#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
88#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) 81#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
89 82
@@ -111,12 +104,13 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
111 return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; 104 return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
112} 105}
113 106
114static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) 107static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
108 unsigned gpte)
115{ 109{
116 unsigned mask; 110 unsigned mask;
117 111
118 /* dirty bit is not supported, so no need to track it */ 112 /* dirty bit is not supported, so no need to track it */
119 if (!PT_GUEST_DIRTY_MASK) 113 if (!PT_HAVE_ACCESSED_DIRTY(mmu))
120 return; 114 return;
121 115
122 BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); 116 BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
@@ -171,7 +165,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
171 goto no_present; 165 goto no_present;
172 166
173 /* if accessed bit is not supported prefetch non accessed gpte */ 167 /* if accessed bit is not supported prefetch non accessed gpte */
174 if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK)) 168 if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
175 goto no_present; 169 goto no_present;
176 170
177 return false; 171 return false;
@@ -217,7 +211,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
217 int ret; 211 int ret;
218 212
219 /* dirty/accessed bits are not supported, so no need to update them */ 213 /* dirty/accessed bits are not supported, so no need to update them */
220 if (!PT_GUEST_DIRTY_MASK) 214 if (!PT_HAVE_ACCESSED_DIRTY(mmu))
221 return 0; 215 return 0;
222 216
223 for (level = walker->max_level; level >= walker->level; --level) { 217 for (level = walker->max_level; level >= walker->level; --level) {
@@ -286,7 +280,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
286 pt_element_t __user *uninitialized_var(ptep_user); 280 pt_element_t __user *uninitialized_var(ptep_user);
287 gfn_t table_gfn; 281 gfn_t table_gfn;
288 unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey; 282 unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
283 unsigned nested_access;
289 gpa_t pte_gpa; 284 gpa_t pte_gpa;
285 bool have_ad;
290 int offset; 286 int offset;
291 const int write_fault = access & PFERR_WRITE_MASK; 287 const int write_fault = access & PFERR_WRITE_MASK;
292 const int user_fault = access & PFERR_USER_MASK; 288 const int user_fault = access & PFERR_USER_MASK;
@@ -299,6 +295,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
299retry_walk: 295retry_walk:
300 walker->level = mmu->root_level; 296 walker->level = mmu->root_level;
301 pte = mmu->get_cr3(vcpu); 297 pte = mmu->get_cr3(vcpu);
298 have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
302 299
303#if PTTYPE == 64 300#if PTTYPE == 64
304 if (walker->level == PT32E_ROOT_LEVEL) { 301 if (walker->level == PT32E_ROOT_LEVEL) {
@@ -312,7 +309,15 @@ retry_walk:
312 walker->max_level = walker->level; 309 walker->max_level = walker->level;
313 ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); 310 ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
314 311
315 accessed_dirty = PT_GUEST_ACCESSED_MASK; 312 accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
313
314 /*
315 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
316 * by the MOV to CR instruction are treated as reads and do not cause the
317 * processor to set the dirty flag in any EPT paging-structure entry.
318 */
319 nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
320
316 pt_access = pte_access = ACC_ALL; 321 pt_access = pte_access = ACC_ALL;
317 ++walker->level; 322 ++walker->level;
318 323
@@ -332,7 +337,7 @@ retry_walk:
332 walker->pte_gpa[walker->level - 1] = pte_gpa; 337 walker->pte_gpa[walker->level - 1] = pte_gpa;
333 338
334 real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), 339 real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
335 PFERR_USER_MASK|PFERR_WRITE_MASK, 340 nested_access,
336 &walker->fault); 341 &walker->fault);
337 342
338 /* 343 /*
@@ -394,7 +399,7 @@ retry_walk:
394 walker->gfn = real_gpa >> PAGE_SHIFT; 399 walker->gfn = real_gpa >> PAGE_SHIFT;
395 400
396 if (!write_fault) 401 if (!write_fault)
397 FNAME(protect_clean_gpte)(&pte_access, pte); 402 FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
398 else 403 else
399 /* 404 /*
400 * On a write fault, fold the dirty bit into accessed_dirty. 405 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -485,7 +490,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
485 490
486 gfn = gpte_to_gfn(gpte); 491 gfn = gpte_to_gfn(gpte);
487 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 492 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
488 FNAME(protect_clean_gpte)(&pte_access, gpte); 493 FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
489 pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, 494 pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
490 no_dirty_log && (pte_access & ACC_WRITE_MASK)); 495 no_dirty_log && (pte_access & ACC_WRITE_MASK));
491 if (is_error_pfn(pfn)) 496 if (is_error_pfn(pfn))
@@ -979,7 +984,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
979 gfn = gpte_to_gfn(gpte); 984 gfn = gpte_to_gfn(gpte);
980 pte_access = sp->role.access; 985 pte_access = sp->role.access;
981 pte_access &= FNAME(gpte_access)(vcpu, gpte); 986 pte_access &= FNAME(gpte_access)(vcpu, gpte);
982 FNAME(protect_clean_gpte)(&pte_access, gpte); 987 FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
983 988
984 if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, 989 if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
985 &nr_present)) 990 &nr_present))
@@ -1025,3 +1030,4 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1025#undef PT_GUEST_DIRTY_MASK 1030#undef PT_GUEST_DIRTY_MASK
1026#undef PT_GUEST_DIRTY_SHIFT 1031#undef PT_GUEST_DIRTY_SHIFT
1027#undef PT_GUEST_ACCESSED_SHIFT 1032#undef PT_GUEST_ACCESSED_SHIFT
1033#undef PT_HAVE_ACCESSED_DIRTY
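
The paging_tmpl.h change above replaces the compile-time "guest has no A/D bits" trick (zero masks plus a link-error helper) with a per-MMU runtime predicate, so one EPT walker serves both A/D-enabled and A/D-disabled EPT, and the dirty/accessed masks are now always derived from the shift values. A minimal standalone sketch of that idea follows; the struct and helper names are illustrative stand-ins, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's per-MMU state. */
struct mmu { bool ept_ad; };

/* Bit positions of the EPT accessed/dirty flags (bits 8 and 9). */
#define GUEST_ACCESSED_SHIFT 8
#define GUEST_DIRTY_SHIFT    9

/* Masks are always derived from the shifts ... */
#define GUEST_ACCESSED_MASK (1u << GUEST_ACCESSED_SHIFT)
#define GUEST_DIRTY_MASK    (1u << GUEST_DIRTY_SHIFT)

/* ... and whether they may be consulted is a runtime property of the MMU. */
static bool have_accessed_dirty(const struct mmu *mmu)
{
        return mmu->ept_ad;
}

int main(void)
{
        struct mmu with_ad = { .ept_ad = true }, without_ad = { .ept_ad = false };
        unsigned long long pte = GUEST_ACCESSED_MASK;

        /* Only look at the A/D bits when the MMU actually maintains them. */
        printf("with A/D: accessed=%d\n",
               have_accessed_dirty(&with_ad) && (pte & GUEST_ACCESSED_MASK) ? 1 : 0);
        printf("without A/D: walker skips the check=%d\n",
               !have_accessed_dirty(&without_ad));
        return 0;
}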
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d1efe2c62b3f..1b203abf76e1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm)
1379 unsigned long flags; 1379 unsigned long flags;
1380 struct kvm_arch *vm_data = &kvm->arch; 1380 struct kvm_arch *vm_data = &kvm->arch;
1381 1381
1382 if (!avic)
1383 return;
1384
1382 avic_free_vm_id(vm_data->avic_vm_id); 1385 avic_free_vm_id(vm_data->avic_vm_id);
1383 1386
1384 if (vm_data->avic_logical_id_table_page) 1387 if (vm_data->avic_logical_id_table_page)
@@ -5253,6 +5256,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
5253 avic_handle_ldr_update(vcpu); 5256 avic_handle_ldr_update(vcpu);
5254} 5257}
5255 5258
5259static void svm_setup_mce(struct kvm_vcpu *vcpu)
5260{
5261 /* [63:9] are reserved. */
5262 vcpu->arch.mcg_cap &= 0x1ff;
5263}
5264
5256static struct kvm_x86_ops svm_x86_ops __ro_after_init = { 5265static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
5257 .cpu_has_kvm_support = has_svm, 5266 .cpu_has_kvm_support = has_svm,
5258 .disabled_by_bios = is_disabled, 5267 .disabled_by_bios = is_disabled,
@@ -5364,6 +5373,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
5364 .pmu_ops = &amd_pmu_ops, 5373 .pmu_ops = &amd_pmu_ops,
5365 .deliver_posted_interrupt = svm_deliver_avic_intr, 5374 .deliver_posted_interrupt = svm_deliver_avic_intr,
5366 .update_pi_irte = svm_update_pi_irte, 5375 .update_pi_irte = svm_update_pi_irte,
5376 .setup_mce = svm_setup_mce,
5367}; 5377};
5368 5378
5369static int __init svm_init(void) 5379static int __init svm_init(void)
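
The new svm_setup_mce() hook clamps the guest's MCG_CAP to the low nine bits because bits 63:9 are treated as reserved here. A hedged sketch of what such a mask keeps, using the architectural MCG_CAP layout (the helper name and the sample value are made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* Architectural MCG_CAP fields that survive a 0x1ff mask. */
#define MCG_BANK_COUNT(cap)  ((cap) & 0xff)        /* bits 7:0 - number of MCE banks */
#define MCG_CTL_P(cap)       (((cap) >> 8) & 0x1)  /* bit 8   - MCG_CTL present      */

static uint64_t clamp_mcg_cap(uint64_t cap)
{
        /* Bits 63:9 are reserved on this model, so drop them. */
        return cap & 0x1ff;
}

int main(void)
{
        uint64_t cap = clamp_mcg_cap(0xdead00000000010aULL);

        printf("banks=%llu mcg_ctl_p=%llu\n",
               (unsigned long long)MCG_BANK_COUNT(cap),
               (unsigned long long)MCG_CTL_P(cap));
        return 0;
}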
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 98e82ee1e699..c1a12b94e1fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
84static bool __read_mostly emulate_invalid_guest_state = true; 84static bool __read_mostly emulate_invalid_guest_state = true;
85module_param(emulate_invalid_guest_state, bool, S_IRUGO); 85module_param(emulate_invalid_guest_state, bool, S_IRUGO);
86 86
87static bool __read_mostly vmm_exclusive = 1;
88module_param(vmm_exclusive, bool, S_IRUGO);
89
90static bool __read_mostly fasteoi = 1; 87static bool __read_mostly fasteoi = 1;
91module_param(fasteoi, bool, S_IRUGO); 88module_param(fasteoi, bool, S_IRUGO);
92 89
@@ -615,10 +612,6 @@ struct vcpu_vmx {
615 int vpid; 612 int vpid;
616 bool emulation_required; 613 bool emulation_required;
617 614
618 /* Support for vnmi-less CPUs */
619 int soft_vnmi_blocked;
620 ktime_t entry_time;
621 s64 vnmi_blocked_time;
622 u32 exit_reason; 615 u32 exit_reason;
623 616
624 /* Posted interrupt descriptor */ 617 /* Posted interrupt descriptor */
@@ -914,8 +907,6 @@ static void nested_release_page_clean(struct page *page)
914 907
915static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); 908static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
916static u64 construct_eptp(unsigned long root_hpa); 909static u64 construct_eptp(unsigned long root_hpa);
917static void kvm_cpu_vmxon(u64 addr);
918static void kvm_cpu_vmxoff(void);
919static bool vmx_xsaves_supported(void); 910static bool vmx_xsaves_supported(void);
920static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); 911static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
921static void vmx_set_segment(struct kvm_vcpu *vcpu, 912static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -1239,6 +1230,11 @@ static inline bool cpu_has_vmx_invvpid_global(void)
1239 return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; 1230 return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
1240} 1231}
1241 1232
1233static inline bool cpu_has_vmx_invvpid(void)
1234{
1235 return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
1236}
1237
1242static inline bool cpu_has_vmx_ept(void) 1238static inline bool cpu_has_vmx_ept(void)
1243{ 1239{
1244 return vmcs_config.cpu_based_2nd_exec_ctrl & 1240 return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1285,11 +1281,6 @@ static inline bool cpu_has_vmx_invpcid(void)
1285 SECONDARY_EXEC_ENABLE_INVPCID; 1281 SECONDARY_EXEC_ENABLE_INVPCID;
1286} 1282}
1287 1283
1288static inline bool cpu_has_virtual_nmis(void)
1289{
1290 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
1291}
1292
1293static inline bool cpu_has_vmx_wbinvd_exit(void) 1284static inline bool cpu_has_vmx_wbinvd_exit(void)
1294{ 1285{
1295 return vmcs_config.cpu_based_2nd_exec_ctrl & 1286 return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -2235,15 +2226,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
2235static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2226static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2236{ 2227{
2237 struct vcpu_vmx *vmx = to_vmx(vcpu); 2228 struct vcpu_vmx *vmx = to_vmx(vcpu);
2238 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
2239 bool already_loaded = vmx->loaded_vmcs->cpu == cpu; 2229 bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
2240 2230
2241 if (!vmm_exclusive)
2242 kvm_cpu_vmxon(phys_addr);
2243 else if (!already_loaded)
2244 loaded_vmcs_clear(vmx->loaded_vmcs);
2245
2246 if (!already_loaded) { 2231 if (!already_loaded) {
2232 loaded_vmcs_clear(vmx->loaded_vmcs);
2247 local_irq_disable(); 2233 local_irq_disable();
2248 crash_disable_local_vmclear(cpu); 2234 crash_disable_local_vmclear(cpu);
2249 2235
@@ -2321,11 +2307,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2321 vmx_vcpu_pi_put(vcpu); 2307 vmx_vcpu_pi_put(vcpu);
2322 2308
2323 __vmx_load_host_state(to_vmx(vcpu)); 2309 __vmx_load_host_state(to_vmx(vcpu));
2324 if (!vmm_exclusive) {
2325 __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
2326 vcpu->cpu = -1;
2327 kvm_cpu_vmxoff();
2328 }
2329} 2310}
2330 2311
2331static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); 2312static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -2749,11 +2730,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2749 vmx->nested.nested_vmx_secondary_ctls_high); 2730 vmx->nested.nested_vmx_secondary_ctls_high);
2750 vmx->nested.nested_vmx_secondary_ctls_low = 0; 2731 vmx->nested.nested_vmx_secondary_ctls_low = 0;
2751 vmx->nested.nested_vmx_secondary_ctls_high &= 2732 vmx->nested.nested_vmx_secondary_ctls_high &=
2733 SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED |
2752 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2734 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2753 SECONDARY_EXEC_RDTSCP | 2735 SECONDARY_EXEC_RDTSCP |
2754 SECONDARY_EXEC_DESC | 2736 SECONDARY_EXEC_DESC |
2755 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 2737 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2756 SECONDARY_EXEC_ENABLE_VPID |
2757 SECONDARY_EXEC_APIC_REGISTER_VIRT | 2738 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2758 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 2739 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2759 SECONDARY_EXEC_WBINVD_EXITING | 2740 SECONDARY_EXEC_WBINVD_EXITING |
@@ -2764,14 +2745,16 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2764 vmx->nested.nested_vmx_secondary_ctls_high |= 2745 vmx->nested.nested_vmx_secondary_ctls_high |=
2765 SECONDARY_EXEC_ENABLE_EPT; 2746 SECONDARY_EXEC_ENABLE_EPT;
2766 vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2747 vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
2767 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | 2748 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
2768 VMX_EPT_INVEPT_BIT;
2769 if (cpu_has_vmx_ept_execute_only()) 2749 if (cpu_has_vmx_ept_execute_only())
2770 vmx->nested.nested_vmx_ept_caps |= 2750 vmx->nested.nested_vmx_ept_caps |=
2771 VMX_EPT_EXECUTE_ONLY_BIT; 2751 VMX_EPT_EXECUTE_ONLY_BIT;
2772 vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; 2752 vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
2773 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | 2753 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
2774 VMX_EPT_EXTENT_CONTEXT_BIT; 2754 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
2755 VMX_EPT_1GB_PAGE_BIT;
2756 if (enable_ept_ad_bits)
2757 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
2775 } else 2758 } else
2776 vmx->nested.nested_vmx_ept_caps = 0; 2759 vmx->nested.nested_vmx_ept_caps = 0;
2777 2760
@@ -2781,10 +2764,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2781 * though it is treated as global context. The alternative is 2764 * though it is treated as global context. The alternative is
2782 * not failing the single-context invvpid, and it is worse. 2765 * not failing the single-context invvpid, and it is worse.
2783 */ 2766 */
2784 if (enable_vpid) 2767 if (enable_vpid) {
2768 vmx->nested.nested_vmx_secondary_ctls_high |=
2769 SECONDARY_EXEC_ENABLE_VPID;
2785 vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | 2770 vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
2786 VMX_VPID_EXTENT_SUPPORTED_MASK; 2771 VMX_VPID_EXTENT_SUPPORTED_MASK;
2787 else 2772 } else
2788 vmx->nested.nested_vmx_vpid_caps = 0; 2773 vmx->nested.nested_vmx_vpid_caps = 0;
2789 2774
2790 if (enable_unrestricted_guest) 2775 if (enable_unrestricted_guest)
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
3416 3401
3417static void kvm_cpu_vmxon(u64 addr) 3402static void kvm_cpu_vmxon(u64 addr)
3418{ 3403{
3404 cr4_set_bits(X86_CR4_VMXE);
3419 intel_pt_handle_vmx(1); 3405 intel_pt_handle_vmx(1);
3420 3406
3421 asm volatile (ASM_VMX_VMXON_RAX 3407 asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
3458 /* enable and lock */ 3444 /* enable and lock */
3459 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); 3445 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
3460 } 3446 }
3461 cr4_set_bits(X86_CR4_VMXE); 3447 kvm_cpu_vmxon(phys_addr);
3462 3448 ept_sync_global();
3463 if (vmm_exclusive) {
3464 kvm_cpu_vmxon(phys_addr);
3465 ept_sync_global();
3466 }
3467 3449
3468 native_store_gdt(this_cpu_ptr(&host_gdt)); 3450 native_store_gdt(this_cpu_ptr(&host_gdt));
3469 3451
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
3489 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 3471 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
3490 3472
3491 intel_pt_handle_vmx(0); 3473 intel_pt_handle_vmx(0);
3474 cr4_clear_bits(X86_CR4_VMXE);
3492} 3475}
3493 3476
3494static void hardware_disable(void) 3477static void hardware_disable(void)
3495{ 3478{
3496 if (vmm_exclusive) { 3479 vmclear_local_loaded_vmcss();
3497 vmclear_local_loaded_vmcss(); 3480 kvm_cpu_vmxoff();
3498 kvm_cpu_vmxoff();
3499 }
3500 cr4_clear_bits(X86_CR4_VMXE);
3501} 3481}
3502 3482
3503static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, 3483static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -3617,9 +3597,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
3617 &_vmexit_control) < 0) 3597 &_vmexit_control) < 0)
3618 return -EIO; 3598 return -EIO;
3619 3599
3620 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; 3600 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
3621 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | 3601 PIN_BASED_VIRTUAL_NMIS;
3622 PIN_BASED_VMX_PREEMPTION_TIMER; 3602 opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
3623 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, 3603 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
3624 &_pin_based_exec_control) < 0) 3604 &_pin_based_exec_control) < 0)
3625 return -EIO; 3605 return -EIO;
@@ -4011,11 +3991,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
4011 3991
4012static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) 3992static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
4013{ 3993{
4014 vpid_sync_context(vpid);
4015 if (enable_ept) { 3994 if (enable_ept) {
4016 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 3995 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4017 return; 3996 return;
4018 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 3997 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
3998 } else {
3999 vpid_sync_context(vpid);
4019 } 4000 }
4020} 4001}
4021 4002
@@ -4024,6 +4005,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
4024 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); 4005 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
4025} 4006}
4026 4007
4008static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
4009{
4010 if (enable_ept)
4011 vmx_flush_tlb(vcpu);
4012}
4013
4027static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) 4014static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
4028{ 4015{
4029 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; 4016 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -5285,8 +5272,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5285 5272
5286 vmx->rmode.vm86_active = 0; 5273 vmx->rmode.vm86_active = 0;
5287 5274
5288 vmx->soft_vnmi_blocked = 0;
5289
5290 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 5275 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
5291 kvm_set_cr8(vcpu, 0); 5276 kvm_set_cr8(vcpu, 0);
5292 5277
@@ -5406,8 +5391,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
5406 5391
5407static void enable_nmi_window(struct kvm_vcpu *vcpu) 5392static void enable_nmi_window(struct kvm_vcpu *vcpu)
5408{ 5393{
5409 if (!cpu_has_virtual_nmis() || 5394 if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
5410 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
5411 enable_irq_window(vcpu); 5395 enable_irq_window(vcpu);
5412 return; 5396 return;
5413 } 5397 }
@@ -5448,19 +5432,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
5448 struct vcpu_vmx *vmx = to_vmx(vcpu); 5432 struct vcpu_vmx *vmx = to_vmx(vcpu);
5449 5433
5450 if (!is_guest_mode(vcpu)) { 5434 if (!is_guest_mode(vcpu)) {
5451 if (!cpu_has_virtual_nmis()) {
5452 /*
5453 * Tracking the NMI-blocked state in software is built upon
5454 * finding the next open IRQ window. This, in turn, depends on
5455 * well-behaving guests: They have to keep IRQs disabled at
5456 * least as long as the NMI handler runs. Otherwise we may
5457 * cause NMI nesting, maybe breaking the guest. But as this is
5458 * highly unlikely, we can live with the residual risk.
5459 */
5460 vmx->soft_vnmi_blocked = 1;
5461 vmx->vnmi_blocked_time = 0;
5462 }
5463
5464 ++vcpu->stat.nmi_injections; 5435 ++vcpu->stat.nmi_injections;
5465 vmx->nmi_known_unmasked = false; 5436 vmx->nmi_known_unmasked = false;
5466 } 5437 }
@@ -5477,8 +5448,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
5477 5448
5478static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) 5449static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
5479{ 5450{
5480 if (!cpu_has_virtual_nmis())
5481 return to_vmx(vcpu)->soft_vnmi_blocked;
5482 if (to_vmx(vcpu)->nmi_known_unmasked) 5451 if (to_vmx(vcpu)->nmi_known_unmasked)
5483 return false; 5452 return false;
5484 return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; 5453 return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5488,20 +5457,13 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5488{ 5457{
5489 struct vcpu_vmx *vmx = to_vmx(vcpu); 5458 struct vcpu_vmx *vmx = to_vmx(vcpu);
5490 5459
5491 if (!cpu_has_virtual_nmis()) { 5460 vmx->nmi_known_unmasked = !masked;
5492 if (vmx->soft_vnmi_blocked != masked) { 5461 if (masked)
5493 vmx->soft_vnmi_blocked = masked; 5462 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5494 vmx->vnmi_blocked_time = 0; 5463 GUEST_INTR_STATE_NMI);
5495 } 5464 else
5496 } else { 5465 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
5497 vmx->nmi_known_unmasked = !masked; 5466 GUEST_INTR_STATE_NMI);
5498 if (masked)
5499 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5500 GUEST_INTR_STATE_NMI);
5501 else
5502 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
5503 GUEST_INTR_STATE_NMI);
5504 }
5505} 5467}
5506 5468
5507static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 5469static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5509,9 +5471,6 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
5509 if (to_vmx(vcpu)->nested.nested_run_pending) 5471 if (to_vmx(vcpu)->nested.nested_run_pending)
5510 return 0; 5472 return 0;
5511 5473
5512 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
5513 return 0;
5514
5515 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 5474 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5516 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI 5475 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
5517 | GUEST_INTR_STATE_NMI)); 5476 | GUEST_INTR_STATE_NMI));
@@ -6232,21 +6191,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
6232 unsigned long exit_qualification; 6191 unsigned long exit_qualification;
6233 gpa_t gpa; 6192 gpa_t gpa;
6234 u32 error_code; 6193 u32 error_code;
6235 int gla_validity;
6236 6194
6237 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 6195 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6238 6196
6239 gla_validity = (exit_qualification >> 7) & 0x3; 6197 if (is_guest_mode(vcpu)
6240 if (gla_validity == 0x2) { 6198 && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
6241 printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); 6199 /*
6242 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", 6200 * Fix up exit_qualification according to whether guest
6243 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), 6201 * page table accesses are reads or writes.
6244 vmcs_readl(GUEST_LINEAR_ADDRESS)); 6202 */
6245 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", 6203 u64 eptp = nested_ept_get_cr3(vcpu);
6246 (long unsigned int)exit_qualification); 6204 if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
6247 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; 6205 exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
6248 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
6249 return 0;
6250 } 6206 }
6251 6207
6252 /* 6208 /*
@@ -6256,7 +6212,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
6256 * AAK134, BY25. 6212 * AAK134, BY25.
6257 */ 6213 */
6258 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 6214 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
6259 cpu_has_virtual_nmis() &&
6260 (exit_qualification & INTR_INFO_UNBLOCK_NMI)) 6215 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
6261 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); 6216 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
6262 6217
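
For nested guests, the rewritten handle_ept_violation() no longer bails out on the old "GLA validity" encoding; instead it fixes up the exit qualification: when the violation was taken during a guest page-table walk and L1's EPTP does not enable accessed/dirty bits, the access must not be reported to L1 as a write. A small pure-logic sketch of that fix-up, with bit positions mirroring the layout used above (the function name is illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EPT_VIOLATION_ACC_WRITE      (1ull << 1)
#define EPT_VIOLATION_GVA_TRANSLATED (1ull << 8)
#define EPT_AD_ENABLE_BIT            (1ull << 6)

static uint64_t fixup_exit_qualification(uint64_t qual, uint64_t eptp,
                                         bool is_guest_mode)
{
        /*
         * A violation taken while walking the guest's page tables is only a
         * write from L1's point of view if L1 enabled EPT A/D bits; otherwise
         * the walk is architecturally a read, so drop the write-access bit.
         */
        if (is_guest_mode && !(qual & EPT_VIOLATION_GVA_TRANSLATED) &&
            !(eptp & EPT_AD_ENABLE_BIT))
                qual &= ~EPT_VIOLATION_ACC_WRITE;
        return qual;
}

int main(void)
{
        uint64_t qual = EPT_VIOLATION_ACC_WRITE;    /* write hit during a walk */

        printf("reported to L1 as write: %d\n",
               !!(fixup_exit_qualification(qual, 0, true) & EPT_VIOLATION_ACC_WRITE));
        return 0;
}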
@@ -6517,8 +6472,10 @@ static __init int hardware_setup(void)
6517 if (boot_cpu_has(X86_FEATURE_NX)) 6472 if (boot_cpu_has(X86_FEATURE_NX))
6518 kvm_enable_efer_bits(EFER_NX); 6473 kvm_enable_efer_bits(EFER_NX);
6519 6474
6520 if (!cpu_has_vmx_vpid()) 6475 if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
6476 !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
6521 enable_vpid = 0; 6477 enable_vpid = 0;
6478
6522 if (!cpu_has_vmx_shadow_vmcs()) 6479 if (!cpu_has_vmx_shadow_vmcs())
6523 enable_shadow_vmcs = 0; 6480 enable_shadow_vmcs = 0;
6524 if (enable_shadow_vmcs) 6481 if (enable_shadow_vmcs)
@@ -7805,7 +7762,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
7805 * "blocked by NMI" bit has to be set before next VM entry. 7762 * "blocked by NMI" bit has to be set before next VM entry.
7806 */ 7763 */
7807 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 7764 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
7808 cpu_has_virtual_nmis() &&
7809 (exit_qualification & INTR_INFO_UNBLOCK_NMI)) 7765 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
7810 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 7766 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7811 GUEST_INTR_STATE_NMI); 7767 GUEST_INTR_STATE_NMI);
@@ -8107,6 +8063,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
8107 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); 8063 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
8108 case EXIT_REASON_RDPMC: 8064 case EXIT_REASON_RDPMC:
8109 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); 8065 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
8066 case EXIT_REASON_RDRAND:
8067 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND);
8068 case EXIT_REASON_RDSEED:
8069 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED);
8110 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: 8070 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
8111 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); 8071 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
8112 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: 8072 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
@@ -8477,31 +8437,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
8477 return 0; 8437 return 0;
8478 } 8438 }
8479 8439
8480 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
8481 !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
8482 get_vmcs12(vcpu))))) {
8483 if (vmx_interrupt_allowed(vcpu)) {
8484 vmx->soft_vnmi_blocked = 0;
8485 } else if (vmx->vnmi_blocked_time > 1000000000LL &&
8486 vcpu->arch.nmi_pending) {
8487 /*
8488 * This CPU don't support us in finding the end of an
8489 * NMI-blocked window if the guest runs with IRQs
8490 * disabled. So we pull the trigger after 1 s of
8491 * futile waiting, but inform the user about this.
8492 */
8493 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
8494 "state on VCPU %d after 1 s timeout\n",
8495 __func__, vcpu->vcpu_id);
8496 vmx->soft_vnmi_blocked = 0;
8497 }
8498 }
8499
8500 if (exit_reason < kvm_vmx_max_exit_handlers 8440 if (exit_reason < kvm_vmx_max_exit_handlers
8501 && kvm_vmx_exit_handlers[exit_reason]) 8441 && kvm_vmx_exit_handlers[exit_reason])
8502 return kvm_vmx_exit_handlers[exit_reason](vcpu); 8442 return kvm_vmx_exit_handlers[exit_reason](vcpu);
8503 else { 8443 else {
8504 WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); 8444 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
8445 exit_reason);
8505 kvm_queue_exception(vcpu, UD_VECTOR); 8446 kvm_queue_exception(vcpu, UD_VECTOR);
8506 return 1; 8447 return 1;
8507 } 8448 }
@@ -8547,6 +8488,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
8547 } else { 8488 } else {
8548 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; 8489 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
8549 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 8490 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
8491 vmx_flush_tlb_ept_only(vcpu);
8550 } 8492 }
8551 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); 8493 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
8552 8494
@@ -8572,8 +8514,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
8572 */ 8514 */
8573 if (!is_guest_mode(vcpu) || 8515 if (!is_guest_mode(vcpu) ||
8574 !nested_cpu_has2(get_vmcs12(&vmx->vcpu), 8516 !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
8575 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 8517 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
8576 vmcs_write64(APIC_ACCESS_ADDR, hpa); 8518 vmcs_write64(APIC_ACCESS_ADDR, hpa);
8519 vmx_flush_tlb_ept_only(vcpu);
8520 }
8577} 8521}
8578 8522
8579static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) 8523static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -8768,37 +8712,33 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
8768 8712
8769 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; 8713 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
8770 8714
8771 if (cpu_has_virtual_nmis()) { 8715 if (vmx->nmi_known_unmasked)
8772 if (vmx->nmi_known_unmasked) 8716 return;
8773 return; 8717 /*
8774 /* 8718 * Can't use vmx->exit_intr_info since we're not sure what
8775 * Can't use vmx->exit_intr_info since we're not sure what 8719 * the exit reason is.
8776 * the exit reason is. 8720 */
8777 */ 8721 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
8778 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 8722 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
8779 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 8723 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
8780 vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 8724 /*
8781 /* 8725 * SDM 3: 27.7.1.2 (September 2008)
8782 * SDM 3: 27.7.1.2 (September 2008) 8726 * Re-set bit "block by NMI" before VM entry if vmexit caused by
8783 * Re-set bit "block by NMI" before VM entry if vmexit caused by 8727 * a guest IRET fault.
8784 * a guest IRET fault. 8728 * SDM 3: 23.2.2 (September 2008)
8785 * SDM 3: 23.2.2 (September 2008) 8729 * Bit 12 is undefined in any of the following cases:
8786 * Bit 12 is undefined in any of the following cases: 8730 * If the VM exit sets the valid bit in the IDT-vectoring
8787 * If the VM exit sets the valid bit in the IDT-vectoring 8731 * information field.
8788 * information field. 8732 * If the VM exit is due to a double fault.
8789 * If the VM exit is due to a double fault. 8733 */
8790 */ 8734 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
8791 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && 8735 vector != DF_VECTOR && !idtv_info_valid)
8792 vector != DF_VECTOR && !idtv_info_valid) 8736 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
8793 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 8737 GUEST_INTR_STATE_NMI);
8794 GUEST_INTR_STATE_NMI); 8738 else
8795 else 8739 vmx->nmi_known_unmasked =
8796 vmx->nmi_known_unmasked = 8740 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
8797 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) 8741 & GUEST_INTR_STATE_NMI);
8798 & GUEST_INTR_STATE_NMI);
8799 } else if (unlikely(vmx->soft_vnmi_blocked))
8800 vmx->vnmi_blocked_time +=
8801 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
8802} 8742}
8803 8743
8804static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, 8744static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
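
With the soft-vNMI bookkeeping removed, vmx_recover_nmi_blocking() is reduced to the SDM-mandated case quoted in the comments above: re-set "blocked by NMI" when a fault interrupted the guest's IRET. A standalone sketch of that remaining predicate (the helper name is made up for illustration):

#include <stdbool.h>
#include <stdio.h>

#define DF_VECTOR 8

/*
 * "Blocked by NMI" must be re-set before the next VM entry when the exit
 * interrupted the guest's IRET (NMIs were unblocked), it was not a double
 * fault, and no IDT-vectoring information was recorded.
 */
static bool must_reblock_nmi(bool exit_intr_valid, bool unblock_nmi,
                             unsigned int vector, bool idtv_info_valid)
{
        return exit_intr_valid && unblock_nmi &&
               vector != DF_VECTOR && !idtv_info_valid;
}

int main(void)
{
        /* Page fault on the guest's IRET, no pending IDT vectoring: re-block. */
        printf("%d\n", must_reblock_nmi(true, true, 14, false));
        /* Double fault: bit 12 is undefined, leave the NMI state alone. */
        printf("%d\n", must_reblock_nmi(true, true, DF_VECTOR, false));
        return 0;
}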
@@ -8915,10 +8855,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
8915 struct vcpu_vmx *vmx = to_vmx(vcpu); 8855 struct vcpu_vmx *vmx = to_vmx(vcpu);
8916 unsigned long debugctlmsr, cr4; 8856 unsigned long debugctlmsr, cr4;
8917 8857
8918 /* Record the guest's net vcpu time for enforced NMI injections. */
8919 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
8920 vmx->entry_time = ktime_get();
8921
8922 /* Don't enter VMX if guest state is invalid, let the exit handler 8858 /* Don't enter VMX if guest state is invalid, let the exit handler
8923 start emulation until we arrive back to a valid state */ 8859 start emulation until we arrive back to a valid state */
8924 if (vmx->emulation_required) 8860 if (vmx->emulation_required)
@@ -9126,16 +9062,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9126 vmx_complete_interrupts(vmx); 9062 vmx_complete_interrupts(vmx);
9127} 9063}
9128 9064
9129static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) 9065static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
9130{ 9066{
9131 struct vcpu_vmx *vmx = to_vmx(vcpu); 9067 struct vcpu_vmx *vmx = to_vmx(vcpu);
9132 int cpu; 9068 int cpu;
9133 9069
9134 if (vmx->loaded_vmcs == &vmx->vmcs01) 9070 if (vmx->loaded_vmcs == vmcs)
9135 return; 9071 return;
9136 9072
9137 cpu = get_cpu(); 9073 cpu = get_cpu();
9138 vmx->loaded_vmcs = &vmx->vmcs01; 9074 vmx->loaded_vmcs = vmcs;
9139 vmx_vcpu_put(vcpu); 9075 vmx_vcpu_put(vcpu);
9140 vmx_vcpu_load(vcpu, cpu); 9076 vmx_vcpu_load(vcpu, cpu);
9141 vcpu->cpu = cpu; 9077 vcpu->cpu = cpu;
@@ -9153,7 +9089,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
9153 9089
9154 r = vcpu_load(vcpu); 9090 r = vcpu_load(vcpu);
9155 BUG_ON(r); 9091 BUG_ON(r);
9156 vmx_load_vmcs01(vcpu); 9092 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
9157 free_nested(vmx); 9093 free_nested(vmx);
9158 vcpu_put(vcpu); 9094 vcpu_put(vcpu);
9159} 9095}
@@ -9214,11 +9150,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
9214 vmx->loaded_vmcs->shadow_vmcs = NULL; 9150 vmx->loaded_vmcs->shadow_vmcs = NULL;
9215 if (!vmx->loaded_vmcs->vmcs) 9151 if (!vmx->loaded_vmcs->vmcs)
9216 goto free_msrs; 9152 goto free_msrs;
9217 if (!vmm_exclusive)
9218 kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
9219 loaded_vmcs_init(vmx->loaded_vmcs); 9153 loaded_vmcs_init(vmx->loaded_vmcs);
9220 if (!vmm_exclusive)
9221 kvm_cpu_vmxoff();
9222 9154
9223 cpu = get_cpu(); 9155 cpu = get_cpu();
9224 vmx_vcpu_load(&vmx->vcpu, cpu); 9156 vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -9478,17 +9410,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
9478 return get_vmcs12(vcpu)->ept_pointer; 9410 return get_vmcs12(vcpu)->ept_pointer;
9479} 9411}
9480 9412
9481static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 9413static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
9482{ 9414{
9415 u64 eptp;
9416
9483 WARN_ON(mmu_is_nested(vcpu)); 9417 WARN_ON(mmu_is_nested(vcpu));
9418 eptp = nested_ept_get_cr3(vcpu);
9419 if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
9420 return 1;
9421
9422 kvm_mmu_unload(vcpu);
9484 kvm_init_shadow_ept_mmu(vcpu, 9423 kvm_init_shadow_ept_mmu(vcpu,
9485 to_vmx(vcpu)->nested.nested_vmx_ept_caps & 9424 to_vmx(vcpu)->nested.nested_vmx_ept_caps &
9486 VMX_EPT_EXECUTE_ONLY_BIT); 9425 VMX_EPT_EXECUTE_ONLY_BIT,
9426 eptp & VMX_EPT_AD_ENABLE_BIT);
9487 vcpu->arch.mmu.set_cr3 = vmx_set_cr3; 9427 vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
9488 vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; 9428 vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
9489 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; 9429 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
9490 9430
9491 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 9431 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
9432 return 0;
9492} 9433}
9493 9434
9494static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 9435static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
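
nested_ept_init_mmu_context() now returns failure when L1's EPTP requests accessed/dirty bits that the host did not enable, and prepare_vmcs02() turns that into an entry failure instead of silently ignoring it. A minimal sketch of the validity check (the function name is a stand-in):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EPT_AD_ENABLE_BIT (1ull << 6)

/* Reject an L1 EPTP that enables A/D bits when the host does not support them. */
static int check_nested_eptp(uint64_t eptp, bool host_ept_ad_supported)
{
        if ((eptp & EPT_AD_ENABLE_BIT) && !host_ept_ad_supported)
                return 1;       /* caller fails the nested VM entry */
        return 0;
}

int main(void)
{
        printf("%d\n", check_nested_eptp(EPT_AD_ENABLE_BIT, false)); /* -> 1 */
        printf("%d\n", check_nested_eptp(EPT_AD_ENABLE_BIT, true));  /* -> 0 */
        return 0;
}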
@@ -9974,7 +9915,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
9974{ 9915{
9975 struct vcpu_vmx *vmx = to_vmx(vcpu); 9916 struct vcpu_vmx *vmx = to_vmx(vcpu);
9976 u32 exec_control; 9917 u32 exec_control;
9977 bool nested_ept_enabled = false;
9978 9918
9979 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 9919 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
9980 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 9920 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10121,8 +10061,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10121 vmcs12->guest_intr_status); 10061 vmcs12->guest_intr_status);
10122 } 10062 }
10123 10063
10124 nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
10125
10126 /* 10064 /*
10127 * Write an illegal value to APIC_ACCESS_ADDR. Later, 10065 * Write an illegal value to APIC_ACCESS_ADDR. Later,
10128 * nested_get_vmcs12_pages will either fix it up or 10066 * nested_get_vmcs12_pages will either fix it up or
@@ -10253,8 +10191,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10253 } 10191 }
10254 10192
10255 if (nested_cpu_has_ept(vmcs12)) { 10193 if (nested_cpu_has_ept(vmcs12)) {
10256 kvm_mmu_unload(vcpu); 10194 if (nested_ept_init_mmu_context(vcpu)) {
10257 nested_ept_init_mmu_context(vcpu); 10195 *entry_failure_code = ENTRY_FAIL_DEFAULT;
10196 return 1;
10197 }
10198 } else if (nested_cpu_has2(vmcs12,
10199 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
10200 vmx_flush_tlb_ept_only(vcpu);
10258 } 10201 }
10259 10202
10260 /* 10203 /*
@@ -10282,12 +10225,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10282 vmx_set_efer(vcpu, vcpu->arch.efer); 10225 vmx_set_efer(vcpu, vcpu->arch.efer);
10283 10226
10284 /* Shadow page tables on either EPT or shadow page tables. */ 10227 /* Shadow page tables on either EPT or shadow page tables. */
10285 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, 10228 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
10286 entry_failure_code)) 10229 entry_failure_code))
10287 return 1; 10230 return 1;
10288 10231
10289 kvm_mmu_reset_context(vcpu);
10290
10291 if (!enable_ept) 10232 if (!enable_ept)
10292 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; 10233 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
10293 10234
@@ -10407,7 +10348,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
10407 struct vcpu_vmx *vmx = to_vmx(vcpu); 10348 struct vcpu_vmx *vmx = to_vmx(vcpu);
10408 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 10349 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10409 struct loaded_vmcs *vmcs02; 10350 struct loaded_vmcs *vmcs02;
10410 int cpu;
10411 u32 msr_entry_idx; 10351 u32 msr_entry_idx;
10412 u32 exit_qual; 10352 u32 exit_qual;
10413 10353
@@ -10420,18 +10360,12 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
10420 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) 10360 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
10421 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 10361 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
10422 10362
10423 cpu = get_cpu(); 10363 vmx_switch_vmcs(vcpu, vmcs02);
10424 vmx->loaded_vmcs = vmcs02;
10425 vmx_vcpu_put(vcpu);
10426 vmx_vcpu_load(vcpu, cpu);
10427 vcpu->cpu = cpu;
10428 put_cpu();
10429
10430 vmx_segment_cache_clear(vmx); 10364 vmx_segment_cache_clear(vmx);
10431 10365
10432 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { 10366 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
10433 leave_guest_mode(vcpu); 10367 leave_guest_mode(vcpu);
10434 vmx_load_vmcs01(vcpu); 10368 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
10435 nested_vmx_entry_failure(vcpu, vmcs12, 10369 nested_vmx_entry_failure(vcpu, vmcs12,
10436 EXIT_REASON_INVALID_STATE, exit_qual); 10370 EXIT_REASON_INVALID_STATE, exit_qual);
10437 return 1; 10371 return 1;
@@ -10444,7 +10378,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
10444 vmcs12->vm_entry_msr_load_count); 10378 vmcs12->vm_entry_msr_load_count);
10445 if (msr_entry_idx) { 10379 if (msr_entry_idx) {
10446 leave_guest_mode(vcpu); 10380 leave_guest_mode(vcpu);
10447 vmx_load_vmcs01(vcpu); 10381 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
10448 nested_vmx_entry_failure(vcpu, vmcs12, 10382 nested_vmx_entry_failure(vcpu, vmcs12,
10449 EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); 10383 EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
10450 return 1; 10384 return 1;
@@ -11012,7 +10946,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
11012 if (unlikely(vmx->fail)) 10946 if (unlikely(vmx->fail))
11013 vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR); 10947 vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
11014 10948
11015 vmx_load_vmcs01(vcpu); 10949 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
11016 10950
11017 if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) 10951 if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
11018 && nested_exit_intr_ack_set(vcpu)) { 10952 && nested_exit_intr_ack_set(vcpu)) {
@@ -11056,6 +10990,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
11056 vmx->nested.change_vmcs01_virtual_x2apic_mode = false; 10990 vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
11057 vmx_set_virtual_x2apic_mode(vcpu, 10991 vmx_set_virtual_x2apic_mode(vcpu,
11058 vcpu->arch.apic_base & X2APIC_ENABLE); 10992 vcpu->arch.apic_base & X2APIC_ENABLE);
10993 } else if (!nested_cpu_has_ept(vmcs12) &&
10994 nested_cpu_has2(vmcs12,
10995 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
10996 vmx_flush_tlb_ept_only(vcpu);
11059 } 10997 }
11060 10998
11061 /* This is needed for same reason as it was needed in prepare_vmcs02 */ 10999 /* This is needed for same reason as it was needed in prepare_vmcs02 */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1faf620a6fdc..34bf64fb4dea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,7 +27,6 @@
27#include "kvm_cache_regs.h" 27#include "kvm_cache_regs.h"
28#include "x86.h" 28#include "x86.h"
29#include "cpuid.h" 29#include "cpuid.h"
30#include "assigned-dev.h"
31#include "pmu.h" 30#include "pmu.h"
32#include "hyperv.h" 31#include "hyperv.h"
33 32
@@ -1444,10 +1443,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1444 struct kvm *kvm = vcpu->kvm; 1443 struct kvm *kvm = vcpu->kvm;
1445 u64 offset, ns, elapsed; 1444 u64 offset, ns, elapsed;
1446 unsigned long flags; 1445 unsigned long flags;
1447 s64 usdiff;
1448 bool matched; 1446 bool matched;
1449 bool already_matched; 1447 bool already_matched;
1450 u64 data = msr->data; 1448 u64 data = msr->data;
1449 bool synchronizing = false;
1451 1450
1452 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1451 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1453 offset = kvm_compute_tsc_offset(vcpu, data); 1452 offset = kvm_compute_tsc_offset(vcpu, data);
@@ -1455,51 +1454,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1455 elapsed = ns - kvm->arch.last_tsc_nsec; 1454 elapsed = ns - kvm->arch.last_tsc_nsec;
1456 1455
1457 if (vcpu->arch.virtual_tsc_khz) { 1456 if (vcpu->arch.virtual_tsc_khz) {
1458 int faulted = 0; 1457 if (data == 0 && msr->host_initiated) {
1459 1458 /*
1460 /* n.b - signed multiplication and division required */ 1459 * detection of vcpu initialization -- need to sync
1461 usdiff = data - kvm->arch.last_tsc_write; 1460 * with other vCPUs. This particularly helps to keep
1462#ifdef CONFIG_X86_64 1461 * kvm_clock stable after CPU hotplug
1463 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; 1462 */
1464#else 1463 synchronizing = true;
1465 /* do_div() only does unsigned */ 1464 } else {
1466 asm("1: idivl %[divisor]\n" 1465 u64 tsc_exp = kvm->arch.last_tsc_write +
1467 "2: xor %%edx, %%edx\n" 1466 nsec_to_cycles(vcpu, elapsed);
1468 " movl $0, %[faulted]\n" 1467 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
1469 "3:\n" 1468 /*
1470 ".section .fixup,\"ax\"\n" 1469 * Special case: TSC write with a small delta (1 second)
1471 "4: movl $1, %[faulted]\n" 1470 * of virtual cycle time against real time is
1472 " jmp 3b\n" 1471 * interpreted as an attempt to synchronize the CPU.
1473 ".previous\n" 1472 */
1474 1473 synchronizing = data < tsc_exp + tsc_hz &&
1475 _ASM_EXTABLE(1b, 4b) 1474 data + tsc_hz > tsc_exp;
1476 1475 }
1477 : "=A"(usdiff), [faulted] "=r" (faulted) 1476 }
1478 : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
1479
1480#endif
1481 do_div(elapsed, 1000);
1482 usdiff -= elapsed;
1483 if (usdiff < 0)
1484 usdiff = -usdiff;
1485
1486 /* idivl overflow => difference is larger than USEC_PER_SEC */
1487 if (faulted)
1488 usdiff = USEC_PER_SEC;
1489 } else
1490 usdiff = USEC_PER_SEC; /* disable TSC match window below */
1491 1477
1492 /* 1478 /*
1493 * Special case: TSC write with a small delta (1 second) of virtual
1494 * cycle time against real time is interpreted as an attempt to
1495 * synchronize the CPU.
1496 *
1497 * For a reliable TSC, we can match TSC offsets, and for an unstable 1479 * For a reliable TSC, we can match TSC offsets, and for an unstable
1498 * TSC, we add elapsed time in this computation. We could let the 1480 * TSC, we add elapsed time in this computation. We could let the
1499 * compensation code attempt to catch up if we fall behind, but 1481 * compensation code attempt to catch up if we fall behind, but
1500 * it's better to try to match offsets from the beginning. 1482 * it's better to try to match offsets from the beginning.
1501 */ 1483 */
1502 if (usdiff < USEC_PER_SEC && 1484 if (synchronizing &&
1503 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { 1485 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1504 if (!check_tsc_unstable()) { 1486 if (!check_tsc_unstable()) {
1505 offset = kvm->arch.cur_tsc_offset; 1487 offset = kvm->arch.cur_tsc_offset;
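
The kvm_write_tsc() rewrite drops the fragile microsecond arithmetic (including the inline idivl with its exception fixup) and decides in cycle units whether a TSC write is a synchronization attempt: either a host-initiated write of 0 at vCPU initialization, or a value within one second of virtual cycle time of what the vCPU would have reached. A standalone sketch of that window check, with a simplified nsec_to_cycles() stand-in so it runs on its own:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Rough stand-in: guest cycles elapsed for a given wall-clock delta. */
static uint64_t nsec_to_cycles(uint64_t tsc_khz, uint64_t ns)
{
        return ns * tsc_khz / 1000000ull;
}

/* Is "data" within one second of virtual cycle time of the expected TSC? */
static bool tsc_write_is_synchronizing(uint64_t last_tsc_write, uint64_t elapsed_ns,
                                       uint64_t tsc_khz, uint64_t data,
                                       bool host_initiated)
{
        uint64_t tsc_exp, tsc_hz;

        if (data == 0 && host_initiated)
                return true;    /* vCPU initialization, keep kvmclock stable */

        tsc_exp = last_tsc_write + nsec_to_cycles(tsc_khz, elapsed_ns);
        tsc_hz = tsc_khz * 1000ull;     /* one second worth of cycles */
        return data < tsc_exp + tsc_hz && data + tsc_hz > tsc_exp;
}

int main(void)
{
        /* 2 GHz guest TSC, 10 ms since the last write, new value close by. */
        printf("%d\n", tsc_write_is_synchronizing(1000000, 10000000, 2000000,
                                                  1000000 + 20000000, false));
        return 0;
}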
@@ -2155,6 +2137,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2155 case MSR_VM_HSAVE_PA: 2137 case MSR_VM_HSAVE_PA:
2156 case MSR_AMD64_PATCH_LOADER: 2138 case MSR_AMD64_PATCH_LOADER:
2157 case MSR_AMD64_BU_CFG2: 2139 case MSR_AMD64_BU_CFG2:
2140 case MSR_AMD64_DC_CFG:
2158 break; 2141 break;
2159 2142
2160 case MSR_EFER: 2143 case MSR_EFER:
@@ -2417,6 +2400,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2417 case MSR_FAM10H_MMIO_CONF_BASE: 2400 case MSR_FAM10H_MMIO_CONF_BASE:
2418 case MSR_AMD64_BU_CFG2: 2401 case MSR_AMD64_BU_CFG2:
2419 case MSR_IA32_PERF_CTL: 2402 case MSR_IA32_PERF_CTL:
2403 case MSR_AMD64_DC_CFG:
2420 msr_info->data = 0; 2404 msr_info->data = 0;
2421 break; 2405 break;
2422 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: 2406 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2675,10 +2659,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2675 case KVM_CAP_SET_BOOT_CPU_ID: 2659 case KVM_CAP_SET_BOOT_CPU_ID:
2676 case KVM_CAP_SPLIT_IRQCHIP: 2660 case KVM_CAP_SPLIT_IRQCHIP:
2677 case KVM_CAP_IMMEDIATE_EXIT: 2661 case KVM_CAP_IMMEDIATE_EXIT:
2678#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2679 case KVM_CAP_ASSIGN_DEV_IRQ:
2680 case KVM_CAP_PCI_2_3:
2681#endif
2682 r = 1; 2662 r = 1;
2683 break; 2663 break;
2684 case KVM_CAP_ADJUST_CLOCK: 2664 case KVM_CAP_ADJUST_CLOCK:
@@ -2695,9 +2675,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2695 */ 2675 */
2696 r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); 2676 r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
2697 break; 2677 break;
2698 case KVM_CAP_COALESCED_MMIO:
2699 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2700 break;
2701 case KVM_CAP_VAPIC: 2678 case KVM_CAP_VAPIC:
2702 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 2679 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2703 break; 2680 break;
@@ -2713,11 +2690,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2713 case KVM_CAP_PV_MMU: /* obsolete */ 2690 case KVM_CAP_PV_MMU: /* obsolete */
2714 r = 0; 2691 r = 0;
2715 break; 2692 break;
2716#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2717 case KVM_CAP_IOMMU:
2718 r = iommu_present(&pci_bus_type);
2719 break;
2720#endif
2721 case KVM_CAP_MCE: 2693 case KVM_CAP_MCE:
2722 r = KVM_MAX_MCE_BANKS; 2694 r = KVM_MAX_MCE_BANKS;
2723 break; 2695 break;
@@ -3124,7 +3096,14 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3124 return -EINVAL; 3096 return -EINVAL;
3125 3097
3126 if (events->exception.injected && 3098 if (events->exception.injected &&
3127 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) 3099 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
3100 is_guest_mode(vcpu)))
3101 return -EINVAL;
3102
3103 /* INITs are latched while in SMM */
3104 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
3105 (events->smi.smm || events->smi.pending) &&
3106 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3128 return -EINVAL; 3107 return -EINVAL;
3129 3108
3130 process_nmi(vcpu); 3109 process_nmi(vcpu);
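
Two new sanity checks land in KVM_SET_VCPU_EVENTS: userspace may not inject an exception while the vCPU is in guest mode, and it may not report an SMM/pending-SMI state together with a latched INIT, since INITs are latched while in SMM. A hedged sketch of the second check as a pure predicate (types and names simplified for illustration):

#include <stdbool.h>
#include <stdio.h>

enum mp_state { MP_STATE_RUNNABLE, MP_STATE_INIT_RECEIVED };

/*
 * INITs are latched while in SMM, so a vCPU that is in SMM (or has an SMI
 * pending) cannot simultaneously be sitting in the INIT-received state.
 */
static bool smm_events_are_valid(bool smm_flag_valid, bool in_smm,
                                 bool smi_pending, enum mp_state state)
{
        if (smm_flag_valid && (in_smm || smi_pending) &&
            state == MP_STATE_INIT_RECEIVED)
                return false;
        return true;
}

int main(void)
{
        printf("%d\n", smm_events_are_valid(true, true, false, MP_STATE_INIT_RECEIVED));
        printf("%d\n", smm_events_are_valid(true, false, false, MP_STATE_RUNNABLE));
        return 0;
}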
@@ -3721,22 +3700,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3721 3700
3722static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 3701static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3723{ 3702{
3703 struct kvm_pic *pic = kvm->arch.vpic;
3724 int r; 3704 int r;
3725 3705
3726 r = 0; 3706 r = 0;
3727 switch (chip->chip_id) { 3707 switch (chip->chip_id) {
3728 case KVM_IRQCHIP_PIC_MASTER: 3708 case KVM_IRQCHIP_PIC_MASTER:
3729 memcpy(&chip->chip.pic, 3709 memcpy(&chip->chip.pic, &pic->pics[0],
3730 &pic_irqchip(kvm)->pics[0],
3731 sizeof(struct kvm_pic_state)); 3710 sizeof(struct kvm_pic_state));
3732 break; 3711 break;
3733 case KVM_IRQCHIP_PIC_SLAVE: 3712 case KVM_IRQCHIP_PIC_SLAVE:
3734 memcpy(&chip->chip.pic, 3713 memcpy(&chip->chip.pic, &pic->pics[1],
3735 &pic_irqchip(kvm)->pics[1],
3736 sizeof(struct kvm_pic_state)); 3714 sizeof(struct kvm_pic_state));
3737 break; 3715 break;
3738 case KVM_IRQCHIP_IOAPIC: 3716 case KVM_IRQCHIP_IOAPIC:
3739 r = kvm_get_ioapic(kvm, &chip->chip.ioapic); 3717 kvm_get_ioapic(kvm, &chip->chip.ioapic);
3740 break; 3718 break;
3741 default: 3719 default:
3742 r = -EINVAL; 3720 r = -EINVAL;
@@ -3747,32 +3725,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3747 3725
3748static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 3726static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3749{ 3727{
3728 struct kvm_pic *pic = kvm->arch.vpic;
3750 int r; 3729 int r;
3751 3730
3752 r = 0; 3731 r = 0;
3753 switch (chip->chip_id) { 3732 switch (chip->chip_id) {
3754 case KVM_IRQCHIP_PIC_MASTER: 3733 case KVM_IRQCHIP_PIC_MASTER:
3755 spin_lock(&pic_irqchip(kvm)->lock); 3734 spin_lock(&pic->lock);
3756 memcpy(&pic_irqchip(kvm)->pics[0], 3735 memcpy(&pic->pics[0], &chip->chip.pic,
3757 &chip->chip.pic,
3758 sizeof(struct kvm_pic_state)); 3736 sizeof(struct kvm_pic_state));
3759 spin_unlock(&pic_irqchip(kvm)->lock); 3737 spin_unlock(&pic->lock);
3760 break; 3738 break;
3761 case KVM_IRQCHIP_PIC_SLAVE: 3739 case KVM_IRQCHIP_PIC_SLAVE:
3762 spin_lock(&pic_irqchip(kvm)->lock); 3740 spin_lock(&pic->lock);
3763 memcpy(&pic_irqchip(kvm)->pics[1], 3741 memcpy(&pic->pics[1], &chip->chip.pic,
3764 &chip->chip.pic,
3765 sizeof(struct kvm_pic_state)); 3742 sizeof(struct kvm_pic_state));
3766 spin_unlock(&pic_irqchip(kvm)->lock); 3743 spin_unlock(&pic->lock);
3767 break; 3744 break;
3768 case KVM_IRQCHIP_IOAPIC: 3745 case KVM_IRQCHIP_IOAPIC:
3769 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 3746 kvm_set_ioapic(kvm, &chip->chip.ioapic);
3770 break; 3747 break;
3771 default: 3748 default:
3772 r = -EINVAL; 3749 r = -EINVAL;
3773 break; 3750 break;
3774 } 3751 }
3775 kvm_pic_update_irq(pic_irqchip(kvm)); 3752 kvm_pic_update_irq(pic);
3776 return r; 3753 return r;
3777} 3754}
3778 3755
@@ -3934,9 +3911,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
3934 goto split_irqchip_unlock; 3911 goto split_irqchip_unlock;
3935 if (kvm->created_vcpus) 3912 if (kvm->created_vcpus)
3936 goto split_irqchip_unlock; 3913 goto split_irqchip_unlock;
3914 kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
3937 r = kvm_setup_empty_irq_routing(kvm); 3915 r = kvm_setup_empty_irq_routing(kvm);
3938 if (r) 3916 if (r) {
3917 kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
3918 /* Pairs with smp_rmb() when reading irqchip_mode */
3919 smp_wmb();
3939 goto split_irqchip_unlock; 3920 goto split_irqchip_unlock;
3921 }
3940 /* Pairs with irqchip_in_kernel. */ 3922 /* Pairs with irqchip_in_kernel. */
3941 smp_wmb(); 3923 smp_wmb();
3942 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; 3924 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
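
Both irqchip creation paths now publish an intermediate KVM_IRQCHIP_INIT_IN_PROGRESS state and roll it back to NONE with an smp_wmb() on failure, so a reader pairing with smp_rmb() never observes a half-initialized irqchip. The sketch below shows the same publish-after-init shape in portable C11; release/acquire stores are used purely as an analogy for the kernel barriers, and the names are made up, so treat it as an illustration of the ordering, not of KVM's actual code.

#include <stdatomic.h>
#include <stdio.h>

enum { IRQCHIP_NONE, IRQCHIP_INIT_IN_PROGRESS, IRQCHIP_SPLIT };

static _Atomic int irqchip_mode = IRQCHIP_NONE;
static int routing_ready;       /* stands in for the routing tables */

static int enable_split_irqchip(int setup_fails)
{
        atomic_store_explicit(&irqchip_mode, IRQCHIP_INIT_IN_PROGRESS,
                              memory_order_relaxed);
        if (setup_fails) {
                /* Roll back; release ordering plays the role of smp_wmb(). */
                atomic_store_explicit(&irqchip_mode, IRQCHIP_NONE,
                                      memory_order_release);
                return -1;
        }
        routing_ready = 1;
        /* Publish the mode only after the routing has been set up. */
        atomic_store_explicit(&irqchip_mode, IRQCHIP_SPLIT, memory_order_release);
        return 0;
}

int main(void)
{
        printf("%d\n", enable_split_irqchip(0));
        /* A reader pairs this acquire load with the release store above. */
        printf("mode=%d ready=%d\n",
               atomic_load_explicit(&irqchip_mode, memory_order_acquire),
               routing_ready);
        return 0;
}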
@@ -4018,20 +4000,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
4018 4000
4019 r = kvm_ioapic_init(kvm); 4001 r = kvm_ioapic_init(kvm);
4020 if (r) { 4002 if (r) {
4021 mutex_lock(&kvm->slots_lock);
4022 kvm_pic_destroy(kvm); 4003 kvm_pic_destroy(kvm);
4023 mutex_unlock(&kvm->slots_lock);
4024 goto create_irqchip_unlock; 4004 goto create_irqchip_unlock;
4025 } 4005 }
4026 4006
4007 kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
4027 r = kvm_setup_default_irq_routing(kvm); 4008 r = kvm_setup_default_irq_routing(kvm);
4028 if (r) { 4009 if (r) {
4029 mutex_lock(&kvm->slots_lock); 4010 kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
4030 mutex_lock(&kvm->irq_lock); 4011 /* Pairs with smp_rmb() when reading irqchip_mode */
4012 smp_wmb();
4031 kvm_ioapic_destroy(kvm); 4013 kvm_ioapic_destroy(kvm);
4032 kvm_pic_destroy(kvm); 4014 kvm_pic_destroy(kvm);
4033 mutex_unlock(&kvm->irq_lock);
4034 mutex_unlock(&kvm->slots_lock);
4035 goto create_irqchip_unlock; 4015 goto create_irqchip_unlock;
4036 } 4016 }
4037 /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ 4017 /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
@@ -4230,7 +4210,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
4230 break; 4210 break;
4231 } 4211 }
4232 default: 4212 default:
4233 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); 4213 r = -ENOTTY;
4234 } 4214 }
4235out: 4215out:
4236 return r; 4216 return r;
@@ -7355,6 +7335,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
7355 mp_state->mp_state != KVM_MP_STATE_RUNNABLE) 7335 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
7356 return -EINVAL; 7336 return -EINVAL;
7357 7337
7338 /* INITs are latched while in SMM */
7339 if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
7340 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
7341 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
7342 return -EINVAL;
7343
7358 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { 7344 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
7359 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 7345 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
7360 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); 7346 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
@@ -8068,7 +8054,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
8068{ 8054{
8069 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); 8055 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
8070 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); 8056 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
8071 kvm_free_all_assigned_devices(kvm);
8072 kvm_free_pit(kvm); 8057 kvm_free_pit(kvm);
8073} 8058}
8074 8059
@@ -8152,12 +8137,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
8152 } 8137 }
8153 if (kvm_x86_ops->vm_destroy) 8138 if (kvm_x86_ops->vm_destroy)
8154 kvm_x86_ops->vm_destroy(kvm); 8139 kvm_x86_ops->vm_destroy(kvm);
8155 kvm_iommu_unmap_guest(kvm); 8140 kvm_pic_destroy(kvm);
8156 kfree(kvm->arch.vpic); 8141 kvm_ioapic_destroy(kvm);
8157 kfree(kvm->arch.vioapic);
8158 kvm_free_vcpus(kvm); 8142 kvm_free_vcpus(kvm);
8159 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 8143 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
8160 kvm_mmu_uninit_vm(kvm); 8144 kvm_mmu_uninit_vm(kvm);
8145 kvm_page_track_cleanup(kvm);
8161} 8146}
8162 8147
8163void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 8148void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -8566,11 +8551,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
8566{ 8551{
8567 struct x86_exception fault; 8552 struct x86_exception fault;
8568 8553
8569 trace_kvm_async_pf_ready(work->arch.token, work->gva);
8570 if (work->wakeup_all) 8554 if (work->wakeup_all)
8571 work->arch.token = ~0; /* broadcast wakeup */ 8555 work->arch.token = ~0; /* broadcast wakeup */
8572 else 8556 else
8573 kvm_del_async_pf_gfn(vcpu, work->arch.gfn); 8557 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
8558 trace_kvm_async_pf_ready(work->arch.token, work->gva);
8574 8559
8575 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && 8560 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
8576 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { 8561 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index e7e7055a8658..69f0827d5f53 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
16 16
17ifeq ($(CONFIG_X86_32),y) 17ifeq ($(CONFIG_X86_32),y)
18 18
19obj-y += checksum_32.o 19obj-y += checksum_32.o syscalls_32.o
20obj-$(CONFIG_ELF_CORE) += elfcore.o 20obj-$(CONFIG_ELF_CORE) += elfcore.o
21 21
22subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o 22subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index e59eef20647b..b291ca5cf66b 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
78 return -ENOSYS; 78 return -ENOSYS;
79} 79}
80 80
81extern long arch_prctl(struct task_struct *task, int code, 81extern long arch_prctl(struct task_struct *task, int option,
82 unsigned long __user *addr); 82 unsigned long __user *addr);
83 83
84#endif 84#endif
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c
index 96eb2bd28832..8431e87ac333 100644
--- a/arch/x86/um/os-Linux/prctl.c
+++ b/arch/x86/um/os-Linux/prctl.c
@@ -6,7 +6,7 @@
6#include <sys/ptrace.h> 6#include <sys/ptrace.h>
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8 8
9int os_arch_prctl(int pid, int code, unsigned long *addr) 9int os_arch_prctl(int pid, int option, unsigned long *arg2)
10{ 10{
11 return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); 11 return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
12} 12}
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644
index 000000000000..627d68836b16
--- /dev/null
+++ b/arch/x86/um/syscalls_32.c
@@ -0,0 +1,7 @@
1#include <linux/syscalls.h>
2#include <os.h>
3
4SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
5{
6 return -EINVAL;
7}
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index 10d907098c26..58f51667e2e4 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -7,13 +7,15 @@
7 7
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/sched/mm.h> 9#include <linux/sched/mm.h>
10#include <linux/syscalls.h>
10#include <linux/uaccess.h> 11#include <linux/uaccess.h>
11#include <asm/prctl.h> /* XXX This should get the constants from libc */ 12#include <asm/prctl.h> /* XXX This should get the constants from libc */
12#include <os.h> 13#include <os.h>
13 14
14long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) 15long arch_prctl(struct task_struct *task, int option,
16 unsigned long __user *arg2)
15{ 17{
16 unsigned long *ptr = addr, tmp; 18 unsigned long *ptr = arg2, tmp;
17 long ret; 19 long ret;
18 int pid = task->mm->context.id.u.pid; 20 int pid = task->mm->context.id.u.pid;
19 21
@@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
30 * arch_prctl is run on the host, then the registers are read 32 * arch_prctl is run on the host, then the registers are read
31 * back. 33 * back.
32 */ 34 */
33 switch (code) { 35 switch (option) {
34 case ARCH_SET_FS: 36 case ARCH_SET_FS:
35 case ARCH_SET_GS: 37 case ARCH_SET_GS:
36 ret = restore_registers(pid, &current->thread.regs.regs); 38 ret = restore_registers(pid, &current->thread.regs.regs);
@@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
50 ptr = &tmp; 52 ptr = &tmp;
51 } 53 }
52 54
53 ret = os_arch_prctl(pid, code, ptr); 55 ret = os_arch_prctl(pid, option, ptr);
54 if (ret) 56 if (ret)
55 return ret; 57 return ret;
56 58
57 switch (code) { 59 switch (option) {
58 case ARCH_SET_FS: 60 case ARCH_SET_FS:
59 current->thread.arch.fs = (unsigned long) ptr; 61 current->thread.arch.fs = (unsigned long) ptr;
60 ret = save_registers(pid, &current->thread.regs.regs); 62 ret = save_registers(pid, &current->thread.regs.regs);
@@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
63 ret = save_registers(pid, &current->thread.regs.regs); 65 ret = save_registers(pid, &current->thread.regs.regs);
64 break; 66 break;
65 case ARCH_GET_FS: 67 case ARCH_GET_FS:
66 ret = put_user(tmp, addr); 68 ret = put_user(tmp, arg2);
67 break; 69 break;
68 case ARCH_GET_GS: 70 case ARCH_GET_GS:
69 ret = put_user(tmp, addr); 71 ret = put_user(tmp, arg2);
70 break; 72 break;
71 } 73 }
72 74
73 return ret; 75 return ret;
74} 76}
75 77
76long sys_arch_prctl(int code, unsigned long addr) 78SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
77{ 79{
78 return arch_prctl(current, code, (unsigned long __user *) addr); 80 return arch_prctl(current, option, (unsigned long __user *) arg2);
79} 81}
80 82
81void arch_switch_to(struct task_struct *to) 83void arch_switch_to(struct task_struct *to)
diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c
index 9e1a138fed53..16a8951b2bed 100644
--- a/drivers/gpio/gpio-altera-a10sr.c
+++ b/drivers/gpio/gpio-altera-a10sr.c
@@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev)
96 gpio->regmap = a10sr->regmap; 96 gpio->regmap = a10sr->regmap;
97 97
98 gpio->gp = altr_a10sr_gc; 98 gpio->gp = altr_a10sr_gc;
99 99 gpio->gp.parent = pdev->dev.parent;
100 gpio->gp.of_node = pdev->dev.of_node; 100 gpio->gp.of_node = pdev->dev.of_node;
101 101
102 ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio); 102 ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio);
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 5bddbd507ca9..3fe6a21e05a5 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d,
90 90
91 altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); 91 altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d));
92 92
93 if (type == IRQ_TYPE_NONE) 93 if (type == IRQ_TYPE_NONE) {
94 irq_set_handler_locked(d, handle_bad_irq);
94 return 0; 95 return 0;
95 if (type == IRQ_TYPE_LEVEL_HIGH && 96 }
96 altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) 97 if (type == altera_gc->interrupt_trigger) {
97 return 0; 98 if (type == IRQ_TYPE_LEVEL_HIGH)
98 if (type == IRQ_TYPE_EDGE_RISING && 99 irq_set_handler_locked(d, handle_level_irq);
99 altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) 100 else
100 return 0; 101 irq_set_handler_locked(d, handle_simple_irq);
101 if (type == IRQ_TYPE_EDGE_FALLING &&
102 altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING)
103 return 0;
104 if (type == IRQ_TYPE_EDGE_BOTH &&
105 altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH)
106 return 0; 102 return 0;
107 103 }
104 irq_set_handler_locked(d, handle_bad_irq);
108 return -EINVAL; 105 return -EINVAL;
109} 106}
110 107
@@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc)
230 chained_irq_exit(chip, desc); 227 chained_irq_exit(chip, desc);
231} 228}
232 229
233
234static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) 230static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
235{ 231{
236 struct altera_gpio_chip *altera_gc; 232 struct altera_gpio_chip *altera_gc;
@@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
310 altera_gc->interrupt_trigger = reg; 306 altera_gc->interrupt_trigger = reg;
311 307
312 ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, 308 ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
313 handle_simple_irq, IRQ_TYPE_NONE); 309 handle_bad_irq, IRQ_TYPE_NONE);
314 310
315 if (ret) { 311 if (ret) {
316 dev_err(&pdev->dev, "could not add irqchip\n"); 312 dev_err(&pdev->dev, "could not add irqchip\n");
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c
index bdb692345428..2a57d024481d 100644
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value)
270static irqreturn_t mcp23s08_irq(int irq, void *data) 270static irqreturn_t mcp23s08_irq(int irq, void *data)
271{ 271{
272 struct mcp23s08 *mcp = data; 272 struct mcp23s08 *mcp = data;
273 int intcap, intf, i; 273 int intcap, intf, i, gpio, gpio_orig, intcap_mask;
274 unsigned int child_irq; 274 unsigned int child_irq;
275 bool intf_set, intcap_changed, gpio_bit_changed,
276 defval_changed, gpio_set;
275 277
276 mutex_lock(&mcp->lock); 278 mutex_lock(&mcp->lock);
277 if (mcp_read(mcp, MCP_INTF, &intf) < 0) { 279 if (mcp_read(mcp, MCP_INTF, &intf) < 0) {
@@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data)
287 } 289 }
288 290
289 mcp->cache[MCP_INTCAP] = intcap; 291 mcp->cache[MCP_INTCAP] = intcap;
292
 293 /* This clears the interrupt (configurable on S18) */
294 if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) {
295 mutex_unlock(&mcp->lock);
296 return IRQ_HANDLED;
297 }
298 gpio_orig = mcp->cache[MCP_GPIO];
299 mcp->cache[MCP_GPIO] = gpio;
290 mutex_unlock(&mcp->lock); 300 mutex_unlock(&mcp->lock);
291 301
302 if (mcp->cache[MCP_INTF] == 0) {
303 /* There is no interrupt pending */
304 return IRQ_HANDLED;
305 }
306
307 dev_dbg(mcp->chip.parent,
308 "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n",
309 intcap, intf, gpio_orig, gpio);
292 310
293 for (i = 0; i < mcp->chip.ngpio; i++) { 311 for (i = 0; i < mcp->chip.ngpio; i++) {
294 if ((BIT(i) & mcp->cache[MCP_INTF]) && 312 /* We must check all of the inputs on the chip,
295 ((BIT(i) & intcap & mcp->irq_rise) || 313 * otherwise we may not notice a change on >=2 pins.
296 (mcp->irq_fall & ~intcap & BIT(i)) || 314 *
297 (BIT(i) & mcp->cache[MCP_INTCON]))) { 315 * On at least the mcp23s17, INTCAP is only updated
 316 * one byte at a time (INTCAPA and INTCAPB are
317 * not written to at the same time - only on a per-bank
318 * basis).
319 *
320 * INTF only contains the single bit that caused the
321 * interrupt per-bank. On the mcp23s17, there is
322 * INTFA and INTFB. If two pins are changed on the A
323 * side at the same time, INTF will only have one bit
324 * set. If one pin on the A side and one pin on the B
325 * side are changed at the same time, INTF will have
326 * two bits set. Thus, INTF can't be the only check
327 * to see if the input has changed.
328 */
329
330 intf_set = BIT(i) & mcp->cache[MCP_INTF];
331 if (i < 8 && intf_set)
332 intcap_mask = 0x00FF;
333 else if (i >= 8 && intf_set)
334 intcap_mask = 0xFF00;
335 else
336 intcap_mask = 0x00;
337
338 intcap_changed = (intcap_mask &
339 (BIT(i) & mcp->cache[MCP_INTCAP])) !=
340 (intcap_mask & (BIT(i) & gpio_orig));
341 gpio_set = BIT(i) & mcp->cache[MCP_GPIO];
342 gpio_bit_changed = (BIT(i) & gpio_orig) !=
343 (BIT(i) & mcp->cache[MCP_GPIO]);
344 defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) &&
345 ((BIT(i) & mcp->cache[MCP_GPIO]) !=
346 (BIT(i) & mcp->cache[MCP_DEFVAL]));
347
348 if (((gpio_bit_changed || intcap_changed) &&
349 (BIT(i) & mcp->irq_rise) && gpio_set) ||
350 ((gpio_bit_changed || intcap_changed) &&
351 (BIT(i) & mcp->irq_fall) && !gpio_set) ||
352 defval_changed) {
298 child_irq = irq_find_mapping(mcp->chip.irqdomain, i); 353 child_irq = irq_find_mapping(mcp->chip.irqdomain, i);
299 handle_nested_irq(child_irq); 354 handle_nested_irq(child_irq);
300 } 355 }
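
The comment block above explains why INTF alone cannot drive the dispatch; below is a simplified sketch of the resulting per-pin test, covering only the edge-trigger case (the INTCON/DEFVAL level branch and the per-bank INTCAP masking are omitted, and the helper name is made up for illustration):

#include <linux/bits.h>
#include <linux/types.h>

/* Decide whether pin i should raise a nested IRQ: INTF alone is not
 * enough (it flags at most one pin per bank), so the cached GPIO value
 * is compared against the fresh read for every input as well. */
static bool mcp_pin_should_fire(unsigned int i, u16 intf, u16 intcap,
				u16 gpio_old, u16 gpio_new,
				u16 irq_rise, u16 irq_fall)
{
	bool gpio_changed = (gpio_old ^ gpio_new) & BIT(i);
	bool intcap_latched = (intf & BIT(i)) &&
			      ((intcap ^ gpio_old) & BIT(i));
	bool is_high = gpio_new & BIT(i);

	if (!gpio_changed && !intcap_latched)
		return false;

	return (is_high && (irq_rise & BIT(i))) ||
	       (!is_high && (irq_fall & BIT(i)));
}
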
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 06dac72cb69c..d99338689213 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file,
197 struct seq_file *sfile; 197 struct seq_file *sfile;
198 struct gpio_desc *desc; 198 struct gpio_desc *desc;
199 struct gpio_chip *gc; 199 struct gpio_chip *gc;
200 int status, val; 200 int val;
201 char buf; 201 char buf;
202 202
203 sfile = file->private_data; 203 sfile = file->private_data;
@@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file,
206 chip = priv->chip; 206 chip = priv->chip;
207 gc = &chip->gc; 207 gc = &chip->gc;
208 208
209 status = copy_from_user(&buf, usr_buf, 1); 209 if (copy_from_user(&buf, usr_buf, 1))
210 if (status) 210 return -EFAULT;
211 return status;
212 211
213 if (buf == '0') 212 if (buf == '0')
214 val = 0; 213 val = 0;
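
The gpio-mockup fix above matters because copy_from_user() returns the number of bytes left uncopied rather than an errno, so returning it directly would look like a positive byte count to the caller. A minimal sketch of the corrected pattern, with illustrative names:

#include <linux/fs.h>
#include <linux/uaccess.h>

/* Illustrative write handler: any nonzero copy_from_user() result means
 * a fault while reading user memory, which must be reported as -EFAULT
 * rather than propagated as-is. */
static ssize_t example_write(struct file *file, const char __user *usr_buf,
			     size_t count, loff_t *ppos)
{
	char c;

	if (copy_from_user(&c, usr_buf, 1))
		return -EFAULT;

	return 1;	/* consumed one byte */
}
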
diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c
index 40a8881c2ce8..f1c6ec17b90a 100644
--- a/drivers/gpio/gpio-xgene.c
+++ b/drivers/gpio/gpio-xgene.c
@@ -42,9 +42,7 @@ struct xgene_gpio {
42 struct gpio_chip chip; 42 struct gpio_chip chip;
43 void __iomem *base; 43 void __iomem *base;
44 spinlock_t lock; 44 spinlock_t lock;
45#ifdef CONFIG_PM
46 u32 set_dr_val[XGENE_MAX_GPIO_BANKS]; 45 u32 set_dr_val[XGENE_MAX_GPIO_BANKS];
47#endif
48}; 46};
49 47
50static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset) 48static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset)
@@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc,
138 return 0; 136 return 0;
139} 137}
140 138
141#ifdef CONFIG_PM 139static __maybe_unused int xgene_gpio_suspend(struct device *dev)
142static int xgene_gpio_suspend(struct device *dev)
143{ 140{
144 struct xgene_gpio *gpio = dev_get_drvdata(dev); 141 struct xgene_gpio *gpio = dev_get_drvdata(dev);
145 unsigned long bank_offset; 142 unsigned long bank_offset;
@@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev)
152 return 0; 149 return 0;
153} 150}
154 151
155static int xgene_gpio_resume(struct device *dev) 152static __maybe_unused int xgene_gpio_resume(struct device *dev)
156{ 153{
157 struct xgene_gpio *gpio = dev_get_drvdata(dev); 154 struct xgene_gpio *gpio = dev_get_drvdata(dev);
158 unsigned long bank_offset; 155 unsigned long bank_offset;
@@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev)
166} 163}
167 164
168static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume); 165static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume);
169#define XGENE_GPIO_PM_OPS (&xgene_gpio_pm)
170#else
171#define XGENE_GPIO_PM_OPS NULL
172#endif
173 166
174static int xgene_gpio_probe(struct platform_device *pdev) 167static int xgene_gpio_probe(struct platform_device *pdev)
175{ 168{
@@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = {
241 .name = "xgene-gpio", 234 .name = "xgene-gpio",
242 .of_match_table = xgene_gpio_of_match, 235 .of_match_table = xgene_gpio_of_match,
243 .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match), 236 .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match),
244 .pm = XGENE_GPIO_PM_OPS, 237 .pm = &xgene_gpio_pm,
245 }, 238 },
246 .probe = xgene_gpio_probe, 239 .probe = xgene_gpio_probe,
247}; 240};
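
The gpio-xgene hunks above replace the CONFIG_PM ifdef blocks with __maybe_unused callbacks, so the PM ops pointer is always valid and the unused callbacks are simply discarded when CONFIG_PM_SLEEP is off. A minimal sketch of that pattern, with made-up names:

#include <linux/platform_device.h>
#include <linux/pm.h>

/* Suspend/resume callbacks marked __maybe_unused: with CONFIG_PM_SLEEP
 * disabled, SIMPLE_DEV_PM_OPS() leaves them unreferenced and the compiler
 * drops them, so no #ifdef is needed around the functions or the ops. */
static __maybe_unused int foo_suspend(struct device *dev)
{
	return 0;
}

static __maybe_unused int foo_resume(struct device *dev)
{
	return 0;
}

static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

static struct platform_driver foo_driver = {
	.driver = {
		.name	= "foo",
		.pm	= &foo_pm_ops,	/* always a valid pointer */
	},
};
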
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 1aeb80e52424..8c54cb8f5d6d 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -175,11 +175,11 @@ config HID_CHERRY
175 Support for Cherry Cymotion keyboard. 175 Support for Cherry Cymotion keyboard.
176 176
177config HID_CHICONY 177config HID_CHICONY
178 tristate "Chicony Tactical pad" 178 tristate "Chicony devices"
179 depends on HID 179 depends on HID
180 default !EXPERT 180 default !EXPERT
181 ---help--- 181 ---help---
182 Support for Chicony Tactical pad. 182 Support for Chicony Tactical pad and special keys on Chicony keyboards.
183 183
184config HID_CORSAIR 184config HID_CORSAIR
185 tristate "Corsair devices" 185 tristate "Corsair devices"
@@ -190,6 +190,7 @@ config HID_CORSAIR
190 190
191 Supported devices: 191 Supported devices:
192 - Vengeance K90 192 - Vengeance K90
193 - Scimitar PRO RGB
193 194
194config HID_PRODIKEYS 195config HID_PRODIKEYS
195 tristate "Prodikeys PC-MIDI Keyboard support" 196 tristate "Prodikeys PC-MIDI Keyboard support"
diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c
index bc3cec199fee..f04ed9aabc3f 100644
--- a/drivers/hid/hid-chicony.c
+++ b/drivers/hid/hid-chicony.c
@@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = {
86 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, 86 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
87 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, 87 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
88 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, 88 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
89 { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
89 { } 90 { }
90}; 91};
91MODULE_DEVICE_TABLE(hid, ch_devices); 92MODULE_DEVICE_TABLE(hid, ch_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index e9e87d337446..3ceb4a2af381 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
1870 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, 1870 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
1871 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, 1871 { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
1872 { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, 1872 { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
1873 { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
1873 { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, 1874 { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
1874 { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, 1875 { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
1875 { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, 1876 { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
@@ -1910,6 +1911,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
1910 { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, 1911 { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
1911 { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, 1912 { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
1912 { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, 1913 { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
1914 { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
1913 { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, 1915 { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
1914 { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, 1916 { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
1915 { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, 1917 { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c
index c0303f61c26a..9ba5d98a1180 100644
--- a/drivers/hid/hid-corsair.c
+++ b/drivers/hid/hid-corsair.c
@@ -3,8 +3,10 @@
3 * 3 *
4 * Supported devices: 4 * Supported devices:
5 * - Vengeance K90 Keyboard 5 * - Vengeance K90 Keyboard
6 * - Scimitar PRO RGB Gaming Mouse
6 * 7 *
7 * Copyright (c) 2015 Clement Vuchener 8 * Copyright (c) 2015 Clement Vuchener
9 * Copyright (c) 2017 Oscar Campos
8 */ 10 */
9 11
10/* 12/*
@@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev,
670 return 0; 672 return 0;
671} 673}
672 674
675/*
 676 * The report descriptor of the Corsair Scimitar RGB Pro gaming mouse is
 677 * not parseable: it defines two consecutive Logical Minimum items for
 678 * the Usage Page (Consumer), with byte 77 of the rdesc being 0x16 when
 679 * it should obviously be 0x26, a 16-bit Logical Maximum. This
 680 * prevents proper parsing of the report descriptor, since the Logical
 681 * Minimum ends up larger than the Logical Maximum.
682 *
683 * This driver fixes the report descriptor for:
684 * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
685 */
686
687static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
688 unsigned int *rsize)
689{
690 struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
691
692 if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
693 /*
 694 * The Corsair Scimitar RGB Pro report descriptor is broken: it
 695 * defines two different Logical Minimum items for the Consumer
 696 * Application. Byte 77 should be 0x26, declaring a 16-bit
 697 * integer for the Logical Maximum, but it is 0x16
 698 * (Logical Minimum) instead.
699 */
700 switch (hdev->product) {
701 case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
702 if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
703 && rdesc[78] == 0xff && rdesc[79] == 0x0f) {
704 hid_info(hdev, "Fixing up report descriptor\n");
705 rdesc[77] = 0x26;
706 }
707 break;
708 }
709
710 }
711 return rdesc;
712}
713
673static const struct hid_device_id corsair_devices[] = { 714static const struct hid_device_id corsair_devices[] = {
674 { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), 715 { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
675 .driver_data = CORSAIR_USE_K90_MACRO | 716 .driver_data = CORSAIR_USE_K90_MACRO |
676 CORSAIR_USE_K90_BACKLIGHT }, 717 CORSAIR_USE_K90_BACKLIGHT },
718 { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
719 USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
677 {} 720 {}
678}; 721};
679 722
@@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = {
686 .event = corsair_event, 729 .event = corsair_event,
687 .remove = corsair_remove, 730 .remove = corsair_remove,
688 .input_mapping = corsair_input_mapping, 731 .input_mapping = corsair_input_mapping,
732 .report_fixup = corsair_mouse_report_fixup,
689}; 733};
690 734
691module_hid_driver(corsair_driver); 735module_hid_driver(corsair_driver);
692 736
693MODULE_LICENSE("GPL"); 737MODULE_LICENSE("GPL");
738/* Original K90 driver author */
694MODULE_AUTHOR("Clement Vuchener"); 739MODULE_AUTHOR("Clement Vuchener");
740/* Scimitar PRO RGB driver author */
741MODULE_AUTHOR("Oscar Campos");
695MODULE_DESCRIPTION("HID driver for Corsair devices"); 742MODULE_DESCRIPTION("HID driver for Corsair devices");
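
For reference when reading the fixup above, the byte values come from the HID 1.11 short-item encoding (bTag, bType and bSize packed into one prefix byte); the mapping below is background information, not part of the patch:

/*
 * HID short-item prefix bytes referenced by corsair_mouse_report_fixup():
 *
 *   0x15  Logical Minimum, 1-byte data   (rdesc[75] in the check above)
 *   0x16  Logical Minimum, 2-byte data   (the bogus item at rdesc[77])
 *   0x26  Logical Maximum, 2-byte data   (what rdesc[77] is rewritten to)
 *
 * Rewriting rdesc[77] from 0x16 to 0x26 turns the second, spurious
 * Logical Minimum into the intended 16-bit Logical Maximum (0x0fff,
 * encoded in rdesc[78..79] as 0xff 0x0f).
 */
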
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 86c95d30ac80..0e2e7c571d22 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -278,6 +278,9 @@
278#define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13 278#define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13
279#define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15 279#define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15
280#define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17 280#define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17
281#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38
282#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39
283#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e
281 284
282#define USB_VENDOR_ID_CREATIVELABS 0x041e 285#define USB_VENDOR_ID_CREATIVELABS 0x041e
283#define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c 286#define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c
@@ -557,6 +560,7 @@
557 560
558#define USB_VENDOR_ID_JESS 0x0c45 561#define USB_VENDOR_ID_JESS 0x0c45
559#define USB_DEVICE_ID_JESS_YUREX 0x1010 562#define USB_DEVICE_ID_JESS_YUREX 0x1010
563#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112
560 564
561#define USB_VENDOR_ID_JESS2 0x0f30 565#define USB_VENDOR_ID_JESS2 0x0f30
562#define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111 566#define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index f405b07d0381..740996f9bdd4 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2632,6 +2632,8 @@ err_stop:
2632 sony_leds_remove(sc); 2632 sony_leds_remove(sc);
2633 if (sc->quirks & SONY_BATTERY_SUPPORT) 2633 if (sc->quirks & SONY_BATTERY_SUPPORT)
2634 sony_battery_remove(sc); 2634 sony_battery_remove(sc);
2635 if (sc->touchpad)
2636 sony_unregister_touchpad(sc);
2635 sony_cancel_work_sync(sc); 2637 sony_cancel_work_sync(sc);
2636 kfree(sc->output_report_dmabuf); 2638 kfree(sc->output_report_dmabuf);
2637 sony_remove_dev_list(sc); 2639 sony_remove_dev_list(sc);
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index d6847a664446..a69a3c88ab29 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -80,6 +80,9 @@ static const struct hid_blacklist {
80 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS }, 80 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
81 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS }, 81 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
82 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, 82 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
83 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
84 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
85 { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
83 { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, 86 { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
84 { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, 87 { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
85 { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, 88 { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index be8f7e2a026f..994bddc55b82 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev)
2579 2579
2580 /* make sure we don't trigger the LEDs */ 2580 /* make sure we don't trigger the LEDs */
2581 wacom_led_groups_release(wacom); 2581 wacom_led_groups_release(wacom);
2582 wacom_release_resources(wacom); 2582
2583 if (wacom->wacom_wac.features.type != REMOTE)
2584 wacom_release_resources(wacom);
2583 2585
2584 hid_set_drvdata(hdev, NULL); 2586 hid_set_drvdata(hdev, NULL);
2585} 2587}
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 4aa3de9f1163..94250c293be2 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
1959 input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH); 1959 input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH);
1960 input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL); 1960 input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL);
1961 input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH); 1961 input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH);
1962 input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); 1962 if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) {
1963 input_set_capability(input, EV_KEY, BTN_TOOL_LENS); 1963 input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE);
1964 input_set_capability(input, EV_KEY, BTN_TOOL_LENS);
1965 }
1964 break; 1966 break;
1965 case WACOM_HID_WD_FINGERWHEEL: 1967 case WACOM_HID_WD_FINGERWHEEL:
1966 wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0); 1968 wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0);
@@ -4197,10 +4199,10 @@ static const struct wacom_features wacom_features_0x343 =
4197 WACOM_DTU_OFFSET, WACOM_DTU_OFFSET }; 4199 WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
4198static const struct wacom_features wacom_features_0x360 = 4200static const struct wacom_features wacom_features_0x360 =
4199 { "Wacom Intuos Pro M", 44800, 29600, 8191, 63, 4201 { "Wacom Intuos Pro M", 44800, 29600, 8191, 63,
4200 INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; 4202 INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
4201static const struct wacom_features wacom_features_0x361 = 4203static const struct wacom_features wacom_features_0x361 =
4202 { "Wacom Intuos Pro L", 62200, 43200, 8191, 63, 4204 { "Wacom Intuos Pro L", 62200, 43200, 8191, 63,
4203 INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; 4205 INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 };
4204 4206
4205static const struct wacom_features wacom_features_HID_ANY_ID = 4207static const struct wacom_features wacom_features_HID_ANY_ID =
4206 { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; 4208 { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID };
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c
index 09b4df74291e..bb865695d7a6 100644
--- a/drivers/ptp/ptp_kvm.c
+++ b/drivers/ptp/ptp_kvm.c
@@ -193,10 +193,7 @@ static int __init ptp_kvm_init(void)
193 193
194 kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); 194 kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL);
195 195
196 if (IS_ERR(kvm_ptp_clock.ptp_clock)) 196 return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock);
197 return PTR_ERR(kvm_ptp_clock.ptp_clock);
198
199 return 0;
200} 197}
201 198
202module_init(ptp_kvm_init); 199module_init(ptp_kvm_init);
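
PTR_ERR_OR_ZERO() (declared in <linux/err.h>) collapses the removed three-line idiom into a single expression; it behaves roughly like this reference helper:

#include <linux/err.h>

/* Rough equivalent of PTR_ERR_OR_ZERO(ptr), shown for reference only */
static inline int example_ptr_err_or_zero(const void *ptr)
{
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);	/* propagate the encoded errno */
	return 0;			/* valid pointer: report success */
}
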
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 65f86bc24c07..1dc43fc5f65f 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -76,7 +76,7 @@ config QCOM_ADSP_PIL
76 depends on OF && ARCH_QCOM 76 depends on OF && ARCH_QCOM
77 depends on REMOTEPROC 77 depends on REMOTEPROC
78 depends on QCOM_SMEM 78 depends on QCOM_SMEM
79 depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) 79 depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
80 select MFD_SYSCON 80 select MFD_SYSCON
81 select QCOM_MDT_LOADER 81 select QCOM_MDT_LOADER
82 select QCOM_RPROC_COMMON 82 select QCOM_RPROC_COMMON
@@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL
93 depends on OF && ARCH_QCOM 93 depends on OF && ARCH_QCOM
94 depends on QCOM_SMEM 94 depends on QCOM_SMEM
95 depends on REMOTEPROC 95 depends on REMOTEPROC
96 depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) 96 depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
97 select MFD_SYSCON 97 select MFD_SYSCON
98 select QCOM_RPROC_COMMON 98 select QCOM_RPROC_COMMON
99 select QCOM_SCM 99 select QCOM_SCM
@@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL
104config QCOM_WCNSS_PIL 104config QCOM_WCNSS_PIL
105 tristate "Qualcomm WCNSS Peripheral Image Loader" 105 tristate "Qualcomm WCNSS Peripheral Image Loader"
106 depends on OF && ARCH_QCOM 106 depends on OF && ARCH_QCOM
107 depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) 107 depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n)
108 depends on QCOM_SMEM 108 depends on QCOM_SMEM
109 depends on REMOTEPROC 109 depends on REMOTEPROC
110 select QCOM_MDT_LOADER 110 select QCOM_MDT_LOADER
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 519ec1787117..efd84d1d178b 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -40,7 +40,8 @@ struct read_info_sccb {
40 u8 fac85; /* 85 */ 40 u8 fac85; /* 85 */
41 u8 _pad_86[91 - 86]; /* 86-90 */ 41 u8 _pad_86[91 - 86]; /* 86-90 */
42 u8 flags; /* 91 */ 42 u8 flags; /* 91 */
43 u8 _pad_92[99 - 92]; /* 92-98 */ 43 u8 _pad_92[98 - 92]; /* 92-97 */
44 u8 fac98; /* 98 */
44 u8 hamaxpow; /* 99 */ 45 u8 hamaxpow; /* 99 */
45 u32 rnsize2; /* 100-103 */ 46 u32 rnsize2; /* 100-103 */
46 u64 rnmax2; /* 104-111 */ 47 u64 rnmax2; /* 104-111 */
@@ -99,6 +100,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
99 sclp.has_pfmfi = !!(sccb->fac117 & 0x40); 100 sclp.has_pfmfi = !!(sccb->fac117 & 0x40);
100 sclp.has_ibs = !!(sccb->fac117 & 0x20); 101 sclp.has_ibs = !!(sccb->fac117 & 0x20);
101 sclp.has_hvs = !!(sccb->fac119 & 0x80); 102 sclp.has_hvs = !!(sccb->fac119 & 0x80);
103 sclp.has_kss = !!(sccb->fac98 & 0x01);
102 if (sccb->fac85 & 0x02) 104 if (sccb->fac85 & 0x02)
103 S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; 105 S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
104 sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; 106 sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 4bf55b5d78be..3c52867dfe28 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1253,20 +1253,6 @@ config SCSI_LPFC_DEBUG_FS
1253 This makes debugging information from the lpfc driver 1253 This makes debugging information from the lpfc driver
1254 available via the debugfs filesystem. 1254 available via the debugfs filesystem.
1255 1255
1256config LPFC_NVME_INITIATOR
1257 bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
1258 depends on SCSI_LPFC && NVME_FC
1259 ---help---
1260 This enables NVME Initiator support in the Emulex lpfc driver.
1261
1262config LPFC_NVME_TARGET
1263 bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
1264 depends on SCSI_LPFC && NVME_TARGET_FC
1265 ---help---
1266 This enables NVME Target support in the Emulex lpfc driver.
1267 Target enablement must still be enabled on a per adapter
1268 basis by module parameters.
1269
1270config SCSI_SIM710 1256config SCSI_SIM710
1271 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" 1257 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
1272 depends on (EISA || MCA) && SCSI 1258 depends on (EISA || MCA) && SCSI
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 524a0c755ed7..0d0be7754a65 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
2956 /* fill_cmd can't fail here, no data buffer to map. */ 2956 /* fill_cmd can't fail here, no data buffer to map. */
2957 (void) fill_cmd(c, reset_type, h, NULL, 0, 0, 2957 (void) fill_cmd(c, reset_type, h, NULL, 0, 0,
2958 scsi3addr, TYPE_MSG); 2958 scsi3addr, TYPE_MSG);
2959 rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT); 2959 rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
2960 if (rc) { 2960 if (rc) {
2961 dev_warn(&h->pdev->dev, "Failed to send reset command\n"); 2961 dev_warn(&h->pdev->dev, "Failed to send reset command\n");
2962 goto out; 2962 goto out;
@@ -3714,7 +3714,7 @@ exit_failed:
3714 * # (integer code indicating one of several NOT READY states 3714 * # (integer code indicating one of several NOT READY states
3715 * describing why a volume is to be kept offline) 3715 * describing why a volume is to be kept offline)
3716 */ 3716 */
3717static int hpsa_volume_offline(struct ctlr_info *h, 3717static unsigned char hpsa_volume_offline(struct ctlr_info *h,
3718 unsigned char scsi3addr[]) 3718 unsigned char scsi3addr[])
3719{ 3719{
3720 struct CommandList *c; 3720 struct CommandList *c;
@@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
3735 DEFAULT_TIMEOUT); 3735 DEFAULT_TIMEOUT);
3736 if (rc) { 3736 if (rc) {
3737 cmd_free(h, c); 3737 cmd_free(h, c);
3738 return 0; 3738 return HPSA_VPD_LV_STATUS_UNSUPPORTED;
3739 } 3739 }
3740 sense = c->err_info->SenseInfo; 3740 sense = c->err_info->SenseInfo;
3741 if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) 3741 if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
@@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h,
3746 cmd_status = c->err_info->CommandStatus; 3746 cmd_status = c->err_info->CommandStatus;
3747 scsi_status = c->err_info->ScsiStatus; 3747 scsi_status = c->err_info->ScsiStatus;
3748 cmd_free(h, c); 3748 cmd_free(h, c);
3749 /* Is the volume 'not ready'? */
3750 if (cmd_status != CMD_TARGET_STATUS ||
3751 scsi_status != SAM_STAT_CHECK_CONDITION ||
3752 sense_key != NOT_READY ||
3753 asc != ASC_LUN_NOT_READY) {
3754 return 0;
3755 }
3756 3749
3757 /* Determine the reason for not ready state */ 3750 /* Determine the reason for not ready state */
3758 ldstat = hpsa_get_volume_status(h, scsi3addr); 3751 ldstat = hpsa_get_volume_status(h, scsi3addr);
3759 3752
3760 /* Keep volume offline in certain cases: */ 3753 /* Keep volume offline in certain cases: */
3761 switch (ldstat) { 3754 switch (ldstat) {
3755 case HPSA_LV_FAILED:
3762 case HPSA_LV_UNDERGOING_ERASE: 3756 case HPSA_LV_UNDERGOING_ERASE:
3763 case HPSA_LV_NOT_AVAILABLE: 3757 case HPSA_LV_NOT_AVAILABLE:
3764 case HPSA_LV_UNDERGOING_RPI: 3758 case HPSA_LV_UNDERGOING_RPI:
@@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
3780 default: 3774 default:
3781 break; 3775 break;
3782 } 3776 }
3783 return 0; 3777 return HPSA_LV_OK;
3784} 3778}
3785 3779
3786/* 3780/*
@@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h,
3853 /* Do an inquiry to the device to see what it is. */ 3847 /* Do an inquiry to the device to see what it is. */
3854 if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, 3848 if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
3855 (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { 3849 (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
3856 /* Inquiry failed (msg printed already) */
3857 dev_err(&h->pdev->dev, 3850 dev_err(&h->pdev->dev,
3858 "hpsa_update_device_info: inquiry failed\n"); 3851 "%s: inquiry failed, device will be skipped.\n",
3859 rc = -EIO; 3852 __func__);
3853 rc = HPSA_INQUIRY_FAILED;
3860 goto bail_out; 3854 goto bail_out;
3861 } 3855 }
3862 3856
@@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h,
3885 if ((this_device->devtype == TYPE_DISK || 3879 if ((this_device->devtype == TYPE_DISK ||
3886 this_device->devtype == TYPE_ZBC) && 3880 this_device->devtype == TYPE_ZBC) &&
3887 is_logical_dev_addr_mode(scsi3addr)) { 3881 is_logical_dev_addr_mode(scsi3addr)) {
3888 int volume_offline; 3882 unsigned char volume_offline;
3889 3883
3890 hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); 3884 hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
3891 if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) 3885 if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
3892 hpsa_get_ioaccel_status(h, scsi3addr, this_device); 3886 hpsa_get_ioaccel_status(h, scsi3addr, this_device);
3893 volume_offline = hpsa_volume_offline(h, scsi3addr); 3887 volume_offline = hpsa_volume_offline(h, scsi3addr);
3894 if (volume_offline < 0 || volume_offline > 0xff) 3888 if (volume_offline == HPSA_LV_FAILED) {
3895 volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; 3889 rc = HPSA_LV_FAILED;
3896 this_device->volume_offline = volume_offline & 0xff; 3890 dev_err(&h->pdev->dev,
3891 "%s: LV failed, device will be skipped.\n",
3892 __func__);
3893 goto bail_out;
3894 }
3897 } else { 3895 } else {
3898 this_device->raid_level = RAID_UNKNOWN; 3896 this_device->raid_level = RAID_UNKNOWN;
3899 this_device->offload_config = 0; 3897 this_device->offload_config = 0;
@@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h)
4379 goto out; 4377 goto out;
4380 } 4378 }
4381 if (rc) { 4379 if (rc) {
4382 dev_warn(&h->pdev->dev, 4380 h->drv_req_rescan = 1;
4383 "Inquiry failed, skipping device.\n");
4384 continue; 4381 continue;
4385 } 4382 }
4386 4383
@@ -5558,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h)
5558 5555
5559 spin_lock_irqsave(&h->scan_lock, flags); 5556 spin_lock_irqsave(&h->scan_lock, flags);
5560 h->scan_finished = 1; 5557 h->scan_finished = 1;
5561 wake_up_all(&h->scan_wait_queue); 5558 wake_up(&h->scan_wait_queue);
5562 spin_unlock_irqrestore(&h->scan_lock, flags); 5559 spin_unlock_irqrestore(&h->scan_lock, flags);
5563} 5560}
5564 5561
@@ -5576,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
5576 if (unlikely(lockup_detected(h))) 5573 if (unlikely(lockup_detected(h)))
5577 return hpsa_scan_complete(h); 5574 return hpsa_scan_complete(h);
5578 5575
5576 /*
5577 * If a scan is already waiting to run, no need to add another
5578 */
5579 spin_lock_irqsave(&h->scan_lock, flags);
5580 if (h->scan_waiting) {
5581 spin_unlock_irqrestore(&h->scan_lock, flags);
5582 return;
5583 }
5584
5585 spin_unlock_irqrestore(&h->scan_lock, flags);
5586
5579 /* wait until any scan already in progress is finished. */ 5587 /* wait until any scan already in progress is finished. */
5580 while (1) { 5588 while (1) {
5581 spin_lock_irqsave(&h->scan_lock, flags); 5589 spin_lock_irqsave(&h->scan_lock, flags);
5582 if (h->scan_finished) 5590 if (h->scan_finished)
5583 break; 5591 break;
5592 h->scan_waiting = 1;
5584 spin_unlock_irqrestore(&h->scan_lock, flags); 5593 spin_unlock_irqrestore(&h->scan_lock, flags);
5585 wait_event(h->scan_wait_queue, h->scan_finished); 5594 wait_event(h->scan_wait_queue, h->scan_finished);
5586 /* Note: We don't need to worry about a race between this 5595 /* Note: We don't need to worry about a race between this
@@ -5590,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh)
5590 */ 5599 */
5591 } 5600 }
5592 h->scan_finished = 0; /* mark scan as in progress */ 5601 h->scan_finished = 0; /* mark scan as in progress */
5602 h->scan_waiting = 0;
5593 spin_unlock_irqrestore(&h->scan_lock, flags); 5603 spin_unlock_irqrestore(&h->scan_lock, flags);
5594 5604
5595 if (unlikely(lockup_detected(h))) 5605 if (unlikely(lockup_detected(h)))
@@ -8792,6 +8802,7 @@ reinit_after_soft_reset:
8792 init_waitqueue_head(&h->event_sync_wait_queue); 8802 init_waitqueue_head(&h->event_sync_wait_queue);
8793 mutex_init(&h->reset_mutex); 8803 mutex_init(&h->reset_mutex);
8794 h->scan_finished = 1; /* no scan currently in progress */ 8804 h->scan_finished = 1; /* no scan currently in progress */
8805 h->scan_waiting = 0;
8795 8806
8796 pci_set_drvdata(pdev, h); 8807 pci_set_drvdata(pdev, h);
8797 h->ndevices = 0; 8808 h->ndevices = 0;
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index bf6cdc106654..6f04f2ad4125 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -201,6 +201,7 @@ struct ctlr_info {
201 dma_addr_t errinfo_pool_dhandle; 201 dma_addr_t errinfo_pool_dhandle;
202 unsigned long *cmd_pool_bits; 202 unsigned long *cmd_pool_bits;
203 int scan_finished; 203 int scan_finished;
204 u8 scan_waiting : 1;
204 spinlock_t scan_lock; 205 spinlock_t scan_lock;
205 wait_queue_head_t scan_wait_queue; 206 wait_queue_head_t scan_wait_queue;
206 207
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index a584cdf07058..5961705eef76 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -156,6 +156,7 @@
156#define CFGTBL_BusType_Fibre2G 0x00000200l 156#define CFGTBL_BusType_Fibre2G 0x00000200l
157 157
158/* VPD Inquiry types */ 158/* VPD Inquiry types */
159#define HPSA_INQUIRY_FAILED 0x02
159#define HPSA_VPD_SUPPORTED_PAGES 0x00 160#define HPSA_VPD_SUPPORTED_PAGES 0x00
160#define HPSA_VPD_LV_DEVICE_ID 0x83 161#define HPSA_VPD_LV_DEVICE_ID 0x83
161#define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 162#define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1
@@ -166,6 +167,7 @@
166/* Logical volume states */ 167/* Logical volume states */
167#define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff 168#define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff
168#define HPSA_LV_OK 0x0 169#define HPSA_LV_OK 0x0
170#define HPSA_LV_FAILED 0x01
169#define HPSA_LV_NOT_AVAILABLE 0x0b 171#define HPSA_LV_NOT_AVAILABLE 0x0b
170#define HPSA_LV_UNDERGOING_ERASE 0x0F 172#define HPSA_LV_UNDERGOING_ERASE 0x0F
171#define HPSA_LV_UNDERGOING_RPI 0x12 173#define HPSA_LV_UNDERGOING_RPI 0x12
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 5c3be3e6f5e2..22819afbaef5 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -3315,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
3315 * lpfc_enable_fc4_type: Defines what FC4 types are supported. 3315 * lpfc_enable_fc4_type: Defines what FC4 types are supported.
3316 * Supported Values: 1 - register just FCP 3316 * Supported Values: 1 - register just FCP
3317 * 3 - register both FCP and NVME 3317 * 3 - register both FCP and NVME
3318 * Supported values are [1,3]. Default value is 3 3318 * Supported values are [1,3]. Default value is 1
3319 */ 3319 */
3320LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, 3320LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
3321 LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, 3321 LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
3322 "Define fc4 type to register with fabric."); 3322 "Define fc4 type to register with fabric.");
3323 3323
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 2697d49da4d7..6cc561b04211 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -5891,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
5891 /* Check to see if it matches any module parameter */ 5891 /* Check to see if it matches any module parameter */
5892 for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { 5892 for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
5893 if (wwn == lpfc_enable_nvmet[i]) { 5893 if (wwn == lpfc_enable_nvmet[i]) {
5894#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
5894 lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 5895 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
5895 "6017 NVME Target %016llx\n", 5896 "6017 NVME Target %016llx\n",
5896 wwn); 5897 wwn);
5897 phba->nvmet_support = 1; /* a match */ 5898 phba->nvmet_support = 1; /* a match */
5899#else
5900 lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
5901 "6021 Can't enable NVME Target."
5902 " NVME_TARGET_FC infrastructure"
5903 " is not in kernel\n");
5904#endif
5898 } 5905 }
5899 } 5906 }
5900 } 5907 }
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 0a4c19081409..0024de1c6c1f 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2149,7 +2149,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
2149 /* localport is allocated from the stack, but the registration 2149 /* localport is allocated from the stack, but the registration
2150 * call allocates heap memory as well as the private area. 2150 * call allocates heap memory as well as the private area.
2151 */ 2151 */
2152#ifdef CONFIG_LPFC_NVME_INITIATOR 2152#if (IS_ENABLED(CONFIG_NVME_FC))
2153 ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, 2153 ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
2154 &vport->phba->pcidev->dev, &localport); 2154 &vport->phba->pcidev->dev, &localport);
2155#else 2155#else
@@ -2190,7 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
2190void 2190void
2191lpfc_nvme_destroy_localport(struct lpfc_vport *vport) 2191lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
2192{ 2192{
2193#ifdef CONFIG_LPFC_NVME_INITIATOR 2193#if (IS_ENABLED(CONFIG_NVME_FC))
2194 struct nvme_fc_local_port *localport; 2194 struct nvme_fc_local_port *localport;
2195 struct lpfc_nvme_lport *lport; 2195 struct lpfc_nvme_lport *lport;
2196 struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; 2196 struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
@@ -2274,7 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
2274int 2274int
2275lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) 2275lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
2276{ 2276{
2277#ifdef CONFIG_LPFC_NVME_INITIATOR 2277#if (IS_ENABLED(CONFIG_NVME_FC))
2278 int ret = 0; 2278 int ret = 0;
2279 struct nvme_fc_local_port *localport; 2279 struct nvme_fc_local_port *localport;
2280 struct lpfc_nvme_lport *lport; 2280 struct lpfc_nvme_lport *lport;
@@ -2403,7 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
2403void 2403void
2404lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) 2404lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
2405{ 2405{
2406#ifdef CONFIG_LPFC_NVME_INITIATOR 2406#if (IS_ENABLED(CONFIG_NVME_FC))
2407 int ret; 2407 int ret;
2408 struct nvme_fc_local_port *localport; 2408 struct nvme_fc_local_port *localport;
2409 struct lpfc_nvme_lport *lport; 2409 struct lpfc_nvme_lport *lport;
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index b7739a554fe0..7ca868f394da 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -671,7 +671,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
671 lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | 671 lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
672 NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; 672 NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
673 673
674#ifdef CONFIG_LPFC_NVME_TARGET 674#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
675 error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, 675 error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
676 &phba->pcidev->dev, 676 &phba->pcidev->dev,
677 &phba->targetport); 677 &phba->targetport);
@@ -756,7 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
756void 756void
757lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) 757lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
758{ 758{
759#ifdef CONFIG_LPFC_NVME_TARGET 759#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
760 struct lpfc_nvmet_tgtport *tgtp; 760 struct lpfc_nvmet_tgtport *tgtp;
761 761
762 if (phba->nvmet_support == 0) 762 if (phba->nvmet_support == 0)
@@ -788,7 +788,7 @@ static void
788lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, 788lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
789 struct hbq_dmabuf *nvmebuf) 789 struct hbq_dmabuf *nvmebuf)
790{ 790{
791#ifdef CONFIG_LPFC_NVME_TARGET 791#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
792 struct lpfc_nvmet_tgtport *tgtp; 792 struct lpfc_nvmet_tgtport *tgtp;
793 struct fc_frame_header *fc_hdr; 793 struct fc_frame_header *fc_hdr;
794 struct lpfc_nvmet_rcv_ctx *ctxp; 794 struct lpfc_nvmet_rcv_ctx *ctxp;
@@ -891,7 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
891 struct rqb_dmabuf *nvmebuf, 891 struct rqb_dmabuf *nvmebuf,
892 uint64_t isr_timestamp) 892 uint64_t isr_timestamp)
893{ 893{
894#ifdef CONFIG_LPFC_NVME_TARGET 894#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
895 struct lpfc_nvmet_rcv_ctx *ctxp; 895 struct lpfc_nvmet_rcv_ctx *ctxp;
896 struct lpfc_nvmet_tgtport *tgtp; 896 struct lpfc_nvmet_tgtport *tgtp;
897 struct fc_frame_header *fc_hdr; 897 struct fc_frame_header *fc_hdr;
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index e7e5974e1a2c..2b209bbb4c91 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -35,8 +35,8 @@
35/* 35/*
36 * MegaRAID SAS Driver meta data 36 * MegaRAID SAS Driver meta data
37 */ 37 */
38#define MEGASAS_VERSION "07.701.16.00-rc1" 38#define MEGASAS_VERSION "07.701.17.00-rc1"
39#define MEGASAS_RELDATE "February 2, 2017" 39#define MEGASAS_RELDATE "March 2, 2017"
40 40
41/* 41/*
42 * Device IDs 42 * Device IDs
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 7ac9a9ee9bd4..0016f12cc563 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1963,6 +1963,9 @@ scan_target:
1963 if (!mr_device_priv_data) 1963 if (!mr_device_priv_data)
1964 return -ENOMEM; 1964 return -ENOMEM;
1965 sdev->hostdata = mr_device_priv_data; 1965 sdev->hostdata = mr_device_priv_data;
1966
1967 atomic_set(&mr_device_priv_data->r1_ldio_hint,
1968 instance->r1_ldio_hint_default);
1966 return 0; 1969 return 0;
1967} 1970}
1968 1971
@@ -5034,10 +5037,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe)
5034 &instance->irq_context[j]); 5037 &instance->irq_context[j]);
5035 /* Retry irq register for IO_APIC*/ 5038 /* Retry irq register for IO_APIC*/
5036 instance->msix_vectors = 0; 5039 instance->msix_vectors = 0;
5037 if (is_probe) 5040 if (is_probe) {
5041 pci_free_irq_vectors(instance->pdev);
5038 return megasas_setup_irqs_ioapic(instance); 5042 return megasas_setup_irqs_ioapic(instance);
5039 else 5043 } else {
5040 return -1; 5044 return -1;
5045 }
5041 } 5046 }
5042 } 5047 }
5043 return 0; 5048 return 0;
@@ -5277,9 +5282,11 @@ static int megasas_init_fw(struct megasas_instance *instance)
5277 MPI2_REPLY_POST_HOST_INDEX_OFFSET); 5282 MPI2_REPLY_POST_HOST_INDEX_OFFSET);
5278 } 5283 }
5279 5284
5280 i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); 5285 if (!instance->msix_vectors) {
5281 if (i < 0) 5286 i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY);
5282 goto fail_setup_irqs; 5287 if (i < 0)
5288 goto fail_setup_irqs;
5289 }
5283 5290
5284 dev_info(&instance->pdev->dev, 5291 dev_info(&instance->pdev->dev,
5285 "firmware supports msix\t: (%d)", fw_msix_count); 5292 "firmware supports msix\t: (%d)", fw_msix_count);
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 29650ba669da..f990ab4d45e1 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context,
2159 cpu_sel = MR_RAID_CTX_CPUSEL_1; 2159 cpu_sel = MR_RAID_CTX_CPUSEL_1;
2160 2160
2161 if (is_stream_detected(rctx_g35) && 2161 if (is_stream_detected(rctx_g35) &&
2162 (raid->level == 5) && 2162 ((raid->level == 5) || (raid->level == 6)) &&
2163 (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) && 2163 (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) &&
2164 (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS)) 2164 (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS))
2165 cpu_sel = MR_RAID_CTX_CPUSEL_0; 2165 cpu_sel = MR_RAID_CTX_CPUSEL_0;
@@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
2338 fp_possible = false; 2338 fp_possible = false;
2339 atomic_dec(&instance->fw_outstanding); 2339 atomic_dec(&instance->fw_outstanding);
2340 } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) || 2340 } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) ||
2341 atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) { 2341 (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) {
2342 fp_possible = false; 2342 fp_possible = false;
2343 atomic_dec(&instance->fw_outstanding); 2343 atomic_dec(&instance->fw_outstanding);
2344 if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE) 2344 if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1359913bf840..e8c26e6e6237 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7642,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev,
7642 if (kstrtoul(buf, 0, &value)) 7642 if (kstrtoul(buf, 0, &value))
7643 return -EINVAL; 7643 return -EINVAL;
7644 7644
7645 if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) 7645 if (value >= UFS_PM_LVL_MAX)
7646 return -EINVAL; 7646 return -EINVAL;
7647 7647
7648 spin_lock_irqsave(hba->host->host_lock, flags); 7648 spin_lock_irqsave(hba->host->host_lock, flags);
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index bcf1d33e6ffe..c334bcc59c64 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios,
575 pinctrl_select_state(ascport->pinctrl, 575 pinctrl_select_state(ascport->pinctrl,
576 ascport->states[NO_HW_FLOWCTRL]); 576 ascport->states[NO_HW_FLOWCTRL]);
577 577
578 gpiod = devm_get_gpiod_from_child(port->dev, "rts", 578 gpiod = devm_fwnode_get_gpiod_from_child(port->dev,
579 &np->fwnode); 579 "rts",
580 if (!IS_ERR(gpiod)) { 580 &np->fwnode,
581 gpiod_direction_output(gpiod, 0); 581 GPIOD_OUT_LOW,
582 np->name);
583 if (!IS_ERR(gpiod))
582 ascport->rts = gpiod; 584 ascport->rts = gpiod;
583 }
584 } 585 }
585 } 586 }
586 587
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index cf3de91fbfe7..8031d3a55a17 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -680,7 +680,7 @@ static void tce_iommu_free_table(struct tce_container *container,
680 unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; 680 unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
681 681
682 tce_iommu_userspace_view_free(tbl, container->mm); 682 tce_iommu_userspace_view_free(tbl, container->mm);
683 tbl->it_ops->free(tbl); 683 iommu_tce_table_put(tbl);
684 decrement_locked_vm(container->mm, pages); 684 decrement_locked_vm(container->mm, pages);
685} 685}
686 686
diff --git a/fs/exec.c b/fs/exec.c
index 65145a3df065..72934df68471 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1320 else 1320 else
1321 set_dumpable(current->mm, suid_dumpable); 1321 set_dumpable(current->mm, suid_dumpable);
1322 1322
1323 arch_setup_new_exec();
1323 perf_event_exec(); 1324 perf_event_exec();
1324 __set_task_comm(current, kbasename(bprm->filename), true); 1325 __set_task_comm(current, kbasename(bprm->filename), true);
1325 1326
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a77df377e2e8..ee2d0a485fc3 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
196 si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); 196 si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
197 si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; 197 si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
198 si->base_mem += NM_I(sbi)->nat_blocks / 8; 198 si->base_mem += NM_I(sbi)->nat_blocks / 8;
199 si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
199 200
200get_cache: 201get_cache:
201 si->cache_mem = 0; 202 si->cache_mem = 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4650c9b85de7..8d5c62b07b28 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
750 dentry_blk = page_address(page); 750 dentry_blk = page_address(page);
751 bit_pos = dentry - dentry_blk->dentry; 751 bit_pos = dentry - dentry_blk->dentry;
752 for (i = 0; i < slots; i++) 752 for (i = 0; i < slots; i++)
753 clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); 753 __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
754 754
755 /* Let's check and deallocate this dentry page */ 755 /* Let's check and deallocate this dentry page */
756 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 756 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
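This hunk, and several of the f2fs node.c hunks below, switch from the atomic set_bit_le()/clear_bit_le() helpers to the non-atomic __set_bit_le()/__clear_bit_le() variants: the bitmaps involved are already serialized by a lock (the dentry page here, nid_list_lock or free_nid_lock below), so the locked read-modify-write of the atomic versions is unnecessary cost. A small sketch of the convention, with hypothetical names:

#include <linux/bitops.h>
#include <linux/spinlock.h>

/* Hypothetical bitmap whose writers are all serialized by example_lock;
 * under that assumption the cheaper non-atomic bitops are sufficient. */
static DEFINE_SPINLOCK(example_lock);
static unsigned long example_map[BITS_TO_LONGS(1024)];

static void example_mark(unsigned int nr, bool is_free)
{
        spin_lock(&example_lock);
        if (is_free)
                __set_bit_le(nr, example_map);
        else
                __clear_bit_le(nr, example_map);
        spin_unlock(&example_lock);
        /* Without a serializing lock, the atomic set_bit_le()/clear_bit_le()
         * variants would be required instead. */
}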
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e849f83d6114..0a6e115562f6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -561,6 +561,8 @@ struct f2fs_nm_info {
561 struct mutex build_lock; /* lock for build free nids */ 561 struct mutex build_lock; /* lock for build free nids */
562 unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; 562 unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
563 unsigned char *nat_block_bitmap; 563 unsigned char *nat_block_bitmap;
564 unsigned short *free_nid_count; /* free nid count of NAT block */
565 spinlock_t free_nid_lock; /* protect updating of nid count */
564 566
565 /* for checkpoint */ 567 /* for checkpoint */
566 char *nat_bitmap; /* NAT bitmap pointer */ 568 char *nat_bitmap; /* NAT bitmap pointer */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 94967171dee8..481aa8dc79f4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
338 set_nat_flag(e, IS_CHECKPOINTED, false); 338 set_nat_flag(e, IS_CHECKPOINTED, false);
339 __set_nat_cache_dirty(nm_i, e); 339 __set_nat_cache_dirty(nm_i, e);
340 340
341 if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
342 clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
343
344 /* update fsync_mark if its inode nat entry is still alive */ 341 /* update fsync_mark if its inode nat entry is still alive */
345 if (ni->nid != ni->ino) 342 if (ni->nid != ni->ino)
346 e = __lookup_nat_cache(nm_i, ni->ino); 343 e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1823,7 +1820,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
1823 kmem_cache_free(free_nid_slab, i); 1820 kmem_cache_free(free_nid_slab, i);
1824} 1821}
1825 1822
1826void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set) 1823static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
1824 bool set, bool build, bool locked)
1827{ 1825{
1828 struct f2fs_nm_info *nm_i = NM_I(sbi); 1826 struct f2fs_nm_info *nm_i = NM_I(sbi);
1829 unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); 1827 unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1833,9 +1831,18 @@ void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
1833 return; 1831 return;
1834 1832
1835 if (set) 1833 if (set)
1836 set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); 1834 __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1837 else 1835 else
1838 clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); 1836 __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1837
1838 if (!locked)
1839 spin_lock(&nm_i->free_nid_lock);
1840 if (set)
1841 nm_i->free_nid_count[nat_ofs]++;
1842 else if (!build)
1843 nm_i->free_nid_count[nat_ofs]--;
1844 if (!locked)
1845 spin_unlock(&nm_i->free_nid_lock);
1839} 1846}
1840 1847
1841static void scan_nat_page(struct f2fs_sb_info *sbi, 1848static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1847,7 +1854,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1847 unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); 1854 unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
1848 int i; 1855 int i;
1849 1856
1850 set_bit_le(nat_ofs, nm_i->nat_block_bitmap); 1857 if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
1858 return;
1859
1860 __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
1851 1861
1852 i = start_nid % NAT_ENTRY_PER_BLOCK; 1862 i = start_nid % NAT_ENTRY_PER_BLOCK;
1853 1863
@@ -1861,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1861 f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1871 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1862 if (blk_addr == NULL_ADDR) 1872 if (blk_addr == NULL_ADDR)
1863 freed = add_free_nid(sbi, start_nid, true); 1873 freed = add_free_nid(sbi, start_nid, true);
1864 update_free_nid_bitmap(sbi, start_nid, freed); 1874 update_free_nid_bitmap(sbi, start_nid, freed, true, false);
1865 } 1875 }
1866} 1876}
1867 1877
@@ -1877,6 +1887,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
1877 for (i = 0; i < nm_i->nat_blocks; i++) { 1887 for (i = 0; i < nm_i->nat_blocks; i++) {
1878 if (!test_bit_le(i, nm_i->nat_block_bitmap)) 1888 if (!test_bit_le(i, nm_i->nat_block_bitmap))
1879 continue; 1889 continue;
1890 if (!nm_i->free_nid_count[i])
1891 continue;
1880 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { 1892 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
1881 nid_t nid; 1893 nid_t nid;
1882 1894
@@ -1907,58 +1919,6 @@ out:
1907 up_read(&nm_i->nat_tree_lock); 1919 up_read(&nm_i->nat_tree_lock);
1908} 1920}
1909 1921
1910static int scan_nat_bits(struct f2fs_sb_info *sbi)
1911{
1912 struct f2fs_nm_info *nm_i = NM_I(sbi);
1913 struct page *page;
1914 unsigned int i = 0;
1915 nid_t nid;
1916
1917 if (!enabled_nat_bits(sbi, NULL))
1918 return -EAGAIN;
1919
1920 down_read(&nm_i->nat_tree_lock);
1921check_empty:
1922 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
1923 if (i >= nm_i->nat_blocks) {
1924 i = 0;
1925 goto check_partial;
1926 }
1927
1928 for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
1929 nid++) {
1930 if (unlikely(nid >= nm_i->max_nid))
1931 break;
1932 add_free_nid(sbi, nid, true);
1933 }
1934
1935 if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
1936 goto out;
1937 i++;
1938 goto check_empty;
1939
1940check_partial:
1941 i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
1942 if (i >= nm_i->nat_blocks) {
1943 disable_nat_bits(sbi, true);
1944 up_read(&nm_i->nat_tree_lock);
1945 return -EINVAL;
1946 }
1947
1948 nid = i * NAT_ENTRY_PER_BLOCK;
1949 page = get_current_nat_page(sbi, nid);
1950 scan_nat_page(sbi, page, nid);
1951 f2fs_put_page(page, 1);
1952
1953 if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
1954 i++;
1955 goto check_partial;
1956 }
1957out:
1958 up_read(&nm_i->nat_tree_lock);
1959 return 0;
1960}
1961
1962static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) 1922static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1963{ 1923{
1964 struct f2fs_nm_info *nm_i = NM_I(sbi); 1924 struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1980,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1980 1940
1981 if (nm_i->nid_cnt[FREE_NID_LIST]) 1941 if (nm_i->nid_cnt[FREE_NID_LIST])
1982 return; 1942 return;
1983
1984 /* try to find free nids with nat_bits */
1985 if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
1986 return;
1987 }
1988
1989 /* find next valid candidate */
1990 if (enabled_nat_bits(sbi, NULL)) {
1991 int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
1992 nm_i->nat_blocks, 0);
1993
1994 if (idx >= nm_i->nat_blocks)
1995 set_sbi_flag(sbi, SBI_NEED_FSCK);
1996 else
1997 nid = idx * NAT_ENTRY_PER_BLOCK;
1998 } 1943 }
1999 1944
2000 /* readahead nat pages to be scanned */ 1945 /* readahead nat pages to be scanned */
@@ -2081,7 +2026,7 @@ retry:
2081 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); 2026 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
2082 nm_i->available_nids--; 2027 nm_i->available_nids--;
2083 2028
2084 update_free_nid_bitmap(sbi, *nid, false); 2029 update_free_nid_bitmap(sbi, *nid, false, false, false);
2085 2030
2086 spin_unlock(&nm_i->nid_list_lock); 2031 spin_unlock(&nm_i->nid_list_lock);
2087 return true; 2032 return true;
@@ -2137,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
2137 2082
2138 nm_i->available_nids++; 2083 nm_i->available_nids++;
2139 2084
2140 update_free_nid_bitmap(sbi, nid, true); 2085 update_free_nid_bitmap(sbi, nid, true, false, false);
2141 2086
2142 spin_unlock(&nm_i->nid_list_lock); 2087 spin_unlock(&nm_i->nid_list_lock);
2143 2088
@@ -2383,7 +2328,7 @@ add_out:
2383 list_add_tail(&nes->set_list, head); 2328 list_add_tail(&nes->set_list, head);
2384} 2329}
2385 2330
2386void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, 2331static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2387 struct page *page) 2332 struct page *page)
2388{ 2333{
2389 struct f2fs_nm_info *nm_i = NM_I(sbi); 2334 struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2402,16 +2347,16 @@ void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2402 valid++; 2347 valid++;
2403 } 2348 }
2404 if (valid == 0) { 2349 if (valid == 0) {
2405 set_bit_le(nat_index, nm_i->empty_nat_bits); 2350 __set_bit_le(nat_index, nm_i->empty_nat_bits);
2406 clear_bit_le(nat_index, nm_i->full_nat_bits); 2351 __clear_bit_le(nat_index, nm_i->full_nat_bits);
2407 return; 2352 return;
2408 } 2353 }
2409 2354
2410 clear_bit_le(nat_index, nm_i->empty_nat_bits); 2355 __clear_bit_le(nat_index, nm_i->empty_nat_bits);
2411 if (valid == NAT_ENTRY_PER_BLOCK) 2356 if (valid == NAT_ENTRY_PER_BLOCK)
2412 set_bit_le(nat_index, nm_i->full_nat_bits); 2357 __set_bit_le(nat_index, nm_i->full_nat_bits);
2413 else 2358 else
2414 clear_bit_le(nat_index, nm_i->full_nat_bits); 2359 __clear_bit_le(nat_index, nm_i->full_nat_bits);
2415} 2360}
2416 2361
2417static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, 2362static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -2467,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2467 add_free_nid(sbi, nid, false); 2412 add_free_nid(sbi, nid, false);
2468 spin_lock(&NM_I(sbi)->nid_list_lock); 2413 spin_lock(&NM_I(sbi)->nid_list_lock);
2469 NM_I(sbi)->available_nids++; 2414 NM_I(sbi)->available_nids++;
2470 update_free_nid_bitmap(sbi, nid, true); 2415 update_free_nid_bitmap(sbi, nid, true, false, false);
2471 spin_unlock(&NM_I(sbi)->nid_list_lock); 2416 spin_unlock(&NM_I(sbi)->nid_list_lock);
2472 } else { 2417 } else {
2473 spin_lock(&NM_I(sbi)->nid_list_lock); 2418 spin_lock(&NM_I(sbi)->nid_list_lock);
2474 update_free_nid_bitmap(sbi, nid, false); 2419 update_free_nid_bitmap(sbi, nid, false, false, false);
2475 spin_unlock(&NM_I(sbi)->nid_list_lock); 2420 spin_unlock(&NM_I(sbi)->nid_list_lock);
2476 } 2421 }
2477 } 2422 }
@@ -2577,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
2577 return 0; 2522 return 0;
2578} 2523}
2579 2524
2525inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
2526{
2527 struct f2fs_nm_info *nm_i = NM_I(sbi);
2528 unsigned int i = 0;
2529 nid_t nid, last_nid;
2530
2531 if (!enabled_nat_bits(sbi, NULL))
2532 return;
2533
2534 for (i = 0; i < nm_i->nat_blocks; i++) {
2535 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
2536 if (i >= nm_i->nat_blocks)
2537 break;
2538
2539 __set_bit_le(i, nm_i->nat_block_bitmap);
2540
2541 nid = i * NAT_ENTRY_PER_BLOCK;
2542 last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
2543
2544 spin_lock(&nm_i->free_nid_lock);
2545 for (; nid < last_nid; nid++)
2546 update_free_nid_bitmap(sbi, nid, true, true, true);
2547 spin_unlock(&nm_i->free_nid_lock);
2548 }
2549
2550 for (i = 0; i < nm_i->nat_blocks; i++) {
2551 i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
2552 if (i >= nm_i->nat_blocks)
2553 break;
2554
2555 __set_bit_le(i, nm_i->nat_block_bitmap);
2556 }
2557}
2558
2580static int init_node_manager(struct f2fs_sb_info *sbi) 2559static int init_node_manager(struct f2fs_sb_info *sbi)
2581{ 2560{
2582 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); 2561 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2638,7 +2617,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
2638 return 0; 2617 return 0;
2639} 2618}
2640 2619
2641int init_free_nid_cache(struct f2fs_sb_info *sbi) 2620static int init_free_nid_cache(struct f2fs_sb_info *sbi)
2642{ 2621{
2643 struct f2fs_nm_info *nm_i = NM_I(sbi); 2622 struct f2fs_nm_info *nm_i = NM_I(sbi);
2644 2623
@@ -2651,6 +2630,14 @@ int init_free_nid_cache(struct f2fs_sb_info *sbi)
2651 GFP_KERNEL); 2630 GFP_KERNEL);
2652 if (!nm_i->nat_block_bitmap) 2631 if (!nm_i->nat_block_bitmap)
2653 return -ENOMEM; 2632 return -ENOMEM;
2633
2634 nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
2635 sizeof(unsigned short), GFP_KERNEL);
2636 if (!nm_i->free_nid_count)
2637 return -ENOMEM;
2638
2639 spin_lock_init(&nm_i->free_nid_lock);
2640
2654 return 0; 2641 return 0;
2655} 2642}
2656 2643
@@ -2670,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
2670 if (err) 2657 if (err)
2671 return err; 2658 return err;
2672 2659
2660 /* load free nid status from nat_bits table */
2661 load_free_nid_bitmap(sbi);
2662
2673 build_free_nids(sbi, true, true); 2663 build_free_nids(sbi, true, true);
2674 return 0; 2664 return 0;
2675} 2665}
@@ -2730,6 +2720,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2730 2720
2731 kvfree(nm_i->nat_block_bitmap); 2721 kvfree(nm_i->nat_block_bitmap);
2732 kvfree(nm_i->free_nid_bitmap); 2722 kvfree(nm_i->free_nid_bitmap);
2723 kvfree(nm_i->free_nid_count);
2733 2724
2734 kfree(nm_i->nat_bitmap); 2725 kfree(nm_i->nat_bitmap);
2735 kfree(nm_i->nat_bits); 2726 kfree(nm_i->nat_bits);
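Taken together, the f2fs.h and node.c hunks above add a per-NAT-block count of known-free nids (free_nid_count[], guarded by free_nid_lock) alongside the existing free_nid_bitmap. The payoff is in scan_free_nid_bits(): a NAT block whose count is zero can be skipped without walking its bitmap at all. A condensed sketch of the consumer side; the names mirror the hunks above but the types here are illustrative only:

#include <linux/bitops.h>

/* Hypothetical, trimmed-down view of the new bookkeeping: one free-nid
 * bitmap and one counter per NAT block. */
struct example_nm {
        unsigned int nat_blocks;
        unsigned long *nat_block_bitmap;        /* which blocks were scanned */
        unsigned char (*free_nid_bitmap)[64];   /* per-block free-nid bits */
        unsigned short *free_nid_count;         /* per-block free-nid count */
};

static void example_scan(struct example_nm *nm)
{
        unsigned int i;

        for (i = 0; i < nm->nat_blocks; i++) {
                if (!test_bit_le(i, nm->nat_block_bitmap))
                        continue;               /* block not cached yet */
                if (!nm->free_nid_count[i])
                        continue;               /* no free nid: skip the bit walk */
                /* ...walk free_nid_bitmap[i] and collect candidate nids... */
        }
}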
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4bd7a8b19332..29ef7088c558 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
1163 if (f2fs_discard_en(sbi) && 1163 if (f2fs_discard_en(sbi) &&
1164 !f2fs_test_and_set_bit(offset, se->discard_map)) 1164 !f2fs_test_and_set_bit(offset, se->discard_map))
1165 sbi->discard_blks--; 1165 sbi->discard_blks--;
1166
1167 /* don't overwrite by SSR to keep node chain */
1168 if (se->type == CURSEG_WARM_NODE) {
1169 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
1170 se->ckpt_valid_blocks++;
1171 }
1166 } else { 1172 } else {
1167 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { 1173 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
1168#ifdef CONFIG_F2FS_CHECK_FS 1174#ifdef CONFIG_F2FS_CHECK_FS
diff --git a/include/linux/compat.h b/include/linux/compat.h
index aef47be2a5c1..af9dbc44fd92 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
723asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, 723asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
724 int, const char __user *); 724 int, const char __user *);
725 725
726asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
727
726/* 728/*
727 * For most but not all architectures, "am I in a compat syscall?" and 729 * For most but not all architectures, "am I in a compat syscall?" and
728 * "am I a compat task?" are the same question. For architectures on which 730 * "am I a compat task?" are the same question. For architectures on which
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 2484b2fcc6eb..933d93656605 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
143 struct fwnode_handle *child, 143 struct fwnode_handle *child,
144 enum gpiod_flags flags, 144 enum gpiod_flags flags,
145 const char *label); 145 const char *label);
146/* FIXME: delete this helper when users are switched over */
147static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
148 const char *con_id, struct fwnode_handle *child)
149{
150 return devm_fwnode_get_index_gpiod_from_child(dev, con_id,
151 0, child,
152 GPIOD_ASIS,
153 "?");
154}
155 146
156#else /* CONFIG_GPIOLIB */ 147#else /* CONFIG_GPIOLIB */
157 148
@@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
444 return ERR_PTR(-ENOSYS); 435 return ERR_PTR(-ENOSYS);
445} 436}
446 437
447/* FIXME: delete this when all users are switched over */
448static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
449 const char *con_id, struct fwnode_handle *child)
450{
451 return ERR_PTR(-ENOSYS);
452}
453
454#endif /* CONFIG_GPIOLIB */ 438#endif /* CONFIG_GPIOLIB */
455 439
456static inline 440static inline
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d1a6e554ee68..9de1d3ca83b2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
162 int len, void *val); 162 int len, void *val);
163int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 163int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
164 int len, struct kvm_io_device *dev); 164 int len, struct kvm_io_device *dev);
165int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 165void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
166 struct kvm_io_device *dev); 166 struct kvm_io_device *dev);
167struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, 167struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
168 gpa_t addr); 168 gpa_t addr);
169 169
@@ -403,7 +403,7 @@ struct kvm {
403 struct kvm_vm_stat stat; 403 struct kvm_vm_stat stat;
404 struct kvm_arch arch; 404 struct kvm_arch arch;
405 refcount_t users_count; 405 refcount_t users_count;
406#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 406#ifdef CONFIG_KVM_MMIO
407 struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; 407 struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
408 spinlock_t ring_lock; 408 spinlock_t ring_lock;
409 struct list_head coalesced_zones; 409 struct list_head coalesced_zones;
@@ -502,10 +502,10 @@ int __must_check vcpu_load(struct kvm_vcpu *vcpu);
502void vcpu_put(struct kvm_vcpu *vcpu); 502void vcpu_put(struct kvm_vcpu *vcpu);
503 503
504#ifdef __KVM_HAVE_IOAPIC 504#ifdef __KVM_HAVE_IOAPIC
505void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); 505void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
506void kvm_arch_post_irq_routing_update(struct kvm *kvm); 506void kvm_arch_post_irq_routing_update(struct kvm *kvm);
507#else 507#else
508static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) 508static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
509{ 509{
510} 510}
511static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) 511static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
@@ -877,22 +877,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
877int kvm_request_irq_source_id(struct kvm *kvm); 877int kvm_request_irq_source_id(struct kvm *kvm);
878void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 878void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
879 879
880#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
881int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
882void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
883#else
884static inline int kvm_iommu_map_pages(struct kvm *kvm,
885 struct kvm_memory_slot *slot)
886{
887 return 0;
888}
889
890static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
891 struct kvm_memory_slot *slot)
892{
893}
894#endif
895
896/* 880/*
897 * search_memslots() and __gfn_to_memslot() are here because they are 881 * search_memslots() and __gfn_to_memslot() are here because they are
898 * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. 882 * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 58373875e8ee..55125d674338 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n,
101{ } 101{ }
102#endif /* CONFIG_HARDENED_USERCOPY */ 102#endif /* CONFIG_HARDENED_USERCOPY */
103 103
104#ifndef arch_setup_new_exec
105static inline void arch_setup_new_exec(void) { }
106#endif
107
104#endif /* __KERNEL__ */ 108#endif /* __KERNEL__ */
105 109
106#endif /* _LINUX_THREAD_INFO_H */ 110#endif /* _LINUX_THREAD_INFO_H */
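setup_new_exec() now calls arch_setup_new_exec(), and the thread_info.h hunk supplies an empty inline default so that only architectures which actually want a callback at exec time have to implement one. The usual way an architecture overrides such a hook is to define both the function and a same-named macro in its own header before the generic one is seen; a hedged sketch of the pattern with a made-up hook name:

/* arch header (e.g. arch/foo/include/asm/thread_info.h): opt in */
#define arch_example_hook arch_example_hook
void arch_example_hook(void);

/* generic header, included afterwards: no-op fallback unless the arch
 * defined the macro above */
#ifndef arch_example_hook
static inline void arch_example_hook(void) { }
#endif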
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b59ee077a596..8c6d3bdb9a00 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -409,6 +409,7 @@ typedef struct elf64_shdr {
409#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ 409#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */
410#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ 410#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */
411#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ 411#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
412#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
412#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ 413#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
413#define NT_ARM_TLS 0x401 /* ARM TLS register */ 414#define NT_ARM_TLS 0x401 /* ARM TLS register */
414#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ 415#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f51d5082a377..3c168b6fd74b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -702,6 +702,10 @@ struct kvm_ppc_resize_hpt {
702#define KVM_VM_PPC_HV 1 702#define KVM_VM_PPC_HV 1
703#define KVM_VM_PPC_PR 2 703#define KVM_VM_PPC_PR 2
704 704
705/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */
706#define KVM_VM_MIPS_TE 0
707#define KVM_VM_MIPS_VZ 1
708
705#define KVM_S390_SIE_PAGE_OFFSET 1 709#define KVM_S390_SIE_PAGE_OFFSET 1
706 710
707/* 711/*
@@ -883,6 +887,12 @@ struct kvm_ppc_resize_hpt {
883#define KVM_CAP_PPC_MMU_RADIX 134 887#define KVM_CAP_PPC_MMU_RADIX 134
884#define KVM_CAP_PPC_MMU_HASH_V3 135 888#define KVM_CAP_PPC_MMU_HASH_V3 135
885#define KVM_CAP_IMMEDIATE_EXIT 136 889#define KVM_CAP_IMMEDIATE_EXIT 136
890#define KVM_CAP_MIPS_VZ 137
891#define KVM_CAP_MIPS_TE 138
892#define KVM_CAP_MIPS_64BIT 139
893#define KVM_CAP_S390_GS 140
894#define KVM_CAP_S390_AIS 141
895#define KVM_CAP_SPAPR_TCE_VFIO 142
886 896
887#ifdef KVM_CAP_IRQ_ROUTING 897#ifdef KVM_CAP_IRQ_ROUTING
888 898
@@ -1087,6 +1097,7 @@ struct kvm_device_attr {
1087#define KVM_DEV_VFIO_GROUP 1 1097#define KVM_DEV_VFIO_GROUP 1
1088#define KVM_DEV_VFIO_GROUP_ADD 1 1098#define KVM_DEV_VFIO_GROUP_ADD 1
1089#define KVM_DEV_VFIO_GROUP_DEL 2 1099#define KVM_DEV_VFIO_GROUP_DEL 2
1100#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3
1090 1101
1091enum kvm_device_type { 1102enum kvm_device_type {
1092 KVM_DEV_TYPE_FSL_MPIC_20 = 1, 1103 KVM_DEV_TYPE_FSL_MPIC_20 = 1,
@@ -1108,6 +1119,11 @@ enum kvm_device_type {
1108 KVM_DEV_TYPE_MAX, 1119 KVM_DEV_TYPE_MAX,
1109}; 1120};
1110 1121
1122struct kvm_vfio_spapr_tce {
1123 __s32 groupfd;
1124 __s32 tablefd;
1125};
1126
1111/* 1127/*
1112 * ioctls for VM fds 1128 * ioctls for VM fds
1113 */ 1129 */
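The uapi additions above include the new KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE group attribute and the struct kvm_vfio_spapr_tce payload that ties a VFIO group fd to an sPAPR TCE table fd. A hedged userspace sketch of how such a device attribute is typically set; the KVM VFIO device fd and the two other descriptors are assumed to have been created already:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch only: associate a VFIO group with a TCE table through the KVM
 * VFIO pseudo-device fd, using the attribute added above. */
static int set_spapr_tce(int vfio_dev_fd, int group_fd, int table_fd)
{
        struct kvm_vfio_spapr_tce param = {
                .groupfd = group_fd,
                .tablefd = table_fd,
        };
        struct kvm_device_attr attr = {
                .group = KVM_DEV_VFIO_GROUP,
                .attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
                .addr  = (__u64)(unsigned long)&param,
        };

        return ioctl(vfio_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
}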
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 7ebb23836f68..b1ccb58ad397 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry)
267{ 267{
268 struct swap_slots_cache *cache; 268 struct swap_slots_cache *cache;
269 269
270 WARN_ON_ONCE(!swap_slot_cache_initialized);
271
272 cache = &get_cpu_var(swp_slots); 270 cache = &get_cpu_var(swp_slots);
273 if (use_swap_slot_cache && cache->slots_ret) { 271 if (use_swap_slot_cache && cache->slots_ret) {
274 spin_lock_irq(&cache->free_lock); 272 spin_lock_irq(&cache->free_lock);
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index 2c9082ba6137..116b7735ee9f 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -148,6 +148,7 @@ cat << EOF
148#define __IGNORE_sysfs 148#define __IGNORE_sysfs
149#define __IGNORE_uselib 149#define __IGNORE_uselib
150#define __IGNORE__sysctl 150#define __IGNORE__sysctl
151#define __IGNORE_arch_prctl
151 152
152/* ... including the "new" 32-bit uid syscalls */ 153/* ... including the "new" 32-bit uid syscalls */
153#define __IGNORE_lchown32 154#define __IGNORE_lchown32
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 581278c58488..8f74ed8e7237 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -30,8 +30,8 @@ import fcntl
30import resource 30import resource
31import struct 31import struct
32import re 32import re
33import subprocess
33from collections import defaultdict 34from collections import defaultdict
34from time import sleep
35 35
36VMX_EXIT_REASONS = { 36VMX_EXIT_REASONS = {
37 'EXCEPTION_NMI': 0, 37 'EXCEPTION_NMI': 0,
@@ -225,6 +225,7 @@ IOCTL_NUMBERS = {
225 'RESET': 0x00002403, 225 'RESET': 0x00002403,
226} 226}
227 227
228
228class Arch(object): 229class Arch(object):
229 """Encapsulates global architecture specific data. 230 """Encapsulates global architecture specific data.
230 231
@@ -255,12 +256,14 @@ class Arch(object):
255 return ArchX86(SVM_EXIT_REASONS) 256 return ArchX86(SVM_EXIT_REASONS)
256 return 257 return
257 258
259
258class ArchX86(Arch): 260class ArchX86(Arch):
259 def __init__(self, exit_reasons): 261 def __init__(self, exit_reasons):
260 self.sc_perf_evt_open = 298 262 self.sc_perf_evt_open = 298
261 self.ioctl_numbers = IOCTL_NUMBERS 263 self.ioctl_numbers = IOCTL_NUMBERS
262 self.exit_reasons = exit_reasons 264 self.exit_reasons = exit_reasons
263 265
266
264class ArchPPC(Arch): 267class ArchPPC(Arch):
265 def __init__(self): 268 def __init__(self):
266 self.sc_perf_evt_open = 319 269 self.sc_perf_evt_open = 319
@@ -275,12 +278,14 @@ class ArchPPC(Arch):
275 self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 278 self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
276 self.exit_reasons = {} 279 self.exit_reasons = {}
277 280
281
278class ArchA64(Arch): 282class ArchA64(Arch):
279 def __init__(self): 283 def __init__(self):
280 self.sc_perf_evt_open = 241 284 self.sc_perf_evt_open = 241
281 self.ioctl_numbers = IOCTL_NUMBERS 285 self.ioctl_numbers = IOCTL_NUMBERS
282 self.exit_reasons = AARCH64_EXIT_REASONS 286 self.exit_reasons = AARCH64_EXIT_REASONS
283 287
288
284class ArchS390(Arch): 289class ArchS390(Arch):
285 def __init__(self): 290 def __init__(self):
286 self.sc_perf_evt_open = 331 291 self.sc_perf_evt_open = 331
@@ -316,6 +321,61 @@ def parse_int_list(list_string):
316 return integers 321 return integers
317 322
318 323
324def get_pid_from_gname(gname):
325 """Fuzzy function to convert guest name to QEMU process pid.
326
327 Returns a list of potential pids, can be empty if no match found.
328 Throws an exception on processing errors.
329
330 """
331 pids = []
332 try:
333 child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
334 stdout=subprocess.PIPE)
335 except:
336 raise Exception
337 for line in child.stdout:
338 line = line.lstrip().split(' ', 1)
339 # perform a sanity check before calling the more expensive
340 # function to possibly extract the guest name
341 if ' -name ' in line[1] and gname == get_gname_from_pid(line[0]):
342 pids.append(int(line[0]))
343 child.stdout.close()
344
345 return pids
346
347
348def get_gname_from_pid(pid):
349 """Returns the guest name for a QEMU process pid.
350
 351 Extracts the guest name from the QEMU command line by processing the '-name'
352 option. Will also handle names specified out of sequence.
353
354 """
355 name = ''
356 try:
357 line = open('/proc/{}/cmdline'.format(pid), 'rb').read().split('\0')
358 parms = line[line.index('-name') + 1].split(',')
359 while '' in parms:
360 # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in
361 # ['foo', '', 'bar'], which we revert here
362 idx = parms.index('')
363 parms[idx - 1] += ',' + parms[idx + 1]
364 del parms[idx:idx+2]
365 # the '-name' switch allows for two ways to specify the guest name,
366 # where the plain name overrides the name specified via 'guest='
367 for arg in parms:
368 if '=' not in arg:
369 name = arg
370 break
371 if arg[:6] == 'guest=':
372 name = arg[6:]
373 except (ValueError, IOError, IndexError):
374 pass
375
376 return name
377
378
319def get_online_cpus(): 379def get_online_cpus():
320 """Returns a list of cpu id integers.""" 380 """Returns a list of cpu id integers."""
321 with open('/sys/devices/system/cpu/online') as cpu_list: 381 with open('/sys/devices/system/cpu/online') as cpu_list:
@@ -342,6 +402,7 @@ def get_filters():
342libc = ctypes.CDLL('libc.so.6', use_errno=True) 402libc = ctypes.CDLL('libc.so.6', use_errno=True)
343syscall = libc.syscall 403syscall = libc.syscall
344 404
405
345class perf_event_attr(ctypes.Structure): 406class perf_event_attr(ctypes.Structure):
346 """Struct that holds the necessary data to set up a trace event. 407 """Struct that holds the necessary data to set up a trace event.
347 408
@@ -370,6 +431,7 @@ class perf_event_attr(ctypes.Structure):
370 self.size = ctypes.sizeof(self) 431 self.size = ctypes.sizeof(self)
371 self.read_format = PERF_FORMAT_GROUP 432 self.read_format = PERF_FORMAT_GROUP
372 433
434
373def perf_event_open(attr, pid, cpu, group_fd, flags): 435def perf_event_open(attr, pid, cpu, group_fd, flags):
374 """Wrapper for the sys_perf_evt_open() syscall. 436 """Wrapper for the sys_perf_evt_open() syscall.
375 437
@@ -395,6 +457,7 @@ PERF_FORMAT_GROUP = 1 << 3
395PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' 457PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
396PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' 458PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
397 459
460
398class Group(object): 461class Group(object):
399 """Represents a perf event group.""" 462 """Represents a perf event group."""
400 463
@@ -427,6 +490,7 @@ class Group(object):
427 struct.unpack(read_format, 490 struct.unpack(read_format,
428 os.read(self.events[0].fd, length)))) 491 os.read(self.events[0].fd, length))))
429 492
493
430class Event(object): 494class Event(object):
431 """Represents a performance event and manages its life cycle.""" 495 """Represents a performance event and manages its life cycle."""
432 def __init__(self, name, group, trace_cpu, trace_pid, trace_point, 496 def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
@@ -510,6 +574,7 @@ class Event(object):
510 """Resets the count of the trace event in the kernel.""" 574 """Resets the count of the trace event in the kernel."""
511 fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) 575 fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
512 576
577
513class TracepointProvider(object): 578class TracepointProvider(object):
514 """Data provider for the stats class. 579 """Data provider for the stats class.
515 580
@@ -551,6 +616,7 @@ class TracepointProvider(object):
551 def setup_traces(self): 616 def setup_traces(self):
552 """Creates all event and group objects needed to be able to retrieve 617 """Creates all event and group objects needed to be able to retrieve
553 data.""" 618 data."""
619 fields = self.get_available_fields()
554 if self._pid > 0: 620 if self._pid > 0:
555 # Fetch list of all threads of the monitored pid, as qemu 621 # Fetch list of all threads of the monitored pid, as qemu
556 # starts a thread for each vcpu. 622 # starts a thread for each vcpu.
@@ -561,7 +627,7 @@ class TracepointProvider(object):
561 627
562 # The constant is needed as a buffer for python libs, std 628 # The constant is needed as a buffer for python libs, std
563 # streams and other files that the script opens. 629 # streams and other files that the script opens.
564 newlim = len(groupids) * len(self._fields) + 50 630 newlim = len(groupids) * len(fields) + 50
565 try: 631 try:
566 softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) 632 softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
567 633
@@ -577,7 +643,7 @@ class TracepointProvider(object):
577 643
578 for groupid in groupids: 644 for groupid in groupids:
579 group = Group() 645 group = Group()
580 for name in self._fields: 646 for name in fields:
581 tracepoint = name 647 tracepoint = name
582 tracefilter = None 648 tracefilter = None
583 match = re.match(r'(.*)\((.*)\)', name) 649 match = re.match(r'(.*)\((.*)\)', name)
@@ -650,13 +716,23 @@ class TracepointProvider(object):
650 ret[name] += val 716 ret[name] += val
651 return ret 717 return ret
652 718
719 def reset(self):
720 """Reset all field counters"""
721 for group in self.group_leaders:
722 for event in group.events:
723 event.reset()
724
725
653class DebugfsProvider(object): 726class DebugfsProvider(object):
654 """Provides data from the files that KVM creates in the kvm debugfs 727 """Provides data from the files that KVM creates in the kvm debugfs
655 folder.""" 728 folder."""
656 def __init__(self): 729 def __init__(self):
657 self._fields = self.get_available_fields() 730 self._fields = self.get_available_fields()
731 self._baseline = {}
658 self._pid = 0 732 self._pid = 0
659 self.do_read = True 733 self.do_read = True
734 self.paths = []
735 self.reset()
660 736
661 def get_available_fields(self): 737 def get_available_fields(self):
662 """"Returns a list of available fields. 738 """"Returns a list of available fields.
@@ -673,6 +749,7 @@ class DebugfsProvider(object):
673 @fields.setter 749 @fields.setter
674 def fields(self, fields): 750 def fields(self, fields):
675 self._fields = fields 751 self._fields = fields
752 self.reset()
676 753
677 @property 754 @property
678 def pid(self): 755 def pid(self):
@@ -690,10 +767,11 @@ class DebugfsProvider(object):
690 self.paths = filter(lambda x: "{}-".format(pid) in x, vms) 767 self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
691 768
692 else: 769 else:
693 self.paths = [''] 770 self.paths = []
694 self.do_read = True 771 self.do_read = True
772 self.reset()
695 773
696 def read(self): 774 def read(self, reset=0):
697 """Returns a dict with format:'file name / field -> current value'.""" 775 """Returns a dict with format:'file name / field -> current value'."""
698 results = {} 776 results = {}
699 777
@@ -701,10 +779,22 @@ class DebugfsProvider(object):
701 if not self.do_read: 779 if not self.do_read:
702 return results 780 return results
703 781
704 for path in self.paths: 782 paths = self.paths
783 if self._pid == 0:
784 paths = []
785 for entry in os.walk(PATH_DEBUGFS_KVM):
786 for dir in entry[1]:
787 paths.append(dir)
788 for path in paths:
705 for field in self._fields: 789 for field in self._fields:
706 results[field] = results.get(field, 0) \ 790 value = self.read_field(field, path)
707 + self.read_field(field, path) 791 key = path + field
792 if reset:
793 self._baseline[key] = value
794 if self._baseline.get(key, -1) == -1:
795 self._baseline[key] = value
796 results[field] = (results.get(field, 0) + value -
797 self._baseline.get(key, 0))
708 798
709 return results 799 return results
710 800
@@ -718,6 +808,12 @@ class DebugfsProvider(object):
718 except IOError: 808 except IOError:
719 return 0 809 return 0
720 810
811 def reset(self):
812 """Reset field counters"""
813 self._baseline = {}
814 self.read(1)
815
816
721class Stats(object): 817class Stats(object):
722 """Manages the data providers and the data they provide. 818 """Manages the data providers and the data they provide.
723 819
@@ -753,14 +849,20 @@ class Stats(object):
753 for provider in self.providers: 849 for provider in self.providers:
754 provider.pid = self._pid_filter 850 provider.pid = self._pid_filter
755 851
852 def reset(self):
853 self.values = {}
854 for provider in self.providers:
855 provider.reset()
856
756 @property 857 @property
757 def fields_filter(self): 858 def fields_filter(self):
758 return self._fields_filter 859 return self._fields_filter
759 860
760 @fields_filter.setter 861 @fields_filter.setter
761 def fields_filter(self, fields_filter): 862 def fields_filter(self, fields_filter):
762 self._fields_filter = fields_filter 863 if fields_filter != self._fields_filter:
763 self.update_provider_filters() 864 self._fields_filter = fields_filter
865 self.update_provider_filters()
764 866
765 @property 867 @property
766 def pid_filter(self): 868 def pid_filter(self):
@@ -768,9 +870,10 @@ class Stats(object):
768 870
769 @pid_filter.setter 871 @pid_filter.setter
770 def pid_filter(self, pid): 872 def pid_filter(self, pid):
771 self._pid_filter = pid 873 if pid != self._pid_filter:
772 self.values = {} 874 self._pid_filter = pid
773 self.update_provider_pid() 875 self.values = {}
876 self.update_provider_pid()
774 877
775 def get(self): 878 def get(self):
776 """Returns a dict with field -> (value, delta to last value) of all 879 """Returns a dict with field -> (value, delta to last value) of all
@@ -778,23 +881,26 @@ class Stats(object):
778 for provider in self.providers: 881 for provider in self.providers:
779 new = provider.read() 882 new = provider.read()
780 for key in provider.fields: 883 for key in provider.fields:
781 oldval = self.values.get(key, (0, 0)) 884 oldval = self.values.get(key, (0, 0))[0]
782 newval = new.get(key, 0) 885 newval = new.get(key, 0)
783 newdelta = None 886 newdelta = newval - oldval
784 if oldval is not None:
785 newdelta = newval - oldval[0]
786 self.values[key] = (newval, newdelta) 887 self.values[key] = (newval, newdelta)
787 return self.values 888 return self.values
788 889
789LABEL_WIDTH = 40 890LABEL_WIDTH = 40
790NUMBER_WIDTH = 10 891NUMBER_WIDTH = 10
892DELAY_INITIAL = 0.25
893DELAY_REGULAR = 3.0
894MAX_GUEST_NAME_LEN = 48
895MAX_REGEX_LEN = 44
896DEFAULT_REGEX = r'^[^\(]*$'
897
791 898
792class Tui(object): 899class Tui(object):
793 """Instruments curses to draw a nice text ui.""" 900 """Instruments curses to draw a nice text ui."""
794 def __init__(self, stats): 901 def __init__(self, stats):
795 self.stats = stats 902 self.stats = stats
796 self.screen = None 903 self.screen = None
797 self.drilldown = False
798 self.update_drilldown() 904 self.update_drilldown()
799 905
800 def __enter__(self): 906 def __enter__(self):
@@ -809,7 +915,14 @@ class Tui(object):
809 # return from C start_color() is ignorable. 915 # return from C start_color() is ignorable.
810 try: 916 try:
811 curses.start_color() 917 curses.start_color()
812 except: 918 except curses.error:
919 pass
920
921 # Hide cursor in extra statement as some monochrome terminals
922 # might support hiding but not colors.
923 try:
924 curses.curs_set(0)
925 except curses.error:
813 pass 926 pass
814 927
815 curses.use_default_colors() 928 curses.use_default_colors()
@@ -827,36 +940,60 @@ class Tui(object):
827 def update_drilldown(self): 940 def update_drilldown(self):
828 """Sets or removes a filter that only allows fields without braces.""" 941 """Sets or removes a filter that only allows fields without braces."""
829 if not self.stats.fields_filter: 942 if not self.stats.fields_filter:
830 self.stats.fields_filter = r'^[^\(]*$' 943 self.stats.fields_filter = DEFAULT_REGEX
831 944
832 elif self.stats.fields_filter == r'^[^\(]*$': 945 elif self.stats.fields_filter == DEFAULT_REGEX:
833 self.stats.fields_filter = None 946 self.stats.fields_filter = None
834 947
835 def update_pid(self, pid): 948 def update_pid(self, pid):
836 """Propagates pid selection to stats object.""" 949 """Propagates pid selection to stats object."""
837 self.stats.pid_filter = pid 950 self.stats.pid_filter = pid
838 951
839 def refresh(self, sleeptime): 952 def refresh_header(self, pid=None):
840 """Refreshes on-screen data.""" 953 """Refreshes the header."""
954 if pid is None:
955 pid = self.stats.pid_filter
841 self.screen.erase() 956 self.screen.erase()
842 if self.stats.pid_filter > 0: 957 gname = get_gname_from_pid(pid)
843 self.screen.addstr(0, 0, 'kvm statistics - pid {0}' 958 if gname:
844 .format(self.stats.pid_filter), 959 gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
845 curses.A_BOLD) 960 if len(gname) > MAX_GUEST_NAME_LEN
961 else gname))
962 if pid > 0:
963 self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
964 .format(pid, gname), curses.A_BOLD)
846 else: 965 else:
847 self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) 966 self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
967 if self.stats.fields_filter and self.stats.fields_filter \
968 != DEFAULT_REGEX:
969 regex = self.stats.fields_filter
970 if len(regex) > MAX_REGEX_LEN:
971 regex = regex[:MAX_REGEX_LEN] + '...'
972 self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
848 self.screen.addstr(2, 1, 'Event') 973 self.screen.addstr(2, 1, 'Event')
849 self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - 974 self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
850 len('Total'), 'Total') 975 len('Total'), 'Total')
851 self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - 976 self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 -
977 len('%Total'), '%Total')
978 self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 + 8 -
852 len('Current'), 'Current') 979 len('Current'), 'Current')
980 self.screen.addstr(4, 1, 'Collecting data...')
981 self.screen.refresh()
982
983 def refresh_body(self, sleeptime):
853 row = 3 984 row = 3
985 self.screen.move(row, 0)
986 self.screen.clrtobot()
854 stats = self.stats.get() 987 stats = self.stats.get()
988
855 def sortkey(x): 989 def sortkey(x):
856 if stats[x][1]: 990 if stats[x][1]:
857 return (-stats[x][1], -stats[x][0]) 991 return (-stats[x][1], -stats[x][0])
858 else: 992 else:
859 return (0, -stats[x][0]) 993 return (0, -stats[x][0])
994 total = 0.
995 for val in stats.values():
996 total += val[0]
860 for key in sorted(stats.keys(), key=sortkey): 997 for key in sorted(stats.keys(), key=sortkey):
861 998
862 if row >= self.screen.getmaxyx()[0]: 999 if row >= self.screen.getmaxyx()[0]:
@@ -869,6 +1006,8 @@ class Tui(object):
869 col += LABEL_WIDTH 1006 col += LABEL_WIDTH
870 self.screen.addstr(row, col, '%10d' % (values[0],)) 1007 self.screen.addstr(row, col, '%10d' % (values[0],))
871 col += NUMBER_WIDTH 1008 col += NUMBER_WIDTH
1009 self.screen.addstr(row, col, '%7.1f' % (values[0] * 100 / total,))
1010 col += 7
872 if values[1] is not None: 1011 if values[1] is not None:
873 self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) 1012 self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
874 row += 1 1013 row += 1
@@ -893,20 +1032,24 @@ class Tui(object):
893 regex = self.screen.getstr() 1032 regex = self.screen.getstr()
894 curses.noecho() 1033 curses.noecho()
895 if len(regex) == 0: 1034 if len(regex) == 0:
1035 self.stats.fields_filter = DEFAULT_REGEX
1036 self.refresh_header()
896 return 1037 return
897 try: 1038 try:
898 re.compile(regex) 1039 re.compile(regex)
899 self.stats.fields_filter = regex 1040 self.stats.fields_filter = regex
1041 self.refresh_header()
900 return 1042 return
901 except re.error: 1043 except re.error:
902 continue 1044 continue
903 1045
904 def show_vm_selection(self): 1046 def show_vm_selection_by_pid(self):
905 """Draws PID selection mask. 1047 """Draws PID selection mask.
906 1048
907 Asks for a pid until a valid pid or 0 has been entered. 1049 Asks for a pid until a valid pid or 0 has been entered.
908 1050
909 """ 1051 """
1052 msg = ''
910 while True: 1053 while True:
911 self.screen.erase() 1054 self.screen.erase()
912 self.screen.addstr(0, 0, 1055 self.screen.addstr(0, 0,
@@ -915,6 +1058,7 @@ class Tui(object):
915 self.screen.addstr(1, 0, 1058 self.screen.addstr(1, 0,
916 'This might limit the shown data to the trace ' 1059 'This might limit the shown data to the trace '
917 'statistics.') 1060 'statistics.')
1061 self.screen.addstr(5, 0, msg)
918 1062
919 curses.echo() 1063 curses.echo()
920 self.screen.addstr(3, 0, "Pid [0 or pid]: ") 1064 self.screen.addstr(3, 0, "Pid [0 or pid]: ")
@@ -922,60 +1066,128 @@ class Tui(object):
922 curses.noecho() 1066 curses.noecho()
923 1067
924 try: 1068 try:
925 pid = int(pid) 1069 if len(pid) > 0:
926 1070 pid = int(pid)
927 if pid == 0: 1071 if pid != 0 and not os.path.isdir(os.path.join('/proc/',
928 self.update_pid(pid) 1072 str(pid))):
929 break 1073 msg = '"' + str(pid) + '": Not a running process'
930 else:
931 if not os.path.isdir(os.path.join('/proc/', str(pid))):
932 continue 1074 continue
933 else: 1075 else:
934 self.update_pid(pid) 1076 pid = 0
935 break 1077 self.refresh_header(pid)
1078 self.update_pid(pid)
1079 break
936 1080
937 except ValueError: 1081 except ValueError:
1082 msg = '"' + str(pid) + '": Not a valid pid'
938 continue 1083 continue
939 1084
1085 def show_vm_selection_by_guest_name(self):
1086 """Draws guest selection mask.
1087
1088 Asks for a guest name until a valid guest name or '' is entered.
1089
1090 """
1091 msg = ''
1092 while True:
1093 self.screen.erase()
1094 self.screen.addstr(0, 0,
1095 'Show statistics for specific guest.',
1096 curses.A_BOLD)
1097 self.screen.addstr(1, 0,
1098 'This might limit the shown data to the trace '
1099 'statistics.')
1100 self.screen.addstr(5, 0, msg)
1101 curses.echo()
1102 self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
1103 gname = self.screen.getstr()
1104 curses.noecho()
1105
1106 if not gname:
1107 self.refresh_header(0)
1108 self.update_pid(0)
1109 break
1110 else:
1111 pids = []
1112 try:
1113 pids = get_pid_from_gname(gname)
1114 except:
1115 msg = '"' + gname + '": Internal error while searching, ' \
1116 'use pid filter instead'
1117 continue
1118 if len(pids) == 0:
1119 msg = '"' + gname + '": Not an active guest'
1120 continue
1121 if len(pids) > 1:
1122 msg = '"' + gname + '": Multiple matches found, use pid ' \
1123 'filter instead'
1124 continue
1125 self.refresh_header(pids[0])
1126 self.update_pid(pids[0])
1127 break
1128
940 def show_stats(self): 1129 def show_stats(self):
941 """Refreshes the screen and processes user input.""" 1130 """Refreshes the screen and processes user input."""
942 sleeptime = 0.25 1131 sleeptime = DELAY_INITIAL
1132 self.refresh_header()
943 while True: 1133 while True:
944 self.refresh(sleeptime) 1134 self.refresh_body(sleeptime)
945 curses.halfdelay(int(sleeptime * 10)) 1135 curses.halfdelay(int(sleeptime * 10))
946 sleeptime = 3 1136 sleeptime = DELAY_REGULAR
947 try: 1137 try:
948 char = self.screen.getkey() 1138 char = self.screen.getkey()
949 if char == 'x': 1139 if char == 'x':
950 self.drilldown = not self.drilldown 1140 self.refresh_header()
951 self.update_drilldown() 1141 self.update_drilldown()
1142 sleeptime = DELAY_INITIAL
952 if char == 'q': 1143 if char == 'q':
953 break 1144 break
1145 if char == 'c':
1146 self.stats.fields_filter = DEFAULT_REGEX
1147 self.refresh_header(0)
1148 self.update_pid(0)
1149 sleeptime = DELAY_INITIAL
954 if char == 'f': 1150 if char == 'f':
955 self.show_filter_selection() 1151 self.show_filter_selection()
1152 sleeptime = DELAY_INITIAL
1153 if char == 'g':
1154 self.show_vm_selection_by_guest_name()
1155 sleeptime = DELAY_INITIAL
956 if char == 'p': 1156 if char == 'p':
957 self.show_vm_selection() 1157 self.show_vm_selection_by_pid()
1158 sleeptime = DELAY_INITIAL
1159 if char == 'r':
1160 self.refresh_header()
1161 self.stats.reset()
1162 sleeptime = DELAY_INITIAL
958 except KeyboardInterrupt: 1163 except KeyboardInterrupt:
959 break 1164 break
960 except curses.error: 1165 except curses.error:
961 continue 1166 continue
962 1167
1168
963def batch(stats): 1169def batch(stats):
964 """Prints statistics in a key, value format.""" 1170 """Prints statistics in a key, value format."""
965 s = stats.get() 1171 try:
966 time.sleep(1) 1172 s = stats.get()
967 s = stats.get() 1173 time.sleep(1)
968 for key in sorted(s.keys()): 1174 s = stats.get()
969 values = s[key] 1175 for key in sorted(s.keys()):
970 print '%-42s%10d%10d' % (key, values[0], values[1]) 1176 values = s[key]
1177 print '%-42s%10d%10d' % (key, values[0], values[1])
1178 except KeyboardInterrupt:
1179 pass
1180
971 1181
972def log(stats): 1182def log(stats):
973 """Prints statistics as reiterating key block, multiple value blocks.""" 1183 """Prints statistics as reiterating key block, multiple value blocks."""
974 keys = sorted(stats.get().iterkeys()) 1184 keys = sorted(stats.get().iterkeys())
1185
975 def banner(): 1186 def banner():
976 for k in keys: 1187 for k in keys:
977 print '%s' % k, 1188 print '%s' % k,
978 print 1189 print
1190
979 def statline(): 1191 def statline():
980 s = stats.get() 1192 s = stats.get()
981 for k in keys: 1193 for k in keys:
@@ -984,11 +1196,15 @@ def log(stats):
984 line = 0 1196 line = 0
985 banner_repeat = 20 1197 banner_repeat = 20
986 while True: 1198 while True:
987 time.sleep(1) 1199 try:
988 if line % banner_repeat == 0: 1200 time.sleep(1)
989 banner() 1201 if line % banner_repeat == 0:
990 statline() 1202 banner()
991 line += 1 1203 statline()
1204 line += 1
1205 except KeyboardInterrupt:
1206 break
1207
992 1208
993def get_options(): 1209def get_options():
994 """Returns processed program arguments.""" 1210 """Returns processed program arguments."""
@@ -1009,6 +1225,16 @@ Requirements:
1009 CAP_SYS_ADMIN and perf events are used. 1225 CAP_SYS_ADMIN and perf events are used.
1010- CAP_SYS_RESOURCE if the hard limit is not high enough to allow 1226- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
1011 the large number of files that are possibly opened. 1227 the large number of files that are possibly opened.
1228
1229Interactive Commands:
1230 c clear filter
1231 f filter by regular expression
1232 g filter by guest name
1233 p filter by PID
1234 q quit
1235 x toggle reporting of stats for individual child trace events
1236 r reset stats
1237Press any other key to refresh statistics immediately.
1012""" 1238"""
1013 1239
1014 class PlainHelpFormatter(optparse.IndentedHelpFormatter): 1240 class PlainHelpFormatter(optparse.IndentedHelpFormatter):
@@ -1018,6 +1244,22 @@ Requirements:
1018 else: 1244 else:
1019 return "" 1245 return ""
1020 1246
1247 def cb_guest_to_pid(option, opt, val, parser):
1248 try:
1249 pids = get_pid_from_gname(val)
1250 except:
1251 raise optparse.OptionValueError('Error while searching for guest '
1252 '"{}", use "-p" to specify a pid '
1253 'instead'.format(val))
1254 if len(pids) == 0:
1255 raise optparse.OptionValueError('No guest by the name "{}" '
1256 'found'.format(val))
1257 if len(pids) > 1:
1258 raise optparse.OptionValueError('Multiple processes found (pids: '
1259 '{}) - use "-p" to specify a pid '
1260 'instead'.format(" ".join(pids)))
1261 parser.values.pid = pids[0]
1262
1021 optparser = optparse.OptionParser(description=description_text, 1263 optparser = optparse.OptionParser(description=description_text,
1022 formatter=PlainHelpFormatter()) 1264 formatter=PlainHelpFormatter())
1023 optparser.add_option('-1', '--once', '--batch', 1265 optparser.add_option('-1', '--once', '--batch',
@@ -1051,15 +1293,24 @@ Requirements:
1051 help='fields to display (regex)', 1293 help='fields to display (regex)',
1052 ) 1294 )
1053 optparser.add_option('-p', '--pid', 1295 optparser.add_option('-p', '--pid',
1054 action='store', 1296 action='store',
1055 default=0, 1297 default=0,
1056 type=int, 1298 type='int',
1057 dest='pid', 1299 dest='pid',
1058 help='restrict statistics to pid', 1300 help='restrict statistics to pid',
1059 ) 1301 )
1302 optparser.add_option('-g', '--guest',
1303 action='callback',
1304 type='string',
1305 dest='pid',
1306 metavar='GUEST',
1307 help='restrict statistics to guest by name',
1308 callback=cb_guest_to_pid,
1309 )
1060 (options, _) = optparser.parse_args(sys.argv) 1310 (options, _) = optparser.parse_args(sys.argv)
1061 return options 1311 return options
1062 1312
1313
1063def get_providers(options): 1314def get_providers(options):
1064 """Returns a list of data providers depending on the passed options.""" 1315 """Returns a list of data providers depending on the passed options."""
1065 providers = [] 1316 providers = []
@@ -1073,6 +1324,7 @@ def get_providers(options):
1073 1324
1074 return providers 1325 return providers
1075 1326
1327
1076def check_access(options): 1328def check_access(options):
1077 """Exits if the current user can't access all needed directories.""" 1329 """Exits if the current user can't access all needed directories."""
1078 if not os.path.exists('/sys/kernel/debug'): 1330 if not os.path.exists('/sys/kernel/debug'):
@@ -1086,8 +1338,8 @@ def check_access(options):
1086 "Also ensure, that the kvm modules are loaded.\n") 1338 "Also ensure, that the kvm modules are loaded.\n")
1087 sys.exit(1) 1339 sys.exit(1)
1088 1340
1089 if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints 1341 if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
1090 or not options.debugfs): 1342 not options.debugfs):
1091 sys.stderr.write("Please enable CONFIG_TRACING in your kernel " 1343 sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
1092 "when using the option -t (default).\n" 1344 "when using the option -t (default).\n"
1093 "If it is enabled, make {0} readable by the " 1345 "If it is enabled, make {0} readable by the "
@@ -1098,10 +1350,11 @@ def check_access(options):
1098 1350
1099 sys.stderr.write("Falling back to debugfs statistics!\n") 1351 sys.stderr.write("Falling back to debugfs statistics!\n")
1100 options.debugfs = True 1352 options.debugfs = True
1101 sleep(5) 1353 time.sleep(5)
1102 1354
1103 return options 1355 return options
1104 1356
1357
1105def main(): 1358def main():
1106 options = get_options() 1359 options = get_options()
1107 options = check_access(options) 1360 options = check_access(options)
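The new get_pid_from_gname()/get_gname_from_pid() helpers above resolve a guest name by scanning ps output and reading /proc/<pid>/cmdline, where QEMU's arguments are NUL-separated. For illustration, the same /proc lookup expressed as a small C sketch; it handles only the plain "-name <guest>" form, not the "guest=" or comma-escaped variants the Python code also covers:

#include <stdio.h>
#include <string.h>

/* Hypothetical helper: copy the argument following "-name" out of
 * /proc/<pid>/cmdline (arguments separated by NUL bytes); return 0 on
 * success, -1 if unreadable or not found. */
static int read_qemu_name(int pid, char *name, size_t len)
{
        char path[64], buf[4096];
        size_t n, off;
        FILE *f;

        snprintf(path, sizeof(path), "/proc/%d/cmdline", pid);
        f = fopen(path, "rb");
        if (!f)
                return -1;
        n = fread(buf, 1, sizeof(buf) - 1, f);
        fclose(f);
        buf[n] = '\0';

        for (off = 0; off < n; off += strlen(buf + off) + 1) {
                if (strcmp(buf + off, "-name") == 0) {
                        size_t next = off + strlen(buf + off) + 1;

                        if (next >= n)
                                break;
                        snprintf(name, len, "%s", buf + next);
                        return 0;
                }
        }
        return -1;
}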
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index b92a153d7115..109431bdc63c 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -18,11 +18,33 @@ state transitions such as guest mode entry and exit.
18This tool is useful for observing guest behavior from the host perspective. 18This tool is useful for observing guest behavior from the host perspective.
19Often conclusions about performance or buggy behavior can be drawn from the 19Often conclusions about performance or buggy behavior can be drawn from the
20output. 20output.
21While running in regular mode, use any of the keys listed in section
22'Interactive Commands' below.
23Use batch and logging modes for scripting purposes.
21 24
22The set of KVM kernel module trace events may be specific to the kernel version 25The set of KVM kernel module trace events may be specific to the kernel version
23or architecture. It is best to check the KVM kernel module source code for the 26or architecture. It is best to check the KVM kernel module source code for the
24meaning of events. 27meaning of events.
25 28
29INTERACTIVE COMMANDS
30--------------------
31[horizontal]
32*c*:: clear filter
33
34*f*:: filter by regular expression
35
36*g*:: filter by guest name
37
38*p*:: filter by PID
39
40*q*:: quit
41
42*r*:: reset stats
43
44*x*:: toggle reporting of stats for child trace events
45
46Press any other key to refresh statistics immediately.
47
26OPTIONS 48OPTIONS
27------- 49-------
28-1:: 50-1::
@@ -46,6 +68,10 @@ OPTIONS
46--pid=<pid>:: 68--pid=<pid>::
47 limit statistics to one virtual machine (pid) 69 limit statistics to one virtual machine (pid)
48 70
71-g<guest>::
72--guest=<guest_name>::
73 limit statistics to one virtual machine (guest name)
74
49-f<fields>:: 75-f<fields>::
50--fields=<fields>:: 76--fields=<fields>::
51 fields to display (regex) 77 fields to display (regex)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786dd9522..a8d540398bbd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
490 mutex_lock(&kvm->irq_lock); 490 mutex_lock(&kvm->irq_lock);
491 hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); 491 hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
492 mutex_unlock(&kvm->irq_lock); 492 mutex_unlock(&kvm->irq_lock);
493 kvm_vcpu_request_scan_ioapic(kvm); 493 kvm_arch_post_irq_ack_notifier_list_update(kvm);
494} 494}
495 495
496void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 496void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
         hlist_del_init_rcu(&kian->link);
         mutex_unlock(&kvm->irq_lock);
         synchronize_srcu(&kvm->irq_srcu);
-        kvm_vcpu_request_scan_ioapic(kvm);
+        kvm_arch_post_irq_ack_notifier_list_update(kvm);
 }
 #endif
 
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
                         continue;
 
                 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-                kvm->buses[bus_idx]->ioeventfd_count--;
+                if (kvm->buses[bus_idx])
+                        kvm->buses[bus_idx]->ioeventfd_count--;
                 ioeventfd_release(p);
                 ret = 0;
                 break;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3bcc9990adf7..cc30d01a56be 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm,
                                struct kvm_kernel_irq_routing_entry *e,
                                const struct kvm_irq_routing_entry *ue)
 {
-        int r = -EINVAL;
         struct kvm_kernel_irq_routing_entry *ei;
+        int r;
 
         /*
          * Do not allow GSI to be mapped to the same irqchip more than once.
@@ -153,20 +153,19 @@ static int setup_routing_entry(struct kvm *kvm,
                 if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
                     ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
                     ue->u.irqchip.irqchip == ei->irqchip.irqchip)
-                        return r;
+                        return -EINVAL;
 
         e->gsi = ue->gsi;
         e->type = ue->type;
         r = kvm_set_routing_entry(kvm, e, ue);
         if (r)
-                goto out;
+                return r;
         if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
                 rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
 
         hlist_add_head(&e->link, &rt->map[e->gsi]);
-        r = 0;
-out:
-        return r;
+
+        return 0;
 }
 
 void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1b0da5771f71..4e19bc812c29 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -727,8 +727,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
         list_del(&kvm->vm_list);
         spin_unlock(&kvm_lock);
         kvm_free_irq_routing(kvm);
-        for (i = 0; i < KVM_NR_BUSES; i++)
-                kvm_io_bus_destroy(kvm->buses[i]);
+        for (i = 0; i < KVM_NR_BUSES; i++) {
+                if (kvm->buses[i])
+                        kvm_io_bus_destroy(kvm->buses[i]);
+                kvm->buses[i] = NULL;
+        }
 	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
         mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -1016,8 +1019,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
         old_memslots = install_new_memslots(kvm, as_id, slots);
 
-        /* slot was deleted or moved, clear iommu mapping */
-        kvm_iommu_unmap_pages(kvm, &old);
         /* From this point no new shadow pages pointing to a deleted,
          * or moved, memslot will be created.
          *
@@ -1052,21 +1053,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
         kvm_free_memslot(kvm, &old, &new);
         kvfree(old_memslots);
-
-        /*
-         * IOMMU mapping: New slots need to be mapped. Old slots need to be
-         * un-mapped and re-mapped if their base changes. Since base change
-         * unmapping is handled above with slot deletion, mapping alone is
-         * needed here. Anything else the iommu might care about for existing
-         * slots (size changes, userspace addr changes and read-only flag
-         * changes) is disallowed above, so any other attribute changes getting
-         * here can be skipped.
-         */
-        if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-                r = kvm_iommu_map_pages(kvm, &new);
-                return r;
-        }
-
         return 0;
 
 out_slots:
@@ -2363,7 +2349,7 @@ static int kvm_vcpu_fault(struct vm_fault *vmf)
         else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
                 page = virt_to_page(vcpu->arch.pio_data);
 #endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
         else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
                 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
@@ -2928,6 +2914,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
         case KVM_CAP_IOEVENTFD_ANY_LENGTH:
         case KVM_CAP_CHECK_EXTENSION_VM:
                 return 1;
+#ifdef CONFIG_KVM_MMIO
+        case KVM_CAP_COALESCED_MMIO:
+                return KVM_COALESCED_MMIO_PAGE_OFFSET;
+#endif
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
         case KVM_CAP_IRQ_ROUTING:
                 return KVM_MAX_IRQ_ROUTES;
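
The hunk above lets generic code report KVM_CAP_COALESCED_MMIO: the capability check now returns KVM_COALESCED_MMIO_PAGE_OFFSET, i.e. the page offset of the coalesced-MMIO ring inside the vcpu mmap area, and 0 when the feature is absent. A minimal userspace sketch of how that value is meant to be consumed follows; the helper name and its error handling are illustrative and not part of the patch.

/*
 * Userspace sketch (not from the patch): map the coalesced-MMIO ring.
 * KVM_CHECK_EXTENSION(KVM_CAP_COALESCED_MMIO) yields the ring's page
 * offset within the vcpu mapping, or 0 if unsupported.
 */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kvm.h>

int map_coalesced_ring(int kvm_fd, int vcpu_fd,
                       struct kvm_coalesced_mmio_ring **ring)
{
        int off = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
        int len = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        long page = sysconf(_SC_PAGESIZE);
        char *base;

        if (off <= 0 || len <= 0)
                return -1;      /* not supported by this kernel/arch */

        /* The ring lives inside the regular vcpu mapping, 'off' pages in. */
        base = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
        if (base == MAP_FAILED)
                return -1;

        *ring = (struct kvm_coalesced_mmio_ring *)(base + off * page);
        return 0;
}
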
@@ -2977,7 +2967,7 @@ static long kvm_vm_ioctl(struct file *filp,
                 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
                 break;
         }
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
         case KVM_REGISTER_COALESCED_MMIO: {
                 struct kvm_coalesced_mmio_zone zone;
 
@@ -3075,8 +3065,11 @@ static long kvm_vm_ioctl(struct file *filp,
                                            routing.nr * sizeof(*entries)))
                                 goto out_free_irq_routing;
                 }
+                /* avoid races with KVM_CREATE_IRQCHIP on x86 */
+                mutex_lock(&kvm->lock);
                 r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                         routing.flags);
+                mutex_unlock(&kvm->lock);
 out_free_irq_routing:
                 vfree(entries);
                 break;
@@ -3169,7 +3162,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
         kvm = kvm_create_vm(type);
         if (IS_ERR(kvm))
                 return PTR_ERR(kvm);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
         r = kvm_coalesced_mmio_init(kvm);
         if (r < 0) {
                 kvm_put_kvm(kvm);
@@ -3222,7 +3215,7 @@ static long kvm_dev_ioctl(struct file *filp,
 #ifdef CONFIG_X86
                 r += PAGE_SIZE;    /* pio data page */
 #endif
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#ifdef CONFIG_KVM_MMIO
                 r += PAGE_SIZE;    /* coalesced mmio ring page */
 #endif
                 break;
@@ -3470,6 +3463,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
         };
 
         bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+        if (!bus)
+                return -ENOMEM;
         r = __kvm_io_bus_write(vcpu, bus, &range, val);
         return r < 0 ? r : 0;
 }
@@ -3487,6 +3482,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
         };
 
         bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+        if (!bus)
+                return -ENOMEM;
 
         /* First try the device referenced by cookie. */
         if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3537,6 +3534,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
         };
 
         bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+        if (!bus)
+                return -ENOMEM;
         r = __kvm_io_bus_read(vcpu, bus, &range, val);
         return r < 0 ? r : 0;
 }
@@ -3549,6 +3548,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
         struct kvm_io_bus *new_bus, *bus;
 
         bus = kvm->buses[bus_idx];
+        if (!bus)
+                return -ENOMEM;
+
         /* exclude ioeventfd which is limited by maximum fd */
         if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
                 return -ENOSPC;
@@ -3568,37 +3570,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 }
 
 /* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
                                struct kvm_io_device *dev)
 {
-        int i, r;
+        int i;
         struct kvm_io_bus *new_bus, *bus;
 
         bus = kvm->buses[bus_idx];
-        r = -ENOENT;
+        if (!bus)
+                return;
+
         for (i = 0; i < bus->dev_count; i++)
                 if (bus->range[i].dev == dev) {
-                        r = 0;
                         break;
                 }
 
-        if (r)
-                return r;
+        if (i == bus->dev_count)
+                return;
 
         new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
                           sizeof(struct kvm_io_range)), GFP_KERNEL);
-        if (!new_bus)
-                return -ENOMEM;
+        if (!new_bus) {
+                pr_err("kvm: failed to shrink bus, removing it completely\n");
+                goto broken;
+        }
 
         memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
         new_bus->dev_count--;
         memcpy(new_bus->range + i, bus->range + i + 1,
                (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
 
+broken:
         rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
         synchronize_srcu_expedited(&kvm->srcu);
         kfree(bus);
-        return r;
+        return;
 }
 
 struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3611,6 +3617,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
         srcu_idx = srcu_read_lock(&kvm->srcu);
 
         bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+        if (!bus)
+                goto out_unlock;
 
         dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
         if (dev_idx < 0)
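
The kvm_main.c hunks above all serve one change: kvm_io_bus_unregister_dev() no longer returns an error. When it cannot allocate the shrunk bus it logs, drops the whole bus, and leaves kvm->buses[bus_idx] NULL (kvm_destroy_vm() does the same on teardown), so every later lookup — kvm_io_bus_write/read/write_cookie, kvm_io_bus_register_dev, kvm_io_bus_get_dev and the ioeventfd_count decrement in eventfd.c — has to treat a NULL bus as "no devices". A stand-alone sketch of that pattern, with the SRCU publication of the real code deliberately left out:

/*
 * Stand-alone illustration (not kernel code, no SRCU): if shrinking a
 * device table fails, the table is torn down and its slot set to NULL,
 * so every later lookup must tolerate a NULL table instead of
 * dereferencing it.
 */
#include <stdio.h>
#include <stdlib.h>

struct bus {
        int dev_count;
        int devs[8];
};

static struct bus *buses[4];

void unregister_dev(int bus_idx, int dev)
{
        struct bus *old = buses[bus_idx], *new_bus;
        int i, j;

        if (!old)
                return;                 /* bus already torn down */

        new_bus = malloc(sizeof(*new_bus));
        if (!new_bus) {
                /* mirrors "failed to shrink bus, removing it completely" */
                fprintf(stderr, "dropping bus %d\n", bus_idx);
                buses[bus_idx] = NULL;
                free(old);
                return;
        }

        /* Copy every device except the one being removed. */
        for (i = 0, j = 0; i < old->dev_count; i++)
                if (old->devs[i] != dev)
                        new_bus->devs[j++] = old->devs[i];
        new_bus->dev_count = j;

        buses[bus_idx] = new_bus;
        free(old);
}

int bus_write(int bus_idx, int dev)
{
        struct bus *bus = buses[bus_idx];

        if (!bus)
                return -1;      /* analogous to the new NULL/-ENOMEM checks */
        /* ... look up dev in bus->devs[] and dispatch ... */
        (void)dev;
        return 0;
}
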
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index d32f239eb471..37d9118fd84b 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -20,6 +20,10 @@
 #include <linux/vfio.h>
 #include "vfio.h"
 
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+#include <asm/kvm_ppc.h>
+#endif
+
 struct kvm_vfio_group {
         struct list_head node;
         struct vfio_group *vfio_group;
@@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
         return ret > 0;
 }
 
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group)
+{
+        int (*fn)(struct vfio_group *);
+        int ret = -EINVAL;
+
+        fn = symbol_get(vfio_external_user_iommu_id);
+        if (!fn)
+                return ret;
+
+        ret = fn(vfio_group);
+
+        symbol_put(vfio_external_user_iommu_id);
+
+        return ret;
+}
+
+static struct iommu_group *kvm_vfio_group_get_iommu_group(
+                struct vfio_group *group)
+{
+        int group_id = kvm_vfio_external_user_iommu_id(group);
+
+        if (group_id < 0)
+                return NULL;
+
+        return iommu_group_get_by_id(group_id);
+}
+
+static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
+                struct vfio_group *vfio_group)
+{
+        struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group);
+
+        if (WARN_ON_ONCE(!grp))
+                return;
+
+        kvm_spapr_tce_release_iommu_group(kvm, grp);
+        iommu_group_put(grp);
+}
+#endif
+
 /*
  * Groups can use the same or different IOMMU domains. If the same then
  * adding a new group may change the coherency of groups we've previously
@@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 
                 mutex_unlock(&kv->lock);
 
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+                kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group);
+#endif
                 kvm_vfio_group_set_kvm(vfio_group, NULL);
 
                 kvm_vfio_group_put_external_user(vfio_group);
@@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
                 kvm_vfio_update_coherency(dev);
 
                 return ret;
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+        case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: {
+                struct kvm_vfio_spapr_tce param;
+                struct kvm_vfio *kv = dev->private;
+                struct vfio_group *vfio_group;
+                struct kvm_vfio_group *kvg;
+                struct fd f;
+                struct iommu_group *grp;
+
+                if (copy_from_user(&param, (void __user *)arg,
+                                sizeof(struct kvm_vfio_spapr_tce)))
+                        return -EFAULT;
+
+                f = fdget(param.groupfd);
+                if (!f.file)
+                        return -EBADF;
+
+                vfio_group = kvm_vfio_group_get_external_user(f.file);
+                fdput(f);
+
+                if (IS_ERR(vfio_group))
+                        return PTR_ERR(vfio_group);
+
+                grp = kvm_vfio_group_get_iommu_group(vfio_group);
+                if (WARN_ON_ONCE(!grp)) {
+                        kvm_vfio_group_put_external_user(vfio_group);
+                        return -EIO;
+                }
+
+                ret = -ENOENT;
+
+                mutex_lock(&kv->lock);
+
+                list_for_each_entry(kvg, &kv->group_list, node) {
+                        if (kvg->vfio_group != vfio_group)
+                                continue;
+
+                        ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
+                                        param.tablefd, grp);
+                        break;
+                }
+
+                mutex_unlock(&kv->lock);
+
+                iommu_group_put(grp);
+                kvm_vfio_group_put_external_user(vfio_group);
+
+                return ret;
+        }
+#endif /* CONFIG_SPAPR_TCE_IOMMU */
         }
 
         return -ENXIO;
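
The new KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE attribute takes a struct kvm_vfio_spapr_tce pairing a VFIO group fd with an in-kernel TCE table fd and passes the group's IOMMU group to kvm_spapr_tce_attach_iommu_group(). A sketch of the expected userspace sequence follows; it assumes the VM fd, the VFIO group fd and the table fd (from KVM_CREATE_SPAPR_TCE or KVM_CREATE_SPAPR_TCE_64) already exist, and that the group was previously added with KVM_DEV_VFIO_GROUP_ADD, since the handler above otherwise returns -ENOENT.

/*
 * Userspace sketch: attach an in-kernel TCE table to a VFIO group via
 * the KVM VFIO device. Obtaining group_fd and table_fd is not shown.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int attach_tce_table(int vm_fd, int group_fd, int table_fd)
{
        struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
        struct kvm_vfio_spapr_tce tce = {
                .groupfd = group_fd,
                .tablefd = table_fd,
        };
        struct kvm_device_attr attr = {
                .group = KVM_DEV_VFIO_GROUP,
                .attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
                .addr  = (uint64_t)(unsigned long)&tce,
        };

        /* In practice the VFIO device is created once per VM and reused. */
        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                return -1;

        /* The group must already be known via KVM_DEV_VFIO_GROUP_ADD. */
        return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}
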
@@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
         switch (attr->attr) {
         case KVM_DEV_VFIO_GROUP_ADD:
         case KVM_DEV_VFIO_GROUP_DEL:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+        case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
+#endif
                 return 0;
         }
 
@@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
         struct kvm_vfio_group *kvg, *tmp;
 
         list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+                kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
+#endif
                 kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
                 kvm_vfio_group_put_external_user(kvg->vfio_group);
                 list_del(&kvg->node);