author     Linus Torvalds <torvalds@linux-foundation.org>   2015-06-24 12:36:49 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-06-24 12:36:49 -0400
commit     4e241557fc1cb560bd9e77ca1b4a9352732a5427 (patch)
tree       da4dbe5e5b3a8792daf9ed7e6bd320c56c86d252
parent     08d183e3c1f650b4db1d07d764502116861542fa (diff)
parent     f2ae45edbca7ba5324eef01719ede0151dc5cead (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull first batch of KVM updates from Paolo Bonzini:
 "The bulk of the changes here is for x86.  And for once it's not for
  silicon that no one owns: these are really new features for everyone.

  Details:

   - ARM:
     several features are in progress but missed the 4.2 deadline.
     So here is just a smattering of bug fixes, plus enabling the
     VFIO integration.

   - s390:
     Some fixes/refactorings/optimizations, plus support for 2GB
     pages.

   - x86:
     * host and guest support for marking kvmclock as a stable
       scheduler clock.
     * support for write combining.
     * support for system management mode, needed for secure boot in
       guests.
     * a bunch of cleanups required for the above
     * support for virtualized performance counters on AMD
     * legacy PCI device assignment is deprecated and defaults to "n"
       in Kconfig; VFIO replaces it

     On top of this there are also bug fixes and eager FPU context
     loading for FPU-heavy guests.

   - Common code:
     Support for multiple address spaces; for now it is used only for
     x86 SMM but the s390 folks also have plans"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
KVM: s390: clear floating interrupt bitmap and parameters
KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs
KVM: x86/vPMU: Implement AMD vPMU code for KVM
KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
KVM: x86/vPMU: introduce kvm_pmu_msr_idx_to_pmc
KVM: x86/vPMU: reorder PMU functions
KVM: x86/vPMU: whitespace and stylistic adjustments in PMU code
KVM: x86/vPMU: use the new macros to go between PMC, PMU and VCPU
KVM: x86/vPMU: introduce pmu.h header
KVM: x86/vPMU: rename a few PMU functions
KVM: MTRR: do not map huge page for non-consistent range
KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type
KVM: MTRR: introduce mtrr_for_each_mem_type
KVM: MTRR: introduce fixed_mtrr_addr_* functions
KVM: MTRR: sort variable MTRRs
KVM: MTRR: introduce var_mtrr_range
KVM: MTRR: introduce fixed_mtrr_segment table
KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
KVM: MTRR: do not split 64 bits MSR content
KVM: MTRR: clean up mtrr default type
...
72 files changed, 4077 insertions(+), 1702 deletions(-)
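The system management mode support called out in the pull message is exposed to userspace through the KVM_CAP_X86_SMM capability and a new KVM_SMI vcpu ioctl, both documented in the api.txt hunks below. A minimal userspace sketch, assuming kvm_fd is an open /dev/kvm fd and vcpu_fd an already-created vCPU fd (the VM/vCPU setup is omitted here):

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* Sketch only: kvm_fd and vcpu_fd are assumed to come from the usual
 * open("/dev/kvm") / KVM_CREATE_VM / KVM_CREATE_VCPU sequence. */
static int inject_smi(int kvm_fd, int vcpu_fd)
{
	/* Probe the capability on the system fd; 0 means SMM is unsupported. */
	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_SMM) <= 0) {
		fprintf(stderr, "KVM_CAP_X86_SMM not available\n");
		return -1;
	}

	/* KVM_SMI takes no argument; it simply queues an SMI for this vCPU. */
	if (ioctl(vcpu_fd, KVM_SMI) < 0) {
		perror("KVM_SMI");
		return -1;
	}
	return 0;
}
```

That one call is the whole injection interface; per the new 4.90 section below, the SMI is queued and delivered the next time the vCPU runs.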
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9fa2bf8c3f6f..a7926a90156f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -254,6 +254,11 @@ since the last call to this ioctl. Bit 0 is the first page in the | |||
254 | memory slot. Ensure the entire structure is cleared to avoid padding | 254 | memory slot. Ensure the entire structure is cleared to avoid padding |
255 | issues. | 255 | issues. |
256 | 256 | ||
257 | If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies | ||
258 | the address space for which you want to return the dirty bitmap. | ||
259 | They must be less than the value that KVM_CHECK_EXTENSION returns for | ||
260 | the KVM_CAP_MULTI_ADDRESS_SPACE capability. | ||
261 | |||
257 | 262 | ||
258 | 4.9 KVM_SET_MEMORY_ALIAS | 263 | 4.9 KVM_SET_MEMORY_ALIAS |
259 | 264 | ||
@@ -820,11 +825,21 @@ struct kvm_vcpu_events { | |||
820 | } nmi; | 825 | } nmi; |
821 | __u32 sipi_vector; | 826 | __u32 sipi_vector; |
822 | __u32 flags; | 827 | __u32 flags; |
828 | struct { | ||
829 | __u8 smm; | ||
830 | __u8 pending; | ||
831 | __u8 smm_inside_nmi; | ||
832 | __u8 latched_init; | ||
833 | } smi; | ||
823 | }; | 834 | }; |
824 | 835 | ||
825 | KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that | 836 | Only two fields are defined in the flags field: |
826 | interrupt.shadow contains a valid state. Otherwise, this field is undefined. | 837 | |
838 | - KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that | ||
839 | interrupt.shadow contains a valid state. | ||
827 | 840 | ||
841 | - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that | ||
842 | smi contains a valid state. | ||
828 | 843 | ||
829 | 4.32 KVM_SET_VCPU_EVENTS | 844 | 4.32 KVM_SET_VCPU_EVENTS |
830 | 845 | ||
@@ -841,17 +856,20 @@ vcpu. | |||
841 | See KVM_GET_VCPU_EVENTS for the data structure. | 856 | See KVM_GET_VCPU_EVENTS for the data structure. |
842 | 857 | ||
843 | Fields that may be modified asynchronously by running VCPUs can be excluded | 858 | Fields that may be modified asynchronously by running VCPUs can be excluded |
844 | from the update. These fields are nmi.pending and sipi_vector. Keep the | 859 | from the update. These fields are nmi.pending, sipi_vector, smi.smm, |
845 | corresponding bits in the flags field cleared to suppress overwriting the | 860 | smi.pending. Keep the corresponding bits in the flags field cleared to |
846 | current in-kernel state. The bits are: | 861 | suppress overwriting the current in-kernel state. The bits are: |
847 | 862 | ||
848 | KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel | 863 | KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel |
849 | KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector | 864 | KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector |
865 | KVM_VCPUEVENT_VALID_SMM - transfer the smi sub-struct. | ||
850 | 866 | ||
851 | If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in | 867 | If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in |
852 | the flags field to signal that interrupt.shadow contains a valid state and | 868 | the flags field to signal that interrupt.shadow contains a valid state and |
853 | shall be written into the VCPU. | 869 | shall be written into the VCPU. |
854 | 870 | ||
871 | KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. | ||
872 | |||
855 | 873 | ||
856 | 4.33 KVM_GET_DEBUGREGS | 874 | 4.33 KVM_GET_DEBUGREGS |
857 | 875 | ||
@@ -911,6 +929,13 @@ slot. When changing an existing slot, it may be moved in the guest | |||
911 | physical memory space, or its flags may be modified. It may not be | 929 | physical memory space, or its flags may be modified. It may not be |
912 | resized. Slots may not overlap in guest physical address space. | 930 | resized. Slots may not overlap in guest physical address space. |
913 | 931 | ||
932 | If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" | ||
933 | specifies the address space which is being modified. They must be | ||
934 | less than the value that KVM_CHECK_EXTENSION returns for the | ||
935 | KVM_CAP_MULTI_ADDRESS_SPACE capability. Slots in separate address spaces | ||
936 | are unrelated; the restriction on overlapping slots only applies within | ||
937 | each address space. | ||
938 | |||
914 | Memory for the region is taken starting at the address denoted by the | 939 | Memory for the region is taken starting at the address denoted by the |
915 | field userspace_addr, which must point at user addressable memory for | 940 | field userspace_addr, which must point at user addressable memory for |
916 | the entire memory slot size. Any object may back this memory, including | 941 | the entire memory slot size. Any object may back this memory, including |
@@ -959,7 +984,8 @@ documentation when it pops into existence). | |||
959 | 4.37 KVM_ENABLE_CAP | 984 | 4.37 KVM_ENABLE_CAP |
960 | 985 | ||
961 | Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM | 986 | Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM |
962 | Architectures: ppc, s390 | 987 | Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM), |
988 | mips (only KVM_CAP_ENABLE_CAP), ppc, s390 | ||
963 | Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM) | 989 | Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM) |
964 | Parameters: struct kvm_enable_cap (in) | 990 | Parameters: struct kvm_enable_cap (in) |
965 | Returns: 0 on success; -1 on error | 991 | Returns: 0 on success; -1 on error |
@@ -1268,7 +1294,7 @@ The flags bitmap is defined as: | |||
1268 | /* the host supports the ePAPR idle hcall | 1294 | /* the host supports the ePAPR idle hcall |
1269 | #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) | 1295 | #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) |
1270 | 1296 | ||
1271 | 4.48 KVM_ASSIGN_PCI_DEVICE | 1297 | 4.48 KVM_ASSIGN_PCI_DEVICE (deprecated) |
1272 | 1298 | ||
1273 | Capability: none | 1299 | Capability: none |
1274 | Architectures: x86 | 1300 | Architectures: x86 |
@@ -1318,7 +1344,7 @@ Errors: | |||
1318 | have their standard meanings. | 1344 | have their standard meanings. |
1319 | 1345 | ||
1320 | 1346 | ||
1321 | 4.49 KVM_DEASSIGN_PCI_DEVICE | 1347 | 4.49 KVM_DEASSIGN_PCI_DEVICE (deprecated) |
1322 | 1348 | ||
1323 | Capability: none | 1349 | Capability: none |
1324 | Architectures: x86 | 1350 | Architectures: x86 |
@@ -1337,7 +1363,7 @@ Errors: | |||
1337 | Other error conditions may be defined by individual device types or | 1363 | Other error conditions may be defined by individual device types or |
1338 | have their standard meanings. | 1364 | have their standard meanings. |
1339 | 1365 | ||
1340 | 4.50 KVM_ASSIGN_DEV_IRQ | 1366 | 4.50 KVM_ASSIGN_DEV_IRQ (deprecated) |
1341 | 1367 | ||
1342 | Capability: KVM_CAP_ASSIGN_DEV_IRQ | 1368 | Capability: KVM_CAP_ASSIGN_DEV_IRQ |
1343 | Architectures: x86 | 1369 | Architectures: x86 |
@@ -1377,7 +1403,7 @@ Errors: | |||
1377 | have their standard meanings. | 1403 | have their standard meanings. |
1378 | 1404 | ||
1379 | 1405 | ||
1380 | 4.51 KVM_DEASSIGN_DEV_IRQ | 1406 | 4.51 KVM_DEASSIGN_DEV_IRQ (deprecated) |
1381 | 1407 | ||
1382 | Capability: KVM_CAP_ASSIGN_DEV_IRQ | 1408 | Capability: KVM_CAP_ASSIGN_DEV_IRQ |
1383 | Architectures: x86 | 1409 | Architectures: x86 |
@@ -1451,7 +1477,7 @@ struct kvm_irq_routing_s390_adapter { | |||
1451 | }; | 1477 | }; |
1452 | 1478 | ||
1453 | 1479 | ||
1454 | 4.53 KVM_ASSIGN_SET_MSIX_NR | 1480 | 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) |
1455 | 1481 | ||
1456 | Capability: none | 1482 | Capability: none |
1457 | Architectures: x86 | 1483 | Architectures: x86 |
@@ -1473,7 +1499,7 @@ struct kvm_assigned_msix_nr { | |||
1473 | #define KVM_MAX_MSIX_PER_DEV 256 | 1499 | #define KVM_MAX_MSIX_PER_DEV 256 |
1474 | 1500 | ||
1475 | 1501 | ||
1476 | 4.54 KVM_ASSIGN_SET_MSIX_ENTRY | 1502 | 4.54 KVM_ASSIGN_SET_MSIX_ENTRY (deprecated) |
1477 | 1503 | ||
1478 | Capability: none | 1504 | Capability: none |
1479 | Architectures: x86 | 1505 | Architectures: x86 |
@@ -1629,7 +1655,7 @@ should skip processing the bitmap and just invalidate everything. It must | |||
1629 | be set to the number of set bits in the bitmap. | 1655 | be set to the number of set bits in the bitmap. |
1630 | 1656 | ||
1631 | 1657 | ||
1632 | 4.61 KVM_ASSIGN_SET_INTX_MASK | 1658 | 4.61 KVM_ASSIGN_SET_INTX_MASK (deprecated) |
1633 | 1659 | ||
1634 | Capability: KVM_CAP_PCI_2_3 | 1660 | Capability: KVM_CAP_PCI_2_3 |
1635 | Architectures: x86 | 1661 | Architectures: x86 |
@@ -2978,6 +3004,16 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 | |||
2978 | and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), | 3004 | and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), |
2979 | which is the maximum number of possibly pending cpu-local interrupts. | 3005 | which is the maximum number of possibly pending cpu-local interrupts. |
2980 | 3006 | ||
3007 | 4.90 KVM_SMI | ||
3008 | |||
3009 | Capability: KVM_CAP_X86_SMM | ||
3010 | Architectures: x86 | ||
3011 | Type: vcpu ioctl | ||
3012 | Parameters: none | ||
3013 | Returns: 0 on success, -1 on error | ||
3014 | |||
3015 | Queues an SMI on the thread's vcpu. | ||
3016 | |||
2981 | 5. The kvm_run structure | 3017 | 5. The kvm_run structure |
2982 | ------------------------ | 3018 | ------------------------ |
2983 | 3019 | ||
@@ -3013,7 +3049,12 @@ an interrupt can be injected now with KVM_INTERRUPT. | |||
3013 | The value of the current interrupt flag. Only valid if in-kernel | 3049 | The value of the current interrupt flag. Only valid if in-kernel |
3014 | local APIC is not used. | 3050 | local APIC is not used. |
3015 | 3051 | ||
3016 | __u8 padding2[2]; | 3052 | __u16 flags; |
3053 | |||
3054 | More architecture-specific flags detailing state of the VCPU that may | ||
3055 | affect the device's behavior. The only currently defined flag is | ||
3056 | KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the | ||
3057 | VCPU is in system management mode. | ||
3017 | 3058 | ||
3018 | /* in (pre_kvm_run), out (post_kvm_run) */ | 3059 | /* in (pre_kvm_run), out (post_kvm_run) */ |
3019 | __u64 cr8; | 3060 | __u64 cr8; |
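The api.txt additions above describe how the new multiple-address-space support is encoded: bits 16-31 of the "slot" field select the address space for both KVM_SET_USER_MEMORY_REGION and the dirty-log ioctl. A rough userspace sketch of that encoding; the assumption that the SMM address space is id 1 on x86 is illustrative and not stated in these hunks:

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Illustrative only: pack the address space id into bits 16-31 of "slot",
 * as described in the documentation hunks above. */
static int set_region_in_as(int vm_fd, unsigned short as_id,
			    unsigned short slot_id,
			    __u64 gpa, __u64 size, __u64 uaddr)
{
	struct kvm_userspace_memory_region region = {
		.slot            = ((__u32)as_id << 16) | slot_id,
		.flags           = 0,
		.guest_phys_addr = gpa,
		.memory_size     = size,
		.userspace_addr  = uaddr,
	};
	int nr_as;

	/* The address space id must be below what the capability reports. */
	nr_as = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MULTI_ADDRESS_SPACE);
	if (nr_as <= 0 || as_id >= nr_as)
		return -1;

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
```

KVM_GET_DIRTY_LOG uses the same bits-16-31 encoding in its slot field, so dirty bitmaps can be fetched per address space.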
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index c59bd9bc41ef..3a4d681c3e98 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -173,6 +173,12 @@ Shadow pages contain the following information: | |||
173 | Contains the value of cr4.smap && !cr0.wp for which the page is valid | 173 | Contains the value of cr4.smap && !cr0.wp for which the page is valid |
174 | (pages for which this is true are different from other pages; see the | 174 | (pages for which this is true are different from other pages; see the |
175 | treatment of cr0.wp=0 below). | 175 | treatment of cr0.wp=0 below). |
176 | role.smm: | ||
177 | Is 1 if the page is valid in system management mode. This field | ||
178 | determines which of the kvm_memslots array was used to build this | ||
179 | shadow page; it is also used to go back from a struct kvm_mmu_page | ||
180 | to a memslot, through the kvm_memslots_for_spte_role macro and | ||
181 | __gfn_to_memslot. | ||
176 | gfn: | 182 | gfn: |
177 | Either the guest page table containing the translations shadowed by this | 183 | Either the guest page table containing the translations shadowed by this |
178 | page, or the base page frame for linear translations. See role.direct. | 184 | page, or the base page frame for linear translations. See role.direct. |
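The new role.smm bit documented above effectively acts as an index into the per-VM array of memslot sets, so a shadow page built while the vCPU was in SMM can be traced back to the SMM address space. The sketch below is a trimmed-down illustration of that idea only; the field layouts are hypothetical and the real kvm_memslots_for_spte_role macro and __gfn_to_memslot live in the x86 KVM code, not here:

```c
/* Illustration of how a role.smm bit can select a memslot array;
 * these are not the kernel's actual definitions. */
struct kvm_memslots;			/* opaque for this sketch */

union mmu_page_role {
	unsigned word;
	struct {
		unsigned level:4;
		unsigned smm:1;	/* 1 if the page is valid in system management mode */
		/* ... other role bits elided ... */
	};
};

struct vm {
	/* one memslot array per address space; smm picks index 1 */
	struct kvm_memslots *memslots[2];
};

static inline struct kvm_memslots *
memslots_for_role(struct vm *vm, union mmu_page_role role)
{
	return vm->memslots[role.smm];
}
```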
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index f1f79d104309..bfb915d05665 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM | |||
28 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT | 28 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT |
29 | select SRCU | 29 | select SRCU |
30 | select MMU_NOTIFIER | 30 | select MMU_NOTIFIER |
31 | select KVM_VFIO | ||
31 | select HAVE_KVM_EVENTFD | 32 | select HAVE_KVM_EVENTFD |
32 | select HAVE_KVM_IRQFD | 33 | select HAVE_KVM_IRQFD |
33 | depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER | 34 | depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER |
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 139e46c08b6e..c5eef02c52ba 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) | |||
15 | AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) | 15 | AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) |
16 | 16 | ||
17 | KVM := ../../../virt/kvm | 17 | KVM := ../../../virt/kvm |
18 | kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o | 18 | kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o |
19 | 19 | ||
20 | obj-y += kvm-arm.o init.o interrupts.o | 20 | obj-y += kvm-arm.o init.o interrupts.o |
21 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o | 21 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o |
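The Kconfig and Makefile changes above pull virt/kvm/vfio.o into the ARM build, which is what lets userspace create the KVM-VFIO pseudo device on these platforms. A minimal sketch of how a VMM might wire a VFIO group into KVM through the existing device API; the flow is an illustration under the assumption that vm_fd and the /dev/vfio/<group> fd already exist (the VFIO container setup itself is not part of this series):

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: vm_fd is an existing KVM VM fd, group_fd an already opened
 * /dev/vfio/<group> fd; both come from setup code not shown here. */
static int attach_vfio_group(int vm_fd, int group_fd)
{
	struct kvm_create_device cd = {
		.type = KVM_DEV_TYPE_VFIO,
	};
	struct kvm_device_attr attr;

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	/* Tell the KVM-VFIO device about the group; addr points at the fd. */
	attr.flags = 0;
	attr.group = KVM_DEV_VFIO_GROUP;
	attr.attr  = KVM_DEV_VFIO_GROUP_ADD;
	attr.addr  = (__u64)(unsigned long)&group_fd;

	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}
```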
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d9631ecddd56..bc738d2b8392 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -171,7 +171,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
171 | int r; | 171 | int r; |
172 | switch (ext) { | 172 | switch (ext) { |
173 | case KVM_CAP_IRQCHIP: | 173 | case KVM_CAP_IRQCHIP: |
174 | case KVM_CAP_IRQFD: | ||
175 | case KVM_CAP_IOEVENTFD: | 174 | case KVM_CAP_IOEVENTFD: |
176 | case KVM_CAP_DEVICE_CTRL: | 175 | case KVM_CAP_DEVICE_CTRL: |
177 | case KVM_CAP_USER_MEMORY: | 176 | case KVM_CAP_USER_MEMORY: |
@@ -532,6 +531,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
532 | kvm_vgic_flush_hwstate(vcpu); | 531 | kvm_vgic_flush_hwstate(vcpu); |
533 | kvm_timer_flush_hwstate(vcpu); | 532 | kvm_timer_flush_hwstate(vcpu); |
534 | 533 | ||
534 | preempt_disable(); | ||
535 | local_irq_disable(); | 535 | local_irq_disable(); |
536 | 536 | ||
537 | /* | 537 | /* |
@@ -544,6 +544,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
544 | 544 | ||
545 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { | 545 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { |
546 | local_irq_enable(); | 546 | local_irq_enable(); |
547 | preempt_enable(); | ||
547 | kvm_timer_sync_hwstate(vcpu); | 548 | kvm_timer_sync_hwstate(vcpu); |
548 | kvm_vgic_sync_hwstate(vcpu); | 549 | kvm_vgic_sync_hwstate(vcpu); |
549 | continue; | 550 | continue; |
@@ -553,14 +554,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
553 | * Enter the guest | 554 | * Enter the guest |
554 | */ | 555 | */ |
555 | trace_kvm_entry(*vcpu_pc(vcpu)); | 556 | trace_kvm_entry(*vcpu_pc(vcpu)); |
556 | kvm_guest_enter(); | 557 | __kvm_guest_enter(); |
557 | vcpu->mode = IN_GUEST_MODE; | 558 | vcpu->mode = IN_GUEST_MODE; |
558 | 559 | ||
559 | ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); | 560 | ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); |
560 | 561 | ||
561 | vcpu->mode = OUTSIDE_GUEST_MODE; | 562 | vcpu->mode = OUTSIDE_GUEST_MODE; |
562 | kvm_guest_exit(); | 563 | /* |
563 | trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); | 564 | * Back from guest |
565 | *************************************************************/ | ||
566 | |||
564 | /* | 567 | /* |
565 | * We may have taken a host interrupt in HYP mode (ie | 568 | * We may have taken a host interrupt in HYP mode (ie |
566 | * while executing the guest). This interrupt is still | 569 | * while executing the guest). This interrupt is still |
@@ -574,8 +577,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
574 | local_irq_enable(); | 577 | local_irq_enable(); |
575 | 578 | ||
576 | /* | 579 | /* |
577 | * Back from guest | 580 | * We do local_irq_enable() before calling kvm_guest_exit() so |
578 | *************************************************************/ | 581 | * that if a timer interrupt hits while running the guest we |
582 | * account that tick as being spent in the guest. We enable | ||
583 | * preemption after calling kvm_guest_exit() so that if we get | ||
584 | * preempted we make sure ticks after that is not counted as | ||
585 | * guest time. | ||
586 | */ | ||
587 | kvm_guest_exit(); | ||
588 | trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); | ||
589 | preempt_enable(); | ||
590 | |||
579 | 591 | ||
580 | kvm_timer_sync_hwstate(vcpu); | 592 | kvm_timer_sync_hwstate(vcpu); |
581 | kvm_vgic_sync_hwstate(vcpu); | 593 | kvm_vgic_sync_hwstate(vcpu); |
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 79caf79b304a..f7db3a5d80e3 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -170,13 +170,9 @@ __kvm_vcpu_return: | |||
170 | @ Don't trap coprocessor accesses for host kernel | 170 | @ Don't trap coprocessor accesses for host kernel |
171 | set_hstr vmexit | 171 | set_hstr vmexit |
172 | set_hdcr vmexit | 172 | set_hdcr vmexit |
173 | set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) | 173 | set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore |
174 | 174 | ||
175 | #ifdef CONFIG_VFPv3 | 175 | #ifdef CONFIG_VFPv3 |
176 | @ Save floating point registers we if let guest use them. | ||
177 | tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) | ||
178 | bne after_vfp_restore | ||
179 | |||
180 | @ Switch VFP/NEON hardware state to the host's | 176 | @ Switch VFP/NEON hardware state to the host's |
181 | add r7, vcpu, #VCPU_VFP_GUEST | 177 | add r7, vcpu, #VCPU_VFP_GUEST |
182 | store_vfp_state r7 | 178 | store_vfp_state r7 |
@@ -188,6 +184,8 @@ after_vfp_restore: | |||
188 | @ Restore FPEXC_EN which we clobbered on entry | 184 | @ Restore FPEXC_EN which we clobbered on entry |
189 | pop {r2} | 185 | pop {r2} |
190 | VFPFMXR FPEXC, r2 | 186 | VFPFMXR FPEXC, r2 |
187 | #else | ||
188 | after_vfp_restore: | ||
191 | #endif | 189 | #endif |
192 | 190 | ||
193 | @ Reset Hyp-role | 191 | @ Reset Hyp-role |
@@ -483,7 +481,7 @@ switch_to_guest_vfp: | |||
483 | push {r3-r7} | 481 | push {r3-r7} |
484 | 482 | ||
485 | @ NEON/VFP used. Turn on VFP access. | 483 | @ NEON/VFP used. Turn on VFP access. |
486 | set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11)) | 484 | set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11)) |
487 | 485 | ||
488 | @ Switch VFP/NEON hardware state to the guest's | 486 | @ Switch VFP/NEON hardware state to the guest's |
489 | add r7, r0, #VCPU_VFP_HOST | 487 | add r7, r0, #VCPU_VFP_HOST |
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 35e4a3a0c476..702740d37465 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -412,7 +412,6 @@ vcpu .req r0 @ vcpu pointer always in r0 | |||
412 | add r11, vcpu, #VCPU_VGIC_CPU | 412 | add r11, vcpu, #VCPU_VGIC_CPU |
413 | 413 | ||
414 | /* Save all interesting registers */ | 414 | /* Save all interesting registers */ |
415 | ldr r3, [r2, #GICH_HCR] | ||
416 | ldr r4, [r2, #GICH_VMCR] | 415 | ldr r4, [r2, #GICH_VMCR] |
417 | ldr r5, [r2, #GICH_MISR] | 416 | ldr r5, [r2, #GICH_MISR] |
418 | ldr r6, [r2, #GICH_EISR0] | 417 | ldr r6, [r2, #GICH_EISR0] |
@@ -420,7 +419,6 @@ vcpu .req r0 @ vcpu pointer always in r0 | |||
420 | ldr r8, [r2, #GICH_ELRSR0] | 419 | ldr r8, [r2, #GICH_ELRSR0] |
421 | ldr r9, [r2, #GICH_ELRSR1] | 420 | ldr r9, [r2, #GICH_ELRSR1] |
422 | ldr r10, [r2, #GICH_APR] | 421 | ldr r10, [r2, #GICH_APR] |
423 | ARM_BE8(rev r3, r3 ) | ||
424 | ARM_BE8(rev r4, r4 ) | 422 | ARM_BE8(rev r4, r4 ) |
425 | ARM_BE8(rev r5, r5 ) | 423 | ARM_BE8(rev r5, r5 ) |
426 | ARM_BE8(rev r6, r6 ) | 424 | ARM_BE8(rev r6, r6 ) |
@@ -429,7 +427,6 @@ ARM_BE8(rev r8, r8 ) | |||
429 | ARM_BE8(rev r9, r9 ) | 427 | ARM_BE8(rev r9, r9 ) |
430 | ARM_BE8(rev r10, r10 ) | 428 | ARM_BE8(rev r10, r10 ) |
431 | 429 | ||
432 | str r3, [r11, #VGIC_V2_CPU_HCR] | ||
433 | str r4, [r11, #VGIC_V2_CPU_VMCR] | 430 | str r4, [r11, #VGIC_V2_CPU_VMCR] |
434 | str r5, [r11, #VGIC_V2_CPU_MISR] | 431 | str r5, [r11, #VGIC_V2_CPU_MISR] |
435 | #ifdef CONFIG_CPU_ENDIAN_BE8 | 432 | #ifdef CONFIG_CPU_ENDIAN_BE8 |
@@ -591,8 +588,13 @@ ARM_BE8(rev r6, r6 ) | |||
591 | .endm | 588 | .endm |
592 | 589 | ||
593 | /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return | 590 | /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return |
594 | * (hardware reset value is 0). Keep previous value in r2. */ | 591 | * (hardware reset value is 0). Keep previous value in r2. |
595 | .macro set_hcptr operation, mask | 592 | * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if |
593 | * VFP wasn't already enabled (always executed on vmtrap). | ||
594 | * If a label is specified with vmexit, it is branched to if VFP wasn't | ||
595 | * enabled. | ||
596 | */ | ||
597 | .macro set_hcptr operation, mask, label = none | ||
596 | mrc p15, 4, r2, c1, c1, 2 | 598 | mrc p15, 4, r2, c1, c1, 2 |
597 | ldr r3, =\mask | 599 | ldr r3, =\mask |
598 | .if \operation == vmentry | 600 | .if \operation == vmentry |
@@ -601,6 +603,17 @@ ARM_BE8(rev r6, r6 ) | |||
601 | bic r3, r2, r3 @ Don't trap defined coproc-accesses | 603 | bic r3, r2, r3 @ Don't trap defined coproc-accesses |
602 | .endif | 604 | .endif |
603 | mcr p15, 4, r3, c1, c1, 2 | 605 | mcr p15, 4, r3, c1, c1, 2 |
606 | .if \operation != vmentry | ||
607 | .if \operation == vmexit | ||
608 | tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) | ||
609 | beq 1f | ||
610 | .endif | ||
611 | isb | ||
612 | .if \label != none | ||
613 | b \label | ||
614 | .endif | ||
615 | 1: | ||
616 | .endif | ||
604 | .endm | 617 | .endm |
605 | 618 | ||
606 | /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return | 619 | /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return |
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1d5accbd3dcf..7b4201294187 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -691,8 +691,8 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) | |||
691 | * work. This is not used by the hardware and we have no | 691 | * work. This is not used by the hardware and we have no |
692 | * alignment requirement for this allocation. | 692 | * alignment requirement for this allocation. |
693 | */ | 693 | */ |
694 | pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), | 694 | pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), |
695 | GFP_KERNEL | __GFP_ZERO); | 695 | GFP_KERNEL | __GFP_ZERO); |
696 | 696 | ||
697 | if (!pgd) { | 697 | if (!pgd) { |
698 | kvm_free_hwpgd(hwpgd); | 698 | kvm_free_hwpgd(hwpgd); |
@@ -1155,7 +1155,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) | |||
1155 | */ | 1155 | */ |
1156 | void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) | 1156 | void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) |
1157 | { | 1157 | { |
1158 | struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); | 1158 | struct kvm_memslots *slots = kvm_memslots(kvm); |
1159 | struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); | ||
1159 | phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; | 1160 | phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; |
1160 | phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; | 1161 | phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; |
1161 | 1162 | ||
@@ -1718,8 +1719,9 @@ out: | |||
1718 | } | 1719 | } |
1719 | 1720 | ||
1720 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 1721 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
1721 | struct kvm_userspace_memory_region *mem, | 1722 | const struct kvm_userspace_memory_region *mem, |
1722 | const struct kvm_memory_slot *old, | 1723 | const struct kvm_memory_slot *old, |
1724 | const struct kvm_memory_slot *new, | ||
1723 | enum kvm_mr_change change) | 1725 | enum kvm_mr_change change) |
1724 | { | 1726 | { |
1725 | /* | 1727 | /* |
@@ -1733,7 +1735,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
1733 | 1735 | ||
1734 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1736 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1735 | struct kvm_memory_slot *memslot, | 1737 | struct kvm_memory_slot *memslot, |
1736 | struct kvm_userspace_memory_region *mem, | 1738 | const struct kvm_userspace_memory_region *mem, |
1737 | enum kvm_mr_change change) | 1739 | enum kvm_mr_change change) |
1738 | { | 1740 | { |
1739 | hva_t hva = mem->userspace_addr; | 1741 | hva_t hva = mem->userspace_addr; |
@@ -1838,7 +1840,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
1838 | return 0; | 1840 | return 0; |
1839 | } | 1841 | } |
1840 | 1842 | ||
1841 | void kvm_arch_memslots_updated(struct kvm *kvm) | 1843 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) |
1842 | { | 1844 | { |
1843 | } | 1845 | } |
1844 | 1846 | ||
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 02fa8eff6ae1..531e922486b2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -230,10 +230,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) | |||
230 | case PSCI_0_2_FN64_AFFINITY_INFO: | 230 | case PSCI_0_2_FN64_AFFINITY_INFO: |
231 | val = kvm_psci_vcpu_affinity_info(vcpu); | 231 | val = kvm_psci_vcpu_affinity_info(vcpu); |
232 | break; | 232 | break; |
233 | case PSCI_0_2_FN_MIGRATE: | ||
234 | case PSCI_0_2_FN64_MIGRATE: | ||
235 | val = PSCI_RET_NOT_SUPPORTED; | ||
236 | break; | ||
237 | case PSCI_0_2_FN_MIGRATE_INFO_TYPE: | 233 | case PSCI_0_2_FN_MIGRATE_INFO_TYPE: |
238 | /* | 234 | /* |
239 | * Trusted OS is MP hence does not require migration | 235 | * Trusted OS is MP hence does not require migration |
@@ -242,10 +238,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) | |||
242 | */ | 238 | */ |
243 | val = PSCI_0_2_TOS_MP; | 239 | val = PSCI_0_2_TOS_MP; |
244 | break; | 240 | break; |
245 | case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: | ||
246 | case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: | ||
247 | val = PSCI_RET_NOT_SUPPORTED; | ||
248 | break; | ||
249 | case PSCI_0_2_FN_SYSTEM_OFF: | 241 | case PSCI_0_2_FN_SYSTEM_OFF: |
250 | kvm_psci_system_off(vcpu); | 242 | kvm_psci_system_off(vcpu); |
251 | /* | 243 | /* |
@@ -271,7 +263,8 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) | |||
271 | ret = 0; | 263 | ret = 0; |
272 | break; | 264 | break; |
273 | default: | 265 | default: |
274 | return -EINVAL; | 266 | val = PSCI_RET_NOT_SUPPORTED; |
267 | break; | ||
275 | } | 268 | } |
276 | 269 | ||
277 | *vcpu_reg(vcpu, 0) = val; | 270 | *vcpu_reg(vcpu, 0) = val; |
@@ -291,12 +284,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) | |||
291 | case KVM_PSCI_FN_CPU_ON: | 284 | case KVM_PSCI_FN_CPU_ON: |
292 | val = kvm_psci_vcpu_on(vcpu); | 285 | val = kvm_psci_vcpu_on(vcpu); |
293 | break; | 286 | break; |
294 | case KVM_PSCI_FN_CPU_SUSPEND: | 287 | default: |
295 | case KVM_PSCI_FN_MIGRATE: | ||
296 | val = PSCI_RET_NOT_SUPPORTED; | 288 | val = PSCI_RET_NOT_SUPPORTED; |
297 | break; | 289 | break; |
298 | default: | ||
299 | return -EINVAL; | ||
300 | } | 290 | } |
301 | 291 | ||
302 | *vcpu_reg(vcpu, 0) = val; | 292 | *vcpu_reg(vcpu, 0) = val; |
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 5105e297ed5f..bfffe8f4bd53 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM | |||
28 | select KVM_ARM_HOST | 28 | select KVM_ARM_HOST |
29 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT | 29 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT |
30 | select SRCU | 30 | select SRCU |
31 | select KVM_VFIO | ||
31 | select HAVE_KVM_EVENTFD | 32 | select HAVE_KVM_EVENTFD |
32 | select HAVE_KVM_IRQFD | 33 | select HAVE_KVM_IRQFD |
33 | ---help--- | 34 | ---help--- |
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index d5904f876cdb..f90f4aa7f88d 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm | |||
11 | 11 | ||
12 | obj-$(CONFIG_KVM_ARM_HOST) += kvm.o | 12 | obj-$(CONFIG_KVM_ARM_HOST) += kvm.o |
13 | 13 | ||
14 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o | 14 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o |
15 | kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o | 15 | kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o |
16 | kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o | 16 | kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o |
17 | 17 | ||
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 5befd010e232..519805f71876 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -50,8 +50,8 @@ | |||
50 | stp x29, lr, [x3, #80] | 50 | stp x29, lr, [x3, #80] |
51 | 51 | ||
52 | mrs x19, sp_el0 | 52 | mrs x19, sp_el0 |
53 | mrs x20, elr_el2 // EL1 PC | 53 | mrs x20, elr_el2 // pc before entering el2 |
54 | mrs x21, spsr_el2 // EL1 pstate | 54 | mrs x21, spsr_el2 // pstate before entering el2 |
55 | 55 | ||
56 | stp x19, x20, [x3, #96] | 56 | stp x19, x20, [x3, #96] |
57 | str x21, [x3, #112] | 57 | str x21, [x3, #112] |
@@ -82,8 +82,8 @@ | |||
82 | ldr x21, [x3, #16] | 82 | ldr x21, [x3, #16] |
83 | 83 | ||
84 | msr sp_el0, x19 | 84 | msr sp_el0, x19 |
85 | msr elr_el2, x20 // EL1 PC | 85 | msr elr_el2, x20 // pc on return from el2 |
86 | msr spsr_el2, x21 // EL1 pstate | 86 | msr spsr_el2, x21 // pstate on return from el2 |
87 | 87 | ||
88 | add x3, x2, #CPU_XREG_OFFSET(19) | 88 | add x3, x2, #CPU_XREG_OFFSET(19) |
89 | ldp x19, x20, [x3] | 89 | ldp x19, x20, [x3] |
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
index f002fe1c3700..3f000712a85d 100644
--- a/arch/arm64/kvm/vgic-v2-switch.S
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -47,7 +47,6 @@ __save_vgic_v2_state: | |||
47 | add x3, x0, #VCPU_VGIC_CPU | 47 | add x3, x0, #VCPU_VGIC_CPU |
48 | 48 | ||
49 | /* Save all interesting registers */ | 49 | /* Save all interesting registers */ |
50 | ldr w4, [x2, #GICH_HCR] | ||
51 | ldr w5, [x2, #GICH_VMCR] | 50 | ldr w5, [x2, #GICH_VMCR] |
52 | ldr w6, [x2, #GICH_MISR] | 51 | ldr w6, [x2, #GICH_MISR] |
53 | ldr w7, [x2, #GICH_EISR0] | 52 | ldr w7, [x2, #GICH_EISR0] |
@@ -55,7 +54,6 @@ __save_vgic_v2_state: | |||
55 | ldr w9, [x2, #GICH_ELRSR0] | 54 | ldr w9, [x2, #GICH_ELRSR0] |
56 | ldr w10, [x2, #GICH_ELRSR1] | 55 | ldr w10, [x2, #GICH_ELRSR1] |
57 | ldr w11, [x2, #GICH_APR] | 56 | ldr w11, [x2, #GICH_APR] |
58 | CPU_BE( rev w4, w4 ) | ||
59 | CPU_BE( rev w5, w5 ) | 57 | CPU_BE( rev w5, w5 ) |
60 | CPU_BE( rev w6, w6 ) | 58 | CPU_BE( rev w6, w6 ) |
61 | CPU_BE( rev w7, w7 ) | 59 | CPU_BE( rev w7, w7 ) |
@@ -64,7 +62,6 @@ CPU_BE( rev w9, w9 ) | |||
64 | CPU_BE( rev w10, w10 ) | 62 | CPU_BE( rev w10, w10 ) |
65 | CPU_BE( rev w11, w11 ) | 63 | CPU_BE( rev w11, w11 ) |
66 | 64 | ||
67 | str w4, [x3, #VGIC_V2_CPU_HCR] | ||
68 | str w5, [x3, #VGIC_V2_CPU_VMCR] | 65 | str w5, [x3, #VGIC_V2_CPU_VMCR] |
69 | str w6, [x3, #VGIC_V2_CPU_MISR] | 66 | str w6, [x3, #VGIC_V2_CPU_MISR] |
70 | CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) | 67 | CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) |
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 617a012a0107..3c20730ddff5 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -48,13 +48,11 @@ | |||
48 | dsb st | 48 | dsb st |
49 | 49 | ||
50 | // Save all interesting registers | 50 | // Save all interesting registers |
51 | mrs_s x4, ICH_HCR_EL2 | ||
52 | mrs_s x5, ICH_VMCR_EL2 | 51 | mrs_s x5, ICH_VMCR_EL2 |
53 | mrs_s x6, ICH_MISR_EL2 | 52 | mrs_s x6, ICH_MISR_EL2 |
54 | mrs_s x7, ICH_EISR_EL2 | 53 | mrs_s x7, ICH_EISR_EL2 |
55 | mrs_s x8, ICH_ELSR_EL2 | 54 | mrs_s x8, ICH_ELSR_EL2 |
56 | 55 | ||
57 | str w4, [x3, #VGIC_V3_CPU_HCR] | ||
58 | str w5, [x3, #VGIC_V3_CPU_VMCR] | 56 | str w5, [x3, #VGIC_V3_CPU_VMCR] |
59 | str w6, [x3, #VGIC_V3_CPU_MISR] | 57 | str w6, [x3, #VGIC_V3_CPU_MISR] |
60 | str w7, [x3, #VGIC_V3_CPU_EISR] | 58 | str w7, [x3, #VGIC_V3_CPU_EISR] |
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4c25823563fe..e8c8d9d0c45f 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -839,7 +839,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} | |||
839 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 839 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
840 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 840 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
841 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | 841 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} |
842 | static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} | 842 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} |
843 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 843 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
844 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 844 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
845 | struct kvm_memory_slot *slot) {} | 845 | struct kvm_memory_slot *slot) {} |
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index bb68e8d520e8..cd4c129ce743 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -198,15 +198,16 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
198 | 198 | ||
199 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 199 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
200 | struct kvm_memory_slot *memslot, | 200 | struct kvm_memory_slot *memslot, |
201 | struct kvm_userspace_memory_region *mem, | 201 | const struct kvm_userspace_memory_region *mem, |
202 | enum kvm_mr_change change) | 202 | enum kvm_mr_change change) |
203 | { | 203 | { |
204 | return 0; | 204 | return 0; |
205 | } | 205 | } |
206 | 206 | ||
207 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 207 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
208 | struct kvm_userspace_memory_region *mem, | 208 | const struct kvm_userspace_memory_region *mem, |
209 | const struct kvm_memory_slot *old, | 209 | const struct kvm_memory_slot *old, |
210 | const struct kvm_memory_slot *new, | ||
210 | enum kvm_mr_change change) | 211 | enum kvm_mr_change change) |
211 | { | 212 | { |
212 | unsigned long npages = 0; | 213 | unsigned long npages = 0; |
@@ -393,7 +394,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
393 | kvm_mips_deliver_interrupts(vcpu, | 394 | kvm_mips_deliver_interrupts(vcpu, |
394 | kvm_read_c0_guest_cause(vcpu->arch.cop0)); | 395 | kvm_read_c0_guest_cause(vcpu->arch.cop0)); |
395 | 396 | ||
396 | kvm_guest_enter(); | 397 | __kvm_guest_enter(); |
397 | 398 | ||
398 | /* Disable hardware page table walking while in guest */ | 399 | /* Disable hardware page table walking while in guest */ |
399 | htw_stop(); | 400 | htw_stop(); |
@@ -403,7 +404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
403 | /* Re-enable HTW before enabling interrupts */ | 404 | /* Re-enable HTW before enabling interrupts */ |
404 | htw_start(); | 405 | htw_start(); |
405 | 406 | ||
406 | kvm_guest_exit(); | 407 | __kvm_guest_exit(); |
407 | local_irq_enable(); | 408 | local_irq_enable(); |
408 | 409 | ||
409 | if (vcpu->sigset_active) | 410 | if (vcpu->sigset_active) |
@@ -968,6 +969,7 @@ out: | |||
968 | /* Get (and clear) the dirty memory log for a memory slot. */ | 969 | /* Get (and clear) the dirty memory log for a memory slot. */ |
969 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | 970 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) |
970 | { | 971 | { |
972 | struct kvm_memslots *slots; | ||
971 | struct kvm_memory_slot *memslot; | 973 | struct kvm_memory_slot *memslot; |
972 | unsigned long ga, ga_end; | 974 | unsigned long ga, ga_end; |
973 | int is_dirty = 0; | 975 | int is_dirty = 0; |
@@ -982,7 +984,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | |||
982 | 984 | ||
983 | /* If nothing is dirty, don't bother messing with page tables. */ | 985 | /* If nothing is dirty, don't bother messing with page tables. */ |
984 | if (is_dirty) { | 986 | if (is_dirty) { |
985 | memslot = &kvm->memslots->memslots[log->slot]; | 987 | slots = kvm_memslots(kvm); |
988 | memslot = id_to_memslot(slots, log->slot); | ||
986 | 989 | ||
987 | ga = memslot->base_gfn << PAGE_SHIFT; | 990 | ga = memslot->base_gfn << PAGE_SHIFT; |
988 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | 991 | ga_end = ga + (memslot->npages << PAGE_SHIFT); |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 3536d12eb798..2aa79c864e91 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -430,7 +430,7 @@ static inline void note_hpte_modification(struct kvm *kvm, | |||
430 | */ | 430 | */ |
431 | static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) | 431 | static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) |
432 | { | 432 | { |
433 | return rcu_dereference_raw_notrace(kvm->memslots); | 433 | return rcu_dereference_raw_notrace(kvm->memslots[0]); |
434 | } | 434 | } |
435 | 435 | ||
436 | extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); | 436 | extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a193a13cf08b..d91f65b28e32 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -698,7 +698,7 @@ struct kvm_vcpu_arch { | |||
698 | static inline void kvm_arch_hardware_disable(void) {} | 698 | static inline void kvm_arch_hardware_disable(void) {} |
699 | static inline void kvm_arch_hardware_unsetup(void) {} | 699 | static inline void kvm_arch_hardware_unsetup(void) {} |
700 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 700 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
701 | static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} | 701 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} |
702 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 702 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
703 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 703 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
704 | static inline void kvm_arch_exit(void) {} | 704 | static inline void kvm_arch_exit(void) {} |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b8475daad884..c6ef05bd0765 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -182,10 +182,11 @@ extern int kvmppc_core_create_memslot(struct kvm *kvm, | |||
182 | unsigned long npages); | 182 | unsigned long npages); |
183 | extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, | 183 | extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, |
184 | struct kvm_memory_slot *memslot, | 184 | struct kvm_memory_slot *memslot, |
185 | struct kvm_userspace_memory_region *mem); | 185 | const struct kvm_userspace_memory_region *mem); |
186 | extern void kvmppc_core_commit_memory_region(struct kvm *kvm, | 186 | extern void kvmppc_core_commit_memory_region(struct kvm *kvm, |
187 | struct kvm_userspace_memory_region *mem, | 187 | const struct kvm_userspace_memory_region *mem, |
188 | const struct kvm_memory_slot *old); | 188 | const struct kvm_memory_slot *old, |
189 | const struct kvm_memory_slot *new); | ||
189 | extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, | 190 | extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, |
190 | struct kvm_ppc_smmu_info *info); | 191 | struct kvm_ppc_smmu_info *info); |
191 | extern void kvmppc_core_flush_memslot(struct kvm *kvm, | 192 | extern void kvmppc_core_flush_memslot(struct kvm *kvm, |
@@ -243,10 +244,11 @@ struct kvmppc_ops { | |||
243 | void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); | 244 | void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); |
244 | int (*prepare_memory_region)(struct kvm *kvm, | 245 | int (*prepare_memory_region)(struct kvm *kvm, |
245 | struct kvm_memory_slot *memslot, | 246 | struct kvm_memory_slot *memslot, |
246 | struct kvm_userspace_memory_region *mem); | 247 | const struct kvm_userspace_memory_region *mem); |
247 | void (*commit_memory_region)(struct kvm *kvm, | 248 | void (*commit_memory_region)(struct kvm *kvm, |
248 | struct kvm_userspace_memory_region *mem, | 249 | const struct kvm_userspace_memory_region *mem, |
249 | const struct kvm_memory_slot *old); | 250 | const struct kvm_memory_slot *old, |
251 | const struct kvm_memory_slot *new); | ||
250 | int (*unmap_hva)(struct kvm *kvm, unsigned long hva); | 252 | int (*unmap_hva)(struct kvm *kvm, unsigned long hva); |
251 | int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, | 253 | int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, |
252 | unsigned long end); | 254 | unsigned long end); |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 453a8a47a467..05ea8fc7f829 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -757,16 +757,17 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) | |||
757 | 757 | ||
758 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, | 758 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, |
759 | struct kvm_memory_slot *memslot, | 759 | struct kvm_memory_slot *memslot, |
760 | struct kvm_userspace_memory_region *mem) | 760 | const struct kvm_userspace_memory_region *mem) |
761 | { | 761 | { |
762 | return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); | 762 | return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); |
763 | } | 763 | } |
764 | 764 | ||
765 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 765 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
766 | struct kvm_userspace_memory_region *mem, | 766 | const struct kvm_userspace_memory_region *mem, |
767 | const struct kvm_memory_slot *old) | 767 | const struct kvm_memory_slot *old, |
768 | const struct kvm_memory_slot *new) | ||
768 | { | 769 | { |
769 | kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); | 770 | kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); |
770 | } | 771 | } |
771 | 772 | ||
772 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 773 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1a4acf8bf4f4..dab68b7af3f2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -650,7 +650,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm) | |||
650 | int srcu_idx; | 650 | int srcu_idx; |
651 | 651 | ||
652 | srcu_idx = srcu_read_lock(&kvm->srcu); | 652 | srcu_idx = srcu_read_lock(&kvm->srcu); |
653 | slots = kvm->memslots; | 653 | slots = kvm_memslots(kvm); |
654 | kvm_for_each_memslot(memslot, slots) { | 654 | kvm_for_each_memslot(memslot, slots) { |
655 | /* | 655 | /* |
656 | * This assumes it is acceptable to lose reference and | 656 | * This assumes it is acceptable to lose reference and |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index df81caab7383..68d067ad4222 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2321,6 +2321,7 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, | |||
2321 | static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, | 2321 | static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, |
2322 | struct kvm_dirty_log *log) | 2322 | struct kvm_dirty_log *log) |
2323 | { | 2323 | { |
2324 | struct kvm_memslots *slots; | ||
2324 | struct kvm_memory_slot *memslot; | 2325 | struct kvm_memory_slot *memslot; |
2325 | int r; | 2326 | int r; |
2326 | unsigned long n; | 2327 | unsigned long n; |
@@ -2331,7 +2332,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, | |||
2331 | if (log->slot >= KVM_USER_MEM_SLOTS) | 2332 | if (log->slot >= KVM_USER_MEM_SLOTS) |
2332 | goto out; | 2333 | goto out; |
2333 | 2334 | ||
2334 | memslot = id_to_memslot(kvm->memslots, log->slot); | 2335 | slots = kvm_memslots(kvm); |
2336 | memslot = id_to_memslot(slots, log->slot); | ||
2335 | r = -ENOENT; | 2337 | r = -ENOENT; |
2336 | if (!memslot->dirty_bitmap) | 2338 | if (!memslot->dirty_bitmap) |
2337 | goto out; | 2339 | goto out; |
@@ -2374,16 +2376,18 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, | |||
2374 | 2376 | ||
2375 | static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, | 2377 | static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, |
2376 | struct kvm_memory_slot *memslot, | 2378 | struct kvm_memory_slot *memslot, |
2377 | struct kvm_userspace_memory_region *mem) | 2379 | const struct kvm_userspace_memory_region *mem) |
2378 | { | 2380 | { |
2379 | return 0; | 2381 | return 0; |
2380 | } | 2382 | } |
2381 | 2383 | ||
2382 | static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, | 2384 | static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, |
2383 | struct kvm_userspace_memory_region *mem, | 2385 | const struct kvm_userspace_memory_region *mem, |
2384 | const struct kvm_memory_slot *old) | 2386 | const struct kvm_memory_slot *old, |
2387 | const struct kvm_memory_slot *new) | ||
2385 | { | 2388 | { |
2386 | unsigned long npages = mem->memory_size >> PAGE_SHIFT; | 2389 | unsigned long npages = mem->memory_size >> PAGE_SHIFT; |
2390 | struct kvm_memslots *slots; | ||
2387 | struct kvm_memory_slot *memslot; | 2391 | struct kvm_memory_slot *memslot; |
2388 | 2392 | ||
2389 | if (npages && old->npages) { | 2393 | if (npages && old->npages) { |
@@ -2393,7 +2397,8 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, | |||
2393 | * since the rmap array starts out as all zeroes, | 2397 | * since the rmap array starts out as all zeroes, |
2394 | * i.e. no pages are dirty. | 2398 | * i.e. no pages are dirty. |
2395 | */ | 2399 | */ |
2396 | memslot = id_to_memslot(kvm->memslots, mem->slot); | 2400 | slots = kvm_memslots(kvm); |
2401 | memslot = id_to_memslot(slots, mem->slot); | ||
2397 | kvmppc_hv_get_dirty_log(kvm, memslot, NULL); | 2402 | kvmppc_hv_get_dirty_log(kvm, memslot, NULL); |
2398 | } | 2403 | } |
2399 | } | 2404 | } |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index f57383941d03..64891b081ad5 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1530,6 +1530,7 @@ out: | |||
1530 | static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, | 1530 | static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, |
1531 | struct kvm_dirty_log *log) | 1531 | struct kvm_dirty_log *log) |
1532 | { | 1532 | { |
1533 | struct kvm_memslots *slots; | ||
1533 | struct kvm_memory_slot *memslot; | 1534 | struct kvm_memory_slot *memslot; |
1534 | struct kvm_vcpu *vcpu; | 1535 | struct kvm_vcpu *vcpu; |
1535 | ulong ga, ga_end; | 1536 | ulong ga, ga_end; |
@@ -1545,7 +1546,8 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, | |||
1545 | 1546 | ||
1546 | /* If nothing is dirty, don't bother messing with page tables. */ | 1547 | /* If nothing is dirty, don't bother messing with page tables. */ |
1547 | if (is_dirty) { | 1548 | if (is_dirty) { |
1548 | memslot = id_to_memslot(kvm->memslots, log->slot); | 1549 | slots = kvm_memslots(kvm); |
1550 | memslot = id_to_memslot(slots, log->slot); | ||
1549 | 1551 | ||
1550 | ga = memslot->base_gfn << PAGE_SHIFT; | 1552 | ga = memslot->base_gfn << PAGE_SHIFT; |
1551 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | 1553 | ga_end = ga + (memslot->npages << PAGE_SHIFT); |
@@ -1571,14 +1573,15 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, | |||
1571 | 1573 | ||
1572 | static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, | 1574 | static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, |
1573 | struct kvm_memory_slot *memslot, | 1575 | struct kvm_memory_slot *memslot, |
1574 | struct kvm_userspace_memory_region *mem) | 1576 | const struct kvm_userspace_memory_region *mem) |
1575 | { | 1577 | { |
1576 | return 0; | 1578 | return 0; |
1577 | } | 1579 | } |
1578 | 1580 | ||
1579 | static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, | 1581 | static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, |
1580 | struct kvm_userspace_memory_region *mem, | 1582 | const struct kvm_userspace_memory_region *mem, |
1581 | const struct kvm_memory_slot *old) | 1583 | const struct kvm_memory_slot *old, |
1584 | const struct kvm_memory_slot *new) | ||
1582 | { | 1585 | { |
1583 | return; | 1586 | return; |
1584 | } | 1587 | } |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 6c1316a15a27..cc5842657161 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1004,10 +1004,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1004 | break; | 1004 | break; |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | local_irq_enable(); | ||
1008 | |||
1009 | trace_kvm_exit(exit_nr, vcpu); | 1007 | trace_kvm_exit(exit_nr, vcpu); |
1010 | kvm_guest_exit(); | 1008 | __kvm_guest_exit(); |
1009 | |||
1010 | local_irq_enable(); | ||
1011 | 1011 | ||
1012 | run->exit_reason = KVM_EXIT_UNKNOWN; | 1012 | run->exit_reason = KVM_EXIT_UNKNOWN; |
1013 | run->ready_for_interrupt_injection = 1; | 1013 | run->ready_for_interrupt_injection = 1; |
@@ -1784,14 +1784,15 @@ int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
1784 | 1784 | ||
1785 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, | 1785 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, |
1786 | struct kvm_memory_slot *memslot, | 1786 | struct kvm_memory_slot *memslot, |
1787 | struct kvm_userspace_memory_region *mem) | 1787 | const struct kvm_userspace_memory_region *mem) |
1788 | { | 1788 | { |
1789 | return 0; | 1789 | return 0; |
1790 | } | 1790 | } |
1791 | 1791 | ||
1792 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1792 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
1793 | struct kvm_userspace_memory_region *mem, | 1793 | const struct kvm_userspace_memory_region *mem, |
1794 | const struct kvm_memory_slot *old) | 1794 | const struct kvm_memory_slot *old, |
1795 | const struct kvm_memory_slot *new) | ||
1795 | { | 1796 | { |
1796 | } | 1797 | } |
1797 | 1798 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ac3ddf115f3d..e5dde32fe71f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -115,7 +115,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) | |||
115 | continue; | 115 | continue; |
116 | } | 116 | } |
117 | 117 | ||
118 | kvm_guest_enter(); | 118 | __kvm_guest_enter(); |
119 | return 1; | 119 | return 1; |
120 | } | 120 | } |
121 | 121 | ||
@@ -595,18 +595,19 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
595 | 595 | ||
596 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 596 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
597 | struct kvm_memory_slot *memslot, | 597 | struct kvm_memory_slot *memslot, |
598 | struct kvm_userspace_memory_region *mem, | 598 | const struct kvm_userspace_memory_region *mem, |
599 | enum kvm_mr_change change) | 599 | enum kvm_mr_change change) |
600 | { | 600 | { |
601 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); | 601 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); |
602 | } | 602 | } |
603 | 603 | ||
604 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 604 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
605 | struct kvm_userspace_memory_region *mem, | 605 | const struct kvm_userspace_memory_region *mem, |
606 | const struct kvm_memory_slot *old, | 606 | const struct kvm_memory_slot *old, |
607 | const struct kvm_memory_slot *new, | ||
607 | enum kvm_mr_change change) | 608 | enum kvm_mr_change change) |
608 | { | 609 | { |
609 | kvmppc_core_commit_memory_region(kvm, mem, old); | 610 | kvmppc_core_commit_memory_region(kvm, mem, old, new); |
610 | } | 611 | } |
611 | 612 | ||
612 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 613 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d01fc588b5c3..3024acbe1f9d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -80,6 +80,7 @@ struct sca_block { | |||
80 | #define CPUSTAT_MCDS 0x00000100 | 80 | #define CPUSTAT_MCDS 0x00000100 |
81 | #define CPUSTAT_SM 0x00000080 | 81 | #define CPUSTAT_SM 0x00000080 |
82 | #define CPUSTAT_IBS 0x00000040 | 82 | #define CPUSTAT_IBS 0x00000040 |
83 | #define CPUSTAT_GED2 0x00000010 | ||
83 | #define CPUSTAT_G 0x00000008 | 84 | #define CPUSTAT_G 0x00000008 |
84 | #define CPUSTAT_GED 0x00000004 | 85 | #define CPUSTAT_GED 0x00000004 |
85 | #define CPUSTAT_J 0x00000002 | 86 | #define CPUSTAT_J 0x00000002 |
@@ -95,7 +96,8 @@ struct kvm_s390_sie_block { | |||
95 | #define PROG_IN_SIE (1<<0) | 96 | #define PROG_IN_SIE (1<<0) |
96 | __u32 prog0c; /* 0x000c */ | 97 | __u32 prog0c; /* 0x000c */ |
97 | __u8 reserved10[16]; /* 0x0010 */ | 98 | __u8 reserved10[16]; /* 0x0010 */ |
98 | #define PROG_BLOCK_SIE 0x00000001 | 99 | #define PROG_BLOCK_SIE (1<<0) |
100 | #define PROG_REQUEST (1<<1) | ||
99 | atomic_t prog20; /* 0x0020 */ | 101 | atomic_t prog20; /* 0x0020 */ |
100 | __u8 reserved24[4]; /* 0x0024 */ | 102 | __u8 reserved24[4]; /* 0x0024 */ |
101 | __u64 cputm; /* 0x0028 */ | 103 | __u64 cputm; /* 0x0028 */ |
@@ -634,7 +636,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | |||
634 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 636 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
635 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 637 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
636 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | 638 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} |
637 | static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} | 639 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} |
638 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 640 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
639 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 641 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
640 | struct kvm_memory_slot *slot) {} | 642 | struct kvm_memory_slot *slot) {} |
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 99b44acbfcc7..3238893c9d4f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S | |||
@@ -1005,7 +1005,7 @@ ENTRY(sie64a) | |||
1005 | .Lsie_gmap: | 1005 | .Lsie_gmap: |
1006 | lg %r14,__SF_EMPTY(%r15) # get control block pointer | 1006 | lg %r14,__SF_EMPTY(%r15) # get control block pointer |
1007 | oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now | 1007 | oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now |
1008 | tm __SIE_PROG20+3(%r14),1 # last exit... | 1008 | tm __SIE_PROG20+3(%r14),3 # last exit... |
1009 | jnz .Lsie_done | 1009 | jnz .Lsie_done |
1010 | LPP __SF_EMPTY(%r15) # set guest id | 1010 | LPP __SF_EMPTY(%r15) # set guest id |
1011 | sie 0(%r14) | 1011 | sie 0(%r14) |
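The entry.S tweak pairs with the new PROG_REQUEST bit introduced in kvm_host.h above: testing the low byte of prog20 against 3 instead of 1 makes the SIE entry path bail out when either PROG_BLOCK_SIE or PROG_REQUEST is set. A minimal C restatement of that mask check, with the two values copied from the header hunk; may_enter_sie() is an illustrative name only.

    #include <assert.h>
    #include <stdio.h>

    #define PROG_BLOCK_SIE  (1 << 0)   /* block SIE (re)entry          */
    #define PROG_REQUEST    (1 << 1)   /* a request wants the VCPU out */

    /* "tm __SIE_PROG20+3(%r14),3" tests exactly these two bits:
     * 3 == PROG_BLOCK_SIE | PROG_REQUEST, so either one aborts entry. */
    static int may_enter_sie(unsigned int prog20)
    {
        return !(prog20 & (PROG_BLOCK_SIE | PROG_REQUEST));
    }

    int main(void)
    {
        assert((PROG_BLOCK_SIE | PROG_REQUEST) == 3);
        printf("clean: %d  blocked: %d  request pending: %d\n",
               may_enter_sie(0),
               may_enter_sie(PROG_BLOCK_SIE),
               may_enter_sie(PROG_REQUEST));
        return 0;
    }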
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9e3779e3e496..7365e8a46032 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -241,21 +241,6 @@ static int handle_prog(struct kvm_vcpu *vcpu) | |||
241 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); | 241 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); |
242 | } | 242 | } |
243 | 243 | ||
244 | static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) | ||
245 | { | ||
246 | int rc, rc2; | ||
247 | |||
248 | vcpu->stat.exit_instr_and_program++; | ||
249 | rc = handle_instruction(vcpu); | ||
250 | rc2 = handle_prog(vcpu); | ||
251 | |||
252 | if (rc == -EOPNOTSUPP) | ||
253 | vcpu->arch.sie_block->icptcode = 0x04; | ||
254 | if (rc) | ||
255 | return rc; | ||
256 | return rc2; | ||
257 | } | ||
258 | |||
259 | /** | 244 | /** |
260 | * handle_external_interrupt - used for external interruption interceptions | 245 | * handle_external_interrupt - used for external interruption interceptions |
261 | * | 246 | * |
@@ -355,7 +340,6 @@ static const intercept_handler_t intercept_funcs[] = { | |||
355 | [0x00 >> 2] = handle_noop, | 340 | [0x00 >> 2] = handle_noop, |
356 | [0x04 >> 2] = handle_instruction, | 341 | [0x04 >> 2] = handle_instruction, |
357 | [0x08 >> 2] = handle_prog, | 342 | [0x08 >> 2] = handle_prog, |
358 | [0x0C >> 2] = handle_instruction_and_prog, | ||
359 | [0x10 >> 2] = handle_noop, | 343 | [0x10 >> 2] = handle_noop, |
360 | [0x14 >> 2] = handle_external_interrupt, | 344 | [0x14 >> 2] = handle_external_interrupt, |
361 | [0x18 >> 2] = handle_noop, | 345 | [0x18 >> 2] = handle_noop, |
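With handle_instruction_and_prog() gone, intercept code 0x0C simply has no slot in the table any more and falls back to the generic "unsupported" path. The toy dispatcher below only illustrates the indexing convention the table relies on (intercept codes are multiples of four, so code >> 2 indexes the array); the lookup wrapper is an assumption for the sketch, not code copied from the s390 sources.

    #include <stdio.h>

    typedef int (*handler_t)(void);

    static int handle_noop(void)  { return 0; }
    static int handle_instr(void) { return 0; }
    static int handle_prog(void)  { return 0; }

    /* Unhandled codes (such as the removed 0x0C combination) stay NULL. */
    static const handler_t funcs[] = {
        [0x00 >> 2] = handle_noop,
        [0x04 >> 2] = handle_instr,
        [0x08 >> 2] = handle_prog,
    };

    static int dispatch(unsigned int code)
    {
        handler_t fn;

        if (code >> 2 >= sizeof(funcs) / sizeof(funcs[0]))
            return -1;                /* not supported */
        fn = funcs[code >> 2];
        return fn ? fn() : -1;
    }

    int main(void)
    {
        printf("0x08 -> %d, 0x0C -> %d\n", dispatch(0x08), dispatch(0x0C));
        return 0;
    }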
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0d3deef6edff..c98d89708e99 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -134,6 +134,8 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) | |||
134 | 134 | ||
135 | active_mask = pending_local_irqs(vcpu); | 135 | active_mask = pending_local_irqs(vcpu); |
136 | active_mask |= pending_floating_irqs(vcpu); | 136 | active_mask |= pending_floating_irqs(vcpu); |
137 | if (!active_mask) | ||
138 | return 0; | ||
137 | 139 | ||
138 | if (psw_extint_disabled(vcpu)) | 140 | if (psw_extint_disabled(vcpu)) |
139 | active_mask &= ~IRQ_PEND_EXT_MASK; | 141 | active_mask &= ~IRQ_PEND_EXT_MASK; |
@@ -941,12 +943,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
941 | if (cpu_timer_irq_pending(vcpu)) | 943 | if (cpu_timer_irq_pending(vcpu)) |
942 | set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); | 944 | set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); |
943 | 945 | ||
944 | do { | 946 | while ((irqs = deliverable_irqs(vcpu)) && !rc) { |
945 | irqs = deliverable_irqs(vcpu); | ||
946 | /* bits are in the order of interrupt priority */ | 947 | /* bits are in the order of interrupt priority */ |
947 | irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT); | 948 | irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT); |
948 | if (irq_type == IRQ_PEND_COUNT) | ||
949 | break; | ||
950 | if (is_ioirq(irq_type)) { | 949 | if (is_ioirq(irq_type)) { |
951 | rc = __deliver_io(vcpu, irq_type); | 950 | rc = __deliver_io(vcpu, irq_type); |
952 | } else { | 951 | } else { |
@@ -958,9 +957,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
958 | } | 957 | } |
959 | rc = func(vcpu); | 958 | rc = func(vcpu); |
960 | } | 959 | } |
961 | if (rc) | 960 | } |
962 | break; | ||
963 | } while (!rc); | ||
964 | 961 | ||
965 | set_intercept_indicators(vcpu); | 962 | set_intercept_indicators(vcpu); |
966 | 963 | ||
@@ -1061,7 +1058,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
1061 | if (sclp.has_sigpif) | 1058 | if (sclp.has_sigpif) |
1062 | return __inject_extcall_sigpif(vcpu, src_id); | 1059 | return __inject_extcall_sigpif(vcpu, src_id); |
1063 | 1060 | ||
1064 | if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) | 1061 | if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) |
1065 | return -EBUSY; | 1062 | return -EBUSY; |
1066 | *extcall = irq->u.extcall; | 1063 | *extcall = irq->u.extcall; |
1067 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 1064 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
@@ -1340,12 +1337,54 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
1340 | return 0; | 1337 | return 0; |
1341 | } | 1338 | } |
1342 | 1339 | ||
1343 | static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | 1340 | /* |
1341 | * Find a destination VCPU for a floating irq and kick it. | ||
1342 | */ | ||
1343 | static void __floating_irq_kick(struct kvm *kvm, u64 type) | ||
1344 | { | 1344 | { |
1345 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | ||
1345 | struct kvm_s390_local_interrupt *li; | 1346 | struct kvm_s390_local_interrupt *li; |
1347 | struct kvm_vcpu *dst_vcpu; | ||
1348 | int sigcpu, online_vcpus, nr_tries = 0; | ||
1349 | |||
1350 | online_vcpus = atomic_read(&kvm->online_vcpus); | ||
1351 | if (!online_vcpus) | ||
1352 | return; | ||
1353 | |||
1354 | /* find idle VCPUs first, then round robin */ | ||
1355 | sigcpu = find_first_bit(fi->idle_mask, online_vcpus); | ||
1356 | if (sigcpu == online_vcpus) { | ||
1357 | do { | ||
1358 | sigcpu = fi->next_rr_cpu; | ||
1359 | fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus; | ||
1360 | /* avoid endless loops if all vcpus are stopped */ | ||
1361 | if (nr_tries++ >= online_vcpus) | ||
1362 | return; | ||
1363 | } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu))); | ||
1364 | } | ||
1365 | dst_vcpu = kvm_get_vcpu(kvm, sigcpu); | ||
1366 | |||
1367 | /* make the VCPU drop out of the SIE, or wake it up if sleeping */ | ||
1368 | li = &dst_vcpu->arch.local_int; | ||
1369 | spin_lock(&li->lock); | ||
1370 | switch (type) { | ||
1371 | case KVM_S390_MCHK: | ||
1372 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); | ||
1373 | break; | ||
1374 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | ||
1375 | atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags); | ||
1376 | break; | ||
1377 | default: | ||
1378 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | ||
1379 | break; | ||
1380 | } | ||
1381 | spin_unlock(&li->lock); | ||
1382 | kvm_s390_vcpu_wakeup(dst_vcpu); | ||
1383 | } | ||
1384 | |||
1385 | static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | ||
1386 | { | ||
1346 | struct kvm_s390_float_interrupt *fi; | 1387 | struct kvm_s390_float_interrupt *fi; |
1347 | struct kvm_vcpu *dst_vcpu = NULL; | ||
1348 | int sigcpu; | ||
1349 | u64 type = READ_ONCE(inti->type); | 1388 | u64 type = READ_ONCE(inti->type); |
1350 | int rc; | 1389 | int rc; |
1351 | 1390 | ||
@@ -1373,32 +1412,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
1373 | if (rc) | 1412 | if (rc) |
1374 | return rc; | 1413 | return rc; |
1375 | 1414 | ||
1376 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); | 1415 | __floating_irq_kick(kvm, type); |
1377 | if (sigcpu == KVM_MAX_VCPUS) { | ||
1378 | do { | ||
1379 | sigcpu = fi->next_rr_cpu++; | ||
1380 | if (sigcpu == KVM_MAX_VCPUS) | ||
1381 | sigcpu = fi->next_rr_cpu = 0; | ||
1382 | } while (kvm_get_vcpu(kvm, sigcpu) == NULL); | ||
1383 | } | ||
1384 | dst_vcpu = kvm_get_vcpu(kvm, sigcpu); | ||
1385 | li = &dst_vcpu->arch.local_int; | ||
1386 | spin_lock(&li->lock); | ||
1387 | switch (type) { | ||
1388 | case KVM_S390_MCHK: | ||
1389 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); | ||
1390 | break; | ||
1391 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | ||
1392 | atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags); | ||
1393 | break; | ||
1394 | default: | ||
1395 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | ||
1396 | break; | ||
1397 | } | ||
1398 | spin_unlock(&li->lock); | ||
1399 | kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); | ||
1400 | return 0; | 1416 | return 0; |
1401 | |||
1402 | } | 1417 | } |
1403 | 1418 | ||
1404 | int kvm_s390_inject_vm(struct kvm *kvm, | 1419 | int kvm_s390_inject_vm(struct kvm *kvm, |
@@ -1606,6 +1621,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) | |||
1606 | int i; | 1621 | int i; |
1607 | 1622 | ||
1608 | spin_lock(&fi->lock); | 1623 | spin_lock(&fi->lock); |
1624 | fi->pending_irqs = 0; | ||
1625 | memset(&fi->srv_signal, 0, sizeof(fi->srv_signal)); | ||
1626 | memset(&fi->mchk, 0, sizeof(fi->mchk)); | ||
1609 | for (i = 0; i < FIRQ_LIST_COUNT; i++) | 1627 | for (i = 0; i < FIRQ_LIST_COUNT; i++) |
1610 | clear_irq_list(&fi->lists[i]); | 1628 | clear_irq_list(&fi->lists[i]); |
1611 | for (i = 0; i < FIRQ_MAX_COUNT; i++) | 1629 | for (i = 0; i < FIRQ_MAX_COUNT; i++) |
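__floating_irq_kick() concentrates the destination search that used to live in __inject_vm(): prefer an idle VCPU, otherwise walk the online VCPUs round-robin, and give up after online_vcpus attempts so a guest whose CPUs are all stopped can no longer spin the loop forever. The standalone model below reproduces just that selection logic; the boolean arrays stand in for the kernel's idle_mask and per-VCPU stopped state.

    #include <stdio.h>
    #include <stdbool.h>

    #define NO_TARGET -1

    static int pick_target(const bool *idle, const bool *stopped,
                           int online_vcpus, int *next_rr_cpu)
    {
        int i, cpu, tries;

        /* idle VCPUs first ... */
        for (i = 0; i < online_vcpus; i++)
            if (idle[i])
                return i;

        /* ... then round robin, bounded by the number of online VCPUs */
        for (tries = 0; tries < online_vcpus; tries++) {
            cpu = *next_rr_cpu;
            *next_rr_cpu = (*next_rr_cpu + 1) % online_vcpus;
            if (!stopped[cpu])
                return cpu;
        }
        return NO_TARGET;   /* every VCPU is stopped: drop the kick */
    }

    int main(void)
    {
        bool idle[4]    = { false, false, false, false };
        bool stopped[4] = { true,  false, true,  false };
        int rr = 0;

        printf("first kick  -> vcpu %d\n", pick_target(idle, stopped, 4, &rr));
        printf("second kick -> vcpu %d\n", pick_target(idle, stopped, 4, &rr));
        return 0;
    }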
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c4e81b26c1b0..2078f92d15ac 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -36,6 +36,10 @@ | |||
36 | #include "kvm-s390.h" | 36 | #include "kvm-s390.h" |
37 | #include "gaccess.h" | 37 | #include "gaccess.h" |
38 | 38 | ||
39 | #define KMSG_COMPONENT "kvm-s390" | ||
40 | #undef pr_fmt | ||
41 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | ||
42 | |||
39 | #define CREATE_TRACE_POINTS | 43 | #define CREATE_TRACE_POINTS |
40 | #include "trace.h" | 44 | #include "trace.h" |
41 | #include "trace-s390.h" | 45 | #include "trace-s390.h" |
@@ -110,7 +114,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
110 | /* upper facilities limit for kvm */ | 114 | /* upper facilities limit for kvm */ |
111 | unsigned long kvm_s390_fac_list_mask[] = { | 115 | unsigned long kvm_s390_fac_list_mask[] = { |
112 | 0xffe6fffbfcfdfc40UL, | 116 | 0xffe6fffbfcfdfc40UL, |
113 | 0x005c800000000000UL, | 117 | 0x005e800000000000UL, |
114 | }; | 118 | }; |
115 | 119 | ||
116 | unsigned long kvm_s390_fac_list_mask_size(void) | 120 | unsigned long kvm_s390_fac_list_mask_size(void) |
@@ -236,6 +240,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
236 | { | 240 | { |
237 | int r; | 241 | int r; |
238 | unsigned long n; | 242 | unsigned long n; |
243 | struct kvm_memslots *slots; | ||
239 | struct kvm_memory_slot *memslot; | 244 | struct kvm_memory_slot *memslot; |
240 | int is_dirty = 0; | 245 | int is_dirty = 0; |
241 | 246 | ||
@@ -245,7 +250,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
245 | if (log->slot >= KVM_USER_MEM_SLOTS) | 250 | if (log->slot >= KVM_USER_MEM_SLOTS) |
246 | goto out; | 251 | goto out; |
247 | 252 | ||
248 | memslot = id_to_memslot(kvm->memslots, log->slot); | 253 | slots = kvm_memslots(kvm); |
254 | memslot = id_to_memslot(slots, log->slot); | ||
249 | r = -ENOENT; | 255 | r = -ENOENT; |
250 | if (!memslot->dirty_bitmap) | 256 | if (!memslot->dirty_bitmap) |
251 | goto out; | 257 | goto out; |
@@ -454,10 +460,10 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) | |||
454 | 460 | ||
455 | mutex_lock(&kvm->lock); | 461 | mutex_lock(&kvm->lock); |
456 | kvm->arch.epoch = gtod - host_tod; | 462 | kvm->arch.epoch = gtod - host_tod; |
457 | kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) { | 463 | kvm_s390_vcpu_block_all(kvm); |
464 | kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) | ||
458 | cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; | 465 | cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; |
459 | exit_sie(cur_vcpu); | 466 | kvm_s390_vcpu_unblock_all(kvm); |
460 | } | ||
461 | mutex_unlock(&kvm->lock); | 467 | mutex_unlock(&kvm->lock); |
462 | return 0; | 468 | return 0; |
463 | } | 469 | } |
@@ -1311,8 +1317,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1311 | 1317 | ||
1312 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | | 1318 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | |
1313 | CPUSTAT_SM | | 1319 | CPUSTAT_SM | |
1314 | CPUSTAT_STOPPED | | 1320 | CPUSTAT_STOPPED); |
1315 | CPUSTAT_GED); | 1321 | |
1322 | if (test_kvm_facility(vcpu->kvm, 78)) | ||
1323 | atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags); | ||
1324 | else if (test_kvm_facility(vcpu->kvm, 8)) | ||
1325 | atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags); | ||
1326 | |||
1316 | kvm_s390_vcpu_setup_model(vcpu); | 1327 | kvm_s390_vcpu_setup_model(vcpu); |
1317 | 1328 | ||
1318 | vcpu->arch.sie_block->ecb = 6; | 1329 | vcpu->arch.sie_block->ecb = 6; |
@@ -1409,16 +1420,28 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
1409 | return kvm_s390_vcpu_has_irq(vcpu, 0); | 1420 | return kvm_s390_vcpu_has_irq(vcpu, 0); |
1410 | } | 1421 | } |
1411 | 1422 | ||
1412 | void s390_vcpu_block(struct kvm_vcpu *vcpu) | 1423 | void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) |
1413 | { | 1424 | { |
1414 | atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); | 1425 | atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); |
1426 | exit_sie(vcpu); | ||
1415 | } | 1427 | } |
1416 | 1428 | ||
1417 | void s390_vcpu_unblock(struct kvm_vcpu *vcpu) | 1429 | void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) |
1418 | { | 1430 | { |
1419 | atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); | 1431 | atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); |
1420 | } | 1432 | } |
1421 | 1433 | ||
1434 | static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) | ||
1435 | { | ||
1436 | atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20); | ||
1437 | exit_sie(vcpu); | ||
1438 | } | ||
1439 | |||
1440 | static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) | ||
1441 | { | ||
1442 | atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20); | ||
1443 | } | ||
1444 | |||
1422 | /* | 1445 | /* |
1423 | * Kick a guest cpu out of SIE and wait until SIE is not running. | 1446 | * Kick a guest cpu out of SIE and wait until SIE is not running. |
1424 | * If the CPU is not running (e.g. waiting as idle) the function will | 1447 | * If the CPU is not running (e.g. waiting as idle) the function will |
@@ -1430,11 +1453,11 @@ void exit_sie(struct kvm_vcpu *vcpu) | |||
1430 | cpu_relax(); | 1453 | cpu_relax(); |
1431 | } | 1454 | } |
1432 | 1455 | ||
1433 | /* Kick a guest cpu out of SIE and prevent SIE-reentry */ | 1456 | /* Kick a guest cpu out of SIE to process a request synchronously */ |
1434 | void exit_sie_sync(struct kvm_vcpu *vcpu) | 1457 | void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) |
1435 | { | 1458 | { |
1436 | s390_vcpu_block(vcpu); | 1459 | kvm_make_request(req, vcpu); |
1437 | exit_sie(vcpu); | 1460 | kvm_s390_vcpu_request(vcpu); |
1438 | } | 1461 | } |
1439 | 1462 | ||
1440 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) | 1463 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) |
@@ -1447,8 +1470,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) | |||
1447 | /* match against both prefix pages */ | 1470 | /* match against both prefix pages */ |
1448 | if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) { | 1471 | if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) { |
1449 | VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); | 1472 | VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); |
1450 | kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); | 1473 | kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); |
1451 | exit_sie_sync(vcpu); | ||
1452 | } | 1474 | } |
1453 | } | 1475 | } |
1454 | } | 1476 | } |
@@ -1720,8 +1742,10 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) | |||
1720 | 1742 | ||
1721 | static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) | 1743 | static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) |
1722 | { | 1744 | { |
1745 | if (!vcpu->requests) | ||
1746 | return 0; | ||
1723 | retry: | 1747 | retry: |
1724 | s390_vcpu_unblock(vcpu); | 1748 | kvm_s390_vcpu_request_handled(vcpu); |
1725 | /* | 1749 | /* |
1726 | * We use MMU_RELOAD just to re-arm the ipte notifier for the | 1750 | * We use MMU_RELOAD just to re-arm the ipte notifier for the |
1727 | * guest prefix page. gmap_ipte_notify will wait on the ptl lock. | 1751 | * guest prefix page. gmap_ipte_notify will wait on the ptl lock. |
@@ -1993,12 +2017,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
1993 | * As PF_VCPU will be used in fault handler, between | 2017 | * As PF_VCPU will be used in fault handler, between |
1994 | * guest_enter and guest_exit should be no uaccess. | 2018 | * guest_enter and guest_exit should be no uaccess. |
1995 | */ | 2019 | */ |
1996 | preempt_disable(); | 2020 | local_irq_disable(); |
1997 | kvm_guest_enter(); | 2021 | __kvm_guest_enter(); |
1998 | preempt_enable(); | 2022 | local_irq_enable(); |
1999 | exit_reason = sie64a(vcpu->arch.sie_block, | 2023 | exit_reason = sie64a(vcpu->arch.sie_block, |
2000 | vcpu->run->s.regs.gprs); | 2024 | vcpu->run->s.regs.gprs); |
2001 | kvm_guest_exit(); | 2025 | local_irq_disable(); |
2026 | __kvm_guest_exit(); | ||
2027 | local_irq_enable(); | ||
2002 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 2028 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
2003 | 2029 | ||
2004 | rc = vcpu_post_run(vcpu, exit_reason); | 2030 | rc = vcpu_post_run(vcpu, exit_reason); |
@@ -2068,7 +2094,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2068 | if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { | 2094 | if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { |
2069 | kvm_s390_vcpu_start(vcpu); | 2095 | kvm_s390_vcpu_start(vcpu); |
2070 | } else if (is_vcpu_stopped(vcpu)) { | 2096 | } else if (is_vcpu_stopped(vcpu)) { |
2071 | pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n", | 2097 | pr_err_ratelimited("can't run stopped vcpu %d\n", |
2072 | vcpu->vcpu_id); | 2098 | vcpu->vcpu_id); |
2073 | return -EINVAL; | 2099 | return -EINVAL; |
2074 | } | 2100 | } |
@@ -2206,8 +2232,7 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
2206 | static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) | 2232 | static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) |
2207 | { | 2233 | { |
2208 | kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); | 2234 | kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); |
2209 | kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu); | 2235 | kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); |
2210 | exit_sie_sync(vcpu); | ||
2211 | } | 2236 | } |
2212 | 2237 | ||
2213 | static void __disable_ibs_on_all_vcpus(struct kvm *kvm) | 2238 | static void __disable_ibs_on_all_vcpus(struct kvm *kvm) |
@@ -2223,8 +2248,7 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm) | |||
2223 | static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) | 2248 | static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) |
2224 | { | 2249 | { |
2225 | kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); | 2250 | kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); |
2226 | kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu); | 2251 | kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); |
2227 | exit_sie_sync(vcpu); | ||
2228 | } | 2252 | } |
2229 | 2253 | ||
2230 | void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) | 2254 | void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) |
@@ -2563,7 +2587,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
2563 | /* Section: memory related */ | 2587 | /* Section: memory related */ |
2564 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 2588 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
2565 | struct kvm_memory_slot *memslot, | 2589 | struct kvm_memory_slot *memslot, |
2566 | struct kvm_userspace_memory_region *mem, | 2590 | const struct kvm_userspace_memory_region *mem, |
2567 | enum kvm_mr_change change) | 2591 | enum kvm_mr_change change) |
2568 | { | 2592 | { |
2569 | /* A few sanity checks. We can have memory slots which have to be | 2593 | /* A few sanity checks. We can have memory slots which have to be |
@@ -2581,8 +2605,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
2581 | } | 2605 | } |
2582 | 2606 | ||
2583 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 2607 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
2584 | struct kvm_userspace_memory_region *mem, | 2608 | const struct kvm_userspace_memory_region *mem, |
2585 | const struct kvm_memory_slot *old, | 2609 | const struct kvm_memory_slot *old, |
2610 | const struct kvm_memory_slot *new, | ||
2586 | enum kvm_mr_change change) | 2611 | enum kvm_mr_change change) |
2587 | { | 2612 | { |
2588 | int rc; | 2613 | int rc; |
@@ -2601,7 +2626,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
2601 | rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, | 2626 | rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, |
2602 | mem->guest_phys_addr, mem->memory_size); | 2627 | mem->guest_phys_addr, mem->memory_size); |
2603 | if (rc) | 2628 | if (rc) |
2604 | printk(KERN_WARNING "kvm-s390: failed to commit memory region\n"); | 2629 | pr_warn("failed to commit memory region\n"); |
2605 | return; | 2630 | return; |
2606 | } | 2631 | } |
2607 | 2632 | ||
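The kvm-s390.c changes converge on one synchronous-request pattern: kvm_s390_sync_request() records the request, raises PROG_REQUEST and kicks the VCPU out of SIE, kvm_s390_handle_requests() first clears PROG_REQUEST (and short-circuits when vcpu->requests is empty) before acting on the pending bits, and the entry path refuses to re-enter SIE while either PROG_ bit is set. The condensed model below restates that handshake with C11 atomics standing in for the kernel's atomic_t; the struct and function names are illustrative, not the kernel's.

    #include <stdatomic.h>
    #include <stdio.h>

    #define PROG_BLOCK_SIE  (1 << 0)
    #define PROG_REQUEST    (1 << 1)

    struct vcpu_model {
        atomic_uint prog20;     /* stand-in for sie_block->prog20 */
        atomic_uint requests;   /* stand-in for vcpu->requests    */
    };

    /* Requester side: queue the request and keep the VCPU out of SIE. */
    static void sync_request(struct vcpu_model *v, unsigned int req)
    {
        atomic_fetch_or(&v->requests, 1u << req);
        atomic_fetch_or(&v->prog20, PROG_REQUEST);
        /* the real helper also kicks the VCPU so it leaves SIE */
    }

    /* VCPU side: acknowledge first, so a new request re-arms the flag. */
    static void handle_requests(struct vcpu_model *v)
    {
        unsigned int pending;

        atomic_fetch_and(&v->prog20, ~(unsigned int)PROG_REQUEST);
        pending = atomic_exchange(&v->requests, 0);
        printf("handling request bits 0x%x\n", pending);
    }

    static int may_enter_sie(struct vcpu_model *v)
    {
        return !(atomic_load(&v->prog20) & (PROG_BLOCK_SIE | PROG_REQUEST));
    }

    int main(void)
    {
        struct vcpu_model v = { 0, 0 };

        sync_request(&v, 3);
        printf("SIE entry allowed: %d\n", may_enter_sie(&v));  /* 0 */
        handle_requests(&v);
        printf("SIE entry allowed: %d\n", may_enter_sie(&v));  /* 1 */
        return 0;
    }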
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ca108b90ae56..c5704786e473 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -211,10 +211,10 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); | |||
211 | int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr); | 211 | int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr); |
212 | void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); | 212 | void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); |
213 | void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); | 213 | void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); |
214 | void s390_vcpu_block(struct kvm_vcpu *vcpu); | 214 | void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu); |
215 | void s390_vcpu_unblock(struct kvm_vcpu *vcpu); | 215 | void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu); |
216 | void exit_sie(struct kvm_vcpu *vcpu); | 216 | void exit_sie(struct kvm_vcpu *vcpu); |
217 | void exit_sie_sync(struct kvm_vcpu *vcpu); | 217 | void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu); |
218 | int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); | 218 | int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); |
219 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); | 219 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); |
220 | /* is cmma enabled */ | 220 | /* is cmma enabled */ |
@@ -228,6 +228,25 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | |||
228 | int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, | 228 | int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, |
229 | struct kvm_s390_pgm_info *pgm_info); | 229 | struct kvm_s390_pgm_info *pgm_info); |
230 | 230 | ||
231 | static inline void kvm_s390_vcpu_block_all(struct kvm *kvm) | ||
232 | { | ||
233 | int i; | ||
234 | struct kvm_vcpu *vcpu; | ||
235 | |||
236 | WARN_ON(!mutex_is_locked(&kvm->lock)); | ||
237 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
238 | kvm_s390_vcpu_block(vcpu); | ||
239 | } | ||
240 | |||
241 | static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm) | ||
242 | { | ||
243 | int i; | ||
244 | struct kvm_vcpu *vcpu; | ||
245 | |||
246 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
247 | kvm_s390_vcpu_unblock(vcpu); | ||
248 | } | ||
249 | |||
231 | /** | 250 | /** |
232 | * kvm_s390_inject_prog_cond - conditionally inject a program check | 251 | * kvm_s390_inject_prog_cond - conditionally inject a program check |
233 | * @vcpu: virtual cpu | 252 | * @vcpu: virtual cpu |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d22d8ee1ff9d..ad4242245771 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -698,10 +698,14 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) | |||
698 | case 0x00001000: | 698 | case 0x00001000: |
699 | end = (start + (1UL << 20)) & ~((1UL << 20) - 1); | 699 | end = (start + (1UL << 20)) & ~((1UL << 20) - 1); |
700 | break; | 700 | break; |
701 | /* We dont support EDAT2 | ||
702 | case 0x00002000: | 701 | case 0x00002000: |
702 | /* only support 2G frame size if EDAT2 is available and we are | ||
703 | not in 24-bit addressing mode */ | ||
704 | if (!test_kvm_facility(vcpu->kvm, 78) || | ||
705 | psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT) | ||
706 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
703 | end = (start + (1UL << 31)) & ~((1UL << 31) - 1); | 707 | end = (start + (1UL << 31)) & ~((1UL << 31) - 1); |
704 | break;*/ | 708 | break; |
705 | default: | 709 | default: |
706 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 710 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
707 | } | 711 | } |
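handle_pfmf() now accepts the 2G frame size when facility 78 (EDAT2) is available and the guest is not in 24-bit addressing mode; the end address is computed exactly like the 4K and 1M cases, by rounding the start up to the next frame boundary. A quick standalone check of that rounding arithmetic:

    #include <stdio.h>

    /* Mirrors "end = (start + size) & ~(size - 1)" from handle_pfmf()
     * for a power-of-two frame size. */
    static unsigned long frame_end(unsigned long start, unsigned long size)
    {
        return (start + size) & ~(size - 1);
    }

    int main(void)
    {
        unsigned long start = 0x12345678UL;

        printf("4K : 0x%lx\n", frame_end(start, 1UL << 12));
        printf("1M : 0x%lx\n", frame_end(start, 1UL << 20));
        printf("2G : 0x%lx\n", frame_end(start, 1UL << 31));
        return 0;
    }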
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 57a9d94fe160..e16466ec473c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -193,6 +193,8 @@ struct x86_emulate_ops { | |||
193 | int (*cpl)(struct x86_emulate_ctxt *ctxt); | 193 | int (*cpl)(struct x86_emulate_ctxt *ctxt); |
194 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); | 194 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); |
195 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | 195 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); |
196 | u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); | ||
197 | void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); | ||
196 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); | 198 | int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); |
197 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); | 199 | int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); |
198 | int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); | 200 | int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); |
@@ -262,6 +264,11 @@ enum x86emul_mode { | |||
262 | X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */ | 264 | X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */ |
263 | }; | 265 | }; |
264 | 266 | ||
267 | /* These match some of the HF_* flags defined in kvm_host.h */ | ||
268 | #define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ | ||
269 | #define X86EMUL_SMM_MASK (1 << 6) | ||
270 | #define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) | ||
271 | |||
265 | struct x86_emulate_ctxt { | 272 | struct x86_emulate_ctxt { |
266 | const struct x86_emulate_ops *ops; | 273 | const struct x86_emulate_ops *ops; |
267 | 274 | ||
@@ -273,8 +280,8 @@ struct x86_emulate_ctxt { | |||
273 | 280 | ||
274 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 281 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
275 | int interruptibility; | 282 | int interruptibility; |
283 | int emul_flags; | ||
276 | 284 | ||
277 | bool guest_mode; /* guest running a nested guest */ | ||
278 | bool perm_ok; /* do not check permissions if true */ | 285 | bool perm_ok; /* do not check permissions if true */ |
279 | bool ud; /* inject an #UD if host doesn't support insn */ | 286 | bool ud; /* inject an #UD if host doesn't support insn */ |
280 | 287 | ||
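The emulator context replaces the lone guest_mode bool with an emul_flags word, and the new X86EMUL_* masks are chosen to equal the corresponding HF_* hflags bits (1 << 5, 6 and 7) so the vendor-neutral code can pass the relevant hflags through unchanged. Below is a compile-time check of that correspondence; the HF_* values are repeated here from the kvm_host.h hunk further down.

    #include <assert.h>

    /* from arch/x86/include/asm/kvm_emulate.h */
    #define X86EMUL_GUEST_MASK           (1 << 5)
    #define X86EMUL_SMM_MASK             (1 << 6)
    #define X86EMUL_SMM_INSIDE_NMI_MASK  (1 << 7)

    /* matching hflags from arch/x86/include/asm/kvm_host.h */
    #define HF_GUEST_MASK                (1 << 5)
    #define HF_SMM_MASK                  (1 << 6)
    #define HF_SMM_INSIDE_NMI_MASK       (1 << 7)

    static_assert(X86EMUL_GUEST_MASK == HF_GUEST_MASK, "guest bit");
    static_assert(X86EMUL_SMM_MASK == HF_SMM_MASK, "SMM bit");
    static_assert(X86EMUL_SMM_INSIDE_NMI_MASK == HF_SMM_INSIDE_NMI_MASK,
                  "SMM-inside-NMI bit");

    int main(void) { return 0; }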
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8c0ec3a4a97..c7fa57b529d2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -184,23 +184,12 @@ struct kvm_mmu_memory_cache { | |||
184 | void *objects[KVM_NR_MEM_OBJS]; | 184 | void *objects[KVM_NR_MEM_OBJS]; |
185 | }; | 185 | }; |
186 | 186 | ||
187 | /* | ||
188 | * kvm_mmu_page_role, below, is defined as: | ||
189 | * | ||
190 | * bits 0:3 - total guest paging levels (2-4, or zero for real mode) | ||
191 | * bits 4:7 - page table level for this shadow (1-4) | ||
192 | * bits 8:9 - page table quadrant for 2-level guests | ||
193 | * bit 16 - direct mapping of virtual to physical mapping at gfn | ||
194 | * used for real mode and two-dimensional paging | ||
195 | * bits 17:19 - common access permissions for all ptes in this shadow page | ||
196 | */ | ||
197 | union kvm_mmu_page_role { | 187 | union kvm_mmu_page_role { |
198 | unsigned word; | 188 | unsigned word; |
199 | struct { | 189 | struct { |
200 | unsigned level:4; | 190 | unsigned level:4; |
201 | unsigned cr4_pae:1; | 191 | unsigned cr4_pae:1; |
202 | unsigned quadrant:2; | 192 | unsigned quadrant:2; |
203 | unsigned pad_for_nice_hex_output:6; | ||
204 | unsigned direct:1; | 193 | unsigned direct:1; |
205 | unsigned access:3; | 194 | unsigned access:3; |
206 | unsigned invalid:1; | 195 | unsigned invalid:1; |
@@ -208,6 +197,15 @@ union kvm_mmu_page_role { | |||
208 | unsigned cr0_wp:1; | 197 | unsigned cr0_wp:1; |
209 | unsigned smep_andnot_wp:1; | 198 | unsigned smep_andnot_wp:1; |
210 | unsigned smap_andnot_wp:1; | 199 | unsigned smap_andnot_wp:1; |
200 | unsigned :8; | ||
201 | |||
202 | /* | ||
203 | * This is left at the top of the word so that | ||
204 | * kvm_memslots_for_spte_role can extract it with a | ||
205 | * simple shift. While there is room, give it a whole | ||
206 | * byte so it is also faster to load it from memory. | ||
207 | */ | ||
208 | unsigned smm:8; | ||
211 | }; | 209 | }; |
212 | }; | 210 | }; |
213 | 211 | ||
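The page-role union loses its prose comment and the hex-padding field; the named bits now add up to 16, an anonymous 8-bit pad follows, and smm occupies the whole top byte so kvm_memslots_for_spte_role() can extract it from role.word with a plain shift. The layout check below mirrors the usual little-endian GCC/clang bitfield allocation on x86; bitfield ordering is compiler-defined, so treat it as an illustration rather than a guarantee.

    #include <stdio.h>

    /* Reduced copy of the union, for layout illustration only. */
    union page_role {
        unsigned word;
        struct {
            unsigned level:4;
            unsigned cr4_pae:1;
            unsigned quadrant:2;
            unsigned direct:1;
            unsigned access:3;
            unsigned invalid:1;
            unsigned nxe:1;
            unsigned cr0_wp:1;
            unsigned smep_andnot_wp:1;
            unsigned smap_andnot_wp:1;
            unsigned :8;      /* pad: 16 named bits used so far     */
            unsigned smm:8;   /* whole top byte, cheap to shift out */
        };
    };

    int main(void)
    {
        union page_role role = { .word = 0 };

        role.smm = 1;
        printf("word = 0x%08x, smm via shift = %u\n",
               role.word, (role.word >> 24) & 0xff);
        return 0;
    }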
@@ -338,12 +336,28 @@ struct kvm_pmu { | |||
338 | u64 reprogram_pmi; | 336 | u64 reprogram_pmi; |
339 | }; | 337 | }; |
340 | 338 | ||
339 | struct kvm_pmu_ops; | ||
340 | |||
341 | enum { | 341 | enum { |
342 | KVM_DEBUGREG_BP_ENABLED = 1, | 342 | KVM_DEBUGREG_BP_ENABLED = 1, |
343 | KVM_DEBUGREG_WONT_EXIT = 2, | 343 | KVM_DEBUGREG_WONT_EXIT = 2, |
344 | KVM_DEBUGREG_RELOAD = 4, | 344 | KVM_DEBUGREG_RELOAD = 4, |
345 | }; | 345 | }; |
346 | 346 | ||
347 | struct kvm_mtrr_range { | ||
348 | u64 base; | ||
349 | u64 mask; | ||
350 | struct list_head node; | ||
351 | }; | ||
352 | |||
353 | struct kvm_mtrr { | ||
354 | struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR]; | ||
355 | mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION]; | ||
356 | u64 deftype; | ||
357 | |||
358 | struct list_head head; | ||
359 | }; | ||
360 | |||
347 | struct kvm_vcpu_arch { | 361 | struct kvm_vcpu_arch { |
348 | /* | 362 | /* |
349 | * rip and regs accesses must go through | 363 | * rip and regs accesses must go through |
@@ -368,6 +382,7 @@ struct kvm_vcpu_arch { | |||
368 | int32_t apic_arb_prio; | 382 | int32_t apic_arb_prio; |
369 | int mp_state; | 383 | int mp_state; |
370 | u64 ia32_misc_enable_msr; | 384 | u64 ia32_misc_enable_msr; |
385 | u64 smbase; | ||
371 | bool tpr_access_reporting; | 386 | bool tpr_access_reporting; |
372 | u64 ia32_xss; | 387 | u64 ia32_xss; |
373 | 388 | ||
@@ -471,8 +486,9 @@ struct kvm_vcpu_arch { | |||
471 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ | 486 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
472 | unsigned nmi_pending; /* NMI queued after currently running handler */ | 487 | unsigned nmi_pending; /* NMI queued after currently running handler */ |
473 | bool nmi_injected; /* Trying to inject an NMI this entry */ | 488 | bool nmi_injected; /* Trying to inject an NMI this entry */ |
489 | bool smi_pending; /* SMI queued after currently running handler */ | ||
474 | 490 | ||
475 | struct mtrr_state_type mtrr_state; | 491 | struct kvm_mtrr mtrr_state; |
476 | u64 pat; | 492 | u64 pat; |
477 | 493 | ||
478 | unsigned switch_db_regs; | 494 | unsigned switch_db_regs; |
@@ -637,6 +653,8 @@ struct kvm_arch { | |||
637 | #endif | 653 | #endif |
638 | 654 | ||
639 | bool boot_vcpu_runs_old_kvmclock; | 655 | bool boot_vcpu_runs_old_kvmclock; |
656 | |||
657 | u64 disabled_quirks; | ||
640 | }; | 658 | }; |
641 | 659 | ||
642 | struct kvm_vm_stat { | 660 | struct kvm_vm_stat { |
@@ -689,12 +707,13 @@ struct msr_data { | |||
689 | 707 | ||
690 | struct kvm_lapic_irq { | 708 | struct kvm_lapic_irq { |
691 | u32 vector; | 709 | u32 vector; |
692 | u32 delivery_mode; | 710 | u16 delivery_mode; |
693 | u32 dest_mode; | 711 | u16 dest_mode; |
694 | u32 level; | 712 | bool level; |
695 | u32 trig_mode; | 713 | u16 trig_mode; |
696 | u32 shorthand; | 714 | u32 shorthand; |
697 | u32 dest_id; | 715 | u32 dest_id; |
716 | bool msi_redir_hint; | ||
698 | }; | 717 | }; |
699 | 718 | ||
700 | struct kvm_x86_ops { | 719 | struct kvm_x86_ops { |
@@ -706,19 +725,20 @@ struct kvm_x86_ops { | |||
706 | int (*hardware_setup)(void); /* __init */ | 725 | int (*hardware_setup)(void); /* __init */ |
707 | void (*hardware_unsetup)(void); /* __exit */ | 726 | void (*hardware_unsetup)(void); /* __exit */ |
708 | bool (*cpu_has_accelerated_tpr)(void); | 727 | bool (*cpu_has_accelerated_tpr)(void); |
728 | bool (*cpu_has_high_real_mode_segbase)(void); | ||
709 | void (*cpuid_update)(struct kvm_vcpu *vcpu); | 729 | void (*cpuid_update)(struct kvm_vcpu *vcpu); |
710 | 730 | ||
711 | /* Create, but do not attach this VCPU */ | 731 | /* Create, but do not attach this VCPU */ |
712 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); | 732 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); |
713 | void (*vcpu_free)(struct kvm_vcpu *vcpu); | 733 | void (*vcpu_free)(struct kvm_vcpu *vcpu); |
714 | void (*vcpu_reset)(struct kvm_vcpu *vcpu); | 734 | void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); |
715 | 735 | ||
716 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); | 736 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); |
717 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 737 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
718 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | 738 | void (*vcpu_put)(struct kvm_vcpu *vcpu); |
719 | 739 | ||
720 | void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); | 740 | void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); |
721 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 741 | int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
722 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); | 742 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
723 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 743 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
724 | void (*get_segment)(struct kvm_vcpu *vcpu, | 744 | void (*get_segment)(struct kvm_vcpu *vcpu, |
@@ -836,6 +856,8 @@ struct kvm_x86_ops { | |||
836 | void (*enable_log_dirty_pt_masked)(struct kvm *kvm, | 856 | void (*enable_log_dirty_pt_masked)(struct kvm *kvm, |
837 | struct kvm_memory_slot *slot, | 857 | struct kvm_memory_slot *slot, |
838 | gfn_t offset, unsigned long mask); | 858 | gfn_t offset, unsigned long mask); |
859 | /* pmu operations of sub-arch */ | ||
860 | const struct kvm_pmu_ops *pmu_ops; | ||
839 | }; | 861 | }; |
840 | 862 | ||
841 | struct kvm_arch_async_pf { | 863 | struct kvm_arch_async_pf { |
@@ -871,7 +893,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | |||
871 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, | 893 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
872 | struct kvm_memory_slot *memslot); | 894 | struct kvm_memory_slot *memslot); |
873 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, | 895 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, |
874 | struct kvm_memory_slot *memslot); | 896 | const struct kvm_memory_slot *memslot); |
875 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, | 897 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
876 | struct kvm_memory_slot *memslot); | 898 | struct kvm_memory_slot *memslot); |
877 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, | 899 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, |
@@ -882,7 +904,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, | |||
882 | struct kvm_memory_slot *slot, | 904 | struct kvm_memory_slot *slot, |
883 | gfn_t gfn_offset, unsigned long mask); | 905 | gfn_t gfn_offset, unsigned long mask); |
884 | void kvm_mmu_zap_all(struct kvm *kvm); | 906 | void kvm_mmu_zap_all(struct kvm *kvm); |
885 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); | 907 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); |
886 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 908 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
887 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 909 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
888 | 910 | ||
@@ -890,7 +912,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); | |||
890 | 912 | ||
891 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 913 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
892 | const void *val, int bytes); | 914 | const void *val, int bytes); |
893 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
894 | 915 | ||
895 | struct kvm_irq_mask_notifier { | 916 | struct kvm_irq_mask_notifier { |
896 | void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); | 917 | void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); |
@@ -938,7 +959,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, | |||
938 | 959 | ||
939 | void kvm_enable_efer_bits(u64); | 960 | void kvm_enable_efer_bits(u64); |
940 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); | 961 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); |
941 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 962 | int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); |
942 | int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); | 963 | int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); |
943 | 964 | ||
944 | struct x86_emulate_ctxt; | 965 | struct x86_emulate_ctxt; |
@@ -967,7 +988,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | |||
967 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 988 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
968 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); | 989 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); |
969 | 990 | ||
970 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 991 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
971 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); | 992 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
972 | 993 | ||
973 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); | 994 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); |
@@ -1110,6 +1131,14 @@ enum { | |||
1110 | #define HF_NMI_MASK (1 << 3) | 1131 | #define HF_NMI_MASK (1 << 3) |
1111 | #define HF_IRET_MASK (1 << 4) | 1132 | #define HF_IRET_MASK (1 << 4) |
1112 | #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ | 1133 | #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ |
1134 | #define HF_SMM_MASK (1 << 6) | ||
1135 | #define HF_SMM_INSIDE_NMI_MASK (1 << 7) | ||
1136 | |||
1137 | #define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE | ||
1138 | #define KVM_ADDRESS_SPACE_NUM 2 | ||
1139 | |||
1140 | #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) | ||
1141 | #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) | ||
1113 | 1142 | ||
1114 | /* | 1143 | /* |
1115 | * Hardware virtualization extension instructions may fault if a | 1144 | * Hardware virtualization extension instructions may fault if a |
@@ -1144,7 +1173,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); | |||
1144 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 1173 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
1145 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 1174 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
1146 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 1175 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
1147 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu); | 1176 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); |
1148 | void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); | 1177 | void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); |
1149 | void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | 1178 | void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, |
1150 | unsigned long address); | 1179 | unsigned long address); |
@@ -1168,16 +1197,9 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); | |||
1168 | 1197 | ||
1169 | int kvm_is_in_guest(void); | 1198 | int kvm_is_in_guest(void); |
1170 | 1199 | ||
1171 | void kvm_pmu_init(struct kvm_vcpu *vcpu); | 1200 | int __x86_set_memory_region(struct kvm *kvm, |
1172 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | 1201 | const struct kvm_userspace_memory_region *mem); |
1173 | void kvm_pmu_reset(struct kvm_vcpu *vcpu); | 1202 | int x86_set_memory_region(struct kvm *kvm, |
1174 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); | 1203 | const struct kvm_userspace_memory_region *mem); |
1175 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); | ||
1176 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
1177 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); | ||
1178 | int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc); | ||
1179 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | ||
1180 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); | ||
1181 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); | ||
1182 | 1204 | ||
1183 | #endif /* _ASM_X86_KVM_HOST_H */ | 1205 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 6167fd798188..655e07a48f6c 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h | |||
@@ -41,5 +41,6 @@ struct pvclock_wall_clock { | |||
41 | 41 | ||
42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) | 42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) |
43 | #define PVCLOCK_GUEST_STOPPED (1 << 1) | 43 | #define PVCLOCK_GUEST_STOPPED (1 << 1) |
44 | #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) | ||
44 | #endif /* __ASSEMBLY__ */ | 45 | #endif /* __ASSEMBLY__ */ |
45 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ | 46 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d6b078e9fa28..628954ceede1 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -86,7 +86,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, | |||
86 | offset = pvclock_get_nsec_offset(src); | 86 | offset = pvclock_get_nsec_offset(src); |
87 | ret = src->system_time + offset; | 87 | ret = src->system_time + offset; |
88 | ret_flags = src->flags; | 88 | ret_flags = src->flags; |
89 | rdtsc_barrier(); | ||
90 | 89 | ||
91 | *cycles = ret; | 90 | *cycles = ret; |
92 | *flags = ret_flags; | 91 | *flags = ret_flags; |
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d7dcef58aefa..a4ae82eb82aa 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h | |||
@@ -106,6 +106,8 @@ struct kvm_ioapic_state { | |||
106 | #define KVM_IRQCHIP_IOAPIC 2 | 106 | #define KVM_IRQCHIP_IOAPIC 2 |
107 | #define KVM_NR_IRQCHIPS 3 | 107 | #define KVM_NR_IRQCHIPS 3 |
108 | 108 | ||
109 | #define KVM_RUN_X86_SMM (1 << 0) | ||
110 | |||
109 | /* for KVM_GET_REGS and KVM_SET_REGS */ | 111 | /* for KVM_GET_REGS and KVM_SET_REGS */ |
110 | struct kvm_regs { | 112 | struct kvm_regs { |
111 | /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ | 113 | /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ |
@@ -281,6 +283,7 @@ struct kvm_reinject_control { | |||
281 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 | 283 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 |
282 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 | 284 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 |
283 | #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 | 285 | #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 |
286 | #define KVM_VCPUEVENT_VALID_SMM 0x00000008 | ||
284 | 287 | ||
285 | /* Interrupt shadow states */ | 288 | /* Interrupt shadow states */ |
286 | #define KVM_X86_SHADOW_INT_MOV_SS 0x01 | 289 | #define KVM_X86_SHADOW_INT_MOV_SS 0x01 |
@@ -309,7 +312,13 @@ struct kvm_vcpu_events { | |||
309 | } nmi; | 312 | } nmi; |
310 | __u32 sipi_vector; | 313 | __u32 sipi_vector; |
311 | __u32 flags; | 314 | __u32 flags; |
312 | __u32 reserved[10]; | 315 | struct { |
316 | __u8 smm; | ||
317 | __u8 pending; | ||
318 | __u8 smm_inside_nmi; | ||
319 | __u8 latched_init; | ||
320 | } smi; | ||
321 | __u32 reserved[9]; | ||
313 | }; | 322 | }; |
314 | 323 | ||
315 | /* for KVM_GET/SET_DEBUGREGS */ | 324 | /* for KVM_GET/SET_DEBUGREGS */ |
@@ -345,4 +354,7 @@ struct kvm_xcrs { | |||
345 | struct kvm_sync_regs { | 354 | struct kvm_sync_regs { |
346 | }; | 355 | }; |
347 | 356 | ||
357 | #define KVM_QUIRK_LINT0_REENABLED (1 << 0) | ||
358 | #define KVM_QUIRK_CD_NW_CLEARED (1 << 1) | ||
359 | |||
348 | #endif /* _ASM_X86_KVM_H */ | 360 | #endif /* _ASM_X86_KVM_H */ |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 1681504e44a4..47190bd399e7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -331,7 +331,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val) | |||
331 | apic_write(APIC_EOI, APIC_EOI_ACK); | 331 | apic_write(APIC_EOI, APIC_EOI_ACK); |
332 | } | 332 | } |
333 | 333 | ||
334 | void kvm_guest_cpu_init(void) | 334 | static void kvm_guest_cpu_init(void) |
335 | { | 335 | { |
336 | if (!kvm_para_available()) | 336 | if (!kvm_para_available()) |
337 | return; | 337 | return; |
@@ -688,7 +688,7 @@ static inline void spin_time_accum_blocked(u64 start) | |||
688 | static struct dentry *d_spin_debug; | 688 | static struct dentry *d_spin_debug; |
689 | static struct dentry *d_kvm_debug; | 689 | static struct dentry *d_kvm_debug; |
690 | 690 | ||
691 | struct dentry *kvm_init_debugfs(void) | 691 | static struct dentry *kvm_init_debugfs(void) |
692 | { | 692 | { |
693 | d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); | 693 | d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); |
694 | if (!d_kvm_debug) | 694 | if (!d_kvm_debug) |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 42caaef897c8..49487b488061 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | #include <linux/hardirq.h> | 25 | #include <linux/hardirq.h> |
26 | #include <linux/memblock.h> | 26 | #include <linux/memblock.h> |
27 | #include <linux/sched.h> | ||
27 | 28 | ||
28 | #include <asm/x86_init.h> | 29 | #include <asm/x86_init.h> |
29 | #include <asm/reboot.h> | 30 | #include <asm/reboot.h> |
@@ -217,8 +218,10 @@ static void kvm_shutdown(void) | |||
217 | 218 | ||
218 | void __init kvmclock_init(void) | 219 | void __init kvmclock_init(void) |
219 | { | 220 | { |
221 | struct pvclock_vcpu_time_info *vcpu_time; | ||
220 | unsigned long mem; | 222 | unsigned long mem; |
221 | int size; | 223 | int size, cpu; |
224 | u8 flags; | ||
222 | 225 | ||
223 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); | 226 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); |
224 | 227 | ||
@@ -264,7 +267,14 @@ void __init kvmclock_init(void) | |||
264 | pv_info.name = "KVM"; | 267 | pv_info.name = "KVM"; |
265 | 268 | ||
266 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | 269 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) |
267 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | 270 | pvclock_set_flags(~0); |
271 | |||
272 | cpu = get_cpu(); | ||
273 | vcpu_time = &hv_clock[cpu].pvti; | ||
274 | flags = pvclock_read_flags(vcpu_time); | ||
275 | if (flags & PVCLOCK_COUNTS_FROM_ZERO) | ||
276 | set_sched_clock_stable(); | ||
277 | put_cpu(); | ||
268 | } | 278 | } |
269 | 279 | ||
270 | int __init kvm_setup_vsyscall_timeinfo(void) | 280 | int __init kvm_setup_vsyscall_timeinfo(void) |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 413a7bf9efbb..d8a1d56276e1 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -86,15 +86,16 @@ config KVM_MMU_AUDIT | |||
86 | auditing of KVM MMU events at runtime. | 86 | auditing of KVM MMU events at runtime. |
87 | 87 | ||
88 | config KVM_DEVICE_ASSIGNMENT | 88 | config KVM_DEVICE_ASSIGNMENT |
89 | bool "KVM legacy PCI device assignment support" | 89 | bool "KVM legacy PCI device assignment support (DEPRECATED)" |
90 | depends on KVM && PCI && IOMMU_API | 90 | depends on KVM && PCI && IOMMU_API |
91 | default y | 91 | default n |
92 | ---help--- | 92 | ---help--- |
93 | Provide support for legacy PCI device assignment through KVM. The | 93 | Provide support for legacy PCI device assignment through KVM. The |
94 | kernel now also supports a full featured userspace device driver | 94 | kernel now also supports a full featured userspace device driver |
95 | framework through VFIO, which supersedes much of this support. | 95 | framework through VFIO, which supersedes this support and provides |
96 | better security. | ||
96 | 97 | ||
97 | If unsure, say Y. | 98 | If unsure, say N. |
98 | 99 | ||
99 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 100 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
100 | # the virtualization menu. | 101 | # the virtualization menu. |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 16e8f962eaad..67d215cb8953 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -12,10 +12,10 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ | |||
12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o | 12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o |
13 | 13 | ||
14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
15 | i8254.o ioapic.o irq_comm.o cpuid.o pmu.o | 15 | i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o |
16 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o | 16 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o |
17 | kvm-intel-y += vmx.o | 17 | kvm-intel-y += vmx.o pmu_intel.o |
18 | kvm-amd-y += svm.o | 18 | kvm-amd-y += svm.o pmu_amd.o |
19 | 19 | ||
20 | obj-$(CONFIG_KVM) += kvm.o | 20 | obj-$(CONFIG_KVM) += kvm.o |
21 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o | 21 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9f705e618af5..64dd46793099 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -16,12 +16,14 @@ | |||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/vmalloc.h> | 17 | #include <linux/vmalloc.h> |
18 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
19 | #include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */ | ||
19 | #include <asm/user.h> | 20 | #include <asm/user.h> |
20 | #include <asm/fpu/xstate.h> | 21 | #include <asm/fpu/xstate.h> |
21 | #include "cpuid.h" | 22 | #include "cpuid.h" |
22 | #include "lapic.h" | 23 | #include "lapic.h" |
23 | #include "mmu.h" | 24 | #include "mmu.h" |
24 | #include "trace.h" | 25 | #include "trace.h" |
26 | #include "pmu.h" | ||
25 | 27 | ||
26 | static u32 xstate_required_size(u64 xstate_bv, bool compacted) | 28 | static u32 xstate_required_size(u64 xstate_bv, bool compacted) |
27 | { | 29 | { |
@@ -95,7 +97,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
95 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) | 97 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) |
96 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); | 98 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); |
97 | 99 | ||
98 | vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu); | 100 | vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); |
99 | 101 | ||
100 | /* | 102 | /* |
101 | * The existing code assumes virtual address is 48-bit in the canonical | 103 | * The existing code assumes virtual address is 48-bit in the canonical |
@@ -109,7 +111,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
109 | /* Update physical-address width */ | 111 | /* Update physical-address width */ |
110 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | 112 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); |
111 | 113 | ||
112 | kvm_pmu_cpuid_update(vcpu); | 114 | kvm_pmu_refresh(vcpu); |
113 | return 0; | 115 | return 0; |
114 | } | 116 | } |
115 | 117 | ||
@@ -413,6 +415,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
413 | } | 415 | } |
414 | break; | 416 | break; |
415 | } | 417 | } |
418 | case 6: /* Thermal management */ | ||
419 | entry->eax = 0x4; /* allow ARAT */ | ||
420 | entry->ebx = 0; | ||
421 | entry->ecx = 0; | ||
422 | entry->edx = 0; | ||
423 | break; | ||
416 | case 7: { | 424 | case 7: { |
417 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 425 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
418 | /* Mask ebx against host capability word 9 */ | 426 | /* Mask ebx against host capability word 9 */ |
@@ -589,7 +597,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
589 | break; | 597 | break; |
590 | case 3: /* Processor serial number */ | 598 | case 3: /* Processor serial number */ |
591 | case 5: /* MONITOR/MWAIT */ | 599 | case 5: /* MONITOR/MWAIT */ |
592 | case 6: /* Thermal management */ | ||
593 | case 0xC0000002: | 600 | case 0xC0000002: |
594 | case 0xC0000003: | 601 | case 0xC0000003: |
595 | case 0xC0000004: | 602 | case 0xC0000004: |
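CPUID leaf 6 moves out of the all-zeroes list: the supported-CPUID table now reports EAX = 0x4 for it, i.e. only the ARAT bit (always-running APIC timer, CPUID.06H:EAX bit 2), with EBX/ECX/EDX cleared. A guest-side probe of that bit, assuming an x86 build where GCC/clang's <cpuid.h> and __get_cpuid() are available:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

        if (!__get_cpuid(6, &eax, &ebx, &ecx, &edx)) {
            printf("CPUID leaf 6 not available\n");
            return 1;
        }
        /* KVM now advertises exactly 0x4 here; bit 2 is ARAT. */
        printf("leaf 6 eax=0x%x, ARAT %s\n", eax,
               (eax & (1u << 2)) ? "present" : "absent");
        return 0;
    }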
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 496b3695d3d3..dd05b9cef6ae 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -70,6 +70,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | |||
70 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | 70 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); |
71 | } | 71 | } |
72 | 72 | ||
73 | static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu) | ||
74 | { | ||
75 | struct kvm_cpuid_entry2 *best; | ||
76 | |||
77 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
78 | return best && (best->edx & bit(X86_FEATURE_LM)); | ||
79 | } | ||
80 | |||
73 | static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) | 81 | static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) |
74 | { | 82 | { |
75 | struct kvm_cpuid_entry2 *best; | 83 | struct kvm_cpuid_entry2 *best; |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 630bcb0d7a04..e7a4fde5d631 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <asm/kvm_emulate.h> | 26 | #include <asm/kvm_emulate.h> |
27 | #include <linux/stringify.h> | 27 | #include <linux/stringify.h> |
28 | #include <asm/debugreg.h> | ||
28 | 29 | ||
29 | #include "x86.h" | 30 | #include "x86.h" |
30 | #include "tss.h" | 31 | #include "tss.h" |
@@ -523,13 +524,9 @@ static void masked_increment(ulong *reg, ulong mask, int inc) | |||
523 | static inline void | 524 | static inline void |
524 | register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc) | 525 | register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc) |
525 | { | 526 | { |
526 | ulong mask; | 527 | ulong *preg = reg_rmw(ctxt, reg); |
527 | 528 | ||
528 | if (ctxt->ad_bytes == sizeof(unsigned long)) | 529 | assign_register(preg, *preg + inc, ctxt->ad_bytes); |
529 | mask = ~0UL; | ||
530 | else | ||
531 | mask = ad_mask(ctxt); | ||
532 | masked_increment(reg_rmw(ctxt, reg), mask, inc); | ||
533 | } | 530 | } |
534 | 531 | ||
535 | static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) | 532 | static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) |
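register_address_increment() now funnels the update through assign_register(), which applies the architectural partial-write rule instead of open-coded masking: a 32-bit write zero-extends into the full 64-bit register, while a 16-bit write leaves the upper bytes untouched. A minimal standalone sketch of that rule (my own helper, not the emulator's code):

#include <stdint.h>
#include <stdio.h>

/*
 * Write 'bytes' of 'val' into a 64-bit register image, following the
 * x86 rule: 4-byte writes zero-extend, 2-byte writes preserve the
 * untouched upper bytes, 8-byte writes replace everything.
 */
static void write_partial(uint64_t *reg, uint64_t val, int bytes)
{
	switch (bytes) {
	case 2:
		*reg = (*reg & ~0xffffULL) | (uint16_t)val;
		break;
	case 4:
		*reg = (uint32_t)val;	/* upper 32 bits cleared */
		break;
	case 8:
		*reg = val;
		break;
	}
}

int main(void)
{
	uint64_t reg = 0x1122334455667788ULL;

	write_partial(&reg, 0xabcd, 2);
	printf("16-bit write: %#018llx\n", (unsigned long long)reg);

	reg = 0x1122334455667788ULL;
	write_partial(&reg, 0xabcd, 4);
	printf("32-bit write: %#018llx\n", (unsigned long long)reg);
	return 0;
}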
@@ -2262,6 +2259,260 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) | |||
2262 | return rc; | 2259 | return rc; |
2263 | } | 2260 | } |
2264 | 2261 | ||
2262 | static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) | ||
2263 | { | ||
2264 | u32 eax, ebx, ecx, edx; | ||
2265 | |||
2266 | eax = 0x80000001; | ||
2267 | ecx = 0; | ||
2268 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | ||
2269 | return edx & bit(X86_FEATURE_LM); | ||
2270 | } | ||
2271 | |||
2272 | #define GET_SMSTATE(type, smbase, offset) \ | ||
2273 | ({ \ | ||
2274 | type __val; \ | ||
2275 | int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \ | ||
2276 | sizeof(__val), NULL); \ | ||
2277 | if (r != X86EMUL_CONTINUE) \ | ||
2278 | return X86EMUL_UNHANDLEABLE; \ | ||
2279 | __val; \ | ||
2280 | }) | ||
2281 | |||
2282 | static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) | ||
2283 | { | ||
2284 | desc->g = (flags >> 23) & 1; | ||
2285 | desc->d = (flags >> 22) & 1; | ||
2286 | desc->l = (flags >> 21) & 1; | ||
2287 | desc->avl = (flags >> 20) & 1; | ||
2288 | desc->p = (flags >> 15) & 1; | ||
2289 | desc->dpl = (flags >> 13) & 3; | ||
2290 | desc->s = (flags >> 12) & 1; | ||
2291 | desc->type = (flags >> 8) & 15; | ||
2292 | } | ||
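rsm_set_desc_flags() above unpacks the attribute word kept in the SMM state-save area; the bit positions match the upper half of a segment descriptor (type at bits 8-11, S at 12, DPL at 13-14, P at 15, AVL/L/D/G at 20-23). A standalone sketch of the same unpacking, with a local stand-in for desc_struct so it compiles outside the kernel:

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the attribute fields of the kernel's desc_struct. */
struct seg_attrs {
	unsigned type:4, s:1, dpl:2, p:1, avl:1, l:1, d:1, g:1;
};

/* Same bit positions as rsm_set_desc_flags() in the patch. */
static void decode_flags(struct seg_attrs *a, uint32_t flags)
{
	a->g    = (flags >> 23) & 1;
	a->d    = (flags >> 22) & 1;
	a->l    = (flags >> 21) & 1;
	a->avl  = (flags >> 20) & 1;
	a->p    = (flags >> 15) & 1;
	a->dpl  = (flags >> 13) & 3;
	a->s    = (flags >> 12) & 1;
	a->type = (flags >> 8) & 15;
}

int main(void)
{
	struct seg_attrs a;

	/* 0x00c09b00: present, DPL 0, code segment (type 0xb), 32-bit, 4K granular. */
	decode_flags(&a, 0x00c09b00);
	printf("type=%x s=%u dpl=%u p=%u avl=%u l=%u d=%u g=%u\n",
	       a.type, a.s, a.dpl, a.p, a.avl, a.l, a.d, a.g);
	return 0;
}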
2293 | |||
2294 | static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) | ||
2295 | { | ||
2296 | struct desc_struct desc; | ||
2297 | int offset; | ||
2298 | u16 selector; | ||
2299 | |||
2300 | selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4); | ||
2301 | |||
2302 | if (n < 3) | ||
2303 | offset = 0x7f84 + n * 12; | ||
2304 | else | ||
2305 | offset = 0x7f2c + (n - 3) * 12; | ||
2306 | |||
2307 | set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); | ||
2308 | set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); | ||
2309 | rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset)); | ||
2310 | ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); | ||
2311 | return X86EMUL_CONTINUE; | ||
2312 | } | ||
2313 | |||
2314 | static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) | ||
2315 | { | ||
2316 | struct desc_struct desc; | ||
2317 | int offset; | ||
2318 | u16 selector; | ||
2319 | u32 base3; | ||
2320 | |||
2321 | offset = 0x7e00 + n * 16; | ||
2322 | |||
2323 | selector = GET_SMSTATE(u16, smbase, offset); | ||
2324 | rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8); | ||
2325 | set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); | ||
2326 | set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); | ||
2327 | base3 = GET_SMSTATE(u32, smbase, offset + 12); | ||
2328 | |||
2329 | ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); | ||
2330 | return X86EMUL_CONTINUE; | ||
2331 | } | ||
2332 | |||
2333 | static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, | ||
2334 | u64 cr0, u64 cr4) | ||
2335 | { | ||
2336 | int bad; | ||
2337 | |||
2338 | /* | ||
2339 | * First enable PAE, long mode needs it before CR0.PG = 1 is set. | ||
2340 | * Then enable protected mode. However, PCID cannot be enabled | ||
2341 | * if EFER.LMA=0, so set it separately. | ||
2342 | */ | ||
2343 | bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); | ||
2344 | if (bad) | ||
2345 | return X86EMUL_UNHANDLEABLE; | ||
2346 | |||
2347 | bad = ctxt->ops->set_cr(ctxt, 0, cr0); | ||
2348 | if (bad) | ||
2349 | return X86EMUL_UNHANDLEABLE; | ||
2350 | |||
2351 | if (cr4 & X86_CR4_PCIDE) { | ||
2352 | bad = ctxt->ops->set_cr(ctxt, 4, cr4); | ||
2353 | if (bad) | ||
2354 | return X86EMUL_UNHANDLEABLE; | ||
2355 | } | ||
2356 | |||
2357 | return X86EMUL_CONTINUE; | ||
2358 | } | ||
2359 | |||
2360 | static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) | ||
2361 | { | ||
2362 | struct desc_struct desc; | ||
2363 | struct desc_ptr dt; | ||
2364 | u16 selector; | ||
2365 | u32 val, cr0, cr4; | ||
2366 | int i; | ||
2367 | |||
2368 | cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); | ||
2369 | ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); | ||
2370 | ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; | ||
2371 | ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); | ||
2372 | |||
2373 | for (i = 0; i < 8; i++) | ||
2374 | *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4); | ||
2375 | |||
2376 | val = GET_SMSTATE(u32, smbase, 0x7fcc); | ||
2377 | ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); | ||
2378 | val = GET_SMSTATE(u32, smbase, 0x7fc8); | ||
2379 | ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); | ||
2380 | |||
2381 | selector = GET_SMSTATE(u32, smbase, 0x7fc4); | ||
2382 | set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64)); | ||
2383 | set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60)); | ||
2384 | rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c)); | ||
2385 | ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); | ||
2386 | |||
2387 | selector = GET_SMSTATE(u32, smbase, 0x7fc0); | ||
2388 | set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80)); | ||
2389 | set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c)); | ||
2390 | rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78)); | ||
2391 | ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); | ||
2392 | |||
2393 | dt.address = GET_SMSTATE(u32, smbase, 0x7f74); | ||
2394 | dt.size = GET_SMSTATE(u32, smbase, 0x7f70); | ||
2395 | ctxt->ops->set_gdt(ctxt, &dt); | ||
2396 | |||
2397 | dt.address = GET_SMSTATE(u32, smbase, 0x7f58); | ||
2398 | dt.size = GET_SMSTATE(u32, smbase, 0x7f54); | ||
2399 | ctxt->ops->set_idt(ctxt, &dt); | ||
2400 | |||
2401 | for (i = 0; i < 6; i++) { | ||
2402 | int r = rsm_load_seg_32(ctxt, smbase, i); | ||
2403 | if (r != X86EMUL_CONTINUE) | ||
2404 | return r; | ||
2405 | } | ||
2406 | |||
2407 | cr4 = GET_SMSTATE(u32, smbase, 0x7f14); | ||
2408 | |||
2409 | ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); | ||
2410 | |||
2411 | return rsm_enter_protected_mode(ctxt, cr0, cr4); | ||
2412 | } | ||
2413 | |||
2414 | static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) | ||
2415 | { | ||
2416 | struct desc_struct desc; | ||
2417 | struct desc_ptr dt; | ||
2418 | u64 val, cr0, cr4; | ||
2419 | u32 base3; | ||
2420 | u16 selector; | ||
2421 | int i; | ||
2422 | |||
2423 | for (i = 0; i < 16; i++) | ||
2424 | *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8); | ||
2425 | |||
2426 | ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78); | ||
2427 | ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED; | ||
2428 | |||
2429 | val = GET_SMSTATE(u32, smbase, 0x7f68); | ||
2430 | ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); | ||
2431 | val = GET_SMSTATE(u32, smbase, 0x7f60); | ||
2432 | ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); | ||
2433 | |||
2434 | cr0 = GET_SMSTATE(u64, smbase, 0x7f58); | ||
2435 | ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); | ||
2436 | cr4 = GET_SMSTATE(u64, smbase, 0x7f48); | ||
2437 | ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); | ||
2438 | val = GET_SMSTATE(u64, smbase, 0x7ed0); | ||
2439 | ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); | ||
2440 | |||
2441 | selector = GET_SMSTATE(u32, smbase, 0x7e90); | ||
2442 | rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8); | ||
2443 | set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94)); | ||
2444 | set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98)); | ||
2445 | base3 = GET_SMSTATE(u32, smbase, 0x7e9c); | ||
2446 | ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); | ||
2447 | |||
2448 | dt.size = GET_SMSTATE(u32, smbase, 0x7e84); | ||
2449 | dt.address = GET_SMSTATE(u64, smbase, 0x7e88); | ||
2450 | ctxt->ops->set_idt(ctxt, &dt); | ||
2451 | |||
2452 | selector = GET_SMSTATE(u32, smbase, 0x7e70); | ||
2453 | rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8); | ||
2454 | set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74)); | ||
2455 | set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78)); | ||
2456 | base3 = GET_SMSTATE(u32, smbase, 0x7e7c); | ||
2457 | ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); | ||
2458 | |||
2459 | dt.size = GET_SMSTATE(u32, smbase, 0x7e64); | ||
2460 | dt.address = GET_SMSTATE(u64, smbase, 0x7e68); | ||
2461 | ctxt->ops->set_gdt(ctxt, &dt); | ||
2462 | |||
2463 | for (i = 0; i < 6; i++) { | ||
2464 | int r = rsm_load_seg_64(ctxt, smbase, i); | ||
2465 | if (r != X86EMUL_CONTINUE) | ||
2466 | return r; | ||
2467 | } | ||
2468 | |||
2469 | return rsm_enter_protected_mode(ctxt, cr0, cr4); | ||
2470 | } | ||
2471 | |||
2472 | static int em_rsm(struct x86_emulate_ctxt *ctxt) | ||
2473 | { | ||
2474 | unsigned long cr0, cr4, efer; | ||
2475 | u64 smbase; | ||
2476 | int ret; | ||
2477 | |||
2478 | if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0) | ||
2479 | return emulate_ud(ctxt); | ||
2480 | |||
2481 | /* | ||
2482 | * Get back to real mode, to prepare a safe state in which to load | ||
2483 | * CR0/CR3/CR4/EFER. Also this will ensure that addresses passed | ||
2484 | * to read_std/write_std are not virtual. | ||
2485 | * | ||
2486 | * CR4.PCIDE must be zero, because it is a 64-bit mode only feature. | ||
2487 | */ | ||
2488 | cr0 = ctxt->ops->get_cr(ctxt, 0); | ||
2489 | if (cr0 & X86_CR0_PE) | ||
2490 | ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); | ||
2491 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2492 | if (cr4 & X86_CR4_PAE) | ||
2493 | ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); | ||
2494 | efer = 0; | ||
2495 | ctxt->ops->set_msr(ctxt, MSR_EFER, efer); | ||
2496 | |||
2497 | smbase = ctxt->ops->get_smbase(ctxt); | ||
2498 | if (emulator_has_longmode(ctxt)) | ||
2499 | ret = rsm_load_state_64(ctxt, smbase + 0x8000); | ||
2500 | else | ||
2501 | ret = rsm_load_state_32(ctxt, smbase + 0x8000); | ||
2502 | |||
2503 | if (ret != X86EMUL_CONTINUE) { | ||
2504 | /* FIXME: should triple fault */ | ||
2505 | return X86EMUL_UNHANDLEABLE; | ||
2506 | } | ||
2507 | |||
2508 | if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) | ||
2509 | ctxt->ops->set_nmi_mask(ctxt, false); | ||
2510 | |||
2511 | ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK; | ||
2512 | ctxt->emul_flags &= ~X86EMUL_SMM_MASK; | ||
2513 | return X86EMUL_CONTINUE; | ||
2514 | } | ||
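For orientation, em_rsm() reads everything through GET_SMSTATE() relative to smbase + 0x8000, so an offset such as 0x7ed0 (the saved EFER in the 64-bit layout) lands near the top of the 64 KiB SMRAM window. A tiny illustration of that address arithmetic; the 0x30000 value is the architectural reset default for SMBASE, everything else is copied from the code above:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t smbase = 0x30000;	/* reset default; guests may relocate it */

	/* em_rsm() passes smbase + 0x8000 to the state-load helpers ... */
	uint64_t state_save = smbase + 0x8000;

	/* ... which then read fields at fixed offsets, e.g. EFER at 0x7ed0. */
	uint64_t efer_addr = state_save + 0x7ed0;

	printf("saved EFER sits at SMRAM address %#llx\n",
	       (unsigned long long)efer_addr);
	return 0;
}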
2515 | |||
2265 | static void | 2516 | static void |
2266 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | 2517 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, |
2267 | struct desc_struct *cs, struct desc_struct *ss) | 2518 | struct desc_struct *cs, struct desc_struct *ss) |
@@ -2573,6 +2824,30 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
2573 | return true; | 2824 | return true; |
2574 | } | 2825 | } |
2575 | 2826 | ||
2827 | static void string_registers_quirk(struct x86_emulate_ctxt *ctxt) | ||
2828 | { | ||
2829 | /* | ||
2830 | * Intel CPUs mask the counter and pointers in quite strange | ||
2831 | * manner when ECX is zero due to REP-string optimizations. | ||
2832 | */ | ||
2833 | #ifdef CONFIG_X86_64 | ||
2834 | if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt)) | ||
2835 | return; | ||
2836 | |||
2837 | *reg_write(ctxt, VCPU_REGS_RCX) = 0; | ||
2838 | |||
2839 | switch (ctxt->b) { | ||
2840 | case 0xa4: /* movsb */ | ||
2841 | case 0xa5: /* movsd/w */ | ||
2842 | *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1; | ||
2843 | /* fall through */ | ||
2844 | case 0xaa: /* stosb */ | ||
2845 | case 0xab: /* stosd/w */ | ||
2846 | *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1; | ||
2847 | } | ||
2848 | #endif | ||
2849 | } | ||
2850 | |||
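string_registers_quirk() models an Intel quirk: when a 32-bit address-size REP string instruction hits the ECX == 0 termination check, RCX is cleared and the index registers are truncated to their low 32 bits (RSI only for the movs variants, RDI for movs and stos alike). A purely illustrative sketch of that truncation, outside the emulator:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Pretend these are RCX/RSI/RDI with stale upper halves. */
	uint64_t rcx = 0x1234567800000000ULL;
	uint64_t rsi = 0xaaaaaaaa11112222ULL;
	uint64_t rdi = 0x5555555533334444ULL;

	/* What the quirk does for a 32-bit "rep movs" on an Intel CPU. */
	rcx = 0;
	rsi &= (uint32_t)-1;
	rdi &= (uint32_t)-1;

	printf("rcx=%#llx rsi=%#llx rdi=%#llx\n",
	       (unsigned long long)rcx, (unsigned long long)rsi,
	       (unsigned long long)rdi);
	return 0;
}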
2576 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | 2851 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, |
2577 | struct tss_segment_16 *tss) | 2852 | struct tss_segment_16 *tss) |
2578 | { | 2853 | { |
@@ -2849,7 +3124,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2849 | ulong old_tss_base = | 3124 | ulong old_tss_base = |
2850 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); | 3125 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); |
2851 | u32 desc_limit; | 3126 | u32 desc_limit; |
2852 | ulong desc_addr; | 3127 | ulong desc_addr, dr7; |
2853 | 3128 | ||
2854 | /* FIXME: old_tss_base == ~0 ? */ | 3129 | /* FIXME: old_tss_base == ~0 ? */ |
2855 | 3130 | ||
@@ -2934,6 +3209,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2934 | ret = em_push(ctxt); | 3209 | ret = em_push(ctxt); |
2935 | } | 3210 | } |
2936 | 3211 | ||
3212 | ops->get_dr(ctxt, 7, &dr7); | ||
3213 | ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN)); | ||
3214 | |||
2937 | return ret; | 3215 | return ret; |
2938 | } | 3216 | } |
2939 | 3217 | ||
@@ -3840,7 +4118,7 @@ static const struct opcode group5[] = { | |||
3840 | F(DstMem | SrcNone | Lock, em_inc), | 4118 | F(DstMem | SrcNone | Lock, em_inc), |
3841 | F(DstMem | SrcNone | Lock, em_dec), | 4119 | F(DstMem | SrcNone | Lock, em_dec), |
3842 | I(SrcMem | NearBranch, em_call_near_abs), | 4120 | I(SrcMem | NearBranch, em_call_near_abs), |
3843 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), | 4121 | I(SrcMemFAddr | ImplicitOps, em_call_far), |
3844 | I(SrcMem | NearBranch, em_jmp_abs), | 4122 | I(SrcMem | NearBranch, em_jmp_abs), |
3845 | I(SrcMemFAddr | ImplicitOps, em_jmp_far), | 4123 | I(SrcMemFAddr | ImplicitOps, em_jmp_far), |
3846 | I(SrcMem | Stack, em_push), D(Undefined), | 4124 | I(SrcMem | Stack, em_push), D(Undefined), |
@@ -4173,7 +4451,7 @@ static const struct opcode twobyte_table[256] = { | |||
4173 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, | 4451 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, |
4174 | /* 0xA8 - 0xAF */ | 4452 | /* 0xA8 - 0xAF */ |
4175 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 4453 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
4176 | DI(ImplicitOps, rsm), | 4454 | II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm), |
4177 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | 4455 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), |
4178 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), | 4456 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), |
4179 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), | 4457 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), |
@@ -4871,7 +5149,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4871 | fetch_possible_mmx_operand(ctxt, &ctxt->dst); | 5149 | fetch_possible_mmx_operand(ctxt, &ctxt->dst); |
4872 | } | 5150 | } |
4873 | 5151 | ||
4874 | if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { | 5152 | if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { |
4875 | rc = emulator_check_intercept(ctxt, ctxt->intercept, | 5153 | rc = emulator_check_intercept(ctxt, ctxt->intercept, |
4876 | X86_ICPT_PRE_EXCEPT); | 5154 | X86_ICPT_PRE_EXCEPT); |
4877 | if (rc != X86EMUL_CONTINUE) | 5155 | if (rc != X86EMUL_CONTINUE) |
@@ -4900,7 +5178,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4900 | goto done; | 5178 | goto done; |
4901 | } | 5179 | } |
4902 | 5180 | ||
4903 | if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { | 5181 | if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { |
4904 | rc = emulator_check_intercept(ctxt, ctxt->intercept, | 5182 | rc = emulator_check_intercept(ctxt, ctxt->intercept, |
4905 | X86_ICPT_POST_EXCEPT); | 5183 | X86_ICPT_POST_EXCEPT); |
4906 | if (rc != X86EMUL_CONTINUE) | 5184 | if (rc != X86EMUL_CONTINUE) |
@@ -4910,6 +5188,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4910 | if (ctxt->rep_prefix && (ctxt->d & String)) { | 5188 | if (ctxt->rep_prefix && (ctxt->d & String)) { |
4911 | /* All REP prefixes have the same first termination condition */ | 5189 | /* All REP prefixes have the same first termination condition */ |
4912 | if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { | 5190 | if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { |
5191 | string_registers_quirk(ctxt); | ||
4913 | ctxt->eip = ctxt->_eip; | 5192 | ctxt->eip = ctxt->_eip; |
4914 | ctxt->eflags &= ~X86_EFLAGS_RF; | 5193 | ctxt->eflags &= ~X86_EFLAGS_RF; |
4915 | goto done; | 5194 | goto done; |
@@ -4953,7 +5232,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4953 | 5232 | ||
4954 | special_insn: | 5233 | special_insn: |
4955 | 5234 | ||
4956 | if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) { | 5235 | if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { |
4957 | rc = emulator_check_intercept(ctxt, ctxt->intercept, | 5236 | rc = emulator_check_intercept(ctxt, ctxt->intercept, |
4958 | X86_ICPT_POST_MEMACCESS); | 5237 | X86_ICPT_POST_MEMACCESS); |
4959 | if (rc != X86EMUL_CONTINUE) | 5238 | if (rc != X86EMUL_CONTINUE) |
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 28146f03c514..856f79105bb5 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
@@ -349,6 +349,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) | |||
349 | irqe.delivery_mode = entry->fields.delivery_mode << 8; | 349 | irqe.delivery_mode = entry->fields.delivery_mode << 8; |
350 | irqe.level = 1; | 350 | irqe.level = 1; |
351 | irqe.shorthand = 0; | 351 | irqe.shorthand = 0; |
352 | irqe.msi_redir_hint = false; | ||
352 | 353 | ||
353 | if (irqe.trig_mode == IOAPIC_EDGE_TRIG) | 354 | if (irqe.trig_mode == IOAPIC_EDGE_TRIG) |
354 | ioapic->irr_delivered |= 1 << irq; | 355 | ioapic->irr_delivered |= 1 << irq; |
@@ -637,11 +638,9 @@ void kvm_ioapic_destroy(struct kvm *kvm) | |||
637 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 638 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
638 | 639 | ||
639 | cancel_delayed_work_sync(&ioapic->eoi_inject); | 640 | cancel_delayed_work_sync(&ioapic->eoi_inject); |
640 | if (ioapic) { | 641 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); |
641 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | 642 | kvm->arch.vioapic = NULL; |
642 | kvm->arch.vioapic = NULL; | 643 | kfree(ioapic); |
643 | kfree(ioapic); | ||
644 | } | ||
645 | } | 644 | } |
646 | 645 | ||
647 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | 646 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) |
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 72298b3ac025..9efff9e5b58c 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c | |||
@@ -31,6 +31,8 @@ | |||
31 | 31 | ||
32 | #include "ioapic.h" | 32 | #include "ioapic.h" |
33 | 33 | ||
34 | #include "lapic.h" | ||
35 | |||
34 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 36 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
35 | struct kvm *kvm, int irq_source_id, int level, | 37 | struct kvm *kvm, int irq_source_id, int level, |
36 | bool line_status) | 38 | bool line_status) |
@@ -48,11 +50,6 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
48 | line_status); | 50 | line_status); |
49 | } | 51 | } |
50 | 52 | ||
51 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | ||
52 | { | ||
53 | return irq->delivery_mode == APIC_DM_LOWEST; | ||
54 | } | ||
55 | |||
56 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 53 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
57 | struct kvm_lapic_irq *irq, unsigned long *dest_map) | 54 | struct kvm_lapic_irq *irq, unsigned long *dest_map) |
58 | { | 55 | { |
@@ -60,7 +57,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
60 | struct kvm_vcpu *vcpu, *lowest = NULL; | 57 | struct kvm_vcpu *vcpu, *lowest = NULL; |
61 | 58 | ||
62 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 59 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
63 | kvm_is_dm_lowest_prio(irq)) { | 60 | kvm_lowest_prio_delivery(irq)) { |
64 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 61 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
65 | irq->delivery_mode = APIC_DM_FIXED; | 62 | irq->delivery_mode = APIC_DM_FIXED; |
66 | } | 63 | } |
@@ -76,7 +73,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
76 | irq->dest_id, irq->dest_mode)) | 73 | irq->dest_id, irq->dest_mode)) |
77 | continue; | 74 | continue; |
78 | 75 | ||
79 | if (!kvm_is_dm_lowest_prio(irq)) { | 76 | if (!kvm_lowest_prio_delivery(irq)) { |
80 | if (r < 0) | 77 | if (r < 0) |
81 | r = 0; | 78 | r = 0; |
82 | r += kvm_apic_set_irq(vcpu, irq, dest_map); | 79 | r += kvm_apic_set_irq(vcpu, irq, dest_map); |
@@ -106,9 +103,10 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | |||
106 | irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; | 103 | irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; |
107 | irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; | 104 | irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; |
108 | irq->delivery_mode = e->msi.data & 0x700; | 105 | irq->delivery_mode = e->msi.data & 0x700; |
106 | irq->msi_redir_hint = ((e->msi.address_lo | ||
107 | & MSI_ADDR_REDIRECTION_LOWPRI) > 0); | ||
109 | irq->level = 1; | 108 | irq->level = 1; |
110 | irq->shorthand = 0; | 109 | irq->shorthand = 0; |
111 | /* TODO Deal with RH bit of MSI message address */ | ||
112 | } | 110 | } |
113 | 111 | ||
114 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 112 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
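kvm_set_msi_irq() now also extracts the redirection-hint (RH) bit from the MSI address instead of leaving it as a TODO; together with the lapic.h change further down, a set RH bit makes the interrupt eligible for lowest-priority delivery. For orientation, a sketch that decodes the same fields from a raw MSI address/data pair; the bit positions follow the standard MSI layout, the macro names and sample values below are my own:

#include <stdint.h>
#include <stdio.h>

#define MSI_ADDR_DEST_MODE_BIT		(1u << 2)	/* 1 = logical */
#define MSI_ADDR_REDIR_LOWPRI_BIT	(1u << 3)	/* RH */
#define MSI_ADDR_DEST_ID_SHIFT		12
#define MSI_DATA_TRIGGER_BIT		(1u << 15)	/* 1 = level */

int main(void)
{
	/* Hypothetical MSI address/data as a device would program them. */
	uint32_t address_lo = 0xfee01008;	/* dest id 0x01, RH set */
	uint32_t data       = 0x00000031;	/* fixed delivery, vector 0x31 */

	printf("dest_id        %u\n", (address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
	printf("dest_mode      %s\n",
	       (address_lo & MSI_ADDR_DEST_MODE_BIT) ? "logical" : "physical");
	printf("msi_redir_hint %d\n", (address_lo & MSI_ADDR_REDIR_LOWPRI_BIT) != 0);
	printf("trig_mode      %s\n",
	       (data & MSI_DATA_TRIGGER_BIT) ? "level" : "edge");
	printf("delivery_mode  %#x\n", data & 0x700);
	printf("vector         %#x\n", data & 0xff);
	return 0;
}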
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 544076c4f44b..e1e89ee4af75 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -99,4 +99,9 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu) | |||
99 | return vcpu->arch.hflags & HF_GUEST_MASK; | 99 | return vcpu->arch.hflags & HF_GUEST_MASK; |
100 | } | 100 | } |
101 | 101 | ||
102 | static inline bool is_smm(struct kvm_vcpu *vcpu) | ||
103 | { | ||
104 | return vcpu->arch.hflags & HF_SMM_MASK; | ||
105 | } | ||
106 | |||
102 | #endif | 107 | #endif |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4c7deb4f78a1..36e9de1b4127 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -240,6 +240,15 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) | |||
240 | recalculate_apic_map(apic->vcpu->kvm); | 240 | recalculate_apic_map(apic->vcpu->kvm); |
241 | } | 241 | } |
242 | 242 | ||
243 | static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id) | ||
244 | { | ||
245 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); | ||
246 | |||
247 | apic_set_reg(apic, APIC_ID, id << 24); | ||
248 | apic_set_reg(apic, APIC_LDR, ldr); | ||
249 | recalculate_apic_map(apic->vcpu->kvm); | ||
250 | } | ||
251 | |||
243 | static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) | 252 | static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) |
244 | { | 253 | { |
245 | return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); | 254 | return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); |
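The new kvm_apic_set_x2apic_id() derives the logical destination register directly from the x2APIC ID: the ID's upper bits pick a cluster of 16 and the low 4 bits become a one-hot position inside it. A worked example of that formula (same expression as above):

#include <stdio.h>

/* LDR derivation used by kvm_apic_set_x2apic_id(). */
static unsigned int x2apic_ldr(unsigned char id)
{
	return ((unsigned int)(id >> 4) << 16) | (1u << (id & 0xf));
}

int main(void)
{
	/* id 5  -> cluster 0, bit 5 -> 0x00000020
	 * id 23 -> cluster 1, bit 7 -> 0x00010080 */
	printf("ldr(5)  = %#010x\n", x2apic_ldr(5));
	printf("ldr(23) = %#010x\n", x2apic_ldr(23));
	return 0;
}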
@@ -728,7 +737,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
728 | 737 | ||
729 | dst = map->logical_map[cid]; | 738 | dst = map->logical_map[cid]; |
730 | 739 | ||
731 | if (irq->delivery_mode == APIC_DM_LOWEST) { | 740 | if (kvm_lowest_prio_delivery(irq)) { |
732 | int l = -1; | 741 | int l = -1; |
733 | for_each_set_bit(i, &bitmap, 16) { | 742 | for_each_set_bit(i, &bitmap, 16) { |
734 | if (!dst[i]) | 743 | if (!dst[i]) |
@@ -799,7 +808,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
799 | break; | 808 | break; |
800 | 809 | ||
801 | case APIC_DM_SMI: | 810 | case APIC_DM_SMI: |
802 | apic_debug("Ignoring guest SMI\n"); | 811 | result = 1; |
812 | kvm_make_request(KVM_REQ_SMI, vcpu); | ||
813 | kvm_vcpu_kick(vcpu); | ||
803 | break; | 814 | break; |
804 | 815 | ||
805 | case APIC_DM_NMI: | 816 | case APIC_DM_NMI: |
@@ -914,9 +925,10 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
914 | irq.vector = icr_low & APIC_VECTOR_MASK; | 925 | irq.vector = icr_low & APIC_VECTOR_MASK; |
915 | irq.delivery_mode = icr_low & APIC_MODE_MASK; | 926 | irq.delivery_mode = icr_low & APIC_MODE_MASK; |
916 | irq.dest_mode = icr_low & APIC_DEST_MASK; | 927 | irq.dest_mode = icr_low & APIC_DEST_MASK; |
917 | irq.level = icr_low & APIC_INT_ASSERT; | 928 | irq.level = (icr_low & APIC_INT_ASSERT) != 0; |
918 | irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; | 929 | irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; |
919 | irq.shorthand = icr_low & APIC_SHORT_MASK; | 930 | irq.shorthand = icr_low & APIC_SHORT_MASK; |
931 | irq.msi_redir_hint = false; | ||
920 | if (apic_x2apic_mode(apic)) | 932 | if (apic_x2apic_mode(apic)) |
921 | irq.dest_id = icr_high; | 933 | irq.dest_id = icr_high; |
922 | else | 934 | else |
@@ -926,10 +938,11 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
926 | 938 | ||
927 | apic_debug("icr_high 0x%x, icr_low 0x%x, " | 939 | apic_debug("icr_high 0x%x, icr_low 0x%x, " |
928 | "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " | 940 | "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " |
929 | "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", | 941 | "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, " |
942 | "msi_redir_hint 0x%x\n", | ||
930 | icr_high, icr_low, irq.shorthand, irq.dest_id, | 943 | icr_high, icr_low, irq.shorthand, irq.dest_id, |
931 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, | 944 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
932 | irq.vector); | 945 | irq.vector, irq.msi_redir_hint); |
933 | 946 | ||
934 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); | 947 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); |
935 | } | 948 | } |
@@ -1541,9 +1554,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1541 | 1554 | ||
1542 | if ((old_value ^ value) & X2APIC_ENABLE) { | 1555 | if ((old_value ^ value) & X2APIC_ENABLE) { |
1543 | if (value & X2APIC_ENABLE) { | 1556 | if (value & X2APIC_ENABLE) { |
1544 | u32 id = kvm_apic_id(apic); | 1557 | kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); |
1545 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); | ||
1546 | kvm_apic_set_ldr(apic, ldr); | ||
1547 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); | 1558 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); |
1548 | } else | 1559 | } else |
1549 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); | 1560 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); |
@@ -1562,7 +1573,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1562 | 1573 | ||
1563 | } | 1574 | } |
1564 | 1575 | ||
1565 | void kvm_lapic_reset(struct kvm_vcpu *vcpu) | 1576 | void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) |
1566 | { | 1577 | { |
1567 | struct kvm_lapic *apic; | 1578 | struct kvm_lapic *apic; |
1568 | int i; | 1579 | int i; |
@@ -1576,19 +1587,22 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1576 | /* Stop the timer in case it's a reset to an active apic */ | 1587 | /* Stop the timer in case it's a reset to an active apic */ |
1577 | hrtimer_cancel(&apic->lapic_timer.timer); | 1588 | hrtimer_cancel(&apic->lapic_timer.timer); |
1578 | 1589 | ||
1579 | kvm_apic_set_id(apic, vcpu->vcpu_id); | 1590 | if (!init_event) |
1591 | kvm_apic_set_id(apic, vcpu->vcpu_id); | ||
1580 | kvm_apic_set_version(apic->vcpu); | 1592 | kvm_apic_set_version(apic->vcpu); |
1581 | 1593 | ||
1582 | for (i = 0; i < APIC_LVT_NUM; i++) | 1594 | for (i = 0; i < APIC_LVT_NUM; i++) |
1583 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); | 1595 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); |
1584 | apic_update_lvtt(apic); | 1596 | apic_update_lvtt(apic); |
1585 | apic_set_reg(apic, APIC_LVT0, | 1597 | if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED)) |
1586 | SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); | 1598 | apic_set_reg(apic, APIC_LVT0, |
1599 | SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); | ||
1587 | 1600 | ||
1588 | apic_set_reg(apic, APIC_DFR, 0xffffffffU); | 1601 | apic_set_reg(apic, APIC_DFR, 0xffffffffU); |
1589 | apic_set_spiv(apic, 0xff); | 1602 | apic_set_spiv(apic, 0xff); |
1590 | apic_set_reg(apic, APIC_TASKPRI, 0); | 1603 | apic_set_reg(apic, APIC_TASKPRI, 0); |
1591 | kvm_apic_set_ldr(apic, 0); | 1604 | if (!apic_x2apic_mode(apic)) |
1605 | kvm_apic_set_ldr(apic, 0); | ||
1592 | apic_set_reg(apic, APIC_ESR, 0); | 1606 | apic_set_reg(apic, APIC_ESR, 0); |
1593 | apic_set_reg(apic, APIC_ICR, 0); | 1607 | apic_set_reg(apic, APIC_ICR, 0); |
1594 | apic_set_reg(apic, APIC_ICR2, 0); | 1608 | apic_set_reg(apic, APIC_ICR2, 0); |
@@ -1717,7 +1731,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
1717 | APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); | 1731 | APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); |
1718 | 1732 | ||
1719 | static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ | 1733 | static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ |
1720 | kvm_lapic_reset(vcpu); | 1734 | kvm_lapic_reset(vcpu, false); |
1721 | kvm_iodevice_init(&apic->dev, &apic_mmio_ops); | 1735 | kvm_iodevice_init(&apic->dev, &apic_mmio_ops); |
1722 | 1736 | ||
1723 | return 0; | 1737 | return 0; |
@@ -2049,11 +2063,22 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) | |||
2049 | if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) | 2063 | if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) |
2050 | return; | 2064 | return; |
2051 | 2065 | ||
2052 | pe = xchg(&apic->pending_events, 0); | 2066 | /* |
2067 | * INITs are latched while in SMM. Because an SMM CPU cannot | ||
2068 | * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs | ||
2069 | * and delay processing of INIT until the next RSM. | ||
2070 | */ | ||
2071 | if (is_smm(vcpu)) { | ||
2072 | WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); | ||
2073 | if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) | ||
2074 | clear_bit(KVM_APIC_SIPI, &apic->pending_events); | ||
2075 | return; | ||
2076 | } | ||
2053 | 2077 | ||
2078 | pe = xchg(&apic->pending_events, 0); | ||
2054 | if (test_bit(KVM_APIC_INIT, &pe)) { | 2079 | if (test_bit(KVM_APIC_INIT, &pe)) { |
2055 | kvm_lapic_reset(vcpu); | 2080 | kvm_lapic_reset(vcpu, true); |
2056 | kvm_vcpu_reset(vcpu); | 2081 | kvm_vcpu_reset(vcpu, true); |
2057 | if (kvm_vcpu_is_bsp(apic->vcpu)) | 2082 | if (kvm_vcpu_is_bsp(apic->vcpu)) |
2058 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 2083 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
2059 | else | 2084 | else |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9d28383fc1e7..f2f4e10ab772 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -48,7 +48,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); | |||
48 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); | 48 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); |
49 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); | 49 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); |
50 | void kvm_apic_accept_events(struct kvm_vcpu *vcpu); | 50 | void kvm_apic_accept_events(struct kvm_vcpu *vcpu); |
51 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); | 51 | void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); |
52 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | 52 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); |
53 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | 53 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); |
54 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); | 54 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); |
@@ -150,7 +150,18 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) | |||
150 | 150 | ||
151 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) | 151 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
152 | { | 152 | { |
153 | return vcpu->arch.apic->pending_events; | 153 | return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; |
154 | } | ||
155 | |||
156 | static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) | ||
157 | { | ||
158 | return (irq->delivery_mode == APIC_DM_LOWEST || | ||
159 | irq->msi_redir_hint); | ||
160 | } | ||
161 | |||
162 | static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) | ||
163 | { | ||
164 | return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | ||
154 | } | 165 | } |
155 | 166 | ||
156 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | 167 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b73337634214..f807496b62c2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -223,15 +223,15 @@ static unsigned int get_mmio_spte_generation(u64 spte) | |||
223 | return gen; | 223 | return gen; |
224 | } | 224 | } |
225 | 225 | ||
226 | static unsigned int kvm_current_mmio_generation(struct kvm *kvm) | 226 | static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu) |
227 | { | 227 | { |
228 | return kvm_memslots(kvm)->generation & MMIO_GEN_MASK; | 228 | return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK; |
229 | } | 229 | } |
230 | 230 | ||
231 | static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, | 231 | static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, |
232 | unsigned access) | 232 | unsigned access) |
233 | { | 233 | { |
234 | unsigned int gen = kvm_current_mmio_generation(kvm); | 234 | unsigned int gen = kvm_current_mmio_generation(vcpu); |
235 | u64 mask = generation_mmio_spte_mask(gen); | 235 | u64 mask = generation_mmio_spte_mask(gen); |
236 | 236 | ||
237 | access &= ACC_WRITE_MASK | ACC_USER_MASK; | 237 | access &= ACC_WRITE_MASK | ACC_USER_MASK; |
@@ -258,22 +258,22 @@ static unsigned get_mmio_spte_access(u64 spte) | |||
258 | return (spte & ~mask) & ~PAGE_MASK; | 258 | return (spte & ~mask) & ~PAGE_MASK; |
259 | } | 259 | } |
260 | 260 | ||
261 | static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | 261 | static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, |
262 | pfn_t pfn, unsigned access) | 262 | pfn_t pfn, unsigned access) |
263 | { | 263 | { |
264 | if (unlikely(is_noslot_pfn(pfn))) { | 264 | if (unlikely(is_noslot_pfn(pfn))) { |
265 | mark_mmio_spte(kvm, sptep, gfn, access); | 265 | mark_mmio_spte(vcpu, sptep, gfn, access); |
266 | return true; | 266 | return true; |
267 | } | 267 | } |
268 | 268 | ||
269 | return false; | 269 | return false; |
270 | } | 270 | } |
271 | 271 | ||
272 | static bool check_mmio_spte(struct kvm *kvm, u64 spte) | 272 | static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) |
273 | { | 273 | { |
274 | unsigned int kvm_gen, spte_gen; | 274 | unsigned int kvm_gen, spte_gen; |
275 | 275 | ||
276 | kvm_gen = kvm_current_mmio_generation(kvm); | 276 | kvm_gen = kvm_current_mmio_generation(vcpu); |
277 | spte_gen = get_mmio_spte_generation(spte); | 277 | spte_gen = get_mmio_spte_generation(spte); |
278 | 278 | ||
279 | trace_check_mmio_spte(spte, kvm_gen, spte_gen); | 279 | trace_check_mmio_spte(spte, kvm_gen, spte_gen); |
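check_mmio_spte() compares the memslot generation baked into a cached MMIO SPTE against the current one, so entries created before a memslot update get refaulted rather than trusted. A generic sketch of that idea; the bit layout below is purely illustrative, the real encoding lives in generation_mmio_spte_mask()/get_mmio_spte_generation():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative layout only: keep an 8-bit generation in bits 52-59. */
#define GEN_SHIFT	52
#define GEN_MASK	0xffu

static uint64_t mark_gen(uint64_t spte, unsigned int gen)
{
	return spte | ((uint64_t)(gen & GEN_MASK) << GEN_SHIFT);
}

static bool gen_is_current(uint64_t spte, unsigned int current_gen)
{
	return ((spte >> GEN_SHIFT) & GEN_MASK) == (current_gen & GEN_MASK);
}

int main(void)
{
	unsigned int memslot_gen = 7;
	uint64_t spte = mark_gen(0x0000000000abc000ULL, memslot_gen);

	/* Valid while the memslot generation is unchanged ... */
	printf("gen ok: %d\n", gen_is_current(spte, memslot_gen));

	/* ... but a memslot update bumps the generation and invalidates it. */
	memslot_gen++;
	printf("gen ok after update: %d\n", gen_is_current(spte, memslot_gen));
	return 0;
}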
@@ -804,30 +804,36 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, | |||
804 | return &slot->arch.lpage_info[level - 2][idx]; | 804 | return &slot->arch.lpage_info[level - 2][idx]; |
805 | } | 805 | } |
806 | 806 | ||
807 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) | 807 | static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) |
808 | { | 808 | { |
809 | struct kvm_memslots *slots; | ||
809 | struct kvm_memory_slot *slot; | 810 | struct kvm_memory_slot *slot; |
810 | struct kvm_lpage_info *linfo; | 811 | struct kvm_lpage_info *linfo; |
812 | gfn_t gfn; | ||
811 | int i; | 813 | int i; |
812 | 814 | ||
813 | slot = gfn_to_memslot(kvm, gfn); | 815 | gfn = sp->gfn; |
814 | for (i = PT_DIRECTORY_LEVEL; | 816 | slots = kvm_memslots_for_spte_role(kvm, sp->role); |
815 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 817 | slot = __gfn_to_memslot(slots, gfn); |
818 | for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { | ||
816 | linfo = lpage_info_slot(gfn, slot, i); | 819 | linfo = lpage_info_slot(gfn, slot, i); |
817 | linfo->write_count += 1; | 820 | linfo->write_count += 1; |
818 | } | 821 | } |
819 | kvm->arch.indirect_shadow_pages++; | 822 | kvm->arch.indirect_shadow_pages++; |
820 | } | 823 | } |
821 | 824 | ||
822 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | 825 | static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) |
823 | { | 826 | { |
827 | struct kvm_memslots *slots; | ||
824 | struct kvm_memory_slot *slot; | 828 | struct kvm_memory_slot *slot; |
825 | struct kvm_lpage_info *linfo; | 829 | struct kvm_lpage_info *linfo; |
830 | gfn_t gfn; | ||
826 | int i; | 831 | int i; |
827 | 832 | ||
828 | slot = gfn_to_memslot(kvm, gfn); | 833 | gfn = sp->gfn; |
829 | for (i = PT_DIRECTORY_LEVEL; | 834 | slots = kvm_memslots_for_spte_role(kvm, sp->role); |
830 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 835 | slot = __gfn_to_memslot(slots, gfn); |
836 | for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { | ||
831 | linfo = lpage_info_slot(gfn, slot, i); | 837 | linfo = lpage_info_slot(gfn, slot, i); |
832 | linfo->write_count -= 1; | 838 | linfo->write_count -= 1; |
833 | WARN_ON(linfo->write_count < 0); | 839 | WARN_ON(linfo->write_count < 0); |
@@ -835,14 +841,14 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
835 | kvm->arch.indirect_shadow_pages--; | 841 | kvm->arch.indirect_shadow_pages--; |
836 | } | 842 | } |
837 | 843 | ||
838 | static int has_wrprotected_page(struct kvm *kvm, | 844 | static int has_wrprotected_page(struct kvm_vcpu *vcpu, |
839 | gfn_t gfn, | 845 | gfn_t gfn, |
840 | int level) | 846 | int level) |
841 | { | 847 | { |
842 | struct kvm_memory_slot *slot; | 848 | struct kvm_memory_slot *slot; |
843 | struct kvm_lpage_info *linfo; | 849 | struct kvm_lpage_info *linfo; |
844 | 850 | ||
845 | slot = gfn_to_memslot(kvm, gfn); | 851 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); |
846 | if (slot) { | 852 | if (slot) { |
847 | linfo = lpage_info_slot(gfn, slot, level); | 853 | linfo = lpage_info_slot(gfn, slot, level); |
848 | return linfo->write_count; | 854 | return linfo->write_count; |
@@ -858,8 +864,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
858 | 864 | ||
859 | page_size = kvm_host_page_size(kvm, gfn); | 865 | page_size = kvm_host_page_size(kvm, gfn); |
860 | 866 | ||
861 | for (i = PT_PAGE_TABLE_LEVEL; | 867 | for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { |
862 | i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { | ||
863 | if (page_size >= KVM_HPAGE_SIZE(i)) | 868 | if (page_size >= KVM_HPAGE_SIZE(i)) |
864 | ret = i; | 869 | ret = i; |
865 | else | 870 | else |
@@ -875,7 +880,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
875 | { | 880 | { |
876 | struct kvm_memory_slot *slot; | 881 | struct kvm_memory_slot *slot; |
877 | 882 | ||
878 | slot = gfn_to_memslot(vcpu->kvm, gfn); | 883 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); |
879 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || | 884 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || |
880 | (no_dirty_log && slot->dirty_bitmap)) | 885 | (no_dirty_log && slot->dirty_bitmap)) |
881 | slot = NULL; | 886 | slot = NULL; |
@@ -900,7 +905,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
900 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); | 905 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); |
901 | 906 | ||
902 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | 907 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) |
903 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) | 908 | if (has_wrprotected_page(vcpu, large_gfn, level)) |
904 | break; | 909 | break; |
905 | 910 | ||
906 | return level - 1; | 911 | return level - 1; |
@@ -1042,12 +1047,14 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, | |||
1042 | /* | 1047 | /* |
1043 | * Take gfn and return the reverse mapping to it. | 1048 | * Take gfn and return the reverse mapping to it. |
1044 | */ | 1049 | */ |
1045 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 1050 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp) |
1046 | { | 1051 | { |
1052 | struct kvm_memslots *slots; | ||
1047 | struct kvm_memory_slot *slot; | 1053 | struct kvm_memory_slot *slot; |
1048 | 1054 | ||
1049 | slot = gfn_to_memslot(kvm, gfn); | 1055 | slots = kvm_memslots_for_spte_role(kvm, sp->role); |
1050 | return __gfn_to_rmap(gfn, level, slot); | 1056 | slot = __gfn_to_memslot(slots, gfn); |
1057 | return __gfn_to_rmap(gfn, sp->role.level, slot); | ||
1051 | } | 1058 | } |
1052 | 1059 | ||
1053 | static bool rmap_can_add(struct kvm_vcpu *vcpu) | 1060 | static bool rmap_can_add(struct kvm_vcpu *vcpu) |
@@ -1065,7 +1072,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
1065 | 1072 | ||
1066 | sp = page_header(__pa(spte)); | 1073 | sp = page_header(__pa(spte)); |
1067 | kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); | 1074 | kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); |
1068 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 1075 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); |
1069 | return pte_list_add(vcpu, spte, rmapp); | 1076 | return pte_list_add(vcpu, spte, rmapp); |
1070 | } | 1077 | } |
1071 | 1078 | ||
@@ -1077,7 +1084,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
1077 | 1084 | ||
1078 | sp = page_header(__pa(spte)); | 1085 | sp = page_header(__pa(spte)); |
1079 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); | 1086 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); |
1080 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); | 1087 | rmapp = gfn_to_rmap(kvm, gfn, sp); |
1081 | pte_list_remove(spte, rmapp); | 1088 | pte_list_remove(spte, rmapp); |
1082 | } | 1089 | } |
1083 | 1090 | ||
@@ -1142,6 +1149,11 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) | |||
1142 | return NULL; | 1149 | return NULL; |
1143 | } | 1150 | } |
1144 | 1151 | ||
1152 | #define for_each_rmap_spte(_rmap_, _iter_, _spte_) \ | ||
1153 | for (_spte_ = rmap_get_first(*_rmap_, _iter_); \ | ||
1154 | _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ | ||
1155 | _spte_ = rmap_get_next(_iter_)) | ||
1156 | |||
1145 | static void drop_spte(struct kvm *kvm, u64 *sptep) | 1157 | static void drop_spte(struct kvm *kvm, u64 *sptep) |
1146 | { | 1158 | { |
1147 | if (mmu_spte_clear_track_bits(sptep)) | 1159 | if (mmu_spte_clear_track_bits(sptep)) |
@@ -1205,12 +1217,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, | |||
1205 | struct rmap_iterator iter; | 1217 | struct rmap_iterator iter; |
1206 | bool flush = false; | 1218 | bool flush = false; |
1207 | 1219 | ||
1208 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | 1220 | for_each_rmap_spte(rmapp, &iter, sptep) |
1209 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | ||
1210 | |||
1211 | flush |= spte_write_protect(kvm, sptep, pt_protect); | 1221 | flush |= spte_write_protect(kvm, sptep, pt_protect); |
1212 | sptep = rmap_get_next(&iter); | ||
1213 | } | ||
1214 | 1222 | ||
1215 | return flush; | 1223 | return flush; |
1216 | } | 1224 | } |
@@ -1232,12 +1240,8 @@ static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | |||
1232 | struct rmap_iterator iter; | 1240 | struct rmap_iterator iter; |
1233 | bool flush = false; | 1241 | bool flush = false; |
1234 | 1242 | ||
1235 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | 1243 | for_each_rmap_spte(rmapp, &iter, sptep) |
1236 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | ||
1237 | |||
1238 | flush |= spte_clear_dirty(kvm, sptep); | 1244 | flush |= spte_clear_dirty(kvm, sptep); |
1239 | sptep = rmap_get_next(&iter); | ||
1240 | } | ||
1241 | 1245 | ||
1242 | return flush; | 1246 | return flush; |
1243 | } | 1247 | } |
@@ -1259,12 +1263,8 @@ static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) | |||
1259 | struct rmap_iterator iter; | 1263 | struct rmap_iterator iter; |
1260 | bool flush = false; | 1264 | bool flush = false; |
1261 | 1265 | ||
1262 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | 1266 | for_each_rmap_spte(rmapp, &iter, sptep) |
1263 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | ||
1264 | |||
1265 | flush |= spte_set_dirty(kvm, sptep); | 1267 | flush |= spte_set_dirty(kvm, sptep); |
1266 | sptep = rmap_get_next(&iter); | ||
1267 | } | ||
1268 | 1268 | ||
1269 | return flush; | 1269 | return flush; |
1270 | } | 1270 | } |
@@ -1342,42 +1342,45 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | |||
1342 | kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); | 1342 | kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); |
1343 | } | 1343 | } |
1344 | 1344 | ||
1345 | static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | 1345 | static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) |
1346 | { | 1346 | { |
1347 | struct kvm_memory_slot *slot; | 1347 | struct kvm_memory_slot *slot; |
1348 | unsigned long *rmapp; | 1348 | unsigned long *rmapp; |
1349 | int i; | 1349 | int i; |
1350 | bool write_protected = false; | 1350 | bool write_protected = false; |
1351 | 1351 | ||
1352 | slot = gfn_to_memslot(kvm, gfn); | 1352 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); |
1353 | 1353 | ||
1354 | for (i = PT_PAGE_TABLE_LEVEL; | 1354 | for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { |
1355 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | ||
1356 | rmapp = __gfn_to_rmap(gfn, i, slot); | 1355 | rmapp = __gfn_to_rmap(gfn, i, slot); |
1357 | write_protected |= __rmap_write_protect(kvm, rmapp, true); | 1356 | write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true); |
1358 | } | 1357 | } |
1359 | 1358 | ||
1360 | return write_protected; | 1359 | return write_protected; |
1361 | } | 1360 | } |
1362 | 1361 | ||
1363 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1362 | static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) |
1364 | struct kvm_memory_slot *slot, gfn_t gfn, int level, | ||
1365 | unsigned long data) | ||
1366 | { | 1363 | { |
1367 | u64 *sptep; | 1364 | u64 *sptep; |
1368 | struct rmap_iterator iter; | 1365 | struct rmap_iterator iter; |
1369 | int need_tlb_flush = 0; | 1366 | bool flush = false; |
1370 | 1367 | ||
1371 | while ((sptep = rmap_get_first(*rmapp, &iter))) { | 1368 | while ((sptep = rmap_get_first(*rmapp, &iter))) { |
1372 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | 1369 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); |
1373 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n", | 1370 | rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); |
1374 | sptep, *sptep, gfn, level); | ||
1375 | 1371 | ||
1376 | drop_spte(kvm, sptep); | 1372 | drop_spte(kvm, sptep); |
1377 | need_tlb_flush = 1; | 1373 | flush = true; |
1378 | } | 1374 | } |
1379 | 1375 | ||
1380 | return need_tlb_flush; | 1376 | return flush; |
1377 | } | ||
1378 | |||
1379 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
1380 | struct kvm_memory_slot *slot, gfn_t gfn, int level, | ||
1381 | unsigned long data) | ||
1382 | { | ||
1383 | return kvm_zap_rmapp(kvm, rmapp); | ||
1381 | } | 1384 | } |
1382 | 1385 | ||
1383 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1386 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, |
@@ -1394,8 +1397,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1394 | WARN_ON(pte_huge(*ptep)); | 1397 | WARN_ON(pte_huge(*ptep)); |
1395 | new_pfn = pte_pfn(*ptep); | 1398 | new_pfn = pte_pfn(*ptep); |
1396 | 1399 | ||
1397 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | 1400 | restart: |
1398 | BUG_ON(!is_shadow_present_pte(*sptep)); | 1401 | for_each_rmap_spte(rmapp, &iter, sptep) { |
1399 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", | 1402 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", |
1400 | sptep, *sptep, gfn, level); | 1403 | sptep, *sptep, gfn, level); |
1401 | 1404 | ||
@@ -1403,7 +1406,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1403 | 1406 | ||
1404 | if (pte_write(*ptep)) { | 1407 | if (pte_write(*ptep)) { |
1405 | drop_spte(kvm, sptep); | 1408 | drop_spte(kvm, sptep); |
1406 | sptep = rmap_get_first(*rmapp, &iter); | 1409 | goto restart; |
1407 | } else { | 1410 | } else { |
1408 | new_spte = *sptep & ~PT64_BASE_ADDR_MASK; | 1411 | new_spte = *sptep & ~PT64_BASE_ADDR_MASK; |
1409 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | 1412 | new_spte |= (u64)new_pfn << PAGE_SHIFT; |
@@ -1414,7 +1417,6 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1414 | 1417 | ||
1415 | mmu_spte_clear_track_bits(sptep); | 1418 | mmu_spte_clear_track_bits(sptep); |
1416 | mmu_spte_set(sptep, new_spte); | 1419 | mmu_spte_set(sptep, new_spte); |
1417 | sptep = rmap_get_next(&iter); | ||
1418 | } | 1420 | } |
1419 | } | 1421 | } |
1420 | 1422 | ||
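kvm_set_pte_rmapp() now restarts its walk ("goto restart") whenever drop_spte() removes the entry under the cursor, because for_each_rmap_spte()'s iterator is no longer reliable after a removal; the read-only branch just rewrites the SPTE in place and keeps going. The same restart-after-removal pattern on a plain linked list, as a minimal sketch:

#include <stdio.h>
#include <stdlib.h>

struct node { int val; struct node *next; };

/* Drop every negative value; restart after each removal, mirroring
 * the "goto restart" above once the current element has been freed. */
static void drop_negative(struct node **head)
{
	struct node *prev, *cur;

restart:
	prev = NULL;
	for (cur = *head; cur; prev = cur, cur = cur->next) {
		if (cur->val < 0) {
			if (prev)
				prev->next = cur->next;
			else
				*head = cur->next;
			free(cur);
			goto restart;	/* cur is gone; do not advance from it */
		}
	}
}

int main(void)
{
	int vals[] = { 3, -1, 4, -2, 5 };
	struct node *head = NULL, **tail = &head;

	for (unsigned int i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
		struct node *n = malloc(sizeof(*n));
		n->val = vals[i];
		n->next = NULL;
		*tail = n;
		tail = &n->next;
	}

	drop_negative(&head);
	for (struct node *n = head; n; n = n->next)
		printf("%d ", n->val);
	printf("\n");
	return 0;
}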
@@ -1424,6 +1426,74 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1424 | return 0; | 1426 | return 0; |
1425 | } | 1427 | } |
1426 | 1428 | ||
1429 | struct slot_rmap_walk_iterator { | ||
1430 | /* input fields. */ | ||
1431 | struct kvm_memory_slot *slot; | ||
1432 | gfn_t start_gfn; | ||
1433 | gfn_t end_gfn; | ||
1434 | int start_level; | ||
1435 | int end_level; | ||
1436 | |||
1437 | /* output fields. */ | ||
1438 | gfn_t gfn; | ||
1439 | unsigned long *rmap; | ||
1440 | int level; | ||
1441 | |||
1442 | /* private field. */ | ||
1443 | unsigned long *end_rmap; | ||
1444 | }; | ||
1445 | |||
1446 | static void | ||
1447 | rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) | ||
1448 | { | ||
1449 | iterator->level = level; | ||
1450 | iterator->gfn = iterator->start_gfn; | ||
1451 | iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot); | ||
1452 | iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level, | ||
1453 | iterator->slot); | ||
1454 | } | ||
1455 | |||
1456 | static void | ||
1457 | slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, | ||
1458 | struct kvm_memory_slot *slot, int start_level, | ||
1459 | int end_level, gfn_t start_gfn, gfn_t end_gfn) | ||
1460 | { | ||
1461 | iterator->slot = slot; | ||
1462 | iterator->start_level = start_level; | ||
1463 | iterator->end_level = end_level; | ||
1464 | iterator->start_gfn = start_gfn; | ||
1465 | iterator->end_gfn = end_gfn; | ||
1466 | |||
1467 | rmap_walk_init_level(iterator, iterator->start_level); | ||
1468 | } | ||
1469 | |||
1470 | static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) | ||
1471 | { | ||
1472 | return !!iterator->rmap; | ||
1473 | } | ||
1474 | |||
1475 | static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) | ||
1476 | { | ||
1477 | if (++iterator->rmap <= iterator->end_rmap) { | ||
1478 | iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); | ||
1479 | return; | ||
1480 | } | ||
1481 | |||
1482 | if (++iterator->level > iterator->end_level) { | ||
1483 | iterator->rmap = NULL; | ||
1484 | return; | ||
1485 | } | ||
1486 | |||
1487 | rmap_walk_init_level(iterator, iterator->level); | ||
1488 | } | ||
1489 | |||
1490 | #define for_each_slot_rmap_range(_slot_, _start_level_, _end_level_, \ | ||
1491 | _start_gfn, _end_gfn, _iter_) \ | ||
1492 | for (slot_rmap_walk_init(_iter_, _slot_, _start_level_, \ | ||
1493 | _end_level_, _start_gfn, _end_gfn); \ | ||
1494 | slot_rmap_walk_okay(_iter_); \ | ||
1495 | slot_rmap_walk_next(_iter_)) | ||
1496 | |||
1427 | static int kvm_handle_hva_range(struct kvm *kvm, | 1497 | static int kvm_handle_hva_range(struct kvm *kvm, |
1428 | unsigned long start, | 1498 | unsigned long start, |
1429 | unsigned long end, | 1499 | unsigned long end, |
@@ -1435,48 +1505,36 @@ static int kvm_handle_hva_range(struct kvm *kvm, | |||
1435 | int level, | 1505 | int level, |
1436 | unsigned long data)) | 1506 | unsigned long data)) |
1437 | { | 1507 | { |
1438 | int j; | ||
1439 | int ret = 0; | ||
1440 | struct kvm_memslots *slots; | 1508 | struct kvm_memslots *slots; |
1441 | struct kvm_memory_slot *memslot; | 1509 | struct kvm_memory_slot *memslot; |
1510 | struct slot_rmap_walk_iterator iterator; | ||
1511 | int ret = 0; | ||
1512 | int i; | ||
1442 | 1513 | ||
1443 | slots = kvm_memslots(kvm); | 1514 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
1444 | 1515 | slots = __kvm_memslots(kvm, i); | |
1445 | kvm_for_each_memslot(memslot, slots) { | 1516 | kvm_for_each_memslot(memslot, slots) { |
1446 | unsigned long hva_start, hva_end; | 1517 | unsigned long hva_start, hva_end; |
1447 | gfn_t gfn_start, gfn_end; | 1518 | gfn_t gfn_start, gfn_end; |
1448 | |||
1449 | hva_start = max(start, memslot->userspace_addr); | ||
1450 | hva_end = min(end, memslot->userspace_addr + | ||
1451 | (memslot->npages << PAGE_SHIFT)); | ||
1452 | if (hva_start >= hva_end) | ||
1453 | continue; | ||
1454 | /* | ||
1455 | * {gfn(page) | page intersects with [hva_start, hva_end)} = | ||
1456 | * {gfn_start, gfn_start+1, ..., gfn_end-1}. | ||
1457 | */ | ||
1458 | gfn_start = hva_to_gfn_memslot(hva_start, memslot); | ||
1459 | gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); | ||
1460 | |||
1461 | for (j = PT_PAGE_TABLE_LEVEL; | ||
1462 | j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { | ||
1463 | unsigned long idx, idx_end; | ||
1464 | unsigned long *rmapp; | ||
1465 | gfn_t gfn = gfn_start; | ||
1466 | 1519 | ||
1520 | hva_start = max(start, memslot->userspace_addr); | ||
1521 | hva_end = min(end, memslot->userspace_addr + | ||
1522 | (memslot->npages << PAGE_SHIFT)); | ||
1523 | if (hva_start >= hva_end) | ||
1524 | continue; | ||
1467 | /* | 1525 | /* |
1468 | * {idx(page_j) | page_j intersects with | 1526 | * {gfn(page) | page intersects with [hva_start, hva_end)} = |
1469 | * [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}. | 1527 | * {gfn_start, gfn_start+1, ..., gfn_end-1}. |
1470 | */ | 1528 | */ |
1471 | idx = gfn_to_index(gfn_start, memslot->base_gfn, j); | 1529 | gfn_start = hva_to_gfn_memslot(hva_start, memslot); |
1472 | idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j); | 1530 | gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); |
1473 | 1531 | ||
1474 | rmapp = __gfn_to_rmap(gfn_start, j, memslot); | 1532 | for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL, |
1475 | 1533 | PT_MAX_HUGEPAGE_LEVEL, | |
1476 | for (; idx <= idx_end; | 1534 | gfn_start, gfn_end - 1, |
1477 | ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j))) | 1535 | &iterator) |
1478 | ret |= handler(kvm, rmapp++, memslot, | 1536 | ret |= handler(kvm, iterator.rmap, memslot, |
1479 | gfn, j, data); | 1537 | iterator.gfn, iterator.level, data); |
1480 | } | 1538 | } |
1481 | } | 1539 | } |
1482 | 1540 | ||
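The rewritten kvm_handle_hva_range() walks every address space, clips the HVA range to each memslot's userspace mapping, and converts it to a GFN range before handing individual rmaps to the handler, exactly as the retained comment describes. A small sketch of that HVA-to-GFN arithmetic; the helper mimics hva_to_gfn_memslot() and the sample numbers are invented:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

struct memslot {
	uint64_t base_gfn;		/* first guest frame covered by the slot */
	uint64_t userspace_addr;	/* HVA where the slot is mapped */
	uint64_t npages;
};

/* Mimics hva_to_gfn_memslot(): base frame plus the offset in pages. */
static uint64_t hva_to_gfn(uint64_t hva, const struct memslot *slot)
{
	return slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
}

int main(void)
{
	struct memslot slot = {
		.base_gfn = 0x100,
		.userspace_addr = 0x7f0000000000ULL,
		.npages = 512,
	};
	/* An invalidated HVA range, e.g. from an MMU notifier callback. */
	uint64_t hva_start = 0x7f0000003800ULL;
	uint64_t hva_end   = 0x7f0000006000ULL;

	uint64_t gfn_start = hva_to_gfn(hva_start, &slot);
	/* Round the exclusive end up so partially covered pages are included. */
	uint64_t gfn_end   = hva_to_gfn(hva_end + PAGE_SIZE - 1, &slot);

	printf("gfn range [%#llx, %#llx)\n",
	       (unsigned long long)gfn_start, (unsigned long long)gfn_end);
	return 0;
}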
@@ -1518,16 +1576,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1518 | 1576 | ||
1519 | BUG_ON(!shadow_accessed_mask); | 1577 | BUG_ON(!shadow_accessed_mask); |
1520 | 1578 | ||
1521 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 1579 | for_each_rmap_spte(rmapp, &iter, sptep) |
1522 | sptep = rmap_get_next(&iter)) { | ||
1523 | BUG_ON(!is_shadow_present_pte(*sptep)); | ||
1524 | |||
1525 | if (*sptep & shadow_accessed_mask) { | 1580 | if (*sptep & shadow_accessed_mask) { |
1526 | young = 1; | 1581 | young = 1; |
1527 | clear_bit((ffs(shadow_accessed_mask) - 1), | 1582 | clear_bit((ffs(shadow_accessed_mask) - 1), |
1528 | (unsigned long *)sptep); | 1583 | (unsigned long *)sptep); |
1529 | } | 1584 | } |
1530 | } | 1585 | |
1531 | trace_kvm_age_page(gfn, level, slot, young); | 1586 | trace_kvm_age_page(gfn, level, slot, young); |
1532 | return young; | 1587 | return young; |
1533 | } | 1588 | } |
@@ -1548,15 +1603,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1548 | if (!shadow_accessed_mask) | 1603 | if (!shadow_accessed_mask) |
1549 | goto out; | 1604 | goto out; |
1550 | 1605 | ||
1551 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 1606 | for_each_rmap_spte(rmapp, &iter, sptep) |
1552 | sptep = rmap_get_next(&iter)) { | ||
1553 | BUG_ON(!is_shadow_present_pte(*sptep)); | ||
1554 | |||
1555 | if (*sptep & shadow_accessed_mask) { | 1607 | if (*sptep & shadow_accessed_mask) { |
1556 | young = 1; | 1608 | young = 1; |
1557 | break; | 1609 | break; |
1558 | } | 1610 | } |
1559 | } | ||
1560 | out: | 1611 | out: |
1561 | return young; | 1612 | return young; |
1562 | } | 1613 | } |
@@ -1570,7 +1621,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
1570 | 1621 | ||
1571 | sp = page_header(__pa(spte)); | 1622 | sp = page_header(__pa(spte)); |
1572 | 1623 | ||
1573 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 1624 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); |
1574 | 1625 | ||
1575 | kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); | 1626 | kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); |
1576 | kvm_flush_remote_tlbs(vcpu->kvm); | 1627 | kvm_flush_remote_tlbs(vcpu->kvm); |
@@ -1990,7 +2041,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1990 | bool protected = false; | 2041 | bool protected = false; |
1991 | 2042 | ||
1992 | for_each_sp(pages, sp, parents, i) | 2043 | for_each_sp(pages, sp, parents, i) |
1993 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); | 2044 | protected |= rmap_write_protect(vcpu, sp->gfn); |
1994 | 2045 | ||
1995 | if (protected) | 2046 | if (protected) |
1996 | kvm_flush_remote_tlbs(vcpu->kvm); | 2047 | kvm_flush_remote_tlbs(vcpu->kvm); |
@@ -2088,12 +2139,12 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2088 | hlist_add_head(&sp->hash_link, | 2139 | hlist_add_head(&sp->hash_link, |
2089 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); | 2140 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); |
2090 | if (!direct) { | 2141 | if (!direct) { |
2091 | if (rmap_write_protect(vcpu->kvm, gfn)) | 2142 | if (rmap_write_protect(vcpu, gfn)) |
2092 | kvm_flush_remote_tlbs(vcpu->kvm); | 2143 | kvm_flush_remote_tlbs(vcpu->kvm); |
2093 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | 2144 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) |
2094 | kvm_sync_pages(vcpu, gfn); | 2145 | kvm_sync_pages(vcpu, gfn); |
2095 | 2146 | ||
2096 | account_shadowed(vcpu->kvm, gfn); | 2147 | account_shadowed(vcpu->kvm, sp); |
2097 | } | 2148 | } |
2098 | sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; | 2149 | sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; |
2099 | init_shadow_page_table(sp); | 2150 | init_shadow_page_table(sp); |
@@ -2274,7 +2325,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
2274 | kvm_mmu_unlink_parents(kvm, sp); | 2325 | kvm_mmu_unlink_parents(kvm, sp); |
2275 | 2326 | ||
2276 | if (!sp->role.invalid && !sp->role.direct) | 2327 | if (!sp->role.invalid && !sp->role.direct) |
2277 | unaccount_shadowed(kvm, sp->gfn); | 2328 | unaccount_shadowed(kvm, sp); |
2278 | 2329 | ||
2279 | if (sp->unsync) | 2330 | if (sp->unsync) |
2280 | kvm_unlink_unsync_page(kvm, sp); | 2331 | kvm_unlink_unsync_page(kvm, sp); |
@@ -2386,111 +2437,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
2386 | } | 2437 | } |
2387 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); | 2438 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); |
2388 | 2439 | ||
2389 | /* | ||
2390 | * The function is based on mtrr_type_lookup() in | ||
2391 | * arch/x86/kernel/cpu/mtrr/generic.c | ||
2392 | */ | ||
2393 | static int get_mtrr_type(struct mtrr_state_type *mtrr_state, | ||
2394 | u64 start, u64 end) | ||
2395 | { | ||
2396 | int i; | ||
2397 | u64 base, mask; | ||
2398 | u8 prev_match, curr_match; | ||
2399 | int num_var_ranges = KVM_NR_VAR_MTRR; | ||
2400 | |||
2401 | if (!mtrr_state->enabled) | ||
2402 | return 0xFF; | ||
2403 | |||
2405 | /* Make end inclusive, instead of exclusive */ | ||
2405 | end--; | ||
2406 | |||
2407 | /* Look in fixed ranges. Just return the type as per start */ | ||
2408 | if (mtrr_state->have_fixed && (start < 0x100000)) { | ||
2409 | int idx; | ||
2410 | |||
2411 | if (start < 0x80000) { | ||
2412 | idx = 0; | ||
2413 | idx += (start >> 16); | ||
2414 | return mtrr_state->fixed_ranges[idx]; | ||
2415 | } else if (start < 0xC0000) { | ||
2416 | idx = 1 * 8; | ||
2417 | idx += ((start - 0x80000) >> 14); | ||
2418 | return mtrr_state->fixed_ranges[idx]; | ||
2419 | } else if (start < 0x1000000) { | ||
2420 | idx = 3 * 8; | ||
2421 | idx += ((start - 0xC0000) >> 12); | ||
2422 | return mtrr_state->fixed_ranges[idx]; | ||
2423 | } | ||
2424 | } | ||
2425 | |||
2426 | /* | ||
2427 | * Look in variable ranges | ||
2428 | * Look for multiple ranges matching this address and pick the type | ||
2429 | * as per MTRR precedence | ||
2430 | */ | ||
2431 | if (!(mtrr_state->enabled & 2)) | ||
2432 | return mtrr_state->def_type; | ||
2433 | |||
2434 | prev_match = 0xFF; | ||
2435 | for (i = 0; i < num_var_ranges; ++i) { | ||
2436 | unsigned short start_state, end_state; | ||
2437 | |||
2438 | if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) | ||
2439 | continue; | ||
2440 | |||
2441 | base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + | ||
2442 | (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); | ||
2443 | mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + | ||
2444 | (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); | ||
2445 | |||
2446 | start_state = ((start & mask) == (base & mask)); | ||
2447 | end_state = ((end & mask) == (base & mask)); | ||
2448 | if (start_state != end_state) | ||
2449 | return 0xFE; | ||
2450 | |||
2451 | if ((start & mask) != (base & mask)) | ||
2452 | continue; | ||
2453 | |||
2454 | curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; | ||
2455 | if (prev_match == 0xFF) { | ||
2456 | prev_match = curr_match; | ||
2457 | continue; | ||
2458 | } | ||
2459 | |||
2460 | if (prev_match == MTRR_TYPE_UNCACHABLE || | ||
2461 | curr_match == MTRR_TYPE_UNCACHABLE) | ||
2462 | return MTRR_TYPE_UNCACHABLE; | ||
2463 | |||
2464 | if ((prev_match == MTRR_TYPE_WRBACK && | ||
2465 | curr_match == MTRR_TYPE_WRTHROUGH) || | ||
2466 | (prev_match == MTRR_TYPE_WRTHROUGH && | ||
2467 | curr_match == MTRR_TYPE_WRBACK)) { | ||
2468 | prev_match = MTRR_TYPE_WRTHROUGH; | ||
2469 | curr_match = MTRR_TYPE_WRTHROUGH; | ||
2470 | } | ||
2471 | |||
2472 | if (prev_match != curr_match) | ||
2473 | return MTRR_TYPE_UNCACHABLE; | ||
2474 | } | ||
2475 | |||
2476 | if (prev_match != 0xFF) | ||
2477 | return prev_match; | ||
2478 | |||
2479 | return mtrr_state->def_type; | ||
2480 | } | ||
2481 | |||
2482 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
2483 | { | ||
2484 | u8 mtrr; | ||
2485 | |||
2486 | mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, | ||
2487 | (gfn << PAGE_SHIFT) + PAGE_SIZE); | ||
2488 | if (mtrr == 0xfe || mtrr == 0xff) | ||
2489 | mtrr = MTRR_TYPE_WRBACK; | ||
2490 | return mtrr; | ||
2491 | } | ||
2492 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); | ||
2493 | |||
2494 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 2440 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
2495 | { | 2441 | { |
2496 | trace_kvm_mmu_unsync_page(sp); | 2442 | trace_kvm_mmu_unsync_page(sp); |
@@ -2541,7 +2487,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2541 | u64 spte; | 2487 | u64 spte; |
2542 | int ret = 0; | 2488 | int ret = 0; |
2543 | 2489 | ||
2544 | if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access)) | 2490 | if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) |
2545 | return 0; | 2491 | return 0; |
2546 | 2492 | ||
2547 | spte = PT_PRESENT_MASK; | 2493 | spte = PT_PRESENT_MASK; |
@@ -2578,7 +2524,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2578 | * be fixed if guest refault. | 2524 | * be fixed if guest refault. |
2579 | */ | 2525 | */ |
2580 | if (level > PT_PAGE_TABLE_LEVEL && | 2526 | if (level > PT_PAGE_TABLE_LEVEL && |
2581 | has_wrprotected_page(vcpu->kvm, gfn, level)) | 2527 | has_wrprotected_page(vcpu, gfn, level)) |
2582 | goto done; | 2528 | goto done; |
2583 | 2529 | ||
2584 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; | 2530 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; |
@@ -2602,7 +2548,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2602 | } | 2548 | } |
2603 | 2549 | ||
2604 | if (pte_access & ACC_WRITE_MASK) { | 2550 | if (pte_access & ACC_WRITE_MASK) { |
2605 | mark_page_dirty(vcpu->kvm, gfn); | 2551 | kvm_vcpu_mark_page_dirty(vcpu, gfn); |
2606 | spte |= shadow_dirty_mask; | 2552 | spte |= shadow_dirty_mask; |
2607 | } | 2553 | } |
2608 | 2554 | ||
@@ -2692,15 +2638,17 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
2692 | u64 *start, u64 *end) | 2638 | u64 *start, u64 *end) |
2693 | { | 2639 | { |
2694 | struct page *pages[PTE_PREFETCH_NUM]; | 2640 | struct page *pages[PTE_PREFETCH_NUM]; |
2641 | struct kvm_memory_slot *slot; | ||
2695 | unsigned access = sp->role.access; | 2642 | unsigned access = sp->role.access; |
2696 | int i, ret; | 2643 | int i, ret; |
2697 | gfn_t gfn; | 2644 | gfn_t gfn; |
2698 | 2645 | ||
2699 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); | 2646 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); |
2700 | if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK)) | 2647 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK); |
2648 | if (!slot) | ||
2701 | return -1; | 2649 | return -1; |
2702 | 2650 | ||
2703 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); | 2651 | ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start); |
2704 | if (ret <= 0) | 2652 | if (ret <= 0) |
2705 | return -1; | 2653 | return -1; |
2706 | 2654 | ||
@@ -2818,7 +2766,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) | |||
2818 | return 1; | 2766 | return 1; |
2819 | 2767 | ||
2820 | if (pfn == KVM_PFN_ERR_HWPOISON) { | 2768 | if (pfn == KVM_PFN_ERR_HWPOISON) { |
2821 | kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); | 2769 | kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current); |
2822 | return 0; | 2770 | return 0; |
2823 | } | 2771 | } |
2824 | 2772 | ||
@@ -2841,7 +2789,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | |||
2841 | if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && | 2789 | if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && |
2842 | level == PT_PAGE_TABLE_LEVEL && | 2790 | level == PT_PAGE_TABLE_LEVEL && |
2843 | PageTransCompound(pfn_to_page(pfn)) && | 2791 | PageTransCompound(pfn_to_page(pfn)) && |
2844 | !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) { | 2792 | !has_wrprotected_page(vcpu, gfn, PT_DIRECTORY_LEVEL)) { |
2845 | unsigned long mask; | 2793 | unsigned long mask; |
2846 | /* | 2794 | /* |
2847 | * mmu_notifier_retry was successful and we hold the | 2795 | * mmu_notifier_retry was successful and we hold the |
@@ -2933,7 +2881,7 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
2933 | * Compare with set_spte where instead shadow_dirty_mask is set. | 2881 | * Compare with set_spte where instead shadow_dirty_mask is set. |
2934 | */ | 2882 | */ |
2935 | if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) | 2883 | if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) |
2936 | mark_page_dirty(vcpu->kvm, gfn); | 2884 | kvm_vcpu_mark_page_dirty(vcpu, gfn); |
2937 | 2885 | ||
2938 | return true; | 2886 | return true; |
2939 | } | 2887 | } |
@@ -3388,7 +3336,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | |||
3388 | gfn_t gfn = get_mmio_spte_gfn(spte); | 3336 | gfn_t gfn = get_mmio_spte_gfn(spte); |
3389 | unsigned access = get_mmio_spte_access(spte); | 3337 | unsigned access = get_mmio_spte_access(spte); |
3390 | 3338 | ||
3391 | if (!check_mmio_spte(vcpu->kvm, spte)) | 3339 | if (!check_mmio_spte(vcpu, spte)) |
3392 | return RET_MMIO_PF_INVALID; | 3340 | return RET_MMIO_PF_INVALID; |
3393 | 3341 | ||
3394 | if (direct) | 3342 | if (direct) |
@@ -3460,7 +3408,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | |||
3460 | arch.direct_map = vcpu->arch.mmu.direct_map; | 3408 | arch.direct_map = vcpu->arch.mmu.direct_map; |
3461 | arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); | 3409 | arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); |
3462 | 3410 | ||
3463 | return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch); | 3411 | return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); |
3464 | } | 3412 | } |
3465 | 3413 | ||
3466 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) | 3414 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) |
@@ -3475,10 +3423,12 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu) | |||
3475 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 3423 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
3476 | gva_t gva, pfn_t *pfn, bool write, bool *writable) | 3424 | gva_t gva, pfn_t *pfn, bool write, bool *writable) |
3477 | { | 3425 | { |
3426 | struct kvm_memory_slot *slot; | ||
3478 | bool async; | 3427 | bool async; |
3479 | 3428 | ||
3480 | *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable); | 3429 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); |
3481 | 3430 | async = false; | |
3431 | *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable); | ||
3482 | if (!async) | 3432 | if (!async) |
3483 | return false; /* *pfn has correct page already */ | 3433 | return false; /* *pfn has correct page already */ |
3484 | 3434 | ||
@@ -3492,11 +3442,20 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | |||
3492 | return true; | 3442 | return true; |
3493 | } | 3443 | } |
3494 | 3444 | ||
3495 | *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable); | 3445 | *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); |
3496 | |||
3497 | return false; | 3446 | return false; |
3498 | } | 3447 | } |
3499 | 3448 | ||
3449 | static bool | ||
3450 | check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) | ||
3451 | { | ||
3452 | int page_num = KVM_PAGES_PER_HPAGE(level); | ||
3453 | |||
3454 | gfn &= ~(page_num - 1); | ||
3455 | |||
3456 | return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); | ||
3457 | } | ||
3458 | |||
3500 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | 3459 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, |
3501 | bool prefault) | 3460 | bool prefault) |
3502 | { | 3461 | { |
@@ -3522,9 +3481,17 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3522 | if (r) | 3481 | if (r) |
3523 | return r; | 3482 | return r; |
3524 | 3483 | ||
3525 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); | 3484 | if (mapping_level_dirty_bitmap(vcpu, gfn) || |
3485 | !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL)) | ||
3486 | force_pt_level = 1; | ||
3487 | else | ||
3488 | force_pt_level = 0; | ||
3489 | |||
3526 | if (likely(!force_pt_level)) { | 3490 | if (likely(!force_pt_level)) { |
3527 | level = mapping_level(vcpu, gfn); | 3491 | level = mapping_level(vcpu, gfn); |
3492 | if (level > PT_DIRECTORY_LEVEL && | ||
3493 | !check_hugepage_cache_consistency(vcpu, gfn, level)) | ||
3494 | level = PT_DIRECTORY_LEVEL; | ||
3528 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 3495 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
3529 | } else | 3496 | } else |
3530 | level = PT_PAGE_TABLE_LEVEL; | 3497 | level = PT_PAGE_TABLE_LEVEL; |
@@ -3590,7 +3557,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, | |||
3590 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); | 3557 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); |
3591 | } | 3558 | } |
3592 | 3559 | ||
3593 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | 3560 | static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, |
3594 | unsigned access, int *nr_present) | 3561 | unsigned access, int *nr_present) |
3595 | { | 3562 | { |
3596 | if (unlikely(is_mmio_spte(*sptep))) { | 3563 | if (unlikely(is_mmio_spte(*sptep))) { |
@@ -3600,7 +3567,7 @@ static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | |||
3600 | } | 3567 | } |
3601 | 3568 | ||
3602 | (*nr_present)++; | 3569 | (*nr_present)++; |
3603 | mark_mmio_spte(kvm, sptep, gfn, access); | 3570 | mark_mmio_spte(vcpu, sptep, gfn, access); |
3604 | return true; | 3571 | return true; |
3605 | } | 3572 | } |
3606 | 3573 | ||
@@ -3878,6 +3845,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3878 | struct kvm_mmu *context = &vcpu->arch.mmu; | 3845 | struct kvm_mmu *context = &vcpu->arch.mmu; |
3879 | 3846 | ||
3880 | context->base_role.word = 0; | 3847 | context->base_role.word = 0; |
3848 | context->base_role.smm = is_smm(vcpu); | ||
3881 | context->page_fault = tdp_page_fault; | 3849 | context->page_fault = tdp_page_fault; |
3882 | context->sync_page = nonpaging_sync_page; | 3850 | context->sync_page = nonpaging_sync_page; |
3883 | context->invlpg = nonpaging_invlpg; | 3851 | context->invlpg = nonpaging_invlpg; |
@@ -3939,6 +3907,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) | |||
3939 | = smep && !is_write_protection(vcpu); | 3907 | = smep && !is_write_protection(vcpu); |
3940 | context->base_role.smap_andnot_wp | 3908 | context->base_role.smap_andnot_wp |
3941 | = smap && !is_write_protection(vcpu); | 3909 | = smap && !is_write_protection(vcpu); |
3910 | context->base_role.smm = is_smm(vcpu); | ||
3942 | } | 3911 | } |
3943 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); | 3912 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); |
3944 | 3913 | ||
@@ -4110,7 +4079,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | |||
4110 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 4079 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
4111 | *gpa &= ~(gpa_t)7; | 4080 | *gpa &= ~(gpa_t)7; |
4112 | *bytes = 8; | 4081 | *bytes = 8; |
4113 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8); | 4082 | r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); |
4114 | if (r) | 4083 | if (r) |
4115 | gentry = 0; | 4084 | gentry = 0; |
4116 | new = (const u8 *)&gentry; | 4085 | new = (const u8 *)&gentry; |
@@ -4222,6 +4191,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4222 | mask.nxe = 1; | 4191 | mask.nxe = 1; |
4223 | mask.smep_andnot_wp = 1; | 4192 | mask.smep_andnot_wp = 1; |
4224 | mask.smap_andnot_wp = 1; | 4193 | mask.smap_andnot_wp = 1; |
4194 | mask.smm = 1; | ||
4225 | 4195 | ||
4226 | /* | 4196 | /* |
4227 | * If we don't have indirect shadow pages, it means no page is | 4197 | * If we don't have indirect shadow pages, it means no page is |
@@ -4420,36 +4390,115 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) | |||
4420 | init_kvm_mmu(vcpu); | 4390 | init_kvm_mmu(vcpu); |
4421 | } | 4391 | } |
4422 | 4392 | ||
4423 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, | 4393 | /* The return value indicates if tlb flush on all vcpus is needed. */ |
4424 | struct kvm_memory_slot *memslot) | 4394 | typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap); |
4395 | |||
4396 | /* The caller should hold mmu-lock before calling this function. */ | ||
4397 | static bool | ||
4398 | slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
4399 | slot_level_handler fn, int start_level, int end_level, | ||
4400 | gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) | ||
4425 | { | 4401 | { |
4426 | gfn_t last_gfn; | 4402 | struct slot_rmap_walk_iterator iterator; |
4427 | int i; | ||
4428 | bool flush = false; | 4403 | bool flush = false; |
4429 | 4404 | ||
4430 | last_gfn = memslot->base_gfn + memslot->npages - 1; | 4405 | for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, |
4406 | end_gfn, &iterator) { | ||
4407 | if (iterator.rmap) | ||
4408 | flush |= fn(kvm, iterator.rmap); | ||
4431 | 4409 | ||
4432 | spin_lock(&kvm->mmu_lock); | 4410 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { |
4411 | if (flush && lock_flush_tlb) { | ||
4412 | kvm_flush_remote_tlbs(kvm); | ||
4413 | flush = false; | ||
4414 | } | ||
4415 | cond_resched_lock(&kvm->mmu_lock); | ||
4416 | } | ||
4417 | } | ||
4433 | 4418 | ||
4434 | for (i = PT_PAGE_TABLE_LEVEL; | 4419 | if (flush && lock_flush_tlb) { |
4435 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 4420 | kvm_flush_remote_tlbs(kvm); |
4436 | unsigned long *rmapp; | 4421 | flush = false; |
4437 | unsigned long last_index, index; | 4422 | } |
4438 | 4423 | ||
4439 | rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; | 4424 | return flush; |
4440 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); | 4425 | } |
4441 | 4426 | ||
4442 | for (index = 0; index <= last_index; ++index, ++rmapp) { | 4427 | static bool |
4443 | if (*rmapp) | 4428 | slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, |
4444 | flush |= __rmap_write_protect(kvm, rmapp, | 4429 | slot_level_handler fn, int start_level, int end_level, |
4445 | false); | 4430 | bool lock_flush_tlb) |
4431 | { | ||
4432 | return slot_handle_level_range(kvm, memslot, fn, start_level, | ||
4433 | end_level, memslot->base_gfn, | ||
4434 | memslot->base_gfn + memslot->npages - 1, | ||
4435 | lock_flush_tlb); | ||
4436 | } | ||
4437 | |||
4438 | static bool | ||
4439 | slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
4440 | slot_level_handler fn, bool lock_flush_tlb) | ||
4441 | { | ||
4442 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
4443 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
4444 | } | ||
4445 | |||
4446 | static bool | ||
4447 | slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
4448 | slot_level_handler fn, bool lock_flush_tlb) | ||
4449 | { | ||
4450 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, | ||
4451 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
4452 | } | ||
4453 | |||
4454 | static bool | ||
4455 | slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
4456 | slot_level_handler fn, bool lock_flush_tlb) | ||
4457 | { | ||
4458 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
4459 | PT_PAGE_TABLE_LEVEL, lock_flush_tlb); | ||
4460 | } | ||
4446 | 4461 | ||
4447 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | 4462 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) |
4448 | cond_resched_lock(&kvm->mmu_lock); | 4463 | { |
4464 | struct kvm_memslots *slots; | ||
4465 | struct kvm_memory_slot *memslot; | ||
4466 | int i; | ||
4467 | |||
4468 | spin_lock(&kvm->mmu_lock); | ||
4469 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | ||
4470 | slots = __kvm_memslots(kvm, i); | ||
4471 | kvm_for_each_memslot(memslot, slots) { | ||
4472 | gfn_t start, end; | ||
4473 | |||
4474 | start = max(gfn_start, memslot->base_gfn); | ||
4475 | end = min(gfn_end, memslot->base_gfn + memslot->npages); | ||
4476 | if (start >= end) | ||
4477 | continue; | ||
4478 | |||
4479 | slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, | ||
4480 | PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, | ||
4481 | start, end - 1, true); | ||
4449 | } | 4482 | } |
4450 | } | 4483 | } |
4451 | 4484 | ||
4452 | spin_unlock(&kvm->mmu_lock); | 4485 | spin_unlock(&kvm->mmu_lock); |
4486 | } | ||
4487 | |||
4488 | static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp) | ||
4489 | { | ||
4490 | return __rmap_write_protect(kvm, rmapp, false); | ||
4491 | } | ||
4492 | |||
4493 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, | ||
4494 | struct kvm_memory_slot *memslot) | ||
4495 | { | ||
4496 | bool flush; | ||
4497 | |||
4498 | spin_lock(&kvm->mmu_lock); | ||
4499 | flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect, | ||
4500 | false); | ||
4501 | spin_unlock(&kvm->mmu_lock); | ||
4453 | 4502 | ||
4454 | /* | 4503 | /* |
4455 | * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log() | 4504 | * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log() |
@@ -4482,9 +4531,8 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, | |||
4482 | pfn_t pfn; | 4531 | pfn_t pfn; |
4483 | struct kvm_mmu_page *sp; | 4532 | struct kvm_mmu_page *sp; |
4484 | 4533 | ||
4485 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | 4534 | restart: |
4486 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | 4535 | for_each_rmap_spte(rmapp, &iter, sptep) { |
4487 | |||
4488 | sp = page_header(__pa(sptep)); | 4536 | sp = page_header(__pa(sptep)); |
4489 | pfn = spte_to_pfn(*sptep); | 4537 | pfn = spte_to_pfn(*sptep); |
4490 | 4538 | ||
@@ -4499,71 +4547,31 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, | |||
4499 | !kvm_is_reserved_pfn(pfn) && | 4547 | !kvm_is_reserved_pfn(pfn) && |
4500 | PageTransCompound(pfn_to_page(pfn))) { | 4548 | PageTransCompound(pfn_to_page(pfn))) { |
4501 | drop_spte(kvm, sptep); | 4549 | drop_spte(kvm, sptep); |
4502 | sptep = rmap_get_first(*rmapp, &iter); | ||
4503 | need_tlb_flush = 1; | 4550 | need_tlb_flush = 1; |
4504 | } else | 4551 | goto restart; |
4505 | sptep = rmap_get_next(&iter); | 4552 | } |
4506 | } | 4553 | } |
4507 | 4554 | ||
4508 | return need_tlb_flush; | 4555 | return need_tlb_flush; |
4509 | } | 4556 | } |
4510 | 4557 | ||
4511 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, | 4558 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, |
4512 | struct kvm_memory_slot *memslot) | 4559 | const struct kvm_memory_slot *memslot) |
4513 | { | 4560 | { |
4514 | bool flush = false; | 4561 | /* FIXME: const-ify all uses of struct kvm_memory_slot. */ |
4515 | unsigned long *rmapp; | ||
4516 | unsigned long last_index, index; | ||
4517 | |||
4518 | spin_lock(&kvm->mmu_lock); | 4562 | spin_lock(&kvm->mmu_lock); |
4519 | 4563 | slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot, | |
4520 | rmapp = memslot->arch.rmap[0]; | 4564 | kvm_mmu_zap_collapsible_spte, true); |
4521 | last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1, | ||
4522 | memslot->base_gfn, PT_PAGE_TABLE_LEVEL); | ||
4523 | |||
4524 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4525 | if (*rmapp) | ||
4526 | flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp); | ||
4527 | |||
4528 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | ||
4529 | if (flush) { | ||
4530 | kvm_flush_remote_tlbs(kvm); | ||
4531 | flush = false; | ||
4532 | } | ||
4533 | cond_resched_lock(&kvm->mmu_lock); | ||
4534 | } | ||
4535 | } | ||
4536 | |||
4537 | if (flush) | ||
4538 | kvm_flush_remote_tlbs(kvm); | ||
4539 | |||
4540 | spin_unlock(&kvm->mmu_lock); | 4565 | spin_unlock(&kvm->mmu_lock); |
4541 | } | 4566 | } |
4542 | 4567 | ||
4543 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, | 4568 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
4544 | struct kvm_memory_slot *memslot) | 4569 | struct kvm_memory_slot *memslot) |
4545 | { | 4570 | { |
4546 | gfn_t last_gfn; | 4571 | bool flush; |
4547 | unsigned long *rmapp; | ||
4548 | unsigned long last_index, index; | ||
4549 | bool flush = false; | ||
4550 | |||
4551 | last_gfn = memslot->base_gfn + memslot->npages - 1; | ||
4552 | 4572 | ||
4553 | spin_lock(&kvm->mmu_lock); | 4573 | spin_lock(&kvm->mmu_lock); |
4554 | 4574 | flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false); | |
4555 | rmapp = memslot->arch.rmap[PT_PAGE_TABLE_LEVEL - 1]; | ||
4556 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, | ||
4557 | PT_PAGE_TABLE_LEVEL); | ||
4558 | |||
4559 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4560 | if (*rmapp) | ||
4561 | flush |= __rmap_clear_dirty(kvm, rmapp); | ||
4562 | |||
4563 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | ||
4564 | cond_resched_lock(&kvm->mmu_lock); | ||
4565 | } | ||
4566 | |||
4567 | spin_unlock(&kvm->mmu_lock); | 4575 | spin_unlock(&kvm->mmu_lock); |
4568 | 4576 | ||
4569 | lockdep_assert_held(&kvm->slots_lock); | 4577 | lockdep_assert_held(&kvm->slots_lock); |
@@ -4582,31 +4590,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty); | |||
4582 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, | 4590 | void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, |
4583 | struct kvm_memory_slot *memslot) | 4591 | struct kvm_memory_slot *memslot) |
4584 | { | 4592 | { |
4585 | gfn_t last_gfn; | 4593 | bool flush; |
4586 | int i; | ||
4587 | bool flush = false; | ||
4588 | |||
4589 | last_gfn = memslot->base_gfn + memslot->npages - 1; | ||
4590 | 4594 | ||
4591 | spin_lock(&kvm->mmu_lock); | 4595 | spin_lock(&kvm->mmu_lock); |
4592 | 4596 | flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect, | |
4593 | for (i = PT_PAGE_TABLE_LEVEL + 1; /* skip rmap for 4K page */ | 4597 | false); |
4594 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | ||
4595 | unsigned long *rmapp; | ||
4596 | unsigned long last_index, index; | ||
4597 | |||
4598 | rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; | ||
4599 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); | ||
4600 | |||
4601 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4602 | if (*rmapp) | ||
4603 | flush |= __rmap_write_protect(kvm, rmapp, | ||
4604 | false); | ||
4605 | |||
4606 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | ||
4607 | cond_resched_lock(&kvm->mmu_lock); | ||
4608 | } | ||
4609 | } | ||
4610 | spin_unlock(&kvm->mmu_lock); | 4598 | spin_unlock(&kvm->mmu_lock); |
4611 | 4599 | ||
4612 | /* see kvm_mmu_slot_remove_write_access */ | 4600 | /* see kvm_mmu_slot_remove_write_access */ |
@@ -4620,31 +4608,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access); | |||
4620 | void kvm_mmu_slot_set_dirty(struct kvm *kvm, | 4608 | void kvm_mmu_slot_set_dirty(struct kvm *kvm, |
4621 | struct kvm_memory_slot *memslot) | 4609 | struct kvm_memory_slot *memslot) |
4622 | { | 4610 | { |
4623 | gfn_t last_gfn; | 4611 | bool flush; |
4624 | int i; | ||
4625 | bool flush = false; | ||
4626 | |||
4627 | last_gfn = memslot->base_gfn + memslot->npages - 1; | ||
4628 | 4612 | ||
4629 | spin_lock(&kvm->mmu_lock); | 4613 | spin_lock(&kvm->mmu_lock); |
4630 | 4614 | flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false); | |
4631 | for (i = PT_PAGE_TABLE_LEVEL; | ||
4632 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | ||
4633 | unsigned long *rmapp; | ||
4634 | unsigned long last_index, index; | ||
4635 | |||
4636 | rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; | ||
4637 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); | ||
4638 | |||
4639 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4640 | if (*rmapp) | ||
4641 | flush |= __rmap_set_dirty(kvm, rmapp); | ||
4642 | |||
4643 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) | ||
4644 | cond_resched_lock(&kvm->mmu_lock); | ||
4645 | } | ||
4646 | } | ||
4647 | |||
4648 | spin_unlock(&kvm->mmu_lock); | 4615 | spin_unlock(&kvm->mmu_lock); |
4649 | 4616 | ||
4650 | lockdep_assert_held(&kvm->slots_lock); | 4617 | lockdep_assert_held(&kvm->slots_lock); |
@@ -4741,13 +4708,13 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) | |||
4741 | return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); | 4708 | return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); |
4742 | } | 4709 | } |
4743 | 4710 | ||
4744 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) | 4711 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots) |
4745 | { | 4712 | { |
4746 | /* | 4713 | /* |
4747 | * The very rare case: if the generation-number is round, | 4714 | * The very rare case: if the generation-number is round, |
4748 | * zap all shadow pages. | 4715 | * zap all shadow pages. |
4749 | */ | 4716 | */ |
4750 | if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { | 4717 | if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) { |
4751 | printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n"); | 4718 | printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n"); |
4752 | kvm_mmu_invalidate_zap_all_pages(kvm); | 4719 | kvm_mmu_invalidate_zap_all_pages(kvm); |
4753 | } | 4720 | } |
@@ -4869,15 +4836,18 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
4869 | unsigned int nr_pages = 0; | 4836 | unsigned int nr_pages = 0; |
4870 | struct kvm_memslots *slots; | 4837 | struct kvm_memslots *slots; |
4871 | struct kvm_memory_slot *memslot; | 4838 | struct kvm_memory_slot *memslot; |
4839 | int i; | ||
4872 | 4840 | ||
4873 | slots = kvm_memslots(kvm); | 4841 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
4842 | slots = __kvm_memslots(kvm, i); | ||
4874 | 4843 | ||
4875 | kvm_for_each_memslot(memslot, slots) | 4844 | kvm_for_each_memslot(memslot, slots) |
4876 | nr_pages += memslot->npages; | 4845 | nr_pages += memslot->npages; |
4846 | } | ||
4877 | 4847 | ||
4878 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 4848 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
4879 | nr_mmu_pages = max(nr_mmu_pages, | 4849 | nr_mmu_pages = max(nr_mmu_pages, |
4880 | (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); | 4850 | (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); |
4881 | 4851 | ||
4882 | return nr_mmu_pages; | 4852 | return nr_mmu_pages; |
4883 | } | 4853 | } |
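
A pattern that recurs throughout the mmu.c changes above (kvm_handle_hva_range, kvm_zap_gfn_range, kvm_mmu_calculate_mmu_pages) is walking every memslot in every address space instead of the single kvm_memslots(kvm) set. A minimal sketch of that walk, using only the helpers the patch itself relies on:

    /* Illustrative sketch of the per-address-space memslot walk. */
    static void example_walk_all_slots(struct kvm *kvm)
    {
            struct kvm_memslots *slots;
            struct kvm_memory_slot *memslot;
            int i;

            for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                    slots = __kvm_memslots(kvm, i);  /* one slot set per address space */
                    kvm_for_each_memslot(memslot, slots) {
                            /* per-slot work (zap, write-protect, count, ...) */
                    }
            }
    }

On x86 the extra address space carries the SMM view of guest memory, which is also why base_role.smm is folded into the page role and the pte-write mask in the hunks above.
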
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 0ada65ecddcf..398d21c0f6dd 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -43,6 +43,7 @@ | |||
43 | #define PT_PDPE_LEVEL 3 | 43 | #define PT_PDPE_LEVEL 3 |
44 | #define PT_DIRECTORY_LEVEL 2 | 44 | #define PT_DIRECTORY_LEVEL 2 |
45 | #define PT_PAGE_TABLE_LEVEL 1 | 45 | #define PT_PAGE_TABLE_LEVEL 1 |
46 | #define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1) | ||
46 | 47 | ||
47 | static inline u64 rsvd_bits(int s, int e) | 48 | static inline u64 rsvd_bits(int s, int e) |
48 | { | 49 | { |
@@ -170,4 +171,5 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
170 | } | 171 | } |
171 | 172 | ||
172 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); | 173 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); |
174 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); | ||
173 | #endif | 175 | #endif |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 9ade5cfb5a4c..a4f62e6f2db2 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -114,7 +114,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
114 | return; | 114 | return; |
115 | 115 | ||
116 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); | 116 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); |
117 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn); | 117 | pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn); |
118 | 118 | ||
119 | if (is_error_pfn(pfn)) | 119 | if (is_error_pfn(pfn)) |
120 | return; | 120 | return; |
@@ -131,12 +131,16 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
131 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); | 131 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); |
132 | unsigned long *rmapp; | 132 | unsigned long *rmapp; |
133 | struct kvm_mmu_page *rev_sp; | 133 | struct kvm_mmu_page *rev_sp; |
134 | struct kvm_memslots *slots; | ||
135 | struct kvm_memory_slot *slot; | ||
134 | gfn_t gfn; | 136 | gfn_t gfn; |
135 | 137 | ||
136 | rev_sp = page_header(__pa(sptep)); | 138 | rev_sp = page_header(__pa(sptep)); |
137 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); | 139 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); |
138 | 140 | ||
139 | if (!gfn_to_memslot(kvm, gfn)) { | 141 | slots = kvm_memslots_for_spte_role(kvm, rev_sp->role); |
142 | slot = __gfn_to_memslot(slots, gfn); | ||
143 | if (!slot) { | ||
140 | if (!__ratelimit(&ratelimit_state)) | 144 | if (!__ratelimit(&ratelimit_state)) |
141 | return; | 145 | return; |
142 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); | 146 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); |
@@ -146,7 +150,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
146 | return; | 150 | return; |
147 | } | 151 | } |
148 | 152 | ||
149 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); | 153 | rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot); |
150 | if (!*rmapp) { | 154 | if (!*rmapp) { |
151 | if (!__ratelimit(&ratelimit_state)) | 155 | if (!__ratelimit(&ratelimit_state)) |
152 | return; | 156 | return; |
@@ -191,19 +195,21 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
191 | unsigned long *rmapp; | 195 | unsigned long *rmapp; |
192 | u64 *sptep; | 196 | u64 *sptep; |
193 | struct rmap_iterator iter; | 197 | struct rmap_iterator iter; |
198 | struct kvm_memslots *slots; | ||
199 | struct kvm_memory_slot *slot; | ||
194 | 200 | ||
195 | if (sp->role.direct || sp->unsync || sp->role.invalid) | 201 | if (sp->role.direct || sp->unsync || sp->role.invalid) |
196 | return; | 202 | return; |
197 | 203 | ||
198 | rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL); | 204 | slots = kvm_memslots_for_spte_role(kvm, sp->role); |
205 | slot = __gfn_to_memslot(slots, sp->gfn); | ||
206 | rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); | ||
199 | 207 | ||
200 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 208 | for_each_rmap_spte(rmapp, &iter, sptep) |
201 | sptep = rmap_get_next(&iter)) { | ||
202 | if (is_writable_pte(*sptep)) | 209 | if (is_writable_pte(*sptep)) |
203 | audit_printk(kvm, "shadow page has writable " | 210 | audit_printk(kvm, "shadow page has writable " |
204 | "mappings: gfn %llx role %x\n", | 211 | "mappings: gfn %llx role %x\n", |
205 | sp->gfn, sp->role.word); | 212 | sp->gfn, sp->role.word); |
206 | } | ||
207 | } | 213 | } |
208 | 214 | ||
209 | static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) | 215 | static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) |
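
Several hunks above (kvm_age_rmapp, kvm_test_age_rmapp, kvm_mmu_zap_collapsible_spte, audit_write_protection) replace open-coded rmap_get_first()/rmap_get_next() loops with for_each_rmap_spte(). The macro itself is introduced elsewhere in the series and is not visible in this excerpt; it is assumed to have roughly the following shape, folding the old BUG_ON sanity check into the loop condition:

    /* Assumed shape of the iterator used above; the actual definition lives in mmu.c. */
    #define for_each_rmap_spte(_rmap_, _iter_, _spte_)                            \
            for (_spte_ = rmap_get_first(*(_rmap_), _iter_);                      \
                 _spte_ && ({ BUG_ON(!is_shadow_present_pte(*(_spte_))); 1; });   \
                 _spte_ = rmap_get_next(_iter_))

Note the restart label in kvm_mmu_zap_collapsible_spte(): because drop_spte() can unlink the entry the iterator is standing on, the loop restarts from the head of the rmap chain rather than calling rmap_get_next() on a stale iterator.
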
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c new file mode 100644 index 000000000000..de1d2d8062e2 --- /dev/null +++ b/arch/x86/kvm/mtrr.c | |||
@@ -0,0 +1,699 @@ | |||
1 | /* | ||
2 | * vMTRR implementation | ||
3 | * | ||
4 | * Copyright (C) 2006 Qumranet, Inc. | ||
5 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
6 | * Copyright(C) 2015 Intel Corporation. | ||
7 | * | ||
8 | * Authors: | ||
9 | * Yaniv Kamay <yaniv@qumranet.com> | ||
10 | * Avi Kivity <avi@qumranet.com> | ||
11 | * Marcelo Tosatti <mtosatti@redhat.com> | ||
12 | * Paolo Bonzini <pbonzini@redhat.com> | ||
13 | * Xiao Guangrong <guangrong.xiao@linux.intel.com> | ||
14 | * | ||
15 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
16 | * the COPYING file in the top-level directory. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kvm_host.h> | ||
20 | #include <asm/mtrr.h> | ||
21 | |||
22 | #include "cpuid.h" | ||
23 | #include "mmu.h" | ||
24 | |||
25 | #define IA32_MTRR_DEF_TYPE_E (1ULL << 11) | ||
26 | #define IA32_MTRR_DEF_TYPE_FE (1ULL << 10) | ||
27 | #define IA32_MTRR_DEF_TYPE_TYPE_MASK (0xff) | ||
28 | |||
29 | static bool msr_mtrr_valid(unsigned msr) | ||
30 | { | ||
31 | switch (msr) { | ||
32 | case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1: | ||
33 | case MSR_MTRRfix64K_00000: | ||
34 | case MSR_MTRRfix16K_80000: | ||
35 | case MSR_MTRRfix16K_A0000: | ||
36 | case MSR_MTRRfix4K_C0000: | ||
37 | case MSR_MTRRfix4K_C8000: | ||
38 | case MSR_MTRRfix4K_D0000: | ||
39 | case MSR_MTRRfix4K_D8000: | ||
40 | case MSR_MTRRfix4K_E0000: | ||
41 | case MSR_MTRRfix4K_E8000: | ||
42 | case MSR_MTRRfix4K_F0000: | ||
43 | case MSR_MTRRfix4K_F8000: | ||
44 | case MSR_MTRRdefType: | ||
45 | case MSR_IA32_CR_PAT: | ||
46 | return true; | ||
47 | case 0x2f8: | ||
48 | return true; | ||
49 | } | ||
50 | return false; | ||
51 | } | ||
52 | |||
53 | static bool valid_pat_type(unsigned t) | ||
54 | { | ||
55 | return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ | ||
56 | } | ||
57 | |||
58 | static bool valid_mtrr_type(unsigned t) | ||
59 | { | ||
60 | return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ | ||
61 | } | ||
62 | |||
63 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
64 | { | ||
65 | int i; | ||
66 | u64 mask; | ||
67 | |||
68 | if (!msr_mtrr_valid(msr)) | ||
69 | return false; | ||
70 | |||
71 | if (msr == MSR_IA32_CR_PAT) { | ||
72 | for (i = 0; i < 8; i++) | ||
73 | if (!valid_pat_type((data >> (i * 8)) & 0xff)) | ||
74 | return false; | ||
75 | return true; | ||
76 | } else if (msr == MSR_MTRRdefType) { | ||
77 | if (data & ~0xcff) | ||
78 | return false; | ||
79 | return valid_mtrr_type(data & 0xff); | ||
80 | } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { | ||
81 | for (i = 0; i < 8 ; i++) | ||
82 | if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) | ||
83 | return false; | ||
84 | return true; | ||
85 | } | ||
86 | |||
87 | /* variable MTRRs */ | ||
88 | WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); | ||
89 | |||
90 | mask = (~0ULL) << cpuid_maxphyaddr(vcpu); | ||
91 | if ((msr & 1) == 0) { | ||
92 | /* MTRR base */ | ||
93 | if (!valid_mtrr_type(data & 0xff)) | ||
94 | return false; | ||
95 | mask |= 0xf00; | ||
96 | } else | ||
97 | /* MTRR mask */ | ||
98 | mask |= 0x7ff; | ||
99 | if (data & mask) { | ||
100 | kvm_inject_gp(vcpu, 0); | ||
101 | return false; | ||
102 | } | ||
103 | |||
104 | return true; | ||
105 | } | ||
106 | EXPORT_SYMBOL_GPL(kvm_mtrr_valid); | ||
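
To make the reserved-bit check above concrete, here is a small worked example; the 36-bit guest physical address width is an assumption chosen only for the arithmetic:

    mask             = ~0ULL << 36          = 0xFFFFFFF000000000
    base MSR (even)  : mask |= 0xf00        = 0xFFFFFFF000000F00
                       (bits 0-7 = type, bits 8-11 reserved, bits 12-35 = PhysBase)
    mask MSR (odd)   : mask |= 0x7ff        = 0xFFFFFFF0000007FF
                       (bits 0-10 reserved, bit 11 = V, bits 12-35 = PhysMask)

A guest write that sets any bit inside the computed mask is rejected and a #GP is injected, matching the "if (data & mask)" branch above.
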
107 | |||
108 | static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state) | ||
109 | { | ||
110 | return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E); | ||
111 | } | ||
112 | |||
113 | static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state) | ||
114 | { | ||
115 | return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE); | ||
116 | } | ||
117 | |||
118 | static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state) | ||
119 | { | ||
120 | return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK; | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * Three terms are used in the following code: | ||
125 | * - segment: an address segment covered by the fixed MTRRs. | ||
126 | * - unit: an MSR entry within a segment. | ||
127 | * - range: a block of addresses covered by one memory cache type. | ||
128 | */ | ||
129 | struct fixed_mtrr_segment { | ||
130 | u64 start; | ||
131 | u64 end; | ||
132 | |||
133 | int range_shift; | ||
134 | |||
135 | /* the start position in kvm_mtrr.fixed_ranges[]. */ | ||
136 | int range_start; | ||
137 | }; | ||
138 | |||
139 | static struct fixed_mtrr_segment fixed_seg_table[] = { | ||
140 | /* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */ | ||
141 | { | ||
142 | .start = 0x0, | ||
143 | .end = 0x80000, | ||
144 | .range_shift = 16, /* 64K */ | ||
145 | .range_start = 0, | ||
146 | }, | ||
147 | |||
148 | /* | ||
149 | * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units, | ||
150 | * 16K fixed mtrr. | ||
151 | */ | ||
152 | { | ||
153 | .start = 0x80000, | ||
154 | .end = 0xc0000, | ||
155 | .range_shift = 14, /* 16K */ | ||
156 | .range_start = 8, | ||
157 | }, | ||
158 | |||
159 | /* | ||
160 | * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units, | ||
161 | * 4K fixed mtrr. | ||
162 | */ | ||
163 | { | ||
164 | .start = 0xc0000, | ||
165 | .end = 0x100000, | ||
166 | .range_shift = 12, /* 4K */ | ||
167 | .range_start = 24, | ||
168 | } | ||
169 | }; | ||
170 | |||
171 | /* | ||
172 | * One MSR covers one unit; each MSR entry contains 8 ranges, so the | ||
173 | * unit size is always 8 * 2^range_shift. | ||
174 | */ | ||
175 | static u64 fixed_mtrr_seg_unit_size(int seg) | ||
176 | { | ||
177 | return 8 << fixed_seg_table[seg].range_shift; | ||
178 | } | ||
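
A quick cross-check of the segment table and the helper above, with values read straight off the table (illustrative only):

    seg 0 (64K): unit size = 8 << 16 = 0x80000; the single MSR_MTRRfix64K_00000
                 unit spans [0x0, 0x80000) and maps to fixed_ranges[0..7].
    seg 1 (16K): unit size = 8 << 14 = 0x20000; unit 1 (MSR_MTRRfix16K_A0000)
                 spans [0xA0000, 0xC0000) and maps to fixed_ranges[16..23].
    seg 2 (4K):  unit size = 8 << 12 = 0x8000; unit 7 (MSR_MTRRfix4K_F8000)
                 spans [0xF8000, 0x100000) and maps to fixed_ranges[80..87].

The range indices follow from range_start + 8 * unit, see fixed_mtrr_seg_unit_range_index() below.
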
179 | |||
180 | static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) | ||
181 | { | ||
182 | switch (msr) { | ||
183 | case MSR_MTRRfix64K_00000: | ||
184 | *seg = 0; | ||
185 | *unit = 0; | ||
186 | break; | ||
187 | case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: | ||
188 | *seg = 1; | ||
189 | *unit = msr - MSR_MTRRfix16K_80000; | ||
190 | break; | ||
191 | case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: | ||
192 | *seg = 2; | ||
193 | *unit = msr - MSR_MTRRfix4K_C0000; | ||
194 | break; | ||
195 | default: | ||
196 | return false; | ||
197 | } | ||
198 | |||
199 | return true; | ||
200 | } | ||
201 | |||
202 | static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end) | ||
203 | { | ||
204 | struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; | ||
205 | u64 unit_size = fixed_mtrr_seg_unit_size(seg); | ||
206 | |||
207 | *start = mtrr_seg->start + unit * unit_size; | ||
208 | *end = *start + unit_size; | ||
209 | WARN_ON(*end > mtrr_seg->end); | ||
210 | } | ||
211 | |||
212 | static int fixed_mtrr_seg_unit_range_index(int seg, int unit) | ||
213 | { | ||
214 | struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; | ||
215 | |||
216 | WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg) | ||
217 | > mtrr_seg->end); | ||
218 | |||
219 | /* each unit has 8 ranges. */ | ||
220 | return mtrr_seg->range_start + 8 * unit; | ||
221 | } | ||
222 | |||
223 | static int fixed_mtrr_seg_end_range_index(int seg) | ||
224 | { | ||
225 | struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; | ||
226 | int n; | ||
227 | |||
228 | n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift; | ||
229 | return mtrr_seg->range_start + n - 1; | ||
230 | } | ||
231 | |||
232 | static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end) | ||
233 | { | ||
234 | int seg, unit; | ||
235 | |||
236 | if (!fixed_msr_to_seg_unit(msr, &seg, &unit)) | ||
237 | return false; | ||
238 | |||
239 | fixed_mtrr_seg_unit_range(seg, unit, start, end); | ||
240 | return true; | ||
241 | } | ||
242 | |||
243 | static int fixed_msr_to_range_index(u32 msr) | ||
244 | { | ||
245 | int seg, unit; | ||
246 | |||
247 | if (!fixed_msr_to_seg_unit(msr, &seg, &unit)) | ||
248 | return -1; | ||
249 | |||
250 | return fixed_mtrr_seg_unit_range_index(seg, unit); | ||
251 | } | ||
252 | |||
253 | static int fixed_mtrr_addr_to_seg(u64 addr) | ||
254 | { | ||
255 | struct fixed_mtrr_segment *mtrr_seg; | ||
256 | int seg, seg_num = ARRAY_SIZE(fixed_seg_table); | ||
257 | |||
258 | for (seg = 0; seg < seg_num; seg++) { | ||
259 | mtrr_seg = &fixed_seg_table[seg]; | ||
260 | if (mtrr_seg->start <= addr && addr < mtrr_seg->end) | ||
261 | return seg; | ||
262 | } | ||
263 | |||
264 | return -1; | ||
265 | } | ||
266 | |||
267 | static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg) | ||
268 | { | ||
269 | struct fixed_mtrr_segment *mtrr_seg; | ||
270 | int index; | ||
271 | |||
272 | mtrr_seg = &fixed_seg_table[seg]; | ||
273 | index = mtrr_seg->range_start; | ||
274 | index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift; | ||
275 | return index; | ||
276 | } | ||
277 | |||
278 | static u64 fixed_mtrr_range_end_addr(int seg, int index) | ||
279 | { | ||
280 | struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg]; | ||
281 | int pos = index - mtrr_seg->range_start; | ||
282 | |||
283 | return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift); | ||
284 | } | ||
285 | |||
286 | static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end) | ||
287 | { | ||
288 | u64 mask; | ||
289 | |||
290 | *start = range->base & PAGE_MASK; | ||
291 | |||
292 | mask = range->mask & PAGE_MASK; | ||
293 | mask |= ~0ULL << boot_cpu_data.x86_phys_bits; | ||
294 | |||
295 | /* This cannot overflow because writing to the reserved bits of | ||
296 | * variable MTRRs causes a #GP. | ||
297 | */ | ||
298 | *end = (*start | ~mask) + 1; | ||
299 | } | ||
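
For clarity, a worked example of the start/end computation above; the 36-bit physical address width and the MSR values are assumptions picked only for illustration:

    base = 0xC0000006   (PhysBase = 0xC0000000, type 6 = WB)
    mask = 0xFF0000800  (PhysMask = 0xFF0000000, bit 11 = V)

    *start = 0xC0000006 & PAGE_MASK          = 0xC0000000
    mask   = 0xFF0000000 | (~0ULL << 36)     = 0xFFFFFFFFF0000000
    *end   = (0xC0000000 | ~mask) + 1        = 0xD0000000

so this variable MTRR marks the 256MB range [0xC0000000, 0xD0000000) as write-back.
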
300 | |||
301 | static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr) | ||
302 | { | ||
303 | struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; | ||
304 | gfn_t start, end; | ||
305 | int index; | ||
306 | |||
307 | if (msr == MSR_IA32_CR_PAT || !tdp_enabled || | ||
308 | !kvm_arch_has_noncoherent_dma(vcpu->kvm)) | ||
309 | return; | ||
310 | |||
311 | if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType) | ||
312 | return; | ||
313 | |||
314 | /* fixed MTRRs. */ | ||
315 | if (fixed_msr_to_range(msr, &start, &end)) { | ||
316 | if (!fixed_mtrr_is_enabled(mtrr_state)) | ||
317 | return; | ||
318 | } else if (msr == MSR_MTRRdefType) { | ||
319 | start = 0x0; | ||
320 | end = ~0ULL; | ||
321 | } else { | ||
322 | /* variable range MTRRs. */ | ||
323 | index = (msr - 0x200) / 2; | ||
324 | var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end); | ||
325 | } | ||
326 | |||
327 | kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end)); | ||
328 | } | ||
329 | |||
330 | static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range) | ||
331 | { | ||
332 | return (range->mask & (1 << 11)) != 0; | ||
333 | } | ||
334 | |||
335 | static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
336 | { | ||
337 | struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; | ||
338 | struct kvm_mtrr_range *tmp, *cur; | ||
339 | int index, is_mtrr_mask; | ||
340 | |||
341 | index = (msr - 0x200) / 2; | ||
342 | is_mtrr_mask = msr - 0x200 - 2 * index; | ||
343 | cur = &mtrr_state->var_ranges[index]; | ||
344 | |||
345 | /* remove the entry if it's in the list. */ | ||
346 | if (var_mtrr_range_is_valid(cur)) | ||
347 | list_del(&mtrr_state->var_ranges[index].node); | ||
348 | |||
349 | if (!is_mtrr_mask) | ||
350 | cur->base = data; | ||
351 | else | ||
352 | cur->mask = data; | ||
353 | |||
354 | /* add it to the list if it's enabled. */ | ||
355 | if (var_mtrr_range_is_valid(cur)) { | ||
356 | list_for_each_entry(tmp, &mtrr_state->head, node) | ||
357 | if (cur->base >= tmp->base) | ||
358 | break; | ||
359 | list_add_tail(&cur->node, &tmp->node); | ||
360 | } | ||
361 | } | ||
362 | |||
363 | int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
364 | { | ||
365 | int index; | ||
366 | |||
367 | if (!kvm_mtrr_valid(vcpu, msr, data)) | ||
368 | return 1; | ||
369 | |||
370 | index = fixed_msr_to_range_index(msr); | ||
371 | if (index >= 0) | ||
372 | *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data; | ||
373 | else if (msr == MSR_MTRRdefType) | ||
374 | vcpu->arch.mtrr_state.deftype = data; | ||
375 | else if (msr == MSR_IA32_CR_PAT) | ||
376 | vcpu->arch.pat = data; | ||
377 | else | ||
378 | set_var_mtrr_msr(vcpu, msr, data); | ||
379 | |||
380 | update_mtrr(vcpu, msr); | ||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
385 | { | ||
386 | int index; | ||
387 | |||
388 | /* MSR_MTRRcap is a readonly MSR. */ | ||
389 | if (msr == MSR_MTRRcap) { | ||
390 | /* | ||
391 | * SMRR = 0 | ||
392 | * WC = 1 | ||
393 | * FIX = 1 | ||
394 | * VCNT = KVM_NR_VAR_MTRR | ||
395 | */ | ||
396 | *pdata = 0x500 | KVM_NR_VAR_MTRR; | ||
397 | return 0; | ||
398 | } | ||
399 | |||
400 | if (!msr_mtrr_valid(msr)) | ||
401 | return 1; | ||
402 | |||
403 | index = fixed_msr_to_range_index(msr); | ||
404 | if (index >= 0) | ||
405 | *pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index]; | ||
406 | else if (msr == MSR_MTRRdefType) | ||
407 | *pdata = vcpu->arch.mtrr_state.deftype; | ||
408 | else if (msr == MSR_IA32_CR_PAT) | ||
409 | *pdata = vcpu->arch.pat; | ||
410 | else { /* Variable MTRRs */ | ||
411 | int is_mtrr_mask; | ||
412 | |||
413 | index = (msr - 0x200) / 2; | ||
414 | is_mtrr_mask = msr - 0x200 - 2 * index; | ||
415 | if (!is_mtrr_mask) | ||
416 | *pdata = vcpu->arch.mtrr_state.var_ranges[index].base; | ||
417 | else | ||
418 | *pdata = vcpu->arch.mtrr_state.var_ranges[index].mask; | ||
419 | } | ||
420 | |||
421 | return 0; | ||
422 | } | ||
423 | |||
424 | void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu) | ||
425 | { | ||
426 | INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head); | ||
427 | } | ||
428 | |||
429 | struct mtrr_iter { | ||
430 | /* input fields. */ | ||
431 | struct kvm_mtrr *mtrr_state; | ||
432 | u64 start; | ||
433 | u64 end; | ||
434 | |||
435 | /* output fields. */ | ||
436 | int mem_type; | ||
437 | /* set if [start, end) is not fully covered by MTRRs. */ | ||
438 | bool partial_map; | ||
439 | |||
440 | /* private fields. */ | ||
441 | union { | ||
442 | /* used for fixed MTRRs. */ | ||
443 | struct { | ||
444 | int index; | ||
445 | int seg; | ||
446 | }; | ||
447 | |||
448 | /* used for var MTRRs. */ | ||
449 | struct { | ||
450 | struct kvm_mtrr_range *range; | ||
451 | /* max address has been covered in var MTRRs. */ | ||
452 | u64 start_max; | ||
453 | }; | ||
454 | }; | ||
455 | |||
456 | bool fixed; | ||
457 | }; | ||
458 | |||
459 | static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter) | ||
460 | { | ||
461 | int seg, index; | ||
462 | |||
463 | if (!fixed_mtrr_is_enabled(iter->mtrr_state)) | ||
464 | return false; | ||
465 | |||
466 | seg = fixed_mtrr_addr_to_seg(iter->start); | ||
467 | if (seg < 0) | ||
468 | return false; | ||
469 | |||
470 | iter->fixed = true; | ||
471 | index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg); | ||
472 | iter->index = index; | ||
473 | iter->seg = seg; | ||
474 | return true; | ||
475 | } | ||
476 | |||
477 | static bool match_var_range(struct mtrr_iter *iter, | ||
478 | struct kvm_mtrr_range *range) | ||
479 | { | ||
480 | u64 start, end; | ||
481 | |||
482 | var_mtrr_range(range, &start, &end); | ||
483 | if (!(start >= iter->end || end <= iter->start)) { | ||
484 | iter->range = range; | ||
485 | |||
486 | /* | ||
487 | * This function is called while walking the kvm_mtrr.head list. | ||
488 | * The matched range has the minimum base address that overlaps | ||
489 | * [iter->start_max, iter->end). | ||
490 | */ | ||
491 | iter->partial_map |= iter->start_max < start; | ||
492 | |||
493 | /* update the maximum address that has been covered. */ | ||
494 | iter->start_max = max(iter->start_max, end); | ||
495 | return true; | ||
496 | } | ||
497 | |||
498 | return false; | ||
499 | } | ||
500 | |||
501 | static void __mtrr_lookup_var_next(struct mtrr_iter *iter) | ||
502 | { | ||
503 | struct kvm_mtrr *mtrr_state = iter->mtrr_state; | ||
504 | |||
505 | list_for_each_entry_continue(iter->range, &mtrr_state->head, node) | ||
506 | if (match_var_range(iter, iter->range)) | ||
507 | return; | ||
508 | |||
509 | iter->range = NULL; | ||
510 | iter->partial_map |= iter->start_max < iter->end; | ||
511 | } | ||
512 | |||
513 | static void mtrr_lookup_var_start(struct mtrr_iter *iter) | ||
514 | { | ||
515 | struct kvm_mtrr *mtrr_state = iter->mtrr_state; | ||
516 | |||
517 | iter->fixed = false; | ||
518 | iter->start_max = iter->start; | ||
519 | iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node); | ||
520 | |||
521 | __mtrr_lookup_var_next(iter); | ||
522 | } | ||
523 | |||
524 | static void mtrr_lookup_fixed_next(struct mtrr_iter *iter) | ||
525 | { | ||
526 | /* terminate the lookup. */ | ||
527 | if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) { | ||
528 | iter->fixed = false; | ||
529 | iter->range = NULL; | ||
530 | return; | ||
531 | } | ||
532 | |||
533 | iter->index++; | ||
534 | |||
535 | /* we have looked up all fixed MTRRs. */ | ||
536 | if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges)) | ||
537 | return mtrr_lookup_var_start(iter); | ||
538 | |||
539 | /* switch to next segment. */ | ||
540 | if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg)) | ||
541 | iter->seg++; | ||
542 | } | ||
543 | |||
544 | static void mtrr_lookup_var_next(struct mtrr_iter *iter) | ||
545 | { | ||
546 | __mtrr_lookup_var_next(iter); | ||
547 | } | ||
548 | |||
549 | static void mtrr_lookup_start(struct mtrr_iter *iter) | ||
550 | { | ||
551 | if (!mtrr_is_enabled(iter->mtrr_state)) { | ||
552 | iter->partial_map = true; | ||
553 | return; | ||
554 | } | ||
555 | |||
556 | if (!mtrr_lookup_fixed_start(iter)) | ||
557 | mtrr_lookup_var_start(iter); | ||
558 | } | ||
559 | |||
560 | static void mtrr_lookup_init(struct mtrr_iter *iter, | ||
561 | struct kvm_mtrr *mtrr_state, u64 start, u64 end) | ||
562 | { | ||
563 | iter->mtrr_state = mtrr_state; | ||
564 | iter->start = start; | ||
565 | iter->end = end; | ||
566 | iter->partial_map = false; | ||
567 | iter->fixed = false; | ||
568 | iter->range = NULL; | ||
569 | |||
570 | mtrr_lookup_start(iter); | ||
571 | } | ||
572 | |||
573 | static bool mtrr_lookup_okay(struct mtrr_iter *iter) | ||
574 | { | ||
575 | if (iter->fixed) { | ||
576 | iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index]; | ||
577 | return true; | ||
578 | } | ||
579 | |||
580 | if (iter->range) { | ||
581 | iter->mem_type = iter->range->base & 0xff; | ||
582 | return true; | ||
583 | } | ||
584 | |||
585 | return false; | ||
586 | } | ||
587 | |||
588 | static void mtrr_lookup_next(struct mtrr_iter *iter) | ||
589 | { | ||
590 | if (iter->fixed) | ||
591 | mtrr_lookup_fixed_next(iter); | ||
592 | else | ||
593 | mtrr_lookup_var_next(iter); | ||
594 | } | ||
595 | |||
596 | #define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_) \ | ||
597 | for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_); \ | ||
598 | mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_)) | ||
599 | |||
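
For orientation, here is a minimal usage sketch of the iterator macro above (illustrative only, not part of the patch; kvm_mtrr_get_guest_memory_type just below is the real in-tree user):

	/* Sketch: walk the MTRR types covering a single guest page. */
	struct mtrr_iter iter;
	u64 start = gfn_to_gpa(gfn);
	u64 end = start + PAGE_SIZE;

	mtrr_for_each_mem_type(&iter, &vcpu->arch.mtrr_state, start, end) {
		/* iter.mem_type is the type of the currently matched range */
	}
	if (iter.partial_map) {
		/* some of [start, end) is only covered by the default type */
	}
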
600 | u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
601 | { | ||
602 | struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; | ||
603 | struct mtrr_iter iter; | ||
604 | u64 start, end; | ||
605 | int type = -1; | ||
606 | const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK) | ||
607 | | (1 << MTRR_TYPE_WRTHROUGH); | ||
608 | |||
609 | start = gfn_to_gpa(gfn); | ||
610 | end = start + PAGE_SIZE; | ||
611 | |||
612 | mtrr_for_each_mem_type(&iter, mtrr_state, start, end) { | ||
613 | int curr_type = iter.mem_type; | ||
614 | |||
615 | /* | ||
616 | * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR | ||
617 | * Precedences. | ||
618 | */ | ||
619 | |||
620 | if (type == -1) { | ||
621 | type = curr_type; | ||
622 | continue; | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * If two or more variable memory ranges match and the | ||
627 | * memory types are identical, then that memory type is | ||
628 | * used. | ||
629 | */ | ||
630 | if (type == curr_type) | ||
631 | continue; | ||
632 | |||
633 | /* | ||
634 | * If two or more variable memory ranges match and one of | ||
635 | * the memory types is UC, the UC memory type is used. | ||
636 | */ | ||
637 | if (curr_type == MTRR_TYPE_UNCACHABLE) | ||
638 | return MTRR_TYPE_UNCACHABLE; | ||
639 | |||
640 | /* | ||
641 | * If two or more variable memory ranges match and the | ||
642 | * memory types are WT and WB, the WT memory type is used. | ||
643 | */ | ||
644 | if (((1 << type) & wt_wb_mask) && | ||
645 | ((1 << curr_type) & wt_wb_mask)) { | ||
646 | type = MTRR_TYPE_WRTHROUGH; | ||
647 | continue; | ||
648 | } | ||
649 | |||
650 | /* | ||
651 | * For overlaps not defined by the above rules, processor | ||
652 | * behavior is undefined. | ||
653 | */ | ||
654 | |||
655 | /* We use WB for this undefined behavior. :( */ | ||
656 | return MTRR_TYPE_WRBACK; | ||
657 | } | ||
658 | |||
659 | /* It is not covered by MTRRs. */ | ||
660 | if (iter.partial_map) { | ||
661 | /* | ||
662 | * We check only a single page, so it cannot be partially | ||
663 | * covered by MTRRs. | ||
664 | */ | ||
665 | WARN_ON(type != -1); | ||
666 | type = mtrr_default_type(mtrr_state); | ||
667 | } | ||
668 | return type; | ||
669 | } | ||
670 | EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type); | ||
671 | |||
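
To make the precedence rules above concrete, a short worked example (illustrative only; the numeric values are the usual MTRR type encodings):

	/*
	 * Ranges covering the page	Result of the loop above
	 * -----------------------	------------------------
	 * WT (4), WT (4)		WT  (identical types)
	 * WB (6), UC (0)		UC  (UC always wins)
	 * WB (6), WT (4)		WT  (WT/WB rule)
	 * WC (1), WB (6)		WB  (undefined overlap, reported as WB)
	 */
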
672 | bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
673 | int page_num) | ||
674 | { | ||
675 | struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state; | ||
676 | struct mtrr_iter iter; | ||
677 | u64 start, end; | ||
678 | int type = -1; | ||
679 | |||
680 | start = gfn_to_gpa(gfn); | ||
681 | end = gfn_to_gpa(gfn + page_num); | ||
682 | mtrr_for_each_mem_type(&iter, mtrr_state, start, end) { | ||
683 | if (type == -1) { | ||
684 | type = iter.mem_type; | ||
685 | continue; | ||
686 | } | ||
687 | |||
688 | if (type != iter.mem_type) | ||
689 | return false; | ||
690 | } | ||
691 | |||
692 | if (!iter.partial_map) | ||
693 | return true; | ||
694 | |||
695 | if (type == -1) | ||
696 | return true; | ||
697 | |||
698 | return type == mtrr_default_type(mtrr_state); | ||
699 | } | ||
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6e6d115fe9b5..0f67d7e24800 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -256,7 +256,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | |||
256 | if (ret) | 256 | if (ret) |
257 | return ret; | 257 | return ret; |
258 | 258 | ||
259 | mark_page_dirty(vcpu->kvm, table_gfn); | 259 | kvm_vcpu_mark_page_dirty(vcpu, table_gfn); |
260 | walker->ptes[level] = pte; | 260 | walker->ptes[level] = pte; |
261 | } | 261 | } |
262 | return 0; | 262 | return 0; |
@@ -338,7 +338,7 @@ retry_walk: | |||
338 | 338 | ||
339 | real_gfn = gpa_to_gfn(real_gfn); | 339 | real_gfn = gpa_to_gfn(real_gfn); |
340 | 340 | ||
341 | host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, | 341 | host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn, |
342 | &walker->pte_writable[walker->level - 1]); | 342 | &walker->pte_writable[walker->level - 1]); |
343 | if (unlikely(kvm_is_error_hva(host_addr))) | 343 | if (unlikely(kvm_is_error_hva(host_addr))) |
344 | goto error; | 344 | goto error; |
@@ -511,11 +511,11 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, | |||
511 | base_gpa = pte_gpa & ~mask; | 511 | base_gpa = pte_gpa & ~mask; |
512 | index = (pte_gpa - base_gpa) / sizeof(pt_element_t); | 512 | index = (pte_gpa - base_gpa) / sizeof(pt_element_t); |
513 | 513 | ||
514 | r = kvm_read_guest_atomic(vcpu->kvm, base_gpa, | 514 | r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa, |
515 | gw->prefetch_ptes, sizeof(gw->prefetch_ptes)); | 515 | gw->prefetch_ptes, sizeof(gw->prefetch_ptes)); |
516 | curr_pte = gw->prefetch_ptes[index]; | 516 | curr_pte = gw->prefetch_ptes[index]; |
517 | } else | 517 | } else |
518 | r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, | 518 | r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, |
519 | &curr_pte, sizeof(curr_pte)); | 519 | &curr_pte, sizeof(curr_pte)); |
520 | 520 | ||
521 | return r || curr_pte != gw->ptes[level - 1]; | 521 | return r || curr_pte != gw->ptes[level - 1]; |
@@ -869,8 +869,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
869 | if (!rmap_can_add(vcpu)) | 869 | if (!rmap_can_add(vcpu)) |
870 | break; | 870 | break; |
871 | 871 | ||
872 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 872 | if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, |
873 | sizeof(pt_element_t))) | 873 | sizeof(pt_element_t))) |
874 | break; | 874 | break; |
875 | 875 | ||
876 | FNAME(update_pte)(vcpu, sp, sptep, &gpte); | 876 | FNAME(update_pte)(vcpu, sp, sptep, &gpte); |
@@ -956,8 +956,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
956 | 956 | ||
957 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); | 957 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
958 | 958 | ||
959 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 959 | if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, |
960 | sizeof(pt_element_t))) | 960 | sizeof(pt_element_t))) |
961 | return -EINVAL; | 961 | return -EINVAL; |
962 | 962 | ||
963 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { | 963 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
@@ -970,7 +970,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
970 | pte_access &= FNAME(gpte_access)(vcpu, gpte); | 970 | pte_access &= FNAME(gpte_access)(vcpu, gpte); |
971 | FNAME(protect_clean_gpte)(&pte_access, gpte); | 971 | FNAME(protect_clean_gpte)(&pte_access, gpte); |
972 | 972 | ||
973 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, | 973 | if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, |
974 | &nr_present)) | 974 | &nr_present)) |
975 | continue; | 975 | continue; |
976 | 976 | ||
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 29fbf9dfdc54..31aa2c85dc97 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -1,11 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support | 2 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support |
3 | * | 3 | * |
4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | 4 | * Copyright 2015 Red Hat, Inc. and/or its affiliates. |
5 | * | 5 | * |
6 | * Authors: | 6 | * Authors: |
7 | * Avi Kivity <avi@redhat.com> | 7 | * Avi Kivity <avi@redhat.com> |
8 | * Gleb Natapov <gleb@redhat.com> | 8 | * Gleb Natapov <gleb@redhat.com> |
9 | * Wei Huang <wei@redhat.com> | ||
9 | * | 10 | * |
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | 11 | * This work is licensed under the terms of the GNU GPL, version 2. See |
11 | * the COPYING file in the top-level directory. | 12 | * the COPYING file in the top-level directory. |
@@ -19,88 +20,39 @@ | |||
19 | #include "x86.h" | 20 | #include "x86.h" |
20 | #include "cpuid.h" | 21 | #include "cpuid.h" |
21 | #include "lapic.h" | 22 | #include "lapic.h" |
23 | #include "pmu.h" | ||
24 | |||
25 | /* NOTE: | ||
26 | * - Each perf counter is defined as "struct kvm_pmc"; | ||
27 | * - There are two types of perf counters: general purpose (gp) and fixed. | ||
28 | * gp counters are stored in gp_counters[] and fixed counters are stored | ||
29 | * in fixed_counters[] respectively. Both of them are part of "struct | ||
30 | * kvm_pmu"; | ||
31 | * - pmu.c understands the difference between gp counters and fixed counters. | ||
32 | * However, AMD doesn't support fixed counters; | ||
33 | * - There are three types of index to access perf counters (PMC): | ||
34 | * 1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD | ||
35 | * has MSR_K7_PERFCTRn. | ||
36 | * 2. MSR Index (named idx): This is normally used by the RDPMC instruction. | ||
37 | * For instance, the AMD RDPMC instruction uses 0000_0003h in ECX to access | ||
38 | * C001_0007h (MSR_K7_PERFCTR3). Intel has a similar mechanism, except | ||
39 | * that it also supports fixed counters. idx can be used as an index into | ||
40 | * the gp and fixed counters. | ||
41 | * 3. Global PMC Index (named pmc): pmc is an index specific to PMU | ||
42 | * code. Each pmc, stored in kvm_pmc.idx field, is unique across | ||
43 | * all perf counters (both gp and fixed). The mapping relationship | ||
44 | * between pmc and perf counters is as follows: | ||
45 | * * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters | ||
46 | * [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed | ||
47 | * * AMD: [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters | ||
48 | */ | ||
22 | 49 | ||
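
A hedged sketch of the idx-to-pmc relationship described in the NOTE above, for the Intel layout (the helper name below is made up for illustration; the real decoding lives in intel_msr_idx_to_pmc further down in this patch):

/* Illustration only: map an RDPMC-style idx onto the global pmc index
 * space described above (Intel layout). */
static int example_idx_to_global_pmc(unsigned int idx)
{
	bool fixed = idx & (1u << 30);	/* bit 30 selects fixed counters */

	idx &= ~(3u << 30);		/* strip type/fast-mode bits */
	return fixed ? INTEL_PMC_IDX_FIXED + idx : idx;
}
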
23 | static struct kvm_arch_event_perf_mapping { | 50 | static void kvm_pmi_trigger_fn(struct irq_work *irq_work) |
24 | u8 eventsel; | ||
25 | u8 unit_mask; | ||
26 | unsigned event_type; | ||
27 | bool inexact; | ||
28 | } arch_events[] = { | ||
29 | /* Index must match CPUID 0x0A.EBX bit vector */ | ||
30 | [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES }, | ||
31 | [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, | ||
32 | [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES }, | ||
33 | [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES }, | ||
34 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, | ||
35 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
36 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | ||
37 | [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, | ||
38 | }; | ||
39 | |||
40 | /* mapping between fixed pmc index and arch_events array */ | ||
41 | static int fixed_pmc_events[] = {1, 0, 7}; | ||
42 | |||
43 | static bool pmc_is_gp(struct kvm_pmc *pmc) | ||
44 | { | ||
45 | return pmc->type == KVM_PMC_GP; | ||
46 | } | ||
47 | |||
48 | static inline u64 pmc_bitmask(struct kvm_pmc *pmc) | ||
49 | { | ||
50 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
51 | |||
52 | return pmu->counter_bitmask[pmc->type]; | ||
53 | } | ||
54 | |||
55 | static inline bool pmc_enabled(struct kvm_pmc *pmc) | ||
56 | { | ||
57 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | ||
58 | return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); | ||
59 | } | ||
60 | |||
61 | static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, | ||
62 | u32 base) | ||
63 | { | ||
64 | if (msr >= base && msr < base + pmu->nr_arch_gp_counters) | ||
65 | return &pmu->gp_counters[msr - base]; | ||
66 | return NULL; | ||
67 | } | ||
68 | |||
69 | static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) | ||
70 | { | ||
71 | int base = MSR_CORE_PERF_FIXED_CTR0; | ||
72 | if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) | ||
73 | return &pmu->fixed_counters[msr - base]; | ||
74 | return NULL; | ||
75 | } | ||
76 | |||
77 | static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx) | ||
78 | { | ||
79 | return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx); | ||
80 | } | ||
81 | |||
82 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) | ||
83 | { | ||
84 | if (idx < INTEL_PMC_IDX_FIXED) | ||
85 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); | ||
86 | else | ||
87 | return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED); | ||
88 | } | ||
89 | |||
90 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) | ||
91 | { | ||
92 | if (vcpu->arch.apic) | ||
93 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); | ||
94 | } | ||
95 | |||
96 | static void trigger_pmi(struct irq_work *irq_work) | ||
97 | { | 51 | { |
98 | struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, | 52 | struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work); |
99 | irq_work); | 53 | struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); |
100 | struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu, | ||
101 | arch.pmu); | ||
102 | 54 | ||
103 | kvm_deliver_pmi(vcpu); | 55 | kvm_pmu_deliver_pmi(vcpu); |
104 | } | 56 | } |
105 | 57 | ||
106 | static void kvm_perf_overflow(struct perf_event *perf_event, | 58 | static void kvm_perf_overflow(struct perf_event *perf_event, |
@@ -108,63 +60,46 @@ static void kvm_perf_overflow(struct perf_event *perf_event, | |||
108 | struct pt_regs *regs) | 60 | struct pt_regs *regs) |
109 | { | 61 | { |
110 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | 62 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; |
111 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | 63 | struct kvm_pmu *pmu = pmc_to_pmu(pmc); |
112 | if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) { | 64 | |
65 | if (!test_and_set_bit(pmc->idx, | ||
66 | (unsigned long *)&pmu->reprogram_pmi)) { | ||
113 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); | 67 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); |
114 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); | 68 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); |
115 | } | 69 | } |
116 | } | 70 | } |
117 | 71 | ||
118 | static void kvm_perf_overflow_intr(struct perf_event *perf_event, | 72 | static void kvm_perf_overflow_intr(struct perf_event *perf_event, |
119 | struct perf_sample_data *data, struct pt_regs *regs) | 73 | struct perf_sample_data *data, |
74 | struct pt_regs *regs) | ||
120 | { | 75 | { |
121 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; | 76 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; |
122 | struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu; | 77 | struct kvm_pmu *pmu = pmc_to_pmu(pmc); |
123 | if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) { | 78 | |
79 | if (!test_and_set_bit(pmc->idx, | ||
80 | (unsigned long *)&pmu->reprogram_pmi)) { | ||
124 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); | 81 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); |
125 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); | 82 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); |
83 | |||
126 | /* | 84 | /* |
127 | * Inject PMI. If vcpu was in a guest mode during NMI PMI | 85 | * Inject PMI. If vcpu was in a guest mode during NMI PMI |
128 | * can be ejected on a guest mode re-entry. Otherwise we can't | 86 | * can be ejected on a guest mode re-entry. Otherwise we can't |
129 | * be sure that vcpu wasn't executing hlt instruction at the | 87 | * be sure that vcpu wasn't executing hlt instruction at the |
130 | * time of vmexit and is not going to re-enter guest mode until, | 88 | * time of vmexit and is not going to re-enter guest mode until |
131 | * woken up. So we should wake it, but this is impossible from | 89 | * woken up. So we should wake it, but this is impossible from |
132 | * NMI context. Do it from irq work instead. | 90 | * NMI context. Do it from irq work instead. |
133 | */ | 91 | */ |
134 | if (!kvm_is_in_guest()) | 92 | if (!kvm_is_in_guest()) |
135 | irq_work_queue(&pmc->vcpu->arch.pmu.irq_work); | 93 | irq_work_queue(&pmc_to_pmu(pmc)->irq_work); |
136 | else | 94 | else |
137 | kvm_make_request(KVM_REQ_PMI, pmc->vcpu); | 95 | kvm_make_request(KVM_REQ_PMI, pmc->vcpu); |
138 | } | 96 | } |
139 | } | 97 | } |
140 | 98 | ||
141 | static u64 read_pmc(struct kvm_pmc *pmc) | 99 | static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, |
142 | { | 100 | unsigned config, bool exclude_user, |
143 | u64 counter, enabled, running; | 101 | bool exclude_kernel, bool intr, |
144 | 102 | bool in_tx, bool in_tx_cp) | |
145 | counter = pmc->counter; | ||
146 | |||
147 | if (pmc->perf_event) | ||
148 | counter += perf_event_read_value(pmc->perf_event, | ||
149 | &enabled, &running); | ||
150 | |||
151 | /* FIXME: Scaling needed? */ | ||
152 | |||
153 | return counter & pmc_bitmask(pmc); | ||
154 | } | ||
155 | |||
156 | static void stop_counter(struct kvm_pmc *pmc) | ||
157 | { | ||
158 | if (pmc->perf_event) { | ||
159 | pmc->counter = read_pmc(pmc); | ||
160 | perf_event_release_kernel(pmc->perf_event); | ||
161 | pmc->perf_event = NULL; | ||
162 | } | ||
163 | } | ||
164 | |||
165 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | ||
166 | unsigned config, bool exclude_user, bool exclude_kernel, | ||
167 | bool intr, bool in_tx, bool in_tx_cp) | ||
168 | { | 103 | { |
169 | struct perf_event *event; | 104 | struct perf_event *event; |
170 | struct perf_event_attr attr = { | 105 | struct perf_event_attr attr = { |
@@ -177,6 +112,7 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | |||
177 | .exclude_kernel = exclude_kernel, | 112 | .exclude_kernel = exclude_kernel, |
178 | .config = config, | 113 | .config = config, |
179 | }; | 114 | }; |
115 | |||
180 | if (in_tx) | 116 | if (in_tx) |
181 | attr.config |= HSW_IN_TX; | 117 | attr.config |= HSW_IN_TX; |
182 | if (in_tx_cp) | 118 | if (in_tx_cp) |
@@ -188,33 +124,16 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | |||
188 | intr ? kvm_perf_overflow_intr : | 124 | intr ? kvm_perf_overflow_intr : |
189 | kvm_perf_overflow, pmc); | 125 | kvm_perf_overflow, pmc); |
190 | if (IS_ERR(event)) { | 126 | if (IS_ERR(event)) { |
191 | printk_once("kvm: pmu event creation failed %ld\n", | 127 | printk_once("kvm_pmu: event creation failed %ld\n", |
192 | PTR_ERR(event)); | 128 | PTR_ERR(event)); |
193 | return; | 129 | return; |
194 | } | 130 | } |
195 | 131 | ||
196 | pmc->perf_event = event; | 132 | pmc->perf_event = event; |
197 | clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi); | 133 | clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi); |
198 | } | ||
199 | |||
200 | static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select, | ||
201 | u8 unit_mask) | ||
202 | { | ||
203 | int i; | ||
204 | |||
205 | for (i = 0; i < ARRAY_SIZE(arch_events); i++) | ||
206 | if (arch_events[i].eventsel == event_select | ||
207 | && arch_events[i].unit_mask == unit_mask | ||
208 | && (pmu->available_event_types & (1 << i))) | ||
209 | break; | ||
210 | |||
211 | if (i == ARRAY_SIZE(arch_events)) | ||
212 | return PERF_COUNT_HW_MAX; | ||
213 | |||
214 | return arch_events[i].event_type; | ||
215 | } | 134 | } |
216 | 135 | ||
217 | static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | 136 | void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) |
218 | { | 137 | { |
219 | unsigned config, type = PERF_TYPE_RAW; | 138 | unsigned config, type = PERF_TYPE_RAW; |
220 | u8 event_select, unit_mask; | 139 | u8 event_select, unit_mask; |
@@ -224,21 +143,22 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
224 | 143 | ||
225 | pmc->eventsel = eventsel; | 144 | pmc->eventsel = eventsel; |
226 | 145 | ||
227 | stop_counter(pmc); | 146 | pmc_stop_counter(pmc); |
228 | 147 | ||
229 | if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc)) | 148 | if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) |
230 | return; | 149 | return; |
231 | 150 | ||
232 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; | 151 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; |
233 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | 152 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; |
234 | 153 | ||
235 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | | 154 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | |
236 | ARCH_PERFMON_EVENTSEL_INV | | 155 | ARCH_PERFMON_EVENTSEL_INV | |
237 | ARCH_PERFMON_EVENTSEL_CMASK | | 156 | ARCH_PERFMON_EVENTSEL_CMASK | |
238 | HSW_IN_TX | | 157 | HSW_IN_TX | |
239 | HSW_IN_TX_CHECKPOINTED))) { | 158 | HSW_IN_TX_CHECKPOINTED))) { |
240 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | 159 | config = kvm_x86_ops->pmu_ops->find_arch_event(pmc_to_pmu(pmc), |
241 | unit_mask); | 160 | event_select, |
161 | unit_mask); | ||
242 | if (config != PERF_COUNT_HW_MAX) | 162 | if (config != PERF_COUNT_HW_MAX) |
243 | type = PERF_TYPE_HARDWARE; | 163 | type = PERF_TYPE_HARDWARE; |
244 | } | 164 | } |
@@ -246,56 +166,36 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
246 | if (type == PERF_TYPE_RAW) | 166 | if (type == PERF_TYPE_RAW) |
247 | config = eventsel & X86_RAW_EVENT_MASK; | 167 | config = eventsel & X86_RAW_EVENT_MASK; |
248 | 168 | ||
249 | reprogram_counter(pmc, type, config, | 169 | pmc_reprogram_counter(pmc, type, config, |
250 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | 170 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), |
251 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | 171 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), |
252 | eventsel & ARCH_PERFMON_EVENTSEL_INT, | 172 | eventsel & ARCH_PERFMON_EVENTSEL_INT, |
253 | (eventsel & HSW_IN_TX), | 173 | (eventsel & HSW_IN_TX), |
254 | (eventsel & HSW_IN_TX_CHECKPOINTED)); | 174 | (eventsel & HSW_IN_TX_CHECKPOINTED)); |
255 | } | 175 | } |
176 | EXPORT_SYMBOL_GPL(reprogram_gp_counter); | ||
256 | 177 | ||
257 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | 178 | void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) |
258 | { | 179 | { |
259 | unsigned en = en_pmi & 0x3; | 180 | unsigned en_field = ctrl & 0x3; |
260 | bool pmi = en_pmi & 0x8; | 181 | bool pmi = ctrl & 0x8; |
261 | 182 | ||
262 | stop_counter(pmc); | 183 | pmc_stop_counter(pmc); |
263 | 184 | ||
264 | if (!en || !pmc_enabled(pmc)) | 185 | if (!en_field || !pmc_is_enabled(pmc)) |
265 | return; | 186 | return; |
266 | 187 | ||
267 | reprogram_counter(pmc, PERF_TYPE_HARDWARE, | 188 | pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, |
268 | arch_events[fixed_pmc_events[idx]].event_type, | 189 | kvm_x86_ops->pmu_ops->find_fixed_event(idx), |
269 | !(en & 0x2), /* exclude user */ | 190 | !(en_field & 0x2), /* exclude user */ |
270 | !(en & 0x1), /* exclude kernel */ | 191 | !(en_field & 0x1), /* exclude kernel */ |
271 | pmi, false, false); | 192 | pmi, false, false); |
272 | } | 193 | } |
194 | EXPORT_SYMBOL_GPL(reprogram_fixed_counter); | ||
273 | 195 | ||
274 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) | 196 | void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx) |
275 | { | 197 | { |
276 | return (ctrl >> (idx * 4)) & 0xf; | 198 | struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx); |
277 | } | ||
278 | |||
279 | static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) | ||
280 | { | ||
281 | int i; | ||
282 | |||
283 | for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { | ||
284 | u8 en_pmi = fixed_en_pmi(data, i); | ||
285 | struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i); | ||
286 | |||
287 | if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi) | ||
288 | continue; | ||
289 | |||
290 | reprogram_fixed_counter(pmc, en_pmi, i); | ||
291 | } | ||
292 | |||
293 | pmu->fixed_ctr_ctrl = data; | ||
294 | } | ||
295 | |||
296 | static void reprogram_idx(struct kvm_pmu *pmu, int idx) | ||
297 | { | ||
298 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx); | ||
299 | 199 | ||
300 | if (!pmc) | 200 | if (!pmc) |
301 | return; | 201 | return; |
@@ -303,274 +203,107 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx) | |||
303 | if (pmc_is_gp(pmc)) | 203 | if (pmc_is_gp(pmc)) |
304 | reprogram_gp_counter(pmc, pmc->eventsel); | 204 | reprogram_gp_counter(pmc, pmc->eventsel); |
305 | else { | 205 | else { |
306 | int fidx = idx - INTEL_PMC_IDX_FIXED; | 206 | int idx = pmc_idx - INTEL_PMC_IDX_FIXED; |
307 | reprogram_fixed_counter(pmc, | 207 | u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx); |
308 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); | 208 | |
209 | reprogram_fixed_counter(pmc, ctrl, idx); | ||
309 | } | 210 | } |
310 | } | 211 | } |
212 | EXPORT_SYMBOL_GPL(reprogram_counter); | ||
311 | 213 | ||
312 | static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) | 214 | void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) |
313 | { | 215 | { |
216 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
217 | u64 bitmask; | ||
314 | int bit; | 218 | int bit; |
315 | u64 diff = pmu->global_ctrl ^ data; | ||
316 | |||
317 | pmu->global_ctrl = data; | ||
318 | |||
319 | for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) | ||
320 | reprogram_idx(pmu, bit); | ||
321 | } | ||
322 | 219 | ||
323 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr) | 220 | bitmask = pmu->reprogram_pmi; |
324 | { | ||
325 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
326 | int ret; | ||
327 | |||
328 | switch (msr) { | ||
329 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
330 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
331 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
332 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
333 | ret = pmu->version > 1; | ||
334 | break; | ||
335 | default: | ||
336 | ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) | ||
337 | || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) | ||
338 | || get_fixed_pmc(pmu, msr); | ||
339 | break; | ||
340 | } | ||
341 | return ret; | ||
342 | } | ||
343 | 221 | ||
344 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) | 222 | for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { |
345 | { | 223 | struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit); |
346 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
347 | struct kvm_pmc *pmc; | ||
348 | 224 | ||
349 | switch (index) { | 225 | if (unlikely(!pmc || !pmc->perf_event)) { |
350 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | 226 | clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); |
351 | *data = pmu->fixed_ctr_ctrl; | 227 | continue; |
352 | return 0; | ||
353 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
354 | *data = pmu->global_status; | ||
355 | return 0; | ||
356 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
357 | *data = pmu->global_ctrl; | ||
358 | return 0; | ||
359 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
360 | *data = pmu->global_ovf_ctrl; | ||
361 | return 0; | ||
362 | default: | ||
363 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
364 | (pmc = get_fixed_pmc(pmu, index))) { | ||
365 | *data = read_pmc(pmc); | ||
366 | return 0; | ||
367 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
368 | *data = pmc->eventsel; | ||
369 | return 0; | ||
370 | } | 228 | } |
371 | } | ||
372 | return 1; | ||
373 | } | ||
374 | 229 | ||
375 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | 230 | reprogram_counter(pmu, bit); |
376 | { | ||
377 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
378 | struct kvm_pmc *pmc; | ||
379 | u32 index = msr_info->index; | ||
380 | u64 data = msr_info->data; | ||
381 | |||
382 | switch (index) { | ||
383 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
384 | if (pmu->fixed_ctr_ctrl == data) | ||
385 | return 0; | ||
386 | if (!(data & 0xfffffffffffff444ull)) { | ||
387 | reprogram_fixed_counters(pmu, data); | ||
388 | return 0; | ||
389 | } | ||
390 | break; | ||
391 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
392 | if (msr_info->host_initiated) { | ||
393 | pmu->global_status = data; | ||
394 | return 0; | ||
395 | } | ||
396 | break; /* RO MSR */ | ||
397 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
398 | if (pmu->global_ctrl == data) | ||
399 | return 0; | ||
400 | if (!(data & pmu->global_ctrl_mask)) { | ||
401 | global_ctrl_changed(pmu, data); | ||
402 | return 0; | ||
403 | } | ||
404 | break; | ||
405 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
406 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | ||
407 | if (!msr_info->host_initiated) | ||
408 | pmu->global_status &= ~data; | ||
409 | pmu->global_ovf_ctrl = data; | ||
410 | return 0; | ||
411 | } | ||
412 | break; | ||
413 | default: | ||
414 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | ||
415 | (pmc = get_fixed_pmc(pmu, index))) { | ||
416 | if (!msr_info->host_initiated) | ||
417 | data = (s64)(s32)data; | ||
418 | pmc->counter += data - read_pmc(pmc); | ||
419 | return 0; | ||
420 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | ||
421 | if (data == pmc->eventsel) | ||
422 | return 0; | ||
423 | if (!(data & pmu->reserved_bits)) { | ||
424 | reprogram_gp_counter(pmc, data); | ||
425 | return 0; | ||
426 | } | ||
427 | } | ||
428 | } | 231 | } |
429 | return 1; | ||
430 | } | 232 | } |
431 | 233 | ||
432 | int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc) | 234 | /* check if idx is a valid index to access PMU */ |
235 | int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||
433 | { | 236 | { |
434 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 237 | return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); |
435 | bool fixed = pmc & (1u << 30); | ||
436 | pmc &= ~(3u << 30); | ||
437 | return (!fixed && pmc >= pmu->nr_arch_gp_counters) || | ||
438 | (fixed && pmc >= pmu->nr_arch_fixed_counters); | ||
439 | } | 238 | } |
440 | 239 | ||
441 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) | 240 | int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) |
442 | { | 241 | { |
443 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 242 | bool fast_mode = idx & (1u << 31); |
444 | bool fast_mode = pmc & (1u << 31); | 243 | struct kvm_pmc *pmc; |
445 | bool fixed = pmc & (1u << 30); | 244 | u64 ctr_val; |
446 | struct kvm_pmc *counters; | 245 | |
447 | u64 ctr; | 246 | pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); |
448 | 247 | if (!pmc) | |
449 | pmc &= ~(3u << 30); | ||
450 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) | ||
451 | return 1; | ||
452 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) | ||
453 | return 1; | 248 | return 1; |
454 | counters = fixed ? pmu->fixed_counters : pmu->gp_counters; | 249 | |
455 | ctr = read_pmc(&counters[pmc]); | 250 | ctr_val = pmc_read_counter(pmc); |
456 | if (fast_mode) | 251 | if (fast_mode) |
457 | ctr = (u32)ctr; | 252 | ctr_val = (u32)ctr_val; |
458 | *data = ctr; | ||
459 | 253 | ||
254 | *data = ctr_val; | ||
460 | return 0; | 255 | return 0; |
461 | } | 256 | } |
462 | 257 | ||
463 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | 258 | void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) |
464 | { | 259 | { |
465 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 260 | if (vcpu->arch.apic) |
466 | struct kvm_cpuid_entry2 *entry; | 261 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); |
467 | union cpuid10_eax eax; | 262 | } |
468 | union cpuid10_edx edx; | ||
469 | |||
470 | pmu->nr_arch_gp_counters = 0; | ||
471 | pmu->nr_arch_fixed_counters = 0; | ||
472 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | ||
473 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | ||
474 | pmu->version = 0; | ||
475 | pmu->reserved_bits = 0xffffffff00200000ull; | ||
476 | |||
477 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | ||
478 | if (!entry) | ||
479 | return; | ||
480 | eax.full = entry->eax; | ||
481 | edx.full = entry->edx; | ||
482 | |||
483 | pmu->version = eax.split.version_id; | ||
484 | if (!pmu->version) | ||
485 | return; | ||
486 | |||
487 | pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, | ||
488 | INTEL_PMC_MAX_GENERIC); | ||
489 | pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; | ||
490 | pmu->available_event_types = ~entry->ebx & | ||
491 | ((1ull << eax.split.mask_length) - 1); | ||
492 | |||
493 | if (pmu->version == 1) { | ||
494 | pmu->nr_arch_fixed_counters = 0; | ||
495 | } else { | ||
496 | pmu->nr_arch_fixed_counters = | ||
497 | min_t(int, edx.split.num_counters_fixed, | ||
498 | INTEL_PMC_MAX_FIXED); | ||
499 | pmu->counter_bitmask[KVM_PMC_FIXED] = | ||
500 | ((u64)1 << edx.split.bit_width_fixed) - 1; | ||
501 | } | ||
502 | 263 | ||
503 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | 264 | bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) |
504 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); | 265 | { |
505 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | 266 | return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); |
267 | } | ||
506 | 268 | ||
507 | entry = kvm_find_cpuid_entry(vcpu, 7, 0); | 269 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) |
508 | if (entry && | 270 | { |
509 | (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && | 271 | return kvm_x86_ops->pmu_ops->get_msr(vcpu, msr, data); |
510 | (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) | ||
511 | pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; | ||
512 | } | 272 | } |
513 | 273 | ||
514 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | 274 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
515 | { | 275 | { |
516 | int i; | 276 | return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info); |
517 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 277 | } |
518 | 278 | ||
519 | memset(pmu, 0, sizeof(*pmu)); | 279 | /* refresh PMU settings. This function generally is called when underlying |
520 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { | 280 | * settings are changed (such as changes of PMU CPUID by guest VMs), which |
521 | pmu->gp_counters[i].type = KVM_PMC_GP; | 281 | * should rarely happen. |
522 | pmu->gp_counters[i].vcpu = vcpu; | 282 | */ |
523 | pmu->gp_counters[i].idx = i; | 283 | void kvm_pmu_refresh(struct kvm_vcpu *vcpu) |
524 | } | 284 | { |
525 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { | 285 | kvm_x86_ops->pmu_ops->refresh(vcpu); |
526 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; | ||
527 | pmu->fixed_counters[i].vcpu = vcpu; | ||
528 | pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; | ||
529 | } | ||
530 | init_irq_work(&pmu->irq_work, trigger_pmi); | ||
531 | kvm_pmu_cpuid_update(vcpu); | ||
532 | } | 286 | } |
533 | 287 | ||
534 | void kvm_pmu_reset(struct kvm_vcpu *vcpu) | 288 | void kvm_pmu_reset(struct kvm_vcpu *vcpu) |
535 | { | 289 | { |
536 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 290 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
537 | int i; | ||
538 | 291 | ||
539 | irq_work_sync(&pmu->irq_work); | 292 | irq_work_sync(&pmu->irq_work); |
540 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { | 293 | kvm_x86_ops->pmu_ops->reset(vcpu); |
541 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | 294 | } |
542 | stop_counter(pmc); | ||
543 | pmc->counter = pmc->eventsel = 0; | ||
544 | } | ||
545 | 295 | ||
546 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) | 296 | void kvm_pmu_init(struct kvm_vcpu *vcpu) |
547 | stop_counter(&pmu->fixed_counters[i]); | 297 | { |
298 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
548 | 299 | ||
549 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = | 300 | memset(pmu, 0, sizeof(*pmu)); |
550 | pmu->global_ovf_ctrl = 0; | 301 | kvm_x86_ops->pmu_ops->init(vcpu); |
302 | init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); | ||
303 | kvm_pmu_refresh(vcpu); | ||
551 | } | 304 | } |
552 | 305 | ||
553 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu) | 306 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu) |
554 | { | 307 | { |
555 | kvm_pmu_reset(vcpu); | 308 | kvm_pmu_reset(vcpu); |
556 | } | 309 | } |
557 | |||
558 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu) | ||
559 | { | ||
560 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | ||
561 | u64 bitmask; | ||
562 | int bit; | ||
563 | |||
564 | bitmask = pmu->reprogram_pmi; | ||
565 | |||
566 | for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { | ||
567 | struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit); | ||
568 | |||
569 | if (unlikely(!pmc || !pmc->perf_event)) { | ||
570 | clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); | ||
571 | continue; | ||
572 | } | ||
573 | |||
574 | reprogram_idx(pmu, bit); | ||
575 | } | ||
576 | } | ||
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h new file mode 100644 index 000000000000..f96e1f962587 --- /dev/null +++ b/arch/x86/kvm/pmu.h | |||
@@ -0,0 +1,118 @@ | |||
1 | #ifndef __KVM_X86_PMU_H | ||
2 | #define __KVM_X86_PMU_H | ||
3 | |||
4 | #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) | ||
5 | #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) | ||
6 | #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) | ||
7 | |||
8 | /* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */ | ||
9 | #define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf) | ||
10 | |||
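
A small illustration of what the macro extracts, assuming the usual IA32_FIXED_CTR_CTRL layout (bits 0-1 ring enable, bit 3 PMI enable) that reprogram_fixed_counter relies on:

	/* Illustration only: fixed counter 1 enabled in all rings with PMI. */
	u64 ctrl = 0xbULL << 4;			/* counter 1's field lives in bits 7:4 */
	u8 field = fixed_ctrl_field(ctrl, 1);	/* 0xb: en_field == 3, pmi bit set */
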
11 | struct kvm_event_hw_type_mapping { | ||
12 | u8 eventsel; | ||
13 | u8 unit_mask; | ||
14 | unsigned event_type; | ||
15 | }; | ||
16 | |||
17 | struct kvm_pmu_ops { | ||
18 | unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select, | ||
19 | u8 unit_mask); | ||
20 | unsigned (*find_fixed_event)(int idx); | ||
21 | bool (*pmc_is_enabled)(struct kvm_pmc *pmc); | ||
22 | struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); | ||
23 | struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx); | ||
24 | int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); | ||
25 | bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); | ||
26 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
27 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); | ||
28 | void (*refresh)(struct kvm_vcpu *vcpu); | ||
29 | void (*init)(struct kvm_vcpu *vcpu); | ||
30 | void (*reset)(struct kvm_vcpu *vcpu); | ||
31 | }; | ||
32 | |||
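
The ops table is meant to be plugged into kvm_x86_ops by each vendor module; the svm.c/vmx.c hunks are not part of this excerpt, so the following is only an approximate sketch (struct and field names from memory, not taken from the patch):

/* Sketch: how a vendor module would advertise its PMU ops. */
static struct kvm_x86_ops svm_x86_ops = {
	/* ... all the other callbacks ... */
	.pmu_ops = &amd_pmu_ops,	/* vmx.c would use &intel_pmu_ops */
};
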
33 | static inline u64 pmc_bitmask(struct kvm_pmc *pmc) | ||
34 | { | ||
35 | struct kvm_pmu *pmu = pmc_to_pmu(pmc); | ||
36 | |||
37 | return pmu->counter_bitmask[pmc->type]; | ||
38 | } | ||
39 | |||
40 | static inline u64 pmc_read_counter(struct kvm_pmc *pmc) | ||
41 | { | ||
42 | u64 counter, enabled, running; | ||
43 | |||
44 | counter = pmc->counter; | ||
45 | if (pmc->perf_event) | ||
46 | counter += perf_event_read_value(pmc->perf_event, | ||
47 | &enabled, &running); | ||
48 | /* FIXME: Scaling needed? */ | ||
49 | return counter & pmc_bitmask(pmc); | ||
50 | } | ||
51 | |||
52 | static inline void pmc_stop_counter(struct kvm_pmc *pmc) | ||
53 | { | ||
54 | if (pmc->perf_event) { | ||
55 | pmc->counter = pmc_read_counter(pmc); | ||
56 | perf_event_release_kernel(pmc->perf_event); | ||
57 | pmc->perf_event = NULL; | ||
58 | } | ||
59 | } | ||
60 | |||
61 | static inline bool pmc_is_gp(struct kvm_pmc *pmc) | ||
62 | { | ||
63 | return pmc->type == KVM_PMC_GP; | ||
64 | } | ||
65 | |||
66 | static inline bool pmc_is_fixed(struct kvm_pmc *pmc) | ||
67 | { | ||
68 | return pmc->type == KVM_PMC_FIXED; | ||
69 | } | ||
70 | |||
71 | static inline bool pmc_is_enabled(struct kvm_pmc *pmc) | ||
72 | { | ||
73 | return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc); | ||
74 | } | ||
75 | |||
76 | /* returns the general-purpose PMC with the specified MSR. Note that it can be | ||
77 | * used for both PERFCTRn and EVNTSELn; that is why it accepts base as a | ||
78 | * parameter to tell them apart. | ||
79 | */ | ||
80 | static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, | ||
81 | u32 base) | ||
82 | { | ||
83 | if (msr >= base && msr < base + pmu->nr_arch_gp_counters) | ||
84 | return &pmu->gp_counters[msr - base]; | ||
85 | |||
86 | return NULL; | ||
87 | } | ||
88 | |||
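
A tiny illustration of the base parameter (not from the patch; assumes the PMU exposes at least two gp counters):

	/* Both resolve to &pmu->gp_counters[1]; base disambiguates the MSR family. */
	struct kvm_pmc *ctr = get_gp_pmc(pmu, MSR_IA32_PERFCTR1, MSR_IA32_PERFCTR0);
	struct kvm_pmc *sel = get_gp_pmc(pmu, MSR_P6_EVNTSEL1, MSR_P6_EVNTSEL0);
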
89 | /* returns fixed PMC with the specified MSR */ | ||
90 | static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) | ||
91 | { | ||
92 | int base = MSR_CORE_PERF_FIXED_CTR0; | ||
93 | |||
94 | if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) | ||
95 | return &pmu->fixed_counters[msr - base]; | ||
96 | |||
97 | return NULL; | ||
98 | } | ||
99 | |||
100 | void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel); | ||
101 | void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx); | ||
102 | void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); | ||
103 | |||
104 | void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); | ||
105 | void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); | ||
106 | int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | ||
107 | int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx); | ||
108 | bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr); | ||
109 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
110 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); | ||
111 | void kvm_pmu_refresh(struct kvm_vcpu *vcpu); | ||
112 | void kvm_pmu_reset(struct kvm_vcpu *vcpu); | ||
113 | void kvm_pmu_init(struct kvm_vcpu *vcpu); | ||
114 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | ||
115 | |||
116 | extern struct kvm_pmu_ops intel_pmu_ops; | ||
117 | extern struct kvm_pmu_ops amd_pmu_ops; | ||
118 | #endif /* __KVM_X86_PMU_H */ | ||
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c new file mode 100644 index 000000000000..886aa25a7131 --- /dev/null +++ b/arch/x86/kvm/pmu_amd.c | |||
@@ -0,0 +1,207 @@ | |||
1 | /* | ||
2 | * KVM PMU support for AMD | ||
3 | * | ||
4 | * Copyright 2015, Red Hat, Inc. and/or its affiliates. | ||
5 | * | ||
6 | * Author: | ||
7 | * Wei Huang <wei@redhat.com> | ||
8 | * | ||
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
10 | * the COPYING file in the top-level directory. | ||
11 | * | ||
12 | * Implementation is based on pmu_intel.c file | ||
13 | */ | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/kvm_host.h> | ||
16 | #include <linux/perf_event.h> | ||
17 | #include "x86.h" | ||
18 | #include "cpuid.h" | ||
19 | #include "lapic.h" | ||
20 | #include "pmu.h" | ||
21 | |||
22 | /* duplicated from amd_perfmon_event_map, K7 and above should work. */ | ||
23 | static struct kvm_event_hw_type_mapping amd_event_mapping[] = { | ||
24 | [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, | ||
25 | [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, | ||
26 | [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES }, | ||
27 | [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES }, | ||
28 | [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
29 | [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | ||
30 | [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, | ||
31 | [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | ||
32 | }; | ||
33 | |||
34 | static unsigned amd_find_arch_event(struct kvm_pmu *pmu, | ||
35 | u8 event_select, | ||
36 | u8 unit_mask) | ||
37 | { | ||
38 | int i; | ||
39 | |||
40 | for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++) | ||
41 | if (amd_event_mapping[i].eventsel == event_select | ||
42 | && amd_event_mapping[i].unit_mask == unit_mask) | ||
43 | break; | ||
44 | |||
45 | if (i == ARRAY_SIZE(amd_event_mapping)) | ||
46 | return PERF_COUNT_HW_MAX; | ||
47 | |||
48 | return amd_event_mapping[i].event_type; | ||
49 | } | ||
50 | |||
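
For example (illustrative only), a guest programming event select 0x76 with unit mask 0x00 resolves through the table above; assuming none of the edge/invert/cmask bits are set, reprogram_gp_counter then creates a PERF_TYPE_HARDWARE event rather than a raw one:

	/* Sketch: guest writes 0x76/0x00 into an EVNTSEL MSR. */
	unsigned config = amd_find_arch_event(pmu, 0x76, 0x00);
	/* config == PERF_COUNT_HW_CPU_CYCLES, not PERF_COUNT_HW_MAX,
	 * so the backing perf event is created as PERF_TYPE_HARDWARE. */
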
51 | /* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */ | ||
52 | static unsigned amd_find_fixed_event(int idx) | ||
53 | { | ||
54 | return PERF_COUNT_HW_MAX; | ||
55 | } | ||
56 | |||
57 | /* check if a PMC is enabled by comparing it against global_ctrl bits. Because | ||
58 | * AMD CPUs don't have a global_ctrl MSR, all PMCs are enabled (return TRUE). | ||
59 | */ | ||
60 | static bool amd_pmc_is_enabled(struct kvm_pmc *pmc) | ||
61 | { | ||
62 | return true; | ||
63 | } | ||
64 | |||
65 | static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) | ||
66 | { | ||
67 | return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0); | ||
68 | } | ||
69 | |||
70 | /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ | ||
71 | static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||
72 | { | ||
73 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
74 | |||
75 | idx &= ~(3u << 30); | ||
76 | |||
77 | return (idx >= pmu->nr_arch_gp_counters); | ||
78 | } | ||
79 | |||
80 | /* idx is the ECX register of RDPMC instruction */ | ||
81 | static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx) | ||
82 | { | ||
83 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
84 | struct kvm_pmc *counters; | ||
85 | |||
86 | idx &= ~(3u << 30); | ||
87 | if (idx >= pmu->nr_arch_gp_counters) | ||
88 | return NULL; | ||
89 | counters = pmu->gp_counters; | ||
90 | |||
91 | return &counters[idx]; | ||
92 | } | ||
93 | |||
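
Tying this back to the RDPMC example in the pmu.c NOTE block (illustrative only, not part of the patch):

	/* Sketch: RDPMC with ECX == 0x3 in an AMD guest. */
	struct kvm_pmc *pmc = amd_msr_idx_to_pmc(vcpu, 0x3);
	/* pmc == &pmu->gp_counters[3], i.e. the counter behind
	 * MSR_K7_PERFCTR3 (C001_0007h). */
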
94 | static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) | ||
95 | { | ||
96 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
97 | int ret = false; | ||
98 | |||
99 | ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) || | ||
100 | get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); | ||
101 | |||
102 | return ret; | ||
103 | } | ||
104 | |||
105 | static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | ||
106 | { | ||
107 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
108 | struct kvm_pmc *pmc; | ||
109 | |||
110 | /* MSR_K7_PERFCTRn */ | ||
111 | pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); | ||
112 | if (pmc) { | ||
113 | *data = pmc_read_counter(pmc); | ||
114 | return 0; | ||
115 | } | ||
116 | /* MSR_K7_EVNTSELn */ | ||
117 | pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); | ||
118 | if (pmc) { | ||
119 | *data = pmc->eventsel; | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | return 1; | ||
124 | } | ||
125 | |||
126 | static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
127 | { | ||
128 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
129 | struct kvm_pmc *pmc; | ||
130 | u32 msr = msr_info->index; | ||
131 | u64 data = msr_info->data; | ||
132 | |||
133 | /* MSR_K7_PERFCTRn */ | ||
134 | pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); | ||
135 | if (pmc) { | ||
136 | if (!msr_info->host_initiated) | ||
137 | data = (s64)data; | ||
138 | pmc->counter += data - pmc_read_counter(pmc); | ||
139 | return 0; | ||
140 | } | ||
141 | /* MSR_K7_EVNTSELn */ | ||
142 | pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); | ||
143 | if (pmc) { | ||
144 | if (data == pmc->eventsel) | ||
145 | return 0; | ||
146 | if (!(data & pmu->reserved_bits)) { | ||
147 | reprogram_gp_counter(pmc, data); | ||
148 | return 0; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | return 1; | ||
153 | } | ||
154 | |||
155 | static void amd_pmu_refresh(struct kvm_vcpu *vcpu) | ||
156 | { | ||
157 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
158 | |||
159 | pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; | ||
160 | pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; | ||
161 | pmu->reserved_bits = 0xffffffff00200000ull; | ||
162 | /* not applicable to AMD, but clear them to prevent any fallout */ | ||
163 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | ||
164 | pmu->nr_arch_fixed_counters = 0; | ||
165 | pmu->version = 0; | ||
166 | pmu->global_status = 0; | ||
167 | } | ||
168 | |||
169 | static void amd_pmu_init(struct kvm_vcpu *vcpu) | ||
170 | { | ||
171 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
172 | int i; | ||
173 | |||
174 | for (i = 0; i < AMD64_NUM_COUNTERS ; i++) { | ||
175 | pmu->gp_counters[i].type = KVM_PMC_GP; | ||
176 | pmu->gp_counters[i].vcpu = vcpu; | ||
177 | pmu->gp_counters[i].idx = i; | ||
178 | } | ||
179 | } | ||
180 | |||
181 | static void amd_pmu_reset(struct kvm_vcpu *vcpu) | ||
182 | { | ||
183 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
184 | int i; | ||
185 | |||
186 | for (i = 0; i < AMD64_NUM_COUNTERS; i++) { | ||
187 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | ||
188 | |||
189 | pmc_stop_counter(pmc); | ||
190 | pmc->counter = pmc->eventsel = 0; | ||
191 | } | ||
192 | } | ||
193 | |||
194 | struct kvm_pmu_ops amd_pmu_ops = { | ||
195 | .find_arch_event = amd_find_arch_event, | ||
196 | .find_fixed_event = amd_find_fixed_event, | ||
197 | .pmc_is_enabled = amd_pmc_is_enabled, | ||
198 | .pmc_idx_to_pmc = amd_pmc_idx_to_pmc, | ||
199 | .msr_idx_to_pmc = amd_msr_idx_to_pmc, | ||
200 | .is_valid_msr_idx = amd_is_valid_msr_idx, | ||
201 | .is_valid_msr = amd_is_valid_msr, | ||
202 | .get_msr = amd_pmu_get_msr, | ||
203 | .set_msr = amd_pmu_set_msr, | ||
204 | .refresh = amd_pmu_refresh, | ||
205 | .init = amd_pmu_init, | ||
206 | .reset = amd_pmu_reset, | ||
207 | }; | ||
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c new file mode 100644 index 000000000000..ab38af4f4947 --- /dev/null +++ b/arch/x86/kvm/pmu_intel.c | |||
@@ -0,0 +1,358 @@ | |||
1 | /* | ||
2 | * KVM PMU support for Intel CPUs | ||
3 | * | ||
4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | ||
5 | * | ||
6 | * Authors: | ||
7 | * Avi Kivity <avi@redhat.com> | ||
8 | * Gleb Natapov <gleb@redhat.com> | ||
9 | * | ||
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
11 | * the COPYING file in the top-level directory. | ||
12 | * | ||
13 | */ | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/kvm_host.h> | ||
16 | #include <linux/perf_event.h> | ||
17 | #include <asm/perf_event.h> | ||
18 | #include "x86.h" | ||
19 | #include "cpuid.h" | ||
20 | #include "lapic.h" | ||
21 | #include "pmu.h" | ||
22 | |||
23 | static struct kvm_event_hw_type_mapping intel_arch_events[] = { | ||
24 | /* Index must match CPUID 0x0A.EBX bit vector */ | ||
25 | [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES }, | ||
26 | [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, | ||
27 | [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES }, | ||
28 | [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES }, | ||
29 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, | ||
30 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
31 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | ||
32 | [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, | ||
33 | }; | ||
34 | |||
35 | /* mapping between fixed pmc index and intel_arch_events array */ | ||
36 | static int fixed_pmc_events[] = {1, 0, 7}; | ||
37 | |||
38 | static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) | ||
39 | { | ||
40 | int i; | ||
41 | |||
42 | for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { | ||
43 | u8 new_ctrl = fixed_ctrl_field(data, i); | ||
44 | u8 old_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, i); | ||
45 | struct kvm_pmc *pmc; | ||
46 | |||
47 | pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i); | ||
48 | |||
49 | if (old_ctrl == new_ctrl) | ||
50 | continue; | ||
51 | |||
52 | reprogram_fixed_counter(pmc, new_ctrl, i); | ||
53 | } | ||
54 | |||
55 | pmu->fixed_ctr_ctrl = data; | ||
56 | } | ||
57 | |||
58 | /* This function is called when the global control register has been updated. */ | ||
59 | static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) | ||
60 | { | ||
61 | int bit; | ||
62 | u64 diff = pmu->global_ctrl ^ data; | ||
63 | |||
64 | pmu->global_ctrl = data; | ||
65 | |||
66 | for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) | ||
67 | reprogram_counter(pmu, bit); | ||
68 | } | ||
69 | |||
70 | static unsigned intel_find_arch_event(struct kvm_pmu *pmu, | ||
71 | u8 event_select, | ||
72 | u8 unit_mask) | ||
73 | { | ||
74 | int i; | ||
75 | |||
76 | for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) | ||
77 | if (intel_arch_events[i].eventsel == event_select | ||
78 | && intel_arch_events[i].unit_mask == unit_mask | ||
79 | && (pmu->available_event_types & (1 << i))) | ||
80 | break; | ||
81 | |||
82 | if (i == ARRAY_SIZE(intel_arch_events)) | ||
83 | return PERF_COUNT_HW_MAX; | ||
84 | |||
85 | return intel_arch_events[i].event_type; | ||
86 | } | ||
87 | |||
88 | static unsigned intel_find_fixed_event(int idx) | ||
89 | { | ||
90 | if (idx >= ARRAY_SIZE(fixed_pmc_events)) | ||
91 | return PERF_COUNT_HW_MAX; | ||
92 | |||
93 | return intel_arch_events[fixed_pmc_events[idx]].event_type; | ||
94 | } | ||
95 | |||
96 | /* check if a PMC is enabled by comparing it with global_ctrl bits. */ | ||
97 | static bool intel_pmc_is_enabled(struct kvm_pmc *pmc) | ||
98 | { | ||
99 | struct kvm_pmu *pmu = pmc_to_pmu(pmc); | ||
100 | |||
101 | return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); | ||
102 | } | ||
103 | |||
104 | static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) | ||
105 | { | ||
106 | if (pmc_idx < INTEL_PMC_IDX_FIXED) | ||
107 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx, | ||
108 | MSR_P6_EVNTSEL0); | ||
109 | else { | ||
110 | u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED; | ||
111 | |||
112 | return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0); | ||
113 | } | ||
114 | } | ||
115 | |||
116 | /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ | ||
117 | static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||
118 | { | ||
119 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
120 | bool fixed = idx & (1u << 30); | ||
121 | |||
122 | idx &= ~(3u << 30); | ||
123 | |||
124 | return (!fixed && idx >= pmu->nr_arch_gp_counters) || | ||
125 | (fixed && idx >= pmu->nr_arch_fixed_counters); | ||
126 | } | ||
127 | |||
128 | static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, | ||
129 | unsigned idx) | ||
130 | { | ||
131 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
132 | bool fixed = idx & (1u << 30); | ||
133 | struct kvm_pmc *counters; | ||
134 | |||
135 | idx &= ~(3u << 30); | ||
136 | if (!fixed && idx >= pmu->nr_arch_gp_counters) | ||
137 | return NULL; | ||
138 | if (fixed && idx >= pmu->nr_arch_fixed_counters) | ||
139 | return NULL; | ||
140 | counters = fixed ? pmu->fixed_counters : pmu->gp_counters; | ||
141 | |||
142 | return &counters[idx]; | ||
143 | } | ||
144 | |||
145 | static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) | ||
146 | { | ||
147 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
148 | int ret; | ||
149 | |||
150 | switch (msr) { | ||
151 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
152 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
153 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
154 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
155 | ret = pmu->version > 1; | ||
156 | break; | ||
157 | default: | ||
158 | ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || | ||
159 | get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) || | ||
160 | get_fixed_pmc(pmu, msr); | ||
161 | break; | ||
162 | } | ||
163 | |||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | ||
168 | { | ||
169 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
170 | struct kvm_pmc *pmc; | ||
171 | |||
172 | switch (msr) { | ||
173 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
174 | *data = pmu->fixed_ctr_ctrl; | ||
175 | return 0; | ||
176 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
177 | *data = pmu->global_status; | ||
178 | return 0; | ||
179 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
180 | *data = pmu->global_ctrl; | ||
181 | return 0; | ||
182 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
183 | *data = pmu->global_ovf_ctrl; | ||
184 | return 0; | ||
185 | default: | ||
186 | if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || | ||
187 | (pmc = get_fixed_pmc(pmu, msr))) { | ||
188 | *data = pmc_read_counter(pmc); | ||
189 | return 0; | ||
190 | } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { | ||
191 | *data = pmc->eventsel; | ||
192 | return 0; | ||
193 | } | ||
194 | } | ||
195 | |||
196 | return 1; | ||
197 | } | ||
198 | |||
199 | static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
200 | { | ||
201 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
202 | struct kvm_pmc *pmc; | ||
203 | u32 msr = msr_info->index; | ||
204 | u64 data = msr_info->data; | ||
205 | |||
206 | switch (msr) { | ||
207 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | ||
208 | if (pmu->fixed_ctr_ctrl == data) | ||
209 | return 0; | ||
210 | if (!(data & 0xfffffffffffff444ull)) { | ||
211 | reprogram_fixed_counters(pmu, data); | ||
212 | return 0; | ||
213 | } | ||
214 | break; | ||
215 | case MSR_CORE_PERF_GLOBAL_STATUS: | ||
216 | if (msr_info->host_initiated) { | ||
217 | pmu->global_status = data; | ||
218 | return 0; | ||
219 | } | ||
220 | break; /* RO MSR */ | ||
221 | case MSR_CORE_PERF_GLOBAL_CTRL: | ||
222 | if (pmu->global_ctrl == data) | ||
223 | return 0; | ||
224 | if (!(data & pmu->global_ctrl_mask)) { | ||
225 | global_ctrl_changed(pmu, data); | ||
226 | return 0; | ||
227 | } | ||
228 | break; | ||
229 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||
230 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | ||
231 | if (!msr_info->host_initiated) | ||
232 | pmu->global_status &= ~data; | ||
233 | pmu->global_ovf_ctrl = data; | ||
234 | return 0; | ||
235 | } | ||
236 | break; | ||
237 | default: | ||
238 | if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || | ||
239 | (pmc = get_fixed_pmc(pmu, msr))) { | ||
240 | if (!msr_info->host_initiated) | ||
241 | data = (s64)(s32)data; | ||
242 | pmc->counter += data - pmc_read_counter(pmc); | ||
243 | return 0; | ||
244 | } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { | ||
245 | if (data == pmc->eventsel) | ||
246 | return 0; | ||
247 | if (!(data & pmu->reserved_bits)) { | ||
248 | reprogram_gp_counter(pmc, data); | ||
249 | return 0; | ||
250 | } | ||
251 | } | ||
252 | } | ||
253 | |||
254 | return 1; | ||
255 | } | ||
256 | |||
257 | static void intel_pmu_refresh(struct kvm_vcpu *vcpu) | ||
258 | { | ||
259 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
260 | struct kvm_cpuid_entry2 *entry; | ||
261 | union cpuid10_eax eax; | ||
262 | union cpuid10_edx edx; | ||
263 | |||
264 | pmu->nr_arch_gp_counters = 0; | ||
265 | pmu->nr_arch_fixed_counters = 0; | ||
266 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | ||
267 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | ||
268 | pmu->version = 0; | ||
269 | pmu->reserved_bits = 0xffffffff00200000ull; | ||
270 | |||
271 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | ||
272 | if (!entry) | ||
273 | return; | ||
274 | eax.full = entry->eax; | ||
275 | edx.full = entry->edx; | ||
276 | |||
277 | pmu->version = eax.split.version_id; | ||
278 | if (!pmu->version) | ||
279 | return; | ||
280 | |||
281 | pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, | ||
282 | INTEL_PMC_MAX_GENERIC); | ||
283 | pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; | ||
284 | pmu->available_event_types = ~entry->ebx & | ||
285 | ((1ull << eax.split.mask_length) - 1); | ||
286 | |||
287 | if (pmu->version == 1) { | ||
288 | pmu->nr_arch_fixed_counters = 0; | ||
289 | } else { | ||
290 | pmu->nr_arch_fixed_counters = | ||
291 | min_t(int, edx.split.num_counters_fixed, | ||
292 | INTEL_PMC_MAX_FIXED); | ||
293 | pmu->counter_bitmask[KVM_PMC_FIXED] = | ||
294 | ((u64)1 << edx.split.bit_width_fixed) - 1; | ||
295 | } | ||
296 | |||
297 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | ||
298 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); | ||
299 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | ||
300 | |||
301 | entry = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
302 | if (entry && | ||
303 | (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && | ||
304 | (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) | ||
305 | pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; | ||
306 | } | ||
307 | |||
308 | static void intel_pmu_init(struct kvm_vcpu *vcpu) | ||
309 | { | ||
310 | int i; | ||
311 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
312 | |||
313 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { | ||
314 | pmu->gp_counters[i].type = KVM_PMC_GP; | ||
315 | pmu->gp_counters[i].vcpu = vcpu; | ||
316 | pmu->gp_counters[i].idx = i; | ||
317 | } | ||
318 | |||
319 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { | ||
320 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; | ||
321 | pmu->fixed_counters[i].vcpu = vcpu; | ||
322 | pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; | ||
323 | } | ||
324 | } | ||
325 | |||
326 | static void intel_pmu_reset(struct kvm_vcpu *vcpu) | ||
327 | { | ||
328 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||
329 | int i; | ||
330 | |||
331 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { | ||
332 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | ||
333 | |||
334 | pmc_stop_counter(pmc); | ||
335 | pmc->counter = pmc->eventsel = 0; | ||
336 | } | ||
337 | |||
338 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) | ||
339 | pmc_stop_counter(&pmu->fixed_counters[i]); | ||
340 | |||
341 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = | ||
342 | pmu->global_ovf_ctrl = 0; | ||
343 | } | ||
344 | |||
345 | struct kvm_pmu_ops intel_pmu_ops = { | ||
346 | .find_arch_event = intel_find_arch_event, | ||
347 | .find_fixed_event = intel_find_fixed_event, | ||
348 | .pmc_is_enabled = intel_pmc_is_enabled, | ||
349 | .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, | ||
350 | .msr_idx_to_pmc = intel_msr_idx_to_pmc, | ||
351 | .is_valid_msr_idx = intel_is_valid_msr_idx, | ||
352 | .is_valid_msr = intel_is_valid_msr, | ||
353 | .get_msr = intel_pmu_get_msr, | ||
354 | .set_msr = intel_pmu_set_msr, | ||
355 | .refresh = intel_pmu_refresh, | ||
356 | .init = intel_pmu_init, | ||
357 | .reset = intel_pmu_reset, | ||
358 | }; | ||
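For context, the idx decoded by intel_is_valid_msr_idx() and intel_msr_idx_to_pmc() above is the guest's RDPMC index, where bit 30 selects the fixed-function bank and the low bits select the counter within it. A minimal guest-side sketch of that encoding (illustrative only, not part of the patch):

    static inline unsigned long long guest_rdpmc(unsigned int counter, int fixed)
    {
            unsigned int ecx = counter | (fixed ? (1u << 30) : 0);
            unsigned int lo, hi;

            /* RDPMC returns the counter selected by ECX in EDX:EAX */
            asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (ecx));
            return ((unsigned long long)hi << 32) | lo;
    }

For example, guest_rdpmc(0, 0) reads general-purpose counter 0 and guest_rdpmc(0, 1) reads fixed counter 0 (instructions retired); both are resolved through intel_msr_idx_to_pmc() when KVM emulates the intercepted RDPMC.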
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9afa233b5482..851a9a1c6dfc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include "kvm_cache_regs.h" | 21 | #include "kvm_cache_regs.h" |
22 | #include "x86.h" | 22 | #include "x86.h" |
23 | #include "cpuid.h" | 23 | #include "cpuid.h" |
24 | #include "pmu.h" | ||
24 | 25 | ||
25 | #include <linux/module.h> | 26 | #include <linux/module.h> |
26 | #include <linux/mod_devicetable.h> | 27 | #include <linux/mod_devicetable.h> |
@@ -511,8 +512,10 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
511 | { | 512 | { |
512 | struct vcpu_svm *svm = to_svm(vcpu); | 513 | struct vcpu_svm *svm = to_svm(vcpu); |
513 | 514 | ||
514 | if (svm->vmcb->control.next_rip != 0) | 515 | if (svm->vmcb->control.next_rip != 0) { |
516 | WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); | ||
515 | svm->next_rip = svm->vmcb->control.next_rip; | 517 | svm->next_rip = svm->vmcb->control.next_rip; |
518 | } | ||
516 | 519 | ||
517 | if (!svm->next_rip) { | 520 | if (!svm->next_rip) { |
518 | if (emulate_instruction(vcpu, EMULTYPE_SKIP) != | 521 | if (emulate_instruction(vcpu, EMULTYPE_SKIP) != |
@@ -1082,7 +1085,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | |||
1082 | return target_tsc - tsc; | 1085 | return target_tsc - tsc; |
1083 | } | 1086 | } |
1084 | 1087 | ||
1085 | static void init_vmcb(struct vcpu_svm *svm) | 1088 | static void init_vmcb(struct vcpu_svm *svm, bool init_event) |
1086 | { | 1089 | { |
1087 | struct vmcb_control_area *control = &svm->vmcb->control; | 1090 | struct vmcb_control_area *control = &svm->vmcb->control; |
1088 | struct vmcb_save_area *save = &svm->vmcb->save; | 1091 | struct vmcb_save_area *save = &svm->vmcb->save; |
@@ -1153,17 +1156,17 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1153 | init_sys_seg(&save->ldtr, SEG_TYPE_LDT); | 1156 | init_sys_seg(&save->ldtr, SEG_TYPE_LDT); |
1154 | init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); | 1157 | init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); |
1155 | 1158 | ||
1156 | svm_set_efer(&svm->vcpu, 0); | 1159 | if (!init_event) |
1160 | svm_set_efer(&svm->vcpu, 0); | ||
1157 | save->dr6 = 0xffff0ff0; | 1161 | save->dr6 = 0xffff0ff0; |
1158 | kvm_set_rflags(&svm->vcpu, 2); | 1162 | kvm_set_rflags(&svm->vcpu, 2); |
1159 | save->rip = 0x0000fff0; | 1163 | save->rip = 0x0000fff0; |
1160 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 1164 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
1161 | 1165 | ||
1162 | /* | 1166 | /* |
1163 | * This is the guest-visible cr0 value. | ||
1164 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 1167 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
1168 | * It also updates the guest-visible cr0 value. | ||
1165 | */ | 1169 | */ |
1166 | svm->vcpu.arch.cr0 = 0; | ||
1167 | (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); | 1170 | (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); |
1168 | 1171 | ||
1169 | save->cr4 = X86_CR4_PAE; | 1172 | save->cr4 = X86_CR4_PAE; |
@@ -1176,7 +1179,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1176 | clr_exception_intercept(svm, PF_VECTOR); | 1179 | clr_exception_intercept(svm, PF_VECTOR); |
1177 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); | 1180 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); |
1178 | clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); | 1181 | clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); |
1179 | save->g_pat = 0x0007040600070406ULL; | 1182 | save->g_pat = svm->vcpu.arch.pat; |
1180 | save->cr3 = 0; | 1183 | save->cr3 = 0; |
1181 | save->cr4 = 0; | 1184 | save->cr4 = 0; |
1182 | } | 1185 | } |
@@ -1195,13 +1198,19 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1195 | enable_gif(svm); | 1198 | enable_gif(svm); |
1196 | } | 1199 | } |
1197 | 1200 | ||
1198 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu) | 1201 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
1199 | { | 1202 | { |
1200 | struct vcpu_svm *svm = to_svm(vcpu); | 1203 | struct vcpu_svm *svm = to_svm(vcpu); |
1201 | u32 dummy; | 1204 | u32 dummy; |
1202 | u32 eax = 1; | 1205 | u32 eax = 1; |
1203 | 1206 | ||
1204 | init_vmcb(svm); | 1207 | if (!init_event) { |
1208 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | | ||
1209 | MSR_IA32_APICBASE_ENABLE; | ||
1210 | if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) | ||
1211 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | ||
1212 | } | ||
1213 | init_vmcb(svm, init_event); | ||
1205 | 1214 | ||
1206 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1215 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); |
1207 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1216 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
@@ -1257,12 +1266,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1257 | clear_page(svm->vmcb); | 1266 | clear_page(svm->vmcb); |
1258 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | 1267 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; |
1259 | svm->asid_generation = 0; | 1268 | svm->asid_generation = 0; |
1260 | init_vmcb(svm); | 1269 | init_vmcb(svm, false); |
1261 | |||
1262 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | | ||
1263 | MSR_IA32_APICBASE_ENABLE; | ||
1264 | if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) | ||
1265 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | ||
1266 | 1270 | ||
1267 | svm_init_osvw(&svm->vcpu); | 1271 | svm_init_osvw(&svm->vcpu); |
1268 | 1272 | ||
@@ -1575,7 +1579,8 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1575 | * does not do it - this results in some delay at | 1579 | * does not do it - this results in some delay at |
1576 | * reboot | 1580 | * reboot |
1577 | */ | 1581 | */ |
1578 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | 1582 | if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED)) |
1583 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | ||
1579 | svm->vmcb->save.cr0 = cr0; | 1584 | svm->vmcb->save.cr0 = cr0; |
1580 | mark_dirty(svm->vmcb, VMCB_CR); | 1585 | mark_dirty(svm->vmcb, VMCB_CR); |
1581 | update_cr0_intercept(svm); | 1586 | update_cr0_intercept(svm); |
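The new KVM_QUIRK_CD_NW_CLEARED check above lets userspace opt out of SVM's legacy behaviour of force-clearing CR0.CD and CR0.NW in the VMCB copy of CR0. A userspace sketch of turning the quirk off, assuming the KVM_CAP_DISABLE_QUIRKS / KVM_QUIRK_CD_NW_CLEARED interface introduced elsewhere in this series:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdio.h>

    /* vm_fd is the VM file descriptor from KVM_CREATE_VM; availability of
     * KVM_CAP_DISABLE_QUIRKS should be probed with KVM_CHECK_EXTENSION first. */
    static int disable_cd_nw_quirk(int vm_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_DISABLE_QUIRKS,
                    .args = { KVM_QUIRK_CD_NW_CLEARED },
            };

            if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0) {
                    perror("KVM_ENABLE_CAP(KVM_CAP_DISABLE_QUIRKS)");
                    return -1;
            }
            return 0;
    }

With the quirk disabled, kvm->arch.disabled_quirks carries the bit and svm_set_cr0() preserves CD/NW exactly as the guest wrote them.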
@@ -1883,7 +1888,7 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
1883 | * so reinitialize it. | 1888 | * so reinitialize it. |
1884 | */ | 1889 | */ |
1885 | clear_page(svm->vmcb); | 1890 | clear_page(svm->vmcb); |
1886 | init_vmcb(svm); | 1891 | init_vmcb(svm, false); |
1887 | 1892 | ||
1888 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 1893 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; |
1889 | return 0; | 1894 | return 0; |
@@ -1953,8 +1958,8 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) | |||
1953 | u64 pdpte; | 1958 | u64 pdpte; |
1954 | int ret; | 1959 | int ret; |
1955 | 1960 | ||
1956 | ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte, | 1961 | ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, |
1957 | offset_in_page(cr3) + index * 8, 8); | 1962 | offset_in_page(cr3) + index * 8, 8); |
1958 | if (ret) | 1963 | if (ret) |
1959 | return 0; | 1964 | return 0; |
1960 | return pdpte; | 1965 | return pdpte; |
@@ -2112,7 +2117,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) | |||
2112 | 2117 | ||
2113 | might_sleep(); | 2118 | might_sleep(); |
2114 | 2119 | ||
2115 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 2120 | page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT); |
2116 | if (is_error_page(page)) | 2121 | if (is_error_page(page)) |
2117 | goto error; | 2122 | goto error; |
2118 | 2123 | ||
@@ -2151,7 +2156,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm) | |||
2151 | mask = (0xf >> (4 - size)) << start_bit; | 2156 | mask = (0xf >> (4 - size)) << start_bit; |
2152 | val = 0; | 2157 | val = 0; |
2153 | 2158 | ||
2154 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len)) | 2159 | if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len)) |
2155 | return NESTED_EXIT_DONE; | 2160 | return NESTED_EXIT_DONE; |
2156 | 2161 | ||
2157 | return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | 2162 | return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
@@ -2176,7 +2181,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) | |||
2176 | /* Offset is in 32 bit units but need in 8 bit units */ | 2181 | /* Offset is in 32 bit units but need in 8 bit units */ |
2177 | offset *= 4; | 2182 | offset *= 4; |
2178 | 2183 | ||
2179 | if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) | 2184 | if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4)) |
2180 | return NESTED_EXIT_DONE; | 2185 | return NESTED_EXIT_DONE; |
2181 | 2186 | ||
2182 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | 2187 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
@@ -2447,7 +2452,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | |||
2447 | p = msrpm_offsets[i]; | 2452 | p = msrpm_offsets[i]; |
2448 | offset = svm->nested.vmcb_msrpm + (p * 4); | 2453 | offset = svm->nested.vmcb_msrpm + (p * 4); |
2449 | 2454 | ||
2450 | if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) | 2455 | if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) |
2451 | return false; | 2456 | return false; |
2452 | 2457 | ||
2453 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | 2458 | svm->nested.msrpm[p] = svm->msrpm[p] | value; |
@@ -3067,42 +3072,42 @@ static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) | |||
3067 | svm_scale_tsc(vcpu, host_tsc); | 3072 | svm_scale_tsc(vcpu, host_tsc); |
3068 | } | 3073 | } |
3069 | 3074 | ||
3070 | static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | 3075 | static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
3071 | { | 3076 | { |
3072 | struct vcpu_svm *svm = to_svm(vcpu); | 3077 | struct vcpu_svm *svm = to_svm(vcpu); |
3073 | 3078 | ||
3074 | switch (ecx) { | 3079 | switch (msr_info->index) { |
3075 | case MSR_IA32_TSC: { | 3080 | case MSR_IA32_TSC: { |
3076 | *data = svm->vmcb->control.tsc_offset + | 3081 | msr_info->data = svm->vmcb->control.tsc_offset + |
3077 | svm_scale_tsc(vcpu, native_read_tsc()); | 3082 | svm_scale_tsc(vcpu, native_read_tsc()); |
3078 | 3083 | ||
3079 | break; | 3084 | break; |
3080 | } | 3085 | } |
3081 | case MSR_STAR: | 3086 | case MSR_STAR: |
3082 | *data = svm->vmcb->save.star; | 3087 | msr_info->data = svm->vmcb->save.star; |
3083 | break; | 3088 | break; |
3084 | #ifdef CONFIG_X86_64 | 3089 | #ifdef CONFIG_X86_64 |
3085 | case MSR_LSTAR: | 3090 | case MSR_LSTAR: |
3086 | *data = svm->vmcb->save.lstar; | 3091 | msr_info->data = svm->vmcb->save.lstar; |
3087 | break; | 3092 | break; |
3088 | case MSR_CSTAR: | 3093 | case MSR_CSTAR: |
3089 | *data = svm->vmcb->save.cstar; | 3094 | msr_info->data = svm->vmcb->save.cstar; |
3090 | break; | 3095 | break; |
3091 | case MSR_KERNEL_GS_BASE: | 3096 | case MSR_KERNEL_GS_BASE: |
3092 | *data = svm->vmcb->save.kernel_gs_base; | 3097 | msr_info->data = svm->vmcb->save.kernel_gs_base; |
3093 | break; | 3098 | break; |
3094 | case MSR_SYSCALL_MASK: | 3099 | case MSR_SYSCALL_MASK: |
3095 | *data = svm->vmcb->save.sfmask; | 3100 | msr_info->data = svm->vmcb->save.sfmask; |
3096 | break; | 3101 | break; |
3097 | #endif | 3102 | #endif |
3098 | case MSR_IA32_SYSENTER_CS: | 3103 | case MSR_IA32_SYSENTER_CS: |
3099 | *data = svm->vmcb->save.sysenter_cs; | 3104 | msr_info->data = svm->vmcb->save.sysenter_cs; |
3100 | break; | 3105 | break; |
3101 | case MSR_IA32_SYSENTER_EIP: | 3106 | case MSR_IA32_SYSENTER_EIP: |
3102 | *data = svm->sysenter_eip; | 3107 | msr_info->data = svm->sysenter_eip; |
3103 | break; | 3108 | break; |
3104 | case MSR_IA32_SYSENTER_ESP: | 3109 | case MSR_IA32_SYSENTER_ESP: |
3105 | *data = svm->sysenter_esp; | 3110 | msr_info->data = svm->sysenter_esp; |
3106 | break; | 3111 | break; |
3107 | /* | 3112 | /* |
3108 | * Nobody will change the following 5 values in the VMCB so we can | 3113 | * Nobody will change the following 5 values in the VMCB so we can |
@@ -3110,31 +3115,31 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
3110 | * implemented. | 3115 | * implemented. |
3111 | */ | 3116 | */ |
3112 | case MSR_IA32_DEBUGCTLMSR: | 3117 | case MSR_IA32_DEBUGCTLMSR: |
3113 | *data = svm->vmcb->save.dbgctl; | 3118 | msr_info->data = svm->vmcb->save.dbgctl; |
3114 | break; | 3119 | break; |
3115 | case MSR_IA32_LASTBRANCHFROMIP: | 3120 | case MSR_IA32_LASTBRANCHFROMIP: |
3116 | *data = svm->vmcb->save.br_from; | 3121 | msr_info->data = svm->vmcb->save.br_from; |
3117 | break; | 3122 | break; |
3118 | case MSR_IA32_LASTBRANCHTOIP: | 3123 | case MSR_IA32_LASTBRANCHTOIP: |
3119 | *data = svm->vmcb->save.br_to; | 3124 | msr_info->data = svm->vmcb->save.br_to; |
3120 | break; | 3125 | break; |
3121 | case MSR_IA32_LASTINTFROMIP: | 3126 | case MSR_IA32_LASTINTFROMIP: |
3122 | *data = svm->vmcb->save.last_excp_from; | 3127 | msr_info->data = svm->vmcb->save.last_excp_from; |
3123 | break; | 3128 | break; |
3124 | case MSR_IA32_LASTINTTOIP: | 3129 | case MSR_IA32_LASTINTTOIP: |
3125 | *data = svm->vmcb->save.last_excp_to; | 3130 | msr_info->data = svm->vmcb->save.last_excp_to; |
3126 | break; | 3131 | break; |
3127 | case MSR_VM_HSAVE_PA: | 3132 | case MSR_VM_HSAVE_PA: |
3128 | *data = svm->nested.hsave_msr; | 3133 | msr_info->data = svm->nested.hsave_msr; |
3129 | break; | 3134 | break; |
3130 | case MSR_VM_CR: | 3135 | case MSR_VM_CR: |
3131 | *data = svm->nested.vm_cr_msr; | 3136 | msr_info->data = svm->nested.vm_cr_msr; |
3132 | break; | 3137 | break; |
3133 | case MSR_IA32_UCODE_REV: | 3138 | case MSR_IA32_UCODE_REV: |
3134 | *data = 0x01000065; | 3139 | msr_info->data = 0x01000065; |
3135 | break; | 3140 | break; |
3136 | default: | 3141 | default: |
3137 | return kvm_get_msr_common(vcpu, ecx, data); | 3142 | return kvm_get_msr_common(vcpu, msr_info); |
3138 | } | 3143 | } |
3139 | return 0; | 3144 | return 0; |
3140 | } | 3145 | } |
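Converting svm_get_msr() (and, below, vmx_get_msr()) to take struct msr_data threads the host_initiated flag down into the MSR handlers, so reads coming from userspace via KVM_GET_MSRS can be told apart from guest RDMSR. For reference, the descriptor looks roughly like this (sketch of the definition in arch/x86/include/asm/kvm_host.h):

    struct msr_data {
            bool host_initiated;    /* true for KVM_{GET,SET}_MSRS from userspace */
            u32 index;              /* MSR number (ECX for guest accesses) */
            u64 data;               /* value read, or value to be written */
    };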
@@ -3142,16 +3147,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
3142 | static int rdmsr_interception(struct vcpu_svm *svm) | 3147 | static int rdmsr_interception(struct vcpu_svm *svm) |
3143 | { | 3148 | { |
3144 | u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); | 3149 | u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); |
3145 | u64 data; | 3150 | struct msr_data msr_info; |
3146 | 3151 | ||
3147 | if (svm_get_msr(&svm->vcpu, ecx, &data)) { | 3152 | msr_info.index = ecx; |
3153 | msr_info.host_initiated = false; | ||
3154 | if (svm_get_msr(&svm->vcpu, &msr_info)) { | ||
3148 | trace_kvm_msr_read_ex(ecx); | 3155 | trace_kvm_msr_read_ex(ecx); |
3149 | kvm_inject_gp(&svm->vcpu, 0); | 3156 | kvm_inject_gp(&svm->vcpu, 0); |
3150 | } else { | 3157 | } else { |
3151 | trace_kvm_msr_read(ecx, data); | 3158 | trace_kvm_msr_read(ecx, msr_info.data); |
3152 | 3159 | ||
3153 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff); | 3160 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, |
3154 | kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32); | 3161 | msr_info.data & 0xffffffff); |
3162 | kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, | ||
3163 | msr_info.data >> 32); | ||
3155 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 3164 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
3156 | skip_emulated_instruction(&svm->vcpu); | 3165 | skip_emulated_instruction(&svm->vcpu); |
3157 | } | 3166 | } |
@@ -3388,6 +3397,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3388 | [SVM_EXIT_MWAIT] = mwait_interception, | 3397 | [SVM_EXIT_MWAIT] = mwait_interception, |
3389 | [SVM_EXIT_XSETBV] = xsetbv_interception, | 3398 | [SVM_EXIT_XSETBV] = xsetbv_interception, |
3390 | [SVM_EXIT_NPF] = pf_interception, | 3399 | [SVM_EXIT_NPF] = pf_interception, |
3400 | [SVM_EXIT_RSM] = emulate_on_interception, | ||
3391 | }; | 3401 | }; |
3392 | 3402 | ||
3393 | static void dump_vmcb(struct kvm_vcpu *vcpu) | 3403 | static void dump_vmcb(struct kvm_vcpu *vcpu) |
@@ -4073,6 +4083,11 @@ static bool svm_cpu_has_accelerated_tpr(void) | |||
4073 | return false; | 4083 | return false; |
4074 | } | 4084 | } |
4075 | 4085 | ||
4086 | static bool svm_has_high_real_mode_segbase(void) | ||
4087 | { | ||
4088 | return true; | ||
4089 | } | ||
4090 | |||
4076 | static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | 4091 | static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
4077 | { | 4092 | { |
4078 | return 0; | 4093 | return 0; |
@@ -4317,7 +4332,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, | |||
4317 | break; | 4332 | break; |
4318 | } | 4333 | } |
4319 | 4334 | ||
4320 | vmcb->control.next_rip = info->next_rip; | 4335 | /* TODO: Advertise NRIPS to guest hypervisor unconditionally */ |
4336 | if (static_cpu_has(X86_FEATURE_NRIPS)) | ||
4337 | vmcb->control.next_rip = info->next_rip; | ||
4321 | vmcb->control.exit_code = icpt_info.exit_code; | 4338 | vmcb->control.exit_code = icpt_info.exit_code; |
4322 | vmexit = nested_svm_exit_handled(svm); | 4339 | vmexit = nested_svm_exit_handled(svm); |
4323 | 4340 | ||
@@ -4346,6 +4363,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4346 | .hardware_enable = svm_hardware_enable, | 4363 | .hardware_enable = svm_hardware_enable, |
4347 | .hardware_disable = svm_hardware_disable, | 4364 | .hardware_disable = svm_hardware_disable, |
4348 | .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, | 4365 | .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, |
4366 | .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, | ||
4349 | 4367 | ||
4350 | .vcpu_create = svm_create_vcpu, | 4368 | .vcpu_create = svm_create_vcpu, |
4351 | .vcpu_free = svm_free_vcpu, | 4369 | .vcpu_free = svm_free_vcpu, |
@@ -4440,6 +4458,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4440 | .handle_external_intr = svm_handle_external_intr, | 4458 | .handle_external_intr = svm_handle_external_intr, |
4441 | 4459 | ||
4442 | .sched_in = svm_sched_in, | 4460 | .sched_in = svm_sched_in, |
4461 | |||
4462 | .pmu_ops = &amd_pmu_ops, | ||
4443 | }; | 4463 | }; |
4444 | 4464 | ||
4445 | static int __init svm_init(void) | 4465 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7c7bc8bef21f..4eae7c35ddf5 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -952,6 +952,28 @@ TRACE_EVENT(kvm_wait_lapic_expire, | |||
952 | __entry->delta < 0 ? "early" : "late") | 952 | __entry->delta < 0 ? "early" : "late") |
953 | ); | 953 | ); |
954 | 954 | ||
955 | TRACE_EVENT(kvm_enter_smm, | ||
956 | TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering), | ||
957 | TP_ARGS(vcpu_id, smbase, entering), | ||
958 | |||
959 | TP_STRUCT__entry( | ||
960 | __field( unsigned int, vcpu_id ) | ||
961 | __field( u64, smbase ) | ||
962 | __field( bool, entering ) | ||
963 | ), | ||
964 | |||
965 | TP_fast_assign( | ||
966 | __entry->vcpu_id = vcpu_id; | ||
967 | __entry->smbase = smbase; | ||
968 | __entry->entering = entering; | ||
969 | ), | ||
970 | |||
971 | TP_printk("vcpu %u: %s SMM, smbase 0x%llx", | ||
972 | __entry->vcpu_id, | ||
973 | __entry->entering ? "entering" : "leaving", | ||
974 | __entry->smbase) | ||
975 | ); | ||
976 | |||
955 | #endif /* _TRACE_KVM_H */ | 977 | #endif /* _TRACE_KVM_H */ |
956 | 978 | ||
957 | #undef TRACE_INCLUDE_PATH | 979 | #undef TRACE_INCLUDE_PATH |
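The TRACE_EVENT above generates trace_kvm_enter_smm(), which the SMM entry and RSM paths added elsewhere in this series are expected to call in roughly this form (call sites assumed, they are not part of this hunk):

    /* on injecting an SMI and entering SMM */
    trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);

    /* on emulating RSM and leaving SMM */
    trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);

Once built with tracing, the event can be enabled from userspace with, for example, echo 1 > /sys/kernel/debug/tracing/events/kvm/kvm_enter_smm/enable.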
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e11dd59398f1..ab53d80b0f64 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <asm/apic.h> | 47 | #include <asm/apic.h> |
48 | 48 | ||
49 | #include "trace.h" | 49 | #include "trace.h" |
50 | #include "pmu.h" | ||
50 | 51 | ||
51 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 52 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
52 | #define __ex_clear(x, reg) \ | 53 | #define __ex_clear(x, reg) \ |
@@ -785,7 +786,7 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | |||
785 | 786 | ||
786 | static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) | 787 | static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) |
787 | { | 788 | { |
788 | struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT); | 789 | struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT); |
789 | if (is_error_page(page)) | 790 | if (is_error_page(page)) |
790 | return NULL; | 791 | return NULL; |
791 | 792 | ||
@@ -2169,8 +2170,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) | |||
2169 | 2170 | ||
2170 | if (is_guest_mode(vcpu)) | 2171 | if (is_guest_mode(vcpu)) |
2171 | msr_bitmap = vmx_msr_bitmap_nested; | 2172 | msr_bitmap = vmx_msr_bitmap_nested; |
2172 | else if (irqchip_in_kernel(vcpu->kvm) && | 2173 | else if (vcpu->arch.apic_base & X2APIC_ENABLE) { |
2173 | apic_x2apic_mode(vcpu->arch.apic)) { | ||
2174 | if (is_long_mode(vcpu)) | 2174 | if (is_long_mode(vcpu)) |
2175 | msr_bitmap = vmx_msr_bitmap_longmode_x2apic; | 2175 | msr_bitmap = vmx_msr_bitmap_longmode_x2apic; |
2176 | else | 2176 | else |
@@ -2622,76 +2622,69 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2622 | * Returns 0 on success, non-0 otherwise. | 2622 | * Returns 0 on success, non-0 otherwise. |
2623 | * Assumes vcpu_load() was already called. | 2623 | * Assumes vcpu_load() was already called. |
2624 | */ | 2624 | */ |
2625 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 2625 | static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2626 | { | 2626 | { |
2627 | u64 data; | ||
2628 | struct shared_msr_entry *msr; | 2627 | struct shared_msr_entry *msr; |
2629 | 2628 | ||
2630 | if (!pdata) { | 2629 | switch (msr_info->index) { |
2631 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); | ||
2632 | return -EINVAL; | ||
2633 | } | ||
2634 | |||
2635 | switch (msr_index) { | ||
2636 | #ifdef CONFIG_X86_64 | 2630 | #ifdef CONFIG_X86_64 |
2637 | case MSR_FS_BASE: | 2631 | case MSR_FS_BASE: |
2638 | data = vmcs_readl(GUEST_FS_BASE); | 2632 | msr_info->data = vmcs_readl(GUEST_FS_BASE); |
2639 | break; | 2633 | break; |
2640 | case MSR_GS_BASE: | 2634 | case MSR_GS_BASE: |
2641 | data = vmcs_readl(GUEST_GS_BASE); | 2635 | msr_info->data = vmcs_readl(GUEST_GS_BASE); |
2642 | break; | 2636 | break; |
2643 | case MSR_KERNEL_GS_BASE: | 2637 | case MSR_KERNEL_GS_BASE: |
2644 | vmx_load_host_state(to_vmx(vcpu)); | 2638 | vmx_load_host_state(to_vmx(vcpu)); |
2645 | data = to_vmx(vcpu)->msr_guest_kernel_gs_base; | 2639 | msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base; |
2646 | break; | 2640 | break; |
2647 | #endif | 2641 | #endif |
2648 | case MSR_EFER: | 2642 | case MSR_EFER: |
2649 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 2643 | return kvm_get_msr_common(vcpu, msr_info); |
2650 | case MSR_IA32_TSC: | 2644 | case MSR_IA32_TSC: |
2651 | data = guest_read_tsc(); | 2645 | msr_info->data = guest_read_tsc(); |
2652 | break; | 2646 | break; |
2653 | case MSR_IA32_SYSENTER_CS: | 2647 | case MSR_IA32_SYSENTER_CS: |
2654 | data = vmcs_read32(GUEST_SYSENTER_CS); | 2648 | msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); |
2655 | break; | 2649 | break; |
2656 | case MSR_IA32_SYSENTER_EIP: | 2650 | case MSR_IA32_SYSENTER_EIP: |
2657 | data = vmcs_readl(GUEST_SYSENTER_EIP); | 2651 | msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); |
2658 | break; | 2652 | break; |
2659 | case MSR_IA32_SYSENTER_ESP: | 2653 | case MSR_IA32_SYSENTER_ESP: |
2660 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 2654 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); |
2661 | break; | 2655 | break; |
2662 | case MSR_IA32_BNDCFGS: | 2656 | case MSR_IA32_BNDCFGS: |
2663 | if (!vmx_mpx_supported()) | 2657 | if (!vmx_mpx_supported()) |
2664 | return 1; | 2658 | return 1; |
2665 | data = vmcs_read64(GUEST_BNDCFGS); | 2659 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); |
2666 | break; | 2660 | break; |
2667 | case MSR_IA32_FEATURE_CONTROL: | 2661 | case MSR_IA32_FEATURE_CONTROL: |
2668 | if (!nested_vmx_allowed(vcpu)) | 2662 | if (!nested_vmx_allowed(vcpu)) |
2669 | return 1; | 2663 | return 1; |
2670 | data = to_vmx(vcpu)->nested.msr_ia32_feature_control; | 2664 | msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control; |
2671 | break; | 2665 | break; |
2672 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | 2666 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: |
2673 | if (!nested_vmx_allowed(vcpu)) | 2667 | if (!nested_vmx_allowed(vcpu)) |
2674 | return 1; | 2668 | return 1; |
2675 | return vmx_get_vmx_msr(vcpu, msr_index, pdata); | 2669 | return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data); |
2676 | case MSR_IA32_XSS: | 2670 | case MSR_IA32_XSS: |
2677 | if (!vmx_xsaves_supported()) | 2671 | if (!vmx_xsaves_supported()) |
2678 | return 1; | 2672 | return 1; |
2679 | data = vcpu->arch.ia32_xss; | 2673 | msr_info->data = vcpu->arch.ia32_xss; |
2680 | break; | 2674 | break; |
2681 | case MSR_TSC_AUX: | 2675 | case MSR_TSC_AUX: |
2682 | if (!to_vmx(vcpu)->rdtscp_enabled) | 2676 | if (!to_vmx(vcpu)->rdtscp_enabled) |
2683 | return 1; | 2677 | return 1; |
2684 | /* Otherwise falls through */ | 2678 | /* Otherwise falls through */ |
2685 | default: | 2679 | default: |
2686 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2680 | msr = find_msr_entry(to_vmx(vcpu), msr_info->index); |
2687 | if (msr) { | 2681 | if (msr) { |
2688 | data = msr->data; | 2682 | msr_info->data = msr->data; |
2689 | break; | 2683 | break; |
2690 | } | 2684 | } |
2691 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 2685 | return kvm_get_msr_common(vcpu, msr_info); |
2692 | } | 2686 | } |
2693 | 2687 | ||
2694 | *pdata = data; | ||
2695 | return 0; | 2688 | return 0; |
2696 | } | 2689 | } |
2697 | 2690 | ||
@@ -4122,7 +4115,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
4122 | kvm_userspace_mem.flags = 0; | 4115 | kvm_userspace_mem.flags = 0; |
4123 | kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE; | 4116 | kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE; |
4124 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 4117 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
4125 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); | 4118 | r = __x86_set_memory_region(kvm, &kvm_userspace_mem); |
4126 | if (r) | 4119 | if (r) |
4127 | goto out; | 4120 | goto out; |
4128 | 4121 | ||
@@ -4157,7 +4150,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
4157 | kvm_userspace_mem.guest_phys_addr = | 4150 | kvm_userspace_mem.guest_phys_addr = |
4158 | kvm->arch.ept_identity_map_addr; | 4151 | kvm->arch.ept_identity_map_addr; |
4159 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 4152 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
4160 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); | 4153 | r = __x86_set_memory_region(kvm, &kvm_userspace_mem); |
4161 | 4154 | ||
4162 | return r; | 4155 | return r; |
4163 | } | 4156 | } |
@@ -4666,16 +4659,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4666 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | 4659 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); |
4667 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); | 4660 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); |
4668 | 4661 | ||
4669 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | 4662 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) |
4670 | u32 msr_low, msr_high; | 4663 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); |
4671 | u64 host_pat; | ||
4672 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); | ||
4673 | host_pat = msr_low | ((u64) msr_high << 32); | ||
4674 | /* Write the default value follow host pat */ | ||
4675 | vmcs_write64(GUEST_IA32_PAT, host_pat); | ||
4676 | /* Keep arch.pat sync with GUEST_IA32_PAT */ | ||
4677 | vmx->vcpu.arch.pat = host_pat; | ||
4678 | } | ||
4679 | 4664 | ||
4680 | for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { | 4665 | for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { |
4681 | u32 index = vmx_msr_index[i]; | 4666 | u32 index = vmx_msr_index[i]; |
@@ -4707,22 +4692,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4707 | return 0; | 4692 | return 0; |
4708 | } | 4693 | } |
4709 | 4694 | ||
4710 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 4695 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
4711 | { | 4696 | { |
4712 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4697 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4713 | struct msr_data apic_base_msr; | 4698 | struct msr_data apic_base_msr; |
4699 | u64 cr0; | ||
4714 | 4700 | ||
4715 | vmx->rmode.vm86_active = 0; | 4701 | vmx->rmode.vm86_active = 0; |
4716 | 4702 | ||
4717 | vmx->soft_vnmi_blocked = 0; | 4703 | vmx->soft_vnmi_blocked = 0; |
4718 | 4704 | ||
4719 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 4705 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
4720 | kvm_set_cr8(&vmx->vcpu, 0); | 4706 | kvm_set_cr8(vcpu, 0); |
4721 | apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; | 4707 | |
4722 | if (kvm_vcpu_is_reset_bsp(&vmx->vcpu)) | 4708 | if (!init_event) { |
4723 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; | 4709 | apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | |
4724 | apic_base_msr.host_initiated = true; | 4710 | MSR_IA32_APICBASE_ENABLE; |
4725 | kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); | 4711 | if (kvm_vcpu_is_reset_bsp(vcpu)) |
4712 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; | ||
4713 | apic_base_msr.host_initiated = true; | ||
4714 | kvm_set_apic_base(vcpu, &apic_base_msr); | ||
4715 | } | ||
4726 | 4716 | ||
4727 | vmx_segment_cache_clear(vmx); | 4717 | vmx_segment_cache_clear(vmx); |
4728 | 4718 | ||
@@ -4746,9 +4736,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4746 | vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); | 4736 | vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); |
4747 | vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); | 4737 | vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); |
4748 | 4738 | ||
4749 | vmcs_write32(GUEST_SYSENTER_CS, 0); | 4739 | if (!init_event) { |
4750 | vmcs_writel(GUEST_SYSENTER_ESP, 0); | 4740 | vmcs_write32(GUEST_SYSENTER_CS, 0); |
4751 | vmcs_writel(GUEST_SYSENTER_EIP, 0); | 4741 | vmcs_writel(GUEST_SYSENTER_ESP, 0); |
4742 | vmcs_writel(GUEST_SYSENTER_EIP, 0); | ||
4743 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | ||
4744 | } | ||
4752 | 4745 | ||
4753 | vmcs_writel(GUEST_RFLAGS, 0x02); | 4746 | vmcs_writel(GUEST_RFLAGS, 0x02); |
4754 | kvm_rip_write(vcpu, 0xfff0); | 4747 | kvm_rip_write(vcpu, 0xfff0); |
@@ -4763,18 +4756,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4763 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); | 4756 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); |
4764 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); | 4757 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); |
4765 | 4758 | ||
4766 | /* Special registers */ | ||
4767 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | ||
4768 | |||
4769 | setup_msrs(vmx); | 4759 | setup_msrs(vmx); |
4770 | 4760 | ||
4771 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ | 4761 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ |
4772 | 4762 | ||
4773 | if (cpu_has_vmx_tpr_shadow()) { | 4763 | if (cpu_has_vmx_tpr_shadow() && !init_event) { |
4774 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); | 4764 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); |
4775 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) | 4765 | if (vm_need_tpr_shadow(vcpu->kvm)) |
4776 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 4766 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, |
4777 | __pa(vmx->vcpu.arch.apic->regs)); | 4767 | __pa(vcpu->arch.apic->regs)); |
4778 | vmcs_write32(TPR_THRESHOLD, 0); | 4768 | vmcs_write32(TPR_THRESHOLD, 0); |
4779 | } | 4769 | } |
4780 | 4770 | ||
@@ -4786,12 +4776,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4786 | if (vmx->vpid != 0) | 4776 | if (vmx->vpid != 0) |
4787 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 4777 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
4788 | 4778 | ||
4789 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 4779 | cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
4790 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ | 4780 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ |
4791 | vmx_set_cr4(&vmx->vcpu, 0); | 4781 | vmx->vcpu.arch.cr0 = cr0; |
4792 | vmx_set_efer(&vmx->vcpu, 0); | 4782 | vmx_set_cr4(vcpu, 0); |
4793 | vmx_fpu_activate(&vmx->vcpu); | 4783 | if (!init_event) |
4794 | update_exception_bitmap(&vmx->vcpu); | 4784 | vmx_set_efer(vcpu, 0); |
4785 | vmx_fpu_activate(vcpu); | ||
4786 | update_exception_bitmap(vcpu); | ||
4795 | 4787 | ||
4796 | vpid_sync_context(vmx); | 4788 | vpid_sync_context(vmx); |
4797 | } | 4789 | } |
@@ -4964,7 +4956,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
4964 | .flags = 0, | 4956 | .flags = 0, |
4965 | }; | 4957 | }; |
4966 | 4958 | ||
4967 | ret = kvm_set_memory_region(kvm, &tss_mem); | 4959 | ret = x86_set_memory_region(kvm, &tss_mem); |
4968 | if (ret) | 4960 | if (ret) |
4969 | return ret; | 4961 | return ret; |
4970 | kvm->arch.tss_addr = addr; | 4962 | kvm->arch.tss_addr = addr; |
@@ -5474,19 +5466,21 @@ static int handle_cpuid(struct kvm_vcpu *vcpu) | |||
5474 | static int handle_rdmsr(struct kvm_vcpu *vcpu) | 5466 | static int handle_rdmsr(struct kvm_vcpu *vcpu) |
5475 | { | 5467 | { |
5476 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 5468 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
5477 | u64 data; | 5469 | struct msr_data msr_info; |
5478 | 5470 | ||
5479 | if (vmx_get_msr(vcpu, ecx, &data)) { | 5471 | msr_info.index = ecx; |
5472 | msr_info.host_initiated = false; | ||
5473 | if (vmx_get_msr(vcpu, &msr_info)) { | ||
5480 | trace_kvm_msr_read_ex(ecx); | 5474 | trace_kvm_msr_read_ex(ecx); |
5481 | kvm_inject_gp(vcpu, 0); | 5475 | kvm_inject_gp(vcpu, 0); |
5482 | return 1; | 5476 | return 1; |
5483 | } | 5477 | } |
5484 | 5478 | ||
5485 | trace_kvm_msr_read(ecx, data); | 5479 | trace_kvm_msr_read(ecx, msr_info.data); |
5486 | 5480 | ||
5487 | /* FIXME: handling of bits 32:63 of rax, rdx */ | 5481 | /* FIXME: handling of bits 32:63 of rax, rdx */ |
5488 | vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; | 5482 | vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u; |
5489 | vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; | 5483 | vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u; |
5490 | skip_emulated_instruction(vcpu); | 5484 | skip_emulated_instruction(vcpu); |
5491 | return 1; | 5485 | return 1; |
5492 | } | 5486 | } |
@@ -5709,9 +5703,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
5709 | return 0; | 5703 | return 0; |
5710 | } | 5704 | } |
5711 | 5705 | ||
5712 | /* clear all local breakpoint enable flags */ | ||
5713 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155); | ||
5714 | |||
5715 | /* | 5706 | /* |
5716 | * TODO: What about debug traps on tss switch? | 5707 | * TODO: What about debug traps on tss switch? |
5717 | * Are we supposed to inject them and update dr6? | 5708 | * Are we supposed to inject them and update dr6? |
@@ -7332,7 +7323,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | |||
7332 | bitmap += (port & 0x7fff) / 8; | 7323 | bitmap += (port & 0x7fff) / 8; |
7333 | 7324 | ||
7334 | if (last_bitmap != bitmap) | 7325 | if (last_bitmap != bitmap) |
7335 | if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) | 7326 | if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1)) |
7336 | return true; | 7327 | return true; |
7337 | if (b & (1 << (port & 7))) | 7328 | if (b & (1 << (port & 7))) |
7338 | return true; | 7329 | return true; |
@@ -7376,7 +7367,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
7376 | /* Then read the msr_index'th bit from this bitmap: */ | 7367 | /* Then read the msr_index'th bit from this bitmap: */ |
7377 | if (msr_index < 1024*8) { | 7368 | if (msr_index < 1024*8) { |
7378 | unsigned char b; | 7369 | unsigned char b; |
7379 | if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) | 7370 | if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1)) |
7380 | return true; | 7371 | return true; |
7381 | return 1 & (b >> (msr_index & 7)); | 7372 | return 1 & (b >> (msr_index & 7)); |
7382 | } else | 7373 | } else |
@@ -7641,9 +7632,9 @@ static void vmx_disable_pml(struct vcpu_vmx *vmx) | |||
7641 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 7632 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
7642 | } | 7633 | } |
7643 | 7634 | ||
7644 | static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx) | 7635 | static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) |
7645 | { | 7636 | { |
7646 | struct kvm *kvm = vmx->vcpu.kvm; | 7637 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7647 | u64 *pml_buf; | 7638 | u64 *pml_buf; |
7648 | u16 pml_idx; | 7639 | u16 pml_idx; |
7649 | 7640 | ||
@@ -7665,7 +7656,7 @@ static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx) | |||
7665 | 7656 | ||
7666 | gpa = pml_buf[pml_idx]; | 7657 | gpa = pml_buf[pml_idx]; |
7667 | WARN_ON(gpa & (PAGE_SIZE - 1)); | 7658 | WARN_ON(gpa & (PAGE_SIZE - 1)); |
7668 | mark_page_dirty(kvm, gpa >> PAGE_SHIFT); | 7659 | kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); |
7669 | } | 7660 | } |
7670 | 7661 | ||
7671 | /* reset PML index */ | 7662 | /* reset PML index */ |
@@ -7690,6 +7681,158 @@ static void kvm_flush_pml_buffers(struct kvm *kvm) | |||
7690 | kvm_vcpu_kick(vcpu); | 7681 | kvm_vcpu_kick(vcpu); |
7691 | } | 7682 | } |
7692 | 7683 | ||
7684 | static void vmx_dump_sel(char *name, uint32_t sel) | ||
7685 | { | ||
7686 | pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", | ||
7687 | name, vmcs_read32(sel), | ||
7688 | vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), | ||
7689 | vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), | ||
7690 | vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); | ||
7691 | } | ||
7692 | |||
7693 | static void vmx_dump_dtsel(char *name, uint32_t limit) | ||
7694 | { | ||
7695 | pr_err("%s limit=0x%08x, base=0x%016lx\n", | ||
7696 | name, vmcs_read32(limit), | ||
7697 | vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); | ||
7698 | } | ||
7699 | |||
7700 | static void dump_vmcs(void) | ||
7701 | { | ||
7702 | u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); | ||
7703 | u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); | ||
7704 | u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
7705 | u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); | ||
7706 | u32 secondary_exec_control = 0; | ||
7707 | unsigned long cr4 = vmcs_readl(GUEST_CR4); | ||
7708 | u64 efer = vmcs_readl(GUEST_IA32_EFER); | ||
7709 | int i, n; | ||
7710 | |||
7711 | if (cpu_has_secondary_exec_ctrls()) | ||
7712 | secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
7713 | |||
7714 | pr_err("*** Guest State ***\n"); | ||
7715 | pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", | ||
7716 | vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), | ||
7717 | vmcs_readl(CR0_GUEST_HOST_MASK)); | ||
7718 | pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", | ||
7719 | cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); | ||
7720 | pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); | ||
7721 | if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && | ||
7722 | (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) | ||
7723 | { | ||
7724 | pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", | ||
7725 | vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); | ||
7726 | pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", | ||
7727 | vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); | ||
7728 | } | ||
7729 | pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", | ||
7730 | vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); | ||
7731 | pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", | ||
7732 | vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); | ||
7733 | pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", | ||
7734 | vmcs_readl(GUEST_SYSENTER_ESP), | ||
7735 | vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); | ||
7736 | vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); | ||
7737 | vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); | ||
7738 | vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); | ||
7739 | vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); | ||
7740 | vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); | ||
7741 | vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); | ||
7742 | vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); | ||
7743 | vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); | ||
7744 | vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); | ||
7745 | vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); | ||
7746 | if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || | ||
7747 | (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) | ||
7748 | pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", | ||
7749 | efer, vmcs_readl(GUEST_IA32_PAT)); | ||
7750 | pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", | ||
7751 | vmcs_readl(GUEST_IA32_DEBUGCTL), | ||
7752 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); | ||
7753 | if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) | ||
7754 | pr_err("PerfGlobCtl = 0x%016lx\n", | ||
7755 | vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); | ||
7756 | if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) | ||
7757 | pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); | ||
7758 | pr_err("Interruptibility = %08x ActivityState = %08x\n", | ||
7759 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), | ||
7760 | vmcs_read32(GUEST_ACTIVITY_STATE)); | ||
7761 | if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) | ||
7762 | pr_err("InterruptStatus = %04x\n", | ||
7763 | vmcs_read16(GUEST_INTR_STATUS)); | ||
7764 | |||
7765 | pr_err("*** Host State ***\n"); | ||
7766 | pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", | ||
7767 | vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); | ||
7768 | pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", | ||
7769 | vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), | ||
7770 | vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), | ||
7771 | vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), | ||
7772 | vmcs_read16(HOST_TR_SELECTOR)); | ||
7773 | pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", | ||
7774 | vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), | ||
7775 | vmcs_readl(HOST_TR_BASE)); | ||
7776 | pr_err("GDTBase=%016lx IDTBase=%016lx\n", | ||
7777 | vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); | ||
7778 | pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", | ||
7779 | vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), | ||
7780 | vmcs_readl(HOST_CR4)); | ||
7781 | pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", | ||
7782 | vmcs_readl(HOST_IA32_SYSENTER_ESP), | ||
7783 | vmcs_read32(HOST_IA32_SYSENTER_CS), | ||
7784 | vmcs_readl(HOST_IA32_SYSENTER_EIP)); | ||
7785 | if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) | ||
7786 | pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", | ||
7787 | vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); | ||
7788 | if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | ||
7789 | pr_err("PerfGlobCtl = 0x%016lx\n", | ||
7790 | vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); | ||
7791 | |||
7792 | pr_err("*** Control State ***\n"); | ||
7793 | pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", | ||
7794 | pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); | ||
7795 | pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); | ||
7796 | pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", | ||
7797 | vmcs_read32(EXCEPTION_BITMAP), | ||
7798 | vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), | ||
7799 | vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); | ||
7800 | pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", | ||
7801 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), | ||
7802 | vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), | ||
7803 | vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); | ||
7804 | pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", | ||
7805 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
7806 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE), | ||
7807 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); | ||
7808 | pr_err(" reason=%08x qualification=%016lx\n", | ||
7809 | vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); | ||
7810 | pr_err("IDTVectoring: info=%08x errcode=%08x\n", | ||
7811 | vmcs_read32(IDT_VECTORING_INFO_FIELD), | ||
7812 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); | ||
7813 | pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); | ||
7814 | if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) | ||
7815 | pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); | ||
7816 | if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) | ||
7817 | pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); | ||
7818 | if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) | ||
7819 | pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER)); | ||
7820 | n = vmcs_read32(CR3_TARGET_COUNT); | ||
7821 | for (i = 0; i + 1 < n; i += 4) | ||
7822 | pr_err("CR3 target%u=%016lx target%u=%016lx\n", | ||
7823 | i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), | ||
7824 | i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); | ||
7825 | if (i < n) | ||
7826 | pr_err("CR3 target%u=%016lx\n", | ||
7827 | i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); | ||
7828 | if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) | ||
7829 | pr_err("PLE Gap=%08x Window=%08x\n", | ||
7830 | vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); | ||
7831 | if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) | ||
7832 | pr_err("Virtual processor ID = 0x%04x\n", | ||
7833 | vmcs_read16(VIRTUAL_PROCESSOR_ID)); | ||
7834 | } | ||
7835 | |||
7693 | /* | 7836 | /* |
7694 | * The guest has exited. See if we can fix it or if we need userspace | 7837 | * The guest has exited. See if we can fix it or if we need userspace |
7695 | * assistance. | 7838 | * assistance. |
@@ -7708,7 +7851,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
7708 | * flushed already. | 7851 | * flushed already. |
7709 | */ | 7852 | */ |
7710 | if (enable_pml) | 7853 | if (enable_pml) |
7711 | vmx_flush_pml_buffer(vmx); | 7854 | vmx_flush_pml_buffer(vcpu); |
7712 | 7855 | ||
7713 | /* If guest state is invalid, start emulating */ | 7856 | /* If guest state is invalid, start emulating */ |
7714 | if (vmx->emulation_required) | 7857 | if (vmx->emulation_required) |
@@ -7722,6 +7865,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
7722 | } | 7865 | } |
7723 | 7866 | ||
7724 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | 7867 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { |
7868 | dump_vmcs(); | ||
7725 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 7869 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
7726 | vcpu->run->fail_entry.hardware_entry_failure_reason | 7870 | vcpu->run->fail_entry.hardware_entry_failure_reason |
7727 | = exit_reason; | 7871 | = exit_reason; |
@@ -7995,6 +8139,11 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | |||
7995 | local_irq_enable(); | 8139 | local_irq_enable(); |
7996 | } | 8140 | } |
7997 | 8141 | ||
8142 | static bool vmx_has_high_real_mode_segbase(void) | ||
8143 | { | ||
8144 | return enable_unrestricted_guest || emulate_invalid_guest_state; | ||
8145 | } | ||
8146 | |||
7998 | static bool vmx_mpx_supported(void) | 8147 | static bool vmx_mpx_supported(void) |
7999 | { | 8148 | { |
8000 | return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) && | 8149 | return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) && |
@@ -8479,7 +8628,8 @@ static int get_ept_level(void) | |||
8479 | 8628 | ||
8480 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | 8629 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
8481 | { | 8630 | { |
8482 | u64 ret; | 8631 | u8 cache; |
8632 | u64 ipat = 0; | ||
8483 | 8633 | ||
8484 | /* For VT-d and EPT combination | 8634 | /* For VT-d and EPT combination |
8485 | * 1. MMIO: always map as UC | 8635 | * 1. MMIO: always map as UC |
@@ -8492,16 +8642,27 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
8492 | * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep | 8642 | * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep |
8493 | * consistent with host MTRR | 8643 | * consistent with host MTRR |
8494 | */ | 8644 | */ |
8495 | if (is_mmio) | 8645 | if (is_mmio) { |
8496 | ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; | 8646 | cache = MTRR_TYPE_UNCACHABLE; |
8497 | else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) | 8647 | goto exit; |
8498 | ret = kvm_get_guest_memory_type(vcpu, gfn) << | 8648 | } |
8499 | VMX_EPT_MT_EPTE_SHIFT; | ||
8500 | else | ||
8501 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | ||
8502 | | VMX_EPT_IPAT_BIT; | ||
8503 | 8649 | ||
8504 | return ret; | 8650 | if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { |
8651 | ipat = VMX_EPT_IPAT_BIT; | ||
8652 | cache = MTRR_TYPE_WRBACK; | ||
8653 | goto exit; | ||
8654 | } | ||
8655 | |||
8656 | if (kvm_read_cr0(vcpu) & X86_CR0_CD) { | ||
8657 | ipat = VMX_EPT_IPAT_BIT; | ||
8658 | cache = MTRR_TYPE_UNCACHABLE; | ||
8659 | goto exit; | ||
8660 | } | ||
8661 | |||
8662 | cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); | ||
8663 | |||
8664 | exit: | ||
8665 | return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; | ||
8505 | } | 8666 | } |
8506 | 8667 | ||
8507 | static int vmx_get_lpage_level(void) | 8668 | static int vmx_get_lpage_level(void) |
@@ -8923,7 +9084,7 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, | |||
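To make the rewritten control flow in vmx_get_mt_mask() concrete, here is a worked example of the returned EPT memory-type field (assuming the usual constant values VMX_EPT_MT_EPTE_SHIFT == 3 and VMX_EPT_IPAT_BIT == 1ull << 6):

    /* MMIO gfn:                              cache = UC (0), ipat clear -> 0x00
     * RAM, no noncoherent DMA:               cache = WB (6), ipat set   -> (6 << 3) | (1 << 6) == 0x70
     * RAM, noncoherent DMA, guest CR0.CD=1:  cache = UC (0), ipat set   -> 0x40
     * RAM, noncoherent DMA, CR0.CD=0:        cache = guest MTRR type for the gfn, ipat clear
     *
     * With IPAT clear the guest's PAT is still combined with the EPT type,
     * which is why the MMIO and MTRR-derived cases leave it unset. */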
8923 | struct vmx_msr_entry *e) | 9084 | struct vmx_msr_entry *e) |
8924 | { | 9085 | { |
8925 | /* x2APIC MSR accesses are not allowed */ | 9086 | /* x2APIC MSR accesses are not allowed */ |
8926 | if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8) | 9087 | if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8) |
8927 | return -EINVAL; | 9088 | return -EINVAL; |
8928 | if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ | 9089 | if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ |
8929 | e->index == MSR_IA32_UCODE_REV) | 9090 | e->index == MSR_IA32_UCODE_REV) |
@@ -8965,8 +9126,8 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | |||
8965 | 9126 | ||
8966 | msr.host_initiated = false; | 9127 | msr.host_initiated = false; |
8967 | for (i = 0; i < count; i++) { | 9128 | for (i = 0; i < count; i++) { |
8968 | if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e), | 9129 | if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), |
8969 | &e, sizeof(e))) { | 9130 | &e, sizeof(e))) { |
8970 | pr_warn_ratelimited( | 9131 | pr_warn_ratelimited( |
8971 | "%s cannot read MSR entry (%u, 0x%08llx)\n", | 9132 | "%s cannot read MSR entry (%u, 0x%08llx)\n", |
8972 | __func__, i, gpa + i * sizeof(e)); | 9133 | __func__, i, gpa + i * sizeof(e)); |
@@ -8998,9 +9159,10 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | |||
8998 | struct vmx_msr_entry e; | 9159 | struct vmx_msr_entry e; |
8999 | 9160 | ||
9000 | for (i = 0; i < count; i++) { | 9161 | for (i = 0; i < count; i++) { |
9001 | if (kvm_read_guest(vcpu->kvm, | 9162 | struct msr_data msr_info; |
9002 | gpa + i * sizeof(e), | 9163 | if (kvm_vcpu_read_guest(vcpu, |
9003 | &e, 2 * sizeof(u32))) { | 9164 | gpa + i * sizeof(e), |
9165 | &e, 2 * sizeof(u32))) { | ||
9004 | pr_warn_ratelimited( | 9166 | pr_warn_ratelimited( |
9005 | "%s cannot read MSR entry (%u, 0x%08llx)\n", | 9167 | "%s cannot read MSR entry (%u, 0x%08llx)\n", |
9006 | __func__, i, gpa + i * sizeof(e)); | 9168 | __func__, i, gpa + i * sizeof(e)); |
@@ -9012,19 +9174,21 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | |||
9012 | __func__, i, e.index, e.reserved); | 9174 | __func__, i, e.index, e.reserved); |
9013 | return -EINVAL; | 9175 | return -EINVAL; |
9014 | } | 9176 | } |
9015 | if (kvm_get_msr(vcpu, e.index, &e.value)) { | 9177 | msr_info.host_initiated = false; |
9178 | msr_info.index = e.index; | ||
9179 | if (kvm_get_msr(vcpu, &msr_info)) { | ||
9016 | pr_warn_ratelimited( | 9180 | pr_warn_ratelimited( |
9017 | "%s cannot read MSR (%u, 0x%x)\n", | 9181 | "%s cannot read MSR (%u, 0x%x)\n", |
9018 | __func__, i, e.index); | 9182 | __func__, i, e.index); |
9019 | return -EINVAL; | 9183 | return -EINVAL; |
9020 | } | 9184 | } |
9021 | if (kvm_write_guest(vcpu->kvm, | 9185 | if (kvm_vcpu_write_guest(vcpu, |
9022 | gpa + i * sizeof(e) + | 9186 | gpa + i * sizeof(e) + |
9023 | offsetof(struct vmx_msr_entry, value), | 9187 | offsetof(struct vmx_msr_entry, value), |
9024 | &e.value, sizeof(e.value))) { | 9188 | &msr_info.data, sizeof(msr_info.data))) { |
9025 | pr_warn_ratelimited( | 9189 | pr_warn_ratelimited( |
9026 | "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", | 9190 | "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", |
9027 | __func__, i, e.index, e.value); | 9191 | __func__, i, e.index, msr_info.data); |
9028 | return -EINVAL; | 9192 | return -EINVAL; |
9029 | } | 9193 | } |
9030 | } | 9194 | } |
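For reference, the store loop above relies on the VM-exit MSR-store area layout of struct vmx_msr_entry: 16 bytes per slot, with only the value half written back (index and reserved stay guest-owned, and reserved must be zero). A sketch of that layout, mirroring the fields the code dereferences:

    /* Layout assumed above (mirrors struct vmx_msr_entry from asm/vmx.h). */
    struct vmx_msr_entry_sketch {
            u32 index;      /* MSR number, supplied by L1 */
            u32 reserved;   /* must be zero; checked before the read-back */
            u64 value;      /* the only field KVM writes, at offsetof(..., value) */
    };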
@@ -10149,6 +10313,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10149 | .hardware_enable = hardware_enable, | 10313 | .hardware_enable = hardware_enable, |
10150 | .hardware_disable = hardware_disable, | 10314 | .hardware_disable = hardware_disable, |
10151 | .cpu_has_accelerated_tpr = report_flexpriority, | 10315 | .cpu_has_accelerated_tpr = report_flexpriority, |
10316 | .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, | ||
10152 | 10317 | ||
10153 | .vcpu_create = vmx_create_vcpu, | 10318 | .vcpu_create = vmx_create_vcpu, |
10154 | .vcpu_free = vmx_free_vcpu, | 10319 | .vcpu_free = vmx_free_vcpu, |
@@ -10254,6 +10419,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10254 | .slot_disable_log_dirty = vmx_slot_disable_log_dirty, | 10419 | .slot_disable_log_dirty = vmx_slot_disable_log_dirty, |
10255 | .flush_log_dirty = vmx_flush_log_dirty, | 10420 | .flush_log_dirty = vmx_flush_log_dirty, |
10256 | .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, | 10421 | .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, |
10422 | |||
10423 | .pmu_ops = &intel_pmu_ops, | ||
10257 | }; | 10424 | }; |
10258 | 10425 | ||
10259 | static int __init vmx_init(void) | 10426 | static int __init vmx_init(void) |
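The new .pmu_ops pointer is how the generic vPMU code dispatches to vendor implementations (intel_pmu_ops here, with an AMD counterpart added elsewhere in this series). The exact member list lives in the new arch/x86/kvm/pmu.h, which is not part of this hunk, so the sketch below is only a guess at its shape:

    /* Illustrative only: a vendor-dispatch table in the spirit of kvm_pmu_ops. */
    struct pmu_ops_sketch {
            bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
            int  (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
            int  (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
            void (*refresh)(struct kvm_vcpu *vcpu);
    };

Generic helpers such as kvm_pmu_is_valid_msr(), used further down in the x86.c portion of this diff, would then simply call through the per-vendor table.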
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26eaeb522cab..ac165c2fb8e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "x86.h" | 28 | #include "x86.h" |
29 | #include "cpuid.h" | 29 | #include "cpuid.h" |
30 | #include "assigned-dev.h" | 30 | #include "assigned-dev.h" |
31 | #include "pmu.h" | ||
31 | 32 | ||
32 | #include <linux/clocksource.h> | 33 | #include <linux/clocksource.h> |
33 | #include <linux/interrupt.h> | 34 | #include <linux/interrupt.h> |
@@ -57,7 +58,6 @@ | |||
57 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
58 | #include <asm/msr.h> | 59 | #include <asm/msr.h> |
59 | #include <asm/desc.h> | 60 | #include <asm/desc.h> |
60 | #include <asm/mtrr.h> | ||
61 | #include <asm/mce.h> | 61 | #include <asm/mce.h> |
62 | #include <linux/kernel_stat.h> | 62 | #include <linux/kernel_stat.h> |
63 | #include <asm/fpu/internal.h> /* Ugh! */ | 63 | #include <asm/fpu/internal.h> /* Ugh! */ |
@@ -98,6 +98,9 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); | |||
98 | unsigned int min_timer_period_us = 500; | 98 | unsigned int min_timer_period_us = 500; |
99 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | 99 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); |
100 | 100 | ||
101 | static bool __read_mostly kvmclock_periodic_sync = true; | ||
102 | module_param(kvmclock_periodic_sync, bool, S_IRUGO); | ||
103 | |||
101 | bool kvm_has_tsc_control; | 104 | bool kvm_has_tsc_control; |
102 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 105 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
103 | u32 kvm_max_guest_tsc_khz; | 106 | u32 kvm_max_guest_tsc_khz; |
@@ -474,7 +477,7 @@ EXPORT_SYMBOL_GPL(kvm_require_dr); | |||
474 | 477 | ||
475 | /* | 478 | /* |
476 | * This function will be used to read from the physical memory of the currently | 479 | * This function will be used to read from the physical memory of the currently |
477 | * running guest. The difference to kvm_read_guest_page is that this function | 480 | * running guest. The difference to kvm_vcpu_read_guest_page is that this function |
478 | * can read from guest physical or from the guest's guest physical memory. | 481 | * can read from guest physical or from the guest's guest physical memory. |
479 | */ | 482 | */ |
480 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 483 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
@@ -492,7 +495,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
492 | 495 | ||
493 | real_gfn = gpa_to_gfn(real_gfn); | 496 | real_gfn = gpa_to_gfn(real_gfn); |
494 | 497 | ||
495 | return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len); | 498 | return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len); |
496 | } | 499 | } |
497 | EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); | 500 | EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); |
498 | 501 | ||
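kvm_vcpu_read_guest_page() and the other kvm_vcpu_* accessors used throughout this patch differ from the kvm_read_guest_page() family in one respect: the memslot lookup is done per vcpu, so on x86 a vcpu that is in SMM resolves gfns against the SMM address space instead of the default one. A rough sketch of that shape, assuming the helpers added by the multiple-address-space work (the real implementation lives in virt/kvm/kvm_main.c):

    /* Rough shape only; names of the internal helpers are taken on trust. */
    static int vcpu_read_guest_page_sketch(struct kvm_vcpu *vcpu, gfn_t gfn,
                                           void *data, int offset, int len)
    {
            /* Slot set chosen per vcpu (e.g. SMM vs. non-SMM on x86). */
            struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);

            return __kvm_read_guest_page(slot, gfn, data, offset, len);
    }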
@@ -571,8 +574,7 @@ out: | |||
571 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 574 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
572 | { | 575 | { |
573 | unsigned long old_cr0 = kvm_read_cr0(vcpu); | 576 | unsigned long old_cr0 = kvm_read_cr0(vcpu); |
574 | unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | | 577 | unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; |
575 | X86_CR0_CD | X86_CR0_NW; | ||
576 | 578 | ||
577 | cr0 |= X86_CR0_ET; | 579 | cr0 |= X86_CR0_ET; |
578 | 580 | ||
@@ -618,6 +620,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
618 | 620 | ||
619 | if ((cr0 ^ old_cr0) & update_bits) | 621 | if ((cr0 ^ old_cr0) & update_bits) |
620 | kvm_mmu_reset_context(vcpu); | 622 | kvm_mmu_reset_context(vcpu); |
623 | |||
624 | if ((cr0 ^ old_cr0) & X86_CR0_CD) | ||
625 | kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); | ||
626 | |||
621 | return 0; | 627 | return 0; |
622 | } | 628 | } |
623 | EXPORT_SYMBOL_GPL(kvm_set_cr0); | 629 | EXPORT_SYMBOL_GPL(kvm_set_cr0); |
@@ -907,7 +913,7 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu) | |||
907 | u64 data; | 913 | u64 data; |
908 | int err; | 914 | int err; |
909 | 915 | ||
910 | err = kvm_pmu_read_pmc(vcpu, ecx, &data); | 916 | err = kvm_pmu_rdpmc(vcpu, ecx, &data); |
911 | if (err) | 917 | if (err) |
912 | return err; | 918 | return err; |
913 | kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); | 919 | kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); |
@@ -922,17 +928,11 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); | |||
922 | * | 928 | * |
923 | * This list is modified at module load time to reflect the | 929 | * This list is modified at module load time to reflect the |
924 | * capabilities of the host cpu. This capabilities test skips MSRs that are | 930 | * capabilities of the host cpu. This capabilities test skips MSRs that are |
925 | * kvm-specific. Those are put in the beginning of the list. | 931 | * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs |
932 | * may depend on host virtualization features rather than host cpu features. | ||
926 | */ | 933 | */ |
927 | 934 | ||
928 | #define KVM_SAVE_MSRS_BEGIN 12 | ||
929 | static u32 msrs_to_save[] = { | 935 | static u32 msrs_to_save[] = { |
930 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | ||
931 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | ||
932 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | ||
933 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | ||
934 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | ||
935 | MSR_KVM_PV_EOI_EN, | ||
936 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 936 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
937 | MSR_STAR, | 937 | MSR_STAR, |
938 | #ifdef CONFIG_X86_64 | 938 | #ifdef CONFIG_X86_64 |
@@ -944,14 +944,24 @@ static u32 msrs_to_save[] = { | |||
944 | 944 | ||
945 | static unsigned num_msrs_to_save; | 945 | static unsigned num_msrs_to_save; |
946 | 946 | ||
947 | static const u32 emulated_msrs[] = { | 947 | static u32 emulated_msrs[] = { |
948 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | ||
949 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | ||
950 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | ||
951 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | ||
952 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | ||
953 | MSR_KVM_PV_EOI_EN, | ||
954 | |||
948 | MSR_IA32_TSC_ADJUST, | 955 | MSR_IA32_TSC_ADJUST, |
949 | MSR_IA32_TSCDEADLINE, | 956 | MSR_IA32_TSCDEADLINE, |
950 | MSR_IA32_MISC_ENABLE, | 957 | MSR_IA32_MISC_ENABLE, |
951 | MSR_IA32_MCG_STATUS, | 958 | MSR_IA32_MCG_STATUS, |
952 | MSR_IA32_MCG_CTL, | 959 | MSR_IA32_MCG_CTL, |
960 | MSR_IA32_SMBASE, | ||
953 | }; | 961 | }; |
954 | 962 | ||
963 | static unsigned num_emulated_msrs; | ||
964 | |||
955 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) | 965 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) |
956 | { | 966 | { |
957 | if (efer & efer_reserved_bits) | 967 | if (efer & efer_reserved_bits) |
@@ -1045,6 +1055,21 @@ EXPORT_SYMBOL_GPL(kvm_set_msr); | |||
1045 | /* | 1055 | /* |
1046 | * Adapt set_msr() to msr_io()'s calling convention | 1056 | * Adapt set_msr() to msr_io()'s calling convention |
1047 | */ | 1057 | */ |
1058 | static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | ||
1059 | { | ||
1060 | struct msr_data msr; | ||
1061 | int r; | ||
1062 | |||
1063 | msr.index = index; | ||
1064 | msr.host_initiated = true; | ||
1065 | r = kvm_get_msr(vcpu, &msr); | ||
1066 | if (r) | ||
1067 | return r; | ||
1068 | |||
1069 | *data = msr.data; | ||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
1048 | static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | 1073 | static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) |
1049 | { | 1074 | { |
1050 | struct msr_data msr; | 1075 | struct msr_data msr; |
@@ -1697,6 +1722,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1697 | vcpu->pvclock_set_guest_stopped_request = false; | 1722 | vcpu->pvclock_set_guest_stopped_request = false; |
1698 | } | 1723 | } |
1699 | 1724 | ||
1725 | pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO; | ||
1726 | |||
1700 | /* If the host uses TSC clocksource, then it is stable */ | 1727 | /* If the host uses TSC clocksource, then it is stable */ |
1701 | if (use_master_clock) | 1728 | if (use_master_clock) |
1702 | pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; | 1729 | pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; |
@@ -1767,127 +1794,14 @@ static void kvmclock_sync_fn(struct work_struct *work) | |||
1767 | kvmclock_sync_work); | 1794 | kvmclock_sync_work); |
1768 | struct kvm *kvm = container_of(ka, struct kvm, arch); | 1795 | struct kvm *kvm = container_of(ka, struct kvm, arch); |
1769 | 1796 | ||
1797 | if (!kvmclock_periodic_sync) | ||
1798 | return; | ||
1799 | |||
1770 | schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0); | 1800 | schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0); |
1771 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, | 1801 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, |
1772 | KVMCLOCK_SYNC_PERIOD); | 1802 | KVMCLOCK_SYNC_PERIOD); |
1773 | } | 1803 | } |
1774 | 1804 | ||
1775 | static bool msr_mtrr_valid(unsigned msr) | ||
1776 | { | ||
1777 | switch (msr) { | ||
1778 | case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1: | ||
1779 | case MSR_MTRRfix64K_00000: | ||
1780 | case MSR_MTRRfix16K_80000: | ||
1781 | case MSR_MTRRfix16K_A0000: | ||
1782 | case MSR_MTRRfix4K_C0000: | ||
1783 | case MSR_MTRRfix4K_C8000: | ||
1784 | case MSR_MTRRfix4K_D0000: | ||
1785 | case MSR_MTRRfix4K_D8000: | ||
1786 | case MSR_MTRRfix4K_E0000: | ||
1787 | case MSR_MTRRfix4K_E8000: | ||
1788 | case MSR_MTRRfix4K_F0000: | ||
1789 | case MSR_MTRRfix4K_F8000: | ||
1790 | case MSR_MTRRdefType: | ||
1791 | case MSR_IA32_CR_PAT: | ||
1792 | return true; | ||
1793 | case 0x2f8: | ||
1794 | return true; | ||
1795 | } | ||
1796 | return false; | ||
1797 | } | ||
1798 | |||
1799 | static bool valid_pat_type(unsigned t) | ||
1800 | { | ||
1801 | return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ | ||
1802 | } | ||
1803 | |||
1804 | static bool valid_mtrr_type(unsigned t) | ||
1805 | { | ||
1806 | return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ | ||
1807 | } | ||
1808 | |||
1809 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1810 | { | ||
1811 | int i; | ||
1812 | u64 mask; | ||
1813 | |||
1814 | if (!msr_mtrr_valid(msr)) | ||
1815 | return false; | ||
1816 | |||
1817 | if (msr == MSR_IA32_CR_PAT) { | ||
1818 | for (i = 0; i < 8; i++) | ||
1819 | if (!valid_pat_type((data >> (i * 8)) & 0xff)) | ||
1820 | return false; | ||
1821 | return true; | ||
1822 | } else if (msr == MSR_MTRRdefType) { | ||
1823 | if (data & ~0xcff) | ||
1824 | return false; | ||
1825 | return valid_mtrr_type(data & 0xff); | ||
1826 | } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { | ||
1827 | for (i = 0; i < 8 ; i++) | ||
1828 | if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) | ||
1829 | return false; | ||
1830 | return true; | ||
1831 | } | ||
1832 | |||
1833 | /* variable MTRRs */ | ||
1834 | WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); | ||
1835 | |||
1836 | mask = (~0ULL) << cpuid_maxphyaddr(vcpu); | ||
1837 | if ((msr & 1) == 0) { | ||
1838 | /* MTRR base */ | ||
1839 | if (!valid_mtrr_type(data & 0xff)) | ||
1840 | return false; | ||
1841 | mask |= 0xf00; | ||
1842 | } else | ||
1843 | /* MTRR mask */ | ||
1844 | mask |= 0x7ff; | ||
1845 | if (data & mask) { | ||
1846 | kvm_inject_gp(vcpu, 0); | ||
1847 | return false; | ||
1848 | } | ||
1849 | |||
1850 | return true; | ||
1851 | } | ||
1852 | EXPORT_SYMBOL_GPL(kvm_mtrr_valid); | ||
1853 | |||
1854 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1855 | { | ||
1856 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | ||
1857 | |||
1858 | if (!kvm_mtrr_valid(vcpu, msr, data)) | ||
1859 | return 1; | ||
1860 | |||
1861 | if (msr == MSR_MTRRdefType) { | ||
1862 | vcpu->arch.mtrr_state.def_type = data; | ||
1863 | vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; | ||
1864 | } else if (msr == MSR_MTRRfix64K_00000) | ||
1865 | p[0] = data; | ||
1866 | else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) | ||
1867 | p[1 + msr - MSR_MTRRfix16K_80000] = data; | ||
1868 | else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) | ||
1869 | p[3 + msr - MSR_MTRRfix4K_C0000] = data; | ||
1870 | else if (msr == MSR_IA32_CR_PAT) | ||
1871 | vcpu->arch.pat = data; | ||
1872 | else { /* Variable MTRRs */ | ||
1873 | int idx, is_mtrr_mask; | ||
1874 | u64 *pt; | ||
1875 | |||
1876 | idx = (msr - 0x200) / 2; | ||
1877 | is_mtrr_mask = msr - 0x200 - 2 * idx; | ||
1878 | if (!is_mtrr_mask) | ||
1879 | pt = | ||
1880 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; | ||
1881 | else | ||
1882 | pt = | ||
1883 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; | ||
1884 | *pt = data; | ||
1885 | } | ||
1886 | |||
1887 | kvm_mmu_reset_context(vcpu); | ||
1888 | return 0; | ||
1889 | } | ||
1890 | |||
1891 | static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1805 | static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1892 | { | 1806 | { |
1893 | u64 mcg_cap = vcpu->arch.mcg_cap; | 1807 | u64 mcg_cap = vcpu->arch.mcg_cap; |
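The MTRR set/get/validate code deleted above does not disappear; per this series it moves into the new arch/x86/kvm/mtrr.c behind kvm_mtrr_set_msr()/kvm_mtrr_get_msr(), which the 0x200-0x2ff cases further down now call. The one piece of arithmetic from the deleted code worth keeping in mind is how a variable-range MTRR MSR maps onto a base/mask pair:

    /* Variable-range MTRR MSRs come in base/mask pairs starting at 0x200:
     * 0x200 = MTRRphysBase0, 0x201 = MTRRphysMask0, 0x202 = MTRRphysBase1, ... */
    static void var_mtrr_decode_sketch(u32 msr, int *range, bool *is_mask)
    {
            *range   = (msr - 0x200) / 2;   /* which variable range */
            *is_mask = (msr - 0x200) & 1;   /* even: base register, odd: mask register */
    }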
@@ -1946,7 +1860,7 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | |||
1946 | r = PTR_ERR(page); | 1860 | r = PTR_ERR(page); |
1947 | goto out; | 1861 | goto out; |
1948 | } | 1862 | } |
1949 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | 1863 | if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) |
1950 | goto out_free; | 1864 | goto out_free; |
1951 | r = 0; | 1865 | r = 0; |
1952 | out_free: | 1866 | out_free: |
@@ -2046,13 +1960,13 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
2046 | break; | 1960 | break; |
2047 | } | 1961 | } |
2048 | gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; | 1962 | gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; |
2049 | addr = gfn_to_hva(vcpu->kvm, gfn); | 1963 | addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); |
2050 | if (kvm_is_error_hva(addr)) | 1964 | if (kvm_is_error_hva(addr)) |
2051 | return 1; | 1965 | return 1; |
2052 | if (__clear_user((void __user *)addr, PAGE_SIZE)) | 1966 | if (__clear_user((void __user *)addr, PAGE_SIZE)) |
2053 | return 1; | 1967 | return 1; |
2054 | vcpu->arch.hv_vapic = data; | 1968 | vcpu->arch.hv_vapic = data; |
2055 | mark_page_dirty(vcpu->kvm, gfn); | 1969 | kvm_vcpu_mark_page_dirty(vcpu, gfn); |
2056 | if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) | 1970 | if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) |
2057 | return 1; | 1971 | return 1; |
2058 | break; | 1972 | break; |
@@ -2179,7 +2093,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2179 | __func__, data); | 2093 | __func__, data); |
2180 | break; | 2094 | break; |
2181 | case 0x200 ... 0x2ff: | 2095 | case 0x200 ... 0x2ff: |
2182 | return set_msr_mtrr(vcpu, msr, data); | 2096 | return kvm_mtrr_set_msr(vcpu, msr, data); |
2183 | case MSR_IA32_APICBASE: | 2097 | case MSR_IA32_APICBASE: |
2184 | return kvm_set_apic_base(vcpu, msr_info); | 2098 | return kvm_set_apic_base(vcpu, msr_info); |
2185 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 2099 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
@@ -2199,6 +2113,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2199 | case MSR_IA32_MISC_ENABLE: | 2113 | case MSR_IA32_MISC_ENABLE: |
2200 | vcpu->arch.ia32_misc_enable_msr = data; | 2114 | vcpu->arch.ia32_misc_enable_msr = data; |
2201 | break; | 2115 | break; |
2116 | case MSR_IA32_SMBASE: | ||
2117 | if (!msr_info->host_initiated) | ||
2118 | return 1; | ||
2119 | vcpu->arch.smbase = data; | ||
2120 | break; | ||
2202 | case MSR_KVM_WALL_CLOCK_NEW: | 2121 | case MSR_KVM_WALL_CLOCK_NEW: |
2203 | case MSR_KVM_WALL_CLOCK: | 2122 | case MSR_KVM_WALL_CLOCK: |
2204 | vcpu->kvm->arch.wall_clock = data; | 2123 | vcpu->kvm->arch.wall_clock = data; |
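MSR_IA32_SMBASE is deliberately accepted only when host_initiated, i.e. through the KVM_GET_MSRS/KVM_SET_MSRS ioctls used for save/restore; a guest RDMSR/WRMSR of it still faults as before. A hedged userspace sketch of reading it through that path (vcpu_fd is assumed to be an open KVM vcpu descriptor; 0x9e is MSR_IA32_SMBASE):

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    /* Hypothetical helper: read SMBASE via the host-initiated MSR path. */
    static void dump_smbase(int vcpu_fd)
    {
            struct kvm_msrs *msrs = calloc(1, sizeof(*msrs) + sizeof(struct kvm_msr_entry));

            if (!msrs)
                    return;
            msrs->nmsrs = 1;
            msrs->entries[0].index = 0x9e;  /* MSR_IA32_SMBASE */
            if (ioctl(vcpu_fd, KVM_GET_MSRS, msrs) == 1)
                    printf("SMBASE = 0x%llx\n", (unsigned long long)msrs->entries[0].data);
            free(msrs);
    }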
@@ -2219,6 +2138,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2219 | &vcpu->requests); | 2138 | &vcpu->requests); |
2220 | 2139 | ||
2221 | ka->boot_vcpu_runs_old_kvmclock = tmp; | 2140 | ka->boot_vcpu_runs_old_kvmclock = tmp; |
2141 | |||
2142 | ka->kvmclock_offset = -get_kernel_ns(); | ||
2222 | } | 2143 | } |
2223 | 2144 | ||
2224 | vcpu->arch.time = data; | 2145 | vcpu->arch.time = data; |
@@ -2280,37 +2201,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2280 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: | 2201 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: |
2281 | return set_msr_mce(vcpu, msr, data); | 2202 | return set_msr_mce(vcpu, msr, data); |
2282 | 2203 | ||
2283 | /* Performance counters are not protected by a CPUID bit, | 2204 | case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: |
2284 | * so we should check all of them in the generic path for the sake of | 2205 | case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: |
2285 | * cross vendor migration. | 2206 | pr = true; /* fall through */ |
2286 | * Writing a zero into the event select MSRs disables them, | 2207 | case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: |
2287 | * which we perfectly emulate ;-). Any other value should be at least | 2208 | case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: |
2288 | * reported, some guests depend on them. | 2209 | if (kvm_pmu_is_valid_msr(vcpu, msr)) |
2289 | */ | ||
2290 | case MSR_K7_EVNTSEL0: | ||
2291 | case MSR_K7_EVNTSEL1: | ||
2292 | case MSR_K7_EVNTSEL2: | ||
2293 | case MSR_K7_EVNTSEL3: | ||
2294 | if (data != 0) | ||
2295 | vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: " | ||
2296 | "0x%x data 0x%llx\n", msr, data); | ||
2297 | break; | ||
2298 | /* at least RHEL 4 unconditionally writes to the perfctr registers, | ||
2299 | * so we ignore writes to make it happy. | ||
2300 | */ | ||
2301 | case MSR_K7_PERFCTR0: | ||
2302 | case MSR_K7_PERFCTR1: | ||
2303 | case MSR_K7_PERFCTR2: | ||
2304 | case MSR_K7_PERFCTR3: | ||
2305 | vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: " | ||
2306 | "0x%x data 0x%llx\n", msr, data); | ||
2307 | break; | ||
2308 | case MSR_P6_PERFCTR0: | ||
2309 | case MSR_P6_PERFCTR1: | ||
2310 | pr = true; | ||
2311 | case MSR_P6_EVNTSEL0: | ||
2312 | case MSR_P6_EVNTSEL1: | ||
2313 | if (kvm_pmu_msr(vcpu, msr)) | ||
2314 | return kvm_pmu_set_msr(vcpu, msr_info); | 2210 | return kvm_pmu_set_msr(vcpu, msr_info); |
2315 | 2211 | ||
2316 | if (pr || data != 0) | 2212 | if (pr || data != 0) |
@@ -2356,7 +2252,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2356 | default: | 2252 | default: |
2357 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 2253 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
2358 | return xen_hvm_config(vcpu, data); | 2254 | return xen_hvm_config(vcpu, data); |
2359 | if (kvm_pmu_msr(vcpu, msr)) | 2255 | if (kvm_pmu_is_valid_msr(vcpu, msr)) |
2360 | return kvm_pmu_set_msr(vcpu, msr_info); | 2256 | return kvm_pmu_set_msr(vcpu, msr_info); |
2361 | if (!ignore_msrs) { | 2257 | if (!ignore_msrs) { |
2362 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 2258 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
@@ -2378,48 +2274,12 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common); | |||
2378 | * Returns 0 on success, non-0 otherwise. | 2274 | * Returns 0 on success, non-0 otherwise. |
2379 | * Assumes vcpu_load() was already called. | 2275 | * Assumes vcpu_load() was already called. |
2380 | */ | 2276 | */ |
2381 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 2277 | int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) |
2382 | { | 2278 | { |
2383 | return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); | 2279 | return kvm_x86_ops->get_msr(vcpu, msr); |
2384 | } | 2280 | } |
2385 | EXPORT_SYMBOL_GPL(kvm_get_msr); | 2281 | EXPORT_SYMBOL_GPL(kvm_get_msr); |
2386 | 2282 | ||
2387 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
2388 | { | ||
2389 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | ||
2390 | |||
2391 | if (!msr_mtrr_valid(msr)) | ||
2392 | return 1; | ||
2393 | |||
2394 | if (msr == MSR_MTRRdefType) | ||
2395 | *pdata = vcpu->arch.mtrr_state.def_type + | ||
2396 | (vcpu->arch.mtrr_state.enabled << 10); | ||
2397 | else if (msr == MSR_MTRRfix64K_00000) | ||
2398 | *pdata = p[0]; | ||
2399 | else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) | ||
2400 | *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; | ||
2401 | else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) | ||
2402 | *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; | ||
2403 | else if (msr == MSR_IA32_CR_PAT) | ||
2404 | *pdata = vcpu->arch.pat; | ||
2405 | else { /* Variable MTRRs */ | ||
2406 | int idx, is_mtrr_mask; | ||
2407 | u64 *pt; | ||
2408 | |||
2409 | idx = (msr - 0x200) / 2; | ||
2410 | is_mtrr_mask = msr - 0x200 - 2 * idx; | ||
2411 | if (!is_mtrr_mask) | ||
2412 | pt = | ||
2413 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; | ||
2414 | else | ||
2415 | pt = | ||
2416 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; | ||
2417 | *pdata = *pt; | ||
2418 | } | ||
2419 | |||
2420 | return 0; | ||
2421 | } | ||
2422 | |||
2423 | static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 2283 | static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
2424 | { | 2284 | { |
2425 | u64 data; | 2285 | u64 data; |
@@ -2517,11 +2377,11 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2517 | return 0; | 2377 | return 0; |
2518 | } | 2378 | } |
2519 | 2379 | ||
2520 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 2380 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2521 | { | 2381 | { |
2522 | u64 data; | 2382 | u64 data; |
2523 | 2383 | ||
2524 | switch (msr) { | 2384 | switch (msr_info->index) { |
2525 | case MSR_IA32_PLATFORM_ID: | 2385 | case MSR_IA32_PLATFORM_ID: |
2526 | case MSR_IA32_EBL_CR_POWERON: | 2386 | case MSR_IA32_EBL_CR_POWERON: |
2527 | case MSR_IA32_DEBUGCTLMSR: | 2387 | case MSR_IA32_DEBUGCTLMSR: |
@@ -2532,38 +2392,28 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2532 | case MSR_K8_SYSCFG: | 2392 | case MSR_K8_SYSCFG: |
2533 | case MSR_K7_HWCR: | 2393 | case MSR_K7_HWCR: |
2534 | case MSR_VM_HSAVE_PA: | 2394 | case MSR_VM_HSAVE_PA: |
2535 | case MSR_K7_EVNTSEL0: | ||
2536 | case MSR_K7_EVNTSEL1: | ||
2537 | case MSR_K7_EVNTSEL2: | ||
2538 | case MSR_K7_EVNTSEL3: | ||
2539 | case MSR_K7_PERFCTR0: | ||
2540 | case MSR_K7_PERFCTR1: | ||
2541 | case MSR_K7_PERFCTR2: | ||
2542 | case MSR_K7_PERFCTR3: | ||
2543 | case MSR_K8_INT_PENDING_MSG: | 2395 | case MSR_K8_INT_PENDING_MSG: |
2544 | case MSR_AMD64_NB_CFG: | 2396 | case MSR_AMD64_NB_CFG: |
2545 | case MSR_FAM10H_MMIO_CONF_BASE: | 2397 | case MSR_FAM10H_MMIO_CONF_BASE: |
2546 | case MSR_AMD64_BU_CFG2: | 2398 | case MSR_AMD64_BU_CFG2: |
2547 | data = 0; | 2399 | msr_info->data = 0; |
2548 | break; | 2400 | break; |
2549 | case MSR_P6_PERFCTR0: | 2401 | case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: |
2550 | case MSR_P6_PERFCTR1: | 2402 | case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: |
2551 | case MSR_P6_EVNTSEL0: | 2403 | case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: |
2552 | case MSR_P6_EVNTSEL1: | 2404 | case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: |
2553 | if (kvm_pmu_msr(vcpu, msr)) | 2405 | if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) |
2554 | return kvm_pmu_get_msr(vcpu, msr, pdata); | 2406 | return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); |
2555 | data = 0; | 2407 | msr_info->data = 0; |
2556 | break; | 2408 | break; |
2557 | case MSR_IA32_UCODE_REV: | 2409 | case MSR_IA32_UCODE_REV: |
2558 | data = 0x100000000ULL; | 2410 | msr_info->data = 0x100000000ULL; |
2559 | break; | 2411 | break; |
2560 | case MSR_MTRRcap: | 2412 | case MSR_MTRRcap: |
2561 | data = 0x500 | KVM_NR_VAR_MTRR; | ||
2562 | break; | ||
2563 | case 0x200 ... 0x2ff: | 2413 | case 0x200 ... 0x2ff: |
2564 | return get_msr_mtrr(vcpu, msr, pdata); | 2414 | return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); |
2565 | case 0xcd: /* fsb frequency */ | 2415 | case 0xcd: /* fsb frequency */ |
2566 | data = 3; | 2416 | msr_info->data = 3; |
2567 | break; | 2417 | break; |
2568 | /* | 2418 | /* |
2569 | * MSR_EBC_FREQUENCY_ID | 2419 | * MSR_EBC_FREQUENCY_ID |
@@ -2577,48 +2427,53 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2577 | * multiplying by zero otherwise. | 2427 | * multiplying by zero otherwise. |
2578 | */ | 2428 | */ |
2579 | case MSR_EBC_FREQUENCY_ID: | 2429 | case MSR_EBC_FREQUENCY_ID: |
2580 | data = 1 << 24; | 2430 | msr_info->data = 1 << 24; |
2581 | break; | 2431 | break; |
2582 | case MSR_IA32_APICBASE: | 2432 | case MSR_IA32_APICBASE: |
2583 | data = kvm_get_apic_base(vcpu); | 2433 | msr_info->data = kvm_get_apic_base(vcpu); |
2584 | break; | 2434 | break; |
2585 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 2435 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
2586 | return kvm_x2apic_msr_read(vcpu, msr, pdata); | 2436 | return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); |
2587 | break; | 2437 | break; |
2588 | case MSR_IA32_TSCDEADLINE: | 2438 | case MSR_IA32_TSCDEADLINE: |
2589 | data = kvm_get_lapic_tscdeadline_msr(vcpu); | 2439 | msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu); |
2590 | break; | 2440 | break; |
2591 | case MSR_IA32_TSC_ADJUST: | 2441 | case MSR_IA32_TSC_ADJUST: |
2592 | data = (u64)vcpu->arch.ia32_tsc_adjust_msr; | 2442 | msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr; |
2593 | break; | 2443 | break; |
2594 | case MSR_IA32_MISC_ENABLE: | 2444 | case MSR_IA32_MISC_ENABLE: |
2595 | data = vcpu->arch.ia32_misc_enable_msr; | 2445 | msr_info->data = vcpu->arch.ia32_misc_enable_msr; |
2446 | break; | ||
2447 | case MSR_IA32_SMBASE: | ||
2448 | if (!msr_info->host_initiated) | ||
2449 | return 1; | ||
2450 | msr_info->data = vcpu->arch.smbase; | ||
2596 | break; | 2451 | break; |
2597 | case MSR_IA32_PERF_STATUS: | 2452 | case MSR_IA32_PERF_STATUS: |
2598 | /* TSC increment by tick */ | 2453 | /* TSC increment by tick */ |
2599 | data = 1000ULL; | 2454 | msr_info->data = 1000ULL; |
2600 | /* CPU multiplier */ | 2455 | /* CPU multiplier */ |
2601 | data |= (((uint64_t)4ULL) << 40); | 2456 | data |= (((uint64_t)4ULL) << 40); |
2602 | break; | 2457 | break; |
2603 | case MSR_EFER: | 2458 | case MSR_EFER: |
2604 | data = vcpu->arch.efer; | 2459 | msr_info->data = vcpu->arch.efer; |
2605 | break; | 2460 | break; |
2606 | case MSR_KVM_WALL_CLOCK: | 2461 | case MSR_KVM_WALL_CLOCK: |
2607 | case MSR_KVM_WALL_CLOCK_NEW: | 2462 | case MSR_KVM_WALL_CLOCK_NEW: |
2608 | data = vcpu->kvm->arch.wall_clock; | 2463 | msr_info->data = vcpu->kvm->arch.wall_clock; |
2609 | break; | 2464 | break; |
2610 | case MSR_KVM_SYSTEM_TIME: | 2465 | case MSR_KVM_SYSTEM_TIME: |
2611 | case MSR_KVM_SYSTEM_TIME_NEW: | 2466 | case MSR_KVM_SYSTEM_TIME_NEW: |
2612 | data = vcpu->arch.time; | 2467 | msr_info->data = vcpu->arch.time; |
2613 | break; | 2468 | break; |
2614 | case MSR_KVM_ASYNC_PF_EN: | 2469 | case MSR_KVM_ASYNC_PF_EN: |
2615 | data = vcpu->arch.apf.msr_val; | 2470 | msr_info->data = vcpu->arch.apf.msr_val; |
2616 | break; | 2471 | break; |
2617 | case MSR_KVM_STEAL_TIME: | 2472 | case MSR_KVM_STEAL_TIME: |
2618 | data = vcpu->arch.st.msr_val; | 2473 | msr_info->data = vcpu->arch.st.msr_val; |
2619 | break; | 2474 | break; |
2620 | case MSR_KVM_PV_EOI_EN: | 2475 | case MSR_KVM_PV_EOI_EN: |
2621 | data = vcpu->arch.pv_eoi.msr_val; | 2476 | msr_info->data = vcpu->arch.pv_eoi.msr_val; |
2622 | break; | 2477 | break; |
2623 | case MSR_IA32_P5_MC_ADDR: | 2478 | case MSR_IA32_P5_MC_ADDR: |
2624 | case MSR_IA32_P5_MC_TYPE: | 2479 | case MSR_IA32_P5_MC_TYPE: |
@@ -2626,7 +2481,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2626 | case MSR_IA32_MCG_CTL: | 2481 | case MSR_IA32_MCG_CTL: |
2627 | case MSR_IA32_MCG_STATUS: | 2482 | case MSR_IA32_MCG_STATUS: |
2628 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: | 2483 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: |
2629 | return get_msr_mce(vcpu, msr, pdata); | 2484 | return get_msr_mce(vcpu, msr_info->index, &msr_info->data); |
2630 | case MSR_K7_CLK_CTL: | 2485 | case MSR_K7_CLK_CTL: |
2631 | /* | 2486 | /* |
2632 | * Provide expected ramp-up count for K7. All other | 2487 | * Provide expected ramp-up count for K7. All other |
@@ -2637,17 +2492,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2637 | * type 6, model 8 and higher from exploding due to | 2492 | * type 6, model 8 and higher from exploding due to |
2638 | * the rdmsr failing. | 2493 | * the rdmsr failing. |
2639 | */ | 2494 | */ |
2640 | data = 0x20000000; | 2495 | msr_info->data = 0x20000000; |
2641 | break; | 2496 | break; |
2642 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | 2497 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: |
2643 | if (kvm_hv_msr_partition_wide(msr)) { | 2498 | if (kvm_hv_msr_partition_wide(msr_info->index)) { |
2644 | int r; | 2499 | int r; |
2645 | mutex_lock(&vcpu->kvm->lock); | 2500 | mutex_lock(&vcpu->kvm->lock); |
2646 | r = get_msr_hyperv_pw(vcpu, msr, pdata); | 2501 | r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data); |
2647 | mutex_unlock(&vcpu->kvm->lock); | 2502 | mutex_unlock(&vcpu->kvm->lock); |
2648 | return r; | 2503 | return r; |
2649 | } else | 2504 | } else |
2650 | return get_msr_hyperv(vcpu, msr, pdata); | 2505 | return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data); |
2651 | break; | 2506 | break; |
2652 | case MSR_IA32_BBL_CR_CTL3: | 2507 | case MSR_IA32_BBL_CR_CTL3: |
2653 | /* This legacy MSR exists but isn't fully documented in current | 2508 | /* This legacy MSR exists but isn't fully documented in current |
@@ -2660,31 +2515,30 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2660 | * L2 cache control register 3: 64GB range, 256KB size, | 2515 | * L2 cache control register 3: 64GB range, 256KB size, |
2661 | * enabled, latency 0x1, configured | 2516 | * enabled, latency 0x1, configured |
2662 | */ | 2517 | */ |
2663 | data = 0xbe702111; | 2518 | msr_info->data = 0xbe702111; |
2664 | break; | 2519 | break; |
2665 | case MSR_AMD64_OSVW_ID_LENGTH: | 2520 | case MSR_AMD64_OSVW_ID_LENGTH: |
2666 | if (!guest_cpuid_has_osvw(vcpu)) | 2521 | if (!guest_cpuid_has_osvw(vcpu)) |
2667 | return 1; | 2522 | return 1; |
2668 | data = vcpu->arch.osvw.length; | 2523 | msr_info->data = vcpu->arch.osvw.length; |
2669 | break; | 2524 | break; |
2670 | case MSR_AMD64_OSVW_STATUS: | 2525 | case MSR_AMD64_OSVW_STATUS: |
2671 | if (!guest_cpuid_has_osvw(vcpu)) | 2526 | if (!guest_cpuid_has_osvw(vcpu)) |
2672 | return 1; | 2527 | return 1; |
2673 | data = vcpu->arch.osvw.status; | 2528 | msr_info->data = vcpu->arch.osvw.status; |
2674 | break; | 2529 | break; |
2675 | default: | 2530 | default: |
2676 | if (kvm_pmu_msr(vcpu, msr)) | 2531 | if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) |
2677 | return kvm_pmu_get_msr(vcpu, msr, pdata); | 2532 | return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); |
2678 | if (!ignore_msrs) { | 2533 | if (!ignore_msrs) { |
2679 | vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 2534 | vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index); |
2680 | return 1; | 2535 | return 1; |
2681 | } else { | 2536 | } else { |
2682 | vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); | 2537 | vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); |
2683 | data = 0; | 2538 | msr_info->data = 0; |
2684 | } | 2539 | } |
2685 | break; | 2540 | break; |
2686 | } | 2541 | } |
2687 | *pdata = data; | ||
2688 | return 0; | 2542 | return 0; |
2689 | } | 2543 | } |
2690 | EXPORT_SYMBOL_GPL(kvm_get_msr_common); | 2544 | EXPORT_SYMBOL_GPL(kvm_get_msr_common); |
@@ -2797,12 +2651,25 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2797 | case KVM_CAP_HYPERV_TIME: | 2651 | case KVM_CAP_HYPERV_TIME: |
2798 | case KVM_CAP_IOAPIC_POLARITY_IGNORED: | 2652 | case KVM_CAP_IOAPIC_POLARITY_IGNORED: |
2799 | case KVM_CAP_TSC_DEADLINE_TIMER: | 2653 | case KVM_CAP_TSC_DEADLINE_TIMER: |
2654 | case KVM_CAP_ENABLE_CAP_VM: | ||
2655 | case KVM_CAP_DISABLE_QUIRKS: | ||
2800 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2656 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2801 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2657 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2802 | case KVM_CAP_PCI_2_3: | 2658 | case KVM_CAP_PCI_2_3: |
2803 | #endif | 2659 | #endif |
2804 | r = 1; | 2660 | r = 1; |
2805 | break; | 2661 | break; |
2662 | case KVM_CAP_X86_SMM: | ||
2663 | /* SMBASE is usually relocated above 1M on modern chipsets, | ||
2664 | * and SMM handlers might indeed rely on 4G segment limits, | ||
2665 | * so do not report SMM to be available if real mode is | ||
2666 | * emulated via vm86 mode. Still, do not go to great lengths | ||
2667 | * to avoid userspace's usage of the feature, because it is a | ||
2668 | * fringe case that is not enabled except via specific settings | ||
2669 | * of the module parameters. | ||
2670 | */ | ||
2671 | r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); | ||
2672 | break; | ||
2806 | case KVM_CAP_COALESCED_MMIO: | 2673 | case KVM_CAP_COALESCED_MMIO: |
2807 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | 2674 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; |
2808 | break; | 2675 | break; |
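Because SMM handlers may depend on segment bases above 1 MB and limits up to 4 GB, KVM_CAP_X86_SMM is only reported when the new cpu_has_high_real_mode_segbase hook says real mode is not being emulated via vm86 (on VMX that means unrestricted_guest or emulate_invalid_guest_state, per the vmx.c hunk earlier in this diff). A hedged sketch of how a VMM would probe it:

    #include <fcntl.h>
    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Hedged sketch: ask KVM whether SMM can be exposed to guests on this host. */
    static int host_supports_smm(void)
    {
            int kvm_fd = open("/dev/kvm", O_RDWR);
            int r = (kvm_fd >= 0) ? ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_SMM) : 0;

            if (kvm_fd >= 0)
                    close(kvm_fd);
            return r > 0;
    }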
@@ -2859,7 +2726,7 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
2859 | if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) | 2726 | if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) |
2860 | goto out; | 2727 | goto out; |
2861 | n = msr_list.nmsrs; | 2728 | n = msr_list.nmsrs; |
2862 | msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs); | 2729 | msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs; |
2863 | if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) | 2730 | if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) |
2864 | goto out; | 2731 | goto out; |
2865 | r = -E2BIG; | 2732 | r = -E2BIG; |
@@ -2871,7 +2738,7 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
2871 | goto out; | 2738 | goto out; |
2872 | if (copy_to_user(user_msr_list->indices + num_msrs_to_save, | 2739 | if (copy_to_user(user_msr_list->indices + num_msrs_to_save, |
2873 | &emulated_msrs, | 2740 | &emulated_msrs, |
2874 | ARRAY_SIZE(emulated_msrs) * sizeof(u32))) | 2741 | num_emulated_msrs * sizeof(u32))) |
2875 | goto out; | 2742 | goto out; |
2876 | r = 0; | 2743 | r = 0; |
2877 | break; | 2744 | break; |
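With the KVM-specific MSRs moved into emulated_msrs and filtered at init time, the list reported by KVM_GET_MSR_INDEX_LIST is now num_msrs_to_save + num_emulated_msrs entries long, and includes MSR_IA32_SMBASE only when SMM is reportable. A hedged userspace sketch of the usual two-call enumeration pattern:

    #include <fcntl.h>
    #include <linux/kvm.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Hedged sketch: enumerate the MSRs userspace should save/restore. */
    static void dump_msr_index_list(void)
    {
            int kvm_fd = open("/dev/kvm", O_RDWR);
            struct kvm_msr_list probe = { .nmsrs = 0 };
            struct kvm_msr_list *list;
            unsigned int i;

            if (kvm_fd < 0)
                    return;
            ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &probe);  /* fails with E2BIG, fills nmsrs */
            list = calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
            if (list) {
                    list->nmsrs = probe.nmsrs;
                    if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list) == 0)
                            for (i = 0; i < list->nmsrs; i++)
                                    printf("0x%x\n", list->indices[i]);
                    free(list);
            }
            close(kvm_fd);
    }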
@@ -3015,6 +2882,13 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) | |||
3015 | return 0; | 2882 | return 0; |
3016 | } | 2883 | } |
3017 | 2884 | ||
2885 | static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu) | ||
2886 | { | ||
2887 | kvm_make_request(KVM_REQ_SMI, vcpu); | ||
2888 | |||
2889 | return 0; | ||
2890 | } | ||
2891 | |||
3018 | static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, | 2892 | static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, |
3019 | struct kvm_tpr_access_ctl *tac) | 2893 | struct kvm_tpr_access_ctl *tac) |
3020 | { | 2894 | { |
@@ -3120,8 +2994,15 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
3120 | 2994 | ||
3121 | events->sipi_vector = 0; /* never valid when reporting to user space */ | 2995 | events->sipi_vector = 0; /* never valid when reporting to user space */ |
3122 | 2996 | ||
2997 | events->smi.smm = is_smm(vcpu); | ||
2998 | events->smi.pending = vcpu->arch.smi_pending; | ||
2999 | events->smi.smm_inside_nmi = | ||
3000 | !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK); | ||
3001 | events->smi.latched_init = kvm_lapic_latched_init(vcpu); | ||
3002 | |||
3123 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 3003 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
3124 | | KVM_VCPUEVENT_VALID_SHADOW); | 3004 | | KVM_VCPUEVENT_VALID_SHADOW |
3005 | | KVM_VCPUEVENT_VALID_SMM); | ||
3125 | memset(&events->reserved, 0, sizeof(events->reserved)); | 3006 | memset(&events->reserved, 0, sizeof(events->reserved)); |
3126 | } | 3007 | } |
3127 | 3008 | ||
@@ -3130,7 +3011,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
3130 | { | 3011 | { |
3131 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | 3012 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
3132 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | 3013 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
3133 | | KVM_VCPUEVENT_VALID_SHADOW)) | 3014 | | KVM_VCPUEVENT_VALID_SHADOW |
3015 | | KVM_VCPUEVENT_VALID_SMM)) | ||
3134 | return -EINVAL; | 3016 | return -EINVAL; |
3135 | 3017 | ||
3136 | process_nmi(vcpu); | 3018 | process_nmi(vcpu); |
@@ -3155,6 +3037,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
3155 | kvm_vcpu_has_lapic(vcpu)) | 3037 | kvm_vcpu_has_lapic(vcpu)) |
3156 | vcpu->arch.apic->sipi_vector = events->sipi_vector; | 3038 | vcpu->arch.apic->sipi_vector = events->sipi_vector; |
3157 | 3039 | ||
3040 | if (events->flags & KVM_VCPUEVENT_VALID_SMM) { | ||
3041 | if (events->smi.smm) | ||
3042 | vcpu->arch.hflags |= HF_SMM_MASK; | ||
3043 | else | ||
3044 | vcpu->arch.hflags &= ~HF_SMM_MASK; | ||
3045 | vcpu->arch.smi_pending = events->smi.pending; | ||
3046 | if (events->smi.smm_inside_nmi) | ||
3047 | vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; | ||
3048 | else | ||
3049 | vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; | ||
3050 | if (kvm_vcpu_has_lapic(vcpu)) { | ||
3051 | if (events->smi.latched_init) | ||
3052 | set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | ||
3053 | else | ||
3054 | clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | ||
3055 | } | ||
3056 | } | ||
3057 | |||
3158 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 3058 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
3159 | 3059 | ||
3160 | return 0; | 3060 | return 0; |
@@ -3414,6 +3314,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3414 | r = kvm_vcpu_ioctl_nmi(vcpu); | 3314 | r = kvm_vcpu_ioctl_nmi(vcpu); |
3415 | break; | 3315 | break; |
3416 | } | 3316 | } |
3317 | case KVM_SMI: { | ||
3318 | r = kvm_vcpu_ioctl_smi(vcpu); | ||
3319 | break; | ||
3320 | } | ||
3417 | case KVM_SET_CPUID: { | 3321 | case KVM_SET_CPUID: { |
3418 | struct kvm_cpuid __user *cpuid_arg = argp; | 3322 | struct kvm_cpuid __user *cpuid_arg = argp; |
3419 | struct kvm_cpuid cpuid; | 3323 | struct kvm_cpuid cpuid; |
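KVM_SMI is the vcpu-level counterpart of KVM_NMI: it simply queues a machine-generated SMI via KVM_REQ_SMI. From userspace it is a plain no-argument ioctl on the vcpu descriptor, roughly:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hedged sketch: inject a system management interrupt into a vcpu. */
    static int inject_smi(int vcpu_fd)
    {
            return ioctl(vcpu_fd, KVM_SMI, 0);      /* KVM_SMI takes no argument */
    }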
@@ -3453,7 +3357,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3453 | break; | 3357 | break; |
3454 | } | 3358 | } |
3455 | case KVM_GET_MSRS: | 3359 | case KVM_GET_MSRS: |
3456 | r = msr_io(vcpu, argp, kvm_get_msr, 1); | 3360 | r = msr_io(vcpu, argp, do_get_msr, 1); |
3457 | break; | 3361 | break; |
3458 | case KVM_SET_MSRS: | 3362 | case KVM_SET_MSRS: |
3459 | r = msr_io(vcpu, argp, do_set_msr, 0); | 3363 | r = msr_io(vcpu, argp, do_set_msr, 0); |
@@ -3844,6 +3748,26 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, | |||
3844 | return 0; | 3748 | return 0; |
3845 | } | 3749 | } |
3846 | 3750 | ||
3751 | static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, | ||
3752 | struct kvm_enable_cap *cap) | ||
3753 | { | ||
3754 | int r; | ||
3755 | |||
3756 | if (cap->flags) | ||
3757 | return -EINVAL; | ||
3758 | |||
3759 | switch (cap->cap) { | ||
3760 | case KVM_CAP_DISABLE_QUIRKS: | ||
3761 | kvm->arch.disabled_quirks = cap->args[0]; | ||
3762 | r = 0; | ||
3763 | break; | ||
3764 | default: | ||
3765 | r = -EINVAL; | ||
3766 | break; | ||
3767 | } | ||
3768 | return r; | ||
3769 | } | ||
3770 | |||
3847 | long kvm_arch_vm_ioctl(struct file *filp, | 3771 | long kvm_arch_vm_ioctl(struct file *filp, |
3848 | unsigned int ioctl, unsigned long arg) | 3772 | unsigned int ioctl, unsigned long arg) |
3849 | { | 3773 | { |
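KVM_ENABLE_CAP is now also accepted on the VM descriptor (advertised via KVM_CAP_ENABLE_CAP_VM), and the only VM capability wired up here is KVM_CAP_DISABLE_QUIRKS, whose args[0] becomes kvm->arch.disabled_quirks. A hedged userspace sketch; quirk_mask stands in for whatever quirk bits the VMM wants disabled and is deliberately not spelled out here:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hedged sketch: turn off selected emulation quirks for a VM. */
    static int disable_quirks(int vm_fd, __u64 quirk_mask)
    {
            struct kvm_enable_cap cap = {
                    .cap     = KVM_CAP_DISABLE_QUIRKS,
                    .args[0] = quirk_mask,
            };

            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }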
@@ -4096,7 +4020,15 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
4096 | r = 0; | 4020 | r = 0; |
4097 | break; | 4021 | break; |
4098 | } | 4022 | } |
4023 | case KVM_ENABLE_CAP: { | ||
4024 | struct kvm_enable_cap cap; | ||
4099 | 4025 | ||
4026 | r = -EFAULT; | ||
4027 | if (copy_from_user(&cap, argp, sizeof(cap))) | ||
4028 | goto out; | ||
4029 | r = kvm_vm_ioctl_enable_cap(kvm, &cap); | ||
4030 | break; | ||
4031 | } | ||
4100 | default: | 4032 | default: |
4101 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | 4033 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); |
4102 | } | 4034 | } |
@@ -4109,8 +4041,7 @@ static void kvm_init_msr_list(void) | |||
4109 | u32 dummy[2]; | 4041 | u32 dummy[2]; |
4110 | unsigned i, j; | 4042 | unsigned i, j; |
4111 | 4043 | ||
4112 | /* skip the first msrs in the list. KVM-specific */ | 4044 | for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { |
4113 | for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { | ||
4114 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | 4045 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) |
4115 | continue; | 4046 | continue; |
4116 | 4047 | ||
@@ -4135,6 +4066,22 @@ static void kvm_init_msr_list(void) | |||
4135 | j++; | 4066 | j++; |
4136 | } | 4067 | } |
4137 | num_msrs_to_save = j; | 4068 | num_msrs_to_save = j; |
4069 | |||
4070 | for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { | ||
4071 | switch (emulated_msrs[i]) { | ||
4072 | case MSR_IA32_SMBASE: | ||
4073 | if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) | ||
4074 | continue; | ||
4075 | break; | ||
4076 | default: | ||
4077 | break; | ||
4078 | } | ||
4079 | |||
4080 | if (j < i) | ||
4081 | emulated_msrs[j] = emulated_msrs[i]; | ||
4082 | j++; | ||
4083 | } | ||
4084 | num_emulated_msrs = j; | ||
4138 | } | 4085 | } |
4139 | 4086 | ||
4140 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | 4087 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, |
@@ -4252,8 +4199,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | |||
4252 | 4199 | ||
4253 | if (gpa == UNMAPPED_GVA) | 4200 | if (gpa == UNMAPPED_GVA) |
4254 | return X86EMUL_PROPAGATE_FAULT; | 4201 | return X86EMUL_PROPAGATE_FAULT; |
4255 | ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data, | 4202 | ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data, |
4256 | offset, toread); | 4203 | offset, toread); |
4257 | if (ret < 0) { | 4204 | if (ret < 0) { |
4258 | r = X86EMUL_IO_NEEDED; | 4205 | r = X86EMUL_IO_NEEDED; |
4259 | goto out; | 4206 | goto out; |
@@ -4286,8 +4233,8 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, | |||
4286 | offset = addr & (PAGE_SIZE-1); | 4233 | offset = addr & (PAGE_SIZE-1); |
4287 | if (WARN_ON(offset + bytes > PAGE_SIZE)) | 4234 | if (WARN_ON(offset + bytes > PAGE_SIZE)) |
4288 | bytes = (unsigned)PAGE_SIZE - offset; | 4235 | bytes = (unsigned)PAGE_SIZE - offset; |
4289 | ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val, | 4236 | ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val, |
4290 | offset, bytes); | 4237 | offset, bytes); |
4291 | if (unlikely(ret < 0)) | 4238 | if (unlikely(ret < 0)) |
4292 | return X86EMUL_IO_NEEDED; | 4239 | return X86EMUL_IO_NEEDED; |
4293 | 4240 | ||
@@ -4333,7 +4280,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
4333 | 4280 | ||
4334 | if (gpa == UNMAPPED_GVA) | 4281 | if (gpa == UNMAPPED_GVA) |
4335 | return X86EMUL_PROPAGATE_FAULT; | 4282 | return X86EMUL_PROPAGATE_FAULT; |
4336 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); | 4283 | ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite); |
4337 | if (ret < 0) { | 4284 | if (ret < 0) { |
4338 | r = X86EMUL_IO_NEEDED; | 4285 | r = X86EMUL_IO_NEEDED; |
4339 | goto out; | 4286 | goto out; |
@@ -4386,7 +4333,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4386 | { | 4333 | { |
4387 | int ret; | 4334 | int ret; |
4388 | 4335 | ||
4389 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 4336 | ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); |
4390 | if (ret < 0) | 4337 | if (ret < 0) |
4391 | return 0; | 4338 | return 0; |
4392 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); | 4339 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
@@ -4420,7 +4367,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) | |||
4420 | static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, | 4367 | static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, |
4421 | void *val, int bytes) | 4368 | void *val, int bytes) |
4422 | { | 4369 | { |
4423 | return !kvm_read_guest(vcpu->kvm, gpa, val, bytes); | 4370 | return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes); |
4424 | } | 4371 | } |
4425 | 4372 | ||
4426 | static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, | 4373 | static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -4618,7 +4565,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
4618 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) | 4565 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) |
4619 | goto emul_write; | 4566 | goto emul_write; |
4620 | 4567 | ||
4621 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 4568 | page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); |
4622 | if (is_error_page(page)) | 4569 | if (is_error_page(page)) |
4623 | goto emul_write; | 4570 | goto emul_write; |
4624 | 4571 | ||
@@ -4646,7 +4593,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
4646 | if (!exchanged) | 4593 | if (!exchanged) |
4647 | return X86EMUL_CMPXCHG_FAILED; | 4594 | return X86EMUL_CMPXCHG_FAILED; |
4648 | 4595 | ||
4649 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); | 4596 | kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); |
4650 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); | 4597 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); |
4651 | 4598 | ||
4652 | return X86EMUL_CONTINUE; | 4599 | return X86EMUL_CONTINUE; |
@@ -4945,7 +4892,17 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, | |||
4945 | static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, | 4892 | static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, |
4946 | u32 msr_index, u64 *pdata) | 4893 | u32 msr_index, u64 *pdata) |
4947 | { | 4894 | { |
4948 | return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); | 4895 | struct msr_data msr; |
4896 | int r; | ||
4897 | |||
4898 | msr.index = msr_index; | ||
4899 | msr.host_initiated = false; | ||
4900 | r = kvm_get_msr(emul_to_vcpu(ctxt), &msr); | ||
4901 | if (r) | ||
4902 | return r; | ||
4903 | |||
4904 | *pdata = msr.data; | ||
4905 | return 0; | ||
4949 | } | 4906 | } |
4950 | 4907 | ||
4951 | static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, | 4908 | static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, |
@@ -4959,16 +4916,30 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, | |||
4959 | return kvm_set_msr(emul_to_vcpu(ctxt), &msr); | 4916 | return kvm_set_msr(emul_to_vcpu(ctxt), &msr); |
4960 | } | 4917 | } |
4961 | 4918 | ||
4919 | static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) | ||
4920 | { | ||
4921 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4922 | |||
4923 | return vcpu->arch.smbase; | ||
4924 | } | ||
4925 | |||
4926 | static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) | ||
4927 | { | ||
4928 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4929 | |||
4930 | vcpu->arch.smbase = smbase; | ||
4931 | } | ||
4932 | |||
4962 | static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, | 4933 | static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, |
4963 | u32 pmc) | 4934 | u32 pmc) |
4964 | { | 4935 | { |
4965 | return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc); | 4936 | return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc); |
4966 | } | 4937 | } |
4967 | 4938 | ||
4968 | static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, | 4939 | static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, |
4969 | u32 pmc, u64 *pdata) | 4940 | u32 pmc, u64 *pdata) |
4970 | { | 4941 | { |
4971 | return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata); | 4942 | return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata); |
4972 | } | 4943 | } |
4973 | 4944 | ||
4974 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) | 4945 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) |
@@ -5044,6 +5015,8 @@ static const struct x86_emulate_ops emulate_ops = { | |||
5044 | .cpl = emulator_get_cpl, | 5015 | .cpl = emulator_get_cpl, |
5045 | .get_dr = emulator_get_dr, | 5016 | .get_dr = emulator_get_dr, |
5046 | .set_dr = emulator_set_dr, | 5017 | .set_dr = emulator_set_dr, |
5018 | .get_smbase = emulator_get_smbase, | ||
5019 | .set_smbase = emulator_set_smbase, | ||
5047 | .set_msr = emulator_set_msr, | 5020 | .set_msr = emulator_set_msr, |
5048 | .get_msr = emulator_get_msr, | 5021 | .get_msr = emulator_get_msr, |
5049 | .check_pmc = emulator_check_pmc, | 5022 | .check_pmc = emulator_check_pmc, |
@@ -5105,7 +5078,10 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
5105 | (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 : | 5078 | (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 : |
5106 | cs_db ? X86EMUL_MODE_PROT32 : | 5079 | cs_db ? X86EMUL_MODE_PROT32 : |
5107 | X86EMUL_MODE_PROT16; | 5080 | X86EMUL_MODE_PROT16; |
5108 | ctxt->guest_mode = is_guest_mode(vcpu); | 5081 | BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK); |
5082 | BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); | ||
5083 | BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); | ||
5084 | ctxt->emul_flags = vcpu->arch.hflags; | ||
5109 | 5085 | ||
5110 | init_decode_cache(ctxt); | 5086 | init_decode_cache(ctxt); |
5111 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | 5087 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; |
@@ -5274,6 +5250,34 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | |||
5274 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); | 5250 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); |
5275 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); | 5251 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); |
5276 | 5252 | ||
5253 | static void kvm_smm_changed(struct kvm_vcpu *vcpu) | ||
5254 | { | ||
5255 | if (!(vcpu->arch.hflags & HF_SMM_MASK)) { | ||
5256 | /* This is a good place to trace that we are exiting SMM. */ | ||
5257 | trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false); | ||
5258 | |||
5259 | if (unlikely(vcpu->arch.smi_pending)) { | ||
5260 | kvm_make_request(KVM_REQ_SMI, vcpu); | ||
5261 | vcpu->arch.smi_pending = 0; | ||
5262 | } else { | ||
5263 | /* Process a latched INIT, if any. */ | ||
5264 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5265 | } | ||
5266 | } | ||
5267 | |||
5268 | kvm_mmu_reset_context(vcpu); | ||
5269 | } | ||
5270 | |||
5271 | static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags) | ||
5272 | { | ||
5273 | unsigned changed = vcpu->arch.hflags ^ emul_flags; | ||
5274 | |||
5275 | vcpu->arch.hflags = emul_flags; | ||
5276 | |||
5277 | if (changed & HF_SMM_MASK) | ||
5278 | kvm_smm_changed(vcpu); | ||
5279 | } | ||
5280 | |||
5277 | static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, | 5281 | static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, |
5278 | unsigned long *db) | 5282 | unsigned long *db) |
5279 | { | 5283 | { |
@@ -5473,6 +5477,8 @@ restart: | |||
5473 | unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); | 5477 | unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); |
5474 | toggle_interruptibility(vcpu, ctxt->interruptibility); | 5478 | toggle_interruptibility(vcpu, ctxt->interruptibility); |
5475 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 5479 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
5480 | if (vcpu->arch.hflags != ctxt->emul_flags) | ||
5481 | kvm_set_hflags(vcpu, ctxt->emul_flags); | ||
5476 | kvm_rip_write(vcpu, ctxt->eip); | 5482 | kvm_rip_write(vcpu, ctxt->eip); |
5477 | if (r == EMULATE_DONE) | 5483 | if (r == EMULATE_DONE) |
5478 | kvm_vcpu_check_singlestep(vcpu, rflags, &r); | 5484 | kvm_vcpu_check_singlestep(vcpu, rflags, &r); |
@@ -5951,6 +5957,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | |||
5951 | lapic_irq.shorthand = 0; | 5957 | lapic_irq.shorthand = 0; |
5952 | lapic_irq.dest_mode = 0; | 5958 | lapic_irq.dest_mode = 0; |
5953 | lapic_irq.dest_id = apicid; | 5959 | lapic_irq.dest_id = apicid; |
5960 | lapic_irq.msi_redir_hint = false; | ||
5954 | 5961 | ||
5955 | lapic_irq.delivery_mode = APIC_DM_REMRD; | 5962 | lapic_irq.delivery_mode = APIC_DM_REMRD; |
5956 | kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); | 5963 | kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); |
@@ -6038,6 +6045,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) | |||
6038 | struct kvm_run *kvm_run = vcpu->run; | 6045 | struct kvm_run *kvm_run = vcpu->run; |
6039 | 6046 | ||
6040 | kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | 6047 | kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; |
6048 | kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; | ||
6041 | kvm_run->cr8 = kvm_get_cr8(vcpu); | 6049 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
6042 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 6050 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
6043 | if (irqchip_in_kernel(vcpu->kvm)) | 6051 | if (irqchip_in_kernel(vcpu->kvm)) |
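The kvm_run->flags field added here lets userspace see on every exit whether the vcpu was in SMM. A minimal userspace-side sketch, assuming a vcpu fd and its mmap'ed struct kvm_run already exist and that the uapi headers carry KVM_RUN_X86_SMM from this series (names and error handling are illustrative):

    /* Sketch: check the SMM flag after KVM_RUN returns. */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdio.h>

    static void run_once(int vcpu_fd, struct kvm_run *run)
    {
            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                    perror("KVM_RUN");
                    return;
            }
            if (run->flags & KVM_RUN_X86_SMM)
                    printf("exit %u taken while the guest was in SMM\n",
                           run->exit_reason);
    }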
@@ -6161,6 +6169,233 @@ static void process_nmi(struct kvm_vcpu *vcpu) | |||
6161 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6169 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6162 | } | 6170 | } |
6163 | 6171 | ||
6172 | #define put_smstate(type, buf, offset, val) \ | ||
6173 | *(type *)((buf) + (offset) - 0x7e00) = val | ||
6174 | |||
6175 | static u32 process_smi_get_segment_flags(struct kvm_segment *seg) | ||
6176 | { | ||
6177 | u32 flags = 0; | ||
6178 | flags |= seg->g << 23; | ||
6179 | flags |= seg->db << 22; | ||
6180 | flags |= seg->l << 21; | ||
6181 | flags |= seg->avl << 20; | ||
6182 | flags |= seg->present << 15; | ||
6183 | flags |= seg->dpl << 13; | ||
6184 | flags |= seg->s << 12; | ||
6185 | flags |= seg->type << 8; | ||
6186 | return flags; | ||
6187 | } | ||
6188 | |||
6189 | static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) | ||
6190 | { | ||
6191 | struct kvm_segment seg; | ||
6192 | int offset; | ||
6193 | |||
6194 | kvm_get_segment(vcpu, &seg, n); | ||
6195 | put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector); | ||
6196 | |||
6197 | if (n < 3) | ||
6198 | offset = 0x7f84 + n * 12; | ||
6199 | else | ||
6200 | offset = 0x7f2c + (n - 3) * 12; | ||
6201 | |||
6202 | put_smstate(u32, buf, offset + 8, seg.base); | ||
6203 | put_smstate(u32, buf, offset + 4, seg.limit); | ||
6204 | put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg)); | ||
6205 | } | ||
6206 | |||
6207 | static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) | ||
6208 | { | ||
6209 | struct kvm_segment seg; | ||
6210 | int offset; | ||
6211 | u16 flags; | ||
6212 | |||
6213 | kvm_get_segment(vcpu, &seg, n); | ||
6214 | offset = 0x7e00 + n * 16; | ||
6215 | |||
6216 | flags = process_smi_get_segment_flags(&seg) >> 8; | ||
6217 | put_smstate(u16, buf, offset, seg.selector); | ||
6218 | put_smstate(u16, buf, offset + 2, flags); | ||
6219 | put_smstate(u32, buf, offset + 4, seg.limit); | ||
6220 | put_smstate(u64, buf, offset + 8, seg.base); | ||
6221 | } | ||
6222 | |||
6223 | static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) | ||
6224 | { | ||
6225 | struct desc_ptr dt; | ||
6226 | struct kvm_segment seg; | ||
6227 | unsigned long val; | ||
6228 | int i; | ||
6229 | |||
6230 | put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); | ||
6231 | put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); | ||
6232 | put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); | ||
6233 | put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); | ||
6234 | |||
6235 | for (i = 0; i < 8; i++) | ||
6236 | put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i)); | ||
6237 | |||
6238 | kvm_get_dr(vcpu, 6, &val); | ||
6239 | put_smstate(u32, buf, 0x7fcc, (u32)val); | ||
6240 | kvm_get_dr(vcpu, 7, &val); | ||
6241 | put_smstate(u32, buf, 0x7fc8, (u32)val); | ||
6242 | |||
6243 | kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); | ||
6244 | put_smstate(u32, buf, 0x7fc4, seg.selector); | ||
6245 | put_smstate(u32, buf, 0x7f64, seg.base); | ||
6246 | put_smstate(u32, buf, 0x7f60, seg.limit); | ||
6247 | put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg)); | ||
6248 | |||
6249 | kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); | ||
6250 | put_smstate(u32, buf, 0x7fc0, seg.selector); | ||
6251 | put_smstate(u32, buf, 0x7f80, seg.base); | ||
6252 | put_smstate(u32, buf, 0x7f7c, seg.limit); | ||
6253 | put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg)); | ||
6254 | |||
6255 | kvm_x86_ops->get_gdt(vcpu, &dt); | ||
6256 | put_smstate(u32, buf, 0x7f74, dt.address); | ||
6257 | put_smstate(u32, buf, 0x7f70, dt.size); | ||
6258 | |||
6259 | kvm_x86_ops->get_idt(vcpu, &dt); | ||
6260 | put_smstate(u32, buf, 0x7f58, dt.address); | ||
6261 | put_smstate(u32, buf, 0x7f54, dt.size); | ||
6262 | |||
6263 | for (i = 0; i < 6; i++) | ||
6264 | process_smi_save_seg_32(vcpu, buf, i); | ||
6265 | |||
6266 | put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); | ||
6267 | |||
6268 | /* revision id */ | ||
6269 | put_smstate(u32, buf, 0x7efc, 0x00020000); | ||
6270 | put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); | ||
6271 | } | ||
6272 | |||
6273 | static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) | ||
6274 | { | ||
6275 | #ifdef CONFIG_X86_64 | ||
6276 | struct desc_ptr dt; | ||
6277 | struct kvm_segment seg; | ||
6278 | unsigned long val; | ||
6279 | int i; | ||
6280 | |||
6281 | for (i = 0; i < 16; i++) | ||
6282 | put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i)); | ||
6283 | |||
6284 | put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu)); | ||
6285 | put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); | ||
6286 | |||
6287 | kvm_get_dr(vcpu, 6, &val); | ||
6288 | put_smstate(u64, buf, 0x7f68, val); | ||
6289 | kvm_get_dr(vcpu, 7, &val); | ||
6290 | put_smstate(u64, buf, 0x7f60, val); | ||
6291 | |||
6292 | put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); | ||
6293 | put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); | ||
6294 | put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); | ||
6295 | |||
6296 | put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase); | ||
6297 | |||
6298 | /* revision id */ | ||
6299 | put_smstate(u32, buf, 0x7efc, 0x00020064); | ||
6300 | |||
6301 | put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer); | ||
6302 | |||
6303 | kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); | ||
6304 | put_smstate(u16, buf, 0x7e90, seg.selector); | ||
6305 | put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8); | ||
6306 | put_smstate(u32, buf, 0x7e94, seg.limit); | ||
6307 | put_smstate(u64, buf, 0x7e98, seg.base); | ||
6308 | |||
6309 | kvm_x86_ops->get_idt(vcpu, &dt); | ||
6310 | put_smstate(u32, buf, 0x7e84, dt.size); | ||
6311 | put_smstate(u64, buf, 0x7e88, dt.address); | ||
6312 | |||
6313 | kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); | ||
6314 | put_smstate(u16, buf, 0x7e70, seg.selector); | ||
6315 | put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8); | ||
6316 | put_smstate(u32, buf, 0x7e74, seg.limit); | ||
6317 | put_smstate(u64, buf, 0x7e78, seg.base); | ||
6318 | |||
6319 | kvm_x86_ops->get_gdt(vcpu, &dt); | ||
6320 | put_smstate(u32, buf, 0x7e64, dt.size); | ||
6321 | put_smstate(u64, buf, 0x7e68, dt.address); | ||
6322 | |||
6323 | for (i = 0; i < 6; i++) | ||
6324 | process_smi_save_seg_64(vcpu, buf, i); | ||
6325 | #else | ||
6326 | WARN_ON_ONCE(1); | ||
6327 | #endif | ||
6328 | } | ||
6329 | |||
6330 | static void process_smi(struct kvm_vcpu *vcpu) | ||
6331 | { | ||
6332 | struct kvm_segment cs, ds; | ||
6333 | char buf[512]; | ||
6334 | u32 cr0; | ||
6335 | |||
6336 | if (is_smm(vcpu)) { | ||
6337 | vcpu->arch.smi_pending = true; | ||
6338 | return; | ||
6339 | } | ||
6340 | |||
6341 | trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); | ||
6342 | vcpu->arch.hflags |= HF_SMM_MASK; | ||
6343 | memset(buf, 0, 512); | ||
6344 | if (guest_cpuid_has_longmode(vcpu)) | ||
6345 | process_smi_save_state_64(vcpu, buf); | ||
6346 | else | ||
6347 | process_smi_save_state_32(vcpu, buf); | ||
6348 | |||
6349 | kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); | ||
6350 | |||
6351 | if (kvm_x86_ops->get_nmi_mask(vcpu)) | ||
6352 | vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; | ||
6353 | else | ||
6354 | kvm_x86_ops->set_nmi_mask(vcpu, true); | ||
6355 | |||
6356 | kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); | ||
6357 | kvm_rip_write(vcpu, 0x8000); | ||
6358 | |||
6359 | cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); | ||
6360 | kvm_x86_ops->set_cr0(vcpu, cr0); | ||
6361 | vcpu->arch.cr0 = cr0; | ||
6362 | |||
6363 | kvm_x86_ops->set_cr4(vcpu, 0); | ||
6364 | |||
6365 | __kvm_set_dr(vcpu, 7, DR7_FIXED_1); | ||
6366 | |||
6367 | cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; | ||
6368 | cs.base = vcpu->arch.smbase; | ||
6369 | |||
6370 | ds.selector = 0; | ||
6371 | ds.base = 0; | ||
6372 | |||
6373 | cs.limit = ds.limit = 0xffffffff; | ||
6374 | cs.type = ds.type = 0x3; | ||
6375 | cs.dpl = ds.dpl = 0; | ||
6376 | cs.db = ds.db = 0; | ||
6377 | cs.s = ds.s = 1; | ||
6378 | cs.l = ds.l = 0; | ||
6379 | cs.g = ds.g = 1; | ||
6380 | cs.avl = ds.avl = 0; | ||
6381 | cs.present = ds.present = 1; | ||
6382 | cs.unusable = ds.unusable = 0; | ||
6383 | cs.padding = ds.padding = 0; | ||
6384 | |||
6385 | kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); | ||
6386 | kvm_set_segment(vcpu, &ds, VCPU_SREG_DS); | ||
6387 | kvm_set_segment(vcpu, &ds, VCPU_SREG_ES); | ||
6388 | kvm_set_segment(vcpu, &ds, VCPU_SREG_FS); | ||
6389 | kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); | ||
6390 | kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); | ||
6391 | |||
6392 | if (guest_cpuid_has_longmode(vcpu)) | ||
6393 | kvm_x86_ops->set_efer(vcpu, 0); | ||
6394 | |||
6395 | kvm_update_cpuid(vcpu); | ||
6396 | kvm_mmu_reset_context(vcpu); | ||
6397 | } | ||
6398 | |||
6164 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | 6399 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
6165 | { | 6400 | { |
6166 | u64 eoi_exit_bitmap[4]; | 6401 | u64 eoi_exit_bitmap[4]; |
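put_smstate() writes a typed value at an architectural offset inside the 512-byte SMRAM state-save area; the SDM gives those offsets relative to SMBASE + 0x8000, with the save area occupying 0x7e00..0x7fff, which is why the macro subtracts 0x7e00 before indexing the local buffer. A small stand-alone sketch of the same addressing trick (scratch buffer only, not guest memory):

    /* Sketch: typed stores into a buffer that models SMRAM 0x7e00-0x7fff,
     * mirroring the put_smstate() idea above. */
    #include <stdint.h>
    #include <string.h>
    #include <assert.h>

    #define put_smstate(type, buf, offset, val) \
            (*(type *)((buf) + ((offset) - 0x7e00)) = (val))

    int main(void)
    {
            char smram[512];

            memset(smram, 0, sizeof(smram));
            put_smstate(uint32_t, smram, 0x7efc, 0x00020064);   /* 64-bit revision id */
            put_smstate(uint64_t, smram, 0x7f78, 0x8000);       /* saved RIP */

            assert(*(uint32_t *)(smram + (0x7efc - 0x7e00)) == 0x00020064);
            return 0;
    }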
@@ -6269,12 +6504,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6269 | } | 6504 | } |
6270 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) | 6505 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) |
6271 | record_steal_time(vcpu); | 6506 | record_steal_time(vcpu); |
6507 | if (kvm_check_request(KVM_REQ_SMI, vcpu)) | ||
6508 | process_smi(vcpu); | ||
6272 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | 6509 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) |
6273 | process_nmi(vcpu); | 6510 | process_nmi(vcpu); |
6274 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) | 6511 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) |
6275 | kvm_handle_pmu_event(vcpu); | 6512 | kvm_pmu_handle_event(vcpu); |
6276 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 6513 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
6277 | kvm_deliver_pmi(vcpu); | 6514 | kvm_pmu_deliver_pmi(vcpu); |
6278 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) | 6515 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
6279 | vcpu_scan_ioapic(vcpu); | 6516 | vcpu_scan_ioapic(vcpu); |
6280 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) | 6517 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) |
@@ -6346,7 +6583,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6346 | if (req_immediate_exit) | 6583 | if (req_immediate_exit) |
6347 | smp_send_reschedule(vcpu->cpu); | 6584 | smp_send_reschedule(vcpu->cpu); |
6348 | 6585 | ||
6349 | kvm_guest_enter(); | 6586 | __kvm_guest_enter(); |
6350 | 6587 | ||
6351 | if (unlikely(vcpu->arch.switch_db_regs)) { | 6588 | if (unlikely(vcpu->arch.switch_db_regs)) { |
6352 | set_debugreg(0, 7); | 6589 | set_debugreg(0, 7); |
@@ -7038,16 +7275,25 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
7038 | { | 7275 | { |
7039 | kvm_put_guest_xcr0(vcpu); | 7276 | kvm_put_guest_xcr0(vcpu); |
7040 | 7277 | ||
7041 | if (!vcpu->guest_fpu_loaded) | 7278 | if (!vcpu->guest_fpu_loaded) { |
7279 | vcpu->fpu_counter = 0; | ||
7042 | return; | 7280 | return; |
7281 | } | ||
7043 | 7282 | ||
7044 | vcpu->guest_fpu_loaded = 0; | 7283 | vcpu->guest_fpu_loaded = 0; |
7045 | copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); | 7284 | copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); |
7046 | __kernel_fpu_end(); | 7285 | __kernel_fpu_end(); |
7047 | ++vcpu->stat.fpu_reload; | 7286 | ++vcpu->stat.fpu_reload; |
7048 | if (!vcpu->arch.eager_fpu) | 7287 | /* |
7049 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); | 7288 | * If using eager FPU mode, or if the guest is a frequent user |
7050 | 7289 | * of the FPU, just leave the FPU active for next time. | |
7290 | * Every 255 times fpu_counter rolls over to 0; a guest that uses | ||
7291 | * the FPU in bursts will revert to loading it on demand. | ||
7292 | */ | ||
7293 | if (!vcpu->arch.eager_fpu) { | ||
7294 | if (++vcpu->fpu_counter < 5) | ||
7295 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); | ||
7296 | } | ||
7051 | trace_kvm_fpu(0); | 7297 | trace_kvm_fpu(0); |
7052 | } | 7298 | } |
7053 | 7299 | ||
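The fpu_counter logic above implements a simple hysteresis: in lazy mode the FPU is deactivated only for the first few reloads, then left resident, and because the counter is an unsigned char it wraps after 255 puts, giving a bursty guest a later chance to fall back to on-demand loading. A stand-alone model of just that heuristic (not the kernel code, no real FPU state involved):

    /* Sketch: the lazy-FPU drop/keep decision as a pure function. */
    #include <stdbool.h>
    #include <stdio.h>

    static unsigned char fpu_counter;

    static bool should_deactivate_fpu(bool eager_fpu)
    {
            if (eager_fpu)
                    return false;           /* eager mode never drops the FPU */
            return ++fpu_counter < 5;       /* lazy mode: drop it only the first few times */
    }

    int main(void)
    {
            for (int i = 0; i < 8; i++)
                    printf("put #%d -> deactivate=%d\n", i,
                           should_deactivate_fpu(false));
            return 0;
    }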
@@ -7083,14 +7329,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
7083 | { | 7329 | { |
7084 | int r; | 7330 | int r; |
7085 | 7331 | ||
7086 | vcpu->arch.mtrr_state.have_fixed = 1; | 7332 | kvm_vcpu_mtrr_init(vcpu); |
7087 | r = vcpu_load(vcpu); | 7333 | r = vcpu_load(vcpu); |
7088 | if (r) | 7334 | if (r) |
7089 | return r; | 7335 | return r; |
7090 | kvm_vcpu_reset(vcpu); | 7336 | kvm_vcpu_reset(vcpu, false); |
7091 | kvm_mmu_setup(vcpu); | 7337 | kvm_mmu_setup(vcpu); |
7092 | vcpu_put(vcpu); | 7338 | vcpu_put(vcpu); |
7093 | |||
7094 | return r; | 7339 | return r; |
7095 | } | 7340 | } |
7096 | 7341 | ||
@@ -7107,6 +7352,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
7107 | kvm_write_tsc(vcpu, &msr); | 7352 | kvm_write_tsc(vcpu, &msr); |
7108 | vcpu_put(vcpu); | 7353 | vcpu_put(vcpu); |
7109 | 7354 | ||
7355 | if (!kvmclock_periodic_sync) | ||
7356 | return; | ||
7357 | |||
7110 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, | 7358 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, |
7111 | KVMCLOCK_SYNC_PERIOD); | 7359 | KVMCLOCK_SYNC_PERIOD); |
7112 | } | 7360 | } |
@@ -7124,8 +7372,10 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
7124 | kvm_x86_ops->vcpu_free(vcpu); | 7372 | kvm_x86_ops->vcpu_free(vcpu); |
7125 | } | 7373 | } |
7126 | 7374 | ||
7127 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | 7375 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
7128 | { | 7376 | { |
7377 | vcpu->arch.hflags = 0; | ||
7378 | |||
7129 | atomic_set(&vcpu->arch.nmi_queued, 0); | 7379 | atomic_set(&vcpu->arch.nmi_queued, 0); |
7130 | vcpu->arch.nmi_pending = 0; | 7380 | vcpu->arch.nmi_pending = 0; |
7131 | vcpu->arch.nmi_injected = false; | 7381 | vcpu->arch.nmi_injected = false; |
@@ -7151,13 +7401,16 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
7151 | kvm_async_pf_hash_reset(vcpu); | 7401 | kvm_async_pf_hash_reset(vcpu); |
7152 | vcpu->arch.apf.halted = false; | 7402 | vcpu->arch.apf.halted = false; |
7153 | 7403 | ||
7154 | kvm_pmu_reset(vcpu); | 7404 | if (!init_event) { |
7405 | kvm_pmu_reset(vcpu); | ||
7406 | vcpu->arch.smbase = 0x30000; | ||
7407 | } | ||
7155 | 7408 | ||
7156 | memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); | 7409 | memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); |
7157 | vcpu->arch.regs_avail = ~0; | 7410 | vcpu->arch.regs_avail = ~0; |
7158 | vcpu->arch.regs_dirty = ~0; | 7411 | vcpu->arch.regs_dirty = ~0; |
7159 | 7412 | ||
7160 | kvm_x86_ops->vcpu_reset(vcpu); | 7413 | kvm_x86_ops->vcpu_reset(vcpu, init_event); |
7161 | } | 7414 | } |
7162 | 7415 | ||
7163 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) | 7416 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) |
@@ -7356,6 +7609,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
7356 | 7609 | ||
7357 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | 7610 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); |
7358 | 7611 | ||
7612 | vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; | ||
7613 | |||
7359 | kvm_async_pf_hash_reset(vcpu); | 7614 | kvm_async_pf_hash_reset(vcpu); |
7360 | kvm_pmu_init(vcpu); | 7615 | kvm_pmu_init(vcpu); |
7361 | 7616 | ||
@@ -7462,6 +7717,40 @@ void kvm_arch_sync_events(struct kvm *kvm) | |||
7462 | kvm_free_pit(kvm); | 7717 | kvm_free_pit(kvm); |
7463 | } | 7718 | } |
7464 | 7719 | ||
7720 | int __x86_set_memory_region(struct kvm *kvm, | ||
7721 | const struct kvm_userspace_memory_region *mem) | ||
7722 | { | ||
7723 | int i, r; | ||
7724 | |||
7725 | /* Called with kvm->slots_lock held. */ | ||
7726 | BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM); | ||
7727 | |||
7728 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | ||
7729 | struct kvm_userspace_memory_region m = *mem; | ||
7730 | |||
7731 | m.slot |= i << 16; | ||
7732 | r = __kvm_set_memory_region(kvm, &m); | ||
7733 | if (r < 0) | ||
7734 | return r; | ||
7735 | } | ||
7736 | |||
7737 | return 0; | ||
7738 | } | ||
7739 | EXPORT_SYMBOL_GPL(__x86_set_memory_region); | ||
7740 | |||
7741 | int x86_set_memory_region(struct kvm *kvm, | ||
7742 | const struct kvm_userspace_memory_region *mem) | ||
7743 | { | ||
7744 | int r; | ||
7745 | |||
7746 | mutex_lock(&kvm->slots_lock); | ||
7747 | r = __x86_set_memory_region(kvm, mem); | ||
7748 | mutex_unlock(&kvm->slots_lock); | ||
7749 | |||
7750 | return r; | ||
7751 | } | ||
7752 | EXPORT_SYMBOL_GPL(x86_set_memory_region); | ||
7753 | |||
7465 | void kvm_arch_destroy_vm(struct kvm *kvm) | 7754 | void kvm_arch_destroy_vm(struct kvm *kvm) |
7466 | { | 7755 | { |
7467 | if (current->mm == kvm->mm) { | 7756 | if (current->mm == kvm->mm) { |
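__x86_set_memory_region() installs the same private slot in every address space by folding the address-space index into bits 16-31 of the slot id, matching the "m.slot |= i << 16" above and the bits 16-31 convention described for the dirty-log ioctl. A small sketch of that packing, with hypothetical helper names:

    /* Sketch: pack/unpack a slot id and an address-space id into one __u32. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pack_slot(uint16_t as_id, uint16_t slot)
    {
            return ((uint32_t)as_id << 16) | slot;
    }

    static uint16_t slot_of(uint32_t packed)  { return packed & 0xffff; }
    static uint16_t as_id_of(uint32_t packed) { return packed >> 16; }

    int main(void)
    {
            uint32_t s = pack_slot(1, 3);   /* slot 3 in address space 1 */

            printf("slot=%u as_id=%u\n", slot_of(s), as_id_of(s));
            return 0;
    }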
@@ -7473,13 +7762,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
7473 | struct kvm_userspace_memory_region mem; | 7762 | struct kvm_userspace_memory_region mem; |
7474 | memset(&mem, 0, sizeof(mem)); | 7763 | memset(&mem, 0, sizeof(mem)); |
7475 | mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 7764 | mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
7476 | kvm_set_memory_region(kvm, &mem); | 7765 | x86_set_memory_region(kvm, &mem); |
7477 | 7766 | ||
7478 | mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | 7767 | mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; |
7479 | kvm_set_memory_region(kvm, &mem); | 7768 | x86_set_memory_region(kvm, &mem); |
7480 | 7769 | ||
7481 | mem.slot = TSS_PRIVATE_MEMSLOT; | 7770 | mem.slot = TSS_PRIVATE_MEMSLOT; |
7482 | kvm_set_memory_region(kvm, &mem); | 7771 | x86_set_memory_region(kvm, &mem); |
7483 | } | 7772 | } |
7484 | kvm_iommu_unmap_guest(kvm); | 7773 | kvm_iommu_unmap_guest(kvm); |
7485 | kfree(kvm->arch.vpic); | 7774 | kfree(kvm->arch.vpic); |
@@ -7568,18 +7857,18 @@ out_free: | |||
7568 | return -ENOMEM; | 7857 | return -ENOMEM; |
7569 | } | 7858 | } |
7570 | 7859 | ||
7571 | void kvm_arch_memslots_updated(struct kvm *kvm) | 7860 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) |
7572 | { | 7861 | { |
7573 | /* | 7862 | /* |
7574 | * memslots->generation has been incremented. | 7863 | * memslots->generation has been incremented. |
7575 | * mmio generation may have reached its maximum value. | 7864 | * mmio generation may have reached its maximum value. |
7576 | */ | 7865 | */ |
7577 | kvm_mmu_invalidate_mmio_sptes(kvm); | 7866 | kvm_mmu_invalidate_mmio_sptes(kvm, slots); |
7578 | } | 7867 | } |
7579 | 7868 | ||
7580 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 7869 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
7581 | struct kvm_memory_slot *memslot, | 7870 | struct kvm_memory_slot *memslot, |
7582 | struct kvm_userspace_memory_region *mem, | 7871 | const struct kvm_userspace_memory_region *mem, |
7583 | enum kvm_mr_change change) | 7872 | enum kvm_mr_change change) |
7584 | { | 7873 | { |
7585 | /* | 7874 | /* |
@@ -7657,14 +7946,14 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, | |||
7657 | } | 7946 | } |
7658 | 7947 | ||
7659 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 7948 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
7660 | struct kvm_userspace_memory_region *mem, | 7949 | const struct kvm_userspace_memory_region *mem, |
7661 | const struct kvm_memory_slot *old, | 7950 | const struct kvm_memory_slot *old, |
7951 | const struct kvm_memory_slot *new, | ||
7662 | enum kvm_mr_change change) | 7952 | enum kvm_mr_change change) |
7663 | { | 7953 | { |
7664 | struct kvm_memory_slot *new; | ||
7665 | int nr_mmu_pages = 0; | 7954 | int nr_mmu_pages = 0; |
7666 | 7955 | ||
7667 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { | 7956 | if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) { |
7668 | int ret; | 7957 | int ret; |
7669 | 7958 | ||
7670 | ret = vm_munmap(old->userspace_addr, | 7959 | ret = vm_munmap(old->userspace_addr, |
@@ -7681,9 +7970,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7681 | if (nr_mmu_pages) | 7970 | if (nr_mmu_pages) |
7682 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 7971 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
7683 | 7972 | ||
7684 | /* It's OK to get 'new' slot here as it has already been installed */ | ||
7685 | new = id_to_memslot(kvm->memslots, mem->slot); | ||
7686 | |||
7687 | /* | 7973 | /* |
7688 | * Dirty logging tracks sptes in 4k granularity, meaning that large | 7974 | * Dirty logging tracks sptes in 4k granularity, meaning that large |
7689 | * sptes have to be split. If live migration is successful, the guest | 7975 | * sptes have to be split. If live migration is successful, the guest |
@@ -7708,9 +7994,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7708 | * been zapped so no dirty logging stuff is needed for old slot. For | 7994 | * been zapped so no dirty logging stuff is needed for old slot. For |
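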
7709 | * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the | 7995 | * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the |
7710 | * new and it's also covered when dealing with the new slot. | 7996 | * new and it's also covered when dealing with the new slot. |
7997 | * | ||
7998 | * FIXME: const-ify all uses of struct kvm_memory_slot. | ||
7711 | */ | 7999 | */ |
7712 | if (change != KVM_MR_DELETE) | 8000 | if (change != KVM_MR_DELETE) |
7713 | kvm_mmu_slot_apply_flags(kvm, new); | 8001 | kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new); |
7714 | } | 8002 | } |
7715 | 8003 | ||
7716 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 8004 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f5fef1868096..edc8cdcd786b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | #include "kvm_cache_regs.h" | 5 | #include "kvm_cache_regs.h" |
6 | 6 | ||
7 | #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL | ||
8 | |||
7 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | 9 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) |
8 | { | 10 | { |
9 | vcpu->arch.exception.pending = false; | 11 | vcpu->arch.exception.pending = false; |
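MSR_IA32_CR_PAT_DEFAULT packs the eight PAT entries one byte each, and 0x0007040600070406 is the architectural power-on value (WB, WT, UC-, UC, repeated). A quick stand-alone decode of that constant, using the SDM encodings (illustrative only):

    /* Sketch: print the memory type selected by each PAT entry at reset. */
    #include <stdint.h>
    #include <stdio.h>

    #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL

    static const char *pat_type(uint8_t v)
    {
            switch (v) {
            case 0x00: return "UC";
            case 0x01: return "WC";
            case 0x04: return "WT";
            case 0x05: return "WP";
            case 0x06: return "WB";
            case 0x07: return "UC-";
            default:   return "reserved";
            }
    }

    int main(void)
    {
            for (int i = 0; i < 8; i++)
                    printf("PA%d = %s\n", i,
                           pat_type((MSR_IA32_CR_PAT_DEFAULT >> (i * 8)) & 0xff));
            return 0;
    }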
@@ -160,7 +162,13 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
160 | gva_t addr, void *val, unsigned int bytes, | 162 | gva_t addr, void *val, unsigned int bytes, |
161 | struct x86_exception *exception); | 163 | struct x86_exception *exception); |
162 | 164 | ||
165 | void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); | ||
166 | u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
163 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 167 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
168 | int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
169 | int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | ||
170 | bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
171 | int page_num); | ||
164 | 172 | ||
165 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | 173 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ |
166 | | XSTATE_BNDREGS | XSTATE_BNDCSR \ | 174 | | XSTATE_BNDREGS | XSTATE_BNDCSR \ |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ad45054309a0..9564fd78c547 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -44,6 +44,10 @@ | |||
44 | /* Two fragments for cross MMIO pages. */ | 44 | /* Two fragments for cross MMIO pages. */ |
45 | #define KVM_MAX_MMIO_FRAGMENTS 2 | 45 | #define KVM_MAX_MMIO_FRAGMENTS 2 |
46 | 46 | ||
47 | #ifndef KVM_ADDRESS_SPACE_NUM | ||
48 | #define KVM_ADDRESS_SPACE_NUM 1 | ||
49 | #endif | ||
50 | |||
47 | /* | 51 | /* |
48 | * For the normal pfn, the highest 12 bits should be zero, | 52 | * For the normal pfn, the highest 12 bits should be zero, |
49 | * so we can mask bit 62 ~ bit 52 to indicate the error pfn, | 53 | * so we can mask bit 62 ~ bit 52 to indicate the error pfn, |
@@ -134,6 +138,7 @@ static inline bool is_error_page(struct page *page) | |||
134 | #define KVM_REQ_ENABLE_IBS 23 | 138 | #define KVM_REQ_ENABLE_IBS 23 |
135 | #define KVM_REQ_DISABLE_IBS 24 | 139 | #define KVM_REQ_DISABLE_IBS 24 |
136 | #define KVM_REQ_APIC_PAGE_RELOAD 25 | 140 | #define KVM_REQ_APIC_PAGE_RELOAD 25 |
141 | #define KVM_REQ_SMI 26 | ||
137 | 142 | ||
138 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 143 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
139 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | 144 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 |
@@ -230,6 +235,7 @@ struct kvm_vcpu { | |||
230 | 235 | ||
231 | int fpu_active; | 236 | int fpu_active; |
232 | int guest_fpu_loaded, guest_xcr0_loaded; | 237 | int guest_fpu_loaded, guest_xcr0_loaded; |
238 | unsigned char fpu_counter; | ||
233 | wait_queue_head_t wq; | 239 | wait_queue_head_t wq; |
234 | struct pid *pid; | 240 | struct pid *pid; |
235 | int sigset_active; | 241 | int sigset_active; |
@@ -329,6 +335,13 @@ struct kvm_kernel_irq_routing_entry { | |||
329 | #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 335 | #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
330 | #endif | 336 | #endif |
331 | 337 | ||
338 | #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE | ||
339 | static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) | ||
340 | { | ||
341 | return 0; | ||
342 | } | ||
343 | #endif | ||
344 | |||
332 | /* | 345 | /* |
333 | * Note: | 346 | * Note: |
334 | * memslots are not sorted by id anymore, please use id_to_memslot() | 347 | * memslots are not sorted by id anymore, please use id_to_memslot() |
@@ -347,7 +360,7 @@ struct kvm { | |||
347 | spinlock_t mmu_lock; | 360 | spinlock_t mmu_lock; |
348 | struct mutex slots_lock; | 361 | struct mutex slots_lock; |
349 | struct mm_struct *mm; /* userspace tied to this vm */ | 362 | struct mm_struct *mm; /* userspace tied to this vm */ |
350 | struct kvm_memslots *memslots; | 363 | struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; |
351 | struct srcu_struct srcu; | 364 | struct srcu_struct srcu; |
352 | struct srcu_struct irq_srcu; | 365 | struct srcu_struct irq_srcu; |
353 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 366 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
@@ -462,13 +475,25 @@ void kvm_exit(void); | |||
462 | void kvm_get_kvm(struct kvm *kvm); | 475 | void kvm_get_kvm(struct kvm *kvm); |
463 | void kvm_put_kvm(struct kvm *kvm); | 476 | void kvm_put_kvm(struct kvm *kvm); |
464 | 477 | ||
465 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) | 478 | static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) |
466 | { | 479 | { |
467 | return rcu_dereference_check(kvm->memslots, | 480 | return rcu_dereference_check(kvm->memslots[as_id], |
468 | srcu_read_lock_held(&kvm->srcu) | 481 | srcu_read_lock_held(&kvm->srcu) |
469 | || lockdep_is_held(&kvm->slots_lock)); | 482 | || lockdep_is_held(&kvm->slots_lock)); |
470 | } | 483 | } |
471 | 484 | ||
485 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) | ||
486 | { | ||
487 | return __kvm_memslots(kvm, 0); | ||
488 | } | ||
489 | |||
490 | static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) | ||
491 | { | ||
492 | int as_id = kvm_arch_vcpu_memslots_id(vcpu); | ||
493 | |||
494 | return __kvm_memslots(vcpu->kvm, as_id); | ||
495 | } | ||
496 | |||
472 | static inline struct kvm_memory_slot * | 497 | static inline struct kvm_memory_slot * |
473 | id_to_memslot(struct kvm_memslots *slots, int id) | 498 | id_to_memslot(struct kvm_memslots *slots, int id) |
474 | { | 499 | { |
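With kvm->memslots now an array, every lookup goes through an architecture-provided address-space id; kvm_vcpu_memslots() simply indexes with kvm_arch_vcpu_memslots_id(). On x86 that id is expected to be 1 while the vcpu is in SMM and 0 otherwise, though that arch hook is not part of this hunk. A simplified stand-alone model of the lookup pattern (all names hypothetical):

    /* Sketch: per-address-space memslots selected by a per-vcpu id. */
    #include <stdio.h>

    #define ADDRESS_SPACE_NUM 2

    struct memslots { const char *name; };
    struct vm   { struct memslots *memslots[ADDRESS_SPACE_NUM]; };
    struct vcpu { struct vm *vm; int in_smm; };

    static int vcpu_memslots_id(struct vcpu *v)
    {
            return v->in_smm ? 1 : 0;       /* hypothetical arch hook */
    }

    static struct memslots *vcpu_memslots(struct vcpu *v)
    {
            return v->vm->memslots[vcpu_memslots_id(v)];
    }

    int main(void)
    {
            struct memslots normal = { "normal" }, smm = { "smm" };
            struct vm vm = { { &normal, &smm } };
            struct vcpu v = { &vm, 1 };

            printf("vcpu sees '%s' memslots\n", vcpu_memslots(&v)->name);
            return 0;
    }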
@@ -500,21 +525,22 @@ enum kvm_mr_change { | |||
500 | }; | 525 | }; |
501 | 526 | ||
502 | int kvm_set_memory_region(struct kvm *kvm, | 527 | int kvm_set_memory_region(struct kvm *kvm, |
503 | struct kvm_userspace_memory_region *mem); | 528 | const struct kvm_userspace_memory_region *mem); |
504 | int __kvm_set_memory_region(struct kvm *kvm, | 529 | int __kvm_set_memory_region(struct kvm *kvm, |
505 | struct kvm_userspace_memory_region *mem); | 530 | const struct kvm_userspace_memory_region *mem); |
506 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | 531 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, |
507 | struct kvm_memory_slot *dont); | 532 | struct kvm_memory_slot *dont); |
508 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 533 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
509 | unsigned long npages); | 534 | unsigned long npages); |
510 | void kvm_arch_memslots_updated(struct kvm *kvm); | 535 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); |
511 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 536 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
512 | struct kvm_memory_slot *memslot, | 537 | struct kvm_memory_slot *memslot, |
513 | struct kvm_userspace_memory_region *mem, | 538 | const struct kvm_userspace_memory_region *mem, |
514 | enum kvm_mr_change change); | 539 | enum kvm_mr_change change); |
515 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 540 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
516 | struct kvm_userspace_memory_region *mem, | 541 | const struct kvm_userspace_memory_region *mem, |
517 | const struct kvm_memory_slot *old, | 542 | const struct kvm_memory_slot *old, |
543 | const struct kvm_memory_slot *new, | ||
518 | enum kvm_mr_change change); | 544 | enum kvm_mr_change change); |
519 | bool kvm_largepages_enabled(void); | 545 | bool kvm_largepages_enabled(void); |
520 | void kvm_disable_largepages(void); | 546 | void kvm_disable_largepages(void); |
@@ -524,8 +550,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm); | |||
524 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 550 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
525 | struct kvm_memory_slot *slot); | 551 | struct kvm_memory_slot *slot); |
526 | 552 | ||
527 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | 553 | int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, |
528 | int nr_pages); | 554 | struct page **pages, int nr_pages); |
529 | 555 | ||
530 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | 556 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
531 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); | 557 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); |
@@ -538,13 +564,13 @@ void kvm_release_page_dirty(struct page *page); | |||
538 | void kvm_set_page_accessed(struct page *page); | 564 | void kvm_set_page_accessed(struct page *page); |
539 | 565 | ||
540 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); | 566 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); |
541 | pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, | ||
542 | bool write_fault, bool *writable); | ||
543 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); | 567 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); |
544 | pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | 568 | pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, |
545 | bool *writable); | 569 | bool *writable); |
546 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); | 570 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); |
547 | pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); | 571 | pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); |
572 | pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, | ||
573 | bool *async, bool write_fault, bool *writable); | ||
548 | 574 | ||
549 | void kvm_release_pfn_clean(pfn_t pfn); | 575 | void kvm_release_pfn_clean(pfn_t pfn); |
550 | void kvm_set_pfn_dirty(pfn_t pfn); | 576 | void kvm_set_pfn_dirty(pfn_t pfn); |
@@ -573,6 +599,25 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); | |||
573 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); | 599 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); |
574 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); | 600 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); |
575 | 601 | ||
602 | struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); | ||
603 | struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
604 | pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
605 | pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
606 | struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
607 | unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
608 | unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); | ||
609 | int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, | ||
610 | int len); | ||
611 | int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, | ||
612 | unsigned long len); | ||
613 | int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, | ||
614 | unsigned long len); | ||
615 | int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data, | ||
616 | int offset, int len); | ||
617 | int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, | ||
618 | unsigned long len); | ||
619 | void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
620 | |||
576 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); | 621 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); |
577 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 622 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
578 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); | 623 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); |
@@ -762,16 +807,10 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, | |||
762 | } | 807 | } |
763 | #endif | 808 | #endif |
764 | 809 | ||
765 | static inline void kvm_guest_enter(void) | 810 | /* must be called with irqs disabled */ |
811 | static inline void __kvm_guest_enter(void) | ||
766 | { | 812 | { |
767 | unsigned long flags; | ||
768 | |||
769 | BUG_ON(preemptible()); | ||
770 | |||
771 | local_irq_save(flags); | ||
772 | guest_enter(); | 813 | guest_enter(); |
773 | local_irq_restore(flags); | ||
774 | |||
775 | /* KVM does not hold any references to rcu protected data when it | 814 | /* KVM does not hold any references to rcu protected data when it |
776 | * switches CPU into a guest mode. In fact switching to a guest mode | 815 | * switches CPU into a guest mode. In fact switching to a guest mode |
777 | * is very similar to exiting to userspace from rcu point of view. In | 816 | * is very similar to exiting to userspace from rcu point of view. In |
@@ -783,12 +822,27 @@ static inline void kvm_guest_enter(void) | |||
783 | rcu_virt_note_context_switch(smp_processor_id()); | 822 | rcu_virt_note_context_switch(smp_processor_id()); |
784 | } | 823 | } |
785 | 824 | ||
825 | /* must be called with irqs disabled */ | ||
826 | static inline void __kvm_guest_exit(void) | ||
827 | { | ||
828 | guest_exit(); | ||
829 | } | ||
830 | |||
831 | static inline void kvm_guest_enter(void) | ||
832 | { | ||
833 | unsigned long flags; | ||
834 | |||
835 | local_irq_save(flags); | ||
836 | __kvm_guest_enter(); | ||
837 | local_irq_restore(flags); | ||
838 | } | ||
839 | |||
786 | static inline void kvm_guest_exit(void) | 840 | static inline void kvm_guest_exit(void) |
787 | { | 841 | { |
788 | unsigned long flags; | 842 | unsigned long flags; |
789 | 843 | ||
790 | local_irq_save(flags); | 844 | local_irq_save(flags); |
791 | guest_exit(); | 845 | __kvm_guest_exit(); |
792 | local_irq_restore(flags); | 846 | local_irq_restore(flags); |
793 | } | 847 | } |
794 | 848 | ||
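The guest_enter refactoring splits a bare helper that must be called with interrupts already disabled from a wrapper that does the save/restore itself, so a hot path like vcpu_enter_guest(), which already runs with interrupts off, can skip the redundant flag juggling. A user-space model of that wrapper split, with a fake irq flag standing in for the real local_irq_save()/restore() (sketch only):

    /* Sketch: __helper assumes irqs are off; the plain wrapper disables them. */
    #include <assert.h>
    #include <stdbool.h>

    static bool irqs_off;

    static void fake_irq_save(bool *flags)   { *flags = irqs_off; irqs_off = true; }
    static void fake_irq_restore(bool flags) { irqs_off = flags; }

    static void __enter_guest_mode(void)
    {
            assert(irqs_off);       /* "must be called with irqs disabled" */
            /* ... account guest time, note the RCU context switch ... */
    }

    static void enter_guest_mode(void)
    {
            bool flags;

            fake_irq_save(&flags);
            __enter_guest_mode();
            fake_irq_restore(flags);
    }

    int main(void)
    {
            enter_guest_mode();     /* safe from any context */

            irqs_off = true;        /* hot path: interrupts already off */
            __enter_guest_mode();   /* skip the redundant save/restore */
            return 0;
    }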
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 931da7e917cf..1b47a185c2f0 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h | |||
@@ -28,6 +28,7 @@ struct kvm_run; | |||
28 | struct kvm_userspace_memory_region; | 28 | struct kvm_userspace_memory_region; |
29 | struct kvm_vcpu; | 29 | struct kvm_vcpu; |
30 | struct kvm_vcpu_init; | 30 | struct kvm_vcpu_init; |
31 | struct kvm_memslots; | ||
31 | 32 | ||
32 | enum kvm_mr_change; | 33 | enum kvm_mr_change; |
33 | 34 | ||
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4b60056776d1..716ad4ae4d4b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -202,7 +202,7 @@ struct kvm_run { | |||
202 | __u32 exit_reason; | 202 | __u32 exit_reason; |
203 | __u8 ready_for_interrupt_injection; | 203 | __u8 ready_for_interrupt_injection; |
204 | __u8 if_flag; | 204 | __u8 if_flag; |
205 | __u8 padding2[2]; | 205 | __u16 flags; |
206 | 206 | ||
207 | /* in (pre_kvm_run), out (post_kvm_run) */ | 207 | /* in (pre_kvm_run), out (post_kvm_run) */ |
208 | __u64 cr8; | 208 | __u64 cr8; |
@@ -814,6 +814,9 @@ struct kvm_ppc_smmu_info { | |||
814 | #define KVM_CAP_S390_INJECT_IRQ 113 | 814 | #define KVM_CAP_S390_INJECT_IRQ 113 |
815 | #define KVM_CAP_S390_IRQ_STATE 114 | 815 | #define KVM_CAP_S390_IRQ_STATE 114 |
816 | #define KVM_CAP_PPC_HWRNG 115 | 816 | #define KVM_CAP_PPC_HWRNG 115 |
817 | #define KVM_CAP_DISABLE_QUIRKS 116 | ||
818 | #define KVM_CAP_X86_SMM 117 | ||
819 | #define KVM_CAP_MULTI_ADDRESS_SPACE 118 | ||
817 | 820 | ||
818 | #ifdef KVM_CAP_IRQ_ROUTING | 821 | #ifdef KVM_CAP_IRQ_ROUTING |
819 | 822 | ||
@@ -894,7 +897,7 @@ struct kvm_xen_hvm_config { | |||
894 | * | 897 | * |
895 | * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies | 898 | * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies |
896 | * the irqfd to operate in resampling mode for level triggered interrupt | 899 | * the irqfd to operate in resampling mode for level triggered interrupt |
897 | * emlation. See Documentation/virtual/kvm/api.txt. | 900 | * emulation. See Documentation/virtual/kvm/api.txt. |
898 | */ | 901 | */ |
899 | #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) | 902 | #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) |
900 | 903 | ||
@@ -1199,6 +1202,8 @@ struct kvm_s390_ucas_mapping { | |||
1199 | /* Available with KVM_CAP_S390_IRQ_STATE */ | 1202 | /* Available with KVM_CAP_S390_IRQ_STATE */ |
1200 | #define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) | 1203 | #define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) |
1201 | #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) | 1204 | #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) |
1205 | /* Available with KVM_CAP_X86_SMM */ | ||
1206 | #define KVM_SMI _IO(KVMIO, 0xb7) | ||
1202 | 1207 | ||
1203 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 1208 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
1204 | #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) | 1209 | #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) |
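KVM_SMI is a vcpu ioctl with no payload, gated by KVM_CAP_X86_SMM. A hedged userspace sketch of injecting an SMI, assuming the VM and vcpu fds were already created with KVM_CREATE_VM / KVM_CREATE_VCPU and that the installed headers include this series:

    /* Sketch: request an SMI on one vcpu after checking the capability. */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdio.h>

    static int inject_smi(int vm_fd, int vcpu_fd)
    {
            if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_SMM) <= 0) {
                    fprintf(stderr, "KVM_CAP_X86_SMM not available\n");
                    return -1;
            }
            return ioctl(vcpu_fd, KVM_SMI);     /* no argument, like KVM_NMI */
    }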
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index e9c3a7a83833..e661e7fb9d91 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c | |||
@@ -76,8 +76,6 @@ static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu, | |||
76 | vgic_reg_access(mmio, ®, offset, | 76 | vgic_reg_access(mmio, ®, offset, |
77 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); | 77 | ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); |
78 | if (mmio->is_write) { | 78 | if (mmio->is_write) { |
79 | if (reg & GICD_CTLR_ENABLE_SS_G0) | ||
80 | kvm_info("guest tried to enable unsupported Group0 interrupts\n"); | ||
81 | vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1); | 79 | vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1); |
82 | vgic_update_state(vcpu->kvm); | 80 | vgic_update_state(vcpu->kvm); |
83 | return true; | 81 | return true; |
@@ -173,6 +171,32 @@ static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu, | |||
173 | return false; | 171 | return false; |
174 | } | 172 | } |
175 | 173 | ||
174 | static bool handle_mmio_set_active_reg_dist(struct kvm_vcpu *vcpu, | ||
175 | struct kvm_exit_mmio *mmio, | ||
176 | phys_addr_t offset) | ||
177 | { | ||
178 | if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) | ||
179 | return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, | ||
180 | vcpu->vcpu_id); | ||
181 | |||
182 | vgic_reg_access(mmio, NULL, offset, | ||
183 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
184 | return false; | ||
185 | } | ||
186 | |||
187 | static bool handle_mmio_clear_active_reg_dist(struct kvm_vcpu *vcpu, | ||
188 | struct kvm_exit_mmio *mmio, | ||
189 | phys_addr_t offset) | ||
190 | { | ||
191 | if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) | ||
192 | return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, | ||
193 | vcpu->vcpu_id); | ||
194 | |||
195 | vgic_reg_access(mmio, NULL, offset, | ||
196 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
197 | return false; | ||
198 | } | ||
199 | |||
176 | static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu, | 200 | static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu, |
177 | struct kvm_exit_mmio *mmio, | 201 | struct kvm_exit_mmio *mmio, |
178 | phys_addr_t offset) | 202 | phys_addr_t offset) |
@@ -428,13 +452,13 @@ static const struct vgic_io_range vgic_v3_dist_ranges[] = { | |||
428 | .base = GICD_ISACTIVER, | 452 | .base = GICD_ISACTIVER, |
429 | .len = 0x80, | 453 | .len = 0x80, |
430 | .bits_per_irq = 1, | 454 | .bits_per_irq = 1, |
431 | .handle_mmio = handle_mmio_raz_wi, | 455 | .handle_mmio = handle_mmio_set_active_reg_dist, |
432 | }, | 456 | }, |
433 | { | 457 | { |
434 | .base = GICD_ICACTIVER, | 458 | .base = GICD_ICACTIVER, |
435 | .len = 0x80, | 459 | .len = 0x80, |
436 | .bits_per_irq = 1, | 460 | .bits_per_irq = 1, |
437 | .handle_mmio = handle_mmio_raz_wi, | 461 | .handle_mmio = handle_mmio_clear_active_reg_dist, |
438 | }, | 462 | }, |
439 | { | 463 | { |
440 | .base = GICD_IPRIORITYR, | 464 | .base = GICD_IPRIORITYR, |
@@ -561,6 +585,26 @@ static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu, | |||
561 | ACCESS_WRITE_CLEARBIT); | 585 | ACCESS_WRITE_CLEARBIT); |
562 | } | 586 | } |
563 | 587 | ||
588 | static bool handle_mmio_set_active_reg_redist(struct kvm_vcpu *vcpu, | ||
589 | struct kvm_exit_mmio *mmio, | ||
590 | phys_addr_t offset) | ||
591 | { | ||
592 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
593 | |||
594 | return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, | ||
595 | redist_vcpu->vcpu_id); | ||
596 | } | ||
597 | |||
598 | static bool handle_mmio_clear_active_reg_redist(struct kvm_vcpu *vcpu, | ||
599 | struct kvm_exit_mmio *mmio, | ||
600 | phys_addr_t offset) | ||
601 | { | ||
602 | struct kvm_vcpu *redist_vcpu = mmio->private; | ||
603 | |||
604 | return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, | ||
605 | redist_vcpu->vcpu_id); | ||
606 | } | ||
607 | |||
564 | static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu, | 608 | static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu, |
565 | struct kvm_exit_mmio *mmio, | 609 | struct kvm_exit_mmio *mmio, |
566 | phys_addr_t offset) | 610 | phys_addr_t offset) |
@@ -674,13 +718,13 @@ static const struct vgic_io_range vgic_redist_ranges[] = { | |||
674 | .base = SGI_base(GICR_ISACTIVER0), | 718 | .base = SGI_base(GICR_ISACTIVER0), |
675 | .len = 0x04, | 719 | .len = 0x04, |
676 | .bits_per_irq = 1, | 720 | .bits_per_irq = 1, |
677 | .handle_mmio = handle_mmio_raz_wi, | 721 | .handle_mmio = handle_mmio_set_active_reg_redist, |
678 | }, | 722 | }, |
679 | { | 723 | { |
680 | .base = SGI_base(GICR_ICACTIVER0), | 724 | .base = SGI_base(GICR_ICACTIVER0), |
681 | .len = 0x04, | 725 | .len = 0x04, |
682 | .bits_per_irq = 1, | 726 | .bits_per_irq = 1, |
683 | .handle_mmio = handle_mmio_raz_wi, | 727 | .handle_mmio = handle_mmio_clear_active_reg_redist, |
684 | }, | 728 | }, |
685 | { | 729 | { |
686 | .base = SGI_base(GICR_IPRIORITYR0), | 730 | .base = SGI_base(GICR_IPRIORITYR0), |
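The new distributor handlers accept accesses only above VGIC_NR_PRIVATE_IRQS / 8: with one bit per interrupt, byte offset N of an ISACTIVER/ICACTIVER bank covers IRQs N*8..N*8+7, so the bytes covering the 32 per-cpu SGIs/PPIs stay RAZ/WI in the distributor and are handled through the redistributor instead. A stand-alone illustration of that arithmetic (numbers only, no vgic code):

    /* Sketch: which IRQs a byte offset of a 1-bit-per-irq register covers. */
    #include <stdio.h>

    #define VGIC_NR_PRIVATE_IRQS 32

    int main(void)
    {
            for (unsigned offset = 0; offset < 8; offset++)
                    printf("offset %u -> irqs %u..%u (%s)\n", offset,
                           offset * 8, offset * 8 + 7,
                           offset >= VGIC_NR_PRIVATE_IRQS / 8 ?
                                   "shared, handled here" : "private, RAZ/WI");
            return 0;
    }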
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 78fb8201014f..f94d887d20e6 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
@@ -26,8 +26,6 @@ | |||
26 | #include <linux/of_irq.h> | 26 | #include <linux/of_irq.h> |
27 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
28 | 28 | ||
29 | #include <linux/irqchip/arm-gic.h> | ||
30 | |||
31 | #include <asm/kvm_emulate.h> | 29 | #include <asm/kvm_emulate.h> |
32 | #include <asm/kvm_arm.h> | 30 | #include <asm/kvm_arm.h> |
33 | #include <asm/kvm_mmu.h> | 31 | #include <asm/kvm_mmu.h> |
@@ -1561,7 +1559,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | |||
1561 | goto out; | 1559 | goto out; |
1562 | } | 1560 | } |
1563 | 1561 | ||
1564 | if (irq_num >= kvm->arch.vgic.nr_irqs) | 1562 | if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) |
1565 | return -EINVAL; | 1563 | return -EINVAL; |
1566 | 1564 | ||
1567 | vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); | 1565 | vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); |
@@ -2161,10 +2159,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, | |||
2161 | 2159 | ||
2162 | BUG_ON(!vgic_initialized(kvm)); | 2160 | BUG_ON(!vgic_initialized(kvm)); |
2163 | 2161 | ||
2164 | if (spi > kvm->arch.vgic.nr_irqs) | ||
2165 | return -EINVAL; | ||
2166 | return kvm_vgic_inject_irq(kvm, 0, spi, level); | 2162 | return kvm_vgic_inject_irq(kvm, 0, spi, level); |
2167 | |||
2168 | } | 2163 | } |
2169 | 2164 | ||
2170 | /* MSI not implemented yet */ | 2165 | /* MSI not implemented yet */ |
diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h index e7ef6447cb82..ec4cfa278f04 100644 --- a/virt/kvm/async_pf.h +++ b/virt/kvm/async_pf.h | |||
@@ -29,8 +29,8 @@ void kvm_async_pf_deinit(void); | |||
29 | void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu); | 29 | void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu); |
30 | #else | 30 | #else |
31 | #define kvm_async_pf_init() (0) | 31 | #define kvm_async_pf_init() (0) |
32 | #define kvm_async_pf_deinit() do{}while(0) | 32 | #define kvm_async_pf_deinit() do {} while (0) |
33 | #define kvm_async_pf_vcpu_init(C) do{}while(0) | 33 | #define kvm_async_pf_vcpu_init(C) do {} while (0) |
34 | #endif | 34 | #endif |
35 | 35 | ||
36 | #endif | 36 | #endif |
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index b280c20444d1..5cbf190d238c 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h | |||
@@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev { | |||
24 | int kvm_coalesced_mmio_init(struct kvm *kvm); | 24 | int kvm_coalesced_mmio_init(struct kvm *kvm); |
25 | void kvm_coalesced_mmio_free(struct kvm *kvm); | 25 | void kvm_coalesced_mmio_free(struct kvm *kvm); |
26 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 26 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
27 | struct kvm_coalesced_mmio_zone *zone); | 27 | struct kvm_coalesced_mmio_zone *zone); |
28 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | 28 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, |
29 | struct kvm_coalesced_mmio_zone *zone); | 29 | struct kvm_coalesced_mmio_zone *zone); |
30 | 30 | ||
31 | #else | 31 | #else |
32 | 32 | ||
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 1d56a901e791..21c14244f4c4 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
@@ -33,7 +33,6 @@ | |||
33 | 33 | ||
34 | struct kvm_irq_routing_table { | 34 | struct kvm_irq_routing_table { |
35 | int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; | 35 | int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; |
36 | struct kvm_kernel_irq_routing_entry *rt_entries; | ||
37 | u32 nr_rt_entries; | 36 | u32 nr_rt_entries; |
38 | /* | 37 | /* |
39 | * Array indexed by gsi. Each entry contains list of irq chips | 38 | * Array indexed by gsi. Each entry contains list of irq chips |
@@ -118,11 +117,32 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, | |||
118 | return ret; | 117 | return ret; |
119 | } | 118 | } |
120 | 119 | ||
120 | static void free_irq_routing_table(struct kvm_irq_routing_table *rt) | ||
121 | { | ||
122 | int i; | ||
123 | |||
124 | if (!rt) | ||
125 | return; | ||
126 | |||
127 | for (i = 0; i < rt->nr_rt_entries; ++i) { | ||
128 | struct kvm_kernel_irq_routing_entry *e; | ||
129 | struct hlist_node *n; | ||
130 | |||
131 | hlist_for_each_entry_safe(e, n, &rt->map[i], link) { | ||
132 | hlist_del(&e->link); | ||
133 | kfree(e); | ||
134 | } | ||
135 | } | ||
136 | |||
137 | kfree(rt); | ||
138 | } | ||
139 | |||
121 | void kvm_free_irq_routing(struct kvm *kvm) | 140 | void kvm_free_irq_routing(struct kvm *kvm) |
122 | { | 141 | { |
123 | /* Called only during vm destruction. Nobody can use the pointer | 142 | /* Called only during vm destruction. Nobody can use the pointer |
124 | at this stage */ | 143 | at this stage */ |
125 | kfree(kvm->irq_routing); | 144 | struct kvm_irq_routing_table *rt = rcu_access_pointer(kvm->irq_routing); |
145 | free_irq_routing_table(rt); | ||
126 | } | 146 | } |
127 | 147 | ||
128 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, | 148 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, |
@@ -173,25 +193,29 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
173 | 193 | ||
174 | nr_rt_entries += 1; | 194 | nr_rt_entries += 1; |
175 | 195 | ||
176 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) | 196 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), |
177 | + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), | ||
178 | GFP_KERNEL); | 197 | GFP_KERNEL); |
179 | 198 | ||
180 | if (!new) | 199 | if (!new) |
181 | return -ENOMEM; | 200 | return -ENOMEM; |
182 | 201 | ||
183 | new->rt_entries = (void *)&new->map[nr_rt_entries]; | ||
184 | |||
185 | new->nr_rt_entries = nr_rt_entries; | 202 | new->nr_rt_entries = nr_rt_entries; |
186 | for (i = 0; i < KVM_NR_IRQCHIPS; i++) | 203 | for (i = 0; i < KVM_NR_IRQCHIPS; i++) |
187 | for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) | 204 | for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) |
188 | new->chip[i][j] = -1; | 205 | new->chip[i][j] = -1; |
189 | 206 | ||
190 | for (i = 0; i < nr; ++i) { | 207 | for (i = 0; i < nr; ++i) { |
208 | struct kvm_kernel_irq_routing_entry *e; | ||
209 | |||
210 | r = -ENOMEM; | ||
211 | e = kzalloc(sizeof(*e), GFP_KERNEL); | ||
212 | if (!e) | ||
213 | goto out; | ||
214 | |||
191 | r = -EINVAL; | 215 | r = -EINVAL; |
192 | if (ue->flags) | 216 | if (ue->flags) |
193 | goto out; | 217 | goto out; |
194 | r = setup_routing_entry(new, &new->rt_entries[i], ue); | 218 | r = setup_routing_entry(new, e, ue); |
195 | if (r) | 219 | if (r) |
196 | goto out; | 220 | goto out; |
197 | ++ue; | 221 | ++ue; |
@@ -209,6 +233,7 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
209 | r = 0; | 233 | r = 0; |
210 | 234 | ||
211 | out: | 235 | out: |
212 | kfree(new); | 236 | free_irq_routing_table(new); |
237 | |||
213 | return r; | 238 | return r; |
214 | } | 239 | } |
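Because the routing entries are now allocated one at a time and chained on the per-gsi hlists, teardown walks each list with hlist_for_each_entry_safe(), which grabs the successor before the current node is freed. A plain-C sketch of that "save next, then free" pattern on an ordinary singly linked list (not the kernel hlist API):

    /* Sketch: free a list safely by capturing the next pointer first. */
    #include <stdlib.h>

    struct entry {
            struct entry *next;
            /* routing data would live here */
    };

    static void free_list(struct entry *head)
    {
            struct entry *e = head, *n;

            while (e) {
                    n = e->next;    /* save the successor before e is freed */
                    free(e);
                    e = n;
            }
    }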
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 90977418aeb6..848af90b8091 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -103,8 +103,7 @@ static void hardware_disable_all(void); | |||
103 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); | 103 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); |
104 | 104 | ||
105 | static void kvm_release_pfn_dirty(pfn_t pfn); | 105 | static void kvm_release_pfn_dirty(pfn_t pfn); |
106 | static void mark_page_dirty_in_slot(struct kvm *kvm, | 106 | static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); |
107 | struct kvm_memory_slot *memslot, gfn_t gfn); | ||
108 | 107 | ||
109 | __visible bool kvm_rebooting; | 108 | __visible bool kvm_rebooting; |
110 | EXPORT_SYMBOL_GPL(kvm_rebooting); | 109 | EXPORT_SYMBOL_GPL(kvm_rebooting); |
@@ -440,13 +439,60 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) | |||
440 | 439 | ||
441 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 440 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
442 | 441 | ||
443 | static void kvm_init_memslots_id(struct kvm *kvm) | 442 | static struct kvm_memslots *kvm_alloc_memslots(void) |
444 | { | 443 | { |
445 | int i; | 444 | int i; |
446 | struct kvm_memslots *slots = kvm->memslots; | 445 | struct kvm_memslots *slots; |
447 | 446 | ||
447 | slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); | ||
448 | if (!slots) | ||
449 | return NULL; | ||
450 | |||
451 | /* | ||
452 | * Init kvm generation close to the maximum to easily test the | ||
453 | * code of handling generation number wrap-around. | ||
454 | */ | ||
455 | slots->generation = -150; | ||
448 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) | 456 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) |
449 | slots->id_to_index[i] = slots->memslots[i].id = i; | 457 | slots->id_to_index[i] = slots->memslots[i].id = i; |
458 | |||
459 | return slots; | ||
460 | } | ||
461 | |||
462 | static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | ||
463 | { | ||
464 | if (!memslot->dirty_bitmap) | ||
465 | return; | ||
466 | |||
467 | kvfree(memslot->dirty_bitmap); | ||
468 | memslot->dirty_bitmap = NULL; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Free any memory in @free but not in @dont. | ||
473 | */ | ||
474 | static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | ||
475 | struct kvm_memory_slot *dont) | ||
476 | { | ||
477 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | ||
478 | kvm_destroy_dirty_bitmap(free); | ||
479 | |||
480 | kvm_arch_free_memslot(kvm, free, dont); | ||
481 | |||
482 | free->npages = 0; | ||
483 | } | ||
484 | |||
485 | static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) | ||
486 | { | ||
487 | struct kvm_memory_slot *memslot; | ||
488 | |||
489 | if (!slots) | ||
490 | return; | ||
491 | |||
492 | kvm_for_each_memslot(memslot, slots) | ||
493 | kvm_free_memslot(kvm, memslot, NULL); | ||
494 | |||
495 | kvfree(slots); | ||
450 | } | 496 | } |
451 | 497 | ||
452 | static struct kvm *kvm_create_vm(unsigned long type) | 498 | static struct kvm *kvm_create_vm(unsigned long type) |
@@ -472,17 +518,12 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
472 | BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); | 518 | BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); |
473 | 519 | ||
474 | r = -ENOMEM; | 520 | r = -ENOMEM; |
475 | kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots)); | 521 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
476 | if (!kvm->memslots) | 522 | kvm->memslots[i] = kvm_alloc_memslots(); |
477 | goto out_err_no_srcu; | 523 | if (!kvm->memslots[i]) |
478 | 524 | goto out_err_no_srcu; | |
479 | /* | 525 | } |
480 | * Init kvm generation close to the maximum to easily test the | ||
481 | * code of handling generation number wrap-around. | ||
482 | */ | ||
483 | kvm->memslots->generation = -150; | ||
484 | 526 | ||
485 | kvm_init_memslots_id(kvm); | ||
486 | if (init_srcu_struct(&kvm->srcu)) | 527 | if (init_srcu_struct(&kvm->srcu)) |
487 | goto out_err_no_srcu; | 528 | goto out_err_no_srcu; |
488 | if (init_srcu_struct(&kvm->irq_srcu)) | 529 | if (init_srcu_struct(&kvm->irq_srcu)) |
@@ -523,7 +564,8 @@ out_err_no_srcu: | |||
523 | out_err_no_disable: | 564 | out_err_no_disable: |
524 | for (i = 0; i < KVM_NR_BUSES; i++) | 565 | for (i = 0; i < KVM_NR_BUSES; i++) |
525 | kfree(kvm->buses[i]); | 566 | kfree(kvm->buses[i]); |
526 | kvfree(kvm->memslots); | 567 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) |
568 | kvm_free_memslots(kvm, kvm->memslots[i]); | ||
527 | kvm_arch_free_vm(kvm); | 569 | kvm_arch_free_vm(kvm); |
528 | return ERR_PTR(r); | 570 | return ERR_PTR(r); |
529 | } | 571 | } |
@@ -540,40 +582,6 @@ void *kvm_kvzalloc(unsigned long size) | |||
540 | return kzalloc(size, GFP_KERNEL); | 582 | return kzalloc(size, GFP_KERNEL); |
541 | } | 583 | } |
542 | 584 | ||
543 | static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | ||
544 | { | ||
545 | if (!memslot->dirty_bitmap) | ||
546 | return; | ||
547 | |||
548 | kvfree(memslot->dirty_bitmap); | ||
549 | memslot->dirty_bitmap = NULL; | ||
550 | } | ||
551 | |||
552 | /* | ||
553 | * Free any memory in @free but not in @dont. | ||
554 | */ | ||
555 | static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, | ||
556 | struct kvm_memory_slot *dont) | ||
557 | { | ||
558 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | ||
559 | kvm_destroy_dirty_bitmap(free); | ||
560 | |||
561 | kvm_arch_free_memslot(kvm, free, dont); | ||
562 | |||
563 | free->npages = 0; | ||
564 | } | ||
565 | |||
566 | static void kvm_free_physmem(struct kvm *kvm) | ||
567 | { | ||
568 | struct kvm_memslots *slots = kvm->memslots; | ||
569 | struct kvm_memory_slot *memslot; | ||
570 | |||
571 | kvm_for_each_memslot(memslot, slots) | ||
572 | kvm_free_physmem_slot(kvm, memslot, NULL); | ||
573 | |||
574 | kvfree(kvm->memslots); | ||
575 | } | ||
576 | |||
577 | static void kvm_destroy_devices(struct kvm *kvm) | 585 | static void kvm_destroy_devices(struct kvm *kvm) |
578 | { | 586 | { |
579 | struct list_head *node, *tmp; | 587 | struct list_head *node, *tmp; |
@@ -607,7 +615,8 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
607 | #endif | 615 | #endif |
608 | kvm_arch_destroy_vm(kvm); | 616 | kvm_arch_destroy_vm(kvm); |
609 | kvm_destroy_devices(kvm); | 617 | kvm_destroy_devices(kvm); |
610 | kvm_free_physmem(kvm); | 618 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) |
619 | kvm_free_memslots(kvm, kvm->memslots[i]); | ||
611 | cleanup_srcu_struct(&kvm->irq_srcu); | 620 | cleanup_srcu_struct(&kvm->irq_srcu); |
612 | cleanup_srcu_struct(&kvm->srcu); | 621 | cleanup_srcu_struct(&kvm->srcu); |
613 | kvm_arch_free_vm(kvm); | 622 | kvm_arch_free_vm(kvm); |
@@ -670,8 +679,6 @@ static void update_memslots(struct kvm_memslots *slots, | |||
670 | WARN_ON(mslots[i].id != id); | 679 | WARN_ON(mslots[i].id != id); |
671 | if (!new->npages) { | 680 | if (!new->npages) { |
672 | WARN_ON(!mslots[i].npages); | 681 | WARN_ON(!mslots[i].npages); |
673 | new->base_gfn = 0; | ||
674 | new->flags = 0; | ||
675 | if (mslots[i].npages) | 682 | if (mslots[i].npages) |
676 | slots->used_slots--; | 683 | slots->used_slots--; |
677 | } else { | 684 | } else { |
@@ -711,7 +718,7 @@ static void update_memslots(struct kvm_memslots *slots, | |||
711 | slots->id_to_index[mslots[i].id] = i; | 718 | slots->id_to_index[mslots[i].id] = i; |
712 | } | 719 | } |
713 | 720 | ||
714 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) | 721 | static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) |
715 | { | 722 | { |
716 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; | 723 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; |
717 | 724 | ||
@@ -726,9 +733,9 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) | |||
726 | } | 733 | } |
727 | 734 | ||
728 | static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | 735 | static struct kvm_memslots *install_new_memslots(struct kvm *kvm, |
729 | struct kvm_memslots *slots) | 736 | int as_id, struct kvm_memslots *slots) |
730 | { | 737 | { |
731 | struct kvm_memslots *old_memslots = kvm->memslots; | 738 | struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); |
732 | 739 | ||
733 | /* | 740 | /* |
734 | * Set the low bit in the generation, which disables SPTE caching | 741 | * Set the low bit in the generation, which disables SPTE caching |
@@ -737,7 +744,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
737 | WARN_ON(old_memslots->generation & 1); | 744 | WARN_ON(old_memslots->generation & 1); |
738 | slots->generation = old_memslots->generation + 1; | 745 | slots->generation = old_memslots->generation + 1; |
739 | 746 | ||
740 | rcu_assign_pointer(kvm->memslots, slots); | 747 | rcu_assign_pointer(kvm->memslots[as_id], slots); |
741 | synchronize_srcu_expedited(&kvm->srcu); | 748 | synchronize_srcu_expedited(&kvm->srcu); |
742 | 749 | ||
743 | /* | 750 | /* |
@@ -747,7 +754,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
747 | */ | 754 | */ |
748 | slots->generation++; | 755 | slots->generation++; |
749 | 756 | ||
750 | kvm_arch_memslots_updated(kvm); | 757 | kvm_arch_memslots_updated(kvm, slots); |
751 | 758 | ||
752 | return old_memslots; | 759 | return old_memslots; |
753 | } | 760 | } |
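install_new_memslots() gains an as_id parameter but keeps the generation protocol described in its comments: the published generation goes odd while the update is in flight (which disables caching of the generation in SPTEs) and even again once readers have drained. A tiny model of that odd/even handshake with plain integers; the synchronize step is only a comment here.

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long generation = 0;      /* even: stable, caching allowed */
        unsigned long old;

        assert((generation & 1) == 0);

        old = generation;
        generation = old + 1;              /* odd: update in flight, no caching */
        printf("in flux: gen=%lu low bit=%lu\n", generation, generation & 1);

        /* ... this is where synchronize_srcu_expedited() runs above ... */

        generation++;                      /* even again: update is visible */
        printf("stable:  gen=%lu low bit=%lu\n", generation, generation & 1);
        return 0;
    }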
@@ -761,7 +768,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
761 | * Must be called holding kvm->slots_lock for write. | 768 | * Must be called holding kvm->slots_lock for write. |
762 | */ | 769 | */ |
763 | int __kvm_set_memory_region(struct kvm *kvm, | 770 | int __kvm_set_memory_region(struct kvm *kvm, |
764 | struct kvm_userspace_memory_region *mem) | 771 | const struct kvm_userspace_memory_region *mem) |
765 | { | 772 | { |
766 | int r; | 773 | int r; |
767 | gfn_t base_gfn; | 774 | gfn_t base_gfn; |
@@ -769,6 +776,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
769 | struct kvm_memory_slot *slot; | 776 | struct kvm_memory_slot *slot; |
770 | struct kvm_memory_slot old, new; | 777 | struct kvm_memory_slot old, new; |
771 | struct kvm_memslots *slots = NULL, *old_memslots; | 778 | struct kvm_memslots *slots = NULL, *old_memslots; |
779 | int as_id, id; | ||
772 | enum kvm_mr_change change; | 780 | enum kvm_mr_change change; |
773 | 781 | ||
774 | r = check_memory_region_flags(mem); | 782 | r = check_memory_region_flags(mem); |
@@ -776,36 +784,36 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
776 | goto out; | 784 | goto out; |
777 | 785 | ||
778 | r = -EINVAL; | 786 | r = -EINVAL; |
787 | as_id = mem->slot >> 16; | ||
788 | id = (u16)mem->slot; | ||
789 | |||
779 | /* General sanity checks */ | 790 | /* General sanity checks */ |
780 | if (mem->memory_size & (PAGE_SIZE - 1)) | 791 | if (mem->memory_size & (PAGE_SIZE - 1)) |
781 | goto out; | 792 | goto out; |
782 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) | 793 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) |
783 | goto out; | 794 | goto out; |
784 | /* We can read the guest memory with __xxx_user() later on. */ | 795 | /* We can read the guest memory with __xxx_user() later on. */ |
785 | if ((mem->slot < KVM_USER_MEM_SLOTS) && | 796 | if ((id < KVM_USER_MEM_SLOTS) && |
786 | ((mem->userspace_addr & (PAGE_SIZE - 1)) || | 797 | ((mem->userspace_addr & (PAGE_SIZE - 1)) || |
787 | !access_ok(VERIFY_WRITE, | 798 | !access_ok(VERIFY_WRITE, |
788 | (void __user *)(unsigned long)mem->userspace_addr, | 799 | (void __user *)(unsigned long)mem->userspace_addr, |
789 | mem->memory_size))) | 800 | mem->memory_size))) |
790 | goto out; | 801 | goto out; |
791 | if (mem->slot >= KVM_MEM_SLOTS_NUM) | 802 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) |
792 | goto out; | 803 | goto out; |
793 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) | 804 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) |
794 | goto out; | 805 | goto out; |
795 | 806 | ||
796 | slot = id_to_memslot(kvm->memslots, mem->slot); | 807 | slot = id_to_memslot(__kvm_memslots(kvm, as_id), id); |
797 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 808 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
798 | npages = mem->memory_size >> PAGE_SHIFT; | 809 | npages = mem->memory_size >> PAGE_SHIFT; |
799 | 810 | ||
800 | if (npages > KVM_MEM_MAX_NR_PAGES) | 811 | if (npages > KVM_MEM_MAX_NR_PAGES) |
801 | goto out; | 812 | goto out; |
802 | 813 | ||
803 | if (!npages) | ||
804 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; | ||
805 | |||
806 | new = old = *slot; | 814 | new = old = *slot; |
807 | 815 | ||
808 | new.id = mem->slot; | 816 | new.id = id; |
809 | new.base_gfn = base_gfn; | 817 | new.base_gfn = base_gfn; |
810 | new.npages = npages; | 818 | new.npages = npages; |
811 | new.flags = mem->flags; | 819 | new.flags = mem->flags; |
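This is where the multi-address-space encoding of the slot field is introduced: as_id = mem->slot >> 16 and id = (u16)mem->slot, with both halves range-checked below. A round-trip sketch of that packing using stdint types; make_slot() is a name invented for the example, not a kernel helper.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pack an address-space id and a slot number the way
     * __kvm_set_memory_region() unpacks them above. */
    static uint32_t make_slot(uint16_t as_id, uint16_t id)
    {
        return ((uint32_t)as_id << 16) | id;
    }

    int main(void)
    {
        uint32_t slot = make_slot(1, 3);   /* address space 1, slot 3 */
        int as_id = slot >> 16;
        int id = (uint16_t)slot;

        assert(as_id == 1 && id == 3);
        printf("slot=0x%08x -> as_id=%d id=%d\n", slot, as_id, id);
        return 0;
    }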
@@ -828,17 +836,21 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
828 | goto out; | 836 | goto out; |
829 | } | 837 | } |
830 | } | 838 | } |
831 | } else if (old.npages) { | 839 | } else { |
840 | if (!old.npages) | ||
841 | goto out; | ||
842 | |||
832 | change = KVM_MR_DELETE; | 843 | change = KVM_MR_DELETE; |
833 | } else /* Modify a non-existent slot: disallowed. */ | 844 | new.base_gfn = 0; |
834 | goto out; | 845 | new.flags = 0; |
846 | } | ||
835 | 847 | ||
836 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { | 848 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { |
837 | /* Check for overlaps */ | 849 | /* Check for overlaps */ |
838 | r = -EEXIST; | 850 | r = -EEXIST; |
839 | kvm_for_each_memslot(slot, kvm->memslots) { | 851 | kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { |
840 | if ((slot->id >= KVM_USER_MEM_SLOTS) || | 852 | if ((slot->id >= KVM_USER_MEM_SLOTS) || |
841 | (slot->id == mem->slot)) | 853 | (slot->id == id)) |
842 | continue; | 854 | continue; |
843 | if (!((base_gfn + npages <= slot->base_gfn) || | 855 | if (!((base_gfn + npages <= slot->base_gfn) || |
844 | (base_gfn >= slot->base_gfn + slot->npages))) | 856 | (base_gfn >= slot->base_gfn + slot->npages))) |
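The overlap check above skips slots outside the user range (and the slot being changed) and rejects a new range unless it ends at or before an existing slot begins, or starts at or after it ends. The predicate on its own, with a few asserts; ranges_overlap() is an illustrative name, not kernel code.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Two gfn ranges overlap unless one ends at or before the other begins. */
    static bool ranges_overlap(unsigned long base1, unsigned long npages1,
                               unsigned long base2, unsigned long npages2)
    {
        return !(base1 + npages1 <= base2 || base1 >= base2 + npages2);
    }

    int main(void)
    {
        assert(!ranges_overlap(0x000, 0x100, 0x100, 0x100));  /* adjacent: ok   */
        assert( ranges_overlap(0x000, 0x101, 0x100, 0x100));  /* 1-page overlap */
        assert( ranges_overlap(0x120, 0x010, 0x100, 0x100));  /* fully contained */
        printf("overlap checks pass\n");
        return 0;
    }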
@@ -867,13 +879,13 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
867 | slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); | 879 | slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); |
868 | if (!slots) | 880 | if (!slots) |
869 | goto out_free; | 881 | goto out_free; |
870 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 882 | memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); |
871 | 883 | ||
872 | if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { | 884 | if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { |
873 | slot = id_to_memslot(slots, mem->slot); | 885 | slot = id_to_memslot(slots, id); |
874 | slot->flags |= KVM_MEMSLOT_INVALID; | 886 | slot->flags |= KVM_MEMSLOT_INVALID; |
875 | 887 | ||
876 | old_memslots = install_new_memslots(kvm, slots); | 888 | old_memslots = install_new_memslots(kvm, as_id, slots); |
877 | 889 | ||
878 | /* slot was deleted or moved, clear iommu mapping */ | 890 | /* slot was deleted or moved, clear iommu mapping */ |
879 | kvm_iommu_unmap_pages(kvm, &old); | 891 | kvm_iommu_unmap_pages(kvm, &old); |
@@ -898,18 +910,18 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
898 | if (r) | 910 | if (r) |
899 | goto out_slots; | 911 | goto out_slots; |
900 | 912 | ||
901 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 913 | /* actual memory is freed via old in kvm_free_memslot below */ |
902 | if (change == KVM_MR_DELETE) { | 914 | if (change == KVM_MR_DELETE) { |
903 | new.dirty_bitmap = NULL; | 915 | new.dirty_bitmap = NULL; |
904 | memset(&new.arch, 0, sizeof(new.arch)); | 916 | memset(&new.arch, 0, sizeof(new.arch)); |
905 | } | 917 | } |
906 | 918 | ||
907 | update_memslots(slots, &new); | 919 | update_memslots(slots, &new); |
908 | old_memslots = install_new_memslots(kvm, slots); | 920 | old_memslots = install_new_memslots(kvm, as_id, slots); |
909 | 921 | ||
910 | kvm_arch_commit_memory_region(kvm, mem, &old, change); | 922 | kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); |
911 | 923 | ||
912 | kvm_free_physmem_slot(kvm, &old, &new); | 924 | kvm_free_memslot(kvm, &old, &new); |
913 | kvfree(old_memslots); | 925 | kvfree(old_memslots); |
914 | 926 | ||
915 | /* | 927 | /* |
@@ -931,14 +943,14 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
931 | out_slots: | 943 | out_slots: |
932 | kvfree(slots); | 944 | kvfree(slots); |
933 | out_free: | 945 | out_free: |
934 | kvm_free_physmem_slot(kvm, &new, &old); | 946 | kvm_free_memslot(kvm, &new, &old); |
935 | out: | 947 | out: |
936 | return r; | 948 | return r; |
937 | } | 949 | } |
938 | EXPORT_SYMBOL_GPL(__kvm_set_memory_region); | 950 | EXPORT_SYMBOL_GPL(__kvm_set_memory_region); |
939 | 951 | ||
940 | int kvm_set_memory_region(struct kvm *kvm, | 952 | int kvm_set_memory_region(struct kvm *kvm, |
941 | struct kvm_userspace_memory_region *mem) | 953 | const struct kvm_userspace_memory_region *mem) |
942 | { | 954 | { |
943 | int r; | 955 | int r; |
944 | 956 | ||
@@ -952,24 +964,29 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region); | |||
952 | static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | 964 | static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, |
953 | struct kvm_userspace_memory_region *mem) | 965 | struct kvm_userspace_memory_region *mem) |
954 | { | 966 | { |
955 | if (mem->slot >= KVM_USER_MEM_SLOTS) | 967 | if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) |
956 | return -EINVAL; | 968 | return -EINVAL; |
969 | |||
957 | return kvm_set_memory_region(kvm, mem); | 970 | return kvm_set_memory_region(kvm, mem); |
958 | } | 971 | } |
959 | 972 | ||
960 | int kvm_get_dirty_log(struct kvm *kvm, | 973 | int kvm_get_dirty_log(struct kvm *kvm, |
961 | struct kvm_dirty_log *log, int *is_dirty) | 974 | struct kvm_dirty_log *log, int *is_dirty) |
962 | { | 975 | { |
976 | struct kvm_memslots *slots; | ||
963 | struct kvm_memory_slot *memslot; | 977 | struct kvm_memory_slot *memslot; |
964 | int r, i; | 978 | int r, i, as_id, id; |
965 | unsigned long n; | 979 | unsigned long n; |
966 | unsigned long any = 0; | 980 | unsigned long any = 0; |
967 | 981 | ||
968 | r = -EINVAL; | 982 | r = -EINVAL; |
969 | if (log->slot >= KVM_USER_MEM_SLOTS) | 983 | as_id = log->slot >> 16; |
984 | id = (u16)log->slot; | ||
985 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | ||
970 | goto out; | 986 | goto out; |
971 | 987 | ||
972 | memslot = id_to_memslot(kvm->memslots, log->slot); | 988 | slots = __kvm_memslots(kvm, as_id); |
989 | memslot = id_to_memslot(slots, id); | ||
973 | r = -ENOENT; | 990 | r = -ENOENT; |
974 | if (!memslot->dirty_bitmap) | 991 | if (!memslot->dirty_bitmap) |
975 | goto out; | 992 | goto out; |
@@ -1018,17 +1035,21 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); | |||
1018 | int kvm_get_dirty_log_protect(struct kvm *kvm, | 1035 | int kvm_get_dirty_log_protect(struct kvm *kvm, |
1019 | struct kvm_dirty_log *log, bool *is_dirty) | 1036 | struct kvm_dirty_log *log, bool *is_dirty) |
1020 | { | 1037 | { |
1038 | struct kvm_memslots *slots; | ||
1021 | struct kvm_memory_slot *memslot; | 1039 | struct kvm_memory_slot *memslot; |
1022 | int r, i; | 1040 | int r, i, as_id, id; |
1023 | unsigned long n; | 1041 | unsigned long n; |
1024 | unsigned long *dirty_bitmap; | 1042 | unsigned long *dirty_bitmap; |
1025 | unsigned long *dirty_bitmap_buffer; | 1043 | unsigned long *dirty_bitmap_buffer; |
1026 | 1044 | ||
1027 | r = -EINVAL; | 1045 | r = -EINVAL; |
1028 | if (log->slot >= KVM_USER_MEM_SLOTS) | 1046 | as_id = log->slot >> 16; |
1047 | id = (u16)log->slot; | ||
1048 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | ||
1029 | goto out; | 1049 | goto out; |
1030 | 1050 | ||
1031 | memslot = id_to_memslot(kvm->memslots, log->slot); | 1051 | slots = __kvm_memslots(kvm, as_id); |
1052 | memslot = id_to_memslot(slots, id); | ||
1032 | 1053 | ||
1033 | dirty_bitmap = memslot->dirty_bitmap; | 1054 | dirty_bitmap = memslot->dirty_bitmap; |
1034 | r = -ENOENT; | 1055 | r = -ENOENT; |
@@ -1091,6 +1112,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
1091 | } | 1112 | } |
1092 | EXPORT_SYMBOL_GPL(gfn_to_memslot); | 1113 | EXPORT_SYMBOL_GPL(gfn_to_memslot); |
1093 | 1114 | ||
1115 | struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1116 | { | ||
1117 | return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn); | ||
1118 | } | ||
1119 | |||
1094 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 1120 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
1095 | { | 1121 | { |
1096 | struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); | 1122 | struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); |
@@ -1166,6 +1192,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
1166 | } | 1192 | } |
1167 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 1193 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
1168 | 1194 | ||
1195 | unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1196 | { | ||
1197 | return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL); | ||
1198 | } | ||
1199 | EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva); | ||
1200 | |||
1169 | /* | 1201 | /* |
1170 | * If writable is set to false, the hva returned by this function is only | 1202 | * If writable is set to false, the hva returned by this function is only |
1171 | * allowed to be read. | 1203 | * allowed to be read. |
@@ -1188,6 +1220,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) | |||
1188 | return gfn_to_hva_memslot_prot(slot, gfn, writable); | 1220 | return gfn_to_hva_memslot_prot(slot, gfn, writable); |
1189 | } | 1221 | } |
1190 | 1222 | ||
1223 | unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable) | ||
1224 | { | ||
1225 | struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
1226 | |||
1227 | return gfn_to_hva_memslot_prot(slot, gfn, writable); | ||
1228 | } | ||
1229 | |||
1191 | static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | 1230 | static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, |
1192 | unsigned long start, int write, struct page **page) | 1231 | unsigned long start, int write, struct page **page) |
1193 | { | 1232 | { |
@@ -1355,9 +1394,8 @@ exit: | |||
1355 | return pfn; | 1394 | return pfn; |
1356 | } | 1395 | } |
1357 | 1396 | ||
1358 | static pfn_t | 1397 | pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, |
1359 | __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, | 1398 | bool *async, bool write_fault, bool *writable) |
1360 | bool *async, bool write_fault, bool *writable) | ||
1361 | { | 1399 | { |
1362 | unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); | 1400 | unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); |
1363 | 1401 | ||
@@ -1376,65 +1414,59 @@ __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, | |||
1376 | return hva_to_pfn(addr, atomic, async, write_fault, | 1414 | return hva_to_pfn(addr, atomic, async, write_fault, |
1377 | writable); | 1415 | writable); |
1378 | } | 1416 | } |
1417 | EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); | ||
1379 | 1418 | ||
1380 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, | 1419 | pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, |
1381 | bool write_fault, bool *writable) | 1420 | bool *writable) |
1382 | { | 1421 | { |
1383 | struct kvm_memory_slot *slot; | 1422 | return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL, |
1384 | 1423 | write_fault, writable); | |
1385 | if (async) | ||
1386 | *async = false; | ||
1387 | |||
1388 | slot = gfn_to_memslot(kvm, gfn); | ||
1389 | |||
1390 | return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault, | ||
1391 | writable); | ||
1392 | } | 1424 | } |
1425 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); | ||
1393 | 1426 | ||
1394 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) | 1427 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) |
1395 | { | 1428 | { |
1396 | return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL); | 1429 | return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); |
1397 | } | 1430 | } |
1398 | EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); | 1431 | EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); |
1399 | 1432 | ||
1400 | pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, | 1433 | pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) |
1401 | bool write_fault, bool *writable) | ||
1402 | { | 1434 | { |
1403 | return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable); | 1435 | return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); |
1404 | } | 1436 | } |
1405 | EXPORT_SYMBOL_GPL(gfn_to_pfn_async); | 1437 | EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); |
1406 | 1438 | ||
1407 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | 1439 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) |
1408 | { | 1440 | { |
1409 | return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL); | 1441 | return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn); |
1410 | } | 1442 | } |
1411 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | 1443 | EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); |
1412 | 1444 | ||
1413 | pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | 1445 | pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn) |
1414 | bool *writable) | ||
1415 | { | 1446 | { |
1416 | return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable); | 1447 | return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); |
1417 | } | 1448 | } |
1418 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); | 1449 | EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic); |
1419 | 1450 | ||
1420 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | 1451 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) |
1421 | { | 1452 | { |
1422 | return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); | 1453 | return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn); |
1423 | } | 1454 | } |
1455 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | ||
1424 | 1456 | ||
1425 | pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) | 1457 | pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) |
1426 | { | 1458 | { |
1427 | return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); | 1459 | return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); |
1428 | } | 1460 | } |
1429 | EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); | 1461 | EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn); |
1430 | 1462 | ||
1431 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | 1463 | int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, |
1432 | int nr_pages) | 1464 | struct page **pages, int nr_pages) |
1433 | { | 1465 | { |
1434 | unsigned long addr; | 1466 | unsigned long addr; |
1435 | gfn_t entry; | 1467 | gfn_t entry; |
1436 | 1468 | ||
1437 | addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry); | 1469 | addr = gfn_to_hva_many(slot, gfn, &entry); |
1438 | if (kvm_is_error_hva(addr)) | 1470 | if (kvm_is_error_hva(addr)) |
1439 | return -1; | 1471 | return -1; |
1440 | 1472 | ||
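The large hunk above makes __gfn_to_pfn_memslot() the single exported core and rebuilds gfn_to_pfn(), gfn_to_pfn_atomic(), gfn_to_pfn_prot() and the new kvm_vcpu_gfn_to_pfn*() variants as one-line wrappers that differ only in how the memslot is looked up. A toy model of that layering follows; the struct names and the linear gfn-to-pfn arithmetic are stand-ins for the sketch, the point is only the wrapper structure.

    #include <stdbool.h>
    #include <stdio.h>

    typedef unsigned long gfn_t;
    typedef unsigned long pfn_t;

    /* Stand-in types: the real ones are struct kvm, struct kvm_vcpu and
     * struct kvm_memory_slot. */
    struct slot { pfn_t base_pfn; gfn_t base_gfn; };
    struct vm   { struct slot slot; };
    struct vcpu { struct vm *vm; };

    /* One core translation, parameterized by the memslot ... */
    static pfn_t core_gfn_to_pfn(struct slot *slot, gfn_t gfn, bool atomic)
    {
        (void)atomic;   /* the real helper changes fault behaviour here */
        return slot->base_pfn + (gfn - slot->base_gfn);
    }

    /* ... and thin wrappers that only differ in how they find the slot,
     * mirroring gfn_to_pfn() and kvm_vcpu_gfn_to_pfn() above. */
    static pfn_t vm_gfn_to_pfn(struct vm *vm, gfn_t gfn)
    {
        return core_gfn_to_pfn(&vm->slot, gfn, false);
    }

    static pfn_t vcpu_gfn_to_pfn(struct vcpu *vcpu, gfn_t gfn)
    {
        return core_gfn_to_pfn(&vcpu->vm->slot, gfn, false);
    }

    int main(void)
    {
        struct vm vm = { .slot = { .base_pfn = 0x1000, .base_gfn = 0x100 } };
        struct vcpu vcpu = { .vm = &vm };

        printf("vm:   gfn 0x105 -> pfn 0x%lx\n", vm_gfn_to_pfn(&vm, 0x105));
        printf("vcpu: gfn 0x105 -> pfn 0x%lx\n", vcpu_gfn_to_pfn(&vcpu, 0x105));
        return 0;
    }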
@@ -1468,6 +1500,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
1468 | } | 1500 | } |
1469 | EXPORT_SYMBOL_GPL(gfn_to_page); | 1501 | EXPORT_SYMBOL_GPL(gfn_to_page); |
1470 | 1502 | ||
1503 | struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1504 | { | ||
1505 | pfn_t pfn; | ||
1506 | |||
1507 | pfn = kvm_vcpu_gfn_to_pfn(vcpu, gfn); | ||
1508 | |||
1509 | return kvm_pfn_to_page(pfn); | ||
1510 | } | ||
1511 | EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page); | ||
1512 | |||
1471 | void kvm_release_page_clean(struct page *page) | 1513 | void kvm_release_page_clean(struct page *page) |
1472 | { | 1514 | { |
1473 | WARN_ON(is_error_page(page)); | 1515 | WARN_ON(is_error_page(page)); |
@@ -1530,13 +1572,13 @@ static int next_segment(unsigned long len, int offset) | |||
1530 | return len; | 1572 | return len; |
1531 | } | 1573 | } |
1532 | 1574 | ||
1533 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | 1575 | static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn, |
1534 | int len) | 1576 | void *data, int offset, int len) |
1535 | { | 1577 | { |
1536 | int r; | 1578 | int r; |
1537 | unsigned long addr; | 1579 | unsigned long addr; |
1538 | 1580 | ||
1539 | addr = gfn_to_hva_prot(kvm, gfn, NULL); | 1581 | addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); |
1540 | if (kvm_is_error_hva(addr)) | 1582 | if (kvm_is_error_hva(addr)) |
1541 | return -EFAULT; | 1583 | return -EFAULT; |
1542 | r = __copy_from_user(data, (void __user *)addr + offset, len); | 1584 | r = __copy_from_user(data, (void __user *)addr + offset, len); |
@@ -1544,8 +1586,25 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
1544 | return -EFAULT; | 1586 | return -EFAULT; |
1545 | return 0; | 1587 | return 0; |
1546 | } | 1588 | } |
1589 | |||
1590 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | ||
1591 | int len) | ||
1592 | { | ||
1593 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1594 | |||
1595 | return __kvm_read_guest_page(slot, gfn, data, offset, len); | ||
1596 | } | ||
1547 | EXPORT_SYMBOL_GPL(kvm_read_guest_page); | 1597 | EXPORT_SYMBOL_GPL(kvm_read_guest_page); |
1548 | 1598 | ||
1599 | int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, | ||
1600 | int offset, int len) | ||
1601 | { | ||
1602 | struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
1603 | |||
1604 | return __kvm_read_guest_page(slot, gfn, data, offset, len); | ||
1605 | } | ||
1606 | EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page); | ||
1607 | |||
1549 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) | 1608 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) |
1550 | { | 1609 | { |
1551 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1610 | gfn_t gfn = gpa >> PAGE_SHIFT; |
@@ -1566,15 +1625,33 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) | |||
1566 | } | 1625 | } |
1567 | EXPORT_SYMBOL_GPL(kvm_read_guest); | 1626 | EXPORT_SYMBOL_GPL(kvm_read_guest); |
1568 | 1627 | ||
1569 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | 1628 | int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len) |
1570 | unsigned long len) | ||
1571 | { | 1629 | { |
1572 | int r; | ||
1573 | unsigned long addr; | ||
1574 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1630 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1631 | int seg; | ||
1575 | int offset = offset_in_page(gpa); | 1632 | int offset = offset_in_page(gpa); |
1633 | int ret; | ||
1634 | |||
1635 | while ((seg = next_segment(len, offset)) != 0) { | ||
1636 | ret = kvm_vcpu_read_guest_page(vcpu, gfn, data, offset, seg); | ||
1637 | if (ret < 0) | ||
1638 | return ret; | ||
1639 | offset = 0; | ||
1640 | len -= seg; | ||
1641 | data += seg; | ||
1642 | ++gfn; | ||
1643 | } | ||
1644 | return 0; | ||
1645 | } | ||
1646 | EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest); | ||
1576 | 1647 | ||
1577 | addr = gfn_to_hva_prot(kvm, gfn, NULL); | 1648 | static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn, |
1649 | void *data, int offset, unsigned long len) | ||
1650 | { | ||
1651 | int r; | ||
1652 | unsigned long addr; | ||
1653 | |||
1654 | addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); | ||
1578 | if (kvm_is_error_hva(addr)) | 1655 | if (kvm_is_error_hva(addr)) |
1579 | return -EFAULT; | 1656 | return -EFAULT; |
1580 | pagefault_disable(); | 1657 | pagefault_disable(); |
@@ -1584,25 +1661,63 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
1584 | return -EFAULT; | 1661 | return -EFAULT; |
1585 | return 0; | 1662 | return 0; |
1586 | } | 1663 | } |
1587 | EXPORT_SYMBOL(kvm_read_guest_atomic); | ||
1588 | 1664 | ||
1589 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, | 1665 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, |
1590 | int offset, int len) | 1666 | unsigned long len) |
1667 | { | ||
1668 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
1669 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1670 | int offset = offset_in_page(gpa); | ||
1671 | |||
1672 | return __kvm_read_guest_atomic(slot, gfn, data, offset, len); | ||
1673 | } | ||
1674 | EXPORT_SYMBOL_GPL(kvm_read_guest_atomic); | ||
1675 | |||
1676 | int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
1677 | void *data, unsigned long len) | ||
1678 | { | ||
1679 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
1680 | struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
1681 | int offset = offset_in_page(gpa); | ||
1682 | |||
1683 | return __kvm_read_guest_atomic(slot, gfn, data, offset, len); | ||
1684 | } | ||
1685 | EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic); | ||
1686 | |||
1687 | static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn, | ||
1688 | const void *data, int offset, int len) | ||
1591 | { | 1689 | { |
1592 | int r; | 1690 | int r; |
1593 | unsigned long addr; | 1691 | unsigned long addr; |
1594 | 1692 | ||
1595 | addr = gfn_to_hva(kvm, gfn); | 1693 | addr = gfn_to_hva_memslot(memslot, gfn); |
1596 | if (kvm_is_error_hva(addr)) | 1694 | if (kvm_is_error_hva(addr)) |
1597 | return -EFAULT; | 1695 | return -EFAULT; |
1598 | r = __copy_to_user((void __user *)addr + offset, data, len); | 1696 | r = __copy_to_user((void __user *)addr + offset, data, len); |
1599 | if (r) | 1697 | if (r) |
1600 | return -EFAULT; | 1698 | return -EFAULT; |
1601 | mark_page_dirty(kvm, gfn); | 1699 | mark_page_dirty_in_slot(memslot, gfn); |
1602 | return 0; | 1700 | return 0; |
1603 | } | 1701 | } |
1702 | |||
1703 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, | ||
1704 | const void *data, int offset, int len) | ||
1705 | { | ||
1706 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1707 | |||
1708 | return __kvm_write_guest_page(slot, gfn, data, offset, len); | ||
1709 | } | ||
1604 | EXPORT_SYMBOL_GPL(kvm_write_guest_page); | 1710 | EXPORT_SYMBOL_GPL(kvm_write_guest_page); |
1605 | 1711 | ||
1712 | int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
1713 | const void *data, int offset, int len) | ||
1714 | { | ||
1715 | struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
1716 | |||
1717 | return __kvm_write_guest_page(slot, gfn, data, offset, len); | ||
1718 | } | ||
1719 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page); | ||
1720 | |||
1606 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | 1721 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, |
1607 | unsigned long len) | 1722 | unsigned long len) |
1608 | { | 1723 | { |
@@ -1624,6 +1739,27 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | |||
1624 | } | 1739 | } |
1625 | EXPORT_SYMBOL_GPL(kvm_write_guest); | 1740 | EXPORT_SYMBOL_GPL(kvm_write_guest); |
1626 | 1741 | ||
1742 | int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, | ||
1743 | unsigned long len) | ||
1744 | { | ||
1745 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
1746 | int seg; | ||
1747 | int offset = offset_in_page(gpa); | ||
1748 | int ret; | ||
1749 | |||
1750 | while ((seg = next_segment(len, offset)) != 0) { | ||
1751 | ret = kvm_vcpu_write_guest_page(vcpu, gfn, data, offset, seg); | ||
1752 | if (ret < 0) | ||
1753 | return ret; | ||
1754 | offset = 0; | ||
1755 | len -= seg; | ||
1756 | data += seg; | ||
1757 | ++gfn; | ||
1758 | } | ||
1759 | return 0; | ||
1760 | } | ||
1761 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); | ||
1762 | |||
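kvm_vcpu_read_guest() and kvm_vcpu_write_guest() reuse the existing next_segment() walk: each iteration handles at most the remainder of the current page, then continues from offset 0 on the next gfn. A runnable userspace model of that walk; PAGE_SIZE is fixed at 4096 here, whereas the real value is architecture-defined.

    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* Same role as next_segment() in kvm_main.c: how many bytes fit
     * between 'offset' and the end of the current page, capped by 'len'. */
    static int next_segment(unsigned long len, int offset)
    {
        if (len > PAGE_SIZE - offset)
            return PAGE_SIZE - offset;
        return (int)len;
    }

    int main(void)
    {
        /* Model a write of 10000 bytes starting 300 bytes into a page,
         * the way kvm_vcpu_write_guest() walks gfn by gfn above. */
        unsigned long gpa = 5 * PAGE_SIZE + 300;
        unsigned long len = 10000;
        unsigned long gfn = gpa / PAGE_SIZE;
        int offset = (int)(gpa % PAGE_SIZE);
        int seg;

        while ((seg = next_segment(len, offset)) != 0) {
            printf("page %lu: write %d bytes at offset %d\n", gfn, seg, offset);
            offset = 0;              /* later pages start at their beginning */
            len -= seg;
            ++gfn;
        }
        return 0;
    }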
1627 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 1763 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, |
1628 | gpa_t gpa, unsigned long len) | 1764 | gpa_t gpa, unsigned long len) |
1629 | { | 1765 | { |
@@ -1681,7 +1817,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
1681 | r = __copy_to_user((void __user *)ghc->hva, data, len); | 1817 | r = __copy_to_user((void __user *)ghc->hva, data, len); |
1682 | if (r) | 1818 | if (r) |
1683 | return -EFAULT; | 1819 | return -EFAULT; |
1684 | mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT); | 1820 | mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT); |
1685 | 1821 | ||
1686 | return 0; | 1822 | return 0; |
1687 | } | 1823 | } |
@@ -1739,8 +1875,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) | |||
1739 | } | 1875 | } |
1740 | EXPORT_SYMBOL_GPL(kvm_clear_guest); | 1876 | EXPORT_SYMBOL_GPL(kvm_clear_guest); |
1741 | 1877 | ||
1742 | static void mark_page_dirty_in_slot(struct kvm *kvm, | 1878 | static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, |
1743 | struct kvm_memory_slot *memslot, | ||
1744 | gfn_t gfn) | 1879 | gfn_t gfn) |
1745 | { | 1880 | { |
1746 | if (memslot && memslot->dirty_bitmap) { | 1881 | if (memslot && memslot->dirty_bitmap) { |
@@ -1755,10 +1890,19 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
1755 | struct kvm_memory_slot *memslot; | 1890 | struct kvm_memory_slot *memslot; |
1756 | 1891 | ||
1757 | memslot = gfn_to_memslot(kvm, gfn); | 1892 | memslot = gfn_to_memslot(kvm, gfn); |
1758 | mark_page_dirty_in_slot(kvm, memslot, gfn); | 1893 | mark_page_dirty_in_slot(memslot, gfn); |
1759 | } | 1894 | } |
1760 | EXPORT_SYMBOL_GPL(mark_page_dirty); | 1895 | EXPORT_SYMBOL_GPL(mark_page_dirty); |
1761 | 1896 | ||
1897 | void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1898 | { | ||
1899 | struct kvm_memory_slot *memslot; | ||
1900 | |||
1901 | memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
1902 | mark_page_dirty_in_slot(memslot, gfn); | ||
1903 | } | ||
1904 | EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); | ||
1905 | |||
1762 | static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) | 1906 | static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) |
1763 | { | 1907 | { |
1764 | if (kvm_arch_vcpu_runnable(vcpu)) { | 1908 | if (kvm_arch_vcpu_runnable(vcpu)) { |
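mark_page_dirty_in_slot() now takes only the memslot, since the kvm pointer is unused once the slot is known. Its body is not fully visible in this hunk; the sketch below is a guess at the usual pattern, setting the bit for gfn - base_gfn in the slot's dirty bitmap (the kernel uses set_bit_le(); the word arithmetic here is a plain-C stand-in).

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy dirty bitmap: one bit per guest page frame in a slot, indexed by
     * gfn relative to the slot's base_gfn. */
    struct slot {
        unsigned long base_gfn;
        unsigned long npages;
        unsigned long *dirty_bitmap;     /* npages bits */
    };

    static void mark_dirty(struct slot *slot, unsigned long gfn)
    {
        unsigned long rel;

        if (!slot->dirty_bitmap)
            return;                      /* dirty logging not enabled */
        rel = gfn - slot->base_gfn;
        slot->dirty_bitmap[rel / (8 * sizeof(unsigned long))] |=
            1UL << (rel % (8 * sizeof(unsigned long)));
    }

    int main(void)
    {
        struct slot s = { .base_gfn = 0x100, .npages = 128 };
        size_t words = (s.npages + 8 * sizeof(unsigned long) - 1) /
                       (8 * sizeof(unsigned long));

        s.dirty_bitmap = calloc(words, sizeof(unsigned long));
        if (!s.dirty_bitmap)
            return 1;

        mark_dirty(&s, 0x105);           /* sixth page of the slot */
        printf("word 0 = 0x%lx\n", s.dirty_bitmap[0]);   /* bit 5 set: 0x20 */
        free(s.dirty_bitmap);
        return 0;
    }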
@@ -2488,6 +2632,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) | |||
2488 | case KVM_CAP_IRQ_ROUTING: | 2632 | case KVM_CAP_IRQ_ROUTING: |
2489 | return KVM_MAX_IRQ_ROUTES; | 2633 | return KVM_MAX_IRQ_ROUTES; |
2490 | #endif | 2634 | #endif |
2635 | #if KVM_ADDRESS_SPACE_NUM > 1 | ||
2636 | case KVM_CAP_MULTI_ADDRESS_SPACE: | ||
2637 | return KVM_ADDRESS_SPACE_NUM; | ||
2638 | #endif | ||
2491 | default: | 2639 | default: |
2492 | break; | 2640 | break; |
2493 | } | 2641 | } |
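With more than one address space compiled in, KVM_CHECK_EXTENSION(KVM_CAP_MULTI_ADDRESS_SPACE) reports how many there are. The userspace sketch below ties that together with the slot encoding decoded by __kvm_set_memory_region(); it assumes a 4.2-or-later linux/kvm.h that defines KVM_CAP_MULTI_ADDRESS_SPACE, skips error handling on open() and KVM_CREATE_VM, and the guest_phys_addr and size values are arbitrary placeholders, not anything mandated by this commit.

    #include <fcntl.h>
    #include <linux/kvm.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);
        int vm = ioctl(kvm, KVM_CREATE_VM, 0);

        /* Older kernels return 0 for unknown capabilities. */
        int nr_as = ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_MULTI_ADDRESS_SPACE);
        printf("address spaces reported: %d\n", nr_as);

        if (nr_as > 1) {
            void *mem = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            struct kvm_userspace_memory_region region = {
                .slot = (1u << 16) | 0,          /* address space 1, slot 0 */
                .guest_phys_addr = 0x100000,
                .memory_size = 0x1000,
                .userspace_addr = (unsigned long)mem,
            };

            if (ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region) < 0)
                perror("KVM_SET_USER_MEMORY_REGION");
        }

        close(vm);
        close(kvm);
        return 0;
    }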
@@ -2882,18 +3030,12 @@ static int hardware_enable_all(void) | |||
2882 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 3030 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
2883 | void *v) | 3031 | void *v) |
2884 | { | 3032 | { |
2885 | int cpu = (long)v; | ||
2886 | |||
2887 | val &= ~CPU_TASKS_FROZEN; | 3033 | val &= ~CPU_TASKS_FROZEN; |
2888 | switch (val) { | 3034 | switch (val) { |
2889 | case CPU_DYING: | 3035 | case CPU_DYING: |
2890 | pr_info("kvm: disabling virtualization on CPU%d\n", | ||
2891 | cpu); | ||
2892 | hardware_disable(); | 3036 | hardware_disable(); |
2893 | break; | 3037 | break; |
2894 | case CPU_STARTING: | 3038 | case CPU_STARTING: |
2895 | pr_info("kvm: enabling virtualization on CPU%d\n", | ||
2896 | cpu); | ||
2897 | hardware_enable(); | 3039 | hardware_enable(); |
2898 | break; | 3040 | break; |
2899 | } | 3041 | } |