81 files changed, 7826 insertions, 2811 deletions
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index c6416a398163..a237518e51b9 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt | |||
| @@ -656,6 +656,7 @@ struct kvm_clock_data { | |||
| 656 | 4.29 KVM_GET_VCPU_EVENTS | 656 | 4.29 KVM_GET_VCPU_EVENTS |
| 657 | 657 | ||
| 658 | Capability: KVM_CAP_VCPU_EVENTS | 658 | Capability: KVM_CAP_VCPU_EVENTS |
| 659 | Extended by: KVM_CAP_INTR_SHADOW | ||
| 659 | Architectures: x86 | 660 | Architectures: x86 |
| 660 | Type: vcpu ioctl | 661 | Type: vcpu ioctl |
| 661 | Parameters: struct kvm_vcpu_events (out) | 662 | Parameters: struct kvm_vcpu_events (out) |
| @@ -676,7 +677,7 @@ struct kvm_vcpu_events { | |||
| 676 | __u8 injected; | 677 | __u8 injected; |
| 677 | __u8 nr; | 678 | __u8 nr; |
| 678 | __u8 soft; | 679 | __u8 soft; |
| 679 | __u8 pad; | 680 | __u8 shadow; |
| 680 | } interrupt; | 681 | } interrupt; |
| 681 | struct { | 682 | struct { |
| 682 | __u8 injected; | 683 | __u8 injected; |
| @@ -688,9 +689,13 @@ struct kvm_vcpu_events { | |||
| 688 | __u32 flags; | 689 | __u32 flags; |
| 689 | }; | 690 | }; |
| 690 | 691 | ||
| 692 | KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that | ||
| 693 | interrupt.shadow contains a valid state. Otherwise, this field is undefined. | ||
| 694 | |||
| 691 | 4.30 KVM_SET_VCPU_EVENTS | 695 | 4.30 KVM_SET_VCPU_EVENTS |
| 692 | 696 | ||
| 693 | Capability: KVM_CAP_VCPU_EVENTS | 697 | Capability: KVM_CAP_VCPU_EVENTS |
| 698 | Extended by: KVM_CAP_INTR_SHADOW | ||
| 694 | Architectures: x86 | 699 | Architectures: x86 |
| 695 | Type: vcpu ioctl | 700 | Type: vcpu ioctl |
| 696 | Parameters: struct kvm_vcpu_events (in) | 701 | Parameters: struct kvm_vcpu_events (in) |
| @@ -709,6 +714,183 @@ current in-kernel state. The bits are: | |||
| 709 | KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel | 714 | KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel |
| 710 | KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector | 715 | KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector |
| 711 | 716 | ||
| 717 | If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in | ||
| 718 | the flags field to signal that interrupt.shadow contains a valid state and | ||
| 719 | shall be written into the VCPU. | ||
| 720 | |||
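For illustration only (this sketch is not part of the patch): how a userspace VMM
might transfer the new interrupt shadow field, assuming an x86 vcpu fd obtained
from KVM_CREATE_VCPU and a host that advertises KVM_CAP_INTR_SHADOW.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Read the current event state and write it straight back with the
     * interrupt shadow marked valid, so the kernel consumes the field. */
    static int transfer_interrupt_shadow(int vcpu_fd)
    {
            struct kvm_vcpu_events events;

            if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
                    return -1;

            /* Without this flag, interrupt.shadow is ignored on SET. */
            events.flags |= KVM_VCPUEVENT_VALID_SHADOW;
            return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
    }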
| 721 | 4.32 KVM_GET_DEBUGREGS | ||
| 722 | |||
| 723 | Capability: KVM_CAP_DEBUGREGS | ||
| 724 | Architectures: x86 | ||
| 725 | Type: vcpu ioctl | ||
| 726 | Parameters: struct kvm_debugregs (out) | ||
| 727 | Returns: 0 on success, -1 on error | ||
| 728 | |||
| 729 | Reads debug registers from the vcpu. | ||
| 730 | |||
| 731 | struct kvm_debugregs { | ||
| 732 | __u64 db[4]; | ||
| 733 | __u64 dr6; | ||
| 734 | __u64 dr7; | ||
| 735 | __u64 flags; | ||
| 736 | __u64 reserved[9]; | ||
| 737 | }; | ||
| 738 | |||
| 739 | 4.33 KVM_SET_DEBUGREGS | ||
| 740 | |||
| 741 | Capability: KVM_CAP_DEBUGREGS | ||
| 742 | Architectures: x86 | ||
| 743 | Type: vcpu ioctl | ||
| 744 | Parameters: struct kvm_debugregs (in) | ||
| 745 | Returns: 0 on success, -1 on error | ||
| 746 | |||
| 747 | Writes debug registers into the vcpu. | ||
| 748 | |||
| 749 | See KVM_GET_DEBUGREGS for the data structure. The flags field is not | ||
| 750 | used yet and must be cleared on entry. | ||
| 751 | |||
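For illustration only (not part of the patch): a minimal sketch of saving and
restoring the debug registers with the two new ioctls, e.g. around a migration
step, assuming vcpu_fd is an existing KVM vcpu file descriptor.

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int save_restore_debugregs(int vcpu_fd)
    {
            struct kvm_debugregs dbg;

            memset(&dbg, 0, sizeof(dbg));
            if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
                    return -1;

            /* flags is unused so far and must be cleared before SET. */
            dbg.flags = 0;
            return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
    }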
| 752 | 4.34 KVM_SET_USER_MEMORY_REGION | ||
| 753 | |||
| 754 | Capability: KVM_CAP_USER_MEMORY | ||
| 755 | Architectures: all | ||
| 756 | Type: vm ioctl | ||
| 757 | Parameters: struct kvm_userspace_memory_region (in) | ||
| 758 | Returns: 0 on success, -1 on error | ||
| 759 | |||
| 760 | struct kvm_userspace_memory_region { | ||
| 761 | __u32 slot; | ||
| 762 | __u32 flags; | ||
| 763 | __u64 guest_phys_addr; | ||
| 764 | __u64 memory_size; /* bytes */ | ||
| 765 | __u64 userspace_addr; /* start of the userspace allocated memory */ | ||
| 766 | }; | ||
| 767 | |||
| 768 | /* for kvm_memory_region::flags */ | ||
| 769 | #define KVM_MEM_LOG_DIRTY_PAGES 1UL | ||
| 770 | |||
| 771 | This ioctl allows the user to create or modify a guest physical memory | ||
| 772 | slot. When changing an existing slot, it may be moved in the guest | ||
| 773 | physical memory space, or its flags may be modified. It may not be | ||
| 774 | resized. Slots may not overlap in guest physical address space. | ||
| 775 | |||
| 776 | Memory for the region is taken starting at the address denoted by the | ||
| 777 | field userspace_addr, which must point at user addressable memory for | ||
| 778 | the entire memory slot size. Any object may back this memory, including | ||
| 779 | anonymous memory, ordinary files, and hugetlbfs. | ||
| 780 | |||
| 781 | It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr | ||
| 782 | be identical. This allows large pages in the guest to be backed by large | ||
| 783 | pages in the host. | ||
| 784 | |||
| 785 | The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which | ||
| 786 | instructs kvm to keep track of writes to memory within the slot. See | ||
| 787 | the KVM_GET_DIRTY_LOG ioctl. | ||
| 788 | |||
| 789 | When the KVM_CAP_SYNC_MMU capability is available, changes in the backing | ||
| 790 | of the memory region are automatically reflected into the guest. For | ||
| 791 | example, an mmap() that affects the region will be made visible | ||
| 792 | immediately. Another example is madvise(MADV_DONTNEED). | ||
| 793 | |||
| 794 | It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. | ||
| 795 | The KVM_SET_MEMORY_REGION does not allow fine grained control over memory | ||
| 796 | allocation and is deprecated. | ||
| 797 | |||
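For illustration only (not part of the patch): a sketch that backs a 128MB guest
RAM slot with anonymous memory, assuming vm_fd was returned by KVM_CREATE_VM.
Note that mmap() only guarantees page alignment, so honouring the 21-bit
alignment recommendation above (for host large pages) would need extra work,
such as over-allocating and rounding up, which this sketch omits.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>

    static int setup_guest_ram(int vm_fd)
    {
            const __u64 size = 128ULL << 20;        /* 128MB of guest RAM */
            void *ram = mmap(NULL, size, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            struct kvm_userspace_memory_region region = {
                    .slot            = 0,
                    .flags           = 0,   /* or KVM_MEM_LOG_DIRTY_PAGES */
                    .guest_phys_addr = 0,
                    .memory_size     = size,
                    .userspace_addr  = (__u64)(unsigned long)ram,
            };

            if (ram == MAP_FAILED)
                    return -1;
            return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
    }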
| 798 | 4.35 KVM_SET_TSS_ADDR | ||
| 799 | |||
| 800 | Capability: KVM_CAP_SET_TSS_ADDR | ||
| 801 | Architectures: x86 | ||
| 802 | Type: vm ioctl | ||
| 803 | Parameters: unsigned long tss_address (in) | ||
| 804 | Returns: 0 on success, -1 on error | ||
| 805 | |||
| 806 | This ioctl defines the physical address of a three-page region in the guest | ||
| 807 | physical address space. The region must be within the first 4GB of the | ||
| 808 | guest physical address space and must not conflict with any memory slot | ||
| 809 | or any mmio address. The guest may malfunction if it accesses this memory | ||
| 810 | region. | ||
| 811 | |||
| 812 | This ioctl is required on Intel-based hosts. This is needed on Intel hardware | ||
| 813 | because of a quirk in the virtualization implementation (see the internals | ||
| 814 | documentation when it pops into existence). | ||
| 815 | |||
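For illustration only (not part of the patch): the address below is a value
commonly used by userspace (three pages ending just under the 4GB boundary);
it is only an example and must be chosen to avoid the VM's own memory slots
and MMIO ranges.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int setup_tss(int vm_fd)
    {
            /* Three pages ending at 0xfffc0000, below 4GB. */
            return ioctl(vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000UL);
    }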
| 816 | 4.36 KVM_ENABLE_CAP | ||
| 817 | |||
| 818 | Capability: KVM_CAP_ENABLE_CAP | ||
| 819 | Architectures: ppc | ||
| 820 | Type: vcpu ioctl | ||
| 821 | Parameters: struct kvm_enable_cap (in) | ||
| 822 | Returns: 0 on success; -1 on error | ||
| 823 | |||
| 824 | Not all extensions are enabled by default. Using this ioctl the application | ||
| 825 | can enable an extension, making it available to the guest. | ||
| 826 | |||
| 827 | On systems that do not support this ioctl, it always fails. On systems that | ||
| 828 | do support it, it only works for extensions that are supported for enablement. | ||
| 829 | |||
| 830 | To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should | ||
| 831 | be used. | ||
| 832 | |||
| 833 | struct kvm_enable_cap { | ||
| 834 | /* in */ | ||
| 835 | __u32 cap; | ||
| 836 | |||
| 837 | The capability that is supposed to get enabled. | ||
| 838 | |||
| 839 | __u32 flags; | ||
| 840 | |||
| 841 | A bitfield indicating future enhancements. Has to be 0 for now. | ||
| 842 | |||
| 843 | __u64 args[4]; | ||
| 844 | |||
| 845 | Arguments for enabling a feature. If a feature needs initial values to | ||
| 846 | function properly, this is the place to put them. | ||
| 847 | |||
| 848 | __u8 pad[64]; | ||
| 849 | }; | ||
| 850 | |||
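For illustration only (not part of the patch): a sketch of the probe-then-enable
pattern, assuming kvm_fd is the /dev/kvm file descriptor (for
KVM_CHECK_EXTENSION) and vcpu_fd is the vcpu to enable the capability on; the
capability number is whatever KVM_CAP_* constant the caller wants.

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int enable_cap(int kvm_fd, int vcpu_fd, __u32 cap)
    {
            struct kvm_enable_cap enable;

            if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap) <= 0)
                    return -1;      /* absent, or not enablable here */

            memset(&enable, 0, sizeof(enable));
            enable.cap = cap;       /* flags and args[] stay zero */
            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &enable);
    }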
| 851 | 4.37 KVM_GET_MP_STATE | ||
| 852 | |||
| 853 | Capability: KVM_CAP_MP_STATE | ||
| 854 | Architectures: x86, ia64 | ||
| 855 | Type: vcpu ioctl | ||
| 856 | Parameters: struct kvm_mp_state (out) | ||
| 857 | Returns: 0 on success; -1 on error | ||
| 858 | |||
| 859 | struct kvm_mp_state { | ||
| 860 | __u32 mp_state; | ||
| 861 | }; | ||
| 862 | |||
| 863 | Returns the vcpu's current "multiprocessing state" (though also valid on | ||
| 864 | uniprocessor guests). | ||
| 865 | |||
| 866 | Possible values are: | ||
| 867 | |||
| 868 | - KVM_MP_STATE_RUNNABLE: the vcpu is currently running | ||
| 869 | - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) | ||
| 870 | which has not yet received an INIT signal | ||
| 871 | - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is | ||
| 872 | now ready for a SIPI | ||
| 873 | - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and | ||
| 874 | is waiting for an interrupt | ||
| 875 | - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector | ||
| 876 | accessible via KVM_GET_VCPU_EVENTS) | ||
| 877 | |||
| 878 | This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel | ||
| 879 | irqchip, the multiprocessing state must be maintained by userspace. | ||
| 880 | |||
| 881 | 4.38 KVM_SET_MP_STATE | ||
| 882 | |||
| 883 | Capability: KVM_CAP_MP_STATE | ||
| 884 | Architectures: x86, ia64 | ||
| 885 | Type: vcpu ioctl | ||
| 886 | Parameters: struct kvm_mp_state (in) | ||
| 887 | Returns: 0 on success; -1 on error | ||
| 888 | |||
| 889 | Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for | ||
| 890 | arguments. | ||
| 891 | |||
| 892 | This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel | ||
| 893 | irqchip, the multiprocessing state must be maintained by userspace. | ||
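For illustration only (not part of the patch): reading a vcpu's state and
forcing it runnable, e.g. when userspace models the INIT/SIPI sequence itself;
vcpu_fd is assumed to belong to a VM created with an in-kernel irqchip.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int make_vcpu_runnable(int vcpu_fd)
    {
            struct kvm_mp_state mp;

            if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
                    return -1;
            if (mp.mp_state == KVM_MP_STATE_RUNNABLE)
                    return 0;

            mp.mp_state = KVM_MP_STATE_RUNNABLE;
            return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
    }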
| 712 | 894 | ||
| 713 | 5. The kvm_run structure | 895 | 5. The kvm_run structure |
| 714 | 896 | ||
| @@ -820,6 +1002,13 @@ executed a memory-mapped I/O instruction which could not be satisfied | |||
| 820 | by kvm. The 'data' member contains the written data if 'is_write' is | 1002 | by kvm. The 'data' member contains the written data if 'is_write' is |
| 821 | true, and should be filled by application code otherwise. | 1003 | true, and should be filled by application code otherwise. |
| 822 | 1004 | ||
| 1005 | NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding | ||
| 1006 | operations are complete (and guest state is consistent) only after userspace | ||
| 1007 | has re-entered the kernel with KVM_RUN. The kernel side will first finish | ||
| 1008 | incomplete operations and then check for pending signals. Userspace | ||
| 1009 | can re-enter the guest with an unmasked signal pending to complete | ||
| 1010 | pending operations. | ||
| 1011 | |||
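For illustration only (not part of the patch): the shape of a run loop that
respects this rule. run points at the vcpu's mmap'ed kvm_run structure,
handle_mmio() is a hypothetical device-model callback, and the MMIO access
only completes inside the following KVM_RUN call.

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    extern void handle_mmio(struct kvm_run *run);   /* hypothetical */

    static int run_vcpu_once(int vcpu_fd, struct kvm_run *run)
    {
            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                    return -1;

            if (run->exit_reason == KVM_EXIT_MMIO)
                    handle_mmio(run);   /* fill run->mmio.data on reads */

            /* Guest state is consistent again only after the next
             * KVM_RUN has finished the pending operation. */
            return 0;
    }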
| 823 | /* KVM_EXIT_HYPERCALL */ | 1012 | /* KVM_EXIT_HYPERCALL */ |
| 824 | struct { | 1013 | struct { |
| 825 | __u64 nr; | 1014 | __u64 nr; |
| @@ -829,7 +1018,9 @@ true, and should be filled by application code otherwise. | |||
| 829 | __u32 pad; | 1018 | __u32 pad; |
| 830 | } hypercall; | 1019 | } hypercall; |
| 831 | 1020 | ||
| 832 | Unused. | 1021 | Unused. This was once used for 'hypercall to userspace'. To implement |
| 1022 | such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). | ||
| 1023 | Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. | ||
| 833 | 1024 | ||
| 834 | /* KVM_EXIT_TPR_ACCESS */ | 1025 | /* KVM_EXIT_TPR_ACCESS */ |
| 835 | struct { | 1026 | struct { |
| @@ -870,6 +1061,19 @@ s390 specific. | |||
| 870 | 1061 | ||
| 871 | powerpc specific. | 1062 | powerpc specific. |
| 872 | 1063 | ||
| 1064 | /* KVM_EXIT_OSI */ | ||
| 1065 | struct { | ||
| 1066 | __u64 gprs[32]; | ||
| 1067 | } osi; | ||
| 1068 | |||
| 1069 | MOL (Mac-on-Linux) uses a special hypercall interface it calls 'OSI'. To enable it, we catch | ||
| 1070 | hypercalls and exit with this exit struct that contains all the guest gprs. | ||
| 1071 | |||
| 1072 | If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall. | ||
| 1073 | Userspace can now handle the hypercall and when it's done modify the gprs as | ||
| 1074 | necessary. Upon guest entry all guest GPRs will then be replaced by the values | ||
| 1075 | in this struct. | ||
| 1076 | |||
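For illustration only (not part of the patch): handling the new exit reason.
handle_osi_call() stands in for a hypothetical MOL-style dispatcher; whatever
it leaves in gprs[] is loaded back into the guest GPRs on the next KVM_RUN.

    #include <linux/kvm.h>

    extern void handle_osi_call(__u64 *gprs);       /* hypothetical */

    static void handle_osi_exit(struct kvm_run *run)
    {
            if (run->exit_reason == KVM_EXIT_OSI)
                    handle_osi_call(run->osi.gprs);
    }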
| 873 | /* Fix the size of the union. */ | 1077 | /* Fix the size of the union. */ |
| 874 | char padding[256]; | 1078 | char padding[256]; |
| 875 | }; | 1079 | }; |
diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt new file mode 100644 index 000000000000..14a12ea92b7f --- /dev/null +++ b/Documentation/kvm/cpuid.txt | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | KVM CPUID bits | ||
| 2 | Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 | ||
| 3 | ===================================================== | ||
| 4 | |||
| 5 | A guest running on a kvm host can check some of its features using | ||
| 6 | cpuid. This is not always guaranteed to work, since userspace can | ||
| 7 | mask out some, or even all, KVM-related cpuid features before launching | ||
| 8 | a guest. | ||
| 9 | |||
| 10 | KVM cpuid functions are: | ||
| 11 | |||
| 12 | function: KVM_CPUID_SIGNATURE (0x40000000) | ||
| 13 | returns : eax = 0, | ||
| 14 | ebx = 0x4b4d564b, | ||
| 15 | ecx = 0x564b4d56, | ||
| 16 | edx = 0x4d. | ||
| 17 | Note that these values in ebx, ecx and edx correspond to the string "KVMKVMKVM". | ||
| 18 | This function queries the presence of KVM cpuid leaves. | ||
| 19 | |||
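For illustration only (not part of this file): a guest-side sketch, using
GCC-style inline asm on x86, that checks the signature leaf described above.
A robust guest would first check the hypervisor bit (CPUID.1:ECX bit 31)
before touching the 0x40000000 range.

    #include <stdint.h>

    static int running_on_kvm(void)
    {
            uint32_t eax, ebx, ecx, edx;

            __asm__ volatile("cpuid"
                             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                             : "a"(0x40000000), "c"(0));
            /* "KVMK" "VMKV" "M\0\0\0" in little-endian registers. */
            return ebx == 0x4b4d564b && ecx == 0x564b4d56 && edx == 0x4d;
    }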
| 20 | |||
| 21 | function: KVM_CPUID_FEATURES (0x40000001) | ||
| 22 | returns : ebx, ecx, edx = 0 | ||
| 23 | eax = an OR'ed group of (1 << flag), where each flag is: | ||
| 24 | |||
| 25 | |||
| 26 | flag || value || meaning | ||
| 27 | ============================================================================= | ||
| 28 | KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs | ||
| 29 | || || 0x11 and 0x12. | ||
| 30 | ------------------------------------------------------------------------------ | ||
| 31 | KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays | ||
| 32 | || || on PIO operations. | ||
| 33 | ------------------------------------------------------------------------------ | ||
| 34 | KVM_FEATURE_MMU_OP || 2 || deprecated. | ||
| 35 | ------------------------------------------------------------------------------ | ||
| 36 | KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs | ||
| 37 | || || 0x4b564d00 and 0x4b564d01 | ||
| 38 | ------------------------------------------------------------------------------ | ||
| 39 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side | ||
| 40 | || || per-cpu warps are expected in | ||
| 41 | || || kvmclock. | ||
| 42 | ------------------------------------------------------------------------------ | ||
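For illustration only (not part of this file): testing one of the feature flags
from the table above inside the guest; the flag number (0 for
KVM_FEATURE_CLOCKSOURCE) selects the bit in eax.

    #include <stdint.h>

    static int kvm_has_feature(unsigned int flag)
    {
            uint32_t eax, ebx, ecx, edx;

            __asm__ volatile("cpuid"
                             : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                             : "a"(0x40000001), "c"(0));
            return (eax >> flag) & 1;   /* e.g. flag 0: KVM_FEATURE_CLOCKSOURCE */
    }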
diff --git a/Documentation/kvm/mmu.txt b/Documentation/kvm/mmu.txt new file mode 100644 index 000000000000..aaed6ab9d7ab --- /dev/null +++ b/Documentation/kvm/mmu.txt | |||
| @@ -0,0 +1,304 @@ | |||
| 1 | The x86 kvm shadow mmu | ||
| 2 | ====================== | ||
| 3 | |||
| 4 | The mmu (in arch/x86/kvm, files mmu.[ch] and paging_tmpl.h) is responsible | ||
| 5 | for presenting a standard x86 mmu to the guest, while translating guest | ||
| 6 | physical addresses to host physical addresses. | ||
| 7 | |||
| 8 | The mmu code attempts to satisfy the following requirements: | ||
| 9 | |||
| 10 | - correctness: the guest should not be able to determine that it is running | ||
| 11 | on an emulated mmu except for timing (we attempt to comply | ||
| 12 | with the specification, not emulate the characteristics of | ||
| 13 | a particular implementation such as tlb size) | ||
| 14 | - security: the guest must not be able to touch host memory not assigned | ||
| 15 | to it | ||
| 16 | - performance: minimize the performance penalty imposed by the mmu | ||
| 17 | - scaling: need to scale to large memory and large vcpu guests | ||
| 18 | - hardware: support the full range of x86 virtualization hardware | ||
| 19 | - integration: Linux memory management code must be in control of guest memory | ||
| 20 | so that swapping, page migration, page merging, transparent | ||
| 21 | hugepages, and similar features work without change | ||
| 22 | - dirty tracking: report writes to guest memory to enable live migration | ||
| 23 | and framebuffer-based displays | ||
| 24 | - footprint: keep the amount of pinned kernel memory low (most memory | ||
| 25 | should be shrinkable) | ||
| 26 | - reliability: avoid multipage or GFP_ATOMIC allocations | ||
| 27 | |||
| 28 | Acronyms | ||
| 29 | ======== | ||
| 30 | |||
| 31 | pfn host page frame number | ||
| 32 | hpa host physical address | ||
| 33 | hva host virtual address | ||
| 34 | gfn guest frame number | ||
| 35 | gpa guest physical address | ||
| 36 | gva guest virtual address | ||
| 37 | ngpa nested guest physical address | ||
| 38 | ngva nested guest virtual address | ||
| 39 | pte page table entry (used also to refer generically to paging structure | ||
| 40 | entries) | ||
| 41 | gpte guest pte (referring to gfns) | ||
| 42 | spte shadow pte (referring to pfns) | ||
| 43 | tdp two dimensional paging (vendor neutral term for NPT and EPT) | ||
| 44 | |||
| 45 | Virtual and real hardware supported | ||
| 46 | =================================== | ||
| 47 | |||
| 48 | The mmu supports first-generation mmu hardware, which allows an atomic switch | ||
| 49 | of the current paging mode and cr3 during guest entry, as well as | ||
| 50 | two-dimensional paging (AMD's NPT and Intel's EPT). The emulated hardware | ||
| 51 | it exposes is the traditional 2/3/4 level x86 mmu, with support for global | ||
| 52 | pages, pae, pse, pse36, cr0.wp, and 1GB pages. Work is in progress to support | ||
| 53 | exposing NPT capable hardware on NPT capable hosts. | ||
| 54 | |||
| 55 | Translation | ||
| 56 | =========== | ||
| 57 | |||
| 58 | The primary job of the mmu is to program the processor's mmu to translate | ||
| 59 | addresses for the guest. Different translations are required at different | ||
| 60 | times: | ||
| 61 | |||
| 62 | - when guest paging is disabled, we translate guest physical addresses to | ||
| 63 | host physical addresses (gpa->hpa) | ||
| 64 | - when guest paging is enabled, we translate guest virtual addresses, to | ||
| 65 | guest physical addresses, to host physical addresses (gva->gpa->hpa) | ||
| 66 | - when the guest launches a guest of its own, we translate nested guest | ||
| 67 | virtual addresses, to nested guest physical addresses, to guest physical | ||
| 68 | addresses, to host physical addresses (ngva->ngpa->gpa->hpa) | ||
| 69 | |||
| 70 | The primary challenge is to encode between 1 and 3 translations into hardware | ||
| 71 | that supports only 1 (traditional) and 2 (tdp) translations. When the | ||
| 72 | number of required translations matches the hardware, the mmu operates in | ||
| 73 | direct mode; otherwise it operates in shadow mode (see below). | ||
| 74 | |||
| 75 | Memory | ||
| 76 | ====== | ||
| 77 | |||
| 78 | Guest memory (gpa) is part of the user address space of the process that is | ||
| 79 | using kvm. Userspace defines the translation between guest addresses and user | ||
| 80 | addresses (gpa->hva); note that two gpas may alias to the same hva, but not | ||
| 81 | vice versa. | ||
| 82 | |||
| 83 | These hvas may be backed using any method available to the host: anonymous | ||
| 84 | memory, file backed memory, and device memory. Memory might be paged by the | ||
| 85 | host at any time. | ||
| 86 | |||
| 87 | Events | ||
| 88 | ====== | ||
| 89 | |||
| 90 | The mmu is driven by events, some from the guest, some from the host. | ||
| 91 | |||
| 92 | Guest generated events: | ||
| 93 | - writes to control registers (especially cr3) | ||
| 94 | - invlpg/invlpga instruction execution | ||
| 95 | - access to missing or protected translations | ||
| 96 | |||
| 97 | Host generated events: | ||
| 98 | - changes in the gpa->hpa translation (either through gpa->hva changes or | ||
| 99 | through hva->hpa changes) | ||
| 100 | - memory pressure (the shrinker) | ||
| 101 | |||
| 102 | Shadow pages | ||
| 103 | ============ | ||
| 104 | |||
| 105 | The principal data structure is the shadow page, 'struct kvm_mmu_page'. A | ||
| 106 | shadow page contains 512 sptes, which can be either leaf or nonleaf sptes. A | ||
| 107 | shadow page may contain a mix of leaf and nonleaf sptes. | ||
| 108 | |||
| 109 | A nonleaf spte allows the hardware mmu to reach the leaf pages and | ||
| 110 | is not related to a translation directly. It points to other shadow pages. | ||
| 111 | |||
| 112 | A leaf spte corresponds to either one or two translations encoded into | ||
| 113 | one paging structure entry. These are always the lowest level of the | ||
| 114 | translation stack, with optional higher level translations left to NPT/EPT. | ||
| 115 | Leaf ptes point at guest pages. | ||
| 116 | |||
| 117 | The following table shows translations encoded by leaf ptes, with higher-level | ||
| 118 | translations in parentheses: | ||
| 119 | |||
| 120 | Non-nested guests: | ||
| 121 | nonpaging: gpa->hpa | ||
| 122 | paging: gva->gpa->hpa | ||
| 123 | paging, tdp: (gva->)gpa->hpa | ||
| 124 | Nested guests: | ||
| 125 | non-tdp: ngva->gpa->hpa (*) | ||
| 126 | tdp: (ngva->)ngpa->gpa->hpa | ||
| 127 | |||
| 128 | (*) the guest hypervisor will encode the ngva->gpa translation into its page | ||
| 129 | tables if npt is not present | ||
| 130 | |||
| 131 | Shadow pages contain the following information: | ||
| 132 | role.level: | ||
| 133 | The level in the shadow paging hierarchy that this shadow page belongs to. | ||
| 134 | 1=4k sptes, 2=2M sptes, 3=1G sptes, etc. | ||
| 135 | role.direct: | ||
| 136 | If set, leaf sptes reachable from this page are for a linear range. | ||
| 137 | Examples include real mode translation, large guest pages backed by small | ||
| 138 | host pages, and gpa->hpa translations when NPT or EPT is active. | ||
| 139 | The linear range starts at (gfn << PAGE_SHIFT) and its size is determined | ||
| 140 | by role.level (2MB for first level, 1GB for second level, 0.5TB for third | ||
| 141 | level, 256TB for fourth level) | ||
| 142 | If clear, this page corresponds to a guest page table denoted by the gfn | ||
| 143 | field. | ||
| 144 | role.quadrant: | ||
| 145 | When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit | ||
| 146 | sptes. That means a guest page table contains more ptes than the host, | ||
| 147 | so multiple shadow pages are needed to shadow one guest page. | ||
| 148 | For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the | ||
| 149 | first or second 512-gpte block in the guest page table. For second-level | ||
| 150 | page tables, each 32-bit gpte is converted to two 64-bit sptes | ||
| 151 | (since each first-level guest page is shadowed by two first-level | ||
| 152 | shadow pages) so role.quadrant takes values in the range 0..3. Each | ||
| 153 | quadrant maps 1GB virtual address space. | ||
| 154 | role.access: | ||
| 155 | Inherited guest access permissions in the form uwx. Note execute | ||
| 156 | permission is positive, not negative. | ||
| 157 | role.invalid: | ||
| 158 | The page is invalid and should not be used. It is a root page that is | ||
| 159 | currently pinned (by a cpu hardware register pointing to it); once it is | ||
| 160 | unpinned it will be destroyed. | ||
| 161 | role.cr4_pae: | ||
| 162 | Contains the value of cr4.pae for which the page is valid (e.g. whether | ||
| 163 | 32-bit or 64-bit gptes are in use). | ||
| 164 | role.cr4_nxe: | ||
| 165 | Contains the value of efer.nxe for which the page is valid. | ||
| 166 | role.cr0_wp: | ||
| 167 | Contains the value of cr0.wp for which the page is valid. | ||
| 168 | gfn: | ||
| 169 | Either the guest page table containing the translations shadowed by this | ||
| 170 | page, or the base page frame for linear translations. See role.direct. | ||
| 171 | spt: | ||
| 172 | A pageful of 64-bit sptes containing the translations for this page. | ||
| 173 | Accessed by both kvm and hardware. | ||
| 174 | The page pointed to by spt will have its page->private pointing back | ||
| 175 | at the shadow page structure. | ||
| 176 | sptes in spt point either at guest pages, or at lower-level shadow pages. | ||
| 177 | Specifically, if sp1 and sp2 are shadow pages, then sp1->spt[n] may point | ||
| 178 | at __pa(sp2->spt). sp2 will point back at sp1 through parent_pte. | ||
| 179 | The spt array forms a DAG structure with the shadow page as a node, and | ||
| 180 | guest pages as leaves. | ||
| 181 | gfns: | ||
| 182 | An array of 512 guest frame numbers, one for each present pte. Used to | ||
| 183 | perform a reverse map from a pte to a gfn. | ||
| 184 | slot_bitmap: | ||
| 185 | A bitmap containing one bit per memory slot. If the page contains a pte | ||
| 186 | mapping a page from memory slot n, then bit n of slot_bitmap will be set | ||
| 187 | (if a page is aliased among several slots, then it is not guaranteed that | ||
| 188 | all slots will be marked). | ||
| 189 | Used during dirty logging to avoid scanning a shadow page if none of its | ||
| 190 | pages need tracking. | ||
| 191 | root_count: | ||
| 192 | A counter keeping track of how many hardware registers (guest cr3 or | ||
| 193 | pdptrs) are now pointing at the page. While this counter is nonzero, the | ||
| 194 | page cannot be destroyed. See role.invalid. | ||
| 195 | multimapped: | ||
| 196 | Whether there exist multiple sptes pointing at this page. | ||
| 197 | parent_pte/parent_ptes: | ||
| 198 | If multimapped is zero, parent_pte points at the single spte that points at | ||
| 199 | this page's spt. Otherwise, parent_ptes points at a data structure | ||
| 200 | with a list of parent_ptes. | ||
| 201 | unsync: | ||
| 202 | If true, then the translations in this page may not match the guest's | ||
| 203 | translation. This is equivalent to the state of the tlb when a pte is | ||
| 204 | changed but before the tlb entry is flushed. Accordingly, unsync ptes | ||
| 205 | are synchronized when the guest executes invlpg or flushes its tlb by | ||
| 206 | other means. Valid for leaf pages. | ||
| 207 | unsync_children: | ||
| 208 | How many sptes in the page point at pages that are unsync (or have | ||
| 209 | unsynchronized children). | ||
| 210 | unsync_child_bitmap: | ||
| 211 | A bitmap indicating which sptes in spt point (directly or indirectly) at | ||
| 212 | pages that may be unsynchronized. Used to quickly locate all unsynchronized | ||
| 213 | pages reachable from a given page. | ||
| 214 | |||
| 215 | Reverse map | ||
| 216 | =========== | ||
| 217 | |||
| 218 | The mmu maintains a reverse mapping whereby all ptes mapping a page can be | ||
| 219 | reached given its gfn. This is used, for example, when swapping out a page. | ||
| 220 | |||
| 221 | Synchronized and unsynchronized pages | ||
| 222 | ===================================== | ||
| 223 | |||
| 224 | The guest uses two events to synchronize its tlb and page tables: tlb flushes | ||
| 225 | and page invalidations (invlpg). | ||
| 226 | |||
| 227 | A tlb flush means that we need to synchronize all sptes reachable from the | ||
| 228 | guest's cr3. This is expensive, so we keep all guest page tables write | ||
| 229 | protected, and synchronize sptes to gptes when a gpte is written. | ||
| 230 | |||
| 231 | A special case is when a guest page table is reachable from the current | ||
| 232 | guest cr3. In this case, the guest is obliged to issue an invlpg instruction | ||
| 233 | before using the translation. We take advantage of that by removing write | ||
| 234 | protection from the guest page, and allowing the guest to modify it freely. | ||
| 235 | We synchronize modified gptes when the guest invokes invlpg. This reduces | ||
| 236 | the amount of emulation we have to do when the guest modifies multiple gptes, | ||
| 237 | or when a guest page is no longer used as a page table and is used for | ||
| 238 | random guest data. | ||
| 239 | |||
| 240 | As a side effect we have to resynchronize all reachable unsynchronized shadow | ||
| 241 | pages on a tlb flush. | ||
| 242 | |||
| 243 | |||
| 244 | Reaction to events | ||
| 245 | ================== | ||
| 246 | |||
| 247 | - guest page fault (or npt page fault, or ept violation) | ||
| 248 | |||
| 249 | This is the most complicated event. The cause of a page fault can be: | ||
| 250 | |||
| 251 | - a true guest fault (the guest translation won't allow the access) (*) | ||
| 252 | - access to a missing translation | ||
| 253 | - access to a protected translation | ||
| 254 | - when logging dirty pages, memory is write protected | ||
| 255 | - synchronized shadow pages are write protected (*) | ||
| 256 | - access to untranslatable memory (mmio) | ||
| 257 | |||
| 258 | (*) not applicable in direct mode | ||
| 259 | |||
| 260 | Handling a page fault is performed as follows: | ||
| 261 | |||
| 262 | - if needed, walk the guest page tables to determine the guest translation | ||
| 263 | (gva->gpa or ngpa->gpa) | ||
| 264 | - if permissions are insufficient, reflect the fault back to the guest | ||
| 265 | - determine the host page | ||
| 266 | - if this is an mmio request, there is no host page; call the emulator | ||
| 267 | to emulate the instruction instead | ||
| 268 | - walk the shadow page table to find the spte for the translation, | ||
| 269 | instantiating missing intermediate page tables as necessary | ||
| 270 | - try to unsynchronize the page | ||
| 271 | - if successful, we can let the guest continue and modify the gpte | ||
| 272 | - emulate the instruction | ||
| 273 | - if failed, unshadow the page and let the guest continue | ||
| 274 | - update any translations that were modified by the instruction | ||
| 275 | |||
| 276 | invlpg handling: | ||
| 277 | |||
| 278 | - walk the shadow page hierarchy and drop affected translations | ||
| 279 | - try to reinstantiate the indicated translation in the hope that the | ||
| 280 | guest will use it in the near future | ||
| 281 | |||
| 282 | Guest control register updates: | ||
| 283 | |||
| 284 | - mov to cr3 | ||
| 285 | - look up new shadow roots | ||
| 286 | - synchronize newly reachable shadow pages | ||
| 287 | |||
| 288 | - mov to cr0/cr4/efer | ||
| 289 | - set up mmu context for new paging mode | ||
| 290 | - look up new shadow roots | ||
| 291 | - synchronize newly reachable shadow pages | ||
| 292 | |||
| 293 | Host translation updates: | ||
| 294 | |||
| 295 | - mmu notifier called with updated hva | ||
| 296 | - look up affected sptes through reverse map | ||
| 297 | - drop (or update) translations | ||
| 298 | |||
| 299 | Further reading | ||
| 300 | =============== | ||
| 301 | |||
| 302 | - NPT presentation from KVM Forum 2008 | ||
| 303 | http://www.linux-kvm.org/wiki/images/c/c8/KvmForum2008%24kdf2008_21.pdf | ||
| 304 | |||
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 7f3c0a2e60cd..d5f4e9161201 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
| @@ -979,11 +979,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 979 | r = -EFAULT; | 979 | r = -EFAULT; |
| 980 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 980 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
| 981 | goto out; | 981 | goto out; |
| 982 | r = -ENXIO; | ||
| 982 | if (irqchip_in_kernel(kvm)) { | 983 | if (irqchip_in_kernel(kvm)) { |
| 983 | __s32 status; | 984 | __s32 status; |
| 984 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 985 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
| 985 | irq_event.irq, irq_event.level); | 986 | irq_event.irq, irq_event.level); |
| 986 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 987 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
| 988 | r = -EFAULT; | ||
| 987 | irq_event.status = status; | 989 | irq_event.status = status; |
| 988 | if (copy_to_user(argp, &irq_event, | 990 | if (copy_to_user(argp, &irq_event, |
| 989 | sizeof irq_event)) | 991 | sizeof irq_event)) |
| @@ -1379,7 +1381,7 @@ static void kvm_release_vm_pages(struct kvm *kvm) | |||
| 1379 | int i, j; | 1381 | int i, j; |
| 1380 | unsigned long base_gfn; | 1382 | unsigned long base_gfn; |
| 1381 | 1383 | ||
| 1382 | slots = rcu_dereference(kvm->memslots); | 1384 | slots = kvm_memslots(kvm); |
| 1383 | for (i = 0; i < slots->nmemslots; i++) { | 1385 | for (i = 0; i < slots->nmemslots; i++) { |
| 1384 | memslot = &slots->memslots[i]; | 1386 | memslot = &slots->memslots[i]; |
| 1385 | base_gfn = memslot->base_gfn; | 1387 | base_gfn = memslot->base_gfn; |
| @@ -1535,8 +1537,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1535 | goto out; | 1537 | goto out; |
| 1536 | 1538 | ||
| 1537 | if (copy_to_user(user_stack, stack, | 1539 | if (copy_to_user(user_stack, stack, |
| 1538 | sizeof(struct kvm_ia64_vcpu_stack))) | 1540 | sizeof(struct kvm_ia64_vcpu_stack))) { |
| 1541 | r = -EFAULT; | ||
| 1539 | goto out; | 1542 | goto out; |
| 1543 | } | ||
| 1540 | 1544 | ||
| 1541 | break; | 1545 | break; |
| 1542 | } | 1546 | } |
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c index 7a62f75778c5..f0b9cac82414 100644 --- a/arch/ia64/kvm/vmm.c +++ b/arch/ia64/kvm/vmm.c | |||
| @@ -51,7 +51,7 @@ static int __init kvm_vmm_init(void) | |||
| 51 | vmm_fpswa_interface = fpswa_interface; | 51 | vmm_fpswa_interface = fpswa_interface; |
| 52 | 52 | ||
| 53 | /*Register vmm data to kvm side*/ | 53 | /*Register vmm data to kvm side*/ |
| 54 | return kvm_init(&vmm_info, 1024, THIS_MODULE); | 54 | return kvm_init(&vmm_info, 1024, 0, THIS_MODULE); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | static void __exit kvm_vmm_exit(void) | 57 | static void __exit kvm_vmm_exit(void) |
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index a9b91ed3d4b9..2048a6aeea91 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | /* operations for longs and pointers */ | 21 | /* operations for longs and pointers */ |
| 22 | #define PPC_LL stringify_in_c(ld) | 22 | #define PPC_LL stringify_in_c(ld) |
| 23 | #define PPC_STL stringify_in_c(std) | 23 | #define PPC_STL stringify_in_c(std) |
| 24 | #define PPC_STLU stringify_in_c(stdu) | ||
| 24 | #define PPC_LCMPI stringify_in_c(cmpdi) | 25 | #define PPC_LCMPI stringify_in_c(cmpdi) |
| 25 | #define PPC_LONG stringify_in_c(.llong) | 26 | #define PPC_LONG stringify_in_c(.llong) |
| 26 | #define PPC_LONG_ALIGN stringify_in_c(.balign 8) | 27 | #define PPC_LONG_ALIGN stringify_in_c(.balign 8) |
| @@ -44,6 +45,7 @@ | |||
| 44 | /* operations for longs and pointers */ | 45 | /* operations for longs and pointers */ |
| 45 | #define PPC_LL stringify_in_c(lwz) | 46 | #define PPC_LL stringify_in_c(lwz) |
| 46 | #define PPC_STL stringify_in_c(stw) | 47 | #define PPC_STL stringify_in_c(stw) |
| 48 | #define PPC_STLU stringify_in_c(stwu) | ||
| 47 | #define PPC_LCMPI stringify_in_c(cmpwi) | 49 | #define PPC_LCMPI stringify_in_c(cmpwi) |
| 48 | #define PPC_LONG stringify_in_c(.long) | 50 | #define PPC_LONG stringify_in_c(.long) |
| 49 | #define PPC_LONG_ALIGN stringify_in_c(.balign 4) | 51 | #define PPC_LONG_ALIGN stringify_in_c(.balign 4) |
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 81f3b0b5601e..6c5547d82bbe 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h | |||
| @@ -77,4 +77,14 @@ struct kvm_debug_exit_arch { | |||
| 77 | struct kvm_guest_debug_arch { | 77 | struct kvm_guest_debug_arch { |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
| 80 | #define KVM_REG_MASK 0x001f | ||
| 81 | #define KVM_REG_EXT_MASK 0xffe0 | ||
| 82 | #define KVM_REG_GPR 0x0000 | ||
| 83 | #define KVM_REG_FPR 0x0020 | ||
| 84 | #define KVM_REG_QPR 0x0040 | ||
| 85 | #define KVM_REG_FQPR 0x0060 | ||
| 86 | |||
| 87 | #define KVM_INTERRUPT_SET -1U | ||
| 88 | #define KVM_INTERRUPT_UNSET -2U | ||
| 89 | |||
| 80 | #endif /* __LINUX_KVM_POWERPC_H */ | 90 | #endif /* __LINUX_KVM_POWERPC_H */ |
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index aadf2dd6f84e..c5ea4cda34b3 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h | |||
| @@ -88,6 +88,8 @@ | |||
| 88 | 88 | ||
| 89 | #define BOOK3S_HFLAG_DCBZ32 0x1 | 89 | #define BOOK3S_HFLAG_DCBZ32 0x1 |
| 90 | #define BOOK3S_HFLAG_SLB 0x2 | 90 | #define BOOK3S_HFLAG_SLB 0x2 |
| 91 | #define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 | ||
| 92 | #define BOOK3S_HFLAG_NATIVE_PS 0x8 | ||
| 91 | 93 | ||
| 92 | #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ | 94 | #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ |
| 93 | #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ | 95 | #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index db7db0a96967..6f74d93725a0 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
| @@ -22,46 +22,47 @@ | |||
| 22 | 22 | ||
| 23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
| 24 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
| 25 | #include <asm/kvm_book3s_64_asm.h> | 25 | #include <asm/kvm_book3s_asm.h> |
| 26 | 26 | ||
| 27 | struct kvmppc_slb { | 27 | struct kvmppc_slb { |
| 28 | u64 esid; | 28 | u64 esid; |
| 29 | u64 vsid; | 29 | u64 vsid; |
| 30 | u64 orige; | 30 | u64 orige; |
| 31 | u64 origv; | 31 | u64 origv; |
| 32 | bool valid; | 32 | bool valid : 1; |
| 33 | bool Ks; | 33 | bool Ks : 1; |
| 34 | bool Kp; | 34 | bool Kp : 1; |
| 35 | bool nx; | 35 | bool nx : 1; |
| 36 | bool large; /* PTEs are 16MB */ | 36 | bool large : 1; /* PTEs are 16MB */ |
| 37 | bool tb; /* 1TB segment */ | 37 | bool tb : 1; /* 1TB segment */ |
| 38 | bool class; | 38 | bool class : 1; |
| 39 | }; | 39 | }; |
| 40 | 40 | ||
| 41 | struct kvmppc_sr { | 41 | struct kvmppc_sr { |
| 42 | u32 raw; | 42 | u32 raw; |
| 43 | u32 vsid; | 43 | u32 vsid; |
| 44 | bool Ks; | 44 | bool Ks : 1; |
| 45 | bool Kp; | 45 | bool Kp : 1; |
| 46 | bool nx; | 46 | bool nx : 1; |
| 47 | bool valid : 1; | ||
| 47 | }; | 48 | }; |
| 48 | 49 | ||
| 49 | struct kvmppc_bat { | 50 | struct kvmppc_bat { |
| 50 | u64 raw; | 51 | u64 raw; |
| 51 | u32 bepi; | 52 | u32 bepi; |
| 52 | u32 bepi_mask; | 53 | u32 bepi_mask; |
| 53 | bool vs; | ||
| 54 | bool vp; | ||
| 55 | u32 brpn; | 54 | u32 brpn; |
| 56 | u8 wimg; | 55 | u8 wimg; |
| 57 | u8 pp; | 56 | u8 pp; |
| 57 | bool vs : 1; | ||
| 58 | bool vp : 1; | ||
| 58 | }; | 59 | }; |
| 59 | 60 | ||
| 60 | struct kvmppc_sid_map { | 61 | struct kvmppc_sid_map { |
| 61 | u64 guest_vsid; | 62 | u64 guest_vsid; |
| 62 | u64 guest_esid; | 63 | u64 guest_esid; |
| 63 | u64 host_vsid; | 64 | u64 host_vsid; |
| 64 | bool valid; | 65 | bool valid : 1; |
| 65 | }; | 66 | }; |
| 66 | 67 | ||
| 67 | #define SID_MAP_BITS 9 | 68 | #define SID_MAP_BITS 9 |
| @@ -70,7 +71,7 @@ struct kvmppc_sid_map { | |||
| 70 | 71 | ||
| 71 | struct kvmppc_vcpu_book3s { | 72 | struct kvmppc_vcpu_book3s { |
| 72 | struct kvm_vcpu vcpu; | 73 | struct kvm_vcpu vcpu; |
| 73 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; | 74 | struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; |
| 74 | struct kvmppc_sid_map sid_map[SID_MAP_NUM]; | 75 | struct kvmppc_sid_map sid_map[SID_MAP_NUM]; |
| 75 | struct kvmppc_slb slb[64]; | 76 | struct kvmppc_slb slb[64]; |
| 76 | struct { | 77 | struct { |
| @@ -82,9 +83,10 @@ struct kvmppc_vcpu_book3s { | |||
| 82 | struct kvmppc_bat ibat[8]; | 83 | struct kvmppc_bat ibat[8]; |
| 83 | struct kvmppc_bat dbat[8]; | 84 | struct kvmppc_bat dbat[8]; |
| 84 | u64 hid[6]; | 85 | u64 hid[6]; |
| 86 | u64 gqr[8]; | ||
| 85 | int slb_nr; | 87 | int slb_nr; |
| 88 | u32 dsisr; | ||
| 86 | u64 sdr1; | 89 | u64 sdr1; |
| 87 | u64 dsisr; | ||
| 88 | u64 hior; | 90 | u64 hior; |
| 89 | u64 msr_mask; | 91 | u64 msr_mask; |
| 90 | u64 vsid_first; | 92 | u64 vsid_first; |
| @@ -98,15 +100,15 @@ struct kvmppc_vcpu_book3s { | |||
| 98 | #define CONTEXT_GUEST 1 | 100 | #define CONTEXT_GUEST 1 |
| 99 | #define CONTEXT_GUEST_END 2 | 101 | #define CONTEXT_GUEST_END 2 |
| 100 | 102 | ||
| 101 | #define VSID_REAL 0xfffffffffff00000 | 103 | #define VSID_REAL 0x1fffffffffc00000ULL |
| 102 | #define VSID_REAL_DR 0xffffffffffe00000 | 104 | #define VSID_BAT 0x1fffffffffb00000ULL |
| 103 | #define VSID_REAL_IR 0xffffffffffd00000 | 105 | #define VSID_REAL_DR 0x2000000000000000ULL |
| 104 | #define VSID_BAT 0xffffffffffc00000 | 106 | #define VSID_REAL_IR 0x4000000000000000ULL |
| 105 | #define VSID_PR 0x8000000000000000 | 107 | #define VSID_PR 0x8000000000000000ULL |
| 106 | 108 | ||
| 107 | extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask); | 109 | extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask); |
| 108 | extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); | 110 | extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); |
| 109 | extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end); | 111 | extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); |
| 110 | extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); | 112 | extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); |
| 111 | extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); | 113 | extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); |
| 112 | extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); | 114 | extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); |
| @@ -114,11 +116,13 @@ extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); | |||
| 114 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); | 116 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); |
| 115 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); | 117 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); |
| 116 | extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); | 118 | extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); |
| 117 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data); | 119 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
| 118 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr); | 120 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
| 119 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); | 121 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); |
| 120 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, | 122 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, |
| 121 | bool upper, u32 val); | 123 | bool upper, u32 val); |
| 124 | extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); | ||
| 125 | extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); | ||
| 122 | 126 | ||
| 123 | extern u32 kvmppc_trampoline_lowmem; | 127 | extern u32 kvmppc_trampoline_lowmem; |
| 124 | extern u32 kvmppc_trampoline_enter; | 128 | extern u32 kvmppc_trampoline_enter; |
| @@ -126,6 +130,8 @@ extern void kvmppc_rmcall(ulong srr0, ulong srr1); | |||
| 126 | extern void kvmppc_load_up_fpu(void); | 130 | extern void kvmppc_load_up_fpu(void); |
| 127 | extern void kvmppc_load_up_altivec(void); | 131 | extern void kvmppc_load_up_altivec(void); |
| 128 | extern void kvmppc_load_up_vsx(void); | 132 | extern void kvmppc_load_up_vsx(void); |
| 133 | extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); | ||
| 134 | extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); | ||
| 129 | 135 | ||
| 130 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) | 136 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) |
| 131 | { | 137 | { |
| @@ -140,7 +146,108 @@ static inline ulong dsisr(void) | |||
| 140 | } | 146 | } |
| 141 | 147 | ||
| 142 | extern void kvm_return_point(void); | 148 | extern void kvm_return_point(void); |
| 149 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu); | ||
| 150 | |||
| 151 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | ||
| 152 | { | ||
| 153 | if ( num < 14 ) { | ||
| 154 | to_svcpu(vcpu)->gpr[num] = val; | ||
| 155 | to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; | ||
| 156 | } else | ||
| 157 | vcpu->arch.gpr[num] = val; | ||
| 158 | } | ||
| 159 | |||
| 160 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
| 161 | { | ||
| 162 | if ( num < 14 ) | ||
| 163 | return to_svcpu(vcpu)->gpr[num]; | ||
| 164 | else | ||
| 165 | return vcpu->arch.gpr[num]; | ||
| 166 | } | ||
| 167 | |||
| 168 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
| 169 | { | ||
| 170 | to_svcpu(vcpu)->cr = val; | ||
| 171 | to_book3s(vcpu)->shadow_vcpu->cr = val; | ||
| 172 | } | ||
| 173 | |||
| 174 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
| 175 | { | ||
| 176 | return to_svcpu(vcpu)->cr; | ||
| 177 | } | ||
| 178 | |||
| 179 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
| 180 | { | ||
| 181 | to_svcpu(vcpu)->xer = val; | ||
| 182 | to_book3s(vcpu)->shadow_vcpu->xer = val; | ||
| 183 | } | ||
| 184 | |||
| 185 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
| 186 | { | ||
| 187 | return to_svcpu(vcpu)->xer; | ||
| 188 | } | ||
| 189 | |||
| 190 | static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) | ||
| 191 | { | ||
| 192 | to_svcpu(vcpu)->ctr = val; | ||
| 193 | } | ||
| 194 | |||
| 195 | static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) | ||
| 196 | { | ||
| 197 | return to_svcpu(vcpu)->ctr; | ||
| 198 | } | ||
| 199 | |||
| 200 | static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) | ||
| 201 | { | ||
| 202 | to_svcpu(vcpu)->lr = val; | ||
| 203 | } | ||
| 204 | |||
| 205 | static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) | ||
| 206 | { | ||
| 207 | return to_svcpu(vcpu)->lr; | ||
| 208 | } | ||
| 209 | |||
| 210 | static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) | ||
| 211 | { | ||
| 212 | to_svcpu(vcpu)->pc = val; | ||
| 213 | } | ||
| 214 | |||
| 215 | static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) | ||
| 216 | { | ||
| 217 | return to_svcpu(vcpu)->pc; | ||
| 218 | } | ||
| 219 | |||
| 220 | static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) | ||
| 221 | { | ||
| 222 | ulong pc = kvmppc_get_pc(vcpu); | ||
| 223 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | ||
| 224 | |||
| 225 | /* Load the instruction manually if it failed to do so in the | ||
| 226 | * exit path */ | ||
| 227 | if (svcpu->last_inst == KVM_INST_FETCH_FAILED) | ||
| 228 | kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); | ||
| 229 | |||
| 230 | return svcpu->last_inst; | ||
| 231 | } | ||
| 232 | |||
| 233 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) | ||
| 234 | { | ||
| 235 | return to_svcpu(vcpu)->fault_dar; | ||
| 236 | } | ||
| 237 | |||
| 238 | /* Magic register values loaded into r3 and r4 before the 'sc' assembly | ||
| 239 | * instruction for the OSI hypercalls */ | ||
| 240 | #define OSI_SC_MAGIC_R3 0x113724FA | ||
| 241 | #define OSI_SC_MAGIC_R4 0x77810F9B | ||
| 143 | 242 | ||
| 144 | #define INS_DCBZ 0x7c0007ec | 243 | #define INS_DCBZ 0x7c0007ec |
| 145 | 244 | ||
| 245 | /* Also add subarch specific defines */ | ||
| 246 | |||
| 247 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 248 | #include <asm/kvm_book3s_32.h> | ||
| 249 | #else | ||
| 250 | #include <asm/kvm_book3s_64.h> | ||
| 251 | #endif | ||
| 252 | |||
| 146 | #endif /* __ASM_KVM_BOOK3S_H__ */ | 253 | #endif /* __ASM_KVM_BOOK3S_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h new file mode 100644 index 000000000000..de604db135f5 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_book3s_32.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or modify | ||
| 3 | * it under the terms of the GNU General Public License, version 2, as | ||
| 4 | * published by the Free Software Foundation. | ||
| 5 | * | ||
| 6 | * This program is distributed in the hope that it will be useful, | ||
| 7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 9 | * GNU General Public License for more details. | ||
| 10 | * | ||
| 11 | * You should have received a copy of the GNU General Public License | ||
| 12 | * along with this program; if not, write to the Free Software | ||
| 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 14 | * | ||
| 15 | * Copyright SUSE Linux Products GmbH 2010 | ||
| 16 | * | ||
| 17 | * Authors: Alexander Graf <agraf@suse.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | #ifndef __ASM_KVM_BOOK3S_32_H__ | ||
| 21 | #define __ASM_KVM_BOOK3S_32_H__ | ||
| 22 | |||
| 23 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) | ||
| 24 | { | ||
| 25 | return to_book3s(vcpu)->shadow_vcpu; | ||
| 26 | } | ||
| 27 | |||
| 28 | #define PTE_SIZE 12 | ||
| 29 | #define VSID_ALL 0 | ||
| 30 | #define SR_INVALID 0x00000001 /* VSID 1 should always be unused */ | ||
| 31 | #define SR_KP 0x20000000 | ||
| 32 | #define PTE_V 0x80000000 | ||
| 33 | #define PTE_SEC 0x00000040 | ||
| 34 | #define PTE_M 0x00000010 | ||
| 35 | #define PTE_R 0x00000100 | ||
| 36 | #define PTE_C 0x00000080 | ||
| 37 | |||
| 38 | #define SID_SHIFT 28 | ||
| 39 | #define ESID_MASK 0xf0000000 | ||
| 40 | #define VSID_MASK 0x00fffffff0000000ULL | ||
| 41 | |||
| 42 | #endif /* __ASM_KVM_BOOK3S_32_H__ */ | ||
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h new file mode 100644 index 000000000000..4cadd612d575 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or modify | ||
| 3 | * it under the terms of the GNU General Public License, version 2, as | ||
| 4 | * published by the Free Software Foundation. | ||
| 5 | * | ||
| 6 | * This program is distributed in the hope that it will be useful, | ||
| 7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 9 | * GNU General Public License for more details. | ||
| 10 | * | ||
| 11 | * You should have received a copy of the GNU General Public License | ||
| 12 | * along with this program; if not, write to the Free Software | ||
| 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 14 | * | ||
| 15 | * Copyright SUSE Linux Products GmbH 2010 | ||
| 16 | * | ||
| 17 | * Authors: Alexander Graf <agraf@suse.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | #ifndef __ASM_KVM_BOOK3S_64_H__ | ||
| 21 | #define __ASM_KVM_BOOK3S_64_H__ | ||
| 22 | |||
| 23 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) | ||
| 24 | { | ||
| 25 | return &get_paca()->shadow_vcpu; | ||
| 26 | } | ||
| 27 | |||
| 28 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ | ||
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 183461b48407..36fdb3aff30b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | 22 | ||
| 23 | #ifdef __ASSEMBLY__ | 23 | #ifdef __ASSEMBLY__ |
| 24 | 24 | ||
| 25 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 25 | #ifdef CONFIG_KVM_BOOK3S_HANDLER |
| 26 | 26 | ||
| 27 | #include <asm/kvm_asm.h> | 27 | #include <asm/kvm_asm.h> |
| 28 | 28 | ||
| @@ -55,7 +55,7 @@ kvmppc_resume_\intno: | |||
| 55 | .macro DO_KVM intno | 55 | .macro DO_KVM intno |
| 56 | .endm | 56 | .endm |
| 57 | 57 | ||
| 58 | #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */ | 58 | #endif /* CONFIG_KVM_BOOK3S_HANDLER */ |
| 59 | 59 | ||
| 60 | #else /*__ASSEMBLY__ */ | 60 | #else /*__ASSEMBLY__ */ |
| 61 | 61 | ||
| @@ -63,12 +63,33 @@ struct kvmppc_book3s_shadow_vcpu { | |||
| 63 | ulong gpr[14]; | 63 | ulong gpr[14]; |
| 64 | u32 cr; | 64 | u32 cr; |
| 65 | u32 xer; | 65 | u32 xer; |
| 66 | |||
| 67 | u32 fault_dsisr; | ||
| 68 | u32 last_inst; | ||
| 69 | ulong ctr; | ||
| 70 | ulong lr; | ||
| 71 | ulong pc; | ||
| 72 | ulong shadow_srr1; | ||
| 73 | ulong fault_dar; | ||
| 74 | |||
| 66 | ulong host_r1; | 75 | ulong host_r1; |
| 67 | ulong host_r2; | 76 | ulong host_r2; |
| 68 | ulong handler; | 77 | ulong handler; |
| 69 | ulong scratch0; | 78 | ulong scratch0; |
| 70 | ulong scratch1; | 79 | ulong scratch1; |
| 71 | ulong vmhandler; | 80 | ulong vmhandler; |
| 81 | u8 in_guest; | ||
| 82 | |||
| 83 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 84 | u32 sr[16]; /* Guest SRs */ | ||
| 85 | #endif | ||
| 86 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 87 | u8 slb_max; /* highest used guest slb entry */ | ||
| 88 | struct { | ||
| 89 | u64 esid; | ||
| 90 | u64 vsid; | ||
| 91 | } slb[64]; /* guest SLB */ | ||
| 92 | #endif | ||
| 72 | }; | 93 | }; |
| 73 | 94 | ||
| 74 | #endif /*__ASSEMBLY__ */ | 95 | #endif /*__ASSEMBLY__ */ |
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h new file mode 100644 index 000000000000..9c9ba3d59b1b --- /dev/null +++ b/arch/powerpc/include/asm/kvm_booke.h | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or modify | ||
| 3 | * it under the terms of the GNU General Public License, version 2, as | ||
| 4 | * published by the Free Software Foundation. | ||
| 5 | * | ||
| 6 | * This program is distributed in the hope that it will be useful, | ||
| 7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 9 | * GNU General Public License for more details. | ||
| 10 | * | ||
| 11 | * You should have received a copy of the GNU General Public License | ||
| 12 | * along with this program; if not, write to the Free Software | ||
| 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 14 | * | ||
| 15 | * Copyright SUSE Linux Products GmbH 2010 | ||
| 16 | * | ||
| 17 | * Authors: Alexander Graf <agraf@suse.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | #ifndef __ASM_KVM_BOOKE_H__ | ||
| 21 | #define __ASM_KVM_BOOKE_H__ | ||
| 22 | |||
| 23 | #include <linux/types.h> | ||
| 24 | #include <linux/kvm_host.h> | ||
| 25 | |||
| 26 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | ||
| 27 | { | ||
| 28 | vcpu->arch.gpr[num] = val; | ||
| 29 | } | ||
| 30 | |||
| 31 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
| 32 | { | ||
| 33 | return vcpu->arch.gpr[num]; | ||
| 34 | } | ||
| 35 | |||
| 36 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
| 37 | { | ||
| 38 | vcpu->arch.cr = val; | ||
| 39 | } | ||
| 40 | |||
| 41 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
| 42 | { | ||
| 43 | return vcpu->arch.cr; | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
| 47 | { | ||
| 48 | vcpu->arch.xer = val; | ||
| 49 | } | ||
| 50 | |||
| 51 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
| 52 | { | ||
| 53 | return vcpu->arch.xer; | ||
| 54 | } | ||
| 55 | |||
| 56 | static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) | ||
| 57 | { | ||
| 58 | return vcpu->arch.last_inst; | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) | ||
| 62 | { | ||
| 63 | vcpu->arch.ctr = val; | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) | ||
| 67 | { | ||
| 68 | return vcpu->arch.ctr; | ||
| 69 | } | ||
| 70 | |||
| 71 | static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) | ||
| 72 | { | ||
| 73 | vcpu->arch.lr = val; | ||
| 74 | } | ||
| 75 | |||
| 76 | static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) | ||
| 77 | { | ||
| 78 | return vcpu->arch.lr; | ||
| 79 | } | ||
| 80 | |||
| 81 | static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) | ||
| 82 | { | ||
| 83 | vcpu->arch.pc = val; | ||
| 84 | } | ||
| 85 | |||
| 86 | static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) | ||
| 87 | { | ||
| 88 | return vcpu->arch.pc; | ||
| 89 | } | ||
| 90 | |||
| 91 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) | ||
| 92 | { | ||
| 93 | return vcpu->arch.fault_dear; | ||
| 94 | } | ||
| 95 | |||
| 96 | #endif /* __ASM_KVM_BOOKE_H__ */ | ||
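
(Illustrative note, not part of the patch: on BookE every guest register lives directly in vcpu->arch, so the accessors in this new header are plain loads and stores, while Book3S routes some of the same accessors through its shadow vcpu. With both variants available, generic emulation code can use one spelling on either family. A hypothetical fragment, assuming an already-decoded rD field:)

	/* Hypothetical example only: emulate "mflr rD" via the accessors above. */
	static void emulate_mflr(struct kvm_vcpu *vcpu, int rd)
	{
		kvmppc_set_gpr(vcpu, rd, kvmppc_get_lr(vcpu));
	}
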
diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h new file mode 100644 index 000000000000..94f05de9ad04 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_fpu.h | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or modify | ||
| 3 | * it under the terms of the GNU General Public License, version 2, as | ||
| 4 | * published by the Free Software Foundation. | ||
| 5 | * | ||
| 6 | * This program is distributed in the hope that it will be useful, | ||
| 7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 9 | * GNU General Public License for more details. | ||
| 10 | * | ||
| 11 | * You should have received a copy of the GNU General Public License | ||
| 12 | * along with this program; if not, write to the Free Software | ||
| 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 14 | * | ||
| 15 | * Copyright Novell Inc. 2010 | ||
| 16 | * | ||
| 17 | * Authors: Alexander Graf <agraf@suse.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | #ifndef __ASM_KVM_FPU_H__ | ||
| 21 | #define __ASM_KVM_FPU_H__ | ||
| 22 | |||
| 23 | #include <linux/types.h> | ||
| 24 | |||
| 25 | extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); | ||
| 26 | extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); | ||
| 27 | extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); | ||
| 28 | |||
| 29 | extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
| 30 | extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
| 31 | extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
| 32 | extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
| 33 | |||
| 34 | extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
| 35 | u32 *src3); | ||
| 36 | extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
| 37 | u32 *src3); | ||
| 38 | extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
| 39 | u32 *src3); | ||
| 40 | extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
| 41 | u32 *src3); | ||
| 42 | extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
| 43 | u32 *src3); | ||
| 44 | |||
| 45 | #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ | ||
| 46 | u64 *dst, u64 *src1); | ||
| 47 | #define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ | ||
| 48 | u64 *dst, u64 *src1, u64 *src2); | ||
| 49 | #define FPD_THREE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ | ||
| 50 | u64 *dst, u64 *src1, u64 *src2, u64 *src3); | ||
| 51 | |||
| 52 | extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); | ||
| 53 | extern void fpd_fcmpo(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); | ||
| 54 | |||
| 55 | FPD_ONE_IN(fsqrts) | ||
| 56 | FPD_ONE_IN(frsqrtes) | ||
| 57 | FPD_ONE_IN(fres) | ||
| 58 | FPD_ONE_IN(frsp) | ||
| 59 | FPD_ONE_IN(fctiw) | ||
| 60 | FPD_ONE_IN(fctiwz) | ||
| 61 | FPD_ONE_IN(fsqrt) | ||
| 62 | FPD_ONE_IN(fre) | ||
| 63 | FPD_ONE_IN(frsqrte) | ||
| 64 | FPD_ONE_IN(fneg) | ||
| 65 | FPD_ONE_IN(fabs) | ||
| 66 | FPD_TWO_IN(fadds) | ||
| 67 | FPD_TWO_IN(fsubs) | ||
| 68 | FPD_TWO_IN(fdivs) | ||
| 69 | FPD_TWO_IN(fmuls) | ||
| 70 | FPD_TWO_IN(fcpsgn) | ||
| 71 | FPD_TWO_IN(fdiv) | ||
| 72 | FPD_TWO_IN(fadd) | ||
| 73 | FPD_TWO_IN(fmul) | ||
| 74 | FPD_TWO_IN(fsub) | ||
| 75 | FPD_THREE_IN(fmsubs) | ||
| 76 | FPD_THREE_IN(fmadds) | ||
| 77 | FPD_THREE_IN(fnmsubs) | ||
| 78 | FPD_THREE_IN(fnmadds) | ||
| 79 | FPD_THREE_IN(fsel) | ||
| 80 | FPD_THREE_IN(fmsub) | ||
| 81 | FPD_THREE_IN(fmadd) | ||
| 82 | FPD_THREE_IN(fnmsub) | ||
| 83 | FPD_THREE_IN(fnmadd) | ||
| 84 | |||
| 85 | #endif | ||
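
(For reference: the FPD_ONE_IN/FPD_TWO_IN/FPD_THREE_IN macros above only generate extern prototypes for the double-precision helpers implemented elsewhere in this series (the new fpu.o object in the Makefile rules below). For example, FPD_TWO_IN(fadd) expands to:)

	extern void fpd_fadd(u64 *fpscr, u32 *cr, u64 *dst, u64 *src1, u64 *src2);
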
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5e5bae7e152f..0c9ad869decd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
| @@ -66,7 +66,7 @@ struct kvm_vcpu_stat { | |||
| 66 | u32 dec_exits; | 66 | u32 dec_exits; |
| 67 | u32 ext_intr_exits; | 67 | u32 ext_intr_exits; |
| 68 | u32 halt_wakeup; | 68 | u32 halt_wakeup; |
| 69 | #ifdef CONFIG_PPC64 | 69 | #ifdef CONFIG_PPC_BOOK3S |
| 70 | u32 pf_storage; | 70 | u32 pf_storage; |
| 71 | u32 pf_instruc; | 71 | u32 pf_instruc; |
| 72 | u32 sp_storage; | 72 | u32 sp_storage; |
| @@ -124,12 +124,12 @@ struct kvm_arch { | |||
| 124 | }; | 124 | }; |
| 125 | 125 | ||
| 126 | struct kvmppc_pte { | 126 | struct kvmppc_pte { |
| 127 | u64 eaddr; | 127 | ulong eaddr; |
| 128 | u64 vpage; | 128 | u64 vpage; |
| 129 | u64 raddr; | 129 | ulong raddr; |
| 130 | bool may_read; | 130 | bool may_read : 1; |
| 131 | bool may_write; | 131 | bool may_write : 1; |
| 132 | bool may_execute; | 132 | bool may_execute : 1; |
| 133 | }; | 133 | }; |
| 134 | 134 | ||
| 135 | struct kvmppc_mmu { | 135 | struct kvmppc_mmu { |
| @@ -145,7 +145,7 @@ struct kvmppc_mmu { | |||
| 145 | int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); | 145 | int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); |
| 146 | void (*reset_msr)(struct kvm_vcpu *vcpu); | 146 | void (*reset_msr)(struct kvm_vcpu *vcpu); |
| 147 | void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); | 147 | void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); |
| 148 | int (*esid_to_vsid)(struct kvm_vcpu *vcpu, u64 esid, u64 *vsid); | 148 | int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); |
| 149 | u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); | 149 | u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); |
| 150 | bool (*is_dcbz32)(struct kvm_vcpu *vcpu); | 150 | bool (*is_dcbz32)(struct kvm_vcpu *vcpu); |
| 151 | }; | 151 | }; |
| @@ -160,7 +160,7 @@ struct hpte_cache { | |||
| 160 | struct kvm_vcpu_arch { | 160 | struct kvm_vcpu_arch { |
| 161 | ulong host_stack; | 161 | ulong host_stack; |
| 162 | u32 host_pid; | 162 | u32 host_pid; |
| 163 | #ifdef CONFIG_PPC64 | 163 | #ifdef CONFIG_PPC_BOOK3S |
| 164 | ulong host_msr; | 164 | ulong host_msr; |
| 165 | ulong host_r2; | 165 | ulong host_r2; |
| 166 | void *host_retip; | 166 | void *host_retip; |
| @@ -175,7 +175,7 @@ struct kvm_vcpu_arch { | |||
| 175 | ulong gpr[32]; | 175 | ulong gpr[32]; |
| 176 | 176 | ||
| 177 | u64 fpr[32]; | 177 | u64 fpr[32]; |
| 178 | u32 fpscr; | 178 | u64 fpscr; |
| 179 | 179 | ||
| 180 | #ifdef CONFIG_ALTIVEC | 180 | #ifdef CONFIG_ALTIVEC |
| 181 | vector128 vr[32]; | 181 | vector128 vr[32]; |
| @@ -186,19 +186,23 @@ struct kvm_vcpu_arch { | |||
| 186 | u64 vsr[32]; | 186 | u64 vsr[32]; |
| 187 | #endif | 187 | #endif |
| 188 | 188 | ||
| 189 | #ifdef CONFIG_PPC_BOOK3S | ||
| 190 | /* For Gekko paired singles */ | ||
| 191 | u32 qpr[32]; | ||
| 192 | #endif | ||
| 193 | |||
| 194 | #ifdef CONFIG_BOOKE | ||
| 189 | ulong pc; | 195 | ulong pc; |
| 190 | ulong ctr; | 196 | ulong ctr; |
| 191 | ulong lr; | 197 | ulong lr; |
| 192 | 198 | ||
| 193 | #ifdef CONFIG_BOOKE | ||
| 194 | ulong xer; | 199 | ulong xer; |
| 195 | u32 cr; | 200 | u32 cr; |
| 196 | #endif | 201 | #endif |
| 197 | 202 | ||
| 198 | ulong msr; | 203 | ulong msr; |
| 199 | #ifdef CONFIG_PPC64 | 204 | #ifdef CONFIG_PPC_BOOK3S |
| 200 | ulong shadow_msr; | 205 | ulong shadow_msr; |
| 201 | ulong shadow_srr1; | ||
| 202 | ulong hflags; | 206 | ulong hflags; |
| 203 | ulong guest_owned_ext; | 207 | ulong guest_owned_ext; |
| 204 | #endif | 208 | #endif |
| @@ -253,20 +257,22 @@ struct kvm_vcpu_arch { | |||
| 253 | struct dentry *debugfs_exit_timing; | 257 | struct dentry *debugfs_exit_timing; |
| 254 | #endif | 258 | #endif |
| 255 | 259 | ||
| 260 | #ifdef CONFIG_BOOKE | ||
| 256 | u32 last_inst; | 261 | u32 last_inst; |
| 257 | #ifdef CONFIG_PPC64 | ||
| 258 | ulong fault_dsisr; | ||
| 259 | #endif | ||
| 260 | ulong fault_dear; | 262 | ulong fault_dear; |
| 261 | ulong fault_esr; | 263 | ulong fault_esr; |
| 262 | ulong queued_dear; | 264 | ulong queued_dear; |
| 263 | ulong queued_esr; | 265 | ulong queued_esr; |
| 266 | #endif | ||
| 264 | gpa_t paddr_accessed; | 267 | gpa_t paddr_accessed; |
| 265 | 268 | ||
| 266 | u8 io_gpr; /* GPR used as IO source/target */ | 269 | u8 io_gpr; /* GPR used as IO source/target */ |
| 267 | u8 mmio_is_bigendian; | 270 | u8 mmio_is_bigendian; |
| 271 | u8 mmio_sign_extend; | ||
| 268 | u8 dcr_needed; | 272 | u8 dcr_needed; |
| 269 | u8 dcr_is_write; | 273 | u8 dcr_is_write; |
| 274 | u8 osi_needed; | ||
| 275 | u8 osi_enabled; | ||
| 270 | 276 | ||
| 271 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | 277 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ |
| 272 | 278 | ||
| @@ -275,7 +281,7 @@ struct kvm_vcpu_arch { | |||
| 275 | u64 dec_jiffies; | 281 | u64 dec_jiffies; |
| 276 | unsigned long pending_exceptions; | 282 | unsigned long pending_exceptions; |
| 277 | 283 | ||
| 278 | #ifdef CONFIG_PPC64 | 284 | #ifdef CONFIG_PPC_BOOK3S |
| 279 | struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; | 285 | struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; |
| 280 | int hpte_cache_offset; | 286 | int hpte_cache_offset; |
| 281 | #endif | 287 | #endif |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e2642829e435..18d139ec2d22 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
| @@ -30,6 +30,8 @@ | |||
| 30 | #include <linux/kvm_host.h> | 30 | #include <linux/kvm_host.h> |
| 31 | #ifdef CONFIG_PPC_BOOK3S | 31 | #ifdef CONFIG_PPC_BOOK3S |
| 32 | #include <asm/kvm_book3s.h> | 32 | #include <asm/kvm_book3s.h> |
| 33 | #else | ||
| 34 | #include <asm/kvm_booke.h> | ||
| 33 | #endif | 35 | #endif |
| 34 | 36 | ||
| 35 | enum emulation_result { | 37 | enum emulation_result { |
| @@ -37,6 +39,7 @@ enum emulation_result { | |||
| 37 | EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ | 39 | EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ |
| 38 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ | 40 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ |
| 39 | EMULATE_FAIL, /* can't emulate this instruction */ | 41 | EMULATE_FAIL, /* can't emulate this instruction */ |
| 42 | EMULATE_AGAIN, /* something went wrong. go again */ | ||
| 40 | }; | 43 | }; |
| 41 | 44 | ||
| 42 | extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 45 | extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
| @@ -48,8 +51,11 @@ extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); | |||
| 48 | extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | 51 | extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| 49 | unsigned int rt, unsigned int bytes, | 52 | unsigned int rt, unsigned int bytes, |
| 50 | int is_bigendian); | 53 | int is_bigendian); |
| 54 | extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
| 55 | unsigned int rt, unsigned int bytes, | ||
| 56 | int is_bigendian); | ||
| 51 | extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | 57 | extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| 52 | u32 val, unsigned int bytes, int is_bigendian); | 58 | u64 val, unsigned int bytes, int is_bigendian); |
| 53 | 59 | ||
| 54 | extern int kvmppc_emulate_instruction(struct kvm_run *run, | 60 | extern int kvmppc_emulate_instruction(struct kvm_run *run, |
| 55 | struct kvm_vcpu *vcpu); | 61 | struct kvm_vcpu *vcpu); |
| @@ -63,6 +69,7 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, | |||
| 63 | extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); | 69 | extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); |
| 64 | extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); | 70 | extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); |
| 65 | extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); | 71 | extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); |
| 72 | extern int kvmppc_mmu_init(struct kvm_vcpu *vcpu); | ||
| 66 | extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); | 73 | extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); |
| 67 | extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); | 74 | extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); |
| 68 | extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, | 75 | extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, |
| @@ -88,6 +95,8 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); | |||
| 88 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); | 95 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); |
| 89 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 96 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
| 90 | struct kvm_interrupt *irq); | 97 | struct kvm_interrupt *irq); |
| 98 | extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | ||
| 99 | struct kvm_interrupt *irq); | ||
| 91 | 100 | ||
| 92 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 101 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| 93 | unsigned int op, int *advance); | 102 | unsigned int op, int *advance); |
| @@ -99,81 +108,37 @@ extern void kvmppc_booke_exit(void); | |||
| 99 | 108 | ||
| 100 | extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); | 109 | extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); |
| 101 | 110 | ||
| 102 | #ifdef CONFIG_PPC_BOOK3S | 111 | /* |
| 103 | 112 | * Cuts out inst bits with ordering according to spec. | |
| 104 | /* We assume we're always acting on the current vcpu */ | 113 | * That means the leftmost bit is zero. All given bits are included. |
| 105 | 114 | */ | |
| 106 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 115 | static inline u32 kvmppc_get_field(u64 inst, int msb, int lsb) |
| 107 | { | ||
| 108 | if ( num < 14 ) { | ||
| 109 | get_paca()->shadow_vcpu.gpr[num] = val; | ||
| 110 | to_book3s(vcpu)->shadow_vcpu.gpr[num] = val; | ||
| 111 | } else | ||
| 112 | vcpu->arch.gpr[num] = val; | ||
| 113 | } | ||
| 114 | |||
| 115 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
| 116 | { | ||
| 117 | if ( num < 14 ) | ||
| 118 | return get_paca()->shadow_vcpu.gpr[num]; | ||
| 119 | else | ||
| 120 | return vcpu->arch.gpr[num]; | ||
| 121 | } | ||
| 122 | |||
| 123 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
| 124 | { | ||
| 125 | get_paca()->shadow_vcpu.cr = val; | ||
| 126 | to_book3s(vcpu)->shadow_vcpu.cr = val; | ||
| 127 | } | ||
| 128 | |||
| 129 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
| 130 | { | ||
| 131 | return get_paca()->shadow_vcpu.cr; | ||
| 132 | } | ||
| 133 | |||
| 134 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
| 135 | { | ||
| 136 | get_paca()->shadow_vcpu.xer = val; | ||
| 137 | to_book3s(vcpu)->shadow_vcpu.xer = val; | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
| 141 | { | 116 | { |
| 142 | return get_paca()->shadow_vcpu.xer; | 117 | u32 r; |
| 143 | } | 118 | u32 mask; |
| 144 | 119 | ||
| 145 | #else | 120 | BUG_ON(msb > lsb); |
| 146 | 121 | ||
| 147 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 122 | mask = (1 << (lsb - msb + 1)) - 1; |
| 148 | { | 123 | r = (inst >> (63 - lsb)) & mask; |
| 149 | vcpu->arch.gpr[num] = val; | ||
| 150 | } | ||
| 151 | 124 | ||
| 152 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | 125 | return r; |
| 153 | { | ||
| 154 | return vcpu->arch.gpr[num]; | ||
| 155 | } | 126 | } |
| 156 | 127 | ||
| 157 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | 128 | /* |
| 129 | * Replaces inst bits with ordering according to spec. | ||
| 130 | */ | ||
| 131 | static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) | ||
| 158 | { | 132 | { |
| 159 | vcpu->arch.cr = val; | 133 | u32 r; |
| 160 | } | 134 | u32 mask; |
| 161 | 135 | ||
| 162 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | 136 | BUG_ON(msb > lsb); |
| 163 | { | ||
| 164 | return vcpu->arch.cr; | ||
| 165 | } | ||
| 166 | 137 | ||
| 167 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | 138 | mask = ((1 << (lsb - msb + 1)) - 1) << (63 - lsb); |
| 168 | { | 139 | r = (inst & ~mask) | ((value << (63 - lsb)) & mask); |
| 169 | vcpu->arch.xer = val; | ||
| 170 | } | ||
| 171 | 140 | ||
| 172 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | 141 | return r; |
| 173 | { | ||
| 174 | return vcpu->arch.xer; | ||
| 175 | } | 142 | } |
| 176 | 143 | ||
| 177 | #endif | ||
| 178 | |||
| 179 | #endif /* __POWERPC_KVM_PPC_H__ */ | 144 | #endif /* __POWERPC_KVM_PPC_H__ */ |
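
(Illustrative note: the new kvmppc_get_field()/kvmppc_set_field() helpers use IBM bit numbering over a 64-bit value, i.e. bit 0 is the leftmost and bit 63 the rightmost, so a 32-bit instruction kept in the low word occupies bits 32-63. Below is a standalone user-space sketch of the same arithmetic; the instruction constant and field positions are chosen only for illustration and are not taken from this patch.)

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Same arithmetic as kvmppc_get_field(): extract bits msb..lsb, IBM order. */
	static uint32_t get_field(uint64_t inst, int msb, int lsb)
	{
		uint32_t mask = (1 << (lsb - msb + 1)) - 1;
		return (inst >> (63 - lsb)) & mask;
	}

	int main(void)
	{
		/* "lwz r3,0(r1)" = 0x80610000, held in the low word (bits 32..63) */
		uint64_t inst = 0x80610000ULL;

		assert(get_field(inst, 32, 37) == 32); /* primary opcode (insn bits 0..5)  */
		assert(get_field(inst, 38, 42) == 3);  /* RT field       (insn bits 6..10) */
		printf("opcode=%u rt=%u\n",
		       get_field(inst, 32, 37), get_field(inst, 38, 42));
		return 0;
	}
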
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 26383e0778aa..81fb41289d6c 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h | |||
| @@ -27,6 +27,8 @@ extern int __init_new_context(void); | |||
| 27 | extern void __destroy_context(int context_id); | 27 | extern void __destroy_context(int context_id); |
| 28 | static inline void mmu_context_init(void) { } | 28 | static inline void mmu_context_init(void) { } |
| 29 | #else | 29 | #else |
| 30 | extern unsigned long __init_new_context(void); | ||
| 31 | extern void __destroy_context(unsigned long context_id); | ||
| 30 | extern void mmu_context_init(void); | 32 | extern void mmu_context_init(void); |
| 31 | #endif | 33 | #endif |
| 32 | 34 | ||
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 971dfa4815f0..8ce7963ad41d 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include <asm/page.h> | 23 | #include <asm/page.h> |
| 24 | #include <asm/exception-64e.h> | 24 | #include <asm/exception-64e.h> |
| 25 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 25 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
| 26 | #include <asm/kvm_book3s_64_asm.h> | 26 | #include <asm/kvm_book3s_asm.h> |
| 27 | #endif | 27 | #endif |
| 28 | 28 | ||
| 29 | register struct paca_struct *local_paca asm("r13"); | 29 | register struct paca_struct *local_paca asm("r13"); |
| @@ -137,15 +137,9 @@ struct paca_struct { | |||
| 137 | u64 startpurr; /* PURR/TB value snapshot */ | 137 | u64 startpurr; /* PURR/TB value snapshot */ |
| 138 | u64 startspurr; /* SPURR value snapshot */ | 138 | u64 startspurr; /* SPURR value snapshot */ |
| 139 | 139 | ||
| 140 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 140 | #ifdef CONFIG_KVM_BOOK3S_HANDLER |
| 141 | struct { | ||
| 142 | u64 esid; | ||
| 143 | u64 vsid; | ||
| 144 | } kvm_slb[64]; /* guest SLB */ | ||
| 145 | /* We use this to store guest state in */ | 141 | /* We use this to store guest state in */ |
| 146 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; | 142 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; |
| 147 | u8 kvm_slb_max; /* highest used guest slb entry */ | ||
| 148 | u8 kvm_in_guest; /* are we inside the guest? */ | ||
| 149 | #endif | 143 | #endif |
| 150 | }; | 144 | }; |
| 151 | 145 | ||
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 221ba6240464..7492fe8ad6e4 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h | |||
| @@ -229,6 +229,9 @@ struct thread_struct { | |||
| 229 | unsigned long spefscr; /* SPE & eFP status */ | 229 | unsigned long spefscr; /* SPE & eFP status */ |
| 230 | int used_spe; /* set if process has used spe */ | 230 | int used_spe; /* set if process has used spe */ |
| 231 | #endif /* CONFIG_SPE */ | 231 | #endif /* CONFIG_SPE */ |
| 232 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | ||
| 233 | void* kvm_shadow_vcpu; /* KVM internal data */ | ||
| 234 | #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ | ||
| 232 | }; | 235 | }; |
| 233 | 236 | ||
| 234 | #define ARCH_MIN_TASKALIGN 16 | 237 | #define ARCH_MIN_TASKALIGN 16 |
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b68f025924a8..d62fdf4e504b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
| @@ -293,10 +293,12 @@ | |||
| 293 | #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ | 293 | #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ |
| 294 | #define HID1_PS (1<<16) /* 750FX PLL selection */ | 294 | #define HID1_PS (1<<16) /* 750FX PLL selection */ |
| 295 | #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ | 295 | #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ |
| 296 | #define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ | ||
| 296 | #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ | 297 | #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ |
| 297 | #define SPRN_IABR2 0x3FA /* 83xx */ | 298 | #define SPRN_IABR2 0x3FA /* 83xx */ |
| 298 | #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ | 299 | #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ |
| 299 | #define SPRN_HID4 0x3F4 /* 970 HID4 */ | 300 | #define SPRN_HID4 0x3F4 /* 970 HID4 */ |
| 301 | #define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */ | ||
| 300 | #define SPRN_HID5 0x3F6 /* 970 HID5 */ | 302 | #define SPRN_HID5 0x3F6 /* 970 HID5 */ |
| 301 | #define SPRN_HID6 0x3F9 /* BE HID 6 */ | 303 | #define SPRN_HID6 0x3F9 /* BE HID 6 */ |
| 302 | #define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ | 304 | #define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ |
| @@ -465,6 +467,14 @@ | |||
| 465 | #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ | 467 | #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ |
| 466 | #define SPRN_XER 0x001 /* Fixed Point Exception Register */ | 468 | #define SPRN_XER 0x001 /* Fixed Point Exception Register */ |
| 467 | 469 | ||
| 470 | #define SPRN_MMCR0_GEKKO 0x3B8 /* Gekko Monitor Mode Control Register 0 */ | ||
| 471 | #define SPRN_MMCR1_GEKKO 0x3BC /* Gekko Monitor Mode Control Register 1 */ | ||
| 472 | #define SPRN_PMC1_GEKKO 0x3B9 /* Gekko Performance Monitor Control 1 */ | ||
| 473 | #define SPRN_PMC2_GEKKO 0x3BA /* Gekko Performance Monitor Control 2 */ | ||
| 474 | #define SPRN_PMC3_GEKKO 0x3BD /* Gekko Performance Monitor Control 3 */ | ||
| 475 | #define SPRN_PMC4_GEKKO 0x3BE /* Gekko Performance Monitor Control 4 */ | ||
| 476 | #define SPRN_WPAR_GEKKO 0x399 /* Gekko Write Pipe Address Register */ | ||
| 477 | |||
| 468 | #define SPRN_SCOMC 0x114 /* SCOM Access Control */ | 478 | #define SPRN_SCOMC 0x114 /* SCOM Access Control */ |
| 469 | #define SPRN_SCOMD 0x115 /* SCOM Access DATA */ | 479 | #define SPRN_SCOMD 0x115 /* SCOM Access DATA */ |
| 470 | 480 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 28a686fb269c..496cc5b3984f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
| @@ -50,6 +50,9 @@ | |||
| 50 | #endif | 50 | #endif |
| 51 | #ifdef CONFIG_KVM | 51 | #ifdef CONFIG_KVM |
| 52 | #include <linux/kvm_host.h> | 52 | #include <linux/kvm_host.h> |
| 53 | #ifndef CONFIG_BOOKE | ||
| 54 | #include <asm/kvm_book3s.h> | ||
| 55 | #endif | ||
| 53 | #endif | 56 | #endif |
| 54 | 57 | ||
| 55 | #ifdef CONFIG_PPC32 | 58 | #ifdef CONFIG_PPC32 |
| @@ -105,6 +108,9 @@ int main(void) | |||
| 105 | DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); | 108 | DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); |
| 106 | #endif /* CONFIG_SPE */ | 109 | #endif /* CONFIG_SPE */ |
| 107 | #endif /* CONFIG_PPC64 */ | 110 | #endif /* CONFIG_PPC64 */ |
| 111 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | ||
| 112 | DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); | ||
| 113 | #endif | ||
| 108 | 114 | ||
| 109 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | 115 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); |
| 110 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); | 116 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); |
| @@ -191,33 +197,9 @@ int main(void) | |||
| 191 | DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); | 197 | DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); |
| 192 | DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); | 198 | DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); |
| 193 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 199 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
| 194 | DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); | 200 | DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); |
| 195 | DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); | 201 | DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb)); |
| 196 | DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); | 202 | DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max)); |
| 197 | DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); | ||
| 198 | DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); | ||
| 199 | DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); | ||
| 200 | DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); | ||
| 201 | DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); | ||
| 202 | DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); | ||
| 203 | DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); | ||
| 204 | DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); | ||
| 205 | DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); | ||
| 206 | DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); | ||
| 207 | DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); | ||
| 208 | DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); | ||
| 209 | DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10])); | ||
| 210 | DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); | ||
| 211 | DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); | ||
| 212 | DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); | ||
| 213 | DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); | ||
| 214 | DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); | ||
| 215 | DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, | ||
| 216 | shadow_vcpu.vmhandler)); | ||
| 217 | DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, | ||
| 218 | shadow_vcpu.scratch0)); | ||
| 219 | DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, | ||
| 220 | shadow_vcpu.scratch1)); | ||
| 221 | #endif | 203 | #endif |
| 222 | #endif /* CONFIG_PPC64 */ | 204 | #endif /* CONFIG_PPC64 */ |
| 223 | 205 | ||
| @@ -228,8 +210,8 @@ int main(void) | |||
| 228 | /* Interrupt register frame */ | 210 | /* Interrupt register frame */ |
| 229 | DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); | 211 | DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); |
| 230 | DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); | 212 | DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); |
| 231 | #ifdef CONFIG_PPC64 | ||
| 232 | DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); | 213 | DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); |
| 214 | #ifdef CONFIG_PPC64 | ||
| 233 | /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ | 215 | /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ |
| 234 | DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); | 216 | DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); |
| 235 | DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); | 217 | DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); |
| @@ -412,9 +394,6 @@ int main(void) | |||
| 412 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); | 394 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); |
| 413 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); | 395 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); |
| 414 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); | 396 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); |
| 415 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); | ||
| 416 | DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); | ||
| 417 | DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); | ||
| 418 | DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); | 397 | DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); |
| 419 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); | 398 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); |
| 420 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); | 399 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); |
| @@ -422,27 +401,68 @@ int main(void) | |||
| 422 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); | 401 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); |
| 423 | DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); | 402 | DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); |
| 424 | 403 | ||
| 425 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | 404 | /* book3s */ |
| 426 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | 405 | #ifdef CONFIG_PPC_BOOK3S |
| 427 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); | ||
| 428 | |||
| 429 | /* book3s_64 */ | ||
| 430 | #ifdef CONFIG_PPC64 | ||
| 431 | DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); | ||
| 432 | DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); | 406 | DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); |
| 433 | DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); | ||
| 434 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); | 407 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); |
| 435 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); | 408 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); |
| 436 | DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); | ||
| 437 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); | 409 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); |
| 438 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); | 410 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); |
| 439 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); | 411 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); |
| 440 | DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); | 412 | DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); |
| 441 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); | 413 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); |
| 414 | DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - | ||
| 415 | offsetof(struct kvmppc_vcpu_book3s, vcpu)); | ||
| 416 | DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr)); | ||
| 417 | DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer)); | ||
| 418 | DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr)); | ||
| 419 | DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr)); | ||
| 420 | DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc)); | ||
| 421 | DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0])); | ||
| 422 | DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1])); | ||
| 423 | DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2])); | ||
| 424 | DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3])); | ||
| 425 | DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4])); | ||
| 426 | DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5])); | ||
| 427 | DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6])); | ||
| 428 | DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7])); | ||
| 429 | DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8])); | ||
| 430 | DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9])); | ||
| 431 | DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10])); | ||
| 432 | DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11])); | ||
| 433 | DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12])); | ||
| 434 | DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13])); | ||
| 435 | DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1)); | ||
| 436 | DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2)); | ||
| 437 | DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 438 | vmhandler)); | ||
| 439 | DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 440 | scratch0)); | ||
| 441 | DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 442 | scratch1)); | ||
| 443 | DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 444 | in_guest)); | ||
| 445 | DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 446 | fault_dsisr)); | ||
| 447 | DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 448 | fault_dar)); | ||
| 449 | DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 450 | last_inst)); | ||
| 451 | DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
| 452 | shadow_srr1)); | ||
| 453 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 454 | DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr)); | ||
| 455 | #endif | ||
| 442 | #else | 456 | #else |
| 443 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); | 457 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); |
| 444 | DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); | 458 | DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); |
| 445 | #endif /* CONFIG_PPC64 */ | 459 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); |
| 460 | DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); | ||
| 461 | DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); | ||
| 462 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | ||
| 463 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | ||
| 464 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); | ||
| 465 | #endif /* CONFIG_PPC_BOOK3S */ | ||
| 446 | #endif | 466 | #endif |
| 447 | #ifdef CONFIG_44x | 467 | #ifdef CONFIG_44x |
| 448 | DEFINE(PGD_T_LOG2, PGD_T_LOG2); | 468 | DEFINE(PGD_T_LOG2, PGD_T_LOG2); |
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e025e89fe93e..98c4b29a56f4 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <asm/asm-offsets.h> | 33 | #include <asm/asm-offsets.h> |
| 34 | #include <asm/ptrace.h> | 34 | #include <asm/ptrace.h> |
| 35 | #include <asm/bug.h> | 35 | #include <asm/bug.h> |
| 36 | #include <asm/kvm_book3s_asm.h> | ||
| 36 | 37 | ||
| 37 | /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ | 38 | /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ |
| 38 | #define LOAD_BAT(n, reg, RA, RB) \ | 39 | #define LOAD_BAT(n, reg, RA, RB) \ |
| @@ -303,6 +304,7 @@ __secondary_hold_acknowledge: | |||
| 303 | */ | 304 | */ |
| 304 | #define EXCEPTION(n, label, hdlr, xfer) \ | 305 | #define EXCEPTION(n, label, hdlr, xfer) \ |
| 305 | . = n; \ | 306 | . = n; \ |
| 307 | DO_KVM n; \ | ||
| 306 | label: \ | 308 | label: \ |
| 307 | EXCEPTION_PROLOG; \ | 309 | EXCEPTION_PROLOG; \ |
| 308 | addi r3,r1,STACK_FRAME_OVERHEAD; \ | 310 | addi r3,r1,STACK_FRAME_OVERHEAD; \ |
| @@ -358,6 +360,7 @@ i##n: \ | |||
| 358 | * -- paulus. | 360 | * -- paulus. |
| 359 | */ | 361 | */ |
| 360 | . = 0x200 | 362 | . = 0x200 |
| 363 | DO_KVM 0x200 | ||
| 361 | mtspr SPRN_SPRG_SCRATCH0,r10 | 364 | mtspr SPRN_SPRG_SCRATCH0,r10 |
| 362 | mtspr SPRN_SPRG_SCRATCH1,r11 | 365 | mtspr SPRN_SPRG_SCRATCH1,r11 |
| 363 | mfcr r10 | 366 | mfcr r10 |
| @@ -381,6 +384,7 @@ i##n: \ | |||
| 381 | 384 | ||
| 382 | /* Data access exception. */ | 385 | /* Data access exception. */ |
| 383 | . = 0x300 | 386 | . = 0x300 |
| 387 | DO_KVM 0x300 | ||
| 384 | DataAccess: | 388 | DataAccess: |
| 385 | EXCEPTION_PROLOG | 389 | EXCEPTION_PROLOG |
| 386 | mfspr r10,SPRN_DSISR | 390 | mfspr r10,SPRN_DSISR |
| @@ -397,6 +401,7 @@ DataAccess: | |||
| 397 | 401 | ||
| 398 | /* Instruction access exception. */ | 402 | /* Instruction access exception. */ |
| 399 | . = 0x400 | 403 | . = 0x400 |
| 404 | DO_KVM 0x400 | ||
| 400 | InstructionAccess: | 405 | InstructionAccess: |
| 401 | EXCEPTION_PROLOG | 406 | EXCEPTION_PROLOG |
| 402 | andis. r0,r9,0x4000 /* no pte found? */ | 407 | andis. r0,r9,0x4000 /* no pte found? */ |
| @@ -413,6 +418,7 @@ InstructionAccess: | |||
| 413 | 418 | ||
| 414 | /* Alignment exception */ | 419 | /* Alignment exception */ |
| 415 | . = 0x600 | 420 | . = 0x600 |
| 421 | DO_KVM 0x600 | ||
| 416 | Alignment: | 422 | Alignment: |
| 417 | EXCEPTION_PROLOG | 423 | EXCEPTION_PROLOG |
| 418 | mfspr r4,SPRN_DAR | 424 | mfspr r4,SPRN_DAR |
| @@ -427,6 +433,7 @@ Alignment: | |||
| 427 | 433 | ||
| 428 | /* Floating-point unavailable */ | 434 | /* Floating-point unavailable */ |
| 429 | . = 0x800 | 435 | . = 0x800 |
| 436 | DO_KVM 0x800 | ||
| 430 | FPUnavailable: | 437 | FPUnavailable: |
| 431 | BEGIN_FTR_SECTION | 438 | BEGIN_FTR_SECTION |
| 432 | /* | 439 | /* |
| @@ -450,6 +457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) | |||
| 450 | 457 | ||
| 451 | /* System call */ | 458 | /* System call */ |
| 452 | . = 0xc00 | 459 | . = 0xc00 |
| 460 | DO_KVM 0xc00 | ||
| 453 | SystemCall: | 461 | SystemCall: |
| 454 | EXCEPTION_PROLOG | 462 | EXCEPTION_PROLOG |
| 455 | EXC_XFER_EE_LITE(0xc00, DoSyscall) | 463 | EXC_XFER_EE_LITE(0xc00, DoSyscall) |
| @@ -467,9 +475,11 @@ SystemCall: | |||
| 467 | * by executing an altivec instruction. | 475 | * by executing an altivec instruction. |
| 468 | */ | 476 | */ |
| 469 | . = 0xf00 | 477 | . = 0xf00 |
| 478 | DO_KVM 0xf00 | ||
| 470 | b PerformanceMonitor | 479 | b PerformanceMonitor |
| 471 | 480 | ||
| 472 | . = 0xf20 | 481 | . = 0xf20 |
| 482 | DO_KVM 0xf20 | ||
| 473 | b AltiVecUnavailable | 483 | b AltiVecUnavailable |
| 474 | 484 | ||
| 475 | /* | 485 | /* |
| @@ -882,6 +892,10 @@ __secondary_start: | |||
| 882 | RFI | 892 | RFI |
| 883 | #endif /* CONFIG_SMP */ | 893 | #endif /* CONFIG_SMP */ |
| 884 | 894 | ||
| 895 | #ifdef CONFIG_KVM_BOOK3S_HANDLER | ||
| 896 | #include "../kvm/book3s_rmhandlers.S" | ||
| 897 | #endif | ||
| 898 | |||
| 885 | /* | 899 | /* |
| 886 | * Those generic dummy functions are kept for CPUs not | 900 | * Those generic dummy functions are kept for CPUs not |
| 887 | * included in CONFIG_6xx | 901 | * included in CONFIG_6xx |
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index bed9a29ee383..844a44b64472 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S | |||
| @@ -37,7 +37,7 @@ | |||
| 37 | #include <asm/firmware.h> | 37 | #include <asm/firmware.h> |
| 38 | #include <asm/page_64.h> | 38 | #include <asm/page_64.h> |
| 39 | #include <asm/irqflags.h> | 39 | #include <asm/irqflags.h> |
| 40 | #include <asm/kvm_book3s_64_asm.h> | 40 | #include <asm/kvm_book3s_asm.h> |
| 41 | 41 | ||
| 42 | /* The physical memory is layed out such that the secondary processor | 42 | /* The physical memory is layed out such that the secondary processor |
| 43 | * spin code sits at 0x0000...0x00ff. On server, the vectors follow | 43 | * spin code sits at 0x0000...0x00ff. On server, the vectors follow |
| @@ -169,7 +169,7 @@ exception_marker: | |||
| 169 | /* KVM trampoline code needs to be close to the interrupt handlers */ | 169 | /* KVM trampoline code needs to be close to the interrupt handlers */ |
| 170 | 170 | ||
| 171 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 171 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
| 172 | #include "../kvm/book3s_64_rmhandlers.S" | 172 | #include "../kvm/book3s_rmhandlers.S" |
| 173 | #endif | 173 | #endif |
| 174 | 174 | ||
| 175 | _GLOBAL(generic_secondary_thread_init) | 175 | _GLOBAL(generic_secondary_thread_init) |
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ab3e392ac63c..bc9f39d2598b 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c | |||
| @@ -101,6 +101,10 @@ EXPORT_SYMBOL(pci_dram_offset); | |||
| 101 | EXPORT_SYMBOL(start_thread); | 101 | EXPORT_SYMBOL(start_thread); |
| 102 | EXPORT_SYMBOL(kernel_thread); | 102 | EXPORT_SYMBOL(kernel_thread); |
| 103 | 103 | ||
| 104 | #ifndef CONFIG_BOOKE | ||
| 105 | EXPORT_SYMBOL_GPL(cvt_df); | ||
| 106 | EXPORT_SYMBOL_GPL(cvt_fd); | ||
| 107 | #endif | ||
| 104 | EXPORT_SYMBOL(giveup_fpu); | 108 | EXPORT_SYMBOL(giveup_fpu); |
| 105 | #ifdef CONFIG_ALTIVEC | 109 | #ifdef CONFIG_ALTIVEC |
| 106 | EXPORT_SYMBOL(giveup_altivec); | 110 | EXPORT_SYMBOL(giveup_altivec); |
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 689a57c2ac80..73c0a3f64ed1 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c | |||
| @@ -147,7 +147,7 @@ static int __init kvmppc_44x_init(void) | |||
| 147 | if (r) | 147 | if (r) |
| 148 | return r; | 148 | return r; |
| 149 | 149 | ||
| 150 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); | 150 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); |
| 151 | } | 151 | } |
| 152 | 152 | ||
| 153 | static void __exit kvmppc_44x_exit(void) | 153 | static void __exit kvmppc_44x_exit(void) |
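
(Note on the extra 0 in the kvm_init() call above: kvm_init() now takes a vcpu alignment argument, and 44x passes 0, i.e. no special alignment requirement for its vcpu allocation. Assumed prototype after this change, shown for context only:)

	int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
		     struct module *module);
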
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 60624cc9f4d4..b7baff78f90c 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
| @@ -22,12 +22,34 @@ config KVM | |||
| 22 | select ANON_INODES | 22 | select ANON_INODES |
| 23 | select KVM_MMIO | 23 | select KVM_MMIO |
| 24 | 24 | ||
| 25 | config KVM_BOOK3S_HANDLER | ||
| 26 | bool | ||
| 27 | |||
| 28 | config KVM_BOOK3S_32_HANDLER | ||
| 29 | bool | ||
| 30 | select KVM_BOOK3S_HANDLER | ||
| 31 | |||
| 25 | config KVM_BOOK3S_64_HANDLER | 32 | config KVM_BOOK3S_64_HANDLER |
| 26 | bool | 33 | bool |
| 34 | select KVM_BOOK3S_HANDLER | ||
| 35 | |||
| 36 | config KVM_BOOK3S_32 | ||
| 37 | tristate "KVM support for PowerPC book3s_32 processors" | ||
| 38 | depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT | ||
| 39 | select KVM | ||
| 40 | select KVM_BOOK3S_32_HANDLER | ||
| 41 | ---help--- | ||
| 42 | Support running unmodified book3s_32 guest kernels | ||
| 43 | in virtual machines on book3s_32 host processors. | ||
| 44 | |||
| 45 | This module provides access to the hardware capabilities through | ||
| 46 | a character device node named /dev/kvm. | ||
| 47 | |||
| 48 | If unsure, say N. | ||
| 27 | 49 | ||
| 28 | config KVM_BOOK3S_64 | 50 | config KVM_BOOK3S_64 |
| 29 | tristate "KVM support for PowerPC book3s_64 processors" | 51 | tristate "KVM support for PowerPC book3s_64 processors" |
| 30 | depends on EXPERIMENTAL && PPC64 | 52 | depends on EXPERIMENTAL && PPC_BOOK3S_64 |
| 31 | select KVM | 53 | select KVM |
| 32 | select KVM_BOOK3S_64_HANDLER | 54 | select KVM_BOOK3S_64_HANDLER |
| 33 | ---help--- | 55 | ---help--- |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 56484d652377..ff436066bf77 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
| @@ -14,7 +14,7 @@ CFLAGS_emulate.o := -I. | |||
| 14 | 14 | ||
| 15 | common-objs-y += powerpc.o emulate.o | 15 | common-objs-y += powerpc.o emulate.o |
| 16 | obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o | 16 | obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o |
| 17 | obj-$(CONFIG_KVM_BOOK3S_64_HANDLER) += book3s_64_exports.o | 17 | obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o |
| 18 | 18 | ||
| 19 | AFLAGS_booke_interrupts.o := -I$(obj) | 19 | AFLAGS_booke_interrupts.o := -I$(obj) |
| 20 | 20 | ||
| @@ -40,17 +40,31 @@ kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) | |||
| 40 | 40 | ||
| 41 | kvm-book3s_64-objs := \ | 41 | kvm-book3s_64-objs := \ |
| 42 | $(common-objs-y) \ | 42 | $(common-objs-y) \ |
| 43 | fpu.o \ | ||
| 44 | book3s_paired_singles.o \ | ||
| 43 | book3s.o \ | 45 | book3s.o \ |
| 44 | book3s_64_emulate.o \ | 46 | book3s_emulate.o \ |
| 45 | book3s_64_interrupts.o \ | 47 | book3s_interrupts.o \ |
| 46 | book3s_64_mmu_host.o \ | 48 | book3s_64_mmu_host.o \ |
| 47 | book3s_64_mmu.o \ | 49 | book3s_64_mmu.o \ |
| 48 | book3s_32_mmu.o | 50 | book3s_32_mmu.o |
| 49 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) | 51 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) |
| 50 | 52 | ||
| 53 | kvm-book3s_32-objs := \ | ||
| 54 | $(common-objs-y) \ | ||
| 55 | fpu.o \ | ||
| 56 | book3s_paired_singles.o \ | ||
| 57 | book3s.o \ | ||
| 58 | book3s_emulate.o \ | ||
| 59 | book3s_interrupts.o \ | ||
| 60 | book3s_32_mmu_host.o \ | ||
| 61 | book3s_32_mmu.o | ||
| 62 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) | ||
| 63 | |||
| 51 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) | 64 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) |
| 52 | 65 | ||
| 53 | obj-$(CONFIG_KVM_440) += kvm.o | 66 | obj-$(CONFIG_KVM_440) += kvm.o |
| 54 | obj-$(CONFIG_KVM_E500) += kvm.o | 67 | obj-$(CONFIG_KVM_E500) += kvm.o |
| 55 | obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o | 68 | obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o |
| 69 | obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o | ||
| 56 | 70 | ||
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 604af29b71ed..b998abf1a63d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <linux/kvm_host.h> | 17 | #include <linux/kvm_host.h> |
| 18 | #include <linux/err.h> | 18 | #include <linux/err.h> |
| 19 | #include <linux/slab.h> | ||
| 19 | 20 | ||
| 20 | #include <asm/reg.h> | 21 | #include <asm/reg.h> |
| 21 | #include <asm/cputable.h> | 22 | #include <asm/cputable.h> |
| @@ -29,6 +30,7 @@ | |||
| 29 | #include <linux/gfp.h> | 30 | #include <linux/gfp.h> |
| 30 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
| 31 | #include <linux/vmalloc.h> | 32 | #include <linux/vmalloc.h> |
| 33 | #include <linux/highmem.h> | ||
| 32 | 34 | ||
| 33 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 35 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
| 34 | 36 | ||
| @@ -36,7 +38,15 @@ | |||
| 36 | /* #define EXIT_DEBUG_SIMPLE */ | 38 | /* #define EXIT_DEBUG_SIMPLE */ |
| 37 | /* #define DEBUG_EXT */ | 39 | /* #define DEBUG_EXT */ |
| 38 | 40 | ||
| 39 | static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); | 41 | static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, |
| 42 | ulong msr); | ||
| 43 | |||
| 44 | /* Some compatibility defines */ | ||
| 45 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 46 | #define MSR_USER32 MSR_USER | ||
| 47 | #define MSR_USER64 MSR_USER | ||
| 48 | #define HW_PAGE_SIZE PAGE_SIZE | ||
| 49 | #endif | ||
| 40 | 50 | ||
| 41 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 51 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
| 42 | { "exits", VCPU_STAT(sum_exits) }, | 52 | { "exits", VCPU_STAT(sum_exits) }, |
| @@ -69,18 +79,26 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) | |||
| 69 | 79 | ||
| 70 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 80 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
| 71 | { | 81 | { |
| 72 | memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); | 82 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 73 | memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu, | 83 | memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); |
| 84 | memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, | ||
| 74 | sizeof(get_paca()->shadow_vcpu)); | 85 | sizeof(get_paca()->shadow_vcpu)); |
| 75 | get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; | 86 | to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; |
| 87 | #endif | ||
| 88 | |||
| 89 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 90 | current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; | ||
| 91 | #endif | ||
| 76 | } | 92 | } |
| 77 | 93 | ||
| 78 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) | 94 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) |
| 79 | { | 95 | { |
| 80 | memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); | 96 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 81 | memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, | 97 | memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); |
| 98 | memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, | ||
| 82 | sizeof(get_paca()->shadow_vcpu)); | 99 | sizeof(get_paca()->shadow_vcpu)); |
| 83 | to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; | 100 | to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; |
| 101 | #endif | ||
| 84 | 102 | ||
| 85 | kvmppc_giveup_ext(vcpu, MSR_FP); | 103 | kvmppc_giveup_ext(vcpu, MSR_FP); |
| 86 | kvmppc_giveup_ext(vcpu, MSR_VEC); | 104 | kvmppc_giveup_ext(vcpu, MSR_VEC); |
| @@ -131,18 +149,22 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) | |||
| 131 | } | 149 | } |
| 132 | } | 150 | } |
| 133 | 151 | ||
| 134 | if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || | 152 | if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != |
| 135 | (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { | 153 | (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { |
| 136 | kvmppc_mmu_flush_segments(vcpu); | 154 | kvmppc_mmu_flush_segments(vcpu); |
| 137 | kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); | 155 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); |
| 138 | } | 156 | } |
| 157 | |||
| 158 | /* Preload FPU if it's enabled */ | ||
| 159 | if (vcpu->arch.msr & MSR_FP) | ||
| 160 | kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); | ||
| 139 | } | 161 | } |
| 140 | 162 | ||
| 141 | void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) | 163 | void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) |
| 142 | { | 164 | { |
| 143 | vcpu->arch.srr0 = vcpu->arch.pc; | 165 | vcpu->arch.srr0 = kvmppc_get_pc(vcpu); |
| 144 | vcpu->arch.srr1 = vcpu->arch.msr | flags; | 166 | vcpu->arch.srr1 = vcpu->arch.msr | flags; |
| 145 | vcpu->arch.pc = to_book3s(vcpu)->hior + vec; | 167 | kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); |
| 146 | vcpu->arch.mmu.reset_msr(vcpu); | 168 | vcpu->arch.mmu.reset_msr(vcpu); |
| 147 | } | 169 | } |
| 148 | 170 | ||
| @@ -218,6 +240,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
| 218 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); | 240 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); |
| 219 | } | 241 | } |
| 220 | 242 | ||
| 243 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | ||
| 244 | struct kvm_interrupt *irq) | ||
| 245 | { | ||
| 246 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); | ||
| 247 | } | ||
| 248 | |||
| 221 | int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) | 249 | int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) |
| 222 | { | 250 | { |
| 223 | int deliver = 1; | 251 | int deliver = 1; |
| @@ -302,7 +330,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | |||
| 302 | printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); | 330 | printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); |
| 303 | #endif | 331 | #endif |
| 304 | priority = __ffs(*pending); | 332 | priority = __ffs(*pending); |
| 305 | while (priority <= (sizeof(unsigned int) * 8)) { | 333 | while (priority < BOOK3S_IRQPRIO_MAX) { |
| 306 | if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && | 334 | if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && |
| 307 | (priority != BOOK3S_IRQPRIO_DECREMENTER)) { | 335 | (priority != BOOK3S_IRQPRIO_DECREMENTER)) { |
| 308 | /* DEC interrupts get cleared by mtdec */ | 336 | /* DEC interrupts get cleared by mtdec */ |
| @@ -318,13 +346,18 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | |||
| 318 | 346 | ||
| 319 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | 347 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) |
| 320 | { | 348 | { |
| 349 | u32 host_pvr; | ||
| 350 | |||
| 321 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; | 351 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; |
| 322 | vcpu->arch.pvr = pvr; | 352 | vcpu->arch.pvr = pvr; |
| 353 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 323 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { | 354 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { |
| 324 | kvmppc_mmu_book3s_64_init(vcpu); | 355 | kvmppc_mmu_book3s_64_init(vcpu); |
| 325 | to_book3s(vcpu)->hior = 0xfff00000; | 356 | to_book3s(vcpu)->hior = 0xfff00000; |
| 326 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; | 357 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; |
| 327 | } else { | 358 | } else |
| 359 | #endif | ||
| 360 | { | ||
| 328 | kvmppc_mmu_book3s_32_init(vcpu); | 361 | kvmppc_mmu_book3s_32_init(vcpu); |
| 329 | to_book3s(vcpu)->hior = 0; | 362 | to_book3s(vcpu)->hior = 0; |
| 330 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; | 363 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; |
| @@ -337,6 +370,32 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | |||
| 337 | !strcmp(cur_cpu_spec->platform, "ppc970")) | 370 | !strcmp(cur_cpu_spec->platform, "ppc970")) |
| 338 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; | 371 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; |
| 339 | 372 | ||
| 373 | /* Cell performs badly if MSR_FEx are set. So let's hope nobody | ||
| 374 | really needs them in a VM on Cell and force disable them. */ | ||
| 375 | if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) | ||
| 376 | to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); | ||
| 377 | |||
| 378 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 379 | /* 32 bit Book3S always has 32 byte dcbz */ | ||
| 380 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; | ||
| 381 | #endif | ||
| 382 | |||
| 383 | /* On some CPUs we can execute paired single operations natively */ | ||
| 384 | asm ( "mfpvr %0" : "=r"(host_pvr)); | ||
| 385 | switch (host_pvr) { | ||
| 386 | case 0x00080200: /* lonestar 2.0 */ | ||
| 387 | case 0x00088202: /* lonestar 2.2 */ | ||
| 388 | case 0x70000100: /* gekko 1.0 */ | ||
| 389 | case 0x00080100: /* gekko 2.0 */ | ||
| 390 | case 0x00083203: /* gekko 2.3a */ | ||
| 391 | case 0x00083213: /* gekko 2.3b */ | ||
| 392 | case 0x00083204: /* gekko 2.4 */ | ||
| 393 | case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ | ||
| 394 | case 0x00087200: /* broadway */ | ||
| 395 | vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; | ||
| 396 | /* Enable HID2.PSE - in case we need it later */ | ||
| 397 | mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); | ||
| 398 | } | ||
| 340 | } | 399 | } |
| 341 | 400 | ||
| 342 | /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To | 401 | /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To |
| @@ -350,34 +409,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | |||
| 350 | */ | 409 | */ |
| 351 | static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) | 410 | static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) |
| 352 | { | 411 | { |
| 353 | bool touched = false; | 412 | struct page *hpage; |
| 354 | hva_t hpage; | 413 | u64 hpage_offset; |
| 355 | u32 *page; | 414 | u32 *page; |
| 356 | int i; | 415 | int i; |
| 357 | 416 | ||
| 358 | hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); | 417 | hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); |
| 359 | if (kvm_is_error_hva(hpage)) | 418 | if (is_error_page(hpage)) |
| 360 | return; | 419 | return; |
| 361 | 420 | ||
| 362 | hpage |= pte->raddr & ~PAGE_MASK; | 421 | hpage_offset = pte->raddr & ~PAGE_MASK; |
| 363 | hpage &= ~0xFFFULL; | 422 | hpage_offset &= ~0xFFFULL; |
| 364 | 423 | hpage_offset /= 4; | |
| 365 | page = vmalloc(HW_PAGE_SIZE); | ||
| 366 | |||
| 367 | if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) | ||
| 368 | goto out; | ||
| 369 | 424 | ||
| 370 | for (i=0; i < HW_PAGE_SIZE / 4; i++) | 425 | get_page(hpage); |
| 371 | if ((page[i] & 0xff0007ff) == INS_DCBZ) { | 426 | page = kmap_atomic(hpage, KM_USER0); |
| 372 | page[i] &= 0xfffffff7; // reserved instruction, so we trap | ||
| 373 | touched = true; | ||
| 374 | } | ||
| 375 | 427 | ||
| 376 | if (touched) | 428 | /* patch dcbz into reserved instruction, so we trap */ |
| 377 | copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); | 429 | for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) |
| 430 | if ((page[i] & 0xff0007ff) == INS_DCBZ) | ||
| 431 | page[i] &= 0xfffffff7; | ||
| 378 | 432 | ||
| 379 | out: | 433 | kunmap_atomic(page, KM_USER0); |
| 380 | vfree(page); | 434 | put_page(hpage); |
| 381 | } | 435 | } |
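The rewritten kvmppc_patch_dcbz() above maps the guest page with gfn_to_page()/kmap_atomic() instead of bouncing it through a vmalloc buffer, and only scans the naturally aligned region around the fault. The transformation itself is unchanged: every dcbz encoding is flipped into a reserved instruction so it traps and KVM can emulate the 32-byte variant. A hedged standalone sketch of that scan (same INS_DCBZ value and masks as the patch; the helper name is made up):

	/* sketch: make every dcbz in a run of instruction words trap */
	static void patch_dcbz_words(u32 *insts, unsigned long nr_words)
	{
		unsigned long i;

		for (i = 0; i < nr_words; i++)
			if ((insts[i] & 0xff0007ff) == INS_DCBZ)
				insts[i] &= 0xfffffff7;	/* reserved encoding now */
	}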
| 382 | 436 | ||
| 383 | static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, | 437 | static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, |
| @@ -391,15 +445,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, | |||
| 391 | } else { | 445 | } else { |
| 392 | pte->eaddr = eaddr; | 446 | pte->eaddr = eaddr; |
| 393 | pte->raddr = eaddr & 0xffffffff; | 447 | pte->raddr = eaddr & 0xffffffff; |
| 394 | pte->vpage = eaddr >> 12; | 448 | pte->vpage = VSID_REAL | eaddr >> 12; |
| 395 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
| 396 | case 0: | ||
| 397 | pte->vpage |= VSID_REAL; | ||
| 398 | case MSR_DR: | ||
| 399 | pte->vpage |= VSID_REAL_DR; | ||
| 400 | case MSR_IR: | ||
| 401 | pte->vpage |= VSID_REAL_IR; | ||
| 402 | } | ||
| 403 | pte->may_read = true; | 449 | pte->may_read = true; |
| 404 | pte->may_write = true; | 450 | pte->may_write = true; |
| 405 | pte->may_execute = true; | 451 | pte->may_execute = true; |
| @@ -434,55 +480,55 @@ err: | |||
| 434 | return kvmppc_bad_hva(); | 480 | return kvmppc_bad_hva(); |
| 435 | } | 481 | } |
| 436 | 482 | ||
| 437 | int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) | 483 | int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, |
| 484 | bool data) | ||
| 438 | { | 485 | { |
| 439 | struct kvmppc_pte pte; | 486 | struct kvmppc_pte pte; |
| 440 | hva_t hva = eaddr; | ||
| 441 | 487 | ||
| 442 | vcpu->stat.st++; | 488 | vcpu->stat.st++; |
| 443 | 489 | ||
| 444 | if (kvmppc_xlate(vcpu, eaddr, false, &pte)) | 490 | if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) |
| 445 | goto err; | 491 | return -ENOENT; |
| 446 | 492 | ||
| 447 | hva = kvmppc_pte_to_hva(vcpu, &pte, false); | 493 | *eaddr = pte.raddr; |
| 448 | if (kvm_is_error_hva(hva)) | ||
| 449 | goto err; | ||
| 450 | 494 | ||
| 451 | if (copy_to_user((void __user *)hva, ptr, size)) { | 495 | if (!pte.may_write) |
| 452 | printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); | 496 | return -EPERM; |
| 453 | goto err; | ||
| 454 | } | ||
| 455 | 497 | ||
| 456 | return 0; | 498 | if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) |
| 499 | return EMULATE_DO_MMIO; | ||
| 457 | 500 | ||
| 458 | err: | 501 | return EMULATE_DONE; |
| 459 | return -ENOENT; | ||
| 460 | } | 502 | } |
| 461 | 503 | ||
| 462 | int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, | 504 | int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, |
| 463 | bool data) | 505 | bool data) |
| 464 | { | 506 | { |
| 465 | struct kvmppc_pte pte; | 507 | struct kvmppc_pte pte; |
| 466 | hva_t hva = eaddr; | 508 | hva_t hva = *eaddr; |
| 467 | 509 | ||
| 468 | vcpu->stat.ld++; | 510 | vcpu->stat.ld++; |
| 469 | 511 | ||
| 470 | if (kvmppc_xlate(vcpu, eaddr, data, &pte)) | 512 | if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) |
| 471 | goto err; | 513 | goto nopte; |
| 514 | |||
| 515 | *eaddr = pte.raddr; | ||
| 472 | 516 | ||
| 473 | hva = kvmppc_pte_to_hva(vcpu, &pte, true); | 517 | hva = kvmppc_pte_to_hva(vcpu, &pte, true); |
| 474 | if (kvm_is_error_hva(hva)) | 518 | if (kvm_is_error_hva(hva)) |
| 475 | goto err; | 519 | goto mmio; |
| 476 | 520 | ||
| 477 | if (copy_from_user(ptr, (void __user *)hva, size)) { | 521 | if (copy_from_user(ptr, (void __user *)hva, size)) { |
| 478 | printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); | 522 | printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); |
| 479 | goto err; | 523 | goto mmio; |
| 480 | } | 524 | } |
| 481 | 525 | ||
| 482 | return 0; | 526 | return EMULATE_DONE; |
| 483 | 527 | ||
| 484 | err: | 528 | nopte: |
| 485 | return -ENOENT; | 529 | return -ENOENT; |
| 530 | mmio: | ||
| 531 | return EMULATE_DO_MMIO; | ||
| 486 | } | 532 | } |
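kvmppc_st() and kvmppc_ld() now take the effective address by reference, hand the translated real address back to the caller, and report EMULATE_DONE, EMULATE_DO_MMIO or -ENOENT instead of a bare 0/-ENOENT. A sketch of how a caller might consume the new convention (the wrapper is illustrative; only the signature and return values come from the patch):

	/* illustrative helper: read one word of guest memory */
	static int read_guest_u32(struct kvm_vcpu *vcpu, ulong eaddr, u32 *val)
	{
		switch (kvmppc_ld(vcpu, &eaddr, sizeof(*val), val, true)) {
		case EMULATE_DONE:
			return 0;		/* *val filled, eaddr now holds the real address */
		case EMULATE_DO_MMIO:
			return -EAGAIN;		/* would need MMIO emulation */
		default:
			return -EFAULT;		/* -ENOENT: no guest translation */
		}
	}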
| 487 | 533 | ||
| 488 | static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | 534 | static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) |
| @@ -499,12 +545,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 499 | int page_found = 0; | 545 | int page_found = 0; |
| 500 | struct kvmppc_pte pte; | 546 | struct kvmppc_pte pte; |
| 501 | bool is_mmio = false; | 547 | bool is_mmio = false; |
| 548 | bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; | ||
| 549 | bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; | ||
| 550 | u64 vsid; | ||
| 502 | 551 | ||
| 503 | if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { | 552 | relocated = data ? dr : ir; |
| 504 | relocated = (vcpu->arch.msr & MSR_DR); | ||
| 505 | } else { | ||
| 506 | relocated = (vcpu->arch.msr & MSR_IR); | ||
| 507 | } | ||
| 508 | 553 | ||
| 509 | /* Resolve real address if translation turned on */ | 554 | /* Resolve real address if translation turned on */ |
| 510 | if (relocated) { | 555 | if (relocated) { |
| @@ -516,14 +561,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 516 | pte.raddr = eaddr & 0xffffffff; | 561 | pte.raddr = eaddr & 0xffffffff; |
| 517 | pte.eaddr = eaddr; | 562 | pte.eaddr = eaddr; |
| 518 | pte.vpage = eaddr >> 12; | 563 | pte.vpage = eaddr >> 12; |
| 519 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | 564 | } |
| 520 | case 0: | 565 | |
| 521 | pte.vpage |= VSID_REAL; | 566 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { |
| 522 | case MSR_DR: | 567 | case 0: |
| 523 | pte.vpage |= VSID_REAL_DR; | 568 | pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); |
| 524 | case MSR_IR: | 569 | break; |
| 525 | pte.vpage |= VSID_REAL_IR; | 570 | case MSR_DR: |
| 526 | } | 571 | case MSR_IR: |
| 572 | vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); | ||
| 573 | |||
| 574 | if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) | ||
| 575 | pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); | ||
| 576 | else | ||
| 577 | pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); | ||
| 578 | pte.vpage |= vsid; | ||
| 579 | |||
| 580 | if (vsid == -1) | ||
| 581 | page_found = -EINVAL; | ||
| 582 | break; | ||
| 527 | } | 583 | } |
| 528 | 584 | ||
| 529 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 585 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
| @@ -538,20 +594,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 538 | 594 | ||
| 539 | if (page_found == -ENOENT) { | 595 | if (page_found == -ENOENT) { |
| 540 | /* Page not found in guest PTE entries */ | 596 | /* Page not found in guest PTE entries */ |
| 541 | vcpu->arch.dear = vcpu->arch.fault_dear; | 597 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
| 542 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; | 598 | to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; |
| 543 | vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); | 599 | vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); |
| 544 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 600 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
| 545 | } else if (page_found == -EPERM) { | 601 | } else if (page_found == -EPERM) { |
| 546 | /* Storage protection */ | 602 | /* Storage protection */ |
| 547 | vcpu->arch.dear = vcpu->arch.fault_dear; | 603 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
| 548 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; | 604 | to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; |
| 549 | to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; | 605 | to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; |
| 550 | vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); | 606 | vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); |
| 551 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 607 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
| 552 | } else if (page_found == -EINVAL) { | 608 | } else if (page_found == -EINVAL) { |
| 553 | /* Page not found in guest SLB */ | 609 | /* Page not found in guest SLB */ |
| 554 | vcpu->arch.dear = vcpu->arch.fault_dear; | 610 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
| 555 | kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); | 611 | kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); |
| 556 | } else if (!is_mmio && | 612 | } else if (!is_mmio && |
| 557 | kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { | 613 | kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { |
| @@ -583,11 +639,13 @@ static inline int get_fpr_index(int i) | |||
| 583 | } | 639 | } |
| 584 | 640 | ||
| 585 | /* Give up external provider (FPU, Altivec, VSX) */ | 641 | /* Give up external provider (FPU, Altivec, VSX) */ |
| 586 | static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) | 642 | void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) |
| 587 | { | 643 | { |
| 588 | struct thread_struct *t = ¤t->thread; | 644 | struct thread_struct *t = ¤t->thread; |
| 589 | u64 *vcpu_fpr = vcpu->arch.fpr; | 645 | u64 *vcpu_fpr = vcpu->arch.fpr; |
| 646 | #ifdef CONFIG_VSX | ||
| 590 | u64 *vcpu_vsx = vcpu->arch.vsr; | 647 | u64 *vcpu_vsx = vcpu->arch.vsr; |
| 648 | #endif | ||
| 591 | u64 *thread_fpr = (u64*)t->fpr; | 649 | u64 *thread_fpr = (u64*)t->fpr; |
| 592 | int i; | 650 | int i; |
| 593 | 651 | ||
| @@ -629,21 +687,65 @@ static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) | |||
| 629 | kvmppc_recalc_shadow_msr(vcpu); | 687 | kvmppc_recalc_shadow_msr(vcpu); |
| 630 | } | 688 | } |
| 631 | 689 | ||
| 690 | static int kvmppc_read_inst(struct kvm_vcpu *vcpu) | ||
| 691 | { | ||
| 692 | ulong srr0 = kvmppc_get_pc(vcpu); | ||
| 693 | u32 last_inst = kvmppc_get_last_inst(vcpu); | ||
| 694 | int ret; | ||
| 695 | |||
| 696 | ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); | ||
| 697 | if (ret == -ENOENT) { | ||
| 698 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); | ||
| 699 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); | ||
| 700 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); | ||
| 701 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); | ||
| 702 | return EMULATE_AGAIN; | ||
| 703 | } | ||
| 704 | |||
| 705 | return EMULATE_DONE; | ||
| 706 | } | ||
| 707 | |||
| 708 | static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) | ||
| 709 | { | ||
| 710 | |||
| 711 | /* Need to do paired single emulation? */ | ||
| 712 | if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) | ||
| 713 | return EMULATE_DONE; | ||
| 714 | |||
| 715 | /* Read out the instruction */ | ||
| 716 | if (kvmppc_read_inst(vcpu) == EMULATE_DONE) | ||
| 717 | /* Need to emulate */ | ||
| 718 | return EMULATE_FAIL; | ||
| 719 | |||
| 720 | return EMULATE_AGAIN; | ||
| 721 | } | ||
| 722 | |||
| 632 | /* Handle external providers (FPU, Altivec, VSX) */ | 723 | /* Handle external providers (FPU, Altivec, VSX) */ |
| 633 | static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, | 724 | static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, |
| 634 | ulong msr) | 725 | ulong msr) |
| 635 | { | 726 | { |
| 636 | struct thread_struct *t = ¤t->thread; | 727 | struct thread_struct *t = ¤t->thread; |
| 637 | u64 *vcpu_fpr = vcpu->arch.fpr; | 728 | u64 *vcpu_fpr = vcpu->arch.fpr; |
| 729 | #ifdef CONFIG_VSX | ||
| 638 | u64 *vcpu_vsx = vcpu->arch.vsr; | 730 | u64 *vcpu_vsx = vcpu->arch.vsr; |
| 731 | #endif | ||
| 639 | u64 *thread_fpr = (u64*)t->fpr; | 732 | u64 *thread_fpr = (u64*)t->fpr; |
| 640 | int i; | 733 | int i; |
| 641 | 734 | ||
| 735 | /* When we have paired singles, we emulate in software */ | ||
| 736 | if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) | ||
| 737 | return RESUME_GUEST; | ||
| 738 | |||
| 642 | if (!(vcpu->arch.msr & msr)) { | 739 | if (!(vcpu->arch.msr & msr)) { |
| 643 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 740 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
| 644 | return RESUME_GUEST; | 741 | return RESUME_GUEST; |
| 645 | } | 742 | } |
| 646 | 743 | ||
| 744 | /* We already own the ext */ | ||
| 745 | if (vcpu->arch.guest_owned_ext & msr) { | ||
| 746 | return RESUME_GUEST; | ||
| 747 | } | ||
| 748 | |||
| 647 | #ifdef DEBUG_EXT | 749 | #ifdef DEBUG_EXT |
| 648 | printk(KERN_INFO "Loading up ext 0x%lx\n", msr); | 750 | printk(KERN_INFO "Loading up ext 0x%lx\n", msr); |
| 649 | #endif | 751 | #endif |
| @@ -696,21 +798,33 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 696 | run->ready_for_interrupt_injection = 1; | 798 | run->ready_for_interrupt_injection = 1; |
| 697 | #ifdef EXIT_DEBUG | 799 | #ifdef EXIT_DEBUG |
| 698 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", | 800 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", |
| 699 | exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, | 801 | exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), |
| 700 | kvmppc_get_dec(vcpu), vcpu->arch.msr); | 802 | kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); |
| 701 | #elif defined (EXIT_DEBUG_SIMPLE) | 803 | #elif defined (EXIT_DEBUG_SIMPLE) |
| 702 | if ((exit_nr != 0x900) && (exit_nr != 0x500)) | 804 | if ((exit_nr != 0x900) && (exit_nr != 0x500)) |
| 703 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", | 805 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", |
| 704 | exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, | 806 | exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), |
| 705 | vcpu->arch.msr); | 807 | vcpu->arch.msr); |
| 706 | #endif | 808 | #endif |
| 707 | kvm_resched(vcpu); | 809 | kvm_resched(vcpu); |
| 708 | switch (exit_nr) { | 810 | switch (exit_nr) { |
| 709 | case BOOK3S_INTERRUPT_INST_STORAGE: | 811 | case BOOK3S_INTERRUPT_INST_STORAGE: |
| 710 | vcpu->stat.pf_instruc++; | 812 | vcpu->stat.pf_instruc++; |
| 813 | |||
| 814 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 815 | /* We mark segments as unused when invalidating them, so | ||
| 816 | * treat the respective fault as a segment fault. */ | ||
| 817 | if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] | ||
| 818 | == SR_INVALID) { | ||
| 819 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); | ||
| 820 | r = RESUME_GUEST; | ||
| 821 | break; | ||
| 822 | } | ||
| 823 | #endif | ||
| 824 | |||
| 711 | /* only care about PTEG not found errors, but leave NX alone */ | 825 | /* only care about PTEG not found errors, but leave NX alone */ |
| 712 | if (vcpu->arch.shadow_srr1 & 0x40000000) { | 826 | if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { |
| 713 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); | 827 | r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); |
| 714 | vcpu->stat.sp_instruc++; | 828 | vcpu->stat.sp_instruc++; |
| 715 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 829 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
| 716 | (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { | 830 | (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { |
| @@ -719,37 +833,52 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 719 | * so we can't use the NX bit inside the guest. Let's cross our fingers, | 833 | * so we can't use the NX bit inside the guest. Let's cross our fingers, |
| 720 | * that no guest that needs the dcbz hack does NX. | 834 | * that no guest that needs the dcbz hack does NX. |
| 721 | */ | 835 | */ |
| 722 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | 836 | kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); |
| 837 | r = RESUME_GUEST; | ||
| 723 | } else { | 838 | } else { |
| 724 | vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; | 839 | vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; |
| 725 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 840 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
| 726 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | 841 | kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); |
| 727 | r = RESUME_GUEST; | 842 | r = RESUME_GUEST; |
| 728 | } | 843 | } |
| 729 | break; | 844 | break; |
| 730 | case BOOK3S_INTERRUPT_DATA_STORAGE: | 845 | case BOOK3S_INTERRUPT_DATA_STORAGE: |
| 846 | { | ||
| 847 | ulong dar = kvmppc_get_fault_dar(vcpu); | ||
| 731 | vcpu->stat.pf_storage++; | 848 | vcpu->stat.pf_storage++; |
| 849 | |||
| 850 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
| 851 | /* We mark segments as unused when invalidating them, so | ||
| 852 | * treat the respective fault as a segment fault. */ | ||
| 853 | if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { | ||
| 854 | kvmppc_mmu_map_segment(vcpu, dar); | ||
| 855 | r = RESUME_GUEST; | ||
| 856 | break; | ||
| 857 | } | ||
| 858 | #endif | ||
| 859 | |||
| 732 | /* The only case we need to handle is missing shadow PTEs */ | 860 | /* The only case we need to handle is missing shadow PTEs */ |
| 733 | if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) { | 861 | if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { |
| 734 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr); | 862 | r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); |
| 735 | } else { | 863 | } else { |
| 736 | vcpu->arch.dear = vcpu->arch.fault_dear; | 864 | vcpu->arch.dear = dar; |
| 737 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; | 865 | to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; |
| 738 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 866 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
| 739 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL); | 867 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); |
| 740 | r = RESUME_GUEST; | 868 | r = RESUME_GUEST; |
| 741 | } | 869 | } |
| 742 | break; | 870 | break; |
| 871 | } | ||
| 743 | case BOOK3S_INTERRUPT_DATA_SEGMENT: | 872 | case BOOK3S_INTERRUPT_DATA_SEGMENT: |
| 744 | if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) { | 873 | if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { |
| 745 | vcpu->arch.dear = vcpu->arch.fault_dear; | 874 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
| 746 | kvmppc_book3s_queue_irqprio(vcpu, | 875 | kvmppc_book3s_queue_irqprio(vcpu, |
| 747 | BOOK3S_INTERRUPT_DATA_SEGMENT); | 876 | BOOK3S_INTERRUPT_DATA_SEGMENT); |
| 748 | } | 877 | } |
| 749 | r = RESUME_GUEST; | 878 | r = RESUME_GUEST; |
| 750 | break; | 879 | break; |
| 751 | case BOOK3S_INTERRUPT_INST_SEGMENT: | 880 | case BOOK3S_INTERRUPT_INST_SEGMENT: |
| 752 | if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) { | 881 | if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { |
| 753 | kvmppc_book3s_queue_irqprio(vcpu, | 882 | kvmppc_book3s_queue_irqprio(vcpu, |
| 754 | BOOK3S_INTERRUPT_INST_SEGMENT); | 883 | BOOK3S_INTERRUPT_INST_SEGMENT); |
| 755 | } | 884 | } |
| @@ -764,18 +893,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 764 | vcpu->stat.ext_intr_exits++; | 893 | vcpu->stat.ext_intr_exits++; |
| 765 | r = RESUME_GUEST; | 894 | r = RESUME_GUEST; |
| 766 | break; | 895 | break; |
| 896 | case BOOK3S_INTERRUPT_PERFMON: | ||
| 897 | r = RESUME_GUEST; | ||
| 898 | break; | ||
| 767 | case BOOK3S_INTERRUPT_PROGRAM: | 899 | case BOOK3S_INTERRUPT_PROGRAM: |
| 768 | { | 900 | { |
| 769 | enum emulation_result er; | 901 | enum emulation_result er; |
| 770 | ulong flags; | 902 | ulong flags; |
| 771 | 903 | ||
| 772 | flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; | 904 | program_interrupt: |
| 905 | flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; | ||
| 773 | 906 | ||
| 774 | if (vcpu->arch.msr & MSR_PR) { | 907 | if (vcpu->arch.msr & MSR_PR) { |
| 775 | #ifdef EXIT_DEBUG | 908 | #ifdef EXIT_DEBUG |
| 776 | printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst); | 909 | printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); |
| 777 | #endif | 910 | #endif |
| 778 | if ((vcpu->arch.last_inst & 0xff0007ff) != | 911 | if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != |
| 779 | (INS_DCBZ & 0xfffffff7)) { | 912 | (INS_DCBZ & 0xfffffff7)) { |
| 780 | kvmppc_core_queue_program(vcpu, flags); | 913 | kvmppc_core_queue_program(vcpu, flags); |
| 781 | r = RESUME_GUEST; | 914 | r = RESUME_GUEST; |
| @@ -789,33 +922,80 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 789 | case EMULATE_DONE: | 922 | case EMULATE_DONE: |
| 790 | r = RESUME_GUEST_NV; | 923 | r = RESUME_GUEST_NV; |
| 791 | break; | 924 | break; |
| 925 | case EMULATE_AGAIN: | ||
| 926 | r = RESUME_GUEST; | ||
| 927 | break; | ||
| 792 | case EMULATE_FAIL: | 928 | case EMULATE_FAIL: |
| 793 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", | 929 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", |
| 794 | __func__, vcpu->arch.pc, vcpu->arch.last_inst); | 930 | __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); |
| 795 | kvmppc_core_queue_program(vcpu, flags); | 931 | kvmppc_core_queue_program(vcpu, flags); |
| 796 | r = RESUME_GUEST; | 932 | r = RESUME_GUEST; |
| 797 | break; | 933 | break; |
| 934 | case EMULATE_DO_MMIO: | ||
| 935 | run->exit_reason = KVM_EXIT_MMIO; | ||
| 936 | r = RESUME_HOST_NV; | ||
| 937 | break; | ||
| 798 | default: | 938 | default: |
| 799 | BUG(); | 939 | BUG(); |
| 800 | } | 940 | } |
| 801 | break; | 941 | break; |
| 802 | } | 942 | } |
| 803 | case BOOK3S_INTERRUPT_SYSCALL: | 943 | case BOOK3S_INTERRUPT_SYSCALL: |
| 804 | #ifdef EXIT_DEBUG | 944 | // XXX make user settable |
| 805 | printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); | 945 | if (vcpu->arch.osi_enabled && |
| 806 | #endif | 946 | (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && |
| 807 | vcpu->stat.syscall_exits++; | 947 | (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { |
| 808 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 948 | u64 *gprs = run->osi.gprs; |
| 809 | r = RESUME_GUEST; | 949 | int i; |
| 950 | |||
| 951 | run->exit_reason = KVM_EXIT_OSI; | ||
| 952 | for (i = 0; i < 32; i++) | ||
| 953 | gprs[i] = kvmppc_get_gpr(vcpu, i); | ||
| 954 | vcpu->arch.osi_needed = 1; | ||
| 955 | r = RESUME_HOST_NV; | ||
| 956 | |||
| 957 | } else { | ||
| 958 | vcpu->stat.syscall_exits++; | ||
| 959 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
| 960 | r = RESUME_GUEST; | ||
| 961 | } | ||
| 810 | break; | 962 | break; |
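With vcpu->arch.osi_enabled set, a guest sc carrying the OSI magic in r3/r4 is no longer reflected back into the guest: all 32 GPRs are copied into run->osi.gprs and the vcpu exits to userspace with KVM_EXIT_OSI, while osi_needed flags that the (possibly modified) GPRs still have to be transferred back before the next entry (that write-back is outside this hunk). A rough sketch of the matching userspace handler (the dispatch function is hypothetical; only the exit reason and the gprs array come from the patch):

	case KVM_EXIT_OSI:
		/* run->osi.gprs holds r0..r31 at the time of the hypercall */
		emulate_osi_call(run->osi.gprs);	/* hypothetical emulator hook */
		/* results left in gprs are picked up again on the next KVM_RUN */
		break;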
| 811 | case BOOK3S_INTERRUPT_FP_UNAVAIL: | 963 | case BOOK3S_INTERRUPT_FP_UNAVAIL: |
| 812 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); | ||
| 813 | break; | ||
| 814 | case BOOK3S_INTERRUPT_ALTIVEC: | 964 | case BOOK3S_INTERRUPT_ALTIVEC: |
| 815 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); | ||
| 816 | break; | ||
| 817 | case BOOK3S_INTERRUPT_VSX: | 965 | case BOOK3S_INTERRUPT_VSX: |
| 818 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); | 966 | { |
| 967 | int ext_msr = 0; | ||
| 968 | |||
| 969 | switch (exit_nr) { | ||
| 970 | case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; | ||
| 971 | case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; | ||
| 972 | case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; | ||
| 973 | } | ||
| 974 | |||
| 975 | switch (kvmppc_check_ext(vcpu, exit_nr)) { | ||
| 976 | case EMULATE_DONE: | ||
| 977 | /* everything ok - let's enable the ext */ | ||
| 978 | r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); | ||
| 979 | break; | ||
| 980 | case EMULATE_FAIL: | ||
| 981 | /* we need to emulate this instruction */ | ||
| 982 | goto program_interrupt; | ||
| 983 | break; | ||
| 984 | default: | ||
| 985 | /* nothing to worry about - go again */ | ||
| 986 | break; | ||
| 987 | } | ||
| 988 | break; | ||
| 989 | } | ||
| 990 | case BOOK3S_INTERRUPT_ALIGNMENT: | ||
| 991 | if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { | ||
| 992 | to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, | ||
| 993 | kvmppc_get_last_inst(vcpu)); | ||
| 994 | vcpu->arch.dear = kvmppc_alignment_dar(vcpu, | ||
| 995 | kvmppc_get_last_inst(vcpu)); | ||
| 996 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
| 997 | } | ||
| 998 | r = RESUME_GUEST; | ||
| 819 | break; | 999 | break; |
| 820 | case BOOK3S_INTERRUPT_MACHINE_CHECK: | 1000 | case BOOK3S_INTERRUPT_MACHINE_CHECK: |
| 821 | case BOOK3S_INTERRUPT_TRACE: | 1001 | case BOOK3S_INTERRUPT_TRACE: |
| @@ -825,7 +1005,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 825 | default: | 1005 | default: |
| 826 | /* Ugh - bork here! What did we get? */ | 1006 | /* Ugh - bork here! What did we get? */ |
| 827 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", | 1007 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", |
| 828 | exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1); | 1008 | exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); |
| 829 | r = RESUME_HOST; | 1009 | r = RESUME_HOST; |
| 830 | BUG(); | 1010 | BUG(); |
| 831 | break; | 1011 | break; |
| @@ -852,7 +1032,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 852 | } | 1032 | } |
| 853 | 1033 | ||
| 854 | #ifdef EXIT_DEBUG | 1034 | #ifdef EXIT_DEBUG |
| 855 | printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r); | 1035 | printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); |
| 856 | #endif | 1036 | #endif |
| 857 | 1037 | ||
| 858 | return r; | 1038 | return r; |
| @@ -867,10 +1047,12 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 867 | { | 1047 | { |
| 868 | int i; | 1048 | int i; |
| 869 | 1049 | ||
| 870 | regs->pc = vcpu->arch.pc; | 1050 | vcpu_load(vcpu); |
| 1051 | |||
| 1052 | regs->pc = kvmppc_get_pc(vcpu); | ||
| 871 | regs->cr = kvmppc_get_cr(vcpu); | 1053 | regs->cr = kvmppc_get_cr(vcpu); |
| 872 | regs->ctr = vcpu->arch.ctr; | 1054 | regs->ctr = kvmppc_get_ctr(vcpu); |
| 873 | regs->lr = vcpu->arch.lr; | 1055 | regs->lr = kvmppc_get_lr(vcpu); |
| 874 | regs->xer = kvmppc_get_xer(vcpu); | 1056 | regs->xer = kvmppc_get_xer(vcpu); |
| 875 | regs->msr = vcpu->arch.msr; | 1057 | regs->msr = vcpu->arch.msr; |
| 876 | regs->srr0 = vcpu->arch.srr0; | 1058 | regs->srr0 = vcpu->arch.srr0; |
| @@ -887,6 +1069,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 887 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1069 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 888 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 1070 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
| 889 | 1071 | ||
| 1072 | vcpu_put(vcpu); | ||
| 1073 | |||
| 890 | return 0; | 1074 | return 0; |
| 891 | } | 1075 | } |
| 892 | 1076 | ||
| @@ -894,10 +1078,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 894 | { | 1078 | { |
| 895 | int i; | 1079 | int i; |
| 896 | 1080 | ||
| 897 | vcpu->arch.pc = regs->pc; | 1081 | vcpu_load(vcpu); |
| 1082 | |||
| 1083 | kvmppc_set_pc(vcpu, regs->pc); | ||
| 898 | kvmppc_set_cr(vcpu, regs->cr); | 1084 | kvmppc_set_cr(vcpu, regs->cr); |
| 899 | vcpu->arch.ctr = regs->ctr; | 1085 | kvmppc_set_ctr(vcpu, regs->ctr); |
| 900 | vcpu->arch.lr = regs->lr; | 1086 | kvmppc_set_lr(vcpu, regs->lr); |
| 901 | kvmppc_set_xer(vcpu, regs->xer); | 1087 | kvmppc_set_xer(vcpu, regs->xer); |
| 902 | kvmppc_set_msr(vcpu, regs->msr); | 1088 | kvmppc_set_msr(vcpu, regs->msr); |
| 903 | vcpu->arch.srr0 = regs->srr0; | 1089 | vcpu->arch.srr0 = regs->srr0; |
| @@ -913,6 +1099,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 913 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1099 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 914 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 1100 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
| 915 | 1101 | ||
| 1102 | vcpu_put(vcpu); | ||
| 1103 | |||
| 916 | return 0; | 1104 | return 0; |
| 917 | } | 1105 | } |
| 918 | 1106 | ||
| @@ -922,6 +1110,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 922 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | 1110 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); |
| 923 | int i; | 1111 | int i; |
| 924 | 1112 | ||
| 1113 | vcpu_load(vcpu); | ||
| 1114 | |||
| 925 | sregs->pvr = vcpu->arch.pvr; | 1115 | sregs->pvr = vcpu->arch.pvr; |
| 926 | 1116 | ||
| 927 | sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; | 1117 | sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; |
| @@ -940,6 +1130,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 940 | sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; | 1130 | sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; |
| 941 | } | 1131 | } |
| 942 | } | 1132 | } |
| 1133 | |||
| 1134 | vcpu_put(vcpu); | ||
| 1135 | |||
| 943 | return 0; | 1136 | return 0; |
| 944 | } | 1137 | } |
| 945 | 1138 | ||
| @@ -949,6 +1142,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 949 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | 1142 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); |
| 950 | int i; | 1143 | int i; |
| 951 | 1144 | ||
| 1145 | vcpu_load(vcpu); | ||
| 1146 | |||
| 952 | kvmppc_set_pvr(vcpu, sregs->pvr); | 1147 | kvmppc_set_pvr(vcpu, sregs->pvr); |
| 953 | 1148 | ||
| 954 | vcpu3s->sdr1 = sregs->u.s.sdr1; | 1149 | vcpu3s->sdr1 = sregs->u.s.sdr1; |
| @@ -975,6 +1170,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 975 | 1170 | ||
| 976 | /* Flush the MMU after messing with the segments */ | 1171 | /* Flush the MMU after messing with the segments */ |
| 977 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 1172 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
| 1173 | |||
| 1174 | vcpu_put(vcpu); | ||
| 1175 | |||
| 978 | return 0; | 1176 | return 0; |
| 979 | } | 1177 | } |
| 980 | 1178 | ||
| @@ -1042,24 +1240,33 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 1042 | { | 1240 | { |
| 1043 | struct kvmppc_vcpu_book3s *vcpu_book3s; | 1241 | struct kvmppc_vcpu_book3s *vcpu_book3s; |
| 1044 | struct kvm_vcpu *vcpu; | 1242 | struct kvm_vcpu *vcpu; |
| 1045 | int err; | 1243 | int err = -ENOMEM; |
| 1046 | 1244 | ||
| 1047 | vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, | 1245 | vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); |
| 1048 | get_order(sizeof(struct kvmppc_vcpu_book3s))); | 1246 | if (!vcpu_book3s) |
| 1049 | if (!vcpu_book3s) { | ||
| 1050 | err = -ENOMEM; | ||
| 1051 | goto out; | 1247 | goto out; |
| 1052 | } | 1248 | |
| 1249 | memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s)); | ||
| 1250 | |||
| 1251 | vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) | ||
| 1252 | kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); | ||
| 1253 | if (!vcpu_book3s->shadow_vcpu) | ||
| 1254 | goto free_vcpu; | ||
| 1053 | 1255 | ||
| 1054 | vcpu = &vcpu_book3s->vcpu; | 1256 | vcpu = &vcpu_book3s->vcpu; |
| 1055 | err = kvm_vcpu_init(vcpu, kvm, id); | 1257 | err = kvm_vcpu_init(vcpu, kvm, id); |
| 1056 | if (err) | 1258 | if (err) |
| 1057 | goto free_vcpu; | 1259 | goto free_shadow_vcpu; |
| 1058 | 1260 | ||
| 1059 | vcpu->arch.host_retip = kvm_return_point; | 1261 | vcpu->arch.host_retip = kvm_return_point; |
| 1060 | vcpu->arch.host_msr = mfmsr(); | 1262 | vcpu->arch.host_msr = mfmsr(); |
| 1263 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 1061 | /* default to book3s_64 (970fx) */ | 1264 | /* default to book3s_64 (970fx) */ |
| 1062 | vcpu->arch.pvr = 0x3C0301; | 1265 | vcpu->arch.pvr = 0x3C0301; |
| 1266 | #else | ||
| 1267 | /* default to book3s_32 (750) */ | ||
| 1268 | vcpu->arch.pvr = 0x84202; | ||
| 1269 | #endif | ||
| 1063 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); | 1270 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); |
| 1064 | vcpu_book3s->slb_nr = 64; | 1271 | vcpu_book3s->slb_nr = 64; |
| 1065 | 1272 | ||
| @@ -1067,23 +1274,24 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 1067 | vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; | 1274 | vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; |
| 1068 | vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; | 1275 | vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; |
| 1069 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; | 1276 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; |
| 1277 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 1070 | vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; | 1278 | vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; |
| 1279 | #else | ||
| 1280 | vcpu->arch.rmcall = (ulong)kvmppc_rmcall; | ||
| 1281 | #endif | ||
| 1071 | 1282 | ||
| 1072 | vcpu->arch.shadow_msr = MSR_USER64; | 1283 | vcpu->arch.shadow_msr = MSR_USER64; |
| 1073 | 1284 | ||
| 1074 | err = __init_new_context(); | 1285 | err = kvmppc_mmu_init(vcpu); |
| 1075 | if (err < 0) | 1286 | if (err < 0) |
| 1076 | goto free_vcpu; | 1287 | goto free_shadow_vcpu; |
| 1077 | vcpu_book3s->context_id = err; | ||
| 1078 | |||
| 1079 | vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1; | ||
| 1080 | vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS; | ||
| 1081 | vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; | ||
| 1082 | 1288 | ||
| 1083 | return vcpu; | 1289 | return vcpu; |
| 1084 | 1290 | ||
| 1291 | free_shadow_vcpu: | ||
| 1292 | kfree(vcpu_book3s->shadow_vcpu); | ||
| 1085 | free_vcpu: | 1293 | free_vcpu: |
| 1086 | free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); | 1294 | vfree(vcpu_book3s); |
| 1087 | out: | 1295 | out: |
| 1088 | return ERR_PTR(err); | 1296 | return ERR_PTR(err); |
| 1089 | } | 1297 | } |
| @@ -1092,9 +1300,9 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | |||
| 1092 | { | 1300 | { |
| 1093 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | 1301 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); |
| 1094 | 1302 | ||
| 1095 | __destroy_context(vcpu_book3s->context_id); | ||
| 1096 | kvm_vcpu_uninit(vcpu); | 1303 | kvm_vcpu_uninit(vcpu); |
| 1097 | free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); | 1304 | kfree(vcpu_book3s->shadow_vcpu); |
| 1305 | vfree(vcpu_book3s); | ||
| 1098 | } | 1306 | } |
| 1099 | 1307 | ||
| 1100 | extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 1308 | extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
| @@ -1102,8 +1310,12 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1102 | { | 1310 | { |
| 1103 | int ret; | 1311 | int ret; |
| 1104 | struct thread_struct ext_bkp; | 1312 | struct thread_struct ext_bkp; |
| 1313 | #ifdef CONFIG_ALTIVEC | ||
| 1105 | bool save_vec = current->thread.used_vr; | 1314 | bool save_vec = current->thread.used_vr; |
| 1315 | #endif | ||
| 1316 | #ifdef CONFIG_VSX | ||
| 1106 | bool save_vsx = current->thread.used_vsr; | 1317 | bool save_vsx = current->thread.used_vsr; |
| 1318 | #endif | ||
| 1107 | ulong ext_msr; | 1319 | ulong ext_msr; |
| 1108 | 1320 | ||
| 1109 | /* No need to go into the guest when all we do is going out */ | 1321 | /* No need to go into the guest when all we do is going out */ |
| @@ -1144,6 +1356,10 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1144 | /* XXX we get called with irq disabled - change that! */ | 1356 | /* XXX we get called with irq disabled - change that! */ |
| 1145 | local_irq_enable(); | 1357 | local_irq_enable(); |
| 1146 | 1358 | ||
| 1359 | /* Preload FPU if it's enabled */ | ||
| 1360 | if (vcpu->arch.msr & MSR_FP) | ||
| 1361 | kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); | ||
| 1362 | |||
| 1147 | ret = __kvmppc_vcpu_entry(kvm_run, vcpu); | 1363 | ret = __kvmppc_vcpu_entry(kvm_run, vcpu); |
| 1148 | 1364 | ||
| 1149 | local_irq_disable(); | 1365 | local_irq_disable(); |
| @@ -1179,7 +1395,8 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1179 | 1395 | ||
| 1180 | static int kvmppc_book3s_init(void) | 1396 | static int kvmppc_book3s_init(void) |
| 1181 | { | 1397 | { |
| 1182 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE); | 1398 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, |
| 1399 | THIS_MODULE); | ||
| 1183 | } | 1400 | } |
| 1184 | 1401 | ||
| 1185 | static void kvmppc_book3s_exit(void) | 1402 | static void kvmppc_book3s_exit(void) |
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index faf99f20d993..0b10503c8a4a 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c | |||
| @@ -37,7 +37,7 @@ | |||
| 37 | #define dprintk(X...) do { } while(0) | 37 | #define dprintk(X...) do { } while(0) |
| 38 | #endif | 38 | #endif |
| 39 | 39 | ||
| 40 | #ifdef DEBUG_PTE | 40 | #ifdef DEBUG_MMU_PTE |
| 41 | #define dprintk_pte(X...) printk(KERN_INFO X) | 41 | #define dprintk_pte(X...) printk(KERN_INFO X) |
| 42 | #else | 42 | #else |
| 43 | #define dprintk_pte(X...) do { } while(0) | 43 | #define dprintk_pte(X...) do { } while(0) |
| @@ -45,6 +45,9 @@ | |||
| 45 | 45 | ||
| 46 | #define PTEG_FLAG_ACCESSED 0x00000100 | 46 | #define PTEG_FLAG_ACCESSED 0x00000100 |
| 47 | #define PTEG_FLAG_DIRTY 0x00000080 | 47 | #define PTEG_FLAG_DIRTY 0x00000080 |
| 48 | #ifndef SID_SHIFT | ||
| 49 | #define SID_SHIFT 28 | ||
| 50 | #endif | ||
| 48 | 51 | ||
| 49 | static inline bool check_debug_ip(struct kvm_vcpu *vcpu) | 52 | static inline bool check_debug_ip(struct kvm_vcpu *vcpu) |
| 50 | { | 53 | { |
| @@ -57,6 +60,8 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) | |||
| 57 | 60 | ||
| 58 | static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, | 61 | static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, |
| 59 | struct kvmppc_pte *pte, bool data); | 62 | struct kvmppc_pte *pte, bool data); |
| 63 | static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, | ||
| 64 | u64 *vsid); | ||
| 60 | 65 | ||
| 61 | static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) | 66 | static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) |
| 62 | { | 67 | { |
| @@ -66,13 +71,14 @@ static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t e | |||
| 66 | static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, | 71 | static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, |
| 67 | bool data) | 72 | bool data) |
| 68 | { | 73 | { |
| 69 | struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr); | 74 | u64 vsid; |
| 70 | struct kvmppc_pte pte; | 75 | struct kvmppc_pte pte; |
| 71 | 76 | ||
| 72 | if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) | 77 | if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) |
| 73 | return pte.vpage; | 78 | return pte.vpage; |
| 74 | 79 | ||
| 75 | return (((u64)eaddr >> 12) & 0xffff) | (((u64)sre->vsid) << 16); | 80 | kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); |
| 81 | return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); | ||
| 76 | } | 82 | } |
| 77 | 83 | ||
| 78 | static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) | 84 | static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) |
| @@ -142,8 +148,13 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
| 142 | bat->bepi_mask); | 148 | bat->bepi_mask); |
| 143 | } | 149 | } |
| 144 | if ((eaddr & bat->bepi_mask) == bat->bepi) { | 150 | if ((eaddr & bat->bepi_mask) == bat->bepi) { |
| 151 | u64 vsid; | ||
| 152 | kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, | ||
| 153 | eaddr >> SID_SHIFT, &vsid); | ||
| 154 | vsid <<= 16; | ||
| 155 | pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid; | ||
| 156 | |||
| 145 | pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); | 157 | pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); |
| 146 | pte->vpage = (eaddr >> 12) | VSID_BAT; | ||
| 147 | pte->may_read = bat->pp; | 158 | pte->may_read = bat->pp; |
| 148 | pte->may_write = bat->pp > 1; | 159 | pte->may_write = bat->pp > 1; |
| 149 | pte->may_execute = true; | 160 | pte->may_execute = true; |
| @@ -172,7 +183,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
| 172 | struct kvmppc_sr *sre; | 183 | struct kvmppc_sr *sre; |
| 173 | hva_t ptegp; | 184 | hva_t ptegp; |
| 174 | u32 pteg[16]; | 185 | u32 pteg[16]; |
| 175 | u64 ptem = 0; | 186 | u32 ptem = 0; |
| 176 | int i; | 187 | int i; |
| 177 | int found = 0; | 188 | int found = 0; |
| 178 | 189 | ||
| @@ -302,6 +313,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, | |||
| 302 | /* And then put in the new SR */ | 313 | /* And then put in the new SR */ |
| 303 | sre->raw = value; | 314 | sre->raw = value; |
| 304 | sre->vsid = (value & 0x0fffffff); | 315 | sre->vsid = (value & 0x0fffffff); |
| 316 | sre->valid = (value & 0x80000000) ? false : true; | ||
| 305 | sre->Ks = (value & 0x40000000) ? true : false; | 317 | sre->Ks = (value & 0x40000000) ? true : false; |
| 306 | sre->Kp = (value & 0x20000000) ? true : false; | 318 | sre->Kp = (value & 0x20000000) ? true : false; |
| 307 | sre->nx = (value & 0x10000000) ? true : false; | 319 | sre->nx = (value & 0x10000000) ? true : false; |
| @@ -312,36 +324,48 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, | |||
| 312 | 324 | ||
| 313 | static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) | 325 | static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) |
| 314 | { | 326 | { |
| 315 | kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL); | 327 | kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); |
| 316 | } | 328 | } |
| 317 | 329 | ||
| 318 | static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, | 330 | static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, |
| 319 | u64 *vsid) | 331 | u64 *vsid) |
| 320 | { | 332 | { |
| 333 | ulong ea = esid << SID_SHIFT; | ||
| 334 | struct kvmppc_sr *sr; | ||
| 335 | u64 gvsid = esid; | ||
| 336 | |||
| 337 | if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
| 338 | sr = find_sr(to_book3s(vcpu), ea); | ||
| 339 | if (sr->valid) | ||
| 340 | gvsid = sr->vsid; | ||
| 341 | } | ||
| 342 | |||
| 321 | /* In case we only have one of MSR_IR or MSR_DR set, let's put | 343 | /* In case we only have one of MSR_IR or MSR_DR set, let's put |
| 322 | that in the real-mode context (and hope RM doesn't access | 344 | that in the real-mode context (and hope RM doesn't access |
| 323 | high memory) */ | 345 | high memory) */ |
| 324 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | 346 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { |
| 325 | case 0: | 347 | case 0: |
| 326 | *vsid = (VSID_REAL >> 16) | esid; | 348 | *vsid = VSID_REAL | esid; |
| 327 | break; | 349 | break; |
| 328 | case MSR_IR: | 350 | case MSR_IR: |
| 329 | *vsid = (VSID_REAL_IR >> 16) | esid; | 351 | *vsid = VSID_REAL_IR | gvsid; |
| 330 | break; | 352 | break; |
| 331 | case MSR_DR: | 353 | case MSR_DR: |
| 332 | *vsid = (VSID_REAL_DR >> 16) | esid; | 354 | *vsid = VSID_REAL_DR | gvsid; |
| 333 | break; | 355 | break; |
| 334 | case MSR_DR|MSR_IR: | 356 | case MSR_DR|MSR_IR: |
| 335 | { | 357 | if (!sr->valid) |
| 336 | ulong ea; | 358 | return -1; |
| 337 | ea = esid << SID_SHIFT; | 359 | |
| 338 | *vsid = find_sr(to_book3s(vcpu), ea)->vsid; | 360 | *vsid = sr->vsid; |
| 339 | break; | 361 | break; |
| 340 | } | ||
| 341 | default: | 362 | default: |
| 342 | BUG(); | 363 | BUG(); |
| 343 | } | 364 | } |
| 344 | 365 | ||
| 366 | if (vcpu->arch.msr & MSR_PR) | ||
| 367 | *vsid |= VSID_PR; | ||
| 368 | |||
| 345 | return 0; | 369 | return 0; |
| 346 | } | 370 | } |
| 347 | 371 | ||
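The 32-bit MMU above now records whether each segment register is valid and funnels VSID lookups through kvmppc_mmu_book3s_32_esid_to_vsid(), which tags real-mode translations with VSID_REAL/VSID_REAL_IR/VSID_REAL_DR and problem-state ones with VSID_PR. For reference, a sketch of how the segment register word decomposes into the fields the mtsrin handler stores (field layout as used in the hunk above; the struct itself is illustrative):

	struct sr_decoded {
		bool valid;	/* top bit (T) clear: ordinary memory segment */
		bool ks, kp;	/* supervisor / user protection keys */
		bool nx;	/* no-execute */
		u32 vsid;	/* virtual segment ID (low bits of the word) */
	};

	static void decode_sr(u32 value, struct sr_decoded *sr)
	{
		sr->valid = !(value & 0x80000000);
		sr->ks    =  value & 0x40000000;
		sr->kp    =  value & 0x20000000;
		sr->nx    =  value & 0x10000000;
		sr->vsid  =  value & 0x0fffffff;
	}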
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c new file mode 100644 index 000000000000..0bb66005338f --- /dev/null +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c | |||
| @@ -0,0 +1,483 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. | ||
| 3 | * | ||
| 4 | * Authors: | ||
| 5 | * Alexander Graf <agraf@suse.de> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License, version 2, as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | * GNU General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License | ||
| 17 | * along with this program; if not, write to the Free Software | ||
| 18 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <linux/kvm_host.h> | ||
| 22 | |||
| 23 | #include <asm/kvm_ppc.h> | ||
| 24 | #include <asm/kvm_book3s.h> | ||
| 25 | #include <asm/mmu-hash32.h> | ||
| 26 | #include <asm/machdep.h> | ||
| 27 | #include <asm/mmu_context.h> | ||
| 28 | #include <asm/hw_irq.h> | ||
| 29 | |||
| 30 | /* #define DEBUG_MMU */ | ||
| 31 | /* #define DEBUG_SR */ | ||
| 32 | |||
| 33 | #ifdef DEBUG_MMU | ||
| 34 | #define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) | ||
| 35 | #else | ||
| 36 | #define dprintk_mmu(a, ...) do { } while(0) | ||
| 37 | #endif | ||
| 38 | |||
| 39 | #ifdef DEBUG_SR | ||
| 40 | #define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__) | ||
| 41 | #else | ||
| 42 | #define dprintk_sr(a, ...) do { } while(0) | ||
| 43 | #endif | ||
| 44 | |||
| 45 | #if PAGE_SHIFT != 12 | ||
| 46 | #error Unknown page size | ||
| 47 | #endif | ||
| 48 | |||
| 49 | #ifdef CONFIG_SMP | ||
| 50 | #error XXX need to grab mmu_hash_lock | ||
| 51 | #endif | ||
| 52 | |||
| 53 | #ifdef CONFIG_PTE_64BIT | ||
| 54 | #error Only 32 bit pages are supported for now | ||
| 55 | #endif | ||
| 56 | |||
| 57 | static ulong htab; | ||
| 58 | static u32 htabmask; | ||
| 59 | |||
| 60 | static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) | ||
| 61 | { | ||
| 62 | volatile u32 *pteg; | ||
| 63 | |||
| 64 | dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n", | ||
| 65 | pte->pte.eaddr, pte->pte.vpage, pte->host_va); | ||
| 66 | |||
| 67 | pteg = (u32*)pte->slot; | ||
| 68 | |||
| 69 | pteg[0] = 0; | ||
| 70 | asm volatile ("sync"); | ||
| 71 | asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); | ||
| 72 | asm volatile ("sync"); | ||
| 73 | asm volatile ("tlbsync"); | ||
| 74 | |||
| 75 | pte->host_va = 0; | ||
| 76 | |||
| 77 | if (pte->pte.may_write) | ||
| 78 | kvm_release_pfn_dirty(pte->pfn); | ||
| 79 | else | ||
| 80 | kvm_release_pfn_clean(pte->pfn); | ||
| 81 | } | ||
| 82 | |||
| 83 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) | ||
| 84 | { | ||
| 85 | int i; | ||
| 86 | |||
| 87 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n", | ||
| 88 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); | ||
| 89 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 90 | |||
| 91 | guest_ea &= ea_mask; | ||
| 92 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 93 | struct hpte_cache *pte; | ||
| 94 | |||
| 95 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 96 | if (!pte->host_va) | ||
| 97 | continue; | ||
| 98 | |||
| 99 | if ((pte->pte.eaddr & ea_mask) == guest_ea) { | ||
| 100 | invalidate_pte(vcpu, pte); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | /* Doing a complete flush -> start from scratch */ | ||
| 105 | if (!ea_mask) | ||
| 106 | vcpu->arch.hpte_cache_offset = 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) | ||
| 110 | { | ||
| 111 | int i; | ||
| 112 | |||
| 113 | dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", | ||
| 114 | vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); | ||
| 115 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 116 | |||
| 117 | guest_vp &= vp_mask; | ||
| 118 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 119 | struct hpte_cache *pte; | ||
| 120 | |||
| 121 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 122 | if (!pte->host_va) | ||
| 123 | continue; | ||
| 124 | |||
| 125 | if ((pte->pte.vpage & vp_mask) == guest_vp) { | ||
| 126 | invalidate_pte(vcpu, pte); | ||
| 127 | } | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) | ||
| 132 | { | ||
| 133 | int i; | ||
| 134 | |||
| 135 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", | ||
| 136 | vcpu->arch.hpte_cache_offset, pa_start, pa_end); | ||
| 137 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 138 | |||
| 139 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 140 | struct hpte_cache *pte; | ||
| 141 | |||
| 142 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 143 | if (!pte->host_va) | ||
| 144 | continue; | ||
| 145 | |||
| 146 | if ((pte->pte.raddr >= pa_start) && | ||
| 147 | (pte->pte.raddr < pa_end)) { | ||
| 148 | invalidate_pte(vcpu, pte); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } | ||
| 152 | |||
| 153 | struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) | ||
| 154 | { | ||
| 155 | int i; | ||
| 156 | u64 guest_vp; | ||
| 157 | |||
| 158 | guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); | ||
| 159 | for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { | ||
| 160 | struct hpte_cache *pte; | ||
| 161 | |||
| 162 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 163 | if (!pte->host_va) | ||
| 164 | continue; | ||
| 165 | |||
| 166 | if (pte->pte.vpage == guest_vp) | ||
| 167 | return &pte->pte; | ||
| 168 | } | ||
| 169 | |||
| 170 | return NULL; | ||
| 171 | } | ||
| 172 | |||
| 173 | static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) | ||
| 174 | { | ||
| 175 | if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) | ||
| 176 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
| 177 | |||
| 178 | return vcpu->arch.hpte_cache_offset++; | ||
| 179 | } | ||
| 180 | |||
| 181 | /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using | ||
| 182 | * a hash, so we don't waste cycles on looping */ | ||
| 183 | static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) | ||
| 184 | { | ||
| 185 | return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ | ||
| 186 | ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ | ||
| 187 | ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ | ||
| 188 | ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ | ||
| 189 | ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ | ||
| 190 | ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ | ||
| 191 | ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ | ||
| 192 | ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); | ||
| 193 | } | ||
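/*
 * Editor's sketch, not part of the patch: the unrolled expression above is
 * an XOR-fold of the guest VSID into a SID_MAP_BITS-wide table index.
 * Assuming SID_MAP_BITS is 9 (512 map entries, matching the comment above),
 * an equivalent loop form would be:
 */
static u16 kvmppc_sid_hash_loop(u64 gvsid)
{
	u16 hash = 0;
	int i;

	/* fold all eight SID_MAP_BITS-wide chunks of the guest VSID */
	for (i = 0; i < 8; i++)
		hash ^= (gvsid >> (SID_MAP_BITS * i)) & SID_MAP_MASK;

	return hash;
}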
| 194 | |||
| 195 | |||
| 196 | static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) | ||
| 197 | { | ||
| 198 | struct kvmppc_sid_map *map; | ||
| 199 | u16 sid_map_mask; | ||
| 200 | |||
| 201 | if (vcpu->arch.msr & MSR_PR) | ||
| 202 | gvsid |= VSID_PR; | ||
| 203 | |||
| 204 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); | ||
| 205 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; | ||
| 206 | if (map->guest_vsid == gvsid) { | ||
| 207 | dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", | ||
| 208 | gvsid, map->host_vsid); | ||
| 209 | return map; | ||
| 210 | } | ||
| 211 | |||
| 212 | map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; | ||
| 213 | if (map->guest_vsid == gvsid) { | ||
| 214 | dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", | ||
| 215 | gvsid, map->host_vsid); | ||
| 216 | return map; | ||
| 217 | } | ||
| 218 | |||
| 219 | dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid); | ||
| 220 | return NULL; | ||
| 221 | } | ||
| 222 | |||
| 223 | static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr, | ||
| 224 | bool primary) | ||
| 225 | { | ||
| 226 | u32 page, hash; | ||
| 227 | ulong pteg = htab; | ||
| 228 | |||
| 229 | page = (eaddr & ~ESID_MASK) >> 12; | ||
| 230 | |||
| 231 | hash = ((vsid ^ page) << 6); | ||
| 232 | if (!primary) | ||
| 233 | hash = ~hash; | ||
| 234 | |||
| 235 | hash &= htabmask; | ||
| 236 | |||
| 237 | pteg |= hash; | ||
| 238 | |||
| 239 | dprintk_mmu("htab: %lx | hash: %x | htabmask: %x | pteg: %lx\n", | ||
| 240 | htab, hash, htabmask, pteg); | ||
| 241 | |||
| 242 | return (u32*)pteg; | ||
| 243 | } | ||
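/*
 * Editor's sketch, not part of the patch: the same PTEG address computation
 * with the hash table base and mask passed in explicitly. In the code above
 * both values live in the static htab/htabmask variables, presumably set up
 * from the host's SDR1 elsewhere in this file.
 */
static u32 *pteg_address(ulong htab_base, u32 htab_mask,
			 u32 vsid, u32 eaddr, bool primary)
{
	u32 page = (eaddr & ~ESID_MASK) >> 12;	/* page index within the segment */
	u32 hash = (vsid ^ page) << 6;		/* one PTEG is 64 bytes */

	if (!primary)
		hash = ~hash;			/* secondary hash is the complement */

	return (u32 *)(htab_base | (hash & htab_mask));
}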
| 244 | |||
| 245 | extern char etext[]; | ||
| 246 | |||
| 247 | int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | ||
| 248 | { | ||
| 249 | pfn_t hpaddr; | ||
| 250 | u64 va; | ||
| 251 | u64 vsid; | ||
| 252 | struct kvmppc_sid_map *map; | ||
| 253 | volatile u32 *pteg; | ||
| 254 | u32 eaddr = orig_pte->eaddr; | ||
| 255 | u32 pteg0, pteg1; | ||
| 256 | register int rr = 0; | ||
| 257 | bool primary = false; | ||
| 258 | bool evict = false; | ||
| 259 | int hpte_id; | ||
| 260 | struct hpte_cache *pte; | ||
| 261 | |||
| 262 | /* Get host physical address for gpa */ | ||
| 263 | hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); | ||
| 264 | if (kvm_is_error_hva(hpaddr)) { | ||
| 265 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", | ||
| 266 | orig_pte->eaddr); | ||
| 267 | return -EINVAL; | ||
| 268 | } | ||
| 269 | hpaddr <<= PAGE_SHIFT; | ||
| 270 | |||
| 271 | /* and write the mapping ea -> hpa into the pt */ | ||
| 272 | vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); | ||
| 273 | map = find_sid_vsid(vcpu, vsid); | ||
| 274 | if (!map) { | ||
| 275 | kvmppc_mmu_map_segment(vcpu, eaddr); | ||
| 276 | map = find_sid_vsid(vcpu, vsid); | ||
| 277 | } | ||
| 278 | BUG_ON(!map); | ||
| 279 | |||
| 280 | vsid = map->host_vsid; | ||
| 281 | va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK); | ||
| 282 | |||
| 283 | next_pteg: | ||
| 284 | if (rr == 16) { | ||
| 285 | primary = !primary; | ||
| 286 | evict = true; | ||
| 287 | rr = 0; | ||
| 288 | } | ||
| 289 | |||
| 290 | pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary); | ||
| 291 | |||
| 292 | /* not evicting yet */ | ||
| 293 | if (!evict && (pteg[rr] & PTE_V)) { | ||
| 294 | rr += 2; | ||
| 295 | goto next_pteg; | ||
| 296 | } | ||
| 297 | |||
| 298 | dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr); | ||
| 299 | dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); | ||
| 300 | dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); | ||
| 301 | dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); | ||
| 302 | dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); | ||
| 303 | dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); | ||
| 304 | dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); | ||
| 305 | dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); | ||
| 306 | dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); | ||
| 307 | |||
| 308 | pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V | | ||
| 309 | (primary ? 0 : PTE_SEC); | ||
| 310 | pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; | ||
| 311 | |||
| 312 | if (orig_pte->may_write) { | ||
| 313 | pteg1 |= PP_RWRW; | ||
| 314 | mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); | ||
| 315 | } else { | ||
| 316 | pteg1 |= PP_RWRX; | ||
| 317 | } | ||
| 318 | |||
| 319 | local_irq_disable(); | ||
| 320 | |||
| 321 | if (pteg[rr]) { | ||
| 322 | pteg[rr] = 0; | ||
| 323 | asm volatile ("sync"); | ||
| 324 | } | ||
| 325 | pteg[rr + 1] = pteg1; | ||
| 326 | pteg[rr] = pteg0; | ||
| 327 | asm volatile ("sync"); | ||
| 328 | |||
| 329 | local_irq_enable(); | ||
| 330 | |||
| 331 | dprintk_mmu("KVM: new PTEG: %p\n", pteg); | ||
| 332 | dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); | ||
| 333 | dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); | ||
| 334 | dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); | ||
| 335 | dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); | ||
| 336 | dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); | ||
| 337 | dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); | ||
| 338 | dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); | ||
| 339 | dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); | ||
| 340 | |||
| 341 | |||
| 342 | /* Now tell our Shadow PTE code about the new page */ | ||
| 343 | |||
| 344 | hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); | ||
| 345 | pte = &vcpu->arch.hpte_cache[hpte_id]; | ||
| 346 | |||
| 347 | dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", | ||
| 348 | orig_pte->may_write ? 'w' : '-', | ||
| 349 | orig_pte->may_execute ? 'x' : '-', | ||
| 350 | orig_pte->eaddr, (ulong)pteg, va, | ||
| 351 | orig_pte->vpage, hpaddr); | ||
| 352 | |||
| 353 | pte->slot = (ulong)&pteg[rr]; | ||
| 354 | pte->host_va = va; | ||
| 355 | pte->pte = *orig_pte; | ||
| 356 | pte->pfn = hpaddr >> PAGE_SHIFT; | ||
| 357 | |||
| 358 | return 0; | ||
| 359 | } | ||
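Annotation: the pteg0/pteg1 words assembled in kvmppc_mmu_map_page() follow the 32-bit PowerPC HTAB PTE format. The breakdown below names the fields per the architecture; it is a reading aid, not a definition from this patch.

    /*
     * pteg0 (PTE word 0):
     *   PTE_V                         valid bit
     *   vsid << 7                     24-bit virtual segment id
     *   PTE_SEC                       the 'H' bit, set for entries placed
     *                                 via the secondary (complemented) hash
     *   (eaddr & 0x0fffffff) >> 22    API, the top 6 bits of the page index
     *
     * pteg1 (PTE word 1):
     *   hpaddr                        real page number (host physical page)
     *   PTE_R | PTE_C                 referenced/changed pre-set, so hardware
     *                                 never faults just to update them
     *   PTE_M                         memory coherence
     *   PP_RWRW / PP_RWRX             protection, chosen from may_write
     */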
| 360 | |||
| 361 | static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) | ||
| 362 | { | ||
| 363 | struct kvmppc_sid_map *map; | ||
| 364 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | ||
| 365 | u16 sid_map_mask; | ||
| 366 | static int backwards_map = 0; | ||
| 367 | |||
| 368 | if (vcpu->arch.msr & MSR_PR) | ||
| 369 | gvsid |= VSID_PR; | ||
| 370 | |||
| 371 | /* Different guest VSIDs can collide on the same hash slot, so | ||
| 372 | alternate the direction we map them in */ | ||
| 373 | |||
| 374 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); | ||
| 375 | if (backwards_map) | ||
| 376 | sid_map_mask = SID_MAP_MASK - sid_map_mask; | ||
| 377 | |||
| 378 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; | ||
| 379 | |||
| 380 | /* Make sure we're taking the other map next time */ | ||
| 381 | backwards_map = !backwards_map; | ||
| 382 | |||
| 383 | /* Uh-oh ... out of mappings. Let's flush! */ | ||
| 384 | if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) { | ||
| 385 | vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; | ||
| 386 | memset(vcpu_book3s->sid_map, 0, | ||
| 387 | sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); | ||
| 388 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
| 389 | kvmppc_mmu_flush_segments(vcpu); | ||
| 390 | } | ||
| 391 | map->host_vsid = vcpu_book3s->vsid_next; | ||
| 392 | |||
| 393 | /* The increment would have to be 0x111 to stay aligned with the | ||
| 394 | rest of Linux, but that would leave us far too little VSID space */ | ||
| 395 | vcpu_book3s->vsid_next += 1; | ||
| 396 | |||
| 397 | map->guest_vsid = gvsid; | ||
| 398 | map->valid = true; | ||
| 399 | |||
| 400 | return map; | ||
| 401 | } | ||
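Annotation: the backwards_map toggle pairs hash slot i with slot SID_MAP_MASK - i, which is why find_sid_vsid() at the top of this file probes both. The toy program below models only that placement policy; the mask size and hash are simplified stand-ins, not the patch's definitions.

    #include <stdint.h>
    #include <stdio.h>

    #define TOY_MAP_MASK 0x1ff                          /* stand-in for SID_MAP_MASK */

    struct toy_map { uint64_t guest_vsid; int valid; };
    static struct toy_map sid_map[TOY_MAP_MASK + 1];
    static int backwards_map;

    static uint16_t toy_hash(uint64_t gvsid) { return gvsid & TOY_MAP_MASK; }

    static void toy_create(uint64_t gvsid)
    {
            uint16_t idx = toy_hash(gvsid);

            if (backwards_map)                          /* every other allocation */
                    idx = TOY_MAP_MASK - idx;           /* uses the mirrored slot  */
            backwards_map = !backwards_map;

            sid_map[idx].guest_vsid = gvsid;
            sid_map[idx].valid = 1;
    }

    int main(void)
    {
            toy_create(0x100);                          /* both hash to slot 0x100...     */
            toy_create(0x300);                          /* ...but land in different slots */
            printf("%d %d\n", sid_map[0x100].valid,
                   sid_map[TOY_MAP_MASK - 0x100].valid); /* prints "1 1" */
            return 0;
    }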
| 402 | |||
| 403 | int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | ||
| 404 | { | ||
| 405 | u32 esid = eaddr >> SID_SHIFT; | ||
| 406 | u64 gvsid; | ||
| 407 | u32 sr; | ||
| 408 | struct kvmppc_sid_map *map; | ||
| 409 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | ||
| 410 | |||
| 411 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { | ||
| 412 | /* Invalidate an entry */ | ||
| 413 | svcpu->sr[esid] = SR_INVALID; | ||
| 414 | return -ENOENT; | ||
| 415 | } | ||
| 416 | |||
| 417 | map = find_sid_vsid(vcpu, gvsid); | ||
| 418 | if (!map) | ||
| 419 | map = create_sid_map(vcpu, gvsid); | ||
| 420 | |||
| 421 | map->guest_esid = esid; | ||
| 422 | sr = map->host_vsid | SR_KP; | ||
| 423 | svcpu->sr[esid] = sr; | ||
| 424 | |||
| 425 | dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); | ||
| 426 | |||
| 427 | return 0; | ||
| 428 | } | ||
| 429 | |||
| 430 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | ||
| 431 | { | ||
| 432 | int i; | ||
| 433 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | ||
| 434 | |||
| 435 | dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); | ||
| 436 | for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) | ||
| 437 | svcpu->sr[i] = SR_INVALID; | ||
| 438 | } | ||
| 439 | |||
| 440 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | ||
| 441 | { | ||
| 442 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
| 443 | preempt_disable(); | ||
| 444 | __destroy_context(to_book3s(vcpu)->context_id); | ||
| 445 | preempt_enable(); | ||
| 446 | } | ||
| 447 | |||
| 448 | /* From mm/mmu_context_hash32.c */ | ||
| 449 | #define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff) | ||
| 450 | |||
| 451 | int kvmppc_mmu_init(struct kvm_vcpu *vcpu) | ||
| 452 | { | ||
| 453 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | ||
| 454 | int err; | ||
| 455 | ulong sdr1; | ||
| 456 | |||
| 457 | err = __init_new_context(); | ||
| 458 | if (err < 0) | ||
| 459 | return -1; | ||
| 460 | vcpu3s->context_id = err; | ||
| 461 | |||
| 462 | vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1; | ||
| 463 | vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id); | ||
| 464 | |||
| 465 | #if 0 /* XXX still doesn't guarantee uniqueness */ | ||
| 466 | /* We could collide with the Linux vsid space because the vsid | ||
| 467 | * wraps around at 24 bits. We're safe if we do our own space | ||
| 468 | * though, so let's always set the highest bit. */ | ||
| 469 | |||
| 470 | vcpu3s->vsid_max |= 0x00800000; | ||
| 471 | vcpu3s->vsid_first |= 0x00800000; | ||
| 472 | #endif | ||
| 473 | BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first); | ||
| 474 | |||
| 475 | vcpu3s->vsid_next = vcpu3s->vsid_first; | ||
| 476 | |||
| 477 | /* Remember where the HTAB is */ | ||
| 478 | asm ( "mfsdr1 %0" : "=r"(sdr1) ); | ||
| 479 | htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; | ||
| 480 | htab = (ulong)__va(sdr1 & 0xffff0000); | ||
| 481 | |||
| 482 | return 0; | ||
| 483 | } | ||
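Annotation: a worked example of the SDR1 decode in kvmppc_mmu_init(), with an assumed register value. On 32-bit Book3S, SDR1 carries the physical HTAB base in its upper 16 bits and a 9-bit HTABMASK in its low bits; the table spans (HTABMASK + 1) * 64 KiB.

    /* Assumed for illustration: sdr1 = 0x00f0000f
     *   htab     = __va(sdr1 & 0xffff0000)         = __va(0x00f00000), a 1 MiB table
     *   htabmask = ((sdr1 & 0x1ff) << 16) | 0xffc0 = 0x000fffc0
     *
     * The fixed 0xffc0 part selects a 64-byte PTEG within one 64 KiB block,
     * and the HTABMASK-derived bits select which of the (here 16) 64 KiB
     * blocks the PTEG lives in. */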
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S new file mode 100644 index 000000000000..3608471ad2d8 --- /dev/null +++ b/arch/powerpc/kvm/book3s_32_sr.S | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or modify | ||
| 3 | * it under the terms of the GNU General Public License, version 2, as | ||
| 4 | * published by the Free Software Foundation. | ||
| 5 | * | ||
| 6 | * This program is distributed in the hope that it will be useful, | ||
| 7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 9 | * GNU General Public License for more details. | ||
| 10 | * | ||
| 11 | * You should have received a copy of the GNU General Public License | ||
| 12 | * along with this program; if not, write to the Free Software | ||
| 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 14 | * | ||
| 15 | * Copyright SUSE Linux Products GmbH 2009 | ||
| 16 | * | ||
| 17 | * Authors: Alexander Graf <agraf@suse.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | /****************************************************************************** | ||
| 21 | * * | ||
| 22 | * Entry code * | ||
| 23 | * * | ||
| 24 | *****************************************************************************/ | ||
| 25 | |||
| 26 | .macro LOAD_GUEST_SEGMENTS | ||
| 27 | |||
| 28 | /* Required state: | ||
| 29 | * | ||
| 30 | * MSR = ~IR|DR | ||
| 31 | * R1 = host R1 | ||
| 32 | * R2 = host R2 | ||
| 33 | * R3 = shadow vcpu | ||
| 34 | * all other volatile GPRS = free | ||
| 35 | * SVCPU[CR] = guest CR | ||
| 36 | * SVCPU[XER] = guest XER | ||
| 37 | * SVCPU[CTR] = guest CTR | ||
| 38 | * SVCPU[LR] = guest LR | ||
| 39 | */ | ||
| 40 | |||
| 41 | #define XCHG_SR(n) lwz r9, (SVCPU_SR+(n*4))(r3); \ | ||
| 42 | mtsr n, r9 | ||
| 43 | |||
| 44 | XCHG_SR(0) | ||
| 45 | XCHG_SR(1) | ||
| 46 | XCHG_SR(2) | ||
| 47 | XCHG_SR(3) | ||
| 48 | XCHG_SR(4) | ||
| 49 | XCHG_SR(5) | ||
| 50 | XCHG_SR(6) | ||
| 51 | XCHG_SR(7) | ||
| 52 | XCHG_SR(8) | ||
| 53 | XCHG_SR(9) | ||
| 54 | XCHG_SR(10) | ||
| 55 | XCHG_SR(11) | ||
| 56 | XCHG_SR(12) | ||
| 57 | XCHG_SR(13) | ||
| 58 | XCHG_SR(14) | ||
| 59 | XCHG_SR(15) | ||
| 60 | |||
| 61 | /* Clear BATs. */ | ||
| 62 | |||
| 63 | #define KVM_KILL_BAT(n, reg) \ | ||
| 64 | mtspr SPRN_IBAT##n##U,reg; \ | ||
| 65 | mtspr SPRN_IBAT##n##L,reg; \ | ||
| 66 | mtspr SPRN_DBAT##n##U,reg; \ | ||
| 67 | mtspr SPRN_DBAT##n##L,reg; \ | ||
| 68 | |||
| 69 | li r9, 0 | ||
| 70 | KVM_KILL_BAT(0, r9) | ||
| 71 | KVM_KILL_BAT(1, r9) | ||
| 72 | KVM_KILL_BAT(2, r9) | ||
| 73 | KVM_KILL_BAT(3, r9) | ||
| 74 | |||
| 75 | .endm | ||
| 76 | |||
| 77 | /****************************************************************************** | ||
| 78 | * * | ||
| 79 | * Exit code * | ||
| 80 | * * | ||
| 81 | *****************************************************************************/ | ||
| 82 | |||
| 83 | .macro LOAD_HOST_SEGMENTS | ||
| 84 | |||
| 85 | /* Register usage at this point: | ||
| 86 | * | ||
| 87 | * R1 = host R1 | ||
| 88 | * R2 = host R2 | ||
| 89 | * R12 = exit handler id | ||
| 90 | * R13 = shadow vcpu - SHADOW_VCPU_OFF | ||
| 91 | * SVCPU.* = guest * | ||
| 92 | * SVCPU[CR] = guest CR | ||
| 93 | * SVCPU[XER] = guest XER | ||
| 94 | * SVCPU[CTR] = guest CTR | ||
| 95 | * SVCPU[LR] = guest LR | ||
| 96 | * | ||
| 97 | */ | ||
| 98 | |||
| 99 | /* Restore BATs */ | ||
| 100 | |||
| 101 | /* We only overwrite the upper part, so we only restore | ||
| 102 | the upper part. */ | ||
| 103 | #define KVM_LOAD_BAT(n, reg, RA, RB) \ | ||
| 104 | lwz RA,(n*16)+0(reg); \ | ||
| 105 | lwz RB,(n*16)+4(reg); \ | ||
| 106 | mtspr SPRN_IBAT##n##U,RA; \ | ||
| 107 | mtspr SPRN_IBAT##n##L,RB; \ | ||
| 108 | lwz RA,(n*16)+8(reg); \ | ||
| 109 | lwz RB,(n*16)+12(reg); \ | ||
| 110 | mtspr SPRN_DBAT##n##U,RA; \ | ||
| 111 | mtspr SPRN_DBAT##n##L,RB; \ | ||
| 112 | |||
| 113 | lis r9, BATS@ha | ||
| 114 | addi r9, r9, BATS@l | ||
| 115 | tophys(r9, r9) | ||
| 116 | KVM_LOAD_BAT(0, r9, r10, r11) | ||
| 117 | KVM_LOAD_BAT(1, r9, r10, r11) | ||
| 118 | KVM_LOAD_BAT(2, r9, r10, r11) | ||
| 119 | KVM_LOAD_BAT(3, r9, r10, r11) | ||
| 120 | |||
| 121 | /* Restore Segment Registers */ | ||
| 122 | |||
| 123 | /* 0xc - 0xf */ | ||
| 124 | |||
| 125 | li r0, 4 | ||
| 126 | mtctr r0 | ||
| 127 | LOAD_REG_IMMEDIATE(r3, 0x20000000 | (0x111 * 0xc)) | ||
| 128 | lis r4, 0xc000 | ||
| 129 | 3: mtsrin r3, r4 | ||
| 130 | addi r3, r3, 0x111 /* increment VSID */ | ||
| 131 | addis r4, r4, 0x1000 /* address of next segment */ | ||
| 132 | bdnz 3b | ||
| 133 | |||
| 134 | /* 0x0 - 0xb */ | ||
| 135 | |||
| 136 | /* 'current->mm' needs to be in r4 */ | ||
| 137 | tophys(r4, r2) | ||
| 138 | lwz r4, MM(r4) | ||
| 139 | tophys(r4, r4) | ||
| 140 | /* This only clobbers r0, r3, r4 and r5 */ | ||
| 141 | bl switch_mmu_context | ||
| 142 | |||
| 143 | .endm | ||
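Annotation: the mtsrin loop in LOAD_HOST_SEGMENTS rebuilds the host kernel's segment registers 0xc-0xf using the 0x111-per-segment VSID convention of the 32-bit kernel mapping; 0x20000000 corresponds to the Kp key bit of a segment register. The resulting values, spelled out as a reading aid:

    /*   SR 0xc = 0x20000000 | (0x111 * 0xc) = 0x20000ccc
     *   SR 0xd = 0x20000ccc + 0x111         = 0x20000ddd
     *   SR 0xe = 0x20000ddd + 0x111         = 0x20000eee
     *   SR 0xf = 0x20000eee + 0x111         = 0x20000fff
     *
     * Segments 0x0-0xb are then reloaded from current->mm by the
     * switch_mmu_context() call that follows. */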
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 512dcff77554..4025ea26b3c1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
| @@ -232,7 +232,7 @@ do_second: | |||
| 232 | } | 232 | } |
| 233 | 233 | ||
| 234 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " | 234 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " |
| 235 | "-> 0x%llx\n", | 235 | "-> 0x%lx\n", |
| 236 | eaddr, avpn, gpte->vpage, gpte->raddr); | 236 | eaddr, avpn, gpte->vpage, gpte->raddr); |
| 237 | found = true; | 237 | found = true; |
| 238 | break; | 238 | break; |
| @@ -383,7 +383,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) | |||
| 383 | 383 | ||
| 384 | if (vcpu->arch.msr & MSR_IR) { | 384 | if (vcpu->arch.msr & MSR_IR) { |
| 385 | kvmppc_mmu_flush_segments(vcpu); | 385 | kvmppc_mmu_flush_segments(vcpu); |
| 386 | kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); | 386 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); |
| 387 | } | 387 | } |
| 388 | } | 388 | } |
| 389 | 389 | ||
| @@ -439,37 +439,43 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va, | |||
| 439 | kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); | 439 | kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); |
| 440 | } | 440 | } |
| 441 | 441 | ||
| 442 | static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, | 442 | static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, |
| 443 | u64 *vsid) | 443 | u64 *vsid) |
| 444 | { | 444 | { |
| 445 | ulong ea = esid << SID_SHIFT; | ||
| 446 | struct kvmppc_slb *slb; | ||
| 447 | u64 gvsid = esid; | ||
| 448 | |||
| 449 | if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
| 450 | slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); | ||
| 451 | if (slb) | ||
| 452 | gvsid = slb->vsid; | ||
| 453 | } | ||
| 454 | |||
| 445 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | 455 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { |
| 446 | case 0: | 456 | case 0: |
| 447 | *vsid = (VSID_REAL >> 16) | esid; | 457 | *vsid = VSID_REAL | esid; |
| 448 | break; | 458 | break; |
| 449 | case MSR_IR: | 459 | case MSR_IR: |
| 450 | *vsid = (VSID_REAL_IR >> 16) | esid; | 460 | *vsid = VSID_REAL_IR | gvsid; |
| 451 | break; | 461 | break; |
| 452 | case MSR_DR: | 462 | case MSR_DR: |
| 453 | *vsid = (VSID_REAL_DR >> 16) | esid; | 463 | *vsid = VSID_REAL_DR | gvsid; |
| 454 | break; | 464 | break; |
| 455 | case MSR_DR|MSR_IR: | 465 | case MSR_DR|MSR_IR: |
| 456 | { | 466 | if (!slb) |
| 457 | ulong ea; | ||
| 458 | struct kvmppc_slb *slb; | ||
| 459 | ea = esid << SID_SHIFT; | ||
| 460 | slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); | ||
| 461 | if (slb) | ||
| 462 | *vsid = slb->vsid; | ||
| 463 | else | ||
| 464 | return -ENOENT; | 467 | return -ENOENT; |
| 465 | 468 | ||
| 469 | *vsid = gvsid; | ||
| 466 | break; | 470 | break; |
| 467 | } | ||
| 468 | default: | 471 | default: |
| 469 | BUG(); | 472 | BUG(); |
| 470 | break; | 473 | break; |
| 471 | } | 474 | } |
| 472 | 475 | ||
| 476 | if (vcpu->arch.msr & MSR_PR) | ||
| 477 | *vsid |= VSID_PR; | ||
| 478 | |||
| 473 | return 0; | 479 | return 0; |
| 474 | } | 480 | } |
| 475 | 481 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index f2899b297ffd..e4b5744977f6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c | |||
| @@ -48,21 +48,25 @@ | |||
| 48 | 48 | ||
| 49 | static void invalidate_pte(struct hpte_cache *pte) | 49 | static void invalidate_pte(struct hpte_cache *pte) |
| 50 | { | 50 | { |
| 51 | dprintk_mmu("KVM: Flushing SPT %d: 0x%llx (0x%llx) -> 0x%llx\n", | 51 | dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", |
| 52 | i, pte->pte.eaddr, pte->pte.vpage, pte->host_va); | 52 | pte->pte.eaddr, pte->pte.vpage, pte->host_va); |
| 53 | 53 | ||
| 54 | ppc_md.hpte_invalidate(pte->slot, pte->host_va, | 54 | ppc_md.hpte_invalidate(pte->slot, pte->host_va, |
| 55 | MMU_PAGE_4K, MMU_SEGSIZE_256M, | 55 | MMU_PAGE_4K, MMU_SEGSIZE_256M, |
| 56 | false); | 56 | false); |
| 57 | pte->host_va = 0; | 57 | pte->host_va = 0; |
| 58 | kvm_release_pfn_dirty(pte->pfn); | 58 | |
| 59 | if (pte->pte.may_write) | ||
| 60 | kvm_release_pfn_dirty(pte->pfn); | ||
| 61 | else | ||
| 62 | kvm_release_pfn_clean(pte->pfn); | ||
| 59 | } | 63 | } |
| 60 | 64 | ||
| 61 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 guest_ea, u64 ea_mask) | 65 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) |
| 62 | { | 66 | { |
| 63 | int i; | 67 | int i; |
| 64 | 68 | ||
| 65 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%llx & 0x%llx\n", | 69 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", |
| 66 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); | 70 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); |
| 67 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | 71 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); |
| 68 | 72 | ||
| @@ -106,12 +110,12 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) | |||
| 106 | } | 110 | } |
| 107 | } | 111 | } |
| 108 | 112 | ||
| 109 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end) | 113 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) |
| 110 | { | 114 | { |
| 111 | int i; | 115 | int i; |
| 112 | 116 | ||
| 113 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", | 117 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n", |
| 114 | vcpu->arch.hpte_cache_offset, guest_pa, pa_mask); | 118 | vcpu->arch.hpte_cache_offset, pa_start, pa_end); |
| 115 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | 119 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); |
| 116 | 120 | ||
| 117 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | 121 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { |
| @@ -182,7 +186,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) | |||
| 182 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); | 186 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); |
| 183 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; | 187 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; |
| 184 | if (map->guest_vsid == gvsid) { | 188 | if (map->guest_vsid == gvsid) { |
| 185 | dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", | 189 | dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n", |
| 186 | gvsid, map->host_vsid); | 190 | gvsid, map->host_vsid); |
| 187 | return map; | 191 | return map; |
| 188 | } | 192 | } |
| @@ -194,7 +198,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) | |||
| 194 | return map; | 198 | return map; |
| 195 | } | 199 | } |
| 196 | 200 | ||
| 197 | dprintk_slb("SLB: Searching 0x%llx -> not found\n", gvsid); | 201 | dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n", |
| 202 | sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid); | ||
| 198 | return NULL; | 203 | return NULL; |
| 199 | } | 204 | } |
| 200 | 205 | ||
| @@ -212,7 +217,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
| 212 | /* Get host physical address for gpa */ | 217 | /* Get host physical address for gpa */ |
| 213 | hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); | 218 | hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); |
| 214 | if (kvm_is_error_hva(hpaddr)) { | 219 | if (kvm_is_error_hva(hpaddr)) { |
| 215 | printk(KERN_INFO "Couldn't get guest page for gfn %llx!\n", orig_pte->eaddr); | 220 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); |
| 216 | return -EINVAL; | 221 | return -EINVAL; |
| 217 | } | 222 | } |
| 218 | hpaddr <<= PAGE_SHIFT; | 223 | hpaddr <<= PAGE_SHIFT; |
| @@ -227,10 +232,16 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
| 227 | vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); | 232 | vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); |
| 228 | map = find_sid_vsid(vcpu, vsid); | 233 | map = find_sid_vsid(vcpu, vsid); |
| 229 | if (!map) { | 234 | if (!map) { |
| 230 | kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); | 235 | ret = kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); |
| 236 | WARN_ON(ret < 0); | ||
| 231 | map = find_sid_vsid(vcpu, vsid); | 237 | map = find_sid_vsid(vcpu, vsid); |
| 232 | } | 238 | } |
| 233 | BUG_ON(!map); | 239 | if (!map) { |
| 240 | printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", | ||
| 241 | vsid, orig_pte->eaddr); | ||
| 242 | WARN_ON(true); | ||
| 243 | return -EINVAL; | ||
| 244 | } | ||
| 234 | 245 | ||
| 235 | vsid = map->host_vsid; | 246 | vsid = map->host_vsid; |
| 236 | va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); | 247 | va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); |
| @@ -257,26 +268,26 @@ map_again: | |||
| 257 | 268 | ||
| 258 | if (ret < 0) { | 269 | if (ret < 0) { |
| 259 | /* If we couldn't map a primary PTE, try a secondary */ | 270 | /* If we couldn't map a primary PTE, try a secondary */ |
| 260 | #ifdef USE_SECONDARY | ||
| 261 | hash = ~hash; | 271 | hash = ~hash; |
| 272 | vflags ^= HPTE_V_SECONDARY; | ||
| 262 | attempt++; | 273 | attempt++; |
| 263 | if (attempt % 2) | ||
| 264 | vflags = HPTE_V_SECONDARY; | ||
| 265 | else | ||
| 266 | vflags = 0; | ||
| 267 | #else | ||
| 268 | attempt = 2; | ||
| 269 | #endif | ||
| 270 | goto map_again; | 274 | goto map_again; |
| 271 | } else { | 275 | } else { |
| 272 | int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); | 276 | int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); |
| 273 | struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; | 277 | struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; |
| 274 | 278 | ||
| 275 | dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%lx (0x%llx) -> %lx\n", | 279 | dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", |
| 276 | ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', | 280 | ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', |
| 277 | (rflags & HPTE_R_N) ? '-' : 'x', | 281 | (rflags & HPTE_R_N) ? '-' : 'x', |
| 278 | orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); | 282 | orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); |
| 279 | 283 | ||
| 284 | /* The ppc_md code may give us a secondary entry even though we | ||
| 285 | asked for a primary. Fix up. */ | ||
| 286 | if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { | ||
| 287 | hash = ~hash; | ||
| 288 | hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); | ||
| 289 | } | ||
| 290 | |||
| 280 | pte->slot = hpteg + (ret & 7); | 291 | pte->slot = hpteg + (ret & 7); |
| 281 | pte->host_va = va; | 292 | pte->host_va = va; |
| 282 | pte->pte = *orig_pte; | 293 | pte->pte = *orig_pte; |
| @@ -321,6 +332,9 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) | |||
| 321 | map->guest_vsid = gvsid; | 332 | map->guest_vsid = gvsid; |
| 322 | map->valid = true; | 333 | map->valid = true; |
| 323 | 334 | ||
| 335 | dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n", | ||
| 336 | sid_map_mask, gvsid, map->host_vsid); | ||
| 337 | |||
| 324 | return map; | 338 | return map; |
| 325 | } | 339 | } |
| 326 | 340 | ||
| @@ -331,14 +345,14 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) | |||
| 331 | int found_inval = -1; | 345 | int found_inval = -1; |
| 332 | int r; | 346 | int r; |
| 333 | 347 | ||
| 334 | if (!get_paca()->kvm_slb_max) | 348 | if (!to_svcpu(vcpu)->slb_max) |
| 335 | get_paca()->kvm_slb_max = 1; | 349 | to_svcpu(vcpu)->slb_max = 1; |
| 336 | 350 | ||
| 337 | /* Are we overwriting? */ | 351 | /* Are we overwriting? */ |
| 338 | for (i = 1; i < get_paca()->kvm_slb_max; i++) { | 352 | for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) { |
| 339 | if (!(get_paca()->kvm_slb[i].esid & SLB_ESID_V)) | 353 | if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V)) |
| 340 | found_inval = i; | 354 | found_inval = i; |
| 341 | else if ((get_paca()->kvm_slb[i].esid & ESID_MASK) == esid) | 355 | else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid) |
| 342 | return i; | 356 | return i; |
| 343 | } | 357 | } |
| 344 | 358 | ||
| @@ -352,11 +366,11 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) | |||
| 352 | max_slb_size = mmu_slb_size; | 366 | max_slb_size = mmu_slb_size; |
| 353 | 367 | ||
| 354 | /* Overflowing -> purge */ | 368 | /* Overflowing -> purge */ |
| 355 | if ((get_paca()->kvm_slb_max) == max_slb_size) | 369 | if ((to_svcpu(vcpu)->slb_max) == max_slb_size) |
| 356 | kvmppc_mmu_flush_segments(vcpu); | 370 | kvmppc_mmu_flush_segments(vcpu); |
| 357 | 371 | ||
| 358 | r = get_paca()->kvm_slb_max; | 372 | r = to_svcpu(vcpu)->slb_max; |
| 359 | get_paca()->kvm_slb_max++; | 373 | to_svcpu(vcpu)->slb_max++; |
| 360 | 374 | ||
| 361 | return r; | 375 | return r; |
| 362 | } | 376 | } |
| @@ -374,7 +388,7 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
| 374 | 388 | ||
| 375 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { | 389 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { |
| 376 | /* Invalidate an entry */ | 390 | /* Invalidate an entry */ |
| 377 | get_paca()->kvm_slb[slb_index].esid = 0; | 391 | to_svcpu(vcpu)->slb[slb_index].esid = 0; |
| 378 | return -ENOENT; | 392 | return -ENOENT; |
| 379 | } | 393 | } |
| 380 | 394 | ||
| @@ -388,8 +402,8 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
| 388 | slb_vsid &= ~SLB_VSID_KP; | 402 | slb_vsid &= ~SLB_VSID_KP; |
| 389 | slb_esid |= slb_index; | 403 | slb_esid |= slb_index; |
| 390 | 404 | ||
| 391 | get_paca()->kvm_slb[slb_index].esid = slb_esid; | 405 | to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; |
| 392 | get_paca()->kvm_slb[slb_index].vsid = slb_vsid; | 406 | to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; |
| 393 | 407 | ||
| 394 | dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); | 408 | dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); |
| 395 | 409 | ||
| @@ -398,11 +412,29 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
| 398 | 412 | ||
| 399 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | 413 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) |
| 400 | { | 414 | { |
| 401 | get_paca()->kvm_slb_max = 1; | 415 | to_svcpu(vcpu)->slb_max = 1; |
| 402 | get_paca()->kvm_slb[0].esid = 0; | 416 | to_svcpu(vcpu)->slb[0].esid = 0; |
| 403 | } | 417 | } |
| 404 | 418 | ||
| 405 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | 419 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) |
| 406 | { | 420 | { |
| 407 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 421 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
| 422 | __destroy_context(to_book3s(vcpu)->context_id); | ||
| 423 | } | ||
| 424 | |||
| 425 | int kvmppc_mmu_init(struct kvm_vcpu *vcpu) | ||
| 426 | { | ||
| 427 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | ||
| 428 | int err; | ||
| 429 | |||
| 430 | err = __init_new_context(); | ||
| 431 | if (err < 0) | ||
| 432 | return -1; | ||
| 433 | vcpu3s->context_id = err; | ||
| 434 | |||
| 435 | vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; | ||
| 436 | vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; | ||
| 437 | vcpu3s->vsid_next = vcpu3s->vsid_first; | ||
| 438 | |||
| 439 | return 0; | ||
| 408 | } | 440 | } |
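Annotation: a worked example of the vsid_first/vsid_max computation in the 64-bit kvmppc_mmu_init() above, which carves one kernel MMU context into a private block of shadow VSIDs for the guest. USER_ESID_BITS is taken as 16 purely for illustration; the real value comes from the host's mmu-hash64 headers.

    /*   context_id = 5
     *   vsid_first = 5 << 16        = 0x50000
     *   vsid_max   = (6 << 16) - 1  = 0x5ffff
     *
     * i.e. the guest gets 2^USER_ESID_BITS host VSIDs to hand out through
     * create_sid_map() before a flush becomes necessary. */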
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 35b762722187..04e7d3bbfe8b 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S | |||
| @@ -44,8 +44,7 @@ slb_exit_skip_ ## num: | |||
| 44 | * * | 44 | * * |
| 45 | *****************************************************************************/ | 45 | *****************************************************************************/ |
| 46 | 46 | ||
| 47 | .global kvmppc_handler_trampoline_enter | 47 | .macro LOAD_GUEST_SEGMENTS |
| 48 | kvmppc_handler_trampoline_enter: | ||
| 49 | 48 | ||
| 50 | /* Required state: | 49 | /* Required state: |
| 51 | * | 50 | * |
| @@ -53,20 +52,14 @@ kvmppc_handler_trampoline_enter: | |||
| 53 | * R13 = PACA | 52 | * R13 = PACA |
| 54 | * R1 = host R1 | 53 | * R1 = host R1 |
| 55 | * R2 = host R2 | 54 | * R2 = host R2 |
| 56 | * R9 = guest IP | 55 | * R3 = shadow vcpu |
| 57 | * R10 = guest MSR | 56 | * all other volatile GPRS = free |
| 58 | * all other GPRS = free | 57 | * SVCPU[CR] = guest CR |
| 59 | * PACA[KVM_CR] = guest CR | 58 | * SVCPU[XER] = guest XER |
| 60 | * PACA[KVM_XER] = guest XER | 59 | * SVCPU[CTR] = guest CTR |
| 60 | * SVCPU[LR] = guest LR | ||
| 61 | */ | 61 | */ |
| 62 | 62 | ||
| 63 | mtsrr0 r9 | ||
| 64 | mtsrr1 r10 | ||
| 65 | |||
| 66 | /* Activate guest mode, so faults get handled by KVM */ | ||
| 67 | li r11, KVM_GUEST_MODE_GUEST | ||
| 68 | stb r11, PACA_KVM_IN_GUEST(r13) | ||
| 69 | |||
| 70 | /* Remove LPAR shadow entries */ | 63 | /* Remove LPAR shadow entries */ |
| 71 | 64 | ||
| 72 | #if SLB_NUM_BOLTED == 3 | 65 | #if SLB_NUM_BOLTED == 3 |
| @@ -101,14 +94,14 @@ kvmppc_handler_trampoline_enter: | |||
| 101 | 94 | ||
| 102 | /* Fill SLB with our shadow */ | 95 | /* Fill SLB with our shadow */ |
| 103 | 96 | ||
| 104 | lbz r12, PACA_KVM_SLB_MAX(r13) | 97 | lbz r12, SVCPU_SLB_MAX(r3) |
| 105 | mulli r12, r12, 16 | 98 | mulli r12, r12, 16 |
| 106 | addi r12, r12, PACA_KVM_SLB | 99 | addi r12, r12, SVCPU_SLB |
| 107 | add r12, r12, r13 | 100 | add r12, r12, r3 |
| 108 | 101 | ||
| 109 | /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */ | 102 | /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */ |
| 110 | li r11, PACA_KVM_SLB | 103 | li r11, SVCPU_SLB |
| 111 | add r11, r11, r13 | 104 | add r11, r11, r3 |
| 112 | 105 | ||
| 113 | slb_loop_enter: | 106 | slb_loop_enter: |
| 114 | 107 | ||
| @@ -127,34 +120,7 @@ slb_loop_enter_skip: | |||
| 127 | 120 | ||
| 128 | slb_do_enter: | 121 | slb_do_enter: |
| 129 | 122 | ||
| 130 | /* Enter guest */ | 123 | .endm |
| 131 | |||
| 132 | ld r0, (PACA_KVM_R0)(r13) | ||
| 133 | ld r1, (PACA_KVM_R1)(r13) | ||
| 134 | ld r2, (PACA_KVM_R2)(r13) | ||
| 135 | ld r3, (PACA_KVM_R3)(r13) | ||
| 136 | ld r4, (PACA_KVM_R4)(r13) | ||
| 137 | ld r5, (PACA_KVM_R5)(r13) | ||
| 138 | ld r6, (PACA_KVM_R6)(r13) | ||
| 139 | ld r7, (PACA_KVM_R7)(r13) | ||
| 140 | ld r8, (PACA_KVM_R8)(r13) | ||
| 141 | ld r9, (PACA_KVM_R9)(r13) | ||
| 142 | ld r10, (PACA_KVM_R10)(r13) | ||
| 143 | ld r12, (PACA_KVM_R12)(r13) | ||
| 144 | |||
| 145 | lwz r11, (PACA_KVM_CR)(r13) | ||
| 146 | mtcr r11 | ||
| 147 | |||
| 148 | ld r11, (PACA_KVM_XER)(r13) | ||
| 149 | mtxer r11 | ||
| 150 | |||
| 151 | ld r11, (PACA_KVM_R11)(r13) | ||
| 152 | ld r13, (PACA_KVM_R13)(r13) | ||
| 153 | |||
| 154 | RFI | ||
| 155 | kvmppc_handler_trampoline_enter_end: | ||
| 156 | |||
| 157 | |||
| 158 | 124 | ||
| 159 | /****************************************************************************** | 125 | /****************************************************************************** |
| 160 | * * | 126 | * * |
| @@ -162,99 +128,22 @@ kvmppc_handler_trampoline_enter_end: | |||
| 162 | * * | 128 | * * |
| 163 | *****************************************************************************/ | 129 | *****************************************************************************/ |
| 164 | 130 | ||
| 165 | .global kvmppc_handler_trampoline_exit | 131 | .macro LOAD_HOST_SEGMENTS |
| 166 | kvmppc_handler_trampoline_exit: | ||
| 167 | 132 | ||
| 168 | /* Register usage at this point: | 133 | /* Register usage at this point: |
| 169 | * | 134 | * |
| 170 | * SPRG_SCRATCH0 = guest R13 | 135 | * R1 = host R1 |
| 171 | * R12 = exit handler id | 136 | * R2 = host R2 |
| 172 | * R13 = PACA | 137 | * R12 = exit handler id |
| 173 | * PACA.KVM.SCRATCH0 = guest R12 | 138 | * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] |
| 174 | * PACA.KVM.SCRATCH1 = guest CR | 139 | * SVCPU.* = guest * |
| 140 | * SVCPU[CR] = guest CR | ||
| 141 | * SVCPU[XER] = guest XER | ||
| 142 | * SVCPU[CTR] = guest CTR | ||
| 143 | * SVCPU[LR] = guest LR | ||
| 175 | * | 144 | * |
| 176 | */ | 145 | */ |
| 177 | 146 | ||
| 178 | /* Save registers */ | ||
| 179 | |||
| 180 | std r0, PACA_KVM_R0(r13) | ||
| 181 | std r1, PACA_KVM_R1(r13) | ||
| 182 | std r2, PACA_KVM_R2(r13) | ||
| 183 | std r3, PACA_KVM_R3(r13) | ||
| 184 | std r4, PACA_KVM_R4(r13) | ||
| 185 | std r5, PACA_KVM_R5(r13) | ||
| 186 | std r6, PACA_KVM_R6(r13) | ||
| 187 | std r7, PACA_KVM_R7(r13) | ||
| 188 | std r8, PACA_KVM_R8(r13) | ||
| 189 | std r9, PACA_KVM_R9(r13) | ||
| 190 | std r10, PACA_KVM_R10(r13) | ||
| 191 | std r11, PACA_KVM_R11(r13) | ||
| 192 | |||
| 193 | /* Restore R1/R2 so we can handle faults */ | ||
| 194 | ld r1, PACA_KVM_HOST_R1(r13) | ||
| 195 | ld r2, PACA_KVM_HOST_R2(r13) | ||
| 196 | |||
| 197 | /* Save guest PC and MSR in GPRs */ | ||
| 198 | mfsrr0 r3 | ||
| 199 | mfsrr1 r4 | ||
| 200 | |||
| 201 | /* Get scratch'ed off registers */ | ||
| 202 | mfspr r9, SPRN_SPRG_SCRATCH0 | ||
| 203 | std r9, PACA_KVM_R13(r13) | ||
| 204 | |||
| 205 | ld r8, PACA_KVM_SCRATCH0(r13) | ||
| 206 | std r8, PACA_KVM_R12(r13) | ||
| 207 | |||
| 208 | lwz r7, PACA_KVM_SCRATCH1(r13) | ||
| 209 | stw r7, PACA_KVM_CR(r13) | ||
| 210 | |||
| 211 | /* Save more register state */ | ||
| 212 | |||
| 213 | mfxer r6 | ||
| 214 | stw r6, PACA_KVM_XER(r13) | ||
| 215 | |||
| 216 | mfdar r5 | ||
| 217 | mfdsisr r6 | ||
| 218 | |||
| 219 | /* | ||
| 220 | * In order for us to easily get the last instruction, | ||
| 221 | * we got the #vmexit at, we exploit the fact that the | ||
| 222 | * virtual layout is still the same here, so we can just | ||
| 223 | * ld from the guest's PC address | ||
| 224 | */ | ||
| 225 | |||
| 226 | /* We only load the last instruction when it's safe */ | ||
| 227 | cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE | ||
| 228 | beq ld_last_inst | ||
| 229 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM | ||
| 230 | beq ld_last_inst | ||
| 231 | |||
| 232 | b no_ld_last_inst | ||
| 233 | |||
| 234 | ld_last_inst: | ||
| 235 | /* Save off the guest instruction we're at */ | ||
| 236 | |||
| 237 | /* Set guest mode to 'jump over instruction' so if lwz faults | ||
| 238 | * we'll just continue at the next IP. */ | ||
| 239 | li r9, KVM_GUEST_MODE_SKIP | ||
| 240 | stb r9, PACA_KVM_IN_GUEST(r13) | ||
| 241 | |||
| 242 | /* 1) enable paging for data */ | ||
| 243 | mfmsr r9 | ||
| 244 | ori r11, r9, MSR_DR /* Enable paging for data */ | ||
| 245 | mtmsr r11 | ||
| 246 | /* 2) fetch the instruction */ | ||
| 247 | li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */ | ||
| 248 | lwz r0, 0(r3) | ||
| 249 | /* 3) disable paging again */ | ||
| 250 | mtmsr r9 | ||
| 251 | |||
| 252 | no_ld_last_inst: | ||
| 253 | |||
| 254 | /* Unset guest mode */ | ||
| 255 | li r9, KVM_GUEST_MODE_NONE | ||
| 256 | stb r9, PACA_KVM_IN_GUEST(r13) | ||
| 257 | |||
| 258 | /* Restore bolted entries from the shadow and fix it along the way */ | 147 | /* Restore bolted entries from the shadow and fix it along the way */ |
| 259 | 148 | ||
| 260 | /* We don't store anything in entry 0, so we don't need to take care of it */ | 149 | /* We don't store anything in entry 0, so we don't need to take care of it */ |
| @@ -275,28 +164,4 @@ no_ld_last_inst: | |||
| 275 | 164 | ||
| 276 | slb_do_exit: | 165 | slb_do_exit: |
| 277 | 166 | ||
| 278 | /* Register usage at this point: | 167 | .endm |
| 279 | * | ||
| 280 | * R0 = guest last inst | ||
| 281 | * R1 = host R1 | ||
| 282 | * R2 = host R2 | ||
| 283 | * R3 = guest PC | ||
| 284 | * R4 = guest MSR | ||
| 285 | * R5 = guest DAR | ||
| 286 | * R6 = guest DSISR | ||
| 287 | * R12 = exit handler id | ||
| 288 | * R13 = PACA | ||
| 289 | * PACA.KVM.* = guest * | ||
| 290 | * | ||
| 291 | */ | ||
| 292 | |||
| 293 | /* RFI into the highmem handler */ | ||
| 294 | mfmsr r7 | ||
| 295 | ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ | ||
| 296 | mtsrr1 r7 | ||
| 297 | ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */ | ||
| 298 | mtsrr0 r8 | ||
| 299 | |||
| 300 | RFI | ||
| 301 | kvmppc_handler_trampoline_exit_end: | ||
| 302 | |||
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 2b0ee7e040c9..c85f906038ce 100644 --- a/arch/powerpc/kvm/book3s_64_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
| @@ -28,13 +28,16 @@ | |||
| 28 | #define OP_31_XOP_MFMSR 83 | 28 | #define OP_31_XOP_MFMSR 83 |
| 29 | #define OP_31_XOP_MTMSR 146 | 29 | #define OP_31_XOP_MTMSR 146 |
| 30 | #define OP_31_XOP_MTMSRD 178 | 30 | #define OP_31_XOP_MTMSRD 178 |
| 31 | #define OP_31_XOP_MTSR 210 | ||
| 31 | #define OP_31_XOP_MTSRIN 242 | 32 | #define OP_31_XOP_MTSRIN 242 |
| 32 | #define OP_31_XOP_TLBIEL 274 | 33 | #define OP_31_XOP_TLBIEL 274 |
| 33 | #define OP_31_XOP_TLBIE 306 | 34 | #define OP_31_XOP_TLBIE 306 |
| 34 | #define OP_31_XOP_SLBMTE 402 | 35 | #define OP_31_XOP_SLBMTE 402 |
| 35 | #define OP_31_XOP_SLBIE 434 | 36 | #define OP_31_XOP_SLBIE 434 |
| 36 | #define OP_31_XOP_SLBIA 498 | 37 | #define OP_31_XOP_SLBIA 498 |
| 38 | #define OP_31_XOP_MFSR 595 | ||
| 37 | #define OP_31_XOP_MFSRIN 659 | 39 | #define OP_31_XOP_MFSRIN 659 |
| 40 | #define OP_31_XOP_DCBA 758 | ||
| 38 | #define OP_31_XOP_SLBMFEV 851 | 41 | #define OP_31_XOP_SLBMFEV 851 |
| 39 | #define OP_31_XOP_EIOIO 854 | 42 | #define OP_31_XOP_EIOIO 854 |
| 40 | #define OP_31_XOP_SLBMFEE 915 | 43 | #define OP_31_XOP_SLBMFEE 915 |
| @@ -42,6 +45,24 @@ | |||
| 42 | /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ | 45 | /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ |
| 43 | #define OP_31_XOP_DCBZ 1010 | 46 | #define OP_31_XOP_DCBZ 1010 |
| 44 | 47 | ||
| 48 | #define OP_LFS 48 | ||
| 49 | #define OP_LFD 50 | ||
| 50 | #define OP_STFS 52 | ||
| 51 | #define OP_STFD 54 | ||
| 52 | |||
| 53 | #define SPRN_GQR0 912 | ||
| 54 | #define SPRN_GQR1 913 | ||
| 55 | #define SPRN_GQR2 914 | ||
| 56 | #define SPRN_GQR3 915 | ||
| 57 | #define SPRN_GQR4 916 | ||
| 58 | #define SPRN_GQR5 917 | ||
| 59 | #define SPRN_GQR6 918 | ||
| 60 | #define SPRN_GQR7 919 | ||
| 61 | |||
| 62 | /* Book3S_32 defines mfsrin(v) - but that messes up our abstract | ||
| 63 | * function pointers, so let's just disable the define. */ | ||
| 64 | #undef mfsrin | ||
| 65 | |||
| 45 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 66 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| 46 | unsigned int inst, int *advance) | 67 | unsigned int inst, int *advance) |
| 47 | { | 68 | { |
| @@ -52,7 +73,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 52 | switch (get_xop(inst)) { | 73 | switch (get_xop(inst)) { |
| 53 | case OP_19_XOP_RFID: | 74 | case OP_19_XOP_RFID: |
| 54 | case OP_19_XOP_RFI: | 75 | case OP_19_XOP_RFI: |
| 55 | vcpu->arch.pc = vcpu->arch.srr0; | 76 | kvmppc_set_pc(vcpu, vcpu->arch.srr0); |
| 56 | kvmppc_set_msr(vcpu, vcpu->arch.srr1); | 77 | kvmppc_set_msr(vcpu, vcpu->arch.srr1); |
| 57 | *advance = 0; | 78 | *advance = 0; |
| 58 | break; | 79 | break; |
| @@ -80,6 +101,18 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 80 | case OP_31_XOP_MTMSR: | 101 | case OP_31_XOP_MTMSR: |
| 81 | kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); | 102 | kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); |
| 82 | break; | 103 | break; |
| 104 | case OP_31_XOP_MFSR: | ||
| 105 | { | ||
| 106 | int srnum; | ||
| 107 | |||
| 108 | srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32); | ||
| 109 | if (vcpu->arch.mmu.mfsrin) { | ||
| 110 | u32 sr; | ||
| 111 | sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); | ||
| 112 | kvmppc_set_gpr(vcpu, get_rt(inst), sr); | ||
| 113 | } | ||
| 114 | break; | ||
| 115 | } | ||
| 83 | case OP_31_XOP_MFSRIN: | 116 | case OP_31_XOP_MFSRIN: |
| 84 | { | 117 | { |
| 85 | int srnum; | 118 | int srnum; |
| @@ -92,6 +125,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 92 | } | 125 | } |
| 93 | break; | 126 | break; |
| 94 | } | 127 | } |
| 128 | case OP_31_XOP_MTSR: | ||
| 129 | vcpu->arch.mmu.mtsrin(vcpu, | ||
| 130 | (inst >> 16) & 0xf, | ||
| 131 | kvmppc_get_gpr(vcpu, get_rs(inst))); | ||
| 132 | break; | ||
| 95 | case OP_31_XOP_MTSRIN: | 133 | case OP_31_XOP_MTSRIN: |
| 96 | vcpu->arch.mmu.mtsrin(vcpu, | 134 | vcpu->arch.mmu.mtsrin(vcpu, |
| 97 | (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, | 135 | (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, |
| @@ -150,12 +188,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 150 | kvmppc_set_gpr(vcpu, get_rt(inst), t); | 188 | kvmppc_set_gpr(vcpu, get_rt(inst), t); |
| 151 | } | 189 | } |
| 152 | break; | 190 | break; |
| 191 | case OP_31_XOP_DCBA: | ||
| 192 | /* Gets treated as NOP */ | ||
| 193 | break; | ||
| 153 | case OP_31_XOP_DCBZ: | 194 | case OP_31_XOP_DCBZ: |
| 154 | { | 195 | { |
| 155 | ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); | 196 | ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); |
| 156 | ulong ra = 0; | 197 | ulong ra = 0; |
| 157 | ulong addr; | 198 | ulong addr, vaddr; |
| 158 | u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; | 199 | u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; |
| 200 | u32 dsisr; | ||
| 201 | int r; | ||
| 159 | 202 | ||
| 160 | if (get_ra(inst)) | 203 | if (get_ra(inst)) |
| 161 | ra = kvmppc_get_gpr(vcpu, get_ra(inst)); | 204 | ra = kvmppc_get_gpr(vcpu, get_ra(inst)); |
| @@ -163,15 +206,25 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 163 | addr = (ra + rb) & ~31ULL; | 206 | addr = (ra + rb) & ~31ULL; |
| 164 | if (!(vcpu->arch.msr & MSR_SF)) | 207 | if (!(vcpu->arch.msr & MSR_SF)) |
| 165 | addr &= 0xffffffff; | 208 | addr &= 0xffffffff; |
| 209 | vaddr = addr; | ||
| 210 | |||
| 211 | r = kvmppc_st(vcpu, &addr, 32, zeros, true); | ||
| 212 | if ((r == -ENOENT) || (r == -EPERM)) { | ||
| 213 | *advance = 0; | ||
| 214 | vcpu->arch.dear = vaddr; | ||
| 215 | to_svcpu(vcpu)->fault_dar = vaddr; | ||
| 216 | |||
| 217 | dsisr = DSISR_ISSTORE; | ||
| 218 | if (r == -ENOENT) | ||
| 219 | dsisr |= DSISR_NOHPTE; | ||
| 220 | else if (r == -EPERM) | ||
| 221 | dsisr |= DSISR_PROTFAULT; | ||
| 222 | |||
| 223 | to_book3s(vcpu)->dsisr = dsisr; | ||
| 224 | to_svcpu(vcpu)->fault_dsisr = dsisr; | ||
| 166 | 225 | ||
| 167 | if (kvmppc_st(vcpu, addr, 32, zeros)) { | ||
| 168 | vcpu->arch.dear = addr; | ||
| 169 | vcpu->arch.fault_dear = addr; | ||
| 170 | to_book3s(vcpu)->dsisr = DSISR_PROTFAULT | | ||
| 171 | DSISR_ISSTORE; | ||
| 172 | kvmppc_book3s_queue_irqprio(vcpu, | 226 | kvmppc_book3s_queue_irqprio(vcpu, |
| 173 | BOOK3S_INTERRUPT_DATA_STORAGE); | 227 | BOOK3S_INTERRUPT_DATA_STORAGE); |
| 174 | kvmppc_mmu_pte_flush(vcpu, addr, ~0xFFFULL); | ||
| 175 | } | 228 | } |
| 176 | 229 | ||
| 177 | break; | 230 | break; |
| @@ -184,6 +237,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 184 | emulated = EMULATE_FAIL; | 237 | emulated = EMULATE_FAIL; |
| 185 | } | 238 | } |
| 186 | 239 | ||
| 240 | if (emulated == EMULATE_FAIL) | ||
| 241 | emulated = kvmppc_emulate_paired_single(run, vcpu); | ||
| 242 | |||
| 187 | return emulated; | 243 | return emulated; |
| 188 | } | 244 | } |
| 189 | 245 | ||
| @@ -207,6 +263,34 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, | |||
| 207 | } | 263 | } |
| 208 | } | 264 | } |
| 209 | 265 | ||
| 266 | static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) | ||
| 267 | { | ||
| 268 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | ||
| 269 | struct kvmppc_bat *bat; | ||
| 270 | |||
| 271 | switch (sprn) { | ||
| 272 | case SPRN_IBAT0U ... SPRN_IBAT3L: | ||
| 273 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; | ||
| 274 | break; | ||
| 275 | case SPRN_IBAT4U ... SPRN_IBAT7L: | ||
| 276 | bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; | ||
| 277 | break; | ||
| 278 | case SPRN_DBAT0U ... SPRN_DBAT3L: | ||
| 279 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; | ||
| 280 | break; | ||
| 281 | case SPRN_DBAT4U ... SPRN_DBAT7L: | ||
| 282 | bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; | ||
| 283 | break; | ||
| 284 | default: | ||
| 285 | BUG(); | ||
| 286 | } | ||
| 287 | |||
| 288 | if (sprn % 2) | ||
| 289 | return bat->raw >> 32; | ||
| 290 | else | ||
| 291 | return bat->raw; | ||
| 292 | } | ||
| 293 | |||
| 210 | static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) | 294 | static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) |
| 211 | { | 295 | { |
| 212 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | 296 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); |
| @@ -217,13 +301,13 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) | |||
| 217 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; | 301 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; |
| 218 | break; | 302 | break; |
| 219 | case SPRN_IBAT4U ... SPRN_IBAT7L: | 303 | case SPRN_IBAT4U ... SPRN_IBAT7L: |
| 220 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT4U) / 2]; | 304 | bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; |
| 221 | break; | 305 | break; |
| 222 | case SPRN_DBAT0U ... SPRN_DBAT3L: | 306 | case SPRN_DBAT0U ... SPRN_DBAT3L: |
| 223 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; | 307 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; |
| 224 | break; | 308 | break; |
| 225 | case SPRN_DBAT4U ... SPRN_DBAT7L: | 309 | case SPRN_DBAT4U ... SPRN_DBAT7L: |
| 226 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT4U) / 2]; | 310 | bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; |
| 227 | break; | 311 | break; |
| 228 | default: | 312 | default: |
| 229 | BUG(); | 313 | BUG(); |
| @@ -258,6 +342,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
| 258 | /* BAT writes happen so rarely that we're ok to flush | 342 | /* BAT writes happen so rarely that we're ok to flush |
| 259 | * everything here */ | 343 | * everything here */ |
| 260 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 344 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
| 345 | kvmppc_mmu_flush_segments(vcpu); | ||
| 261 | break; | 346 | break; |
| 262 | case SPRN_HID0: | 347 | case SPRN_HID0: |
| 263 | to_book3s(vcpu)->hid[0] = spr_val; | 348 | to_book3s(vcpu)->hid[0] = spr_val; |
| @@ -268,7 +353,32 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
| 268 | case SPRN_HID2: | 353 | case SPRN_HID2: |
| 269 | to_book3s(vcpu)->hid[2] = spr_val; | 354 | to_book3s(vcpu)->hid[2] = spr_val; |
| 270 | break; | 355 | break; |
| 356 | case SPRN_HID2_GEKKO: | ||
| 357 | to_book3s(vcpu)->hid[2] = spr_val; | ||
| 358 | /* HID2.PSE controls paired single on gekko */ | ||
| 359 | switch (vcpu->arch.pvr) { | ||
| 360 | case 0x00080200: /* lonestar 2.0 */ | ||
| 361 | case 0x00088202: /* lonestar 2.2 */ | ||
| 362 | case 0x70000100: /* gekko 1.0 */ | ||
| 363 | case 0x00080100: /* gekko 2.0 */ | ||
| 364 | case 0x00083203: /* gekko 2.3a */ | ||
| 365 | case 0x00083213: /* gekko 2.3b */ | ||
| 366 | case 0x00083204: /* gekko 2.4 */ | ||
| 367 | case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ | ||
| 368 | case 0x00087200: /* broadway */ | ||
| 369 | if (vcpu->arch.hflags & BOOK3S_HFLAG_NATIVE_PS) { | ||
| 370 | /* Native paired singles */ | ||
| 371 | } else if (spr_val & (1 << 29)) { /* HID2.PSE */ | ||
| 372 | vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE; | ||
| 373 | kvmppc_giveup_ext(vcpu, MSR_FP); | ||
| 374 | } else { | ||
| 375 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE; | ||
| 376 | } | ||
| 377 | break; | ||
| 378 | } | ||
| 379 | break; | ||
| 271 | case SPRN_HID4: | 380 | case SPRN_HID4: |
| 381 | case SPRN_HID4_GEKKO: | ||
| 272 | to_book3s(vcpu)->hid[4] = spr_val; | 382 | to_book3s(vcpu)->hid[4] = spr_val; |
| 273 | break; | 383 | break; |
| 274 | case SPRN_HID5: | 384 | case SPRN_HID5: |
| @@ -278,12 +388,30 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
| 278 | (mfmsr() & MSR_HV)) | 388 | (mfmsr() & MSR_HV)) |
| 279 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; | 389 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; |
| 280 | break; | 390 | break; |
| 391 | case SPRN_GQR0: | ||
| 392 | case SPRN_GQR1: | ||
| 393 | case SPRN_GQR2: | ||
| 394 | case SPRN_GQR3: | ||
| 395 | case SPRN_GQR4: | ||
| 396 | case SPRN_GQR5: | ||
| 397 | case SPRN_GQR6: | ||
| 398 | case SPRN_GQR7: | ||
| 399 | to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; | ||
| 400 | break; | ||
| 281 | case SPRN_ICTC: | 401 | case SPRN_ICTC: |
| 282 | case SPRN_THRM1: | 402 | case SPRN_THRM1: |
| 283 | case SPRN_THRM2: | 403 | case SPRN_THRM2: |
| 284 | case SPRN_THRM3: | 404 | case SPRN_THRM3: |
| 285 | case SPRN_CTRLF: | 405 | case SPRN_CTRLF: |
| 286 | case SPRN_CTRLT: | 406 | case SPRN_CTRLT: |
| 407 | case SPRN_L2CR: | ||
| 408 | case SPRN_MMCR0_GEKKO: | ||
| 409 | case SPRN_MMCR1_GEKKO: | ||
| 410 | case SPRN_PMC1_GEKKO: | ||
| 411 | case SPRN_PMC2_GEKKO: | ||
| 412 | case SPRN_PMC3_GEKKO: | ||
| 413 | case SPRN_PMC4_GEKKO: | ||
| 414 | case SPRN_WPAR_GEKKO: | ||
| 287 | break; | 415 | break; |
| 288 | default: | 416 | default: |
| 289 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); | 417 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); |
| @@ -301,6 +429,12 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
| 301 | int emulated = EMULATE_DONE; | 429 | int emulated = EMULATE_DONE; |
| 302 | 430 | ||
| 303 | switch (sprn) { | 431 | switch (sprn) { |
| 432 | case SPRN_IBAT0U ... SPRN_IBAT3L: | ||
| 433 | case SPRN_IBAT4U ... SPRN_IBAT7L: | ||
| 434 | case SPRN_DBAT0U ... SPRN_DBAT3L: | ||
| 435 | case SPRN_DBAT4U ... SPRN_DBAT7L: | ||
| 436 | kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); | ||
| 437 | break; | ||
| 304 | case SPRN_SDR1: | 438 | case SPRN_SDR1: |
| 305 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); | 439 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); |
| 306 | break; | 440 | break; |
| @@ -320,19 +454,40 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
| 320 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); | 454 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); |
| 321 | break; | 455 | break; |
| 322 | case SPRN_HID2: | 456 | case SPRN_HID2: |
| 457 | case SPRN_HID2_GEKKO: | ||
| 323 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); | 458 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); |
| 324 | break; | 459 | break; |
| 325 | case SPRN_HID4: | 460 | case SPRN_HID4: |
| 461 | case SPRN_HID4_GEKKO: | ||
| 326 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); | 462 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); |
| 327 | break; | 463 | break; |
| 328 | case SPRN_HID5: | 464 | case SPRN_HID5: |
| 329 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); | 465 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); |
| 330 | break; | 466 | break; |
| 467 | case SPRN_GQR0: | ||
| 468 | case SPRN_GQR1: | ||
| 469 | case SPRN_GQR2: | ||
| 470 | case SPRN_GQR3: | ||
| 471 | case SPRN_GQR4: | ||
| 472 | case SPRN_GQR5: | ||
| 473 | case SPRN_GQR6: | ||
| 474 | case SPRN_GQR7: | ||
| 475 | kvmppc_set_gpr(vcpu, rt, | ||
| 476 | to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); | ||
| 477 | break; | ||
| 331 | case SPRN_THRM1: | 478 | case SPRN_THRM1: |
| 332 | case SPRN_THRM2: | 479 | case SPRN_THRM2: |
| 333 | case SPRN_THRM3: | 480 | case SPRN_THRM3: |
| 334 | case SPRN_CTRLF: | 481 | case SPRN_CTRLF: |
| 335 | case SPRN_CTRLT: | 482 | case SPRN_CTRLT: |
| 483 | case SPRN_L2CR: | ||
| 484 | case SPRN_MMCR0_GEKKO: | ||
| 485 | case SPRN_MMCR1_GEKKO: | ||
| 486 | case SPRN_PMC1_GEKKO: | ||
| 487 | case SPRN_PMC2_GEKKO: | ||
| 488 | case SPRN_PMC3_GEKKO: | ||
| 489 | case SPRN_PMC4_GEKKO: | ||
| 490 | case SPRN_WPAR_GEKKO: | ||
| 336 | kvmppc_set_gpr(vcpu, rt, 0); | 491 | kvmppc_set_gpr(vcpu, rt, 0); |
| 337 | break; | 492 | break; |
| 338 | default: | 493 | default: |
| @@ -346,3 +501,73 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
| 346 | return emulated; | 501 | return emulated; |
| 347 | } | 502 | } |
| 348 | 503 | ||
| 504 | u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) | ||
| 505 | { | ||
| 506 | u32 dsisr = 0; | ||
| 507 | |||
| 508 | /* | ||
| 509 | * This is what the spec says about DSISR bits (not mentioned = 0): | ||
| 510 | * | ||
| 511 | * 12:13 [DS] Set to bits 30:31 | ||
| 512 | * 15:16 [X] Set to bits 29:30 | ||
| 513 | * 17 [X] Set to bit 25 | ||
| 514 | * [D/DS] Set to bit 5 | ||
| 515 | * 18:21 [X] Set to bits 21:24 | ||
| 516 | * [D/DS] Set to bits 1:4 | ||
| 517 | * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS) | ||
| 518 | * 27:31 Set to bits 11:15 (RA) | ||
| 519 | */ | ||
| 520 | |||
| 521 | switch (get_op(inst)) { | ||
| 522 | /* D-form */ | ||
| 523 | case OP_LFS: | ||
| 524 | case OP_LFD: | ||
| 525 | case OP_STFD: | ||
| 526 | case OP_STFS: | ||
| 527 | dsisr |= (inst >> 12) & 0x4000; /* bit 17 */ | ||
| 528 | dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */ | ||
| 529 | break; | ||
| 530 | /* X-form */ | ||
| 531 | case 31: | ||
| 532 | dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */ | ||
| 533 | dsisr |= (inst << 8) & 0x04000; /* bit 17 */ | ||
| 534 | dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */ | ||
| 535 | break; | ||
| 536 | default: | ||
| 537 | printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); | ||
| 538 | break; | ||
| 539 | } | ||
| 540 | |||
| 541 | dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */ | ||
| 542 | |||
| 543 | return dsisr; | ||
| 544 | } | ||
| 545 | |||
| 546 | ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) | ||
| 547 | { | ||
| 548 | ulong dar = 0; | ||
| 549 | ulong ra; | ||
| 550 | |||
| 551 | switch (get_op(inst)) { | ||
| 552 | case OP_LFS: | ||
| 553 | case OP_LFD: | ||
| 554 | case OP_STFD: | ||
| 555 | case OP_STFS: | ||
| 556 | ra = get_ra(inst); | ||
| 557 | if (ra) | ||
| 558 | dar = kvmppc_get_gpr(vcpu, ra); | ||
| 559 | dar += (s32)((s16)inst); | ||
| 560 | break; | ||
| 561 | case 31: | ||
| 562 | ra = get_ra(inst); | ||
| 563 | if (ra) | ||
| 564 | dar = kvmppc_get_gpr(vcpu, ra); | ||
| 565 | dar += kvmppc_get_gpr(vcpu, get_rb(inst)); | ||
| 566 | break; | ||
| 567 | default: | ||
| 568 | printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); | ||
| 569 | break; | ||
| 570 | } | ||
| 571 | |||
| 572 | return dar; | ||
| 573 | } | ||
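Annotation: two worked examples of the DAR reconstruction in kvmppc_alignment_dar(), with register contents invented purely for illustration.

    /* D-form:  lfd f1, -8(r3)    with r3 = 0x1000
     *          dar = 0x1000 + (s32)(s16)0xfff8 = 0x0ff8
     *
     * X-form:  lfdx f1, r3, r4   with r3 = 0x1000, r4 = 0x24
     *          dar = 0x1000 + 0x24 = 0x1024
     *
     * When RA is 0 the base term is 0, matching the (RA|0) addressing
     * convention. */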
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_exports.c index 1dd5a1ddfd0d..1dd5a1ddfd0d 100644 --- a/arch/powerpc/kvm/book3s_64_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c | |||
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index c1584d0cbce8..2f0bc928b08a 100644 --- a/arch/powerpc/kvm/book3s_64_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S | |||
| @@ -24,36 +24,56 @@ | |||
| 24 | #include <asm/asm-offsets.h> | 24 | #include <asm/asm-offsets.h> |
| 25 | #include <asm/exception-64s.h> | 25 | #include <asm/exception-64s.h> |
| 26 | 26 | ||
| 27 | #define KVMPPC_HANDLE_EXIT .kvmppc_handle_exit | 27 | #if defined(CONFIG_PPC_BOOK3S_64) |
| 28 | #define ULONG_SIZE 8 | ||
| 29 | #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) | ||
| 30 | 28 | ||
| 31 | .macro DISABLE_INTERRUPTS | 29 | #define ULONG_SIZE 8 |
| 32 | mfmsr r0 | 30 | #define FUNC(name) GLUE(.,name) |
| 33 | rldicl r0,r0,48,1 | ||
| 34 | rotldi r0,r0,16 | ||
| 35 | mtmsrd r0,1 | ||
| 36 | .endm | ||
| 37 | 31 | ||
| 32 | #define GET_SHADOW_VCPU(reg) \ | ||
| 33 | addi reg, r13, PACA_KVM_SVCPU | ||
| 34 | |||
| 35 | #define DISABLE_INTERRUPTS \ | ||
| 36 | mfmsr r0; \ | ||
| 37 | rldicl r0,r0,48,1; \ | ||
| 38 | rotldi r0,r0,16; \ | ||
| 39 | mtmsrd r0,1; \ | ||
| 40 | |||
| 41 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
| 42 | |||
| 43 | #define ULONG_SIZE 4 | ||
| 44 | #define FUNC(name) name | ||
| 45 | |||
| 46 | #define GET_SHADOW_VCPU(reg) \ | ||
| 47 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) | ||
| 48 | |||
| 49 | #define DISABLE_INTERRUPTS \ | ||
| 50 | mfmsr r0; \ | ||
| 51 | rlwinm r0,r0,0,17,15; \ | ||
| 52 | mtmsr r0; \ | ||
| 53 | |||
| 54 | #endif /* CONFIG_PPC_BOOK3S_XX */ | ||
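Annotation: both DISABLE_INTERRUPTS variants reduce to clearing MSR[EE] before entering guest context. The 64-bit rldicl/rotldi pair rotates EE into the masked-off top bit and back, while the 32-bit rlwinm keeps every MSR bit except bit 16 (EE). Stated at C level, as a reading aid only:

    /* msr = mfmsr();
     * msr &= ~MSR_EE;      what both macro bodies achieve
     * mtmsr(msr);          (mtmsrd msr,1 on 64-bit)  */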
| 55 | |||
| 56 | |||
| 57 | #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) | ||
| 38 | #define VCPU_LOAD_NVGPRS(vcpu) \ | 58 | #define VCPU_LOAD_NVGPRS(vcpu) \ |
| 39 | ld r14, VCPU_GPR(r14)(vcpu); \ | 59 | PPC_LL r14, VCPU_GPR(r14)(vcpu); \ |
| 40 | ld r15, VCPU_GPR(r15)(vcpu); \ | 60 | PPC_LL r15, VCPU_GPR(r15)(vcpu); \ |
| 41 | ld r16, VCPU_GPR(r16)(vcpu); \ | 61 | PPC_LL r16, VCPU_GPR(r16)(vcpu); \ |
| 42 | ld r17, VCPU_GPR(r17)(vcpu); \ | 62 | PPC_LL r17, VCPU_GPR(r17)(vcpu); \ |
| 43 | ld r18, VCPU_GPR(r18)(vcpu); \ | 63 | PPC_LL r18, VCPU_GPR(r18)(vcpu); \ |
| 44 | ld r19, VCPU_GPR(r19)(vcpu); \ | 64 | PPC_LL r19, VCPU_GPR(r19)(vcpu); \ |
| 45 | ld r20, VCPU_GPR(r20)(vcpu); \ | 65 | PPC_LL r20, VCPU_GPR(r20)(vcpu); \ |
| 46 | ld r21, VCPU_GPR(r21)(vcpu); \ | 66 | PPC_LL r21, VCPU_GPR(r21)(vcpu); \ |
| 47 | ld r22, VCPU_GPR(r22)(vcpu); \ | 67 | PPC_LL r22, VCPU_GPR(r22)(vcpu); \ |
| 48 | ld r23, VCPU_GPR(r23)(vcpu); \ | 68 | PPC_LL r23, VCPU_GPR(r23)(vcpu); \ |
| 49 | ld r24, VCPU_GPR(r24)(vcpu); \ | 69 | PPC_LL r24, VCPU_GPR(r24)(vcpu); \ |
| 50 | ld r25, VCPU_GPR(r25)(vcpu); \ | 70 | PPC_LL r25, VCPU_GPR(r25)(vcpu); \ |
| 51 | ld r26, VCPU_GPR(r26)(vcpu); \ | 71 | PPC_LL r26, VCPU_GPR(r26)(vcpu); \ |
| 52 | ld r27, VCPU_GPR(r27)(vcpu); \ | 72 | PPC_LL r27, VCPU_GPR(r27)(vcpu); \ |
| 53 | ld r28, VCPU_GPR(r28)(vcpu); \ | 73 | PPC_LL r28, VCPU_GPR(r28)(vcpu); \ |
| 54 | ld r29, VCPU_GPR(r29)(vcpu); \ | 74 | PPC_LL r29, VCPU_GPR(r29)(vcpu); \ |
| 55 | ld r30, VCPU_GPR(r30)(vcpu); \ | 75 | PPC_LL r30, VCPU_GPR(r30)(vcpu); \ |
| 56 | ld r31, VCPU_GPR(r31)(vcpu); \ | 76 | PPC_LL r31, VCPU_GPR(r31)(vcpu); \ |
| 57 | 77 | ||
| 58 | /***************************************************************************** | 78 | /***************************************************************************** |
| 59 | * * | 79 | * * |
| @@ -69,11 +89,11 @@ _GLOBAL(__kvmppc_vcpu_entry) | |||
| 69 | 89 | ||
| 70 | kvm_start_entry: | 90 | kvm_start_entry: |
| 71 | /* Write correct stack frame */ | 91 | /* Write correct stack frame */ |
| 72 | mflr r0 | 92 | mflr r0 |
| 73 | std r0,16(r1) | 93 | PPC_STL r0,PPC_LR_STKOFF(r1) |
| 74 | 94 | ||
| 75 | /* Save host state to the stack */ | 95 | /* Save host state to the stack */ |
| 76 | stdu r1, -SWITCH_FRAME_SIZE(r1) | 96 | PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) |
| 77 | 97 | ||
| 78 | /* Save r3 (kvm_run) and r4 (vcpu) */ | 98 | /* Save r3 (kvm_run) and r4 (vcpu) */ |
| 79 | SAVE_2GPRS(3, r1) | 99 | SAVE_2GPRS(3, r1) |
| @@ -82,33 +102,28 @@ kvm_start_entry: | |||
| 82 | SAVE_NVGPRS(r1) | 102 | SAVE_NVGPRS(r1) |
| 83 | 103 | ||
| 84 | /* Save LR */ | 104 | /* Save LR */ |
| 85 | std r0, _LINK(r1) | 105 | PPC_STL r0, _LINK(r1) |
| 86 | 106 | ||
| 87 | /* Load non-volatile guest state from the vcpu */ | 107 | /* Load non-volatile guest state from the vcpu */ |
| 88 | VCPU_LOAD_NVGPRS(r4) | 108 | VCPU_LOAD_NVGPRS(r4) |
| 89 | 109 | ||
| 110 | GET_SHADOW_VCPU(r5) | ||
| 111 | |||
| 90 | /* Save R1/R2 in the PACA */ | 112 | /* Save R1/R2 in the PACA */ |
| 91 | std r1, PACA_KVM_HOST_R1(r13) | 113 | PPC_STL r1, SVCPU_HOST_R1(r5) |
| 92 | std r2, PACA_KVM_HOST_R2(r13) | 114 | PPC_STL r2, SVCPU_HOST_R2(r5) |
| 93 | 115 | ||
| 94 | /* XXX swap in/out on load? */ | 116 | /* XXX swap in/out on load? */ |
| 95 | ld r3, VCPU_HIGHMEM_HANDLER(r4) | 117 | PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4) |
| 96 | std r3, PACA_KVM_VMHANDLER(r13) | 118 | PPC_STL r3, SVCPU_VMHANDLER(r5) |
| 97 | 119 | ||
| 98 | kvm_start_lightweight: | 120 | kvm_start_lightweight: |
| 99 | 121 | ||
| 100 | ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ | 122 | PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ |
| 101 | ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ | ||
| 102 | |||
| 103 | /* Load some guest state in the respective registers */ | ||
| 104 | ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */ | ||
| 105 | /* will be swapped in by rmcall */ | ||
| 106 | |||
| 107 | ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ | ||
| 108 | mtlr r3 /* LR = r3 */ | ||
| 109 | 123 | ||
| 110 | DISABLE_INTERRUPTS | 124 | DISABLE_INTERRUPTS |
| 111 | 125 | ||
| 126 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 112 | /* Some guests may need to have dcbz set to 32 byte length. | 127 | /* Some guests may need to have dcbz set to 32 byte length. |
| 113 | * | 128 | * |
| 114 | * Usually we ensure that by patching the guest's instructions | 129 | * Usually we ensure that by patching the guest's instructions |
| @@ -118,7 +133,7 @@ kvm_start_lightweight: | |||
| 118 | * because that's a lot faster. | 133 | * because that's a lot faster. |
| 119 | */ | 134 | */ |
| 120 | 135 | ||
| 121 | ld r3, VCPU_HFLAGS(r4) | 136 | PPC_LL r3, VCPU_HFLAGS(r4) |
| 122 | rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ | 137 | rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ |
| 123 | beq no_dcbz32_on | 138 | beq no_dcbz32_on |
| 124 | 139 | ||
| @@ -128,13 +143,15 @@ kvm_start_lightweight: | |||
| 128 | 143 | ||
| 129 | no_dcbz32_on: | 144 | no_dcbz32_on: |
| 130 | 145 | ||
| 131 | ld r6, VCPU_RMCALL(r4) | 146 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
| 147 | |||
| 148 | PPC_LL r6, VCPU_RMCALL(r4) | ||
| 132 | mtctr r6 | 149 | mtctr r6 |
| 133 | 150 | ||
| 134 | ld r3, VCPU_TRAMPOLINE_ENTER(r4) | 151 | PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4) |
| 135 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | 152 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) |
| 136 | 153 | ||
| 137 | /* Jump to SLB patching handlder and into our guest */ | 154 | /* Jump to segment patching handler and into our guest */ |
| 138 | bctr | 155 | bctr |
| 139 | 156 | ||
| 140 | /* | 157 | /* |
| @@ -149,31 +166,20 @@ kvmppc_handler_highmem: | |||
| 149 | /* | 166 | /* |
| 150 | * Register usage at this point: | 167 | * Register usage at this point: |
| 151 | * | 168 | * |
| 152 | * R0 = guest last inst | 169 | * R1 = host R1 |
| 153 | * R1 = host R1 | 170 | * R2 = host R2 |
| 154 | * R2 = host R2 | 171 | * R12 = exit handler id |
| 155 | * R3 = guest PC | 172 | * R13 = PACA |
| 156 | * R4 = guest MSR | 173 | * SVCPU.* = guest * |
| 157 | * R5 = guest DAR | ||
| 158 | * R6 = guest DSISR | ||
| 159 | * R13 = PACA | ||
| 160 | * PACA.KVM.* = guest * | ||
| 161 | * | 174 | * |
| 162 | */ | 175 | */ |
| 163 | 176 | ||
| 164 | /* R7 = vcpu */ | 177 | /* R7 = vcpu */ |
| 165 | ld r7, GPR4(r1) | 178 | PPC_LL r7, GPR4(r1) |
| 166 | 179 | ||
| 167 | /* Now save the guest state */ | 180 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 168 | 181 | ||
| 169 | stw r0, VCPU_LAST_INST(r7) | 182 | PPC_LL r5, VCPU_HFLAGS(r7) |
| 170 | |||
| 171 | std r3, VCPU_PC(r7) | ||
| 172 | std r4, VCPU_SHADOW_SRR1(r7) | ||
| 173 | std r5, VCPU_FAULT_DEAR(r7) | ||
| 174 | std r6, VCPU_FAULT_DSISR(r7) | ||
| 175 | |||
| 176 | ld r5, VCPU_HFLAGS(r7) | ||
| 177 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ | 183 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ |
| 178 | beq no_dcbz32_off | 184 | beq no_dcbz32_off |
| 179 | 185 | ||
| @@ -184,35 +190,29 @@ kvmppc_handler_highmem: | |||
| 184 | 190 | ||
| 185 | no_dcbz32_off: | 191 | no_dcbz32_off: |
| 186 | 192 | ||
| 187 | std r14, VCPU_GPR(r14)(r7) | 193 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
| 188 | std r15, VCPU_GPR(r15)(r7) | 194 | |
| 189 | std r16, VCPU_GPR(r16)(r7) | 195 | PPC_STL r14, VCPU_GPR(r14)(r7) |
| 190 | std r17, VCPU_GPR(r17)(r7) | 196 | PPC_STL r15, VCPU_GPR(r15)(r7) |
| 191 | std r18, VCPU_GPR(r18)(r7) | 197 | PPC_STL r16, VCPU_GPR(r16)(r7) |
| 192 | std r19, VCPU_GPR(r19)(r7) | 198 | PPC_STL r17, VCPU_GPR(r17)(r7) |
| 193 | std r20, VCPU_GPR(r20)(r7) | 199 | PPC_STL r18, VCPU_GPR(r18)(r7) |
| 194 | std r21, VCPU_GPR(r21)(r7) | 200 | PPC_STL r19, VCPU_GPR(r19)(r7) |
| 195 | std r22, VCPU_GPR(r22)(r7) | 201 | PPC_STL r20, VCPU_GPR(r20)(r7) |
| 196 | std r23, VCPU_GPR(r23)(r7) | 202 | PPC_STL r21, VCPU_GPR(r21)(r7) |
| 197 | std r24, VCPU_GPR(r24)(r7) | 203 | PPC_STL r22, VCPU_GPR(r22)(r7) |
| 198 | std r25, VCPU_GPR(r25)(r7) | 204 | PPC_STL r23, VCPU_GPR(r23)(r7) |
| 199 | std r26, VCPU_GPR(r26)(r7) | 205 | PPC_STL r24, VCPU_GPR(r24)(r7) |
| 200 | std r27, VCPU_GPR(r27)(r7) | 206 | PPC_STL r25, VCPU_GPR(r25)(r7) |
| 201 | std r28, VCPU_GPR(r28)(r7) | 207 | PPC_STL r26, VCPU_GPR(r26)(r7) |
| 202 | std r29, VCPU_GPR(r29)(r7) | 208 | PPC_STL r27, VCPU_GPR(r27)(r7) |
| 203 | std r30, VCPU_GPR(r30)(r7) | 209 | PPC_STL r28, VCPU_GPR(r28)(r7) |
| 204 | std r31, VCPU_GPR(r31)(r7) | 210 | PPC_STL r29, VCPU_GPR(r29)(r7) |
| 205 | 211 | PPC_STL r30, VCPU_GPR(r30)(r7) | |
| 206 | /* Save guest CTR */ | 212 | PPC_STL r31, VCPU_GPR(r31)(r7) |
| 207 | mfctr r5 | ||
| 208 | std r5, VCPU_CTR(r7) | ||
| 209 | |||
| 210 | /* Save guest LR */ | ||
| 211 | mflr r5 | ||
| 212 | std r5, VCPU_LR(r7) | ||
| 213 | 213 | ||
| 214 | /* Restore host msr -> SRR1 */ | 214 | /* Restore host msr -> SRR1 */ |
| 215 | ld r6, VCPU_HOST_MSR(r7) | 215 | PPC_LL r6, VCPU_HOST_MSR(r7) |
| 216 | 216 | ||
| 217 | /* | 217 | /* |
| 218 | * For some interrupts, we need to call the real Linux | 218 | * For some interrupts, we need to call the real Linux |
| @@ -228,9 +228,12 @@ no_dcbz32_off: | |||
| 228 | beq call_linux_handler | 228 | beq call_linux_handler |
| 229 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER | 229 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER |
| 230 | beq call_linux_handler | 230 | beq call_linux_handler |
| 231 | cmpwi r12, BOOK3S_INTERRUPT_PERFMON | ||
| 232 | beq call_linux_handler | ||
| 231 | 233 | ||
| 232 | /* Back to EE=1 */ | 234 | /* Back to EE=1 */ |
| 233 | mtmsr r6 | 235 | mtmsr r6 |
| 236 | sync | ||
| 234 | b kvm_return_point | 237 | b kvm_return_point |
| 235 | 238 | ||
| 236 | call_linux_handler: | 239 | call_linux_handler: |
| @@ -249,14 +252,14 @@ call_linux_handler: | |||
| 249 | */ | 252 | */ |
| 250 | 253 | ||
| 251 | /* Restore host IP -> SRR0 */ | 254 | /* Restore host IP -> SRR0 */ |
| 252 | ld r5, VCPU_HOST_RETIP(r7) | 255 | PPC_LL r5, VCPU_HOST_RETIP(r7) |
| 253 | 256 | ||
| 254 | /* XXX Better move to a safe function? | 257 | /* XXX Better move to a safe function? |
| 255 | * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ | 258 | * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ |
| 256 | 259 | ||
| 257 | mtlr r12 | 260 | mtlr r12 |
| 258 | 261 | ||
| 259 | ld r4, VCPU_TRAMPOLINE_LOWMEM(r7) | 262 | PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7) |
| 260 | mtsrr0 r4 | 263 | mtsrr0 r4 |
| 261 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | 264 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) |
| 262 | mtsrr1 r3 | 265 | mtsrr1 r3 |
| @@ -274,7 +277,7 @@ kvm_return_point: | |||
| 274 | 277 | ||
| 275 | /* Restore r3 (kvm_run) and r4 (vcpu) */ | 278 | /* Restore r3 (kvm_run) and r4 (vcpu) */ |
| 276 | REST_2GPRS(3, r1) | 279 | REST_2GPRS(3, r1) |
| 277 | bl KVMPPC_HANDLE_EXIT | 280 | bl FUNC(kvmppc_handle_exit) |
| 278 | 281 | ||
| 279 | /* If RESUME_GUEST, get back in the loop */ | 282 | /* If RESUME_GUEST, get back in the loop */ |
| 280 | cmpwi r3, RESUME_GUEST | 283 | cmpwi r3, RESUME_GUEST |
| @@ -285,7 +288,7 @@ kvm_return_point: | |||
| 285 | 288 | ||
| 286 | kvm_exit_loop: | 289 | kvm_exit_loop: |
| 287 | 290 | ||
| 288 | ld r4, _LINK(r1) | 291 | PPC_LL r4, _LINK(r1) |
| 289 | mtlr r4 | 292 | mtlr r4 |
| 290 | 293 | ||
| 291 | /* Restore non-volatile host registers (r14 - r31) */ | 294 | /* Restore non-volatile host registers (r14 - r31) */ |
| @@ -296,8 +299,8 @@ kvm_exit_loop: | |||
| 296 | 299 | ||
| 297 | kvm_loop_heavyweight: | 300 | kvm_loop_heavyweight: |
| 298 | 301 | ||
| 299 | ld r4, _LINK(r1) | 302 | PPC_LL r4, _LINK(r1) |
| 300 | std r4, (16 + SWITCH_FRAME_SIZE)(r1) | 303 | PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) |
| 301 | 304 | ||
| 302 | /* Load vcpu and cpu_run */ | 305 | /* Load vcpu and cpu_run */ |
| 303 | REST_2GPRS(3, r1) | 306 | REST_2GPRS(3, r1) |
| @@ -315,4 +318,3 @@ kvm_loop_lightweight: | |||
| 315 | 318 | ||
| 316 | /* Jump back into the beginning of this function */ | 319 | /* Jump back into the beginning of this function */ |
| 317 | b kvm_start_lightweight | 320 | b kvm_start_lightweight |
| 318 | |||
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c new file mode 100644 index 000000000000..a9f66abafcb3 --- /dev/null +++ b/arch/powerpc/kvm/book3s_paired_singles.c | |||
| @@ -0,0 +1,1289 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or modify | ||
| 3 | * it under the terms of the GNU General Public License, version 2, as | ||
| 4 | * published by the Free Software Foundation. | ||
| 5 | * | ||
| 6 | * This program is distributed in the hope that it will be useful, | ||
| 7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 9 | * GNU General Public License for more details. | ||
| 10 | * | ||
| 11 | * You should have received a copy of the GNU General Public License | ||
| 12 | * along with this program; if not, write to the Free Software | ||
| 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 14 | * | ||
| 15 | * Copyright Novell Inc 2010 | ||
| 16 | * | ||
| 17 | * Authors: Alexander Graf <agraf@suse.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <asm/kvm.h> | ||
| 21 | #include <asm/kvm_ppc.h> | ||
| 22 | #include <asm/disassemble.h> | ||
| 23 | #include <asm/kvm_book3s.h> | ||
| 24 | #include <asm/kvm_fpu.h> | ||
| 25 | #include <asm/reg.h> | ||
| 26 | #include <asm/cacheflush.h> | ||
| 27 | #include <linux/vmalloc.h> | ||
| 28 | |||
| 29 | /* #define DEBUG */ | ||
| 30 | |||
| 31 | #ifdef DEBUG | ||
| 32 | #define dprintk printk | ||
| 33 | #else | ||
| 34 | #define dprintk(...) do { } while(0); | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #define OP_LFS 48 | ||
| 38 | #define OP_LFSU 49 | ||
| 39 | #define OP_LFD 50 | ||
| 40 | #define OP_LFDU 51 | ||
| 41 | #define OP_STFS 52 | ||
| 42 | #define OP_STFSU 53 | ||
| 43 | #define OP_STFD 54 | ||
| 44 | #define OP_STFDU 55 | ||
| 45 | #define OP_PSQ_L 56 | ||
| 46 | #define OP_PSQ_LU 57 | ||
| 47 | #define OP_PSQ_ST 60 | ||
| 48 | #define OP_PSQ_STU 61 | ||
| 49 | |||
| 50 | #define OP_31_LFSX 535 | ||
| 51 | #define OP_31_LFSUX 567 | ||
| 52 | #define OP_31_LFDX 599 | ||
| 53 | #define OP_31_LFDUX 631 | ||
| 54 | #define OP_31_STFSX 663 | ||
| 55 | #define OP_31_STFSUX 695 | ||
| 56 | #define OP_31_STFX 727 | ||
| 57 | #define OP_31_STFUX 759 | ||
| 58 | #define OP_31_LWIZX 887 | ||
| 59 | #define OP_31_STFIWX 983 | ||
| 60 | |||
| 61 | #define OP_59_FADDS 21 | ||
| 62 | #define OP_59_FSUBS 20 | ||
| 63 | #define OP_59_FSQRTS 22 | ||
| 64 | #define OP_59_FDIVS 18 | ||
| 65 | #define OP_59_FRES 24 | ||
| 66 | #define OP_59_FMULS 25 | ||
| 67 | #define OP_59_FRSQRTES 26 | ||
| 68 | #define OP_59_FMSUBS 28 | ||
| 69 | #define OP_59_FMADDS 29 | ||
| 70 | #define OP_59_FNMSUBS 30 | ||
| 71 | #define OP_59_FNMADDS 31 | ||
| 72 | |||
| 73 | #define OP_63_FCMPU 0 | ||
| 74 | #define OP_63_FCPSGN 8 | ||
| 75 | #define OP_63_FRSP 12 | ||
| 76 | #define OP_63_FCTIW 14 | ||
| 77 | #define OP_63_FCTIWZ 15 | ||
| 78 | #define OP_63_FDIV 18 | ||
| 79 | #define OP_63_FADD 21 | ||
| 80 | #define OP_63_FSQRT 22 | ||
| 81 | #define OP_63_FSEL 23 | ||
| 82 | #define OP_63_FRE 24 | ||
| 83 | #define OP_63_FMUL 25 | ||
| 84 | #define OP_63_FRSQRTE 26 | ||
| 85 | #define OP_63_FMSUB 28 | ||
| 86 | #define OP_63_FMADD 29 | ||
| 87 | #define OP_63_FNMSUB 30 | ||
| 88 | #define OP_63_FNMADD 31 | ||
| 89 | #define OP_63_FCMPO 32 | ||
| 90 | #define OP_63_MTFSB1 38 // XXX | ||
| 91 | #define OP_63_FSUB 20 | ||
| 92 | #define OP_63_FNEG 40 | ||
| 93 | #define OP_63_MCRFS 64 | ||
| 94 | #define OP_63_MTFSB0 70 | ||
| 95 | #define OP_63_FMR 72 | ||
| 96 | #define OP_63_MTFSFI 134 | ||
| 97 | #define OP_63_FABS 264 | ||
| 98 | #define OP_63_MFFS 583 | ||
| 99 | #define OP_63_MTFSF 711 | ||
| 100 | |||
| 101 | #define OP_4X_PS_CMPU0 0 | ||
| 102 | #define OP_4X_PSQ_LX 6 | ||
| 103 | #define OP_4XW_PSQ_STX 7 | ||
| 104 | #define OP_4A_PS_SUM0 10 | ||
| 105 | #define OP_4A_PS_SUM1 11 | ||
| 106 | #define OP_4A_PS_MULS0 12 | ||
| 107 | #define OP_4A_PS_MULS1 13 | ||
| 108 | #define OP_4A_PS_MADDS0 14 | ||
| 109 | #define OP_4A_PS_MADDS1 15 | ||
| 110 | #define OP_4A_PS_DIV 18 | ||
| 111 | #define OP_4A_PS_SUB 20 | ||
| 112 | #define OP_4A_PS_ADD 21 | ||
| 113 | #define OP_4A_PS_SEL 23 | ||
| 114 | #define OP_4A_PS_RES 24 | ||
| 115 | #define OP_4A_PS_MUL 25 | ||
| 116 | #define OP_4A_PS_RSQRTE 26 | ||
| 117 | #define OP_4A_PS_MSUB 28 | ||
| 118 | #define OP_4A_PS_MADD 29 | ||
| 119 | #define OP_4A_PS_NMSUB 30 | ||
| 120 | #define OP_4A_PS_NMADD 31 | ||
| 121 | #define OP_4X_PS_CMPO0 32 | ||
| 122 | #define OP_4X_PSQ_LUX 38 | ||
| 123 | #define OP_4XW_PSQ_STUX 39 | ||
| 124 | #define OP_4X_PS_NEG 40 | ||
| 125 | #define OP_4X_PS_CMPU1 64 | ||
| 126 | #define OP_4X_PS_MR 72 | ||
| 127 | #define OP_4X_PS_CMPO1 96 | ||
| 128 | #define OP_4X_PS_NABS 136 | ||
| 129 | #define OP_4X_PS_ABS 264 | ||
| 130 | #define OP_4X_PS_MERGE00 528 | ||
| 131 | #define OP_4X_PS_MERGE01 560 | ||
| 132 | #define OP_4X_PS_MERGE10 592 | ||
| 133 | #define OP_4X_PS_MERGE11 624 | ||
| 134 | |||
| 135 | #define SCALAR_NONE 0 | ||
| 136 | #define SCALAR_HIGH (1 << 0) | ||
| 137 | #define SCALAR_LOW (1 << 1) | ||
| 138 | #define SCALAR_NO_PS0 (1 << 2) | ||
| 139 | #define SCALAR_NO_PS1 (1 << 3) | ||
| 140 | |||
| 141 | #define GQR_ST_TYPE_MASK 0x00000007 | ||
| 142 | #define GQR_ST_TYPE_SHIFT 0 | ||
| 143 | #define GQR_ST_SCALE_MASK 0x00003f00 | ||
| 144 | #define GQR_ST_SCALE_SHIFT 8 | ||
| 145 | #define GQR_LD_TYPE_MASK 0x00070000 | ||
| 146 | #define GQR_LD_TYPE_SHIFT 16 | ||
| 147 | #define GQR_LD_SCALE_MASK 0x3f000000 | ||
| 148 | #define GQR_LD_SCALE_SHIFT 24 | ||
| 149 | |||
| 150 | #define GQR_QUANTIZE_FLOAT 0 | ||
| 151 | #define GQR_QUANTIZE_U8 4 | ||
| 152 | #define GQR_QUANTIZE_U16 5 | ||
| 153 | #define GQR_QUANTIZE_S8 6 | ||
| 154 | #define GQR_QUANTIZE_S16 7 | ||
| 155 | |||
| 156 | #define FPU_LS_SINGLE 0 | ||
| 157 | #define FPU_LS_DOUBLE 1 | ||
| 158 | #define FPU_LS_SINGLE_LOW 2 | ||
| 159 | |||
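The GQR_* masks above describe the Gekko/Broadway graphics quantization registers consulted by psq_l/psq_st: the store type and scale occupy the low half of the 32-bit register, the load type and scale the high half. Splitting a GQR value with those masks would look roughly like this (illustrative helper only, not part of the patch; it assumes the kernel types already included by this file):

static void gqr_decode(u32 gqr, int *ld_type, int *ld_scale,
		       int *st_type, int *st_scale)
{
	*st_type  = (gqr & GQR_ST_TYPE_MASK)  >> GQR_ST_TYPE_SHIFT;
	*st_scale = (gqr & GQR_ST_SCALE_MASK) >> GQR_ST_SCALE_SHIFT;
	*ld_type  = (gqr & GQR_LD_TYPE_MASK)  >> GQR_LD_TYPE_SHIFT;
	*ld_scale = (gqr & GQR_LD_SCALE_MASK) >> GQR_LD_SCALE_SHIFT;
}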
| 160 | static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) | ||
| 161 | { | ||
| 162 | struct thread_struct t; | ||
| 163 | |||
| 164 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 165 | cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); | ||
| 166 | } | ||
| 167 | |||
| 168 | static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) | ||
| 169 | { | ||
| 170 | u64 dsisr; | ||
| 171 | |||
| 172 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); | ||
| 173 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); | ||
| 174 | vcpu->arch.dear = eaddr; | ||
| 175 | /* Page Fault */ | ||
| 176 | dsisr = kvmppc_set_field(0, 33, 33, 1); | ||
| 177 | if (is_store) | ||
| 178 | to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); | ||
| 179 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); | ||
| 180 | } | ||
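kvmppc_set_field() works with 64-bit IBM bit numbering (bit 0 is the MSB), so the dsisr writes above land in the architected 32-bit DSISR as the "no translation found" and "access was a store" bits. In plain constants, for orientation (the macro names below are just labels):

/* bit 33 of the 64-bit value == bit 1 of the 32-bit DSISR */
#define PS_DSISR_NOTFOUND	0x40000000
/* bit 38 of the 64-bit value == bit 6 of the 32-bit DSISR */
#define PS_DSISR_STORE		0x02000000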
| 181 | |||
| 182 | static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
| 183 | int rs, ulong addr, int ls_type) | ||
| 184 | { | ||
| 185 | int emulated = EMULATE_FAIL; | ||
| 186 | struct thread_struct t; | ||
| 187 | int r; | ||
| 188 | char tmp[8]; | ||
| 189 | int len = sizeof(u32); | ||
| 190 | |||
| 191 | if (ls_type == FPU_LS_DOUBLE) | ||
| 192 | len = sizeof(u64); | ||
| 193 | |||
| 194 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 195 | |||
| 196 | /* read from memory */ | ||
| 197 | r = kvmppc_ld(vcpu, &addr, len, tmp, true); | ||
| 198 | vcpu->arch.paddr_accessed = addr; | ||
| 199 | |||
| 200 | if (r < 0) { | ||
| 201 | kvmppc_inject_pf(vcpu, addr, false); | ||
| 202 | goto done_load; | ||
| 203 | } else if (r == EMULATE_DO_MMIO) { | ||
| 204 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); | ||
| 205 | goto done_load; | ||
| 206 | } | ||
| 207 | |||
| 208 | emulated = EMULATE_DONE; | ||
| 209 | |||
| 210 | /* put in registers */ | ||
| 211 | switch (ls_type) { | ||
| 212 | case FPU_LS_SINGLE: | ||
| 213 | cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); | ||
| 214 | vcpu->arch.qpr[rs] = *((u32*)tmp); | ||
| 215 | break; | ||
| 216 | case FPU_LS_DOUBLE: | ||
| 217 | vcpu->arch.fpr[rs] = *((u64*)tmp); | ||
| 218 | break; | ||
| 219 | } | ||
| 220 | |||
| 221 | dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp, | ||
| 222 | addr, len); | ||
| 223 | |||
| 224 | done_load: | ||
| 225 | return emulated; | ||
| 226 | } | ||
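kvmppc_emulate_fpr_load() and the store/psq variants that follow share one pattern around the guest memory access: a negative return from kvmppc_ld()/kvmppc_st() turns into an injected data storage interrupt, EMULATE_DO_MMIO hands the access to KVM's regular MMIO machinery, and only a plain success moves data between memory and the FPR/QPR. Condensed into a hedged sketch (hypothetical helper, reusing the types and symbols already visible in this file):

static int fp_access_result(struct kvm_vcpu *vcpu, int r, ulong addr,
			    bool is_store)
{
	if (r < 0) {			/* no translation: fault the guest */
		kvmppc_inject_pf(vcpu, addr, is_store);
		return EMULATE_FAIL;
	}
	if (r == EMULATE_DO_MMIO)	/* emulated device: defer to MMIO  */
		return EMULATE_DO_MMIO;
	return EMULATE_DONE;		/* ordinary RAM: data already here */
}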
| 227 | |||
| 228 | static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
| 229 | int rs, ulong addr, int ls_type) | ||
| 230 | { | ||
| 231 | int emulated = EMULATE_FAIL; | ||
| 232 | struct thread_struct t; | ||
| 233 | int r; | ||
| 234 | char tmp[8]; | ||
| 235 | u64 val; | ||
| 236 | int len; | ||
| 237 | |||
| 238 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 239 | |||
| 240 | switch (ls_type) { | ||
| 241 | case FPU_LS_SINGLE: | ||
| 242 | cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); | ||
| 243 | val = *((u32*)tmp); | ||
| 244 | len = sizeof(u32); | ||
| 245 | break; | ||
| 246 | case FPU_LS_SINGLE_LOW: | ||
| 247 | *((u32*)tmp) = vcpu->arch.fpr[rs]; | ||
| 248 | val = vcpu->arch.fpr[rs] & 0xffffffff; | ||
| 249 | len = sizeof(u32); | ||
| 250 | break; | ||
| 251 | case FPU_LS_DOUBLE: | ||
| 252 | *((u64*)tmp) = vcpu->arch.fpr[rs]; | ||
| 253 | val = vcpu->arch.fpr[rs]; | ||
| 254 | len = sizeof(u64); | ||
| 255 | break; | ||
| 256 | default: | ||
| 257 | val = 0; | ||
| 258 | len = 0; | ||
| 259 | } | ||
| 260 | |||
| 261 | r = kvmppc_st(vcpu, &addr, len, tmp, true); | ||
| 262 | vcpu->arch.paddr_accessed = addr; | ||
| 263 | if (r < 0) { | ||
| 264 | kvmppc_inject_pf(vcpu, addr, true); | ||
| 265 | } else if (r == EMULATE_DO_MMIO) { | ||
| 266 | emulated = kvmppc_handle_store(run, vcpu, val, len, 1); | ||
| 267 | } else { | ||
| 268 | emulated = EMULATE_DONE; | ||
| 269 | } | ||
| 270 | |||
| 271 | dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n", | ||
| 272 | val, addr, len); | ||
| 273 | |||
| 274 | return emulated; | ||
| 275 | } | ||
| 276 | |||
| 277 | static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
| 278 | int rs, ulong addr, bool w, int i) | ||
| 279 | { | ||
| 280 | int emulated = EMULATE_FAIL; | ||
| 281 | struct thread_struct t; | ||
| 282 | int r; | ||
| 283 | float one = 1.0; | ||
| 284 | u32 tmp[2]; | ||
| 285 | |||
| 286 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 287 | |||
| 288 | /* read from memory */ | ||
| 289 | if (w) { | ||
| 290 | r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); | ||
| 291 | memcpy(&tmp[1], &one, sizeof(u32)); | ||
| 292 | } else { | ||
| 293 | r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true); | ||
| 294 | } | ||
| 295 | vcpu->arch.paddr_accessed = addr; | ||
| 296 | if (r < 0) { | ||
| 297 | kvmppc_inject_pf(vcpu, addr, false); | ||
| 298 | goto done_load; | ||
| 299 | } else if ((r == EMULATE_DO_MMIO) && w) { | ||
| 300 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); | ||
| 301 | vcpu->arch.qpr[rs] = tmp[1]; | ||
| 302 | goto done_load; | ||
| 303 | } else if (r == EMULATE_DO_MMIO) { | ||
| 304 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); | ||
| 305 | goto done_load; | ||
| 306 | } | ||
| 307 | |||
| 308 | emulated = EMULATE_DONE; | ||
| 309 | |||
| 310 | /* put in registers */ | ||
| 311 | cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); | ||
| 312 | vcpu->arch.qpr[rs] = tmp[1]; | ||
| 313 | |||
| 314 | dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], | ||
| 315 | tmp[1], addr, w ? 4 : 8); | ||
| 316 | |||
| 317 | done_load: | ||
| 318 | return emulated; | ||
| 319 | } | ||
| 320 | |||
| 321 | static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
| 322 | int rs, ulong addr, bool w, int i) | ||
| 323 | { | ||
| 324 | int emulated = EMULATE_FAIL; | ||
| 325 | struct thread_struct t; | ||
| 326 | int r; | ||
| 327 | u32 tmp[2]; | ||
| 328 | int len = w ? sizeof(u32) : sizeof(u64); | ||
| 329 | |||
| 330 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 331 | |||
| 332 | cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); | ||
| 333 | tmp[1] = vcpu->arch.qpr[rs]; | ||
| 334 | |||
| 335 | r = kvmppc_st(vcpu, &addr, len, tmp, true); | ||
| 336 | vcpu->arch.paddr_accessed = addr; | ||
| 337 | if (r < 0) { | ||
| 338 | kvmppc_inject_pf(vcpu, addr, true); | ||
| 339 | } else if ((r == EMULATE_DO_MMIO) && w) { | ||
| 340 | emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); | ||
| 341 | } else if (r == EMULATE_DO_MMIO) { | ||
| 342 | u64 val = ((u64)tmp[0] << 32) | tmp[1]; | ||
| 343 | emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); | ||
| 344 | } else { | ||
| 345 | emulated = EMULATE_DONE; | ||
| 346 | } | ||
| 347 | |||
| 348 | dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n", | ||
| 349 | tmp[0], tmp[1], addr, len); | ||
| 350 | |||
| 351 | return emulated; | ||
| 352 | } | ||
| 353 | |||
| 354 | /* | ||
| 355 | * Cuts out inst bits with ordering according to spec. | ||
| 356 | * That means the leftmost bit is zero. All given bits are included. | ||
| 357 | */ | ||
| 358 | static inline u32 inst_get_field(u32 inst, int msb, int lsb) | ||
| 359 | { | ||
| 360 | return kvmppc_get_field(inst, msb + 32, lsb + 32); | ||
| 361 | } | ||
| 362 | |||
| 363 | /* | ||
| 364 | * Replaces inst bits with ordering according to spec. | ||
| 365 | */ | ||
| 366 | static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) | ||
| 367 | { | ||
| 368 | return kvmppc_set_field(inst, msb + 32, lsb + 32, value); | ||
| 369 | } | ||
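Both helpers use the Power ISA bit-numbering convention: bits are counted from the most-significant end, so bit 0 of the 32-bit instruction word is its MSB and bit 31 its LSB, and the "+ 32" maps that onto the 64-bit numbering kvmppc_get_field() expects. For a 32-bit word the extraction is equivalent to roughly this (illustrative helper only):

static inline u32 bits32(u32 w, int msb, int lsb)
{
	int width = lsb - msb + 1;

	return (w >> (31 - lsb)) & ((1u << width) - 1);
}

/* e.g. bits32(inst, 6, 10) is the RT/RD field and bits32(inst, 21, 30)
 * the extended opcode tested throughout this file. */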
| 370 | |||
| 371 | bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) | ||
| 372 | { | ||
| 373 | if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) | ||
| 374 | return false; | ||
| 375 | |||
| 376 | switch (get_op(inst)) { | ||
| 377 | case OP_PSQ_L: | ||
| 378 | case OP_PSQ_LU: | ||
| 379 | case OP_PSQ_ST: | ||
| 380 | case OP_PSQ_STU: | ||
| 381 | case OP_LFS: | ||
| 382 | case OP_LFSU: | ||
| 383 | case OP_LFD: | ||
| 384 | case OP_LFDU: | ||
| 385 | case OP_STFS: | ||
| 386 | case OP_STFSU: | ||
| 387 | case OP_STFD: | ||
| 388 | case OP_STFDU: | ||
| 389 | return true; | ||
| 390 | case 4: | ||
| 391 | /* X form */ | ||
| 392 | switch (inst_get_field(inst, 21, 30)) { | ||
| 393 | case OP_4X_PS_CMPU0: | ||
| 394 | case OP_4X_PSQ_LX: | ||
| 395 | case OP_4X_PS_CMPO0: | ||
| 396 | case OP_4X_PSQ_LUX: | ||
| 397 | case OP_4X_PS_NEG: | ||
| 398 | case OP_4X_PS_CMPU1: | ||
| 399 | case OP_4X_PS_MR: | ||
| 400 | case OP_4X_PS_CMPO1: | ||
| 401 | case OP_4X_PS_NABS: | ||
| 402 | case OP_4X_PS_ABS: | ||
| 403 | case OP_4X_PS_MERGE00: | ||
| 404 | case OP_4X_PS_MERGE01: | ||
| 405 | case OP_4X_PS_MERGE10: | ||
| 406 | case OP_4X_PS_MERGE11: | ||
| 407 | return true; | ||
| 408 | } | ||
| 409 | /* XW form */ | ||
| 410 | switch (inst_get_field(inst, 25, 30)) { | ||
| 411 | case OP_4XW_PSQ_STX: | ||
| 412 | case OP_4XW_PSQ_STUX: | ||
| 413 | return true; | ||
| 414 | } | ||
| 415 | /* A form */ | ||
| 416 | switch (inst_get_field(inst, 26, 30)) { | ||
| 417 | case OP_4A_PS_SUM1: | ||
| 418 | case OP_4A_PS_SUM0: | ||
| 419 | case OP_4A_PS_MULS0: | ||
| 420 | case OP_4A_PS_MULS1: | ||
| 421 | case OP_4A_PS_MADDS0: | ||
| 422 | case OP_4A_PS_MADDS1: | ||
| 423 | case OP_4A_PS_DIV: | ||
| 424 | case OP_4A_PS_SUB: | ||
| 425 | case OP_4A_PS_ADD: | ||
| 426 | case OP_4A_PS_SEL: | ||
| 427 | case OP_4A_PS_RES: | ||
| 428 | case OP_4A_PS_MUL: | ||
| 429 | case OP_4A_PS_RSQRTE: | ||
| 430 | case OP_4A_PS_MSUB: | ||
| 431 | case OP_4A_PS_MADD: | ||
| 432 | case OP_4A_PS_NMSUB: | ||
| 433 | case OP_4A_PS_NMADD: | ||
| 434 | return true; | ||
| 435 | } | ||
| 436 | break; | ||
| 437 | case 59: | ||
| 438 | switch (inst_get_field(inst, 21, 30)) { | ||
| 439 | case OP_59_FADDS: | ||
| 440 | case OP_59_FSUBS: | ||
| 441 | case OP_59_FDIVS: | ||
| 442 | case OP_59_FRES: | ||
| 443 | case OP_59_FRSQRTES: | ||
| 444 | return true; | ||
| 445 | } | ||
| 446 | switch (inst_get_field(inst, 26, 30)) { | ||
| 447 | case OP_59_FMULS: | ||
| 448 | case OP_59_FMSUBS: | ||
| 449 | case OP_59_FMADDS: | ||
| 450 | case OP_59_FNMSUBS: | ||
| 451 | case OP_59_FNMADDS: | ||
| 452 | return true; | ||
| 453 | } | ||
| 454 | break; | ||
| 455 | case 63: | ||
| 456 | switch (inst_get_field(inst, 21, 30)) { | ||
| 457 | case OP_63_MTFSB0: | ||
| 458 | case OP_63_MTFSB1: | ||
| 459 | case OP_63_MTFSF: | ||
| 460 | case OP_63_MTFSFI: | ||
| 461 | case OP_63_MCRFS: | ||
| 462 | case OP_63_MFFS: | ||
| 463 | case OP_63_FCMPU: | ||
| 464 | case OP_63_FCMPO: | ||
| 465 | case OP_63_FNEG: | ||
| 466 | case OP_63_FMR: | ||
| 467 | case OP_63_FABS: | ||
| 468 | case OP_63_FRSP: | ||
| 469 | case OP_63_FDIV: | ||
| 470 | case OP_63_FADD: | ||
| 471 | case OP_63_FSUB: | ||
| 472 | case OP_63_FCTIW: | ||
| 473 | case OP_63_FCTIWZ: | ||
| 474 | case OP_63_FRSQRTE: | ||
| 475 | case OP_63_FCPSGN: | ||
| 476 | return true; | ||
| 477 | } | ||
| 478 | switch (inst_get_field(inst, 26, 30)) { | ||
| 479 | case OP_63_FMUL: | ||
| 480 | case OP_63_FSEL: | ||
| 481 | case OP_63_FMSUB: | ||
| 482 | case OP_63_FMADD: | ||
| 483 | case OP_63_FNMSUB: | ||
| 484 | case OP_63_FNMADD: | ||
| 485 | return true; | ||
| 486 | } | ||
| 487 | break; | ||
| 488 | case 31: | ||
| 489 | switch (inst_get_field(inst, 21, 30)) { | ||
| 490 | case OP_31_LFSX: | ||
| 491 | case OP_31_LFSUX: | ||
| 492 | case OP_31_LFDX: | ||
| 493 | case OP_31_LFDUX: | ||
| 494 | case OP_31_STFSX: | ||
| 495 | case OP_31_STFSUX: | ||
| 496 | case OP_31_STFX: | ||
| 497 | case OP_31_STFUX: | ||
| 498 | case OP_31_STFIWX: | ||
| 499 | return true; | ||
| 500 | } | ||
| 501 | break; | ||
| 502 | } | ||
| 503 | |||
| 504 | return false; | ||
| 505 | } | ||
| 506 | |||
| 507 | static int get_d_signext(u32 inst) | ||
| 508 | { | ||
| 509 | int d = inst & 0x8ff; | ||
| 510 | |||
| 511 | if (d & 0x800) | ||
| 512 | return -(d & 0x7ff); | ||
| 513 | |||
| 514 | return (d & 0x7ff); | ||
| 515 | } | ||
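psq_l/psq_st keep their displacement in the low 12 bits of the instruction with 0x800 as the sign bit; the value returned here is later added to (rA|0), or to rA for the update forms, in kvmppc_emulate_paired_single(). For comparison, a plain two's-complement sign extension of a 12-bit field would read (illustration only, not what the helper above does):

static int sext12(u32 v)
{
	v &= 0xfff;					/* 12-bit field      */
	return (v & 0x800) ? (int)v - 0x1000 : (int)v;	/* sign bit at 0x800 */
}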
| 516 | |||
| 517 | static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | ||
| 518 | int reg_out, int reg_in1, int reg_in2, | ||
| 519 | int reg_in3, int scalar, | ||
| 520 | void (*func)(struct thread_struct *t, | ||
| 521 | u32 *dst, u32 *src1, | ||
| 522 | u32 *src2, u32 *src3)) | ||
| 523 | { | ||
| 524 | u32 *qpr = vcpu->arch.qpr; | ||
| 525 | u64 *fpr = vcpu->arch.fpr; | ||
| 526 | u32 ps0_out; | ||
| 527 | u32 ps0_in1, ps0_in2, ps0_in3; | ||
| 528 | u32 ps1_in1, ps1_in2, ps1_in3; | ||
| 529 | struct thread_struct t; | ||
| 530 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 531 | |||
| 532 | /* RC */ | ||
| 533 | WARN_ON(rc); | ||
| 534 | |||
| 535 | /* PS0 */ | ||
| 536 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | ||
| 537 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | ||
| 538 | cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); | ||
| 539 | |||
| 540 | if (scalar & SCALAR_LOW) | ||
| 541 | ps0_in2 = qpr[reg_in2]; | ||
| 542 | |||
| 543 | func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); | ||
| 544 | |||
| 545 | dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | ||
| 546 | ps0_in1, ps0_in2, ps0_in3, ps0_out); | ||
| 547 | |||
| 548 | if (!(scalar & SCALAR_NO_PS0)) | ||
| 549 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | ||
| 550 | |||
| 551 | /* PS1 */ | ||
| 552 | ps1_in1 = qpr[reg_in1]; | ||
| 553 | ps1_in2 = qpr[reg_in2]; | ||
| 554 | ps1_in3 = qpr[reg_in3]; | ||
| 555 | |||
| 556 | if (scalar & SCALAR_HIGH) | ||
| 557 | ps1_in2 = ps0_in2; | ||
| 558 | |||
| 559 | if (!(scalar & SCALAR_NO_PS1)) | ||
| 560 | func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); | ||
| 561 | |||
| 562 | dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | ||
| 563 | ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); | ||
| 564 | |||
| 565 | return EMULATE_DONE; | ||
| 566 | } | ||
| 567 | |||
| 568 | static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | ||
| 569 | int reg_out, int reg_in1, int reg_in2, | ||
| 570 | int scalar, | ||
| 571 | void (*func)(struct thread_struct *t, | ||
| 572 | u32 *dst, u32 *src1, | ||
| 573 | u32 *src2)) | ||
| 574 | { | ||
| 575 | u32 *qpr = vcpu->arch.qpr; | ||
| 576 | u64 *fpr = vcpu->arch.fpr; | ||
| 577 | u32 ps0_out; | ||
| 578 | u32 ps0_in1, ps0_in2; | ||
| 579 | u32 ps1_out; | ||
| 580 | u32 ps1_in1, ps1_in2; | ||
| 581 | struct thread_struct t; | ||
| 582 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 583 | |||
| 584 | /* RC */ | ||
| 585 | WARN_ON(rc); | ||
| 586 | |||
| 587 | /* PS0 */ | ||
| 588 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | ||
| 589 | |||
| 590 | if (scalar & SCALAR_LOW) | ||
| 591 | ps0_in2 = qpr[reg_in2]; | ||
| 592 | else | ||
| 593 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | ||
| 594 | |||
| 595 | func(&t, &ps0_out, &ps0_in1, &ps0_in2); | ||
| 596 | |||
| 597 | if (!(scalar & SCALAR_NO_PS0)) { | ||
| 598 | dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", | ||
| 599 | ps0_in1, ps0_in2, ps0_out); | ||
| 600 | |||
| 601 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | ||
| 602 | } | ||
| 603 | |||
| 604 | /* PS1 */ | ||
| 605 | ps1_in1 = qpr[reg_in1]; | ||
| 606 | ps1_in2 = qpr[reg_in2]; | ||
| 607 | |||
| 608 | if (scalar & SCALAR_HIGH) | ||
| 609 | ps1_in2 = ps0_in2; | ||
| 610 | |||
| 611 | func(&t, &ps1_out, &ps1_in1, &ps1_in2); | ||
| 612 | |||
| 613 | if (!(scalar & SCALAR_NO_PS1)) { | ||
| 614 | qpr[reg_out] = ps1_out; | ||
| 615 | |||
| 616 | dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n", | ||
| 617 | ps1_in1, ps1_in2, qpr[reg_out]); | ||
| 618 | } | ||
| 619 | |||
| 620 | return EMULATE_DONE; | ||
| 621 | } | ||
| 622 | |||
| 623 | static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, | ||
| 624 | int reg_out, int reg_in, | ||
| 625 | void (*func)(struct thread_struct *t, | ||
| 626 | u32 *dst, u32 *src1)) | ||
| 627 | { | ||
| 628 | u32 *qpr = vcpu->arch.qpr; | ||
| 629 | u64 *fpr = vcpu->arch.fpr; | ||
| 630 | u32 ps0_out, ps0_in; | ||
| 631 | u32 ps1_in; | ||
| 632 | struct thread_struct t; | ||
| 633 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 634 | |||
| 635 | /* RC */ | ||
| 636 | WARN_ON(rc); | ||
| 637 | |||
| 638 | /* PS0 */ | ||
| 639 | cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); | ||
| 640 | func(&t, &ps0_out, &ps0_in); | ||
| 641 | |||
| 642 | dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", | ||
| 643 | ps0_in, ps0_out); | ||
| 644 | |||
| 645 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | ||
| 646 | |||
| 647 | /* PS1 */ | ||
| 648 | ps1_in = qpr[reg_in]; | ||
| 649 | func(&t, &qpr[reg_out], &ps1_in); | ||
| 650 | |||
| 651 | dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", | ||
| 652 | ps1_in, qpr[reg_out]); | ||
| 653 | |||
| 654 | return EMULATE_DONE; | ||
| 655 | } | ||
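The three kvmppc_ps_* helpers above all follow the same model: PS0 of a paired-single register lives in the normal FPR as a double and is converted to a single around the operation, PS1 lives as a raw IEEE single in the qpr[] shadow array, and the scalar flags decide which slot borrows the scalar operand or skips its write-back. A user-space sketch of the two-input case (hypothetical names, using the host FPU instead of the kernel's fps_* soft-float routines):

#include <stdint.h>
#include <string.h>

static float u2f(uint32_t u) { float f; memcpy(&f, &u, 4); return f; }
static uint32_t f2u(float f) { uint32_t u; memcpy(&u, &f, 4); return u; }

/* Apply one single-precision operation to both slots of a paired single. */
static void ps_two_in_sketch(double *fpr_d, uint32_t *qpr_d,
			     double fpr_a, uint32_t qpr_a,
			     double fpr_b, uint32_t qpr_b,
			     float (*op)(float, float))
{
	*fpr_d = op((float)fpr_a, (float)fpr_b);	/* PS0, kept as double */
	*qpr_d = f2u(op(u2f(qpr_a), u2f(qpr_b)));	/* PS1, kept as single */
}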
| 656 | |||
| 657 | int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
| 658 | { | ||
| 659 | u32 inst = kvmppc_get_last_inst(vcpu); | ||
| 660 | enum emulation_result emulated = EMULATE_DONE; | ||
| 661 | |||
| 662 | int ax_rd = inst_get_field(inst, 6, 10); | ||
| 663 | int ax_ra = inst_get_field(inst, 11, 15); | ||
| 664 | int ax_rb = inst_get_field(inst, 16, 20); | ||
| 665 | int ax_rc = inst_get_field(inst, 21, 25); | ||
| 666 | short full_d = inst_get_field(inst, 16, 31); | ||
| 667 | |||
| 668 | u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; | ||
| 669 | u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; | ||
| 670 | u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; | ||
| 671 | u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; | ||
| 672 | |||
| 673 | bool rcomp = (inst & 1) ? true : false; | ||
| 674 | u32 cr = kvmppc_get_cr(vcpu); | ||
| 675 | struct thread_struct t; | ||
| 676 | #ifdef DEBUG | ||
| 677 | int i; | ||
| 678 | #endif | ||
| 679 | |||
| 680 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 681 | |||
| 682 | if (!kvmppc_inst_is_paired_single(vcpu, inst)) | ||
| 683 | return EMULATE_FAIL; | ||
| 684 | |||
| 685 | if (!(vcpu->arch.msr & MSR_FP)) { | ||
| 686 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); | ||
| 687 | return EMULATE_AGAIN; | ||
| 688 | } | ||
| 689 | |||
| 690 | kvmppc_giveup_ext(vcpu, MSR_FP); | ||
| 691 | preempt_disable(); | ||
| 692 | enable_kernel_fp(); | ||
| 693 | /* Do we need to clear FE0 / FE1 here? Don't think so. */ | ||
| 694 | |||
| 695 | #ifdef DEBUG | ||
| 696 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | ||
| 697 | u32 f; | ||
| 698 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | ||
| 699 | dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", | ||
| 700 | i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); | ||
| 701 | } | ||
| 702 | #endif | ||
| 703 | |||
| 704 | switch (get_op(inst)) { | ||
| 705 | case OP_PSQ_L: | ||
| 706 | { | ||
| 707 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
| 708 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
| 709 | int i = inst_get_field(inst, 17, 19); | ||
| 710 | |||
| 711 | addr += get_d_signext(inst); | ||
| 712 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
| 713 | break; | ||
| 714 | } | ||
| 715 | case OP_PSQ_LU: | ||
| 716 | { | ||
| 717 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
| 718 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
| 719 | int i = inst_get_field(inst, 17, 19); | ||
| 720 | |||
| 721 | addr += get_d_signext(inst); | ||
| 722 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
| 723 | |||
| 724 | if (emulated == EMULATE_DONE) | ||
| 725 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 726 | break; | ||
| 727 | } | ||
| 728 | case OP_PSQ_ST: | ||
| 729 | { | ||
| 730 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
| 731 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
| 732 | int i = inst_get_field(inst, 17, 19); | ||
| 733 | |||
| 734 | addr += get_d_signext(inst); | ||
| 735 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
| 736 | break; | ||
| 737 | } | ||
| 738 | case OP_PSQ_STU: | ||
| 739 | { | ||
| 740 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
| 741 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
| 742 | int i = inst_get_field(inst, 17, 19); | ||
| 743 | |||
| 744 | addr += get_d_signext(inst); | ||
| 745 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
| 746 | |||
| 747 | if (emulated == EMULATE_DONE) | ||
| 748 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 749 | break; | ||
| 750 | } | ||
| 751 | case 4: | ||
| 752 | /* X form */ | ||
| 753 | switch (inst_get_field(inst, 21, 30)) { | ||
| 754 | case OP_4X_PS_CMPU0: | ||
| 755 | /* XXX */ | ||
| 756 | emulated = EMULATE_FAIL; | ||
| 757 | break; | ||
| 758 | case OP_4X_PSQ_LX: | ||
| 759 | { | ||
| 760 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
| 761 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
| 762 | int i = inst_get_field(inst, 22, 24); | ||
| 763 | |||
| 764 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
| 765 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
| 766 | break; | ||
| 767 | } | ||
| 768 | case OP_4X_PS_CMPO0: | ||
| 769 | /* XXX */ | ||
| 770 | emulated = EMULATE_FAIL; | ||
| 771 | break; | ||
| 772 | case OP_4X_PSQ_LUX: | ||
| 773 | { | ||
| 774 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
| 775 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
| 776 | int i = inst_get_field(inst, 22, 24); | ||
| 777 | |||
| 778 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
| 779 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
| 780 | |||
| 781 | if (emulated == EMULATE_DONE) | ||
| 782 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 783 | break; | ||
| 784 | } | ||
| 785 | case OP_4X_PS_NEG: | ||
| 786 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
| 787 | vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; | ||
| 788 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
| 789 | vcpu->arch.qpr[ax_rd] ^= 0x80000000; | ||
| 790 | break; | ||
| 791 | case OP_4X_PS_CMPU1: | ||
| 792 | /* XXX */ | ||
| 793 | emulated = EMULATE_FAIL; | ||
| 794 | break; | ||
| 795 | case OP_4X_PS_MR: | ||
| 796 | WARN_ON(rcomp); | ||
| 797 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
| 798 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
| 799 | break; | ||
| 800 | case OP_4X_PS_CMPO1: | ||
| 801 | /* XXX */ | ||
| 802 | emulated = EMULATE_FAIL; | ||
| 803 | break; | ||
| 804 | case OP_4X_PS_NABS: | ||
| 805 | WARN_ON(rcomp); | ||
| 806 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
| 807 | vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; | ||
| 808 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
| 809 | vcpu->arch.qpr[ax_rd] |= 0x80000000; | ||
| 810 | break; | ||
| 811 | case OP_4X_PS_ABS: | ||
| 812 | WARN_ON(rcomp); | ||
| 813 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
| 814 | vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; | ||
| 815 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
| 816 | vcpu->arch.qpr[ax_rd] &= ~0x80000000; | ||
| 817 | break; | ||
| 818 | case OP_4X_PS_MERGE00: | ||
| 819 | WARN_ON(rcomp); | ||
| 820 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; | ||
| 821 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | ||
| 822 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | ||
| 823 | (float*)&vcpu->arch.qpr[ax_rd], &t); | ||
| 824 | break; | ||
| 825 | case OP_4X_PS_MERGE01: | ||
| 826 | WARN_ON(rcomp); | ||
| 827 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; | ||
| 828 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
| 829 | break; | ||
| 830 | case OP_4X_PS_MERGE10: | ||
| 831 | WARN_ON(rcomp); | ||
| 832 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | ||
| 833 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | ||
| 834 | (double*)&vcpu->arch.fpr[ax_rd], &t); | ||
| 835 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | ||
| 836 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | ||
| 837 | (float*)&vcpu->arch.qpr[ax_rd], &t); | ||
| 838 | break; | ||
| 839 | case OP_4X_PS_MERGE11: | ||
| 840 | WARN_ON(rcomp); | ||
| 841 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | ||
| 842 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | ||
| 843 | (double*)&vcpu->arch.fpr[ax_rd], &t); | ||
| 844 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
| 845 | break; | ||
| 846 | } | ||
| 847 | /* XW form */ | ||
| 848 | switch (inst_get_field(inst, 25, 30)) { | ||
| 849 | case OP_4XW_PSQ_STX: | ||
| 850 | { | ||
| 851 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
| 852 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
| 853 | int i = inst_get_field(inst, 22, 24); | ||
| 854 | |||
| 855 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
| 856 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
| 857 | break; | ||
| 858 | } | ||
| 859 | case OP_4XW_PSQ_STUX: | ||
| 860 | { | ||
| 861 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
| 862 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
| 863 | int i = inst_get_field(inst, 22, 24); | ||
| 864 | |||
| 865 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
| 866 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
| 867 | |||
| 868 | if (emulated == EMULATE_DONE) | ||
| 869 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 870 | break; | ||
| 871 | } | ||
| 872 | } | ||
| 873 | /* A form */ | ||
| 874 | switch (inst_get_field(inst, 26, 30)) { | ||
| 875 | case OP_4A_PS_SUM1: | ||
| 876 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 877 | ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); | ||
| 878 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; | ||
| 879 | break; | ||
| 880 | case OP_4A_PS_SUM0: | ||
| 881 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 882 | ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds); | ||
| 883 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc]; | ||
| 884 | break; | ||
| 885 | case OP_4A_PS_MULS0: | ||
| 886 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 887 | ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls); | ||
| 888 | break; | ||
| 889 | case OP_4A_PS_MULS1: | ||
| 890 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 891 | ax_ra, ax_rc, SCALAR_LOW, fps_fmuls); | ||
| 892 | break; | ||
| 893 | case OP_4A_PS_MADDS0: | ||
| 894 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
| 895 | ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds); | ||
| 896 | break; | ||
| 897 | case OP_4A_PS_MADDS1: | ||
| 898 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
| 899 | ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds); | ||
| 900 | break; | ||
| 901 | case OP_4A_PS_DIV: | ||
| 902 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 903 | ax_ra, ax_rb, SCALAR_NONE, fps_fdivs); | ||
| 904 | break; | ||
| 905 | case OP_4A_PS_SUB: | ||
| 906 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 907 | ax_ra, ax_rb, SCALAR_NONE, fps_fsubs); | ||
| 908 | break; | ||
| 909 | case OP_4A_PS_ADD: | ||
| 910 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 911 | ax_ra, ax_rb, SCALAR_NONE, fps_fadds); | ||
| 912 | break; | ||
| 913 | case OP_4A_PS_SEL: | ||
| 914 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
| 915 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel); | ||
| 916 | break; | ||
| 917 | case OP_4A_PS_RES: | ||
| 918 | emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, | ||
| 919 | ax_rb, fps_fres); | ||
| 920 | break; | ||
| 921 | case OP_4A_PS_MUL: | ||
| 922 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
| 923 | ax_ra, ax_rc, SCALAR_NONE, fps_fmuls); | ||
| 924 | break; | ||
| 925 | case OP_4A_PS_RSQRTE: | ||
| 926 | emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, | ||
| 927 | ax_rb, fps_frsqrte); | ||
| 928 | break; | ||
| 929 | case OP_4A_PS_MSUB: | ||
| 930 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
| 931 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs); | ||
| 932 | break; | ||
| 933 | case OP_4A_PS_MADD: | ||
| 934 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
| 935 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds); | ||
| 936 | break; | ||
| 937 | case OP_4A_PS_NMSUB: | ||
| 938 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
| 939 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs); | ||
| 940 | break; | ||
| 941 | case OP_4A_PS_NMADD: | ||
| 942 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
| 943 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds); | ||
| 944 | break; | ||
| 945 | } | ||
| 946 | break; | ||
| 947 | |||
| 948 | /* Real FPU operations */ | ||
| 949 | |||
| 950 | case OP_LFS: | ||
| 951 | { | ||
| 952 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
| 953 | |||
| 954 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
| 955 | FPU_LS_SINGLE); | ||
| 956 | break; | ||
| 957 | } | ||
| 958 | case OP_LFSU: | ||
| 959 | { | ||
| 960 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
| 961 | |||
| 962 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
| 963 | FPU_LS_SINGLE); | ||
| 964 | |||
| 965 | if (emulated == EMULATE_DONE) | ||
| 966 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 967 | break; | ||
| 968 | } | ||
| 969 | case OP_LFD: | ||
| 970 | { | ||
| 971 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
| 972 | |||
| 973 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
| 974 | FPU_LS_DOUBLE); | ||
| 975 | break; | ||
| 976 | } | ||
| 977 | case OP_LFDU: | ||
| 978 | { | ||
| 979 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
| 980 | |||
| 981 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
| 982 | FPU_LS_DOUBLE); | ||
| 983 | |||
| 984 | if (emulated == EMULATE_DONE) | ||
| 985 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 986 | break; | ||
| 987 | } | ||
| 988 | case OP_STFS: | ||
| 989 | { | ||
| 990 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
| 991 | |||
| 992 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
| 993 | FPU_LS_SINGLE); | ||
| 994 | break; | ||
| 995 | } | ||
| 996 | case OP_STFSU: | ||
| 997 | { | ||
| 998 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
| 999 | |||
| 1000 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
| 1001 | FPU_LS_SINGLE); | ||
| 1002 | |||
| 1003 | if (emulated == EMULATE_DONE) | ||
| 1004 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 1005 | break; | ||
| 1006 | } | ||
| 1007 | case OP_STFD: | ||
| 1008 | { | ||
| 1009 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
| 1010 | |||
| 1011 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
| 1012 | FPU_LS_DOUBLE); | ||
| 1013 | break; | ||
| 1014 | } | ||
| 1015 | case OP_STFDU: | ||
| 1016 | { | ||
| 1017 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
| 1018 | |||
| 1019 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
| 1020 | FPU_LS_DOUBLE); | ||
| 1021 | |||
| 1022 | if (emulated == EMULATE_DONE) | ||
| 1023 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 1024 | break; | ||
| 1025 | } | ||
| 1026 | case 31: | ||
| 1027 | switch (inst_get_field(inst, 21, 30)) { | ||
| 1028 | case OP_31_LFSX: | ||
| 1029 | { | ||
| 1030 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
| 1031 | |||
| 1032 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1033 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
| 1034 | addr, FPU_LS_SINGLE); | ||
| 1035 | break; | ||
| 1036 | } | ||
| 1037 | case OP_31_LFSUX: | ||
| 1038 | { | ||
| 1039 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
| 1040 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1041 | |||
| 1042 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
| 1043 | addr, FPU_LS_SINGLE); | ||
| 1044 | |||
| 1045 | if (emulated == EMULATE_DONE) | ||
| 1046 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 1047 | break; | ||
| 1048 | } | ||
| 1049 | case OP_31_LFDX: | ||
| 1050 | { | ||
| 1051 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
| 1052 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1053 | |||
| 1054 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
| 1055 | addr, FPU_LS_DOUBLE); | ||
| 1056 | break; | ||
| 1057 | } | ||
| 1058 | case OP_31_LFDUX: | ||
| 1059 | { | ||
| 1060 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
| 1061 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1062 | |||
| 1063 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
| 1064 | addr, FPU_LS_DOUBLE); | ||
| 1065 | |||
| 1066 | if (emulated == EMULATE_DONE) | ||
| 1067 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 1068 | break; | ||
| 1069 | } | ||
| 1070 | case OP_31_STFSX: | ||
| 1071 | { | ||
| 1072 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
| 1073 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1074 | |||
| 1075 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
| 1076 | addr, FPU_LS_SINGLE); | ||
| 1077 | break; | ||
| 1078 | } | ||
| 1079 | case OP_31_STFSUX: | ||
| 1080 | { | ||
| 1081 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
| 1082 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1083 | |||
| 1084 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
| 1085 | addr, FPU_LS_SINGLE); | ||
| 1086 | |||
| 1087 | if (emulated == EMULATE_DONE) | ||
| 1088 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 1089 | break; | ||
| 1090 | } | ||
| 1091 | case OP_31_STFX: | ||
| 1092 | { | ||
| 1093 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
| 1094 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1095 | |||
| 1096 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
| 1097 | addr, FPU_LS_DOUBLE); | ||
| 1098 | break; | ||
| 1099 | } | ||
| 1100 | case OP_31_STFUX: | ||
| 1101 | { | ||
| 1102 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
| 1103 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1104 | |||
| 1105 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
| 1106 | addr, FPU_LS_DOUBLE); | ||
| 1107 | |||
| 1108 | if (emulated == EMULATE_DONE) | ||
| 1109 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
| 1110 | break; | ||
| 1111 | } | ||
| 1112 | case OP_31_STFIWX: | ||
| 1113 | { | ||
| 1114 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
| 1115 | kvmppc_get_gpr(vcpu, ax_rb); | ||
| 1116 | |||
| 1117 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
| 1118 | addr, | ||
| 1119 | FPU_LS_SINGLE_LOW); | ||
| 1120 | break; | ||
| 1121 | } | ||
| 1122 | break; | ||
| 1123 | } | ||
| 1124 | break; | ||
| 1125 | case 59: | ||
| 1126 | switch (inst_get_field(inst, 21, 30)) { | ||
| 1127 | case OP_59_FADDS: | ||
| 1128 | fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
| 1129 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1130 | break; | ||
| 1131 | case OP_59_FSUBS: | ||
| 1132 | fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
| 1133 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1134 | break; | ||
| 1135 | case OP_59_FDIVS: | ||
| 1136 | fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
| 1137 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1138 | break; | ||
| 1139 | case OP_59_FRES: | ||
| 1140 | fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1141 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1142 | break; | ||
| 1143 | case OP_59_FRSQRTES: | ||
| 1144 | fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1145 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1146 | break; | ||
| 1147 | } | ||
| 1148 | switch (inst_get_field(inst, 26, 30)) { | ||
| 1149 | case OP_59_FMULS: | ||
| 1150 | fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); | ||
| 1151 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1152 | break; | ||
| 1153 | case OP_59_FMSUBS: | ||
| 1154 | fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1155 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1156 | break; | ||
| 1157 | case OP_59_FMADDS: | ||
| 1158 | fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1159 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1160 | break; | ||
| 1161 | case OP_59_FNMSUBS: | ||
| 1162 | fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1163 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1164 | break; | ||
| 1165 | case OP_59_FNMADDS: | ||
| 1166 | fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1167 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1168 | break; | ||
| 1169 | } | ||
| 1170 | break; | ||
| 1171 | case 63: | ||
| 1172 | switch (inst_get_field(inst, 21, 30)) { | ||
| 1173 | case OP_63_MTFSB0: | ||
| 1174 | case OP_63_MTFSB1: | ||
| 1175 | case OP_63_MCRFS: | ||
| 1176 | case OP_63_MTFSFI: | ||
| 1177 | /* XXX need to implement */ | ||
| 1178 | break; | ||
| 1179 | case OP_63_MFFS: | ||
| 1180 | /* XXX missing CR */ | ||
| 1181 | *fpr_d = vcpu->arch.fpscr; | ||
| 1182 | break; | ||
| 1183 | case OP_63_MTFSF: | ||
| 1184 | /* XXX missing fm bits */ | ||
| 1185 | /* XXX missing CR */ | ||
| 1186 | vcpu->arch.fpscr = *fpr_b; | ||
| 1187 | break; | ||
| 1188 | case OP_63_FCMPU: | ||
| 1189 | { | ||
| 1190 | u32 tmp_cr; | ||
| 1191 | u32 cr0_mask = 0xf0000000; | ||
| 1192 | u32 cr_shift = inst_get_field(inst, 6, 8) * 4; | ||
| 1193 | |||
| 1194 | fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); | ||
| 1195 | cr &= ~(cr0_mask >> cr_shift); | ||
| 1196 | cr |= (cr & cr0_mask) >> cr_shift; | ||
| 1197 | break; | ||
| 1198 | } | ||
| 1199 | case OP_63_FCMPO: | ||
| 1200 | { | ||
| 1201 | u32 tmp_cr; | ||
| 1202 | u32 cr0_mask = 0xf0000000; | ||
| 1203 | u32 cr_shift = inst_get_field(inst, 6, 8) * 4; | ||
| 1204 | |||
| 1205 | fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); | ||
| 1206 | cr &= ~(cr0_mask >> cr_shift); | ||
| 1207 | cr |= (cr & cr0_mask) >> cr_shift; | ||
| 1208 | break; | ||
| 1209 | } | ||
| 1210 | case OP_63_FNEG: | ||
| 1211 | fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1212 | break; | ||
| 1213 | case OP_63_FMR: | ||
| 1214 | *fpr_d = *fpr_b; | ||
| 1215 | break; | ||
| 1216 | case OP_63_FABS: | ||
| 1217 | fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1218 | break; | ||
| 1219 | case OP_63_FCPSGN: | ||
| 1220 | fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
| 1221 | break; | ||
| 1222 | case OP_63_FDIV: | ||
| 1223 | fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
| 1224 | break; | ||
| 1225 | case OP_63_FADD: | ||
| 1226 | fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
| 1227 | break; | ||
| 1228 | case OP_63_FSUB: | ||
| 1229 | fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
| 1230 | break; | ||
| 1231 | case OP_63_FCTIW: | ||
| 1232 | fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1233 | break; | ||
| 1234 | case OP_63_FCTIWZ: | ||
| 1235 | fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1236 | break; | ||
| 1237 | case OP_63_FRSP: | ||
| 1238 | fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1239 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
| 1240 | break; | ||
| 1241 | case OP_63_FRSQRTE: | ||
| 1242 | { | ||
| 1243 | double one = 1.0f; | ||
| 1244 | |||
| 1245 | /* fD = sqrt(fB) */ | ||
| 1246 | fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
| 1247 | /* fD = 1.0f / fD */ | ||
| 1248 | fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); | ||
| 1249 | break; | ||
| 1250 | } | ||
| 1251 | } | ||
| 1252 | switch (inst_get_field(inst, 26, 30)) { | ||
| 1253 | case OP_63_FMUL: | ||
| 1254 | fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); | ||
| 1255 | break; | ||
| 1256 | case OP_63_FSEL: | ||
| 1257 | fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1258 | break; | ||
| 1259 | case OP_63_FMSUB: | ||
| 1260 | fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1261 | break; | ||
| 1262 | case OP_63_FMADD: | ||
| 1263 | fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1264 | break; | ||
| 1265 | case OP_63_FNMSUB: | ||
| 1266 | fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1267 | break; | ||
| 1268 | case OP_63_FNMADD: | ||
| 1269 | fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
| 1270 | break; | ||
| 1271 | } | ||
| 1272 | break; | ||
| 1273 | } | ||
| 1274 | |||
| 1275 | #ifdef DEBUG | ||
| 1276 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | ||
| 1277 | u32 f; | ||
| 1278 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | ||
| 1279 | dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); | ||
| 1280 | } | ||
| 1281 | #endif | ||
| 1282 | |||
| 1283 | if (rcomp) | ||
| 1284 | kvmppc_set_cr(vcpu, cr); | ||
| 1285 | |||
| 1286 | preempt_enable(); | ||
| 1287 | |||
| 1288 | return emulated; | ||
| 1289 | } | ||
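
The switch above dispatches on extended-opcode fields of the instruction word (bits 21-30 for the X-form compare/move ops, bits 26-30 for the A-form multiply-add family), pulled out with inst_get_field() in IBM bit numbering. A minimal, self-contained sketch of that kind of extraction follows; the helper name and exact form are illustrative, not the kernel's actual implementation:

    #include <stdint.h>

    /* Hypothetical sketch: extract bits msb..lsb from a 32-bit PowerPC
     * instruction word, IBM numbering (bit 0 = most significant bit). */
    static inline uint32_t inst_field(uint32_t inst, int msb, int lsb)
    {
            int len = lsb - msb + 1;
            uint32_t mask = (len >= 32) ? 0xffffffffu : ((1u << len) - 1);

            return (inst >> (31 - lsb)) & mask;
    }

For example, inst_field(inst, 21, 30) would yield the extended opcode compared against OP_63_FCMPU and friends, and inst_field(inst, 6, 8) the CR field used to compute cr_shift.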
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index c83c60ad96c5..506d5c316c96 100644 --- a/arch/powerpc/kvm/book3s_64_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S | |||
| @@ -22,7 +22,10 @@ | |||
| 22 | #include <asm/reg.h> | 22 | #include <asm/reg.h> |
| 23 | #include <asm/page.h> | 23 | #include <asm/page.h> |
| 24 | #include <asm/asm-offsets.h> | 24 | #include <asm/asm-offsets.h> |
| 25 | |||
| 26 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 25 | #include <asm/exception-64s.h> | 27 | #include <asm/exception-64s.h> |
| 28 | #endif | ||
| 26 | 29 | ||
| 27 | /***************************************************************************** | 30 | /***************************************************************************** |
| 28 | * * | 31 | * * |
| @@ -30,6 +33,39 @@ | |||
| 30 | * * | 33 | * * |
| 31 | ****************************************************************************/ | 34 | ****************************************************************************/ |
| 32 | 35 | ||
| 36 | #if defined(CONFIG_PPC_BOOK3S_64) | ||
| 37 | |||
| 38 | #define LOAD_SHADOW_VCPU(reg) \ | ||
| 39 | mfspr reg, SPRN_SPRG_PACA | ||
| 40 | |||
| 41 | #define SHADOW_VCPU_OFF PACA_KVM_SVCPU | ||
| 42 | #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) | ||
| 43 | #define FUNC(name) GLUE(.,name) | ||
| 44 | |||
| 45 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
| 46 | |||
| 47 | #define LOAD_SHADOW_VCPU(reg) \ | ||
| 48 | mfspr reg, SPRN_SPRG_THREAD; \ | ||
| 49 | lwz reg, THREAD_KVM_SVCPU(reg); \ | ||
| 50 | /* PPC32 can have a NULL pointer - let's check for that */ \ | ||
| 51 | mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \ | ||
| 52 | mfcr r12; \ | ||
| 53 | cmpwi reg, 0; \ | ||
| 54 | bne 1f; \ | ||
| 55 | mfspr reg, SPRN_SPRG_SCRATCH0; \ | ||
| 56 | mtcr r12; \ | ||
| 57 | mfspr r12, SPRN_SPRG_SCRATCH1; \ | ||
| 58 | b kvmppc_resume_\intno; \ | ||
| 59 | 1:; \ | ||
| 60 | mtcr r12; \ | ||
| 61 | mfspr r12, SPRN_SPRG_SCRATCH1; \ | ||
| 62 | tophys(reg, reg) | ||
| 63 | |||
| 64 | #define SHADOW_VCPU_OFF 0 | ||
| 65 | #define MSR_NOIRQ MSR_KERNEL | ||
| 66 | #define FUNC(name) name | ||
| 67 | |||
| 68 | #endif | ||
| 33 | 69 | ||
| 34 | .macro INTERRUPT_TRAMPOLINE intno | 70 | .macro INTERRUPT_TRAMPOLINE intno |
| 35 | 71 | ||
| @@ -42,19 +78,19 @@ kvmppc_trampoline_\intno: | |||
| 42 | * First thing to do is to find out if we're coming | 78 | * First thing to do is to find out if we're coming |
| 43 | * from a KVM guest or a Linux process. | 79 | * from a KVM guest or a Linux process. |
| 44 | * | 80 | * |
| 45 | * To distinguish, we check a magic byte in the PACA | 81 | * To distinguish, we check a magic byte in the PACA/current |
| 46 | */ | 82 | */ |
| 47 | mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ | 83 | LOAD_SHADOW_VCPU(r13) |
| 48 | std r12, PACA_KVM_SCRATCH0(r13) | 84 | PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) |
| 49 | mfcr r12 | 85 | mfcr r12 |
| 50 | stw r12, PACA_KVM_SCRATCH1(r13) | 86 | stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) |
| 51 | lbz r12, PACA_KVM_IN_GUEST(r13) | 87 | lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) |
| 52 | cmpwi r12, KVM_GUEST_MODE_NONE | 88 | cmpwi r12, KVM_GUEST_MODE_NONE |
| 53 | bne ..kvmppc_handler_hasmagic_\intno | 89 | bne ..kvmppc_handler_hasmagic_\intno |
| 54 | /* No KVM guest? Then jump back to the Linux handler! */ | 90 | /* No KVM guest? Then jump back to the Linux handler! */ |
| 55 | lwz r12, PACA_KVM_SCRATCH1(r13) | 91 | lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) |
| 56 | mtcr r12 | 92 | mtcr r12 |
| 57 | ld r12, PACA_KVM_SCRATCH0(r13) | 93 | PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) |
| 58 | mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ | 94 | mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ |
| 59 | b kvmppc_resume_\intno /* Get back original handler */ | 95 | b kvmppc_resume_\intno /* Get back original handler */ |
| 60 | 96 | ||
| @@ -76,9 +112,7 @@ kvmppc_trampoline_\intno: | |||
| 76 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET | 112 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET |
| 77 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK | 113 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK |
| 78 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE | 114 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE |
| 79 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT | ||
| 80 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE | 115 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE |
| 81 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT | ||
| 82 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL | 116 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL |
| 83 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT | 117 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT |
| 84 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM | 118 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM |
| @@ -88,7 +122,14 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSCALL | |||
| 88 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE | 122 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE |
| 89 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON | 123 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON |
| 90 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC | 124 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC |
| 125 | |||
| 126 | /* Those are only available on 64 bit machines */ | ||
| 127 | |||
| 128 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 129 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT | ||
| 130 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT | ||
| 91 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX | 131 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX |
| 132 | #endif | ||
| 92 | 133 | ||
| 93 | /* | 134 | /* |
| 94 | * Bring us back to the faulting code, but skip the | 135 | * Bring us back to the faulting code, but skip the |
| @@ -99,11 +140,11 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX | |||
| 99 | * | 140 | * |
| 100 | * Input Registers: | 141 | * Input Registers: |
| 101 | * | 142 | * |
| 102 | * R12 = free | 143 | * R12 = free |
| 103 | * R13 = PACA | 144 | * R13 = Shadow VCPU (PACA) |
| 104 | * PACA.KVM.SCRATCH0 = guest R12 | 145 | * SVCPU.SCRATCH0 = guest R12 |
| 105 | * PACA.KVM.SCRATCH1 = guest CR | 146 | * SVCPU.SCRATCH1 = guest CR |
| 106 | * SPRG_SCRATCH0 = guest R13 | 147 | * SPRG_SCRATCH0 = guest R13 |
| 107 | * | 148 | * |
| 108 | */ | 149 | */ |
| 109 | kvmppc_handler_skip_ins: | 150 | kvmppc_handler_skip_ins: |
| @@ -114,9 +155,9 @@ kvmppc_handler_skip_ins: | |||
| 114 | mtsrr0 r12 | 155 | mtsrr0 r12 |
| 115 | 156 | ||
| 116 | /* Clean up all state */ | 157 | /* Clean up all state */ |
| 117 | lwz r12, PACA_KVM_SCRATCH1(r13) | 158 | lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) |
| 118 | mtcr r12 | 159 | mtcr r12 |
| 119 | ld r12, PACA_KVM_SCRATCH0(r13) | 160 | PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) |
| 120 | mfspr r13, SPRN_SPRG_SCRATCH0 | 161 | mfspr r13, SPRN_SPRG_SCRATCH0 |
| 121 | 162 | ||
| 122 | /* And get back into the code */ | 163 | /* And get back into the code */ |
| @@ -147,41 +188,48 @@ kvmppc_handler_lowmem_trampoline_end: | |||
| 147 | * | 188 | * |
| 148 | * R3 = function | 189 | * R3 = function |
| 149 | * R4 = MSR | 190 | * R4 = MSR |
| 150 | * R5 = CTR | 191 | * R5 = scratch register |
| 151 | * | 192 | * |
| 152 | */ | 193 | */ |
| 153 | _GLOBAL(kvmppc_rmcall) | 194 | _GLOBAL(kvmppc_rmcall) |
| 154 | mtmsr r4 /* Disable relocation, so mtsrr | 195 | LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ) |
| 196 | mtmsr r5 /* Disable relocation and interrupts, so mtsrr | ||
| 155 | doesn't get interrupted */ | 197 | doesn't get interrupted */ |
| 156 | mtctr r5 | 198 | sync |
| 157 | mtsrr0 r3 | 199 | mtsrr0 r3 |
| 158 | mtsrr1 r4 | 200 | mtsrr1 r4 |
| 159 | RFI | 201 | RFI |
| 160 | 202 | ||
| 203 | #if defined(CONFIG_PPC_BOOK3S_32) | ||
| 204 | #define STACK_LR INT_FRAME_SIZE+4 | ||
| 205 | #elif defined(CONFIG_PPC_BOOK3S_64) | ||
| 206 | #define STACK_LR _LINK | ||
| 207 | #endif | ||
| 208 | |||
| 161 | /* | 209 | /* |
| 162 | * Activate current's external feature (FPU/Altivec/VSX) | 210 | * Activate current's external feature (FPU/Altivec/VSX) |
| 163 | */ | 211 | */ |
| 164 | #define define_load_up(what) \ | 212 | #define define_load_up(what) \ |
| 165 | \ | 213 | \ |
| 166 | _GLOBAL(kvmppc_load_up_ ## what); \ | 214 | _GLOBAL(kvmppc_load_up_ ## what); \ |
| 167 | subi r1, r1, INT_FRAME_SIZE; \ | 215 | PPC_STLU r1, -INT_FRAME_SIZE(r1); \ |
| 168 | mflr r3; \ | 216 | mflr r3; \ |
| 169 | std r3, _LINK(r1); \ | 217 | PPC_STL r3, STACK_LR(r1); \ |
| 170 | mfmsr r4; \ | 218 | PPC_STL r20, _NIP(r1); \ |
| 171 | std r31, GPR3(r1); \ | 219 | mfmsr r20; \ |
| 172 | mr r31, r4; \ | 220 | LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ |
| 173 | li r5, MSR_DR; \ | 221 | andc r3,r20,r3; /* Disable DR,EE */ \ |
| 174 | oris r5, r5, MSR_EE@h; \ | 222 | mtmsr r3; \ |
| 175 | andc r4, r4, r5; \ | 223 | sync; \ |
| 176 | mtmsr r4; \ | 224 | \ |
| 177 | \ | 225 | bl FUNC(load_up_ ## what); \ |
| 178 | bl .load_up_ ## what; \ | 226 | \ |
| 179 | \ | 227 | mtmsr r20; /* Enable DR,EE */ \ |
| 180 | mtmsr r31; \ | 228 | sync; \ |
| 181 | ld r3, _LINK(r1); \ | 229 | PPC_LL r3, STACK_LR(r1); \ |
| 182 | ld r31, GPR3(r1); \ | 230 | PPC_LL r20, _NIP(r1); \ |
| 183 | addi r1, r1, INT_FRAME_SIZE; \ | 231 | mtlr r3; \ |
| 184 | mtlr r3; \ | 232 | addi r1, r1, INT_FRAME_SIZE; \ |
| 185 | blr | 233 | blr |
| 186 | 234 | ||
| 187 | define_load_up(fpu) | 235 | define_load_up(fpu) |
| @@ -194,11 +242,10 @@ define_load_up(vsx) | |||
| 194 | 242 | ||
| 195 | .global kvmppc_trampoline_lowmem | 243 | .global kvmppc_trampoline_lowmem |
| 196 | kvmppc_trampoline_lowmem: | 244 | kvmppc_trampoline_lowmem: |
| 197 | .long kvmppc_handler_lowmem_trampoline - _stext | 245 | .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START |
| 198 | 246 | ||
| 199 | .global kvmppc_trampoline_enter | 247 | .global kvmppc_trampoline_enter |
| 200 | kvmppc_trampoline_enter: | 248 | kvmppc_trampoline_enter: |
| 201 | .long kvmppc_handler_trampoline_enter - _stext | 249 | .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START |
| 202 | |||
| 203 | #include "book3s_64_slb.S" | ||
| 204 | 250 | ||
| 251 | #include "book3s_segment.S" | ||
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S new file mode 100644 index 000000000000..7c52ed0b7051 --- /dev/null +++ b/arch/powerpc/kvm/book3s_segment.S | |||
| @@ -0,0 +1,259 @@ | |||
| 1 | /* | ||
| 2 | * This program is free software; you can redistribute it and/or modify | ||
| 3 | * it under the terms of the GNU General Public License, version 2, as | ||
| 4 | * published by the Free Software Foundation. | ||
| 5 | * | ||
| 6 | * This program is distributed in the hope that it will be useful, | ||
| 7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 9 | * GNU General Public License for more details. | ||
| 10 | * | ||
| 11 | * You should have received a copy of the GNU General Public License | ||
| 12 | * along with this program; if not, write to the Free Software | ||
| 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 14 | * | ||
| 15 | * Copyright SUSE Linux Products GmbH 2010 | ||
| 16 | * | ||
| 17 | * Authors: Alexander Graf <agraf@suse.de> | ||
| 18 | */ | ||
| 19 | |||
| 20 | /* Real mode helpers */ | ||
| 21 | |||
| 22 | #if defined(CONFIG_PPC_BOOK3S_64) | ||
| 23 | |||
| 24 | #define GET_SHADOW_VCPU(reg) \ | ||
| 25 | addi reg, r13, PACA_KVM_SVCPU | ||
| 26 | |||
| 27 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
| 28 | |||
| 29 | #define GET_SHADOW_VCPU(reg) \ | ||
| 30 | tophys(reg, r2); \ | ||
| 31 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ | ||
| 32 | tophys(reg, reg) | ||
| 33 | |||
| 34 | #endif | ||
| 35 | |||
| 36 | /* Disable for nested KVM */ | ||
| 37 | #define USE_QUICK_LAST_INST | ||
| 38 | |||
| 39 | |||
| 40 | /* Get helper functions for subarch specific functionality */ | ||
| 41 | |||
| 42 | #if defined(CONFIG_PPC_BOOK3S_64) | ||
| 43 | #include "book3s_64_slb.S" | ||
| 44 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
| 45 | #include "book3s_32_sr.S" | ||
| 46 | #endif | ||
| 47 | |||
| 48 | /****************************************************************************** | ||
| 49 | * * | ||
| 50 | * Entry code * | ||
| 51 | * * | ||
| 52 | *****************************************************************************/ | ||
| 53 | |||
| 54 | .global kvmppc_handler_trampoline_enter | ||
| 55 | kvmppc_handler_trampoline_enter: | ||
| 56 | |||
| 57 | /* Required state: | ||
| 58 | * | ||
| 59 | * MSR = ~IR|DR | ||
| 60 | * R13 = PACA | ||
| 61 | * R1 = host R1 | ||
| 62 | * R2 = host R2 | ||
| 63 | * R10 = guest MSR | ||
| 64 | * all other volatile GPRS = free | ||
| 65 | * SVCPU[CR] = guest CR | ||
| 66 | * SVCPU[XER] = guest XER | ||
| 67 | * SVCPU[CTR] = guest CTR | ||
| 68 | * SVCPU[LR] = guest LR | ||
| 69 | */ | ||
| 70 | |||
| 71 | /* r3 = shadow vcpu */ | ||
| 72 | GET_SHADOW_VCPU(r3) | ||
| 73 | |||
| 74 | /* Move SRR0 and SRR1 into the respective regs */ | ||
| 75 | PPC_LL r9, SVCPU_PC(r3) | ||
| 76 | mtsrr0 r9 | ||
| 77 | mtsrr1 r10 | ||
| 78 | |||
| 79 | /* Activate guest mode, so faults get handled by KVM */ | ||
| 80 | li r11, KVM_GUEST_MODE_GUEST | ||
| 81 | stb r11, SVCPU_IN_GUEST(r3) | ||
| 82 | |||
| 83 | /* Switch to guest segment. This is subarch specific. */ | ||
| 84 | LOAD_GUEST_SEGMENTS | ||
| 85 | |||
| 86 | /* Enter guest */ | ||
| 87 | |||
| 88 | PPC_LL r4, (SVCPU_CTR)(r3) | ||
| 89 | PPC_LL r5, (SVCPU_LR)(r3) | ||
| 90 | lwz r6, (SVCPU_CR)(r3) | ||
| 91 | lwz r7, (SVCPU_XER)(r3) | ||
| 92 | |||
| 93 | mtctr r4 | ||
| 94 | mtlr r5 | ||
| 95 | mtcr r6 | ||
| 96 | mtxer r7 | ||
| 97 | |||
| 98 | PPC_LL r0, (SVCPU_R0)(r3) | ||
| 99 | PPC_LL r1, (SVCPU_R1)(r3) | ||
| 100 | PPC_LL r2, (SVCPU_R2)(r3) | ||
| 101 | PPC_LL r4, (SVCPU_R4)(r3) | ||
| 102 | PPC_LL r5, (SVCPU_R5)(r3) | ||
| 103 | PPC_LL r6, (SVCPU_R6)(r3) | ||
| 104 | PPC_LL r7, (SVCPU_R7)(r3) | ||
| 105 | PPC_LL r8, (SVCPU_R8)(r3) | ||
| 106 | PPC_LL r9, (SVCPU_R9)(r3) | ||
| 107 | PPC_LL r10, (SVCPU_R10)(r3) | ||
| 108 | PPC_LL r11, (SVCPU_R11)(r3) | ||
| 109 | PPC_LL r12, (SVCPU_R12)(r3) | ||
| 110 | PPC_LL r13, (SVCPU_R13)(r3) | ||
| 111 | |||
| 112 | PPC_LL r3, (SVCPU_R3)(r3) | ||
| 113 | |||
| 114 | RFI | ||
| 115 | kvmppc_handler_trampoline_enter_end: | ||
| 116 | |||
| 117 | |||
| 118 | |||
| 119 | /****************************************************************************** | ||
| 120 | * * | ||
| 121 | * Exit code * | ||
| 122 | * * | ||
| 123 | *****************************************************************************/ | ||
| 124 | |||
| 125 | .global kvmppc_handler_trampoline_exit | ||
| 126 | kvmppc_handler_trampoline_exit: | ||
| 127 | |||
| 128 | /* Register usage at this point: | ||
| 129 | * | ||
| 130 | * SPRG_SCRATCH0 = guest R13 | ||
| 131 | * R12 = exit handler id | ||
| 132 | * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] | ||
| 133 | * SVCPU.SCRATCH0 = guest R12 | ||
| 134 | * SVCPU.SCRATCH1 = guest CR | ||
| 135 | * | ||
| 136 | */ | ||
| 137 | |||
| 138 | /* Save registers */ | ||
| 139 | |||
| 140 | PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13) | ||
| 141 | PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13) | ||
| 142 | PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13) | ||
| 143 | PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13) | ||
| 144 | PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13) | ||
| 145 | PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13) | ||
| 146 | PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13) | ||
| 147 | PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13) | ||
| 148 | PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13) | ||
| 149 | PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13) | ||
| 150 | PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13) | ||
| 151 | PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13) | ||
| 152 | |||
| 153 | /* Restore R1/R2 so we can handle faults */ | ||
| 154 | PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13) | ||
| 155 | PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) | ||
| 156 | |||
| 157 | /* Save guest PC and MSR */ | ||
| 158 | mfsrr0 r3 | ||
| 159 | mfsrr1 r4 | ||
| 160 | |||
| 161 | PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13) | ||
| 162 | PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13) | ||
| 163 | |||
| 164 | /* Get scratch'ed off registers */ | ||
| 165 | mfspr r9, SPRN_SPRG_SCRATCH0 | ||
| 166 | PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) | ||
| 167 | lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) | ||
| 168 | |||
| 169 | PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13) | ||
| 170 | PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13) | ||
| 171 | stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13) | ||
| 172 | |||
| 173 | /* Save more register state */ | ||
| 174 | |||
| 175 | mfxer r5 | ||
| 176 | mfdar r6 | ||
| 177 | mfdsisr r7 | ||
| 178 | mfctr r8 | ||
| 179 | mflr r9 | ||
| 180 | |||
| 181 | stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13) | ||
| 182 | PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13) | ||
| 183 | stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13) | ||
| 184 | PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13) | ||
| 185 | PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13) | ||
| 186 | |||
| 187 | /* | ||
| 188 | * In order to easily get the last instruction we took | ||
| 189 | * the #vmexit at, we exploit the fact that the | ||
| 190 | * virtual layout is still the same here, so we can just | ||
| 191 | * ld from the guest's PC address | ||
| 192 | */ | ||
| 193 | |||
| 194 | /* We only load the last instruction when it's safe */ | ||
| 195 | cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE | ||
| 196 | beq ld_last_inst | ||
| 197 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM | ||
| 198 | beq ld_last_inst | ||
| 199 | cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT | ||
| 200 | beq- ld_last_inst | ||
| 201 | |||
| 202 | b no_ld_last_inst | ||
| 203 | |||
| 204 | ld_last_inst: | ||
| 205 | /* Save off the guest instruction we're at */ | ||
| 206 | |||
| 207 | /* In case lwz faults */ | ||
| 208 | li r0, KVM_INST_FETCH_FAILED | ||
| 209 | |||
| 210 | #ifdef USE_QUICK_LAST_INST | ||
| 211 | |||
| 212 | /* Set guest mode to 'jump over instruction' so if lwz faults | ||
| 213 | * we'll just continue at the next IP. */ | ||
| 214 | li r9, KVM_GUEST_MODE_SKIP | ||
| 215 | stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) | ||
| 216 | |||
| 217 | /* 1) enable paging for data */ | ||
| 218 | mfmsr r9 | ||
| 219 | ori r11, r9, MSR_DR /* Enable paging for data */ | ||
| 220 | mtmsr r11 | ||
| 221 | sync | ||
| 222 | /* 2) fetch the instruction */ | ||
| 223 | lwz r0, 0(r3) | ||
| 224 | /* 3) disable paging again */ | ||
| 225 | mtmsr r9 | ||
| 226 | sync | ||
| 227 | |||
| 228 | #endif | ||
| 229 | stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13) | ||
| 230 | |||
| 231 | no_ld_last_inst: | ||
| 232 | |||
| 233 | /* Unset guest mode */ | ||
| 234 | li r9, KVM_GUEST_MODE_NONE | ||
| 235 | stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) | ||
| 236 | |||
| 237 | /* Switch back to host MMU */ | ||
| 238 | LOAD_HOST_SEGMENTS | ||
| 239 | |||
| 240 | /* Register usage at this point: | ||
| 241 | * | ||
| 242 | * R1 = host R1 | ||
| 243 | * R2 = host R2 | ||
| 244 | * R12 = exit handler id | ||
| 245 | * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] | ||
| 246 | * SVCPU.* = guest * | ||
| 247 | * | ||
| 248 | */ | ||
| 249 | |||
| 250 | /* RFI into the highmem handler */ | ||
| 251 | mfmsr r7 | ||
| 252 | ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */ | ||
| 253 | mtsrr1 r7 | ||
| 254 | /* Load highmem handler address */ | ||
| 255 | PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13) | ||
| 256 | mtsrr0 r8 | ||
| 257 | |||
| 258 | RFI | ||
| 259 | kvmppc_handler_trampoline_exit_end: | ||
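
The exit path above caches the faulting guest instruction opportunistically: it primes r0 with KVM_INST_FETCH_FAILED, briefly re-enables MSR_DR (the guest's virtual layout is still mapped at this point), and does the lwz, so a fault in that window simply skips the load. A plausible C-side consumer is sketched below; the helper and field names (to_svcpu, kvmppc_ld) are assumptions about the surrounding code, not taken from this hunk:

    /* Sketch only: return the cached last instruction, falling back to an
     * explicit guest-memory read when the fast path could not fetch it. */
    static u32 get_last_inst_sketch(struct kvm_vcpu *vcpu)
    {
            u32 inst = to_svcpu(vcpu)->last_inst;

            if (inst == KVM_INST_FETCH_FAILED) {
                    ulong pc = kvmppc_get_pc(vcpu);

                    /* assumed slow path: translate and copy from guest memory */
                    kvmppc_ld(vcpu, &pc, sizeof(u32), &inst, false);
                    to_svcpu(vcpu)->last_inst = inst;
            }

            return inst;
    }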
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2a3a1953d4bd..a33ab8cc2ccc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
| @@ -133,6 +133,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
| 133 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); | 133 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | ||
| 137 | struct kvm_interrupt *irq) | ||
| 138 | { | ||
| 139 | clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); | ||
| 140 | } | ||
| 141 | |||
| 136 | /* Deliver the interrupt of the corresponding priority, if possible. */ | 142 | /* Deliver the interrupt of the corresponding priority, if possible. */ |
| 137 | static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | 143 | static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, |
| 138 | unsigned int priority) | 144 | unsigned int priority) |
| @@ -479,6 +485,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 479 | { | 485 | { |
| 480 | int i; | 486 | int i; |
| 481 | 487 | ||
| 488 | vcpu_load(vcpu); | ||
| 489 | |||
| 482 | regs->pc = vcpu->arch.pc; | 490 | regs->pc = vcpu->arch.pc; |
| 483 | regs->cr = kvmppc_get_cr(vcpu); | 491 | regs->cr = kvmppc_get_cr(vcpu); |
| 484 | regs->ctr = vcpu->arch.ctr; | 492 | regs->ctr = vcpu->arch.ctr; |
| @@ -499,6 +507,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 499 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 507 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 500 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 508 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
| 501 | 509 | ||
| 510 | vcpu_put(vcpu); | ||
| 511 | |||
| 502 | return 0; | 512 | return 0; |
| 503 | } | 513 | } |
| 504 | 514 | ||
| @@ -506,6 +516,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 506 | { | 516 | { |
| 507 | int i; | 517 | int i; |
| 508 | 518 | ||
| 519 | vcpu_load(vcpu); | ||
| 520 | |||
| 509 | vcpu->arch.pc = regs->pc; | 521 | vcpu->arch.pc = regs->pc; |
| 510 | kvmppc_set_cr(vcpu, regs->cr); | 522 | kvmppc_set_cr(vcpu, regs->cr); |
| 511 | vcpu->arch.ctr = regs->ctr; | 523 | vcpu->arch.ctr = regs->ctr; |
| @@ -525,6 +537,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 525 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 537 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 526 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 538 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
| 527 | 539 | ||
| 540 | vcpu_put(vcpu); | ||
| 541 | |||
| 528 | return 0; | 542 | return 0; |
| 529 | } | 543 | } |
| 530 | 544 | ||
| @@ -553,7 +567,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
| 553 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 567 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
| 554 | struct kvm_translation *tr) | 568 | struct kvm_translation *tr) |
| 555 | { | 569 | { |
| 556 | return kvmppc_core_vcpu_translate(vcpu, tr); | 570 | int r; |
| 571 | |||
| 572 | vcpu_load(vcpu); | ||
| 573 | r = kvmppc_core_vcpu_translate(vcpu, tr); | ||
| 574 | vcpu_put(vcpu); | ||
| 575 | return r; | ||
| 557 | } | 576 | } |
| 558 | 577 | ||
| 559 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | 578 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 669a5c5fc7d7..bc2b4004eb26 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
| @@ -161,7 +161,7 @@ static int __init kvmppc_e500_init(void) | |||
| 161 | flush_icache_range(kvmppc_booke_handlers, | 161 | flush_icache_range(kvmppc_booke_handlers, |
| 162 | kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); | 162 | kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); |
| 163 | 163 | ||
| 164 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE); | 164 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); |
| 165 | } | 165 | } |
| 166 | 166 | ||
| 167 | static void __init kvmppc_e500_exit(void) | 167 | static void __init kvmppc_e500_exit(void) |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index cb72a65f4ecc..4568ec386c2a 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
| @@ -38,10 +38,12 @@ | |||
| 38 | #define OP_31_XOP_LBZX 87 | 38 | #define OP_31_XOP_LBZX 87 |
| 39 | #define OP_31_XOP_STWX 151 | 39 | #define OP_31_XOP_STWX 151 |
| 40 | #define OP_31_XOP_STBX 215 | 40 | #define OP_31_XOP_STBX 215 |
| 41 | #define OP_31_XOP_LBZUX 119 | ||
| 41 | #define OP_31_XOP_STBUX 247 | 42 | #define OP_31_XOP_STBUX 247 |
| 42 | #define OP_31_XOP_LHZX 279 | 43 | #define OP_31_XOP_LHZX 279 |
| 43 | #define OP_31_XOP_LHZUX 311 | 44 | #define OP_31_XOP_LHZUX 311 |
| 44 | #define OP_31_XOP_MFSPR 339 | 45 | #define OP_31_XOP_MFSPR 339 |
| 46 | #define OP_31_XOP_LHAX 343 | ||
| 45 | #define OP_31_XOP_STHX 407 | 47 | #define OP_31_XOP_STHX 407 |
| 46 | #define OP_31_XOP_STHUX 439 | 48 | #define OP_31_XOP_STHUX 439 |
| 47 | #define OP_31_XOP_MTSPR 467 | 49 | #define OP_31_XOP_MTSPR 467 |
| @@ -62,10 +64,12 @@ | |||
| 62 | #define OP_STBU 39 | 64 | #define OP_STBU 39 |
| 63 | #define OP_LHZ 40 | 65 | #define OP_LHZ 40 |
| 64 | #define OP_LHZU 41 | 66 | #define OP_LHZU 41 |
| 67 | #define OP_LHA 42 | ||
| 68 | #define OP_LHAU 43 | ||
| 65 | #define OP_STH 44 | 69 | #define OP_STH 44 |
| 66 | #define OP_STHU 45 | 70 | #define OP_STHU 45 |
| 67 | 71 | ||
| 68 | #ifdef CONFIG_PPC64 | 72 | #ifdef CONFIG_PPC_BOOK3S |
| 69 | static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) | 73 | static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) |
| 70 | { | 74 | { |
| 71 | return 1; | 75 | return 1; |
| @@ -82,7 +86,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | |||
| 82 | unsigned long dec_nsec; | 86 | unsigned long dec_nsec; |
| 83 | 87 | ||
| 84 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); | 88 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); |
| 85 | #ifdef CONFIG_PPC64 | 89 | #ifdef CONFIG_PPC_BOOK3S |
| 86 | /* mtdec lowers the interrupt line when positive. */ | 90 | /* mtdec lowers the interrupt line when positive. */ |
| 87 | kvmppc_core_dequeue_dec(vcpu); | 91 | kvmppc_core_dequeue_dec(vcpu); |
| 88 | 92 | ||
| @@ -128,7 +132,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | |||
| 128 | * from opcode tables in the future. */ | 132 | * from opcode tables in the future. */ |
| 129 | int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | 133 | int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) |
| 130 | { | 134 | { |
| 131 | u32 inst = vcpu->arch.last_inst; | 135 | u32 inst = kvmppc_get_last_inst(vcpu); |
| 132 | u32 ea; | 136 | u32 ea; |
| 133 | int ra; | 137 | int ra; |
| 134 | int rb; | 138 | int rb; |
| @@ -143,13 +147,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 143 | 147 | ||
| 144 | pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); | 148 | pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); |
| 145 | 149 | ||
| 146 | /* Try again next time */ | ||
| 147 | if (inst == KVM_INST_FETCH_FAILED) | ||
| 148 | return EMULATE_DONE; | ||
| 149 | |||
| 150 | switch (get_op(inst)) { | 150 | switch (get_op(inst)) { |
| 151 | case OP_TRAP: | 151 | case OP_TRAP: |
| 152 | #ifdef CONFIG_PPC64 | 152 | #ifdef CONFIG_PPC_BOOK3S |
| 153 | case OP_TRAP_64: | 153 | case OP_TRAP_64: |
| 154 | kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); | 154 | kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); |
| 155 | #else | 155 | #else |
| @@ -171,6 +171,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 171 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | 171 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); |
| 172 | break; | 172 | break; |
| 173 | 173 | ||
| 174 | case OP_31_XOP_LBZUX: | ||
| 175 | rt = get_rt(inst); | ||
| 176 | ra = get_ra(inst); | ||
| 177 | rb = get_rb(inst); | ||
| 178 | |||
| 179 | ea = kvmppc_get_gpr(vcpu, rb); | ||
| 180 | if (ra) | ||
| 181 | ea += kvmppc_get_gpr(vcpu, ra); | ||
| 182 | |||
| 183 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | ||
| 184 | kvmppc_set_gpr(vcpu, ra, ea); | ||
| 185 | break; | ||
| 186 | |||
| 174 | case OP_31_XOP_STWX: | 187 | case OP_31_XOP_STWX: |
| 175 | rs = get_rs(inst); | 188 | rs = get_rs(inst); |
| 176 | emulated = kvmppc_handle_store(run, vcpu, | 189 | emulated = kvmppc_handle_store(run, vcpu, |
| @@ -200,6 +213,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 200 | kvmppc_set_gpr(vcpu, rs, ea); | 213 | kvmppc_set_gpr(vcpu, rs, ea); |
| 201 | break; | 214 | break; |
| 202 | 215 | ||
| 216 | case OP_31_XOP_LHAX: | ||
| 217 | rt = get_rt(inst); | ||
| 218 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | ||
| 219 | break; | ||
| 220 | |||
| 203 | case OP_31_XOP_LHZX: | 221 | case OP_31_XOP_LHZX: |
| 204 | rt = get_rt(inst); | 222 | rt = get_rt(inst); |
| 205 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | 223 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); |
| @@ -450,6 +468,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 450 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); | 468 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
| 451 | break; | 469 | break; |
| 452 | 470 | ||
| 471 | case OP_LHA: | ||
| 472 | rt = get_rt(inst); | ||
| 473 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | ||
| 474 | break; | ||
| 475 | |||
| 476 | case OP_LHAU: | ||
| 477 | ra = get_ra(inst); | ||
| 478 | rt = get_rt(inst); | ||
| 479 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | ||
| 480 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); | ||
| 481 | break; | ||
| 482 | |||
| 453 | case OP_STH: | 483 | case OP_STH: |
| 454 | rs = get_rs(inst); | 484 | rs = get_rs(inst); |
| 455 | emulated = kvmppc_handle_store(run, vcpu, | 485 | emulated = kvmppc_handle_store(run, vcpu, |
| @@ -472,7 +502,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 472 | 502 | ||
| 473 | if (emulated == EMULATE_FAIL) { | 503 | if (emulated == EMULATE_FAIL) { |
| 474 | emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); | 504 | emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); |
| 475 | if (emulated == EMULATE_FAIL) { | 505 | if (emulated == EMULATE_AGAIN) { |
| 506 | advance = 0; | ||
| 507 | } else if (emulated == EMULATE_FAIL) { | ||
| 476 | advance = 0; | 508 | advance = 0; |
| 477 | printk(KERN_ERR "Couldn't emulate instruction 0x%08x " | 509 | printk(KERN_ERR "Couldn't emulate instruction 0x%08x " |
| 478 | "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); | 510 | "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); |
| @@ -480,10 +512,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 480 | } | 512 | } |
| 481 | } | 513 | } |
| 482 | 514 | ||
| 483 | trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated); | 515 | trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); |
| 484 | 516 | ||
| 517 | /* Advance past emulated instruction. */ | ||
| 485 | if (advance) | 518 | if (advance) |
| 486 | vcpu->arch.pc += 4; /* Advance past emulated instruction. */ | 519 | kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); |
| 487 | 520 | ||
| 488 | return emulated; | 521 | return emulated; |
| 489 | } | 522 | } |
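
The new lha/lhau/lhax cases route through kvmppc_handle_loads(), which marks the pending MMIO load as sign-extending; the extension itself happens when the MMIO result is written back (see the kvmppc_complete_mmio_load() hunk further down). A minimal sketch of that step, as plain C for illustration:

    #include <stdint.h>

    /* Sketch: sign-extend an MMIO result of 'len' bytes to 64 bits, as the
     * lha family requires once the userspace MMIO exit completes. */
    static uint64_t mmio_sign_extend(uint64_t gpr, int len)
    {
            switch (len) {
            case 4: return (uint64_t)(int64_t)(int32_t)gpr;
            case 2: return (uint64_t)(int64_t)(int16_t)gpr;
            case 1: return (uint64_t)(int64_t)(int8_t)gpr;
            }
            return gpr;
    }

    /* e.g. mmio_sign_extend(0xff80, 2) == (uint64_t)-128 */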
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S new file mode 100644 index 000000000000..2b340a3eee90 --- /dev/null +++ b/arch/powerpc/kvm/fpu.S | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | /* | ||
| 2 | * FPU helper code to use FPU operations from inside the kernel | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010 Alexander Graf (agraf@suse.de) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <asm/reg.h> | ||
| 14 | #include <asm/page.h> | ||
| 15 | #include <asm/mmu.h> | ||
| 16 | #include <asm/pgtable.h> | ||
| 17 | #include <asm/cputable.h> | ||
| 18 | #include <asm/cache.h> | ||
| 19 | #include <asm/thread_info.h> | ||
| 20 | #include <asm/ppc_asm.h> | ||
| 21 | #include <asm/asm-offsets.h> | ||
| 22 | |||
| 23 | /* Instructions operating on single parameters */ | ||
| 24 | |||
| 25 | /* | ||
| 26 | * Single operation with one input operand | ||
| 27 | * | ||
| 28 | * R3 = (double*)&fpscr | ||
| 29 | * R4 = (short*)&result | ||
| 30 | * R5 = (short*)&param1 | ||
| 31 | */ | ||
| 32 | #define FPS_ONE_IN(name) \ | ||
| 33 | _GLOBAL(fps_ ## name); \ | ||
| 34 | lfd 0,0(r3); /* load up fpscr value */ \ | ||
| 35 | MTFSF_L(0); \ | ||
| 36 | lfs 0,0(r5); \ | ||
| 37 | \ | ||
| 38 | name 0,0; \ | ||
| 39 | \ | ||
| 40 | stfs 0,0(r4); \ | ||
| 41 | mffs 0; \ | ||
| 42 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
| 43 | blr | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Single operation with two input operands | ||
| 47 | * | ||
| 48 | * R3 = (double*)&fpscr | ||
| 49 | * R4 = (short*)&result | ||
| 50 | * R5 = (short*)&param1 | ||
| 51 | * R6 = (short*)&param2 | ||
| 52 | */ | ||
| 53 | #define FPS_TWO_IN(name) \ | ||
| 54 | _GLOBAL(fps_ ## name); \ | ||
| 55 | lfd 0,0(r3); /* load up fpscr value */ \ | ||
| 56 | MTFSF_L(0); \ | ||
| 57 | lfs 0,0(r5); \ | ||
| 58 | lfs 1,0(r6); \ | ||
| 59 | \ | ||
| 60 | name 0,0,1; \ | ||
| 61 | \ | ||
| 62 | stfs 0,0(r4); \ | ||
| 63 | mffs 0; \ | ||
| 64 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
| 65 | blr | ||
| 66 | |||
| 67 | /* | ||
| 68 | * Single operation with three input operands | ||
| 69 | * | ||
| 70 | * R3 = (double*)&fpscr | ||
| 71 | * R4 = (short*)&result | ||
| 72 | * R5 = (short*)&param1 | ||
| 73 | * R6 = (short*)&param2 | ||
| 74 | * R7 = (short*)&param3 | ||
| 75 | */ | ||
| 76 | #define FPS_THREE_IN(name) \ | ||
| 77 | _GLOBAL(fps_ ## name); \ | ||
| 78 | lfd 0,0(r3); /* load up fpscr value */ \ | ||
| 79 | MTFSF_L(0); \ | ||
| 80 | lfs 0,0(r5); \ | ||
| 81 | lfs 1,0(r6); \ | ||
| 82 | lfs 2,0(r7); \ | ||
| 83 | \ | ||
| 84 | name 0,0,1,2; \ | ||
| 85 | \ | ||
| 86 | stfs 0,0(r4); \ | ||
| 87 | mffs 0; \ | ||
| 88 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
| 89 | blr | ||
| 90 | |||
| 91 | FPS_ONE_IN(fres) | ||
| 92 | FPS_ONE_IN(frsqrte) | ||
| 93 | FPS_ONE_IN(fsqrts) | ||
| 94 | FPS_TWO_IN(fadds) | ||
| 95 | FPS_TWO_IN(fdivs) | ||
| 96 | FPS_TWO_IN(fmuls) | ||
| 97 | FPS_TWO_IN(fsubs) | ||
| 98 | FPS_THREE_IN(fmadds) | ||
| 99 | FPS_THREE_IN(fmsubs) | ||
| 100 | FPS_THREE_IN(fnmadds) | ||
| 101 | FPS_THREE_IN(fnmsubs) | ||
| 102 | FPS_THREE_IN(fsel) | ||
| 103 | |||
| 104 | |||
| 105 | /* Instructions operating on double parameters */ | ||
| 106 | |||
| 107 | /* | ||
| 108 | * Beginning of double instruction processing | ||
| 109 | * | ||
| 110 | * R3 = (double*)&fpscr | ||
| 111 | * R4 = (u32*)&cr | ||
| 112 | * R5 = (double*)&result | ||
| 113 | * R6 = (double*)&param1 | ||
| 114 | * R7 = (double*)&param2 [load_two] | ||
| 115 | * R8 = (double*)&param3 [load_three] | ||
| 116 | * LR = instruction call function | ||
| 117 | */ | ||
| 118 | fpd_load_three: | ||
| 119 | lfd 2,0(r8) /* load param3 */ | ||
| 120 | fpd_load_two: | ||
| 121 | lfd 1,0(r7) /* load param2 */ | ||
| 122 | fpd_load_one: | ||
| 123 | lfd 0,0(r6) /* load param1 */ | ||
| 124 | fpd_load_none: | ||
| 125 | lfd 3,0(r3) /* load up fpscr value */ | ||
| 126 | MTFSF_L(3) | ||
| 127 | lwz r6, 0(r4) /* load cr */ | ||
| 128 | mtcr r6 | ||
| 129 | blr | ||
| 130 | |||
| 131 | /* | ||
| 132 | * End of double instruction processing | ||
| 133 | * | ||
| 134 | * R3 = (double*)&fpscr | ||
| 135 | * R4 = (u32*)&cr | ||
| 136 | * R5 = (double*)&result | ||
| 137 | * LR = caller of instruction call function | ||
| 138 | */ | ||
| 139 | fpd_return: | ||
| 140 | mfcr r6 | ||
| 141 | stfd 0,0(r5) /* save result */ | ||
| 142 | mffs 0 | ||
| 143 | stfd 0,0(r3) /* save new fpscr value */ | ||
| 144 | stw r6,0(r4) /* save new cr value */ | ||
| 145 | blr | ||
| 146 | |||
| 147 | /* | ||
| 148 | * Double operation with no input operand | ||
| 149 | * | ||
| 150 | * R3 = (double*)&fpscr | ||
| 151 | * R4 = (u32*)&cr | ||
| 152 | * R5 = (double*)&result | ||
| 153 | */ | ||
| 154 | #define FPD_NONE_IN(name) \ | ||
| 155 | _GLOBAL(fpd_ ## name); \ | ||
| 156 | mflr r12; \ | ||
| 157 | bl fpd_load_none; \ | ||
| 158 | mtlr r12; \ | ||
| 159 | \ | ||
| 160 | name. 0; /* call instruction */ \ | ||
| 161 | b fpd_return | ||
| 162 | |||
| 163 | /* | ||
| 164 | * Double operation with one input operand | ||
| 165 | * | ||
| 166 | * R3 = (double*)&fpscr | ||
| 167 | * R4 = (u32*)&cr | ||
| 168 | * R5 = (double*)&result | ||
| 169 | * R6 = (double*)&param1 | ||
| 170 | */ | ||
| 171 | #define FPD_ONE_IN(name) \ | ||
| 172 | _GLOBAL(fpd_ ## name); \ | ||
| 173 | mflr r12; \ | ||
| 174 | bl fpd_load_one; \ | ||
| 175 | mtlr r12; \ | ||
| 176 | \ | ||
| 177 | name. 0,0; /* call instruction */ \ | ||
| 178 | b fpd_return | ||
| 179 | |||
| 180 | /* | ||
| 181 | * Double operation with two input operands | ||
| 182 | * | ||
| 183 | * R3 = (double*)&fpscr | ||
| 184 | * R4 = (u32*)&cr | ||
| 185 | * R5 = (double*)&result | ||
| 186 | * R6 = (double*)&param1 | ||
| 187 | * R7 = (double*)&param2 | ||
| 188 | * R8 = (double*)&param3 | ||
| 189 | */ | ||
| 190 | #define FPD_TWO_IN(name) \ | ||
| 191 | _GLOBAL(fpd_ ## name); \ | ||
| 192 | mflr r12; \ | ||
| 193 | bl fpd_load_two; \ | ||
| 194 | mtlr r12; \ | ||
| 195 | \ | ||
| 196 | name. 0,0,1; /* call instruction */ \ | ||
| 197 | b fpd_return | ||
| 198 | |||
| 199 | /* | ||
| 200 | * CR Double operation with two input operands | ||
| 201 | * | ||
| 202 | * R3 = (double*)&fpscr | ||
| 203 | * R4 = (u32*)&cr | ||
| 204 | * R5 = (double*)&param1 | ||
| 205 | * R6 = (double*)&param2 | ||
| 206 | * R7 = (double*)&param3 | ||
| 207 | */ | ||
| 208 | #define FPD_TWO_IN_CR(name) \ | ||
| 209 | _GLOBAL(fpd_ ## name); \ | ||
| 210 | lfd 1,0(r6); /* load param2 */ \ | ||
| 211 | lfd 0,0(r5); /* load param1 */ \ | ||
| 212 | lfd 3,0(r3); /* load up fpscr value */ \ | ||
| 213 | MTFSF_L(3); \ | ||
| 214 | lwz r6, 0(r4); /* load cr */ \ | ||
| 215 | mtcr r6; \ | ||
| 216 | \ | ||
| 217 | name 0,0,1; /* call instruction */ \ | ||
| 218 | mfcr r6; \ | ||
| 219 | mffs 0; \ | ||
| 220 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
| 221 | stw r6,0(r4); /* save new cr value */ \ | ||
| 222 | blr | ||
| 223 | |||
| 224 | /* | ||
| 225 | * Double operation with three input operands | ||
| 226 | * | ||
| 227 | * R3 = (double*)&fpscr | ||
| 228 | * R4 = (u32*)&cr | ||
| 229 | * R5 = (double*)&result | ||
| 230 | * R6 = (double*)&param1 | ||
| 231 | * R7 = (double*)&param2 | ||
| 232 | * R8 = (double*)&param3 | ||
| 233 | */ | ||
| 234 | #define FPD_THREE_IN(name) \ | ||
| 235 | _GLOBAL(fpd_ ## name); \ | ||
| 236 | mflr r12; \ | ||
| 237 | bl fpd_load_three; \ | ||
| 238 | mtlr r12; \ | ||
| 239 | \ | ||
| 240 | name. 0,0,1,2; /* call instruction */ \ | ||
| 241 | b fpd_return | ||
| 242 | |||
| 243 | FPD_ONE_IN(fsqrts) | ||
| 244 | FPD_ONE_IN(frsqrtes) | ||
| 245 | FPD_ONE_IN(fres) | ||
| 246 | FPD_ONE_IN(frsp) | ||
| 247 | FPD_ONE_IN(fctiw) | ||
| 248 | FPD_ONE_IN(fctiwz) | ||
| 249 | FPD_ONE_IN(fsqrt) | ||
| 250 | FPD_ONE_IN(fre) | ||
| 251 | FPD_ONE_IN(frsqrte) | ||
| 252 | FPD_ONE_IN(fneg) | ||
| 253 | FPD_ONE_IN(fabs) | ||
| 254 | FPD_TWO_IN(fadds) | ||
| 255 | FPD_TWO_IN(fsubs) | ||
| 256 | FPD_TWO_IN(fdivs) | ||
| 257 | FPD_TWO_IN(fmuls) | ||
| 258 | FPD_TWO_IN_CR(fcmpu) | ||
| 259 | FPD_TWO_IN(fcpsgn) | ||
| 260 | FPD_TWO_IN(fdiv) | ||
| 261 | FPD_TWO_IN(fadd) | ||
| 262 | FPD_TWO_IN(fmul) | ||
| 263 | FPD_TWO_IN_CR(fcmpo) | ||
| 264 | FPD_TWO_IN(fsub) | ||
| 265 | FPD_THREE_IN(fmsubs) | ||
| 266 | FPD_THREE_IN(fmadds) | ||
| 267 | FPD_THREE_IN(fnmsubs) | ||
| 268 | FPD_THREE_IN(fnmadds) | ||
| 269 | FPD_THREE_IN(fsel) | ||
| 270 | FPD_THREE_IN(fmsub) | ||
| 271 | FPD_THREE_IN(fmadd) | ||
| 272 | FPD_THREE_IN(fnmsub) | ||
| 273 | FPD_THREE_IN(fnmadd) | ||
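
These fps_*/fpd_* entry points are what the paired-single emulation above calls (fpd_fadd, fpd_fcmpu, ...). Their C-side prototypes are not part of this hunk; based on the register conventions documented in the comments (R3 = &fpscr, R4 = &cr or &result, remaining registers = operands), declarations along these lines would be expected, with the exact header and parameter names treated as assumptions:

    /* Sketch of the implied C prototypes, not copied from the patch. */
    extern void fps_fadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
    extern void fpd_fadd(u64 *fpscr, u32 *cr, u64 *dst, u64 *src1, u64 *src2);
    extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2);
    extern void fpd_fmadd(u64 *fpscr, u32 *cr, u64 *dst,
                          u64 *src1, u64 *src2, u64 *src3);

This matches how the emulation code invokes them earlier in the series, e.g. fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b).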
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 297fcd2ff7d0..9b8683f39e05 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -70,7 +70,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 70 | case EMULATE_FAIL: | 70 | case EMULATE_FAIL: |
| 71 | /* XXX Deliver Program interrupt to guest. */ | 71 | /* XXX Deliver Program interrupt to guest. */ |
| 72 | printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, | 72 | printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, |
| 73 | vcpu->arch.last_inst); | 73 | kvmppc_get_last_inst(vcpu)); |
| 74 | r = RESUME_HOST; | 74 | r = RESUME_HOST; |
| 75 | break; | 75 | break; |
| 76 | default: | 76 | default: |
| @@ -148,6 +148,10 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 148 | 148 | ||
| 149 | switch (ext) { | 149 | switch (ext) { |
| 150 | case KVM_CAP_PPC_SEGSTATE: | 150 | case KVM_CAP_PPC_SEGSTATE: |
| 151 | case KVM_CAP_PPC_PAIRED_SINGLES: | ||
| 152 | case KVM_CAP_PPC_UNSET_IRQ: | ||
| 153 | case KVM_CAP_ENABLE_CAP: | ||
| 154 | case KVM_CAP_PPC_OSI: | ||
| 151 | r = 1; | 155 | r = 1; |
| 152 | break; | 156 | break; |
| 153 | case KVM_CAP_COALESCED_MMIO: | 157 | case KVM_CAP_COALESCED_MMIO: |
| @@ -193,12 +197,17 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 193 | { | 197 | { |
| 194 | struct kvm_vcpu *vcpu; | 198 | struct kvm_vcpu *vcpu; |
| 195 | vcpu = kvmppc_core_vcpu_create(kvm, id); | 199 | vcpu = kvmppc_core_vcpu_create(kvm, id); |
| 196 | kvmppc_create_vcpu_debugfs(vcpu, id); | 200 | if (!IS_ERR(vcpu)) |
| 201 | kvmppc_create_vcpu_debugfs(vcpu, id); | ||
| 197 | return vcpu; | 202 | return vcpu; |
| 198 | } | 203 | } |
| 199 | 204 | ||
| 200 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 205 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
| 201 | { | 206 | { |
| 207 | /* Make sure we're not using the vcpu anymore */ | ||
| 208 | hrtimer_cancel(&vcpu->arch.dec_timer); | ||
| 209 | tasklet_kill(&vcpu->arch.tasklet); | ||
| 210 | |||
| 202 | kvmppc_remove_vcpu_debugfs(vcpu); | 211 | kvmppc_remove_vcpu_debugfs(vcpu); |
| 203 | kvmppc_core_vcpu_free(vcpu); | 212 | kvmppc_core_vcpu_free(vcpu); |
| 204 | } | 213 | } |
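
Cancelling the decrementer hrtimer and killing its tasklet before freeing closes a window in which either could still fire against a dead vcpu. For orientation, the init-time counterpart that this teardown undoes looks roughly like the sketch below; the callback names are assumptions, not taken from this hunk:

    /* Sketch of the matching init-time setup (names assumed). */
    int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
    {
            hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
            tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func,
                         (unsigned long)vcpu);
            vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;

            return 0;
    }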
| @@ -278,7 +287,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | |||
| 278 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | 287 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, |
| 279 | struct kvm_run *run) | 288 | struct kvm_run *run) |
| 280 | { | 289 | { |
| 281 | ulong gpr; | 290 | u64 gpr; |
| 282 | 291 | ||
| 283 | if (run->mmio.len > sizeof(gpr)) { | 292 | if (run->mmio.len > sizeof(gpr)) { |
| 284 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); | 293 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); |
| @@ -287,6 +296,7 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
| 287 | 296 | ||
| 288 | if (vcpu->arch.mmio_is_bigendian) { | 297 | if (vcpu->arch.mmio_is_bigendian) { |
| 289 | switch (run->mmio.len) { | 298 | switch (run->mmio.len) { |
| 299 | case 8: gpr = *(u64 *)run->mmio.data; break; | ||
| 290 | case 4: gpr = *(u32 *)run->mmio.data; break; | 300 | case 4: gpr = *(u32 *)run->mmio.data; break; |
| 291 | case 2: gpr = *(u16 *)run->mmio.data; break; | 301 | case 2: gpr = *(u16 *)run->mmio.data; break; |
| 292 | case 1: gpr = *(u8 *)run->mmio.data; break; | 302 | case 1: gpr = *(u8 *)run->mmio.data; break; |
| @@ -300,7 +310,43 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
| 300 | } | 310 | } |
| 301 | } | 311 | } |
| 302 | 312 | ||
| 313 | if (vcpu->arch.mmio_sign_extend) { | ||
| 314 | switch (run->mmio.len) { | ||
| 315 | #ifdef CONFIG_PPC64 | ||
| 316 | case 4: | ||
| 317 | gpr = (s64)(s32)gpr; | ||
| 318 | break; | ||
| 319 | #endif | ||
| 320 | case 2: | ||
| 321 | gpr = (s64)(s16)gpr; | ||
| 322 | break; | ||
| 323 | case 1: | ||
| 324 | gpr = (s64)(s8)gpr; | ||
| 325 | break; | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 303 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | 329 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); |
| 330 | |||
| 331 | switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { | ||
| 332 | case KVM_REG_GPR: | ||
| 333 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | ||
| 334 | break; | ||
| 335 | case KVM_REG_FPR: | ||
| 336 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
| 337 | break; | ||
| 338 | #ifdef CONFIG_PPC_BOOK3S | ||
| 339 | case KVM_REG_QPR: | ||
| 340 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
| 341 | break; | ||
| 342 | case KVM_REG_FQPR: | ||
| 343 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
| 344 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
| 345 | break; | ||
| 346 | #endif | ||
| 347 | default: | ||
| 348 | BUG(); | ||
| 349 | } | ||
| 304 | } | 350 | } |
| 305 | 351 | ||
| 306 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | 352 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| @@ -319,12 +365,25 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 319 | vcpu->arch.mmio_is_bigendian = is_bigendian; | 365 | vcpu->arch.mmio_is_bigendian = is_bigendian; |
| 320 | vcpu->mmio_needed = 1; | 366 | vcpu->mmio_needed = 1; |
| 321 | vcpu->mmio_is_write = 0; | 367 | vcpu->mmio_is_write = 0; |
| 368 | vcpu->arch.mmio_sign_extend = 0; | ||
| 322 | 369 | ||
| 323 | return EMULATE_DO_MMIO; | 370 | return EMULATE_DO_MMIO; |
| 324 | } | 371 | } |
| 325 | 372 | ||
| 373 | /* Same as above, but sign extends */ | ||
| 374 | int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
| 375 | unsigned int rt, unsigned int bytes, int is_bigendian) | ||
| 376 | { | ||
| 377 | int r; | ||
| 378 | |||
| 379 | r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); | ||
| 380 | vcpu->arch.mmio_sign_extend = 1; | ||
| 381 | |||
| 382 | return r; | ||
| 383 | } | ||
| 384 | |||
| 326 | int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | 385 | int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| 327 | u32 val, unsigned int bytes, int is_bigendian) | 386 | u64 val, unsigned int bytes, int is_bigendian) |
| 328 | { | 387 | { |
| 329 | void *data = run->mmio.data; | 388 | void *data = run->mmio.data; |
| 330 | 389 | ||
| @@ -342,6 +401,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 342 | /* Store the value at the lowest bytes in 'data'. */ | 401 | /* Store the value at the lowest bytes in 'data'. */ |
| 343 | if (is_bigendian) { | 402 | if (is_bigendian) { |
| 344 | switch (bytes) { | 403 | switch (bytes) { |
| 404 | case 8: *(u64 *)data = val; break; | ||
| 345 | case 4: *(u32 *)data = val; break; | 405 | case 4: *(u32 *)data = val; break; |
| 346 | case 2: *(u16 *)data = val; break; | 406 | case 2: *(u16 *)data = val; break; |
| 347 | case 1: *(u8 *)data = val; break; | 407 | case 1: *(u8 *)data = val; break; |
| @@ -376,6 +436,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 376 | if (!vcpu->arch.dcr_is_write) | 436 | if (!vcpu->arch.dcr_is_write) |
| 377 | kvmppc_complete_dcr_load(vcpu, run); | 437 | kvmppc_complete_dcr_load(vcpu, run); |
| 378 | vcpu->arch.dcr_needed = 0; | 438 | vcpu->arch.dcr_needed = 0; |
| 439 | } else if (vcpu->arch.osi_needed) { | ||
| 440 | u64 *gprs = run->osi.gprs; | ||
| 441 | int i; | ||
| 442 | |||
| 443 | for (i = 0; i < 32; i++) | ||
| 444 | kvmppc_set_gpr(vcpu, i, gprs[i]); | ||
| 445 | vcpu->arch.osi_needed = 0; | ||
| 379 | } | 446 | } |
| 380 | 447 | ||
| 381 | kvmppc_core_deliver_interrupts(vcpu); | 448 | kvmppc_core_deliver_interrupts(vcpu); |
| @@ -396,7 +463,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 396 | 463 | ||
| 397 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 464 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
| 398 | { | 465 | { |
| 399 | kvmppc_core_queue_external(vcpu, irq); | 466 | if (irq->irq == KVM_INTERRUPT_UNSET) |
| 467 | kvmppc_core_dequeue_external(vcpu, irq); | ||
| 468 | else | ||
| 469 | kvmppc_core_queue_external(vcpu, irq); | ||
| 400 | 470 | ||
| 401 | if (waitqueue_active(&vcpu->wq)) { | 471 | if (waitqueue_active(&vcpu->wq)) { |
| 402 | wake_up_interruptible(&vcpu->wq); | 472 | wake_up_interruptible(&vcpu->wq); |
| @@ -406,6 +476,27 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | |||
| 406 | return 0; | 476 | return 0; |
| 407 | } | 477 | } |
| 408 | 478 | ||
| 479 | static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | ||
| 480 | struct kvm_enable_cap *cap) | ||
| 481 | { | ||
| 482 | int r; | ||
| 483 | |||
| 484 | if (cap->flags) | ||
| 485 | return -EINVAL; | ||
| 486 | |||
| 487 | switch (cap->cap) { | ||
| 488 | case KVM_CAP_PPC_OSI: | ||
| 489 | r = 0; | ||
| 490 | vcpu->arch.osi_enabled = true; | ||
| 491 | break; | ||
| 492 | default: | ||
| 493 | r = -EINVAL; | ||
| 494 | break; | ||
| 495 | } | ||
| 496 | |||
| 497 | return r; | ||
| 498 | } | ||
| 499 | |||
| 409 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 500 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
| 410 | struct kvm_mp_state *mp_state) | 501 | struct kvm_mp_state *mp_state) |
| 411 | { | 502 | { |
| @@ -434,6 +525,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 434 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); | 525 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); |
| 435 | break; | 526 | break; |
| 436 | } | 527 | } |
| 528 | case KVM_ENABLE_CAP: | ||
| 529 | { | ||
| 530 | struct kvm_enable_cap cap; | ||
| 531 | r = -EFAULT; | ||
| 532 | if (copy_from_user(&cap, argp, sizeof(cap))) | ||
| 533 | goto out; | ||
| 534 | r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); | ||
| 535 | break; | ||
| 536 | } | ||
| 437 | default: | 537 | default: |
| 438 | r = -EINVAL; | 538 | r = -EINVAL; |
| 439 | } | 539 | } |
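
With KVM_CAP_ENABLE_CAP and KVM_CAP_PPC_OSI now advertised, userspace opts a vcpu into OSI hypercall interception through the generic KVM_ENABLE_CAP vcpu ioctl. A minimal userspace sketch, assuming headers that already carry the new definitions and an already-created vcpu file descriptor:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Sketch: enable OSI hypercall exits on an existing vcpu fd. */
    static int enable_ppc_osi(int vcpu_fd)
    {
            struct kvm_enable_cap cap;

            memset(&cap, 0, sizeof(cap));
            cap.cap = KVM_CAP_PPC_OSI;      /* flags and args stay zero */

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }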
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index 0dfba2bf7f31..d0ee554e86e4 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c | |||
| @@ -60,11 +60,7 @@ | |||
| 60 | static unsigned long next_mmu_context; | 60 | static unsigned long next_mmu_context; |
| 61 | static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; | 61 | static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; |
| 62 | 62 | ||
| 63 | 63 | unsigned long __init_new_context(void) | |
| 64 | /* | ||
| 65 | * Set up the context for a new address space. | ||
| 66 | */ | ||
| 67 | int init_new_context(struct task_struct *t, struct mm_struct *mm) | ||
| 68 | { | 64 | { |
| 69 | unsigned long ctx = next_mmu_context; | 65 | unsigned long ctx = next_mmu_context; |
| 70 | 66 | ||
| @@ -74,19 +70,38 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) | |||
| 74 | ctx = 0; | 70 | ctx = 0; |
| 75 | } | 71 | } |
| 76 | next_mmu_context = (ctx + 1) & LAST_CONTEXT; | 72 | next_mmu_context = (ctx + 1) & LAST_CONTEXT; |
| 77 | mm->context.id = ctx; | 73 | |
| 74 | return ctx; | ||
| 75 | } | ||
| 76 | EXPORT_SYMBOL_GPL(__init_new_context); | ||
| 77 | |||
| 78 | /* | ||
| 79 | * Set up the context for a new address space. | ||
| 80 | */ | ||
| 81 | int init_new_context(struct task_struct *t, struct mm_struct *mm) | ||
| 82 | { | ||
| 83 | mm->context.id = __init_new_context(); | ||
| 78 | 84 | ||
| 79 | return 0; | 85 | return 0; |
| 80 | } | 86 | } |
| 81 | 87 | ||
| 82 | /* | 88 | /* |
| 89 | * Free a context ID. Make sure to call this with preempt disabled! | ||
| 90 | */ | ||
| 91 | void __destroy_context(unsigned long ctx) | ||
| 92 | { | ||
| 93 | clear_bit(ctx, context_map); | ||
| 94 | } | ||
| 95 | EXPORT_SYMBOL_GPL(__destroy_context); | ||
| 96 | |||
| 97 | /* | ||
| 83 | * We're finished using the context for an address space. | 98 | * We're finished using the context for an address space. |
| 84 | */ | 99 | */ |
| 85 | void destroy_context(struct mm_struct *mm) | 100 | void destroy_context(struct mm_struct *mm) |
| 86 | { | 101 | { |
| 87 | preempt_disable(); | 102 | preempt_disable(); |
| 88 | if (mm->context.id != NO_CONTEXT) { | 103 | if (mm->context.id != NO_CONTEXT) { |
| 89 | clear_bit(mm->context.id, context_map); | 104 | __destroy_context(mm->context.id); |
| 90 | mm->context.id = NO_CONTEXT; | 105 | mm->context.id = NO_CONTEXT; |
| 91 | } | 106 | } |
| 92 | preempt_enable(); | 107 | preempt_enable(); |
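
Splitting the allocator into __init_new_context()/__destroy_context() and exporting both lets 32-bit Book3S KVM reserve host hash-MMU context IDs for guest mappings without going through an mm_struct. A hedged sketch of how a user of the new exports might grab and release an ID (the guest_ctx variable is purely illustrative):

    /* Sketch: reserve a hash-MMU context ID for guest use, then release it. */
    static unsigned long guest_ctx;

    static void alloc_guest_context(void)
    {
            guest_ctx = __init_new_context();
    }

    static void free_guest_context(void)
    {
            preempt_disable();      /* __destroy_context() requires it */
            __destroy_context(guest_ctx);
            preempt_enable();
    }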
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 49292869a5cd..8093e6f47f49 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -341,11 +341,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
| 341 | 341 | ||
| 342 | rc = kvm_vcpu_init(vcpu, kvm, id); | 342 | rc = kvm_vcpu_init(vcpu, kvm, id); |
| 343 | if (rc) | 343 | if (rc) |
| 344 | goto out_free_cpu; | 344 | goto out_free_sie_block; |
| 345 | VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, | 345 | VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, |
| 346 | vcpu->arch.sie_block); | 346 | vcpu->arch.sie_block); |
| 347 | 347 | ||
| 348 | return vcpu; | 348 | return vcpu; |
| 349 | out_free_sie_block: | ||
| 350 | free_page((unsigned long)(vcpu->arch.sie_block)); | ||
| 349 | out_free_cpu: | 351 | out_free_cpu: |
| 350 | kfree(vcpu); | 352 | kfree(vcpu); |
| 351 | out_nomem: | 353 | out_nomem: |
| @@ -750,7 +752,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
| 750 | static int __init kvm_s390_init(void) | 752 | static int __init kvm_s390_init(void) |
| 751 | { | 753 | { |
| 752 | int ret; | 754 | int ret; |
| 753 | ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); | 755 | ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); |
| 754 | if (ret) | 756 | if (ret) |
| 755 | return ret; | 757 | return ret; |
| 756 | 758 | ||
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 60f09ab3672c..cfa9d1777457 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -72,7 +72,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) | |||
| 72 | struct kvm_memslots *memslots; | 72 | struct kvm_memslots *memslots; |
| 73 | 73 | ||
| 74 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 74 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 75 | memslots = rcu_dereference(vcpu->kvm->memslots); | 75 | memslots = kvm_memslots(vcpu->kvm); |
| 76 | 76 | ||
| 77 | mem = &memslots->memslots[0]; | 77 | mem = &memslots->memslots[0]; |
| 78 | 78 | ||
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f46b79f6c16c..ff90055c7f0b 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #define __KVM_HAVE_PIT_STATE2 | 21 | #define __KVM_HAVE_PIT_STATE2 |
| 22 | #define __KVM_HAVE_XEN_HVM | 22 | #define __KVM_HAVE_XEN_HVM |
| 23 | #define __KVM_HAVE_VCPU_EVENTS | 23 | #define __KVM_HAVE_VCPU_EVENTS |
| 24 | #define __KVM_HAVE_DEBUGREGS | ||
| 24 | 25 | ||
| 25 | /* Architectural interrupt line count. */ | 26 | /* Architectural interrupt line count. */ |
| 26 | #define KVM_NR_INTERRUPTS 256 | 27 | #define KVM_NR_INTERRUPTS 256 |
| @@ -257,6 +258,11 @@ struct kvm_reinject_control { | |||
| 257 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ | 258 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ |
| 258 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 | 259 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 |
| 259 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 | 260 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 |
| 261 | #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 | ||
| 262 | |||
| 263 | /* Interrupt shadow states */ | ||
| 264 | #define KVM_X86_SHADOW_INT_MOV_SS 0x01 | ||
| 265 | #define KVM_X86_SHADOW_INT_STI 0x02 | ||
| 260 | 266 | ||
| 261 | /* for KVM_GET/SET_VCPU_EVENTS */ | 267 | /* for KVM_GET/SET_VCPU_EVENTS */ |
| 262 | struct kvm_vcpu_events { | 268 | struct kvm_vcpu_events { |
| @@ -271,7 +277,7 @@ struct kvm_vcpu_events { | |||
| 271 | __u8 injected; | 277 | __u8 injected; |
| 272 | __u8 nr; | 278 | __u8 nr; |
| 273 | __u8 soft; | 279 | __u8 soft; |
| 274 | __u8 pad; | 280 | __u8 shadow; |
| 275 | } interrupt; | 281 | } interrupt; |
| 276 | struct { | 282 | struct { |
| 277 | __u8 injected; | 283 | __u8 injected; |
| @@ -284,4 +290,13 @@ struct kvm_vcpu_events { | |||
| 284 | __u32 reserved[10]; | 290 | __u32 reserved[10]; |
| 285 | }; | 291 | }; |
| 286 | 292 | ||
| 293 | /* for KVM_GET/SET_DEBUGREGS */ | ||
| 294 | struct kvm_debugregs { | ||
| 295 | __u64 db[4]; | ||
| 296 | __u64 dr6; | ||
| 297 | __u64 dr7; | ||
| 298 | __u64 flags; | ||
| 299 | __u64 reserved[9]; | ||
| 300 | }; | ||
| 301 | |||
| 287 | #endif /* _ASM_X86_KVM_H */ | 302 | #endif /* _ASM_X86_KVM_H */ |
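The struct kvm_debugregs added above is the userspace-visible format for the guest's debug registers. A minimal userspace sketch of a read-modify-write round trip, assuming a vcpu file descriptor and the matching KVM_GET_DEBUGREGS/KVM_SET_DEBUGREGS ioctls; the helper name and error handling are illustrative only:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* Illustrative only: vcpu_fd is assumed to have been set up elsewhere. */
static void dump_and_clear_dr7(int vcpu_fd)
{
	struct kvm_debugregs dbg;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
		return;
	printf("dr6=%llx dr7=%llx\n",
	       (unsigned long long)dbg.dr6, (unsigned long long)dbg.dr7);

	dbg.dr7 = 0;		/* disable all hardware breakpoints */
	dbg.flags = 0;		/* no flag bits are defined by this patch */
	ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
}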
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7a6f54fa13ba..0b2729bf2070 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H | 11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H |
| 12 | #define _ASM_X86_KVM_X86_EMULATE_H | 12 | #define _ASM_X86_KVM_X86_EMULATE_H |
| 13 | 13 | ||
| 14 | #include <asm/desc_defs.h> | ||
| 15 | |||
| 14 | struct x86_emulate_ctxt; | 16 | struct x86_emulate_ctxt; |
| 15 | 17 | ||
| 16 | /* | 18 | /* |
| @@ -63,6 +65,15 @@ struct x86_emulate_ops { | |||
| 63 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | 65 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); |
| 64 | 66 | ||
| 65 | /* | 67 | /* |
| 68 | * write_std: Write bytes of standard (non-emulated/special) memory. | ||
| 69 | * Used for descriptor writing. | ||
| 70 | * @addr: [IN ] Linear address to which to write. | ||
| 71 | * @val: [IN ] Value to write to memory. | ||
| 72 | * @bytes: [IN ] Number of bytes to write to memory. | ||
| 73 | */ | ||
| 74 | int (*write_std)(unsigned long addr, void *val, | ||
| 75 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | ||
| 76 | /* | ||
| 66 | * fetch: Read bytes of standard (non-emulated/special) memory. | 77 | * fetch: Read bytes of standard (non-emulated/special) memory. |
| 67 | * Used for instruction fetch. | 78 | * Used for instruction fetch. |
| 68 | * @addr: [IN ] Linear address from which to read. | 79 | * @addr: [IN ] Linear address from which to read. |
| @@ -109,6 +120,23 @@ struct x86_emulate_ops { | |||
| 109 | unsigned int bytes, | 120 | unsigned int bytes, |
| 110 | struct kvm_vcpu *vcpu); | 121 | struct kvm_vcpu *vcpu); |
| 111 | 122 | ||
| 123 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | ||
| 124 | unsigned int count, struct kvm_vcpu *vcpu); | ||
| 125 | |||
| 126 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | ||
| 127 | unsigned int count, struct kvm_vcpu *vcpu); | ||
| 128 | |||
| 129 | bool (*get_cached_descriptor)(struct desc_struct *desc, | ||
| 130 | int seg, struct kvm_vcpu *vcpu); | ||
| 131 | void (*set_cached_descriptor)(struct desc_struct *desc, | ||
| 132 | int seg, struct kvm_vcpu *vcpu); | ||
| 133 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | ||
| 134 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | ||
| 135 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | ||
| 136 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | ||
| 137 | void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | ||
| 138 | int (*cpl)(struct kvm_vcpu *vcpu); | ||
| 139 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
| 112 | }; | 140 | }; |
| 113 | 141 | ||
| 114 | /* Type, address-of, and value of an instruction's operand. */ | 142 | /* Type, address-of, and value of an instruction's operand. */ |
| @@ -124,6 +152,12 @@ struct fetch_cache { | |||
| 124 | unsigned long end; | 152 | unsigned long end; |
| 125 | }; | 153 | }; |
| 126 | 154 | ||
| 155 | struct read_cache { | ||
| 156 | u8 data[1024]; | ||
| 157 | unsigned long pos; | ||
| 158 | unsigned long end; | ||
| 159 | }; | ||
| 160 | |||
| 127 | struct decode_cache { | 161 | struct decode_cache { |
| 128 | u8 twobyte; | 162 | u8 twobyte; |
| 129 | u8 b; | 163 | u8 b; |
| @@ -139,7 +173,7 @@ struct decode_cache { | |||
| 139 | u8 seg_override; | 173 | u8 seg_override; |
| 140 | unsigned int d; | 174 | unsigned int d; |
| 141 | unsigned long regs[NR_VCPU_REGS]; | 175 | unsigned long regs[NR_VCPU_REGS]; |
| 142 | unsigned long eip, eip_orig; | 176 | unsigned long eip; |
| 143 | /* modrm */ | 177 | /* modrm */ |
| 144 | u8 modrm; | 178 | u8 modrm; |
| 145 | u8 modrm_mod; | 179 | u8 modrm_mod; |
| @@ -151,16 +185,15 @@ struct decode_cache { | |||
| 151 | void *modrm_ptr; | 185 | void *modrm_ptr; |
| 152 | unsigned long modrm_val; | 186 | unsigned long modrm_val; |
| 153 | struct fetch_cache fetch; | 187 | struct fetch_cache fetch; |
| 188 | struct read_cache io_read; | ||
| 154 | }; | 189 | }; |
| 155 | 190 | ||
| 156 | #define X86_SHADOW_INT_MOV_SS 1 | ||
| 157 | #define X86_SHADOW_INT_STI 2 | ||
| 158 | |||
| 159 | struct x86_emulate_ctxt { | 191 | struct x86_emulate_ctxt { |
| 160 | /* Register state before/after emulation. */ | 192 | /* Register state before/after emulation. */ |
| 161 | struct kvm_vcpu *vcpu; | 193 | struct kvm_vcpu *vcpu; |
| 162 | 194 | ||
| 163 | unsigned long eflags; | 195 | unsigned long eflags; |
| 196 | unsigned long eip; /* eip before instruction emulation */ | ||
| 164 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 197 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
| 165 | int mode; | 198 | int mode; |
| 166 | u32 cs_base; | 199 | u32 cs_base; |
| @@ -168,6 +201,7 @@ struct x86_emulate_ctxt { | |||
| 168 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 201 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
| 169 | int interruptibility; | 202 | int interruptibility; |
| 170 | 203 | ||
| 204 | bool restart; /* restart string instruction after writeback */ | ||
| 171 | /* decode cache */ | 205 | /* decode cache */ |
| 172 | struct decode_cache decode; | 206 | struct decode_cache decode; |
| 173 | }; | 207 | }; |
| @@ -194,5 +228,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, | |||
| 194 | struct x86_emulate_ops *ops); | 228 | struct x86_emulate_ops *ops); |
| 195 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, | 229 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, |
| 196 | struct x86_emulate_ops *ops); | 230 | struct x86_emulate_ops *ops); |
| 231 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
| 232 | struct x86_emulate_ops *ops, | ||
| 233 | u16 tss_selector, int reason, | ||
| 234 | bool has_error_code, u32 error_code); | ||
| 197 | 235 | ||
| 198 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 236 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
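The new x86_emulate_ops callbacks above keep the emulator from touching the vcpu directly; the host supplies accessors instead. A hypothetical, partial wiring to show the shape (the real table lives in the arch code and is not part of this hunk; kvm_set_rflags is declared in kvm_host.h further down, and get_cpl is an existing kvm_x86_ops hook):

static int sketch_cpl(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops->get_cpl(vcpu);	/* existing per-vendor hook */
}

static void sketch_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	kvm_set_rflags(vcpu, rflags);
}

static struct x86_emulate_ops sketch_ops = {
	.cpl        = sketch_cpl,
	.set_rflags = sketch_set_rflags,
	/* remaining callbacks left NULL for brevity; a real table fills them all */
};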
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06d9e79ca37d..76f5483cffec 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -171,15 +171,15 @@ struct kvm_pte_chain { | |||
| 171 | union kvm_mmu_page_role { | 171 | union kvm_mmu_page_role { |
| 172 | unsigned word; | 172 | unsigned word; |
| 173 | struct { | 173 | struct { |
| 174 | unsigned glevels:4; | ||
| 175 | unsigned level:4; | 174 | unsigned level:4; |
| 175 | unsigned cr4_pae:1; | ||
| 176 | unsigned quadrant:2; | 176 | unsigned quadrant:2; |
| 177 | unsigned pad_for_nice_hex_output:6; | 177 | unsigned pad_for_nice_hex_output:6; |
| 178 | unsigned direct:1; | 178 | unsigned direct:1; |
| 179 | unsigned access:3; | 179 | unsigned access:3; |
| 180 | unsigned invalid:1; | 180 | unsigned invalid:1; |
| 181 | unsigned cr4_pge:1; | ||
| 182 | unsigned nxe:1; | 181 | unsigned nxe:1; |
| 182 | unsigned cr0_wp:1; | ||
| 183 | }; | 183 | }; |
| 184 | }; | 184 | }; |
| 185 | 185 | ||
| @@ -187,8 +187,6 @@ struct kvm_mmu_page { | |||
| 187 | struct list_head link; | 187 | struct list_head link; |
| 188 | struct hlist_node hash_link; | 188 | struct hlist_node hash_link; |
| 189 | 189 | ||
| 190 | struct list_head oos_link; | ||
| 191 | |||
| 192 | /* | 190 | /* |
| 193 | * The following two entries are used to key the shadow page in the | 191 | * The following two entries are used to key the shadow page in the |
| 194 | * hash table. | 192 | * hash table. |
| @@ -204,9 +202,9 @@ struct kvm_mmu_page { | |||
| 204 | * in this shadow page. | 202 | * in this shadow page. |
| 205 | */ | 203 | */ |
| 206 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 204 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
| 207 | int multimapped; /* More than one parent_pte? */ | 205 | bool multimapped; /* More than one parent_pte? */ |
| 208 | int root_count; /* Currently serving as active root */ | ||
| 209 | bool unsync; | 206 | bool unsync; |
| 207 | int root_count; /* Currently serving as active root */ | ||
| 210 | unsigned int unsync_children; | 208 | unsigned int unsync_children; |
| 211 | union { | 209 | union { |
| 212 | u64 *parent_pte; /* !multimapped */ | 210 | u64 *parent_pte; /* !multimapped */ |
| @@ -224,14 +222,9 @@ struct kvm_pv_mmu_op_buffer { | |||
| 224 | 222 | ||
| 225 | struct kvm_pio_request { | 223 | struct kvm_pio_request { |
| 226 | unsigned long count; | 224 | unsigned long count; |
| 227 | int cur_count; | ||
| 228 | gva_t guest_gva; | ||
| 229 | int in; | 225 | int in; |
| 230 | int port; | 226 | int port; |
| 231 | int size; | 227 | int size; |
| 232 | int string; | ||
| 233 | int down; | ||
| 234 | int rep; | ||
| 235 | }; | 228 | }; |
| 236 | 229 | ||
| 237 | /* | 230 | /* |
| @@ -320,6 +313,7 @@ struct kvm_vcpu_arch { | |||
| 320 | struct kvm_queued_exception { | 313 | struct kvm_queued_exception { |
| 321 | bool pending; | 314 | bool pending; |
| 322 | bool has_error_code; | 315 | bool has_error_code; |
| 316 | bool reinject; | ||
| 323 | u8 nr; | 317 | u8 nr; |
| 324 | u32 error_code; | 318 | u32 error_code; |
| 325 | } exception; | 319 | } exception; |
| @@ -362,8 +356,8 @@ struct kvm_vcpu_arch { | |||
| 362 | u64 *mce_banks; | 356 | u64 *mce_banks; |
| 363 | 357 | ||
| 364 | /* used for guest single stepping over the given code position */ | 358 | /* used for guest single stepping over the given code position */ |
| 365 | u16 singlestep_cs; | ||
| 366 | unsigned long singlestep_rip; | 359 | unsigned long singlestep_rip; |
| 360 | |||
| 367 | /* fields used by HYPER-V emulation */ | 361 | /* fields used by HYPER-V emulation */ |
| 368 | u64 hv_vapic; | 362 | u64 hv_vapic; |
| 369 | }; | 363 | }; |
| @@ -389,6 +383,7 @@ struct kvm_arch { | |||
| 389 | unsigned int n_free_mmu_pages; | 383 | unsigned int n_free_mmu_pages; |
| 390 | unsigned int n_requested_mmu_pages; | 384 | unsigned int n_requested_mmu_pages; |
| 391 | unsigned int n_alloc_mmu_pages; | 385 | unsigned int n_alloc_mmu_pages; |
| 386 | atomic_t invlpg_counter; | ||
| 392 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 387 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
| 393 | /* | 388 | /* |
| 394 | * Hash table of struct kvm_mmu_page. | 389 | * Hash table of struct kvm_mmu_page. |
| @@ -461,11 +456,6 @@ struct kvm_vcpu_stat { | |||
| 461 | u32 nmi_injections; | 456 | u32 nmi_injections; |
| 462 | }; | 457 | }; |
| 463 | 458 | ||
| 464 | struct descriptor_table { | ||
| 465 | u16 limit; | ||
| 466 | unsigned long base; | ||
| 467 | } __attribute__((packed)); | ||
| 468 | |||
| 469 | struct kvm_x86_ops { | 459 | struct kvm_x86_ops { |
| 470 | int (*cpu_has_kvm_support)(void); /* __init */ | 460 | int (*cpu_has_kvm_support)(void); /* __init */ |
| 471 | int (*disabled_by_bios)(void); /* __init */ | 461 | int (*disabled_by_bios)(void); /* __init */ |
| @@ -503,12 +493,11 @@ struct kvm_x86_ops { | |||
| 503 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 493 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
| 504 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | 494 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
| 505 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); | 495 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
| 506 | void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 496 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 507 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 497 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 508 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 498 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 509 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 499 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 510 | int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); | 500 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
| 511 | int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); | ||
| 512 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 501 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
| 513 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 502 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
| 514 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 503 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
| @@ -527,7 +516,8 @@ struct kvm_x86_ops { | |||
| 527 | void (*set_irq)(struct kvm_vcpu *vcpu); | 516 | void (*set_irq)(struct kvm_vcpu *vcpu); |
| 528 | void (*set_nmi)(struct kvm_vcpu *vcpu); | 517 | void (*set_nmi)(struct kvm_vcpu *vcpu); |
| 529 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, | 518 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, |
| 530 | bool has_error_code, u32 error_code); | 519 | bool has_error_code, u32 error_code, |
| 520 | bool reinject); | ||
| 531 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); | 521 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
| 532 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 522 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
| 533 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | 523 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
| @@ -541,6 +531,8 @@ struct kvm_x86_ops { | |||
| 541 | int (*get_lpage_level)(void); | 531 | int (*get_lpage_level)(void); |
| 542 | bool (*rdtscp_supported)(void); | 532 | bool (*rdtscp_supported)(void); |
| 543 | 533 | ||
| 534 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); | ||
| 535 | |||
| 544 | const struct trace_print_flags *exit_reasons_str; | 536 | const struct trace_print_flags *exit_reasons_str; |
| 545 | }; | 537 | }; |
| 546 | 538 | ||
| @@ -587,23 +579,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 587 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
| 588 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
| 589 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 581 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
| 590 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
| 591 | unsigned long *rflags); | ||
| 592 | 582 | ||
| 593 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); | ||
| 594 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, | ||
| 595 | unsigned long *rflags); | ||
| 596 | void kvm_enable_efer_bits(u64); | 583 | void kvm_enable_efer_bits(u64); |
| 597 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 584 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
| 598 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 585 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
| 599 | 586 | ||
| 600 | struct x86_emulate_ctxt; | 587 | struct x86_emulate_ctxt; |
| 601 | 588 | ||
| 602 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, | 589 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); |
| 603 | int size, unsigned port); | ||
| 604 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | ||
| 605 | int size, unsigned long count, int down, | ||
| 606 | gva_t address, int rep, unsigned port); | ||
| 607 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 590 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
| 608 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 591 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
| 609 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 592 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
| @@ -616,12 +599,15 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | |||
| 616 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 599 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
| 617 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 600 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
| 618 | 601 | ||
| 619 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); | 602 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
| 603 | bool has_error_code, u32 error_code); | ||
| 620 | 604 | ||
| 621 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 605 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
| 622 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 606 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
| 623 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 607 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
| 624 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 608 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
| 609 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | ||
| 610 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | ||
| 625 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 611 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
| 626 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 612 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
| 627 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 613 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
| @@ -634,6 +620,8 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | |||
| 634 | 620 | ||
| 635 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 621 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
| 636 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 622 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
| 623 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); | ||
| 624 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | ||
| 637 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 625 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
| 638 | u32 error_code); | 626 | u32 error_code); |
| 639 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 627 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
| @@ -649,8 +637,6 @@ int emulator_write_emulated(unsigned long addr, | |||
| 649 | unsigned int bytes, | 637 | unsigned int bytes, |
| 650 | struct kvm_vcpu *vcpu); | 638 | struct kvm_vcpu *vcpu); |
| 651 | 639 | ||
| 652 | unsigned long segment_base(u16 selector); | ||
| 653 | |||
| 654 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 640 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
| 655 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 641 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 656 | const u8 *new, int bytes, | 642 | const u8 *new, int bytes, |
| @@ -675,7 +661,6 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | |||
| 675 | void kvm_enable_tdp(void); | 661 | void kvm_enable_tdp(void); |
| 676 | void kvm_disable_tdp(void); | 662 | void kvm_disable_tdp(void); |
| 677 | 663 | ||
| 678 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
| 679 | int complete_pio(struct kvm_vcpu *vcpu); | 664 | int complete_pio(struct kvm_vcpu *vcpu); |
| 680 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 665 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
| 681 | 666 | ||
| @@ -724,23 +709,6 @@ static inline void kvm_load_ldt(u16 sel) | |||
| 724 | asm("lldt %0" : : "rm"(sel)); | 709 | asm("lldt %0" : : "rm"(sel)); |
| 725 | } | 710 | } |
| 726 | 711 | ||
| 727 | static inline void kvm_get_idt(struct descriptor_table *table) | ||
| 728 | { | ||
| 729 | asm("sidt %0" : "=m"(*table)); | ||
| 730 | } | ||
| 731 | |||
| 732 | static inline void kvm_get_gdt(struct descriptor_table *table) | ||
| 733 | { | ||
| 734 | asm("sgdt %0" : "=m"(*table)); | ||
| 735 | } | ||
| 736 | |||
| 737 | static inline unsigned long kvm_read_tr_base(void) | ||
| 738 | { | ||
| 739 | u16 tr; | ||
| 740 | asm("str %0" : "=g"(tr)); | ||
| 741 | return segment_base(tr); | ||
| 742 | } | ||
| 743 | |||
| 744 | #ifdef CONFIG_X86_64 | 712 | #ifdef CONFIG_X86_64 |
| 745 | static inline unsigned long read_msr(unsigned long msr) | 713 | static inline unsigned long read_msr(unsigned long msr) |
| 746 | { | 714 | { |
| @@ -826,4 +794,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | |||
| 826 | void kvm_define_shared_msr(unsigned index, u32 msr); | 794 | void kvm_define_shared_msr(unsigned index, u32 msr); |
| 827 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 795 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
| 828 | 796 | ||
| 797 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); | ||
| 798 | |||
| 829 | #endif /* _ASM_X86_KVM_HOST_H */ | 799 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index ffae1420e7d7..05eba5e9a8e8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
| @@ -16,10 +16,23 @@ | |||
| 16 | #define KVM_FEATURE_CLOCKSOURCE 0 | 16 | #define KVM_FEATURE_CLOCKSOURCE 0 |
| 17 | #define KVM_FEATURE_NOP_IO_DELAY 1 | 17 | #define KVM_FEATURE_NOP_IO_DELAY 1 |
| 18 | #define KVM_FEATURE_MMU_OP 2 | 18 | #define KVM_FEATURE_MMU_OP 2 |
| 19 | /* This indicates that the new set of kvmclock msrs | ||
| 20 | * is available. The use of 0x11 and 0x12 is deprecated. | ||
| 21 | */ | ||
| 22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | ||
| 23 | |||
| 24 | /* The last 8 bits are used to indicate how to interpret the flags field | ||
| 25 | * in the pvclock structure. If no bits are set, all flags are ignored. | ||
| 26 | */ | ||
| 27 | #define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 | ||
| 19 | 28 | ||
| 20 | #define MSR_KVM_WALL_CLOCK 0x11 | 29 | #define MSR_KVM_WALL_CLOCK 0x11 |
| 21 | #define MSR_KVM_SYSTEM_TIME 0x12 | 30 | #define MSR_KVM_SYSTEM_TIME 0x12 |
| 22 | 31 | ||
| 32 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ | ||
| 33 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 | ||
| 34 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | ||
| 35 | |||
| 23 | #define KVM_MAX_MMU_OP_BATCH 32 | 36 | #define KVM_MAX_MMU_OP_BATCH 32 |
| 24 | 37 | ||
| 25 | /* Operations for KVM_HC_MMU_OP */ | 38 | /* Operations for KVM_HC_MMU_OP */ |
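The intent of the new defines above: a guest should prefer the new MSR pair when KVM_FEATURE_CLOCKSOURCE2 is advertised and fall back to the deprecated 0x11/0x12 pair otherwise, which is exactly what the kvmclock changes later in this series do. A guest-side sketch of that selection (kvm_para_has_feature() is the existing feature probe; the helper name is illustrative):

static u32 pick_system_time_msr(void)
{
	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2))
		return MSR_KVM_SYSTEM_TIME_NEW;
	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))
		return MSR_KVM_SYSTEM_TIME;
	return 0;	/* no kvmclock support at all */
}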
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index bc473acfa7f9..f9324851eba0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
| @@ -202,8 +202,9 @@ | |||
| 202 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a | 202 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a |
| 203 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | 203 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a |
| 204 | 204 | ||
| 205 | #define FEATURE_CONTROL_LOCKED (1<<0) | 205 | #define FEATURE_CONTROL_LOCKED (1<<0) |
| 206 | #define FEATURE_CONTROL_VMXON_ENABLED (1<<2) | 206 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) |
| 207 | #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) | ||
| 207 | 208 | ||
| 208 | #define MSR_IA32_APICBASE 0x0000001b | 209 | #define MSR_IA32_APICBASE 0x0000001b |
| 209 | #define MSR_IA32_APICBASE_BSP (1<<8) | 210 | #define MSR_IA32_APICBASE_BSP (1<<8) |
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 6d93508f2626..35f2d1948ada 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h | |||
| @@ -29,7 +29,8 @@ struct pvclock_vcpu_time_info { | |||
| 29 | u64 system_time; | 29 | u64 system_time; |
| 30 | u32 tsc_to_system_mul; | 30 | u32 tsc_to_system_mul; |
| 31 | s8 tsc_shift; | 31 | s8 tsc_shift; |
| 32 | u8 pad[3]; | 32 | u8 flags; |
| 33 | u8 pad[2]; | ||
| 33 | } __attribute__((__packed__)); /* 32 bytes */ | 34 | } __attribute__((__packed__)); /* 32 bytes */ |
| 34 | 35 | ||
| 35 | struct pvclock_wall_clock { | 36 | struct pvclock_wall_clock { |
| @@ -38,5 +39,6 @@ struct pvclock_wall_clock { | |||
| 38 | u32 nsec; | 39 | u32 nsec; |
| 39 | } __attribute__((__packed__)); | 40 | } __attribute__((__packed__)); |
| 40 | 41 | ||
| 42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) | ||
| 41 | #endif /* __ASSEMBLY__ */ | 43 | #endif /* __ASSEMBLY__ */ |
| 42 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ | 44 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 53235fd5f8ce..cd02f324aa6b 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | /* some helper functions for xen and kvm pv clock sources */ | 7 | /* some helper functions for xen and kvm pv clock sources */ |
| 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); | 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); |
| 9 | void pvclock_set_flags(u8 flags); | ||
| 9 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); | 10 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); |
| 10 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, | 11 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, |
| 11 | struct pvclock_vcpu_time_info *vcpu, | 12 | struct pvclock_vcpu_time_info *vcpu, |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 38638cd2fa4c..0e831059ac5a 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
| @@ -81,7 +81,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 81 | u32 event_inj_err; | 81 | u32 event_inj_err; |
| 82 | u64 nested_cr3; | 82 | u64 nested_cr3; |
| 83 | u64 lbr_ctl; | 83 | u64 lbr_ctl; |
| 84 | u8 reserved_5[832]; | 84 | u64 reserved_5; |
| 85 | u64 next_rip; | ||
| 86 | u8 reserved_6[816]; | ||
| 85 | }; | 87 | }; |
| 86 | 88 | ||
| 87 | 89 | ||
| @@ -115,6 +117,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 115 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) | 117 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) |
| 116 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) | 118 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) |
| 117 | 119 | ||
| 120 | #define SVM_VM_CR_VALID_MASK 0x001fULL | ||
| 121 | #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL | ||
| 122 | #define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL | ||
| 123 | |||
| 118 | struct __attribute__ ((__packed__)) vmcb_seg { | 124 | struct __attribute__ ((__packed__)) vmcb_seg { |
| 119 | u16 selector; | 125 | u16 selector; |
| 120 | u16 attrib; | 126 | u16 attrib; |
| @@ -238,6 +244,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
| 238 | 244 | ||
| 239 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 | 245 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 |
| 240 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 | 246 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 |
| 247 | #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 | ||
| 241 | 248 | ||
| 242 | #define SVM_EXIT_READ_CR0 0x000 | 249 | #define SVM_EXIT_READ_CR0 0x000 |
| 243 | #define SVM_EXIT_READ_CR3 0x003 | 250 | #define SVM_EXIT_READ_CR3 0x003 |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fb9a080740ec..9e6779f7cf2d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | * | 25 | * |
| 26 | */ | 26 | */ |
| 27 | 27 | ||
| 28 | #include <linux/types.h> | ||
| 29 | |||
| 28 | /* | 30 | /* |
| 29 | * Definitions of Primary Processor-Based VM-Execution Controls. | 31 | * Definitions of Primary Processor-Based VM-Execution Controls. |
| 30 | */ | 32 | */ |
| @@ -120,6 +122,8 @@ enum vmcs_field { | |||
| 120 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 122 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
| 121 | GUEST_IA32_PAT = 0x00002804, | 123 | GUEST_IA32_PAT = 0x00002804, |
| 122 | GUEST_IA32_PAT_HIGH = 0x00002805, | 124 | GUEST_IA32_PAT_HIGH = 0x00002805, |
| 125 | GUEST_IA32_EFER = 0x00002806, | ||
| 126 | GUEST_IA32_EFER_HIGH = 0x00002807, | ||
| 123 | GUEST_PDPTR0 = 0x0000280a, | 127 | GUEST_PDPTR0 = 0x0000280a, |
| 124 | GUEST_PDPTR0_HIGH = 0x0000280b, | 128 | GUEST_PDPTR0_HIGH = 0x0000280b, |
| 125 | GUEST_PDPTR1 = 0x0000280c, | 129 | GUEST_PDPTR1 = 0x0000280c, |
| @@ -130,6 +134,8 @@ enum vmcs_field { | |||
| 130 | GUEST_PDPTR3_HIGH = 0x00002811, | 134 | GUEST_PDPTR3_HIGH = 0x00002811, |
| 131 | HOST_IA32_PAT = 0x00002c00, | 135 | HOST_IA32_PAT = 0x00002c00, |
| 132 | HOST_IA32_PAT_HIGH = 0x00002c01, | 136 | HOST_IA32_PAT_HIGH = 0x00002c01, |
| 137 | HOST_IA32_EFER = 0x00002c02, | ||
| 138 | HOST_IA32_EFER_HIGH = 0x00002c03, | ||
| 133 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 139 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
| 134 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 140 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
| 135 | EXCEPTION_BITMAP = 0x00004004, | 141 | EXCEPTION_BITMAP = 0x00004004, |
| @@ -394,6 +400,10 @@ enum vmcs_field { | |||
| 394 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | 400 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" |
| 395 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | 401 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" |
| 396 | 402 | ||
| 397 | 403 | struct vmx_msr_entry { | |
| 404 | u32 index; | ||
| 405 | u32 reserved; | ||
| 406 | u64 value; | ||
| 407 | } __aligned(16); | ||
| 398 | 408 | ||
| 399 | #endif | 409 | #endif |
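struct vmx_msr_entry matches the 16-byte entry layout of the VMX MSR load/store areas, presumably to back MSR autoload handling such as the EFER fields added alongside it. A sketch of how an autoload slot could be filled, assuming the standard VM_ENTRY_MSR_LOAD_ADDR/COUNT VMCS fields (not part of this hunk) are then pointed at the array by the arch code:

static struct vmx_msr_entry guest_autoload[2] __aligned(16);

static void set_autoload_slot(unsigned int slot, u32 msr, u64 val)
{
	guest_autoload[slot].index    = msr;
	guest_autoload[slot].reserved = 0;
	guest_autoload[slot].value    = val;
	/* vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(guest_autoload)) and the
	 * matching entry count would then be programmed by the arch code. */
}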
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index feaeb0d3aa4f..eb9b76c716c2 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -29,6 +29,8 @@ | |||
| 29 | #define KVM_SCALE 22 | 29 | #define KVM_SCALE 22 |
| 30 | 30 | ||
| 31 | static int kvmclock = 1; | 31 | static int kvmclock = 1; |
| 32 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | ||
| 33 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | ||
| 32 | 34 | ||
| 33 | static int parse_no_kvmclock(char *arg) | 35 | static int parse_no_kvmclock(char *arg) |
| 34 | { | 36 | { |
| @@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void) | |||
| 54 | 56 | ||
| 55 | low = (int)__pa_symbol(&wall_clock); | 57 | low = (int)__pa_symbol(&wall_clock); |
| 56 | high = ((u64)__pa_symbol(&wall_clock) >> 32); | 58 | high = ((u64)__pa_symbol(&wall_clock) >> 32); |
| 57 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 59 | |
| 60 | native_write_msr(msr_kvm_wall_clock, low, high); | ||
| 58 | 61 | ||
| 59 | vcpu_time = &get_cpu_var(hv_clock); | 62 | vcpu_time = &get_cpu_var(hv_clock); |
| 60 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | 63 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); |
| @@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt) | |||
| 130 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 133 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); |
| 131 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 134 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
| 132 | cpu, high, low, txt); | 135 | cpu, high, low, txt); |
| 133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 136 | |
| 137 | return native_write_msr_safe(msr_kvm_system_time, low, high); | ||
| 134 | } | 138 | } |
| 135 | 139 | ||
| 136 | #ifdef CONFIG_X86_LOCAL_APIC | 140 | #ifdef CONFIG_X86_LOCAL_APIC |
| @@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
| 165 | #ifdef CONFIG_KEXEC | 169 | #ifdef CONFIG_KEXEC |
| 166 | static void kvm_crash_shutdown(struct pt_regs *regs) | 170 | static void kvm_crash_shutdown(struct pt_regs *regs) |
| 167 | { | 171 | { |
| 168 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 172 | native_write_msr(msr_kvm_system_time, 0, 0); |
| 169 | native_machine_crash_shutdown(regs); | 173 | native_machine_crash_shutdown(regs); |
| 170 | } | 174 | } |
| 171 | #endif | 175 | #endif |
| 172 | 176 | ||
| 173 | static void kvm_shutdown(void) | 177 | static void kvm_shutdown(void) |
| 174 | { | 178 | { |
| 175 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 179 | native_write_msr(msr_kvm_system_time, 0, 0); |
| 176 | native_machine_shutdown(); | 180 | native_machine_shutdown(); |
| 177 | } | 181 | } |
| 178 | 182 | ||
| @@ -181,27 +185,37 @@ void __init kvmclock_init(void) | |||
| 181 | if (!kvm_para_available()) | 185 | if (!kvm_para_available()) |
| 182 | return; | 186 | return; |
| 183 | 187 | ||
| 184 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 188 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { |
| 185 | if (kvm_register_clock("boot clock")) | 189 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; |
| 186 | return; | 190 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; |
| 187 | pv_time_ops.sched_clock = kvm_clock_read; | 191 | } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) |
| 188 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 192 | return; |
| 189 | x86_platform.get_wallclock = kvm_get_wallclock; | 193 | |
| 190 | x86_platform.set_wallclock = kvm_set_wallclock; | 194 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", |
| 195 | msr_kvm_system_time, msr_kvm_wall_clock); | ||
| 196 | |||
| 197 | if (kvm_register_clock("boot clock")) | ||
| 198 | return; | ||
| 199 | pv_time_ops.sched_clock = kvm_clock_read; | ||
| 200 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | ||
| 201 | x86_platform.get_wallclock = kvm_get_wallclock; | ||
| 202 | x86_platform.set_wallclock = kvm_set_wallclock; | ||
| 191 | #ifdef CONFIG_X86_LOCAL_APIC | 203 | #ifdef CONFIG_X86_LOCAL_APIC |
| 192 | x86_cpuinit.setup_percpu_clockev = | 204 | x86_cpuinit.setup_percpu_clockev = |
| 193 | kvm_setup_secondary_clock; | 205 | kvm_setup_secondary_clock; |
| 194 | #endif | 206 | #endif |
| 195 | #ifdef CONFIG_SMP | 207 | #ifdef CONFIG_SMP |
| 196 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 208 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
| 197 | #endif | 209 | #endif |
| 198 | machine_ops.shutdown = kvm_shutdown; | 210 | machine_ops.shutdown = kvm_shutdown; |
| 199 | #ifdef CONFIG_KEXEC | 211 | #ifdef CONFIG_KEXEC |
| 200 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 212 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
| 201 | #endif | 213 | #endif |
| 202 | kvm_get_preset_lpj(); | 214 | kvm_get_preset_lpj(); |
| 203 | clocksource_register(&kvm_clock); | 215 | clocksource_register(&kvm_clock); |
| 204 | pv_info.paravirt_enabled = 1; | 216 | pv_info.paravirt_enabled = 1; |
| 205 | pv_info.name = "KVM"; | 217 | pv_info.name = "KVM"; |
| 206 | } | 218 | |
| 219 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
| 220 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
| 207 | } | 221 | } |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 03801f2f761f..239427ca02af 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
| @@ -31,8 +31,16 @@ struct pvclock_shadow_time { | |||
| 31 | u32 tsc_to_nsec_mul; | 31 | u32 tsc_to_nsec_mul; |
| 32 | int tsc_shift; | 32 | int tsc_shift; |
| 33 | u32 version; | 33 | u32 version; |
| 34 | u8 flags; | ||
| 34 | }; | 35 | }; |
| 35 | 36 | ||
| 37 | static u8 valid_flags __read_mostly = 0; | ||
| 38 | |||
| 39 | void pvclock_set_flags(u8 flags) | ||
| 40 | { | ||
| 41 | valid_flags = flags; | ||
| 42 | } | ||
| 43 | |||
| 36 | /* | 44 | /* |
| 37 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | 45 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, |
| 38 | * yielding a 64-bit result. | 46 | * yielding a 64-bit result. |
| @@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | |||
| 91 | dst->system_timestamp = src->system_time; | 99 | dst->system_timestamp = src->system_time; |
| 92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | 100 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; |
| 93 | dst->tsc_shift = src->tsc_shift; | 101 | dst->tsc_shift = src->tsc_shift; |
| 102 | dst->flags = src->flags; | ||
| 94 | rmb(); /* test version after fetching data */ | 103 | rmb(); /* test version after fetching data */ |
| 95 | } while ((src->version & 1) || (dst->version != src->version)); | 104 | } while ((src->version & 1) || (dst->version != src->version)); |
| 96 | 105 | ||
| @@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | |||
| 109 | return pv_tsc_khz; | 118 | return pv_tsc_khz; |
| 110 | } | 119 | } |
| 111 | 120 | ||
| 121 | static atomic64_t last_value = ATOMIC64_INIT(0); | ||
| 122 | |||
| 112 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 123 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
| 113 | { | 124 | { |
| 114 | struct pvclock_shadow_time shadow; | 125 | struct pvclock_shadow_time shadow; |
| 115 | unsigned version; | 126 | unsigned version; |
| 116 | cycle_t ret, offset; | 127 | cycle_t ret, offset; |
| 128 | u64 last; | ||
| 117 | 129 | ||
| 118 | do { | 130 | do { |
| 119 | version = pvclock_get_time_values(&shadow, src); | 131 | version = pvclock_get_time_values(&shadow, src); |
| @@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | |||
| 123 | barrier(); | 135 | barrier(); |
| 124 | } while (version != src->version); | 136 | } while (version != src->version); |
| 125 | 137 | ||
| 138 | if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && | ||
| 139 | (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) | ||
| 140 | return ret; | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Assumption here is that last_value, a global accumulator, always goes | ||
| 144 | * forward. If we are less than that, we should not be much smaller. | ||
| 145 | * We assume we stay within an error margin, so the correction | ||
| 146 | * does not sacrifice accuracy. | ||
| 147 | * | ||
| 148 | * For reads: the global may have changed between test and return, | ||
| 149 | * but this means someone else updated the clock at a later time. | ||
| 150 | * We just need to make sure we are not seeing a backwards event. | ||
| 151 | * | ||
| 152 | * For updates: last_value = ret is not enough, since two vcpus could be | ||
| 153 | * updating at the same time, and one of them could be slightly behind, | ||
| 154 | * making the assumption that last_value always goes forward fail to hold. | ||
| 155 | */ | ||
| 156 | last = atomic64_read(&last_value); | ||
| 157 | do { | ||
| 158 | if (ret < last) | ||
| 159 | return last; | ||
| 160 | last = atomic64_cmpxchg(&last_value, last, ret); | ||
| 161 | } while (unlikely(last != ret)); | ||
| 162 | |||
| 126 | return ret; | 163 | return ret; |
| 127 | } | 164 | } |
| 128 | 165 | ||
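The fallback above implements a global "monotonic max": when the host cannot guarantee a stable TSC across vCPUs, every reader funnels through last_value so time never appears to go backwards, at the cost of a shared atomic; with PVCLOCK_TSC_STABLE_BIT both advertised and set, reads return early and skip it. The same pattern in isolation, as a self-contained userspace sketch (C11 atomics, names illustrative):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t last_seen;

static uint64_t monotonic_read(uint64_t now)
{
	uint64_t last = atomic_load(&last_seen);

	for (;;) {
		if (now <= last)
			return last;	/* a later value was already published */
		if (atomic_compare_exchange_weak(&last_seen, &last, now))
			return now;	/* we published 'now' */
		/* on failure 'last' was reloaded with the current value; retry */
	}
}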
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index cc2c60474fd0..c2f1b26141e2 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | 46 | ||
| 47 | /* Global pointer to shared data; NULL means no measured launch. */ | 47 | /* Global pointer to shared data; NULL means no measured launch. */ |
| 48 | struct tboot *tboot __read_mostly; | 48 | struct tboot *tboot __read_mostly; |
| 49 | EXPORT_SYMBOL(tboot); | ||
| 49 | 50 | ||
| 50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | 51 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ |
| 51 | #define AP_WAIT_TIMEOUT 1 | 52 | #define AP_WAIT_TIMEOUT 1 |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4dade6ac0827..5ac0bb465ed6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
| 34 | 34 | ||
| 35 | #include "x86.h" | 35 | #include "x86.h" |
| 36 | #include "tss.h" | ||
| 36 | 37 | ||
| 37 | /* | 38 | /* |
| 38 | * Opcode effective-address decode tables. | 39 | * Opcode effective-address decode tables. |
| @@ -50,6 +51,8 @@ | |||
| 50 | #define DstReg (2<<1) /* Register operand. */ | 51 | #define DstReg (2<<1) /* Register operand. */ |
| 51 | #define DstMem (3<<1) /* Memory operand. */ | 52 | #define DstMem (3<<1) /* Memory operand. */ |
| 52 | #define DstAcc (4<<1) /* Destination Accumulator */ | 53 | #define DstAcc (4<<1) /* Destination Accumulator */ |
| 54 | #define DstDI (5<<1) /* Destination is in ES:(E)DI */ | ||
| 55 | #define DstMem64 (6<<1) /* 64bit memory operand */ | ||
| 53 | #define DstMask (7<<1) | 56 | #define DstMask (7<<1) |
| 54 | /* Source operand type. */ | 57 | /* Source operand type. */ |
| 55 | #define SrcNone (0<<4) /* No source operand. */ | 58 | #define SrcNone (0<<4) /* No source operand. */ |
| @@ -63,6 +66,7 @@ | |||
| 63 | #define SrcOne (7<<4) /* Implied '1' */ | 66 | #define SrcOne (7<<4) /* Implied '1' */ |
| 64 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
| 65 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
| 69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | ||
| 66 | #define SrcMask (0xf<<4) | 70 | #define SrcMask (0xf<<4) |
| 67 | /* Generic ModRM decode. */ | 71 | /* Generic ModRM decode. */ |
| 68 | #define ModRM (1<<8) | 72 | #define ModRM (1<<8) |
| @@ -85,6 +89,9 @@ | |||
| 85 | #define Src2ImmByte (2<<29) | 89 | #define Src2ImmByte (2<<29) |
| 86 | #define Src2One (3<<29) | 90 | #define Src2One (3<<29) |
| 87 | #define Src2Imm16 (4<<29) | 91 | #define Src2Imm16 (4<<29) |
| 92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
| 93 | in memory and second argument is located | ||
| 94 | immediately after the first one in memory. */ | ||
| 88 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
| 89 | 96 | ||
| 90 | enum { | 97 | enum { |
| @@ -147,8 +154,8 @@ static u32 opcode_table[256] = { | |||
| 147 | 0, 0, 0, 0, | 154 | 0, 0, 0, 0, |
| 148 | /* 0x68 - 0x6F */ | 155 | /* 0x68 - 0x6F */ |
| 149 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 156 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
| 150 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 157 | DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ |
| 151 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 158 | SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ |
| 152 | /* 0x70 - 0x77 */ | 159 | /* 0x70 - 0x77 */ |
| 153 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 160 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| 154 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 161 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| @@ -173,12 +180,12 @@ static u32 opcode_table[256] = { | |||
| 173 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
| 174 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
| 175 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, |
| 176 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
| 177 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
| 178 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
| 179 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
| 180 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
| 181 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
| 182 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
| 183 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 190 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| 184 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 191 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| @@ -204,13 +211,13 @@ static u32 opcode_table[256] = { | |||
| 204 | 0, 0, 0, 0, 0, 0, 0, 0, | 211 | 0, 0, 0, 0, 0, 0, 0, 0, |
| 205 | /* 0xE0 - 0xE7 */ | 212 | /* 0xE0 - 0xE7 */ |
| 206 | 0, 0, 0, 0, | 213 | 0, 0, 0, 0, |
| 207 | ByteOp | SrcImmUByte, SrcImmUByte, | 214 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
| 208 | ByteOp | SrcImmUByte, SrcImmUByte, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
| 209 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
| 210 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
| 211 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
| 212 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
| 213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
| 214 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
| 215 | 0, 0, 0, 0, | 222 | 0, 0, 0, 0, |
| 216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, | 223 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
| @@ -343,7 +350,8 @@ static u32 group_table[] = { | |||
| 343 | [Group5*8] = | 350 | [Group5*8] = |
| 344 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
| 345 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
| 346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, |
| 354 | SrcMem | ModRM | Stack, 0, | ||
| 347 | [Group7*8] = | 355 | [Group7*8] = |
| 348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
| 349 | SrcNone | ModRM | DstMem | Mov, 0, | 357 | SrcNone | ModRM | DstMem | Mov, 0, |
| @@ -353,14 +361,14 @@ static u32 group_table[] = { | |||
| 353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | 361 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, |
| 354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | 362 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, |
| 355 | [Group9*8] = | 363 | [Group9*8] = |
| 356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | 364 | 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, |
| 357 | }; | 365 | }; |
| 358 | 366 | ||
| 359 | static u32 group2_table[] = { | 367 | static u32 group2_table[] = { |
| 360 | [Group7*8] = | 368 | [Group7*8] = |
| 361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, | 369 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, |
| 362 | SrcNone | ModRM | DstMem | Mov, 0, | 370 | SrcNone | ModRM | DstMem | Mov, 0, |
| 363 | SrcMem16 | ModRM | Mov, 0, | 371 | SrcMem16 | ModRM | Mov | Priv, 0, |
| 364 | [Group9*8] = | 372 | [Group9*8] = |
| 365 | 0, 0, 0, 0, 0, 0, 0, 0, | 373 | 0, 0, 0, 0, 0, 0, 0, 0, |
| 366 | }; | 374 | }; |
| @@ -562,7 +570,7 @@ static u32 group2_table[] = { | |||
| 562 | #define insn_fetch(_type, _size, _eip) \ | 570 | #define insn_fetch(_type, _size, _eip) \ |
| 563 | ({ unsigned long _x; \ | 571 | ({ unsigned long _x; \ |
| 564 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ | 572 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ |
| 565 | if (rc != 0) \ | 573 | if (rc != X86EMUL_CONTINUE) \ |
| 566 | goto done; \ | 574 | goto done; \ |
| 567 | (_eip) += (_size); \ | 575 | (_eip) += (_size); \ |
| 568 | (_type)_x; \ | 576 | (_type)_x; \ |
| @@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | |||
| 638 | 646 | ||
| 639 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
| 640 | struct x86_emulate_ops *ops, | 648 | struct x86_emulate_ops *ops, |
| 641 | unsigned long linear, u8 *dest) | 649 | unsigned long eip, u8 *dest) |
| 642 | { | 650 | { |
| 643 | struct fetch_cache *fc = &ctxt->decode.fetch; | 651 | struct fetch_cache *fc = &ctxt->decode.fetch; |
| 644 | int rc; | 652 | int rc; |
| 645 | int size; | 653 | int size, cur_size; |
| 646 | 654 | ||
| 647 | if (linear < fc->start || linear >= fc->end) { | 655 | if (eip == fc->end) { |
| 648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 656 | cur_size = fc->end - fc->start; |
| 649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); | 657 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
| 650 | if (rc) | 658 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, |
| 659 | size, ctxt->vcpu, NULL); | ||
| 660 | if (rc != X86EMUL_CONTINUE) | ||
| 651 | return rc; | 661 | return rc; |
| 652 | fc->start = linear; | 662 | fc->end += size; |
| 653 | fc->end = linear + size; | ||
| 654 | } | 663 | } |
| 655 | *dest = fc->data[linear - fc->start]; | 664 | *dest = fc->data[eip - fc->start]; |
| 656 | return 0; | 665 | return X86EMUL_CONTINUE; |
| 657 | } | 666 | } |
| 658 | 667 | ||
| 659 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 668 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
| 660 | struct x86_emulate_ops *ops, | 669 | struct x86_emulate_ops *ops, |
| 661 | unsigned long eip, void *dest, unsigned size) | 670 | unsigned long eip, void *dest, unsigned size) |
| 662 | { | 671 | { |
| 663 | int rc = 0; | 672 | int rc; |
| 664 | 673 | ||
| 665 | /* x86 instructions are limited to 15 bytes. */ | 674 | /* x86 instructions are limited to 15 bytes. */ |
| 666 | if (eip + size - ctxt->decode.eip_orig > 15) | 675 | if (eip + size - ctxt->eip > 15) |
| 667 | return X86EMUL_UNHANDLEABLE; | 676 | return X86EMUL_UNHANDLEABLE; |
| 668 | eip += ctxt->cs_base; | ||
| 669 | while (size--) { | 677 | while (size--) { |
| 670 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 678 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
| 671 | if (rc) | 679 | if (rc != X86EMUL_CONTINUE) |
| 672 | return rc; | 680 | return rc; |
| 673 | } | 681 | } |
| 674 | return 0; | 682 | return X86EMUL_CONTINUE; |
| 675 | } | 683 | } |
| 676 | 684 | ||
| 677 | /* | 685 | /* |
| @@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 702 | *address = 0; | 710 | *address = 0; |
| 703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 711 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
| 704 | ctxt->vcpu, NULL); | 712 | ctxt->vcpu, NULL); |
| 705 | if (rc) | 713 | if (rc != X86EMUL_CONTINUE) |
| 706 | return rc; | 714 | return rc; |
| 707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 715 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
| 708 | ctxt->vcpu, NULL); | 716 | ctxt->vcpu, NULL); |
| @@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
| 782 | struct decode_cache *c = &ctxt->decode; | 790 | struct decode_cache *c = &ctxt->decode; |
| 783 | u8 sib; | 791 | u8 sib; |
| 784 | int index_reg = 0, base_reg = 0, scale; | 792 | int index_reg = 0, base_reg = 0, scale; |
| 785 | int rc = 0; | 793 | int rc = X86EMUL_CONTINUE; |
| 786 | 794 | ||
| 787 | if (c->rex_prefix) { | 795 | if (c->rex_prefix) { |
| 788 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ | 796 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ |
| @@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
| 895 | struct x86_emulate_ops *ops) | 903 | struct x86_emulate_ops *ops) |
| 896 | { | 904 | { |
| 897 | struct decode_cache *c = &ctxt->decode; | 905 | struct decode_cache *c = &ctxt->decode; |
| 898 | int rc = 0; | 906 | int rc = X86EMUL_CONTINUE; |
| 899 | 907 | ||
| 900 | switch (c->ad_bytes) { | 908 | switch (c->ad_bytes) { |
| 901 | case 2: | 909 | case 2: |
| @@ -916,14 +924,18 @@ int | |||
| 916 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 924 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 917 | { | 925 | { |
| 918 | struct decode_cache *c = &ctxt->decode; | 926 | struct decode_cache *c = &ctxt->decode; |
| 919 | int rc = 0; | 927 | int rc = X86EMUL_CONTINUE; |
| 920 | int mode = ctxt->mode; | 928 | int mode = ctxt->mode; |
| 921 | int def_op_bytes, def_ad_bytes, group; | 929 | int def_op_bytes, def_ad_bytes, group; |
| 922 | 930 | ||
| 923 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
| 924 | 931 | ||
| 932 | /* we cannot decode insn before we complete previous rep insn */ | ||
| 933 | WARN_ON(ctxt->restart); | ||
| 934 | |||
| 935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
| 925 | memset(c, 0, sizeof(struct decode_cache)); | 936 | memset(c, 0, sizeof(struct decode_cache)); |
| 926 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); | 937 | c->eip = ctxt->eip; |
| 938 | c->fetch.start = c->fetch.end = c->eip; | ||
| 927 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
| 928 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
| 929 | 941 | ||
| @@ -1015,11 +1027,6 @@ done_prefixes: | |||
| 1015 | } | 1027 | } |
| 1016 | } | 1028 | } |
| 1017 | 1029 | ||
| 1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
| 1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); | ||
| 1020 | return -1; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | if (c->d & Group) { | 1030 | if (c->d & Group) { |
| 1024 | group = c->d & GroupMask; | 1031 | group = c->d & GroupMask; |
| 1025 | c->modrm = insn_fetch(u8, 1, c->eip); | 1032 | c->modrm = insn_fetch(u8, 1, c->eip); |
| @@ -1046,7 +1053,7 @@ done_prefixes: | |||
| 1046 | rc = decode_modrm(ctxt, ops); | 1053 | rc = decode_modrm(ctxt, ops); |
| 1047 | else if (c->d & MemAbs) | 1054 | else if (c->d & MemAbs) |
| 1048 | rc = decode_abs(ctxt, ops); | 1055 | rc = decode_abs(ctxt, ops); |
| 1049 | if (rc) | 1056 | if (rc != X86EMUL_CONTINUE) |
| 1050 | goto done; | 1057 | goto done; |
| 1051 | 1058 | ||
| 1052 | if (!c->has_seg_override) | 1059 | if (!c->has_seg_override) |
| @@ -1057,6 +1064,10 @@ done_prefixes: | |||
| 1057 | 1064 | ||
| 1058 | if (c->ad_bytes != 8) | 1065 | if (c->ad_bytes != 8) |
| 1059 | c->modrm_ea = (u32)c->modrm_ea; | 1066 | c->modrm_ea = (u32)c->modrm_ea; |
| 1067 | |||
| 1068 | if (c->rip_relative) | ||
| 1069 | c->modrm_ea += c->eip; | ||
| 1070 | |||
| 1060 | /* | 1071 | /* |
| 1061 | * Decode and fetch the source operand: register, memory | 1072 | * Decode and fetch the source operand: register, memory |
| 1062 | * or immediate. | 1073 | * or immediate. |
| @@ -1091,6 +1102,8 @@ done_prefixes: | |||
| 1091 | break; | 1102 | break; |
| 1092 | } | 1103 | } |
| 1093 | c->src.type = OP_MEM; | 1104 | c->src.type = OP_MEM; |
| 1105 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
| 1106 | c->src.val = 0; | ||
| 1094 | break; | 1107 | break; |
| 1095 | case SrcImm: | 1108 | case SrcImm: |
| 1096 | case SrcImmU: | 1109 | case SrcImmU: |
| @@ -1139,6 +1152,14 @@ done_prefixes: | |||
| 1139 | c->src.bytes = 1; | 1152 | c->src.bytes = 1; |
| 1140 | c->src.val = 1; | 1153 | c->src.val = 1; |
| 1141 | break; | 1154 | break; |
| 1155 | case SrcSI: | ||
| 1156 | c->src.type = OP_MEM; | ||
| 1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 1158 | c->src.ptr = (unsigned long *) | ||
| 1159 | register_address(c, seg_override_base(ctxt, c), | ||
| 1160 | c->regs[VCPU_REGS_RSI]); | ||
| 1161 | c->src.val = 0; | ||
| 1162 | break; | ||
| 1142 | } | 1163 | } |
| 1143 | 1164 | ||
| 1144 | /* | 1165 | /* |
| @@ -1168,6 +1189,12 @@ done_prefixes: | |||
| 1168 | c->src2.bytes = 1; | 1189 | c->src2.bytes = 1; |
| 1169 | c->src2.val = 1; | 1190 | c->src2.val = 1; |
| 1170 | break; | 1191 | break; |
| 1192 | case Src2Mem16: | ||
| 1193 | c->src2.type = OP_MEM; | ||
| 1194 | c->src2.bytes = 2; | ||
| 1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
| 1196 | c->src2.val = 0; | ||
| 1197 | break; | ||
| 1171 | } | 1198 | } |
| 1172 | 1199 | ||
| 1173 | /* Decode and fetch the destination operand: register or memory. */ | 1200 | /* Decode and fetch the destination operand: register or memory. */ |
| @@ -1180,6 +1207,7 @@ done_prefixes: | |||
| 1180 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 1207 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
| 1181 | break; | 1208 | break; |
| 1182 | case DstMem: | 1209 | case DstMem: |
| 1210 | case DstMem64: | ||
| 1183 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1211 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
| 1184 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1212 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
| 1185 | c->dst.type = OP_REG; | 1213 | c->dst.type = OP_REG; |
| @@ -1188,12 +1216,24 @@ done_prefixes: | |||
| 1188 | break; | 1216 | break; |
| 1189 | } | 1217 | } |
| 1190 | c->dst.type = OP_MEM; | 1218 | c->dst.type = OP_MEM; |
| 1219 | c->dst.ptr = (unsigned long *)c->modrm_ea; | ||
| 1220 | if ((c->d & DstMask) == DstMem64) | ||
| 1221 | c->dst.bytes = 8; | ||
| 1222 | else | ||
| 1223 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 1224 | c->dst.val = 0; | ||
| 1225 | if (c->d & BitOp) { | ||
| 1226 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | ||
| 1227 | |||
| 1228 | c->dst.ptr = (void *)c->dst.ptr + | ||
| 1229 | (c->src.val & mask) / 8; | ||
| 1230 | } | ||
| 1191 | break; | 1231 | break; |
| 1192 | case DstAcc: | 1232 | case DstAcc: |
| 1193 | c->dst.type = OP_REG; | 1233 | c->dst.type = OP_REG; |
| 1194 | c->dst.bytes = c->op_bytes; | 1234 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
| 1195 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | 1235 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; |
| 1196 | switch (c->op_bytes) { | 1236 | switch (c->dst.bytes) { |
| 1197 | case 1: | 1237 | case 1: |
| 1198 | c->dst.val = *(u8 *)c->dst.ptr; | 1238 | c->dst.val = *(u8 *)c->dst.ptr; |
| 1199 | break; | 1239 | break; |
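For bit-test instructions the decode step in the hunk above rebases the memory operand: the bit offset taken from the source operand is rounded down to a multiple of the operand width in bits and converted into a byte displacement, leaving only the low bits as the in-word bit position. A worked, purely illustrative example in plain C (not emulator code), assuming a 16-bit operand and bit offset 35:

#include <stdio.h>

int main(void)
{
        unsigned op_bytes = 2;                               /* 16-bit operand */
        long bit_off = 35;                                   /* e.g. bt word [mem], 35 */
        unsigned long mask = ~((unsigned long)op_bytes * 8 - 1); /* align to 16 bits */
        long byte_adjust = (bit_off & mask) / 8;             /* -> 4 bytes forward */
        long bit_in_word = bit_off & ~mask;                  /* -> bit 3 of that word */
        printf("%ld %ld\n", byte_adjust, bit_in_word);       /* prints "4 3" */
        return 0;
}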
| @@ -1203,18 +1243,248 @@ done_prefixes: | |||
| 1203 | case 4: | 1243 | case 4: |
| 1204 | c->dst.val = *(u32 *)c->dst.ptr; | 1244 | c->dst.val = *(u32 *)c->dst.ptr; |
| 1205 | break; | 1245 | break; |
| 1246 | case 8: | ||
| 1247 | c->dst.val = *(u64 *)c->dst.ptr; | ||
| 1248 | break; | ||
| 1206 | } | 1249 | } |
| 1207 | c->dst.orig_val = c->dst.val; | 1250 | c->dst.orig_val = c->dst.val; |
| 1208 | break; | 1251 | break; |
| 1252 | case DstDI: | ||
| 1253 | c->dst.type = OP_MEM; | ||
| 1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 1255 | c->dst.ptr = (unsigned long *) | ||
| 1256 | register_address(c, es_base(ctxt), | ||
| 1257 | c->regs[VCPU_REGS_RDI]); | ||
| 1258 | c->dst.val = 0; | ||
| 1259 | break; | ||
| 1209 | } | 1260 | } |
| 1210 | 1261 | ||
| 1211 | if (c->rip_relative) | ||
| 1212 | c->modrm_ea += c->eip; | ||
| 1213 | |||
| 1214 | done: | 1262 | done: |
| 1215 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
| 1216 | } | 1264 | } |
| 1217 | 1265 | ||
| 1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | ||
| 1267 | struct x86_emulate_ops *ops, | ||
| 1268 | unsigned int size, unsigned short port, | ||
| 1269 | void *dest) | ||
| 1270 | { | ||
| 1271 | struct read_cache *rc = &ctxt->decode.io_read; | ||
| 1272 | |||
| 1273 | if (rc->pos == rc->end) { /* refill pio read ahead */ | ||
| 1274 | struct decode_cache *c = &ctxt->decode; | ||
| 1275 | unsigned int in_page, n; | ||
| 1276 | unsigned int count = c->rep_prefix ? | ||
| 1277 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; | ||
| 1278 | in_page = (ctxt->eflags & EFLG_DF) ? | ||
| 1279 | offset_in_page(c->regs[VCPU_REGS_RDI]) : | ||
| 1280 | PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); | ||
| 1281 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | ||
| 1282 | count); | ||
| 1283 | if (n == 0) | ||
| 1284 | n = 1; | ||
| 1285 | rc->pos = rc->end = 0; | ||
| 1286 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | ||
| 1287 | return 0; | ||
| 1288 | rc->end = n * size; | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | memcpy(dest, rc->data + rc->pos, size); | ||
| 1292 | rc->pos += size; | ||
| 1293 | return 1; | ||
| 1294 | } | ||
| 1295 | |||
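The read-ahead logic above batches REP IN transfers: the number of elements fetched per refill is bounded by the bytes remaining in the current guest page (taken in the direction EFLAGS.DF indicates), by the size of the internal cache, and by the REP count in RCX, with a floor of one element so forward progress is always made. A minimal standalone sketch of that batch calculation, assuming an illustrative CACHE_BYTES buffer size; the helper names here are not part of the kernel code:

#include <stdio.h>

#define PAGE_SIZE   4096u
#define CACHE_BYTES 1024u   /* assumed size of the read-ahead buffer */

static unsigned int min_u(unsigned int a, unsigned int b)
{
        return a < b ? a : b;
}

/* bytes left in the guest page, depending on the direction flag */
static unsigned int bytes_in_page(unsigned long rdi, int df)
{
        unsigned int off = rdi & (PAGE_SIZE - 1);
        return df ? off : PAGE_SIZE - off;
}

static unsigned int batch_count(unsigned long rdi, int df,
                                unsigned int size, unsigned int rcx)
{
        unsigned int n = min_u(bytes_in_page(rdi, df), CACHE_BYTES) / size;
        n = min_u(n, rcx);
        return n ? n : 1;   /* always make progress */
}

int main(void)
{
        /* e.g. rep insw near the end of a page, DF clear */
        printf("%u\n", batch_count(0xff8, 0, 2, 100)); /* -> 4 */
        return 0;
}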
| 1296 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
| 1297 | { | ||
| 1298 | u32 limit = get_desc_limit(desc); | ||
| 1299 | |||
| 1300 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
| 1301 | } | ||
| 1302 | |||
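With the granularity bit set, the descriptor's 20-bit limit counts 4 KiB pages, so the scaled byte limit is the raw limit shifted left by 12 with the low 12 bits filled in. A small illustrative program, not kernel code, showing both cases:

#include <stdio.h>

static unsigned int limit_scaled(unsigned int limit, int g)
{
        return g ? (limit << 12) | 0xfff : limit;
}

int main(void)
{
        printf("%#x\n", limit_scaled(0xfffff, 1)); /* 0xffffffff: flat 4 GiB segment */
        printf("%#x\n", limit_scaled(0x67, 0));    /* 0x67: minimal 32-bit TSS limit */
        return 0;
}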
| 1303 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | ||
| 1304 | struct x86_emulate_ops *ops, | ||
| 1305 | u16 selector, struct desc_ptr *dt) | ||
| 1306 | { | ||
| 1307 | if (selector & 1 << 2) { | ||
| 1308 | struct desc_struct desc; | ||
| 1309 | memset (dt, 0, sizeof *dt); | ||
| 1310 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | ||
| 1311 | return; | ||
| 1312 | |||
| 1313 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | ||
| 1314 | dt->address = get_desc_base(&desc); | ||
| 1315 | } else | ||
| 1316 | ops->get_gdt(dt, ctxt->vcpu); | ||
| 1317 | } | ||
| 1318 | |||
| 1319 | /* allowed only for 8-byte segment descriptors */ | ||
| 1320 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
| 1321 | struct x86_emulate_ops *ops, | ||
| 1322 | u16 selector, struct desc_struct *desc) | ||
| 1323 | { | ||
| 1324 | struct desc_ptr dt; | ||
| 1325 | u16 index = selector >> 3; | ||
| 1326 | int ret; | ||
| 1327 | u32 err; | ||
| 1328 | ulong addr; | ||
| 1329 | |||
| 1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
| 1331 | |||
| 1332 | if (dt.size < index * 8 + 7) { | ||
| 1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
| 1334 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1335 | } | ||
| 1336 | addr = dt.address + index * 8; | ||
| 1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
| 1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
| 1340 | |||
| 1341 | return ret; | ||
| 1342 | } | ||
| 1343 | |||
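Both descriptor accessors decompose the selector the same way: bits 15:3 index the table, bit 2 (TI) selects the LDT over the GDT, and bits 1:0 carry the RPL; the dt.size < index * 8 + 7 test rejects any selector whose 8-byte entry would extend past the table limit. A hedged standalone sketch of that decomposition and bounds test; the names are illustrative, not the emulator's:

#include <stdbool.h>
#include <stdio.h>

struct sel_parts { unsigned index, ti, rpl; };

static struct sel_parts split_selector(unsigned short sel)
{
        struct sel_parts p = { sel >> 3, (sel >> 2) & 1, sel & 3 };
        return p;
}

static bool selector_in_limit(unsigned short sel, unsigned int dt_size)
{
        /* the last byte of the 8-byte entry must lie within the limit */
        return dt_size >= (unsigned)(sel >> 3) * 8 + 7;
}

int main(void)
{
        struct sel_parts p = split_selector(0x2b);   /* index 5, GDT, RPL 3 */
        printf("index=%u ti=%u rpl=%u ok=%d\n",
               p.index, p.ti, p.rpl, selector_in_limit(0x2b, 0x2f));
        return 0;
}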
| 1344 | /* allowed only for 8-byte segment descriptors */ | ||
| 1345 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
| 1346 | struct x86_emulate_ops *ops, | ||
| 1347 | u16 selector, struct desc_struct *desc) | ||
| 1348 | { | ||
| 1349 | struct desc_ptr dt; | ||
| 1350 | u16 index = selector >> 3; | ||
| 1351 | u32 err; | ||
| 1352 | ulong addr; | ||
| 1353 | int ret; | ||
| 1354 | |||
| 1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
| 1356 | |||
| 1357 | if (dt.size < index * 8 + 7) { | ||
| 1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
| 1359 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | addr = dt.address + index * 8; | ||
| 1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
| 1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
| 1366 | |||
| 1367 | return ret; | ||
| 1368 | } | ||
| 1369 | |||
| 1370 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
| 1371 | struct x86_emulate_ops *ops, | ||
| 1372 | u16 selector, int seg) | ||
| 1373 | { | ||
| 1374 | struct desc_struct seg_desc; | ||
| 1375 | u8 dpl, rpl, cpl; | ||
| 1376 | unsigned err_vec = GP_VECTOR; | ||
| 1377 | u32 err_code = 0; | ||
| 1378 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
| 1379 | int ret; | ||
| 1380 | |||
| 1381 | memset(&seg_desc, 0, sizeof seg_desc); | ||
| 1382 | |||
| 1383 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | ||
| 1384 | || ctxt->mode == X86EMUL_MODE_REAL) { | ||
| 1385 | /* set real mode segment descriptor */ | ||
| 1386 | set_desc_base(&seg_desc, selector << 4); | ||
| 1387 | set_desc_limit(&seg_desc, 0xffff); | ||
| 1388 | seg_desc.type = 3; | ||
| 1389 | seg_desc.p = 1; | ||
| 1390 | seg_desc.s = 1; | ||
| 1391 | goto load; | ||
| 1392 | } | ||
| 1393 | |||
| 1394 | /* NULL selector is not valid for TR, CS and SS */ | ||
| 1395 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | ||
| 1396 | && null_selector) | ||
| 1397 | goto exception; | ||
| 1398 | |||
| 1399 | /* TR should be in GDT only */ | ||
| 1400 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
| 1401 | goto exception; | ||
| 1402 | |||
| 1403 | if (null_selector) /* for NULL selector skip all following checks */ | ||
| 1404 | goto load; | ||
| 1405 | |||
| 1406 | ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
| 1407 | if (ret != X86EMUL_CONTINUE) | ||
| 1408 | return ret; | ||
| 1409 | |||
| 1410 | err_code = selector & 0xfffc; | ||
| 1411 | err_vec = GP_VECTOR; | ||
| 1412 | |||
| 1413 | /* can't load system descriptor into segment selector */ | ||
| 1414 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | ||
| 1415 | goto exception; | ||
| 1416 | |||
| 1417 | if (!seg_desc.p) { | ||
| 1418 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
| 1419 | goto exception; | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | rpl = selector & 3; | ||
| 1423 | dpl = seg_desc.dpl; | ||
| 1424 | cpl = ops->cpl(ctxt->vcpu); | ||
| 1425 | |||
| 1426 | switch (seg) { | ||
| 1427 | case VCPU_SREG_SS: | ||
| 1428 | /* | ||
| 1429 | * segment is not a writable data segment or segment | ||
| 1430 | * selector's RPL != CPL or segment's DPL != CPL | ||
| 1431 | */ | ||
| 1432 | if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) | ||
| 1433 | goto exception; | ||
| 1434 | break; | ||
| 1435 | case VCPU_SREG_CS: | ||
| 1436 | if (!(seg_desc.type & 8)) | ||
| 1437 | goto exception; | ||
| 1438 | |||
| 1439 | if (seg_desc.type & 4) { | ||
| 1440 | /* conforming */ | ||
| 1441 | if (dpl > cpl) | ||
| 1442 | goto exception; | ||
| 1443 | } else { | ||
| 1444 | /* nonconforming */ | ||
| 1445 | if (rpl > cpl || dpl != cpl) | ||
| 1446 | goto exception; | ||
| 1447 | } | ||
| 1448 | /* CS(RPL) <- CPL */ | ||
| 1449 | selector = (selector & 0xfffc) | cpl; | ||
| 1450 | break; | ||
| 1451 | case VCPU_SREG_TR: | ||
| 1452 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | ||
| 1453 | goto exception; | ||
| 1454 | break; | ||
| 1455 | case VCPU_SREG_LDTR: | ||
| 1456 | if (seg_desc.s || seg_desc.type != 2) | ||
| 1457 | goto exception; | ||
| 1458 | break; | ||
| 1459 | default: /* DS, ES, FS, or GS */ | ||
| 1460 | /* | ||
| 1461 | * segment is not a data or readable code segment or | ||
| 1462 | * ((segment is a data or nonconforming code segment) | ||
| 1463 | * and (both RPL and CPL > DPL)) | ||
| 1464 | */ | ||
| 1465 | if ((seg_desc.type & 0xa) == 0x8 || | ||
| 1466 | (((seg_desc.type & 0xc) != 0xc) && | ||
| 1467 | (rpl > dpl && cpl > dpl))) | ||
| 1468 | goto exception; | ||
| 1469 | break; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | if (seg_desc.s) { | ||
| 1473 | /* mark segment as accessed */ | ||
| 1474 | seg_desc.type |= 1; | ||
| 1475 | ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
| 1476 | if (ret != X86EMUL_CONTINUE) | ||
| 1477 | return ret; | ||
| 1478 | } | ||
| 1479 | load: | ||
| 1480 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | ||
| 1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | ||
| 1482 | return X86EMUL_CONTINUE; | ||
| 1483 | exception: | ||
| 1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | ||
| 1485 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1486 | } | ||
| 1487 | |||
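The privilege rules enforced above follow the SDM: SS requires a writable data segment whose DPL and the selector's RPL both equal the CPL, while CS must be a code segment, with conforming code loadable when DPL <= CPL and nonconforming code only when RPL <= CPL and DPL == CPL. A compact, purely illustrative restatement of just those two checks, assuming the usual descriptor type encoding (bit 3 = code, bit 2 = conforming for code, writable non-expand-down data matches the 0x2 pattern); this is a sketch, not the kernel function:

#include <stdbool.h>
#include <stdio.h>

static bool ss_load_ok(unsigned rpl, unsigned dpl, unsigned cpl, unsigned type)
{
        /* writable, non-expand-down data segment: (type & 0xa) == 0x2 */
        return rpl == cpl && dpl == cpl && (type & 0xa) == 0x2;
}

static bool cs_load_ok(unsigned rpl, unsigned dpl, unsigned cpl, unsigned type)
{
        if (!(type & 8))                 /* must be a code segment */
                return false;
        if (type & 4)                    /* conforming: DPL <= CPL */
                return dpl <= cpl;
        return rpl <= cpl && dpl == cpl; /* nonconforming */
}

int main(void)
{
        printf("%d %d\n", ss_load_ok(3, 3, 3, 0x3), cs_load_ok(3, 0, 3, 0xb));
        return 0;
}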
| 1218 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) |
| 1219 | { | 1489 | { |
| 1220 | struct decode_cache *c = &ctxt->decode; | 1490 | struct decode_cache *c = &ctxt->decode; |
| @@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
| 1251 | int rc; | 1521 | int rc; |
| 1252 | unsigned long val, change_mask; | 1522 | unsigned long val, change_mask; |
| 1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1523 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
| 1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | 1524 | int cpl = ops->cpl(ctxt->vcpu); |
| 1255 | 1525 | ||
| 1256 | rc = emulate_pop(ctxt, ops, &val, len); | 1526 | rc = emulate_pop(ctxt, ops, &val, len); |
| 1257 | if (rc != X86EMUL_CONTINUE) | 1527 | if (rc != X86EMUL_CONTINUE) |
| @@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
| 1306 | int rc; | 1576 | int rc; |
| 1307 | 1577 | ||
| 1308 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1578 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); |
| 1309 | if (rc != 0) | 1579 | if (rc != X86EMUL_CONTINUE) |
| 1310 | return rc; | 1580 | return rc; |
| 1311 | 1581 | ||
| 1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); | 1582 | rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); |
| 1313 | return rc; | 1583 | return rc; |
| 1314 | } | 1584 | } |
| 1315 | 1585 | ||
| @@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
| 1332 | struct x86_emulate_ops *ops) | 1602 | struct x86_emulate_ops *ops) |
| 1333 | { | 1603 | { |
| 1334 | struct decode_cache *c = &ctxt->decode; | 1604 | struct decode_cache *c = &ctxt->decode; |
| 1335 | int rc = 0; | 1605 | int rc = X86EMUL_CONTINUE; |
| 1336 | int reg = VCPU_REGS_RDI; | 1606 | int reg = VCPU_REGS_RDI; |
| 1337 | 1607 | ||
| 1338 | while (reg >= VCPU_REGS_RAX) { | 1608 | while (reg >= VCPU_REGS_RAX) { |
| @@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
| 1343 | } | 1613 | } |
| 1344 | 1614 | ||
| 1345 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1615 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); |
| 1346 | if (rc != 0) | 1616 | if (rc != X86EMUL_CONTINUE) |
| 1347 | break; | 1617 | break; |
| 1348 | --reg; | 1618 | --reg; |
| 1349 | } | 1619 | } |
| @@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
| 1354 | struct x86_emulate_ops *ops) | 1624 | struct x86_emulate_ops *ops) |
| 1355 | { | 1625 | { |
| 1356 | struct decode_cache *c = &ctxt->decode; | 1626 | struct decode_cache *c = &ctxt->decode; |
| 1357 | int rc; | ||
| 1358 | 1627 | ||
| 1359 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1628 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); |
| 1360 | if (rc != 0) | ||
| 1361 | return rc; | ||
| 1362 | return 0; | ||
| 1363 | } | 1629 | } |
| 1364 | 1630 | ||
| 1365 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1631 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) |
| @@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
| 1395 | struct x86_emulate_ops *ops) | 1661 | struct x86_emulate_ops *ops) |
| 1396 | { | 1662 | { |
| 1397 | struct decode_cache *c = &ctxt->decode; | 1663 | struct decode_cache *c = &ctxt->decode; |
| 1398 | int rc = 0; | ||
| 1399 | 1664 | ||
| 1400 | switch (c->modrm_reg) { | 1665 | switch (c->modrm_reg) { |
| 1401 | case 0 ... 1: /* test */ | 1666 | case 0 ... 1: /* test */ |
| @@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
| 1408 | emulate_1op("neg", c->dst, ctxt->eflags); | 1673 | emulate_1op("neg", c->dst, ctxt->eflags); |
| 1409 | break; | 1674 | break; |
| 1410 | default: | 1675 | default: |
| 1411 | DPRINTF("Cannot emulate %02x\n", c->b); | 1676 | return 0; |
| 1412 | rc = X86EMUL_UNHANDLEABLE; | ||
| 1413 | break; | ||
| 1414 | } | 1677 | } |
| 1415 | return rc; | 1678 | return 1; |
| 1416 | } | 1679 | } |
| 1417 | 1680 | ||
| 1418 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1681 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, |
| @@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
| 1442 | emulate_push(ctxt); | 1705 | emulate_push(ctxt); |
| 1443 | break; | 1706 | break; |
| 1444 | } | 1707 | } |
| 1445 | return 0; | 1708 | return X86EMUL_CONTINUE; |
| 1446 | } | 1709 | } |
| 1447 | 1710 | ||
| 1448 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1711 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, |
| 1449 | struct x86_emulate_ops *ops, | 1712 | struct x86_emulate_ops *ops) |
| 1450 | unsigned long memop) | ||
| 1451 | { | 1713 | { |
| 1452 | struct decode_cache *c = &ctxt->decode; | 1714 | struct decode_cache *c = &ctxt->decode; |
| 1453 | u64 old, new; | 1715 | u64 old = c->dst.orig_val; |
| 1454 | int rc; | ||
| 1455 | |||
| 1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | ||
| 1457 | if (rc != X86EMUL_CONTINUE) | ||
| 1458 | return rc; | ||
| 1459 | 1716 | ||
| 1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1717 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
| 1461 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { | 1718 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { |
| @@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
| 1463 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 1720 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); |
| 1464 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1721 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
| 1465 | ctxt->eflags &= ~EFLG_ZF; | 1722 | ctxt->eflags &= ~EFLG_ZF; |
| 1466 | |||
| 1467 | } else { | 1723 | } else { |
| 1468 | new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | | 1724 | c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | |
| 1469 | (u32) c->regs[VCPU_REGS_RBX]; | 1725 | (u32) c->regs[VCPU_REGS_RBX]; |
| 1470 | 1726 | ||
| 1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | ||
| 1472 | if (rc != X86EMUL_CONTINUE) | ||
| 1473 | return rc; | ||
| 1474 | ctxt->eflags |= EFLG_ZF; | 1727 | ctxt->eflags |= EFLG_ZF; |
| 1475 | } | 1728 | } |
| 1476 | return 0; | 1729 | return X86EMUL_CONTINUE; |
| 1477 | } | 1730 | } |
| 1478 | 1731 | ||
| 1479 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | 1732 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, |
| @@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
| 1484 | unsigned long cs; | 1737 | unsigned long cs; |
| 1485 | 1738 | ||
| 1486 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1739 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); |
| 1487 | if (rc) | 1740 | if (rc != X86EMUL_CONTINUE) |
| 1488 | return rc; | 1741 | return rc; |
| 1489 | if (c->op_bytes == 4) | 1742 | if (c->op_bytes == 4) |
| 1490 | c->eip = (u32)c->eip; | 1743 | c->eip = (u32)c->eip; |
| 1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1744 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
| 1492 | if (rc) | 1745 | if (rc != X86EMUL_CONTINUE) |
| 1493 | return rc; | 1746 | return rc; |
| 1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); | 1747 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
| 1495 | return rc; | 1748 | return rc; |
| 1496 | } | 1749 | } |
| 1497 | 1750 | ||
| @@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
| 1544 | default: | 1797 | default: |
| 1545 | break; | 1798 | break; |
| 1546 | } | 1799 | } |
| 1547 | return 0; | 1800 | return X86EMUL_CONTINUE; |
| 1548 | } | 1801 | } |
| 1549 | 1802 | ||
| 1550 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
| @@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
| 1598 | u64 msr_data; | 1851 | u64 msr_data; |
| 1599 | 1852 | ||
| 1600 | /* syscall is not available in real mode */ | 1853 | /* syscall is not available in real mode */ |
| 1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) | 1854 | if (ctxt->mode == X86EMUL_MODE_REAL || |
| 1602 | return X86EMUL_UNHANDLEABLE; | 1855 | ctxt->mode == X86EMUL_MODE_VM86) { |
| 1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 1857 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1858 | } | ||
| 1603 | 1859 | ||
| 1604 | setup_syscalls_segments(ctxt, &cs, &ss); | 1860 | setup_syscalls_segments(ctxt, &cs, &ss); |
| 1605 | 1861 | ||
| @@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
| 1649 | /* inject #GP if in real mode */ | 1905 | /* inject #GP if in real mode */ |
| 1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
| 1651 | kvm_inject_gp(ctxt->vcpu, 0); | 1907 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1652 | return X86EMUL_UNHANDLEABLE; | 1908 | return X86EMUL_PROPAGATE_FAULT; |
| 1653 | } | 1909 | } |
| 1654 | 1910 | ||
| 1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1911 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
| 1656 | * Therefore, we inject an #UD. | 1912 | * Therefore, we inject an #UD. |
| 1657 | */ | 1913 | */ |
| 1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
| 1659 | return X86EMUL_UNHANDLEABLE; | 1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 1916 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1917 | } | ||
| 1660 | 1918 | ||
| 1661 | setup_syscalls_segments(ctxt, &cs, &ss); | 1919 | setup_syscalls_segments(ctxt, &cs, &ss); |
| 1662 | 1920 | ||
| @@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
| 1711 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1969 | if (ctxt->mode == X86EMUL_MODE_REAL || |
| 1712 | ctxt->mode == X86EMUL_MODE_VM86) { | 1970 | ctxt->mode == X86EMUL_MODE_VM86) { |
| 1713 | kvm_inject_gp(ctxt->vcpu, 0); | 1971 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1714 | return X86EMUL_UNHANDLEABLE; | 1972 | return X86EMUL_PROPAGATE_FAULT; |
| 1715 | } | 1973 | } |
| 1716 | 1974 | ||
| 1717 | setup_syscalls_segments(ctxt, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, &cs, &ss); |
| @@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
| 1756 | return X86EMUL_CONTINUE; | 2014 | return X86EMUL_CONTINUE; |
| 1757 | } | 2015 | } |
| 1758 | 2016 | ||
| 1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | 2017 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, |
| 2018 | struct x86_emulate_ops *ops) | ||
| 1760 | { | 2019 | { |
| 1761 | int iopl; | 2020 | int iopl; |
| 1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2021 | if (ctxt->mode == X86EMUL_MODE_REAL) |
| @@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
| 1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2023 | if (ctxt->mode == X86EMUL_MODE_VM86) |
| 1765 | return true; | 2024 | return true; |
| 1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2025 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
| 1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | 2026 | return ops->cpl(ctxt->vcpu) > iopl; |
| 1768 | } | 2027 | } |
| 1769 | 2028 | ||
| 1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2029 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
| @@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
| 1801 | struct x86_emulate_ops *ops, | 2060 | struct x86_emulate_ops *ops, |
| 1802 | u16 port, u16 len) | 2061 | u16 port, u16 len) |
| 1803 | { | 2062 | { |
| 1804 | if (emulator_bad_iopl(ctxt)) | 2063 | if (emulator_bad_iopl(ctxt, ops)) |
| 1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | 2064 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) |
| 1806 | return false; | 2065 | return false; |
| 1807 | return true; | 2066 | return true; |
| 1808 | } | 2067 | } |
| 1809 | 2068 | ||
| 2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
| 2070 | struct x86_emulate_ops *ops, | ||
| 2071 | int seg) | ||
| 2072 | { | ||
| 2073 | struct desc_struct desc; | ||
| 2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
| 2075 | return get_desc_base(&desc); | ||
| 2076 | else | ||
| 2077 | return ~0; | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | ||
| 2081 | struct x86_emulate_ops *ops, | ||
| 2082 | struct tss_segment_16 *tss) | ||
| 2083 | { | ||
| 2084 | struct decode_cache *c = &ctxt->decode; | ||
| 2085 | |||
| 2086 | tss->ip = c->eip; | ||
| 2087 | tss->flag = ctxt->eflags; | ||
| 2088 | tss->ax = c->regs[VCPU_REGS_RAX]; | ||
| 2089 | tss->cx = c->regs[VCPU_REGS_RCX]; | ||
| 2090 | tss->dx = c->regs[VCPU_REGS_RDX]; | ||
| 2091 | tss->bx = c->regs[VCPU_REGS_RBX]; | ||
| 2092 | tss->sp = c->regs[VCPU_REGS_RSP]; | ||
| 2093 | tss->bp = c->regs[VCPU_REGS_RBP]; | ||
| 2094 | tss->si = c->regs[VCPU_REGS_RSI]; | ||
| 2095 | tss->di = c->regs[VCPU_REGS_RDI]; | ||
| 2096 | |||
| 2097 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
| 2098 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
| 2099 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
| 2100 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
| 2101 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2102 | } | ||
| 2103 | |||
| 2104 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | ||
| 2105 | struct x86_emulate_ops *ops, | ||
| 2106 | struct tss_segment_16 *tss) | ||
| 2107 | { | ||
| 2108 | struct decode_cache *c = &ctxt->decode; | ||
| 2109 | int ret; | ||
| 2110 | |||
| 2111 | c->eip = tss->ip; | ||
| 2112 | ctxt->eflags = tss->flag | 2; | ||
| 2113 | c->regs[VCPU_REGS_RAX] = tss->ax; | ||
| 2114 | c->regs[VCPU_REGS_RCX] = tss->cx; | ||
| 2115 | c->regs[VCPU_REGS_RDX] = tss->dx; | ||
| 2116 | c->regs[VCPU_REGS_RBX] = tss->bx; | ||
| 2117 | c->regs[VCPU_REGS_RSP] = tss->sp; | ||
| 2118 | c->regs[VCPU_REGS_RBP] = tss->bp; | ||
| 2119 | c->regs[VCPU_REGS_RSI] = tss->si; | ||
| 2120 | c->regs[VCPU_REGS_RDI] = tss->di; | ||
| 2121 | |||
| 2122 | /* | ||
| 2123 | * SDM says that segment selectors are loaded before segment | ||
| 2124 | * descriptors | ||
| 2125 | */ | ||
| 2126 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2127 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
| 2128 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
| 2129 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
| 2130 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
| 2131 | |||
| 2132 | /* | ||
| 2133 | * Now load segment descriptors. If a fault happens at this stage | ||
| 2134 | * it is handled in the context of the new task | ||
| 2135 | */ | ||
| 2136 | ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); | ||
| 2137 | if (ret != X86EMUL_CONTINUE) | ||
| 2138 | return ret; | ||
| 2139 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
| 2140 | if (ret != X86EMUL_CONTINUE) | ||
| 2141 | return ret; | ||
| 2142 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
| 2143 | if (ret != X86EMUL_CONTINUE) | ||
| 2144 | return ret; | ||
| 2145 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
| 2146 | if (ret != X86EMUL_CONTINUE) | ||
| 2147 | return ret; | ||
| 2148 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
| 2149 | if (ret != X86EMUL_CONTINUE) | ||
| 2150 | return ret; | ||
| 2151 | |||
| 2152 | return X86EMUL_CONTINUE; | ||
| 2153 | } | ||
| 2154 | |||
| 2155 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, | ||
| 2156 | struct x86_emulate_ops *ops, | ||
| 2157 | u16 tss_selector, u16 old_tss_sel, | ||
| 2158 | ulong old_tss_base, struct desc_struct *new_desc) | ||
| 2159 | { | ||
| 2160 | struct tss_segment_16 tss_seg; | ||
| 2161 | int ret; | ||
| 2162 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
| 2163 | |||
| 2164 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2165 | &err); | ||
| 2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2167 | /* FIXME: need to provide precise fault address */ | ||
| 2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2169 | return ret; | ||
| 2170 | } | ||
| 2171 | |||
| 2172 | save_state_to_tss16(ctxt, ops, &tss_seg); | ||
| 2173 | |||
| 2174 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2175 | &err); | ||
| 2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2177 | /* FIXME: need to provide precise fault address */ | ||
| 2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2179 | return ret; | ||
| 2180 | } | ||
| 2181 | |||
| 2182 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2183 | &err); | ||
| 2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2185 | /* FIXME: need to provide precise fault address */ | ||
| 2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2187 | return ret; | ||
| 2188 | } | ||
| 2189 | |||
| 2190 | if (old_tss_sel != 0xffff) { | ||
| 2191 | tss_seg.prev_task_link = old_tss_sel; | ||
| 2192 | |||
| 2193 | ret = ops->write_std(new_tss_base, | ||
| 2194 | &tss_seg.prev_task_link, | ||
| 2195 | sizeof tss_seg.prev_task_link, | ||
| 2196 | ctxt->vcpu, &err); | ||
| 2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2198 | /* FIXME: need to provide precise fault address */ | ||
| 2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2200 | return ret; | ||
| 2201 | } | ||
| 2202 | } | ||
| 2203 | |||
| 2204 | return load_state_from_tss16(ctxt, ops, &tss_seg); | ||
| 2205 | } | ||
| 2206 | |||
| 2207 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | ||
| 2208 | struct x86_emulate_ops *ops, | ||
| 2209 | struct tss_segment_32 *tss) | ||
| 2210 | { | ||
| 2211 | struct decode_cache *c = &ctxt->decode; | ||
| 2212 | |||
| 2213 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | ||
| 2214 | tss->eip = c->eip; | ||
| 2215 | tss->eflags = ctxt->eflags; | ||
| 2216 | tss->eax = c->regs[VCPU_REGS_RAX]; | ||
| 2217 | tss->ecx = c->regs[VCPU_REGS_RCX]; | ||
| 2218 | tss->edx = c->regs[VCPU_REGS_RDX]; | ||
| 2219 | tss->ebx = c->regs[VCPU_REGS_RBX]; | ||
| 2220 | tss->esp = c->regs[VCPU_REGS_RSP]; | ||
| 2221 | tss->ebp = c->regs[VCPU_REGS_RBP]; | ||
| 2222 | tss->esi = c->regs[VCPU_REGS_RSI]; | ||
| 2223 | tss->edi = c->regs[VCPU_REGS_RDI]; | ||
| 2224 | |||
| 2225 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
| 2226 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
| 2227 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
| 2228 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
| 2229 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | ||
| 2230 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | ||
| 2231 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2232 | } | ||
| 2233 | |||
| 2234 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | ||
| 2235 | struct x86_emulate_ops *ops, | ||
| 2236 | struct tss_segment_32 *tss) | ||
| 2237 | { | ||
| 2238 | struct decode_cache *c = &ctxt->decode; | ||
| 2239 | int ret; | ||
| 2240 | |||
| 2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | ||
| 2242 | c->eip = tss->eip; | ||
| 2243 | ctxt->eflags = tss->eflags | 2; | ||
| 2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | ||
| 2245 | c->regs[VCPU_REGS_RCX] = tss->ecx; | ||
| 2246 | c->regs[VCPU_REGS_RDX] = tss->edx; | ||
| 2247 | c->regs[VCPU_REGS_RBX] = tss->ebx; | ||
| 2248 | c->regs[VCPU_REGS_RSP] = tss->esp; | ||
| 2249 | c->regs[VCPU_REGS_RBP] = tss->ebp; | ||
| 2250 | c->regs[VCPU_REGS_RSI] = tss->esi; | ||
| 2251 | c->regs[VCPU_REGS_RDI] = tss->edi; | ||
| 2252 | |||
| 2253 | /* | ||
| 2254 | * SDM says that segment selectors are loaded before segment | ||
| 2255 | * descriptors | ||
| 2256 | */ | ||
| 2257 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2258 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
| 2259 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
| 2260 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
| 2261 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
| 2262 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | ||
| 2263 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | ||
| 2264 | |||
| 2265 | /* | ||
| 2266 | * Now load segment descriptors. If a fault happens at this stage | ||
| 2267 | * it is handled in the context of the new task | ||
| 2268 | */ | ||
| 2269 | ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); | ||
| 2270 | if (ret != X86EMUL_CONTINUE) | ||
| 2271 | return ret; | ||
| 2272 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
| 2273 | if (ret != X86EMUL_CONTINUE) | ||
| 2274 | return ret; | ||
| 2275 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
| 2276 | if (ret != X86EMUL_CONTINUE) | ||
| 2277 | return ret; | ||
| 2278 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
| 2279 | if (ret != X86EMUL_CONTINUE) | ||
| 2280 | return ret; | ||
| 2281 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
| 2282 | if (ret != X86EMUL_CONTINUE) | ||
| 2283 | return ret; | ||
| 2284 | ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); | ||
| 2285 | if (ret != X86EMUL_CONTINUE) | ||
| 2286 | return ret; | ||
| 2287 | ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); | ||
| 2288 | if (ret != X86EMUL_CONTINUE) | ||
| 2289 | return ret; | ||
| 2290 | |||
| 2291 | return X86EMUL_CONTINUE; | ||
| 2292 | } | ||
| 2293 | |||
| 2294 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | ||
| 2295 | struct x86_emulate_ops *ops, | ||
| 2296 | u16 tss_selector, u16 old_tss_sel, | ||
| 2297 | ulong old_tss_base, struct desc_struct *new_desc) | ||
| 2298 | { | ||
| 2299 | struct tss_segment_32 tss_seg; | ||
| 2300 | int ret; | ||
| 2301 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
| 2302 | |||
| 2303 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2304 | &err); | ||
| 2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2306 | /* FIXME: need to provide precise fault address */ | ||
| 2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2308 | return ret; | ||
| 2309 | } | ||
| 2310 | |||
| 2311 | save_state_to_tss32(ctxt, ops, &tss_seg); | ||
| 2312 | |||
| 2313 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2314 | &err); | ||
| 2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2316 | /* FIXME: need to provide precise fault address */ | ||
| 2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2318 | return ret; | ||
| 2319 | } | ||
| 2320 | |||
| 2321 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2322 | &err); | ||
| 2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2324 | /* FIXME: need to provide precise fault address */ | ||
| 2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2326 | return ret; | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | if (old_tss_sel != 0xffff) { | ||
| 2330 | tss_seg.prev_task_link = old_tss_sel; | ||
| 2331 | |||
| 2332 | ret = ops->write_std(new_tss_base, | ||
| 2333 | &tss_seg.prev_task_link, | ||
| 2334 | sizeof tss_seg.prev_task_link, | ||
| 2335 | ctxt->vcpu, &err); | ||
| 2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2337 | /* FIXME: need to provide precise fault address */ | ||
| 2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2339 | return ret; | ||
| 2340 | } | ||
| 2341 | } | ||
| 2342 | |||
| 2343 | return load_state_from_tss32(ctxt, ops, &tss_seg); | ||
| 2344 | } | ||
| 2345 | |||
| 2346 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | ||
| 2347 | struct x86_emulate_ops *ops, | ||
| 2348 | u16 tss_selector, int reason, | ||
| 2349 | bool has_error_code, u32 error_code) | ||
| 2350 | { | ||
| 2351 | struct desc_struct curr_tss_desc, next_tss_desc; | ||
| 2352 | int ret; | ||
| 2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | ||
| 2354 | ulong old_tss_base = | ||
| 2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | ||
| 2356 | u32 desc_limit; | ||
| 2357 | |||
| 2358 | /* FIXME: old_tss_base == ~0 ? */ | ||
| 2359 | |||
| 2360 | ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); | ||
| 2361 | if (ret != X86EMUL_CONTINUE) | ||
| 2362 | return ret; | ||
| 2363 | ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); | ||
| 2364 | if (ret != X86EMUL_CONTINUE) | ||
| 2365 | return ret; | ||
| 2366 | |||
| 2367 | /* FIXME: check that next_tss_desc is tss */ | ||
| 2368 | |||
| 2369 | if (reason != TASK_SWITCH_IRET) { | ||
| 2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | ||
| 2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | ||
| 2372 | kvm_inject_gp(ctxt->vcpu, 0); | ||
| 2373 | return X86EMUL_PROPAGATE_FAULT; | ||
| 2374 | } | ||
| 2375 | } | ||
| 2376 | |||
| 2377 | desc_limit = desc_limit_scaled(&next_tss_desc); | ||
| 2378 | if (!next_tss_desc.p || | ||
| 2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | ||
| 2380 | desc_limit < 0x2b)) { | ||
| 2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | ||
| 2382 | tss_selector & 0xfffc); | ||
| 2383 | return X86EMUL_PROPAGATE_FAULT; | ||
| 2384 | } | ||
| 2385 | |||
| 2386 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
| 2387 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ | ||
| 2388 | write_segment_descriptor(ctxt, ops, old_tss_sel, | ||
| 2389 | &curr_tss_desc); | ||
| 2390 | } | ||
| 2391 | |||
| 2392 | if (reason == TASK_SWITCH_IRET) | ||
| 2393 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | ||
| 2394 | |||
| 2395 | /* set back link to prev task only if NT bit is set in eflags | ||
| 2396 | note that old_tss_sel is not used after this point */ | ||
| 2397 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
| 2398 | old_tss_sel = 0xffff; | ||
| 2399 | |||
| 2400 | if (next_tss_desc.type & 8) | ||
| 2401 | ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, | ||
| 2402 | old_tss_base, &next_tss_desc); | ||
| 2403 | else | ||
| 2404 | ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, | ||
| 2405 | old_tss_base, &next_tss_desc); | ||
| 2406 | if (ret != X86EMUL_CONTINUE) | ||
| 2407 | return ret; | ||
| 2408 | |||
| 2409 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) | ||
| 2410 | ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; | ||
| 2411 | |||
| 2412 | if (reason != TASK_SWITCH_IRET) { | ||
| 2413 | next_tss_desc.type |= (1 << 1); /* set busy flag */ | ||
| 2414 | write_segment_descriptor(ctxt, ops, tss_selector, | ||
| 2415 | &next_tss_desc); | ||
| 2416 | } | ||
| 2417 | |||
| 2418 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | ||
| 2419 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | ||
| 2420 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
| 2421 | |||
| 2422 | if (has_error_code) { | ||
| 2423 | struct decode_cache *c = &ctxt->decode; | ||
| 2424 | |||
| 2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | ||
| 2426 | c->lock_prefix = 0; | ||
| 2427 | c->src.val = (unsigned long) error_code; | ||
| 2428 | emulate_push(ctxt); | ||
| 2429 | } | ||
| 2430 | |||
| 2431 | return ret; | ||
| 2432 | } | ||
| 2433 | |||
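The busy state of a TSS lives in bit 1 of the descriptor type field (an available 32-bit TSS is type 9, a busy one type 11), which is why the switch clears that bit in the outgoing descriptor for IRET/JMP and sets it in the incoming one. A tiny illustrative example of the bit manipulation, not kernel code:

#include <stdio.h>

enum { TSS32_AVAIL = 9, TSS32_BUSY = 11 };

static unsigned set_busy(unsigned type)   { return type | (1u << 1); }
static unsigned clear_busy(unsigned type) { return type & ~(1u << 1); }

int main(void)
{
        printf("%u %u\n", set_busy(TSS32_AVAIL), clear_busy(TSS32_BUSY)); /* 11 9 */
        return 0;
}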
| 2434 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
| 2435 | struct x86_emulate_ops *ops, | ||
| 2436 | u16 tss_selector, int reason, | ||
| 2437 | bool has_error_code, u32 error_code) | ||
| 2438 | { | ||
| 2439 | struct decode_cache *c = &ctxt->decode; | ||
| 2440 | int rc; | ||
| 2441 | |||
| 2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
| 2443 | c->eip = ctxt->eip; | ||
| 2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
| 2445 | c->dst.type = OP_NONE; | ||
| 2446 | |||
| 2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | ||
| 2448 | has_error_code, error_code); | ||
| 2449 | |||
| 2450 | if (rc == X86EMUL_CONTINUE) { | ||
| 2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
| 2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
| 2453 | rc = writeback(ctxt, ops); | ||
| 2454 | } | ||
| 2455 | |||
| 2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | ||
| 2457 | } | ||
| 2458 | |||
| 2459 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | ||
| 2460 | int reg, struct operand *op) | ||
| 2461 | { | ||
| 2462 | struct decode_cache *c = &ctxt->decode; | ||
| 2463 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | ||
| 2464 | |||
| 2465 | register_address_increment(c, &c->regs[reg], df * op->bytes); | ||
| 2466 | op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); | ||
| 2467 | } | ||
| 2468 | |||
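string_addr_inc advances RSI or RDI by one element in the direction EFLAGS.DF selects, so string instructions walk forward when DF is clear and backward when it is set. A trivial illustrative sketch of that stepping:

#include <stdio.h>

static unsigned long step(unsigned long reg, unsigned bytes, int df)
{
        return reg + (df ? -(long)bytes : (long)bytes);
}

int main(void)
{
        printf("%lu %lu\n", step(100, 4, 0), step(100, 4, 1)); /* 104 96 */
        return 0;
}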
| 1810 | int | 2469 | int |
| 1811 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 2470 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 1812 | { | 2471 | { |
| 1813 | unsigned long memop = 0; | ||
| 1814 | u64 msr_data; | 2472 | u64 msr_data; |
| 1815 | unsigned long saved_eip = 0; | ||
| 1816 | struct decode_cache *c = &ctxt->decode; | 2473 | struct decode_cache *c = &ctxt->decode; |
| 1817 | unsigned int port; | 2474 | int rc = X86EMUL_CONTINUE; |
| 1818 | int io_dir_in; | 2475 | int saved_dst_type = c->dst.type; |
| 1819 | int rc = 0; | ||
| 1820 | 2476 | ||
| 1821 | ctxt->interruptibility = 0; | 2477 | ctxt->interruptibility = 0; |
| 1822 | 2478 | ||
| @@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1826 | */ | 2482 | */ |
| 1827 | 2483 | ||
| 1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
| 1829 | saved_eip = c->eip; | 2485 | |
| 2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
| 2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 2488 | goto done; | ||
| 2489 | } | ||
| 1830 | 2490 | ||
| 1831 | /* LOCK prefix is allowed only with some instructions */ | 2491 | /* LOCK prefix is allowed only with some instructions */ |
| 1832 | if (c->lock_prefix && !(c->d & Lock)) { | 2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
| 1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 1834 | goto done; | 2494 | goto done; |
| 1835 | } | 2495 | } |
| 1836 | 2496 | ||
| 1837 | /* Privileged instruction can be executed only in CPL=0 */ | 2497 | /* Privileged instruction can be executed only in CPL=0 */ |
| 1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | 2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
| 1839 | kvm_inject_gp(ctxt->vcpu, 0); | 2499 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1840 | goto done; | 2500 | goto done; |
| 1841 | } | 2501 | } |
| 1842 | 2502 | ||
| 1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | ||
| 1844 | memop = c->modrm_ea; | ||
| 1845 | |||
| 1846 | if (c->rep_prefix && (c->d & String)) { | 2503 | if (c->rep_prefix && (c->d & String)) { |
| 2504 | ctxt->restart = true; | ||
| 1847 | /* All REP prefixes have the same first termination condition */ | 2505 | /* All REP prefixes have the same first termination condition */ |
| 1848 | if (c->regs[VCPU_REGS_RCX] == 0) { | 2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
| 2507 | string_done: | ||
| 2508 | ctxt->restart = false; | ||
| 1849 | kvm_rip_write(ctxt->vcpu, c->eip); | 2509 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1850 | goto done; | 2510 | goto done; |
| 1851 | } | 2511 | } |
| @@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1857 | * - if REPNE/REPNZ and ZF = 1 then done | 2517 | * - if REPNE/REPNZ and ZF = 1 then done |
| 1858 | */ | 2518 | */ |
| 1859 | if ((c->b == 0xa6) || (c->b == 0xa7) || | 2519 | if ((c->b == 0xa6) || (c->b == 0xa7) || |
| 1860 | (c->b == 0xae) || (c->b == 0xaf)) { | 2520 | (c->b == 0xae) || (c->b == 0xaf)) { |
| 1861 | if ((c->rep_prefix == REPE_PREFIX) && | 2521 | if ((c->rep_prefix == REPE_PREFIX) && |
| 1862 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 2522 | ((ctxt->eflags & EFLG_ZF) == 0)) |
| 1863 | kvm_rip_write(ctxt->vcpu, c->eip); | 2523 | goto string_done; |
| 1864 | goto done; | ||
| 1865 | } | ||
| 1866 | if ((c->rep_prefix == REPNE_PREFIX) && | 2524 | if ((c->rep_prefix == REPNE_PREFIX) && |
| 1867 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 2525 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) |
| 1868 | kvm_rip_write(ctxt->vcpu, c->eip); | 2526 | goto string_done; |
| 1869 | goto done; | ||
| 1870 | } | ||
| 1871 | } | 2527 | } |
| 1872 | c->regs[VCPU_REGS_RCX]--; | 2528 | c->eip = ctxt->eip; |
| 1873 | c->eip = kvm_rip_read(ctxt->vcpu); | ||
| 1874 | } | 2529 | } |
| 1875 | 2530 | ||
| 1876 | if (c->src.type == OP_MEM) { | 2531 | if (c->src.type == OP_MEM) { |
| 1877 | c->src.ptr = (unsigned long *)memop; | ||
| 1878 | c->src.val = 0; | ||
| 1879 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
| 1880 | &c->src.val, | 2533 | &c->src.val, |
| 1881 | c->src.bytes, | 2534 | c->src.bytes, |
| @@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1885 | c->src.orig_val = c->src.val; | 2538 | c->src.orig_val = c->src.val; |
| 1886 | } | 2539 | } |
| 1887 | 2540 | ||
| 2541 | if (c->src2.type == OP_MEM) { | ||
| 2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | ||
| 2543 | &c->src2.val, | ||
| 2544 | c->src2.bytes, | ||
| 2545 | ctxt->vcpu); | ||
| 2546 | if (rc != X86EMUL_CONTINUE) | ||
| 2547 | goto done; | ||
| 2548 | } | ||
| 2549 | |||
| 1888 | if ((c->d & DstMask) == ImplicitOps) | 2550 | if ((c->d & DstMask) == ImplicitOps) |
| 1889 | goto special_insn; | 2551 | goto special_insn; |
| 1890 | 2552 | ||
| 1891 | 2553 | ||
| 1892 | if (c->dst.type == OP_MEM) { | 2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
| 1893 | c->dst.ptr = (unsigned long *)memop; | 2555 | /* optimisation - avoid slow emulated read if Mov */ |
| 1894 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, |
| 1895 | c->dst.val = 0; | 2557 | c->dst.bytes, ctxt->vcpu); |
| 1896 | if (c->d & BitOp) { | 2558 | if (rc != X86EMUL_CONTINUE) |
| 1897 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | 2559 | goto done; |
| 1898 | |||
| 1899 | c->dst.ptr = (void *)c->dst.ptr + | ||
| 1900 | (c->src.val & mask) / 8; | ||
| 1901 | } | ||
| 1902 | if (!(c->d & Mov)) { | ||
| 1903 | /* optimisation - avoid slow emulated read */ | ||
| 1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
| 1905 | &c->dst.val, | ||
| 1906 | c->dst.bytes, | ||
| 1907 | ctxt->vcpu); | ||
| 1908 | if (rc != X86EMUL_CONTINUE) | ||
| 1909 | goto done; | ||
| 1910 | } | ||
| 1911 | } | 2560 | } |
| 1912 | c->dst.orig_val = c->dst.val; | 2561 | c->dst.orig_val = c->dst.val; |
| 1913 | 2562 | ||
| @@ -1926,7 +2575,7 @@ special_insn: | |||
| 1926 | break; | 2575 | break; |
| 1927 | case 0x07: /* pop es */ | 2576 | case 0x07: /* pop es */ |
| 1928 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
| 1929 | if (rc != 0) | 2578 | if (rc != X86EMUL_CONTINUE) |
| 1930 | goto done; | 2579 | goto done; |
| 1931 | break; | 2580 | break; |
| 1932 | case 0x08 ... 0x0d: | 2581 | case 0x08 ... 0x0d: |
| @@ -1945,7 +2594,7 @@ special_insn: | |||
| 1945 | break; | 2594 | break; |
| 1946 | case 0x17: /* pop ss */ | 2595 | case 0x17: /* pop ss */ |
| 1947 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
| 1948 | if (rc != 0) | 2597 | if (rc != X86EMUL_CONTINUE) |
| 1949 | goto done; | 2598 | goto done; |
| 1950 | break; | 2599 | break; |
| 1951 | case 0x18 ... 0x1d: | 2600 | case 0x18 ... 0x1d: |
| @@ -1957,7 +2606,7 @@ special_insn: | |||
| 1957 | break; | 2606 | break; |
| 1958 | case 0x1f: /* pop ds */ | 2607 | case 0x1f: /* pop ds */ |
| 1959 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
| 1960 | if (rc != 0) | 2609 | if (rc != X86EMUL_CONTINUE) |
| 1961 | goto done; | 2610 | goto done; |
| 1962 | break; | 2611 | break; |
| 1963 | case 0x20 ... 0x25: | 2612 | case 0x20 ... 0x25: |
| @@ -1988,7 +2637,7 @@ special_insn: | |||
| 1988 | case 0x58 ... 0x5f: /* pop reg */ | 2637 | case 0x58 ... 0x5f: /* pop reg */ |
| 1989 | pop_instruction: | 2638 | pop_instruction: |
| 1990 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | 2639 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); |
| 1991 | if (rc != 0) | 2640 | if (rc != X86EMUL_CONTINUE) |
| 1992 | goto done; | 2641 | goto done; |
| 1993 | break; | 2642 | break; |
| 1994 | case 0x60: /* pusha */ | 2643 | case 0x60: /* pusha */ |
| @@ -1996,7 +2645,7 @@ special_insn: | |||
| 1996 | break; | 2645 | break; |
| 1997 | case 0x61: /* popa */ | 2646 | case 0x61: /* popa */ |
| 1998 | rc = emulate_popa(ctxt, ops); | 2647 | rc = emulate_popa(ctxt, ops); |
| 1999 | if (rc != 0) | 2648 | if (rc != X86EMUL_CONTINUE) |
| 2000 | goto done; | 2649 | goto done; |
| 2001 | break; | 2650 | break; |
| 2002 | case 0x63: /* movsxd */ | 2651 | case 0x63: /* movsxd */ |
| @@ -2010,47 +2659,29 @@ special_insn: | |||
| 2010 | break; | 2659 | break; |
| 2011 | case 0x6c: /* insb */ | 2660 | case 0x6c: /* insb */ |
| 2012 | case 0x6d: /* insw/insd */ | 2661 | case 0x6d: /* insw/insd */ |
| 2662 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
| 2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
| 2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2664 | c->dst.bytes)) { |
| 2015 | kvm_inject_gp(ctxt->vcpu, 0); | 2665 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2016 | goto done; | 2666 | goto done; |
| 2017 | } | 2667 | } |
| 2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
| 2019 | 1, | 2669 | c->regs[VCPU_REGS_RDX], &c->dst.val)) |
| 2020 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2670 | goto done; /* IO is needed, skip writeback */ |
| 2021 | c->rep_prefix ? | 2671 | break; |
| 2022 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | ||
| 2023 | (ctxt->eflags & EFLG_DF), | ||
| 2024 | register_address(c, es_base(ctxt), | ||
| 2025 | c->regs[VCPU_REGS_RDI]), | ||
| 2026 | c->rep_prefix, | ||
| 2027 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
| 2028 | c->eip = saved_eip; | ||
| 2029 | return -1; | ||
| 2030 | } | ||
| 2031 | return 0; | ||
| 2032 | case 0x6e: /* outsb */ | 2672 | case 0x6e: /* outsb */ |
| 2033 | case 0x6f: /* outsw/outsd */ | 2673 | case 0x6f: /* outsw/outsd */ |
| 2674 | c->src.bytes = min(c->src.bytes, 4u); | ||
| 2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
| 2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2676 | c->src.bytes)) { |
| 2036 | kvm_inject_gp(ctxt->vcpu, 0); | 2677 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2037 | goto done; | 2678 | goto done; |
| 2038 | } | 2679 | } |
| 2039 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
| 2040 | 0, | 2681 | &c->src.val, 1, ctxt->vcpu); |
| 2041 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2682 | |
| 2042 | c->rep_prefix ? | 2683 | c->dst.type = OP_NONE; /* nothing to writeback */ |
| 2043 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 2684 | break; |
| 2044 | (ctxt->eflags & EFLG_DF), | ||
| 2045 | register_address(c, | ||
| 2046 | seg_override_base(ctxt, c), | ||
| 2047 | c->regs[VCPU_REGS_RSI]), | ||
| 2048 | c->rep_prefix, | ||
| 2049 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
| 2050 | c->eip = saved_eip; | ||
| 2051 | return -1; | ||
| 2052 | } | ||
| 2053 | return 0; | ||
| 2054 | case 0x70 ... 0x7f: /* jcc (short) */ | 2685 | case 0x70 ... 0x7f: /* jcc (short) */ |
| 2055 | if (test_cc(c->b, ctxt->eflags)) | 2686 | if (test_cc(c->b, ctxt->eflags)) |
| 2056 | jmp_rel(c, c->src.val); | 2687 | jmp_rel(c, c->src.val); |
| @@ -2107,12 +2738,11 @@ special_insn: | |||
| 2107 | case 0x8c: { /* mov r/m, sreg */ | 2738 | case 0x8c: { /* mov r/m, sreg */ |
| 2108 | struct kvm_segment segreg; | 2739 | struct kvm_segment segreg; |
| 2109 | 2740 | ||
| 2110 | if (c->modrm_reg <= 5) | 2741 | if (c->modrm_reg <= VCPU_SREG_GS) |
| 2111 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | 2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); |
| 2112 | else { | 2743 | else { |
| 2113 | printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", | 2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 2114 | c->modrm); | 2745 | goto done; |
| 2115 | goto cannot_emulate; | ||
| 2116 | } | 2746 | } |
| 2117 | c->dst.val = segreg.selector; | 2747 | c->dst.val = segreg.selector; |
| 2118 | break; | 2748 | break; |
| @@ -2132,16 +2762,16 @@ special_insn: | |||
| 2132 | } | 2762 | } |
| 2133 | 2763 | ||
| 2134 | if (c->modrm_reg == VCPU_SREG_SS) | 2764 | if (c->modrm_reg == VCPU_SREG_SS) |
| 2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | 2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); |
| 2136 | 2766 | ||
| 2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | 2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
| 2138 | 2768 | ||
| 2139 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2769 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2140 | break; | 2770 | break; |
| 2141 | } | 2771 | } |
| 2142 | case 0x8f: /* pop (sole member of Grp1a) */ | 2772 | case 0x8f: /* pop (sole member of Grp1a) */ |
| 2143 | rc = emulate_grp1a(ctxt, ops); | 2773 | rc = emulate_grp1a(ctxt, ops); |
| 2144 | if (rc != 0) | 2774 | if (rc != X86EMUL_CONTINUE) |
| 2145 | goto done; | 2775 | goto done; |
| 2146 | break; | 2776 | break; |
| 2147 | case 0x90: /* nop / xchg r8,rax */ | 2777 | case 0x90: /* nop / xchg r8,rax */ |
| @@ -2175,89 +2805,16 @@ special_insn: | |||
| 2175 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | 2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; |
| 2176 | break; | 2806 | break; |
| 2177 | case 0xa4 ... 0xa5: /* movs */ | 2807 | case 0xa4 ... 0xa5: /* movs */ |
| 2178 | c->dst.type = OP_MEM; | 2808 | goto mov; |
| 2179 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2180 | c->dst.ptr = (unsigned long *)register_address(c, | ||
| 2181 | es_base(ctxt), | ||
| 2182 | c->regs[VCPU_REGS_RDI]); | ||
| 2183 | rc = ops->read_emulated(register_address(c, | ||
| 2184 | seg_override_base(ctxt, c), | ||
| 2185 | c->regs[VCPU_REGS_RSI]), | ||
| 2186 | &c->dst.val, | ||
| 2187 | c->dst.bytes, ctxt->vcpu); | ||
| 2188 | if (rc != X86EMUL_CONTINUE) | ||
| 2189 | goto done; | ||
| 2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
| 2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2192 | : c->dst.bytes); | ||
| 2193 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
| 2194 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2195 | : c->dst.bytes); | ||
| 2196 | break; | ||
| 2197 | case 0xa6 ... 0xa7: /* cmps */ | 2809 | case 0xa6 ... 0xa7: /* cmps */ |
| 2198 | c->src.type = OP_NONE; /* Disable writeback. */ | ||
| 2199 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2200 | c->src.ptr = (unsigned long *)register_address(c, | ||
| 2201 | seg_override_base(ctxt, c), | ||
| 2202 | c->regs[VCPU_REGS_RSI]); | ||
| 2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, | ||
| 2204 | &c->src.val, | ||
| 2205 | c->src.bytes, | ||
| 2206 | ctxt->vcpu); | ||
| 2207 | if (rc != X86EMUL_CONTINUE) | ||
| 2208 | goto done; | ||
| 2209 | |||
| 2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2810 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2211 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2212 | c->dst.ptr = (unsigned long *)register_address(c, | ||
| 2213 | es_base(ctxt), | ||
| 2214 | c->regs[VCPU_REGS_RDI]); | ||
| 2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
| 2216 | &c->dst.val, | ||
| 2217 | c->dst.bytes, | ||
| 2218 | ctxt->vcpu); | ||
| 2219 | if (rc != X86EMUL_CONTINUE) | ||
| 2220 | goto done; | ||
| 2221 | |||
| 2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
| 2223 | 2812 | goto cmp; | |
| 2224 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
| 2225 | |||
| 2226 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
| 2227 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes | ||
| 2228 | : c->src.bytes); | ||
| 2229 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
| 2230 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2231 | : c->dst.bytes); | ||
| 2232 | |||
| 2233 | break; | ||
| 2234 | case 0xaa ... 0xab: /* stos */ | 2813 | case 0xaa ... 0xab: /* stos */ |
| 2235 | c->dst.type = OP_MEM; | ||
| 2236 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2237 | c->dst.ptr = (unsigned long *)register_address(c, | ||
| 2238 | es_base(ctxt), | ||
| 2239 | c->regs[VCPU_REGS_RDI]); | ||
| 2240 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
| 2241 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
| 2242 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2243 | : c->dst.bytes); | ||
| 2244 | break; | 2815 | break; |
| 2245 | case 0xac ... 0xad: /* lods */ | 2816 | case 0xac ... 0xad: /* lods */ |
| 2246 | c->dst.type = OP_REG; | 2817 | goto mov; |
| 2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
| 2249 | rc = ops->read_emulated(register_address(c, | ||
| 2250 | seg_override_base(ctxt, c), | ||
| 2251 | c->regs[VCPU_REGS_RSI]), | ||
| 2252 | &c->dst.val, | ||
| 2253 | c->dst.bytes, | ||
| 2254 | ctxt->vcpu); | ||
| 2255 | if (rc != X86EMUL_CONTINUE) | ||
| 2256 | goto done; | ||
| 2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
| 2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2259 | : c->dst.bytes); | ||
| 2260 | break; | ||
| 2261 | case 0xae ... 0xaf: /* scas */ | 2818 | case 0xae ... 0xaf: /* scas */ |
| 2262 | DPRINTF("Urk! I don't handle SCAS.\n"); | 2819 | DPRINTF("Urk! I don't handle SCAS.\n"); |
| 2263 | goto cannot_emulate; | 2820 | goto cannot_emulate; |
| @@ -2277,7 +2834,7 @@ special_insn: | |||
| 2277 | break; | 2834 | break; |
| 2278 | case 0xcb: /* ret far */ | 2835 | case 0xcb: /* ret far */ |
| 2279 | rc = emulate_ret_far(ctxt, ops); | 2836 | rc = emulate_ret_far(ctxt, ops); |
| 2280 | if (rc) | 2837 | if (rc != X86EMUL_CONTINUE) |
| 2281 | goto done; | 2838 | goto done; |
| 2282 | break; | 2839 | break; |
| 2283 | case 0xd0 ... 0xd1: /* Grp2 */ | 2840 | case 0xd0 ... 0xd1: /* Grp2 */ |
| @@ -2290,14 +2847,10 @@ special_insn: | |||
| 2290 | break; | 2847 | break; |
| 2291 | case 0xe4: /* inb */ | 2848 | case 0xe4: /* inb */ |
| 2292 | case 0xe5: /* in */ | 2849 | case 0xe5: /* in */ |
| 2293 | port = c->src.val; | 2850 | goto do_io_in; |
| 2294 | io_dir_in = 1; | ||
| 2295 | goto do_io; | ||
| 2296 | case 0xe6: /* outb */ | 2851 | case 0xe6: /* outb */ |
| 2297 | case 0xe7: /* out */ | 2852 | case 0xe7: /* out */ |
| 2298 | port = c->src.val; | 2853 | goto do_io_out; |
| 2299 | io_dir_in = 0; | ||
| 2300 | goto do_io; | ||
| 2301 | case 0xe8: /* call (near) */ { | 2854 | case 0xe8: /* call (near) */ { |
| 2302 | long int rel = c->src.val; | 2855 | long int rel = c->src.val; |
| 2303 | c->src.val = (unsigned long) c->eip; | 2856 | c->src.val = (unsigned long) c->eip; |
| @@ -2308,8 +2861,9 @@ special_insn: | |||
| 2308 | case 0xe9: /* jmp rel */ | 2861 | case 0xe9: /* jmp rel */ |
| 2309 | goto jmp; | 2862 | goto jmp; |
| 2310 | case 0xea: /* jmp far */ | 2863 | case 0xea: /* jmp far */ |
| 2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, | 2864 | jump_far: |
| 2312 | VCPU_SREG_CS)) | 2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, |
| 2866 | VCPU_SREG_CS)) | ||
| 2313 | goto done; | 2867 | goto done; |
| 2314 | 2868 | ||
| 2315 | c->eip = c->src.val; | 2869 | c->eip = c->src.val; |
| @@ -2321,25 +2875,29 @@ special_insn: | |||
| 2321 | break; | 2875 | break; |
| 2322 | case 0xec: /* in al,dx */ | 2876 | case 0xec: /* in al,dx */ |
| 2323 | case 0xed: /* in (e/r)ax,dx */ | 2877 | case 0xed: /* in (e/r)ax,dx */ |
| 2324 | port = c->regs[VCPU_REGS_RDX]; | 2878 | c->src.val = c->regs[VCPU_REGS_RDX]; |
| 2325 | io_dir_in = 1; | 2879 | do_io_in: |
| 2326 | goto do_io; | 2880 | c->dst.bytes = min(c->dst.bytes, 4u); |
| 2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
| 2882 | kvm_inject_gp(ctxt->vcpu, 0); | ||
| 2883 | goto done; | ||
| 2884 | } | ||
| 2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | ||
| 2886 | &c->dst.val)) | ||
| 2887 | goto done; /* IO is needed */ | ||
| 2888 | break; | ||
| 2327 | case 0xee: /* out al,dx */ | 2889 | case 0xee: /* out al,dx */ |
| 2328 | case 0xef: /* out (e/r)ax,dx */ | 2890 | case 0xef: /* out (e/r)ax,dx */ |
| 2329 | port = c->regs[VCPU_REGS_RDX]; | 2891 | c->src.val = c->regs[VCPU_REGS_RDX]; |
| 2330 | io_dir_in = 0; | 2892 | do_io_out: |
| 2331 | do_io: | 2893 | c->dst.bytes = min(c->dst.bytes, 4u); |
| 2332 | if (!emulator_io_permited(ctxt, ops, port, | 2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
| 2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
| 2334 | kvm_inject_gp(ctxt->vcpu, 0); | 2895 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2335 | goto done; | 2896 | goto done; |
| 2336 | } | 2897 | } |
| 2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
| 2338 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2899 | ctxt->vcpu); |
| 2339 | port) != 0) { | 2900 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2340 | c->eip = saved_eip; | ||
| 2341 | goto cannot_emulate; | ||
| 2342 | } | ||
| 2343 | break; | 2901 | break; |
| 2344 | case 0xf4: /* hlt */ | 2902 | case 0xf4: /* hlt */ |
| 2345 | ctxt->vcpu->arch.halt_request = 1; | 2903 | ctxt->vcpu->arch.halt_request = 1; |
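The hunks above collapse the old port/io_dir_in/do_io bookkeeping into shared do_io_in/do_io_out paths: the operand size is clamped to 4 bytes, the TSS I/O permission bitmap is consulted through emulator_io_permited(), and the transfer itself goes through pio_in_emulated() or ops->pio_out_emulated(). A minimal sketch of the IN side, restating the hunk for readability and assuming the helper signatures exactly as they appear there (this is not the patch itself):

static int sketch_do_io_in(struct x86_emulate_ctxt *ctxt,
                           struct x86_emulate_ops *ops,
                           struct decode_cache *c)
{
        c->dst.bytes = min(c->dst.bytes, 4u);   /* port I/O moves at most 4 bytes */

        /* #GP(0) if the I/O permission bitmap forbids this port/size */
        if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
                kvm_inject_gp(ctxt->vcpu, 0);
                return -1;
        }

        /* returns 0 when userspace has to perform the I/O before we continue */
        if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, &c->dst.val))
                return 0;

        return 1;       /* c->dst.val now holds the data read from the port */
}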
| @@ -2350,16 +2908,15 @@ special_insn: | |||
| 2350 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2908 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2351 | break; | 2909 | break; |
| 2352 | case 0xf6 ... 0xf7: /* Grp3 */ | 2910 | case 0xf6 ... 0xf7: /* Grp3 */ |
| 2353 | rc = emulate_grp3(ctxt, ops); | 2911 | if (!emulate_grp3(ctxt, ops)) |
| 2354 | if (rc != 0) | 2912 | goto cannot_emulate; |
| 2355 | goto done; | ||
| 2356 | break; | 2913 | break; |
| 2357 | case 0xf8: /* clc */ | 2914 | case 0xf8: /* clc */ |
| 2358 | ctxt->eflags &= ~EFLG_CF; | 2915 | ctxt->eflags &= ~EFLG_CF; |
| 2359 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2916 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2360 | break; | 2917 | break; |
| 2361 | case 0xfa: /* cli */ | 2918 | case 0xfa: /* cli */ |
| 2362 | if (emulator_bad_iopl(ctxt)) | 2919 | if (emulator_bad_iopl(ctxt, ops)) |
| 2363 | kvm_inject_gp(ctxt->vcpu, 0); | 2920 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2364 | else { | 2921 | else { |
| 2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2922 | ctxt->eflags &= ~X86_EFLAGS_IF; |
| @@ -2367,10 +2924,10 @@ special_insn: | |||
| 2367 | } | 2924 | } |
| 2368 | break; | 2925 | break; |
| 2369 | case 0xfb: /* sti */ | 2926 | case 0xfb: /* sti */ |
| 2370 | if (emulator_bad_iopl(ctxt)) | 2927 | if (emulator_bad_iopl(ctxt, ops)) |
| 2371 | kvm_inject_gp(ctxt->vcpu, 0); | 2928 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2372 | else { | 2929 | else { |
| 2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); |
| 2374 | ctxt->eflags |= X86_EFLAGS_IF; | 2931 | ctxt->eflags |= X86_EFLAGS_IF; |
| 2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2932 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2376 | } | 2933 | } |
| @@ -2383,28 +2940,55 @@ special_insn: | |||
| 2383 | ctxt->eflags |= EFLG_DF; | 2940 | ctxt->eflags |= EFLG_DF; |
| 2384 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2941 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2385 | break; | 2942 | break; |
| 2386 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 2943 | case 0xfe: /* Grp4 */ |
| 2944 | grp45: | ||
| 2387 | rc = emulate_grp45(ctxt, ops); | 2945 | rc = emulate_grp45(ctxt, ops); |
| 2388 | if (rc != 0) | 2946 | if (rc != X86EMUL_CONTINUE) |
| 2389 | goto done; | 2947 | goto done; |
| 2390 | break; | 2948 | break; |
| 2949 | case 0xff: /* Grp5 */ | ||
| 2950 | if (c->modrm_reg == 5) | ||
| 2951 | goto jump_far; | ||
| 2952 | goto grp45; | ||
| 2391 | } | 2953 | } |
| 2392 | 2954 | ||
| 2393 | writeback: | 2955 | writeback: |
| 2394 | rc = writeback(ctxt, ops); | 2956 | rc = writeback(ctxt, ops); |
| 2395 | if (rc != 0) | 2957 | if (rc != X86EMUL_CONTINUE) |
| 2396 | goto done; | 2958 | goto done; |
| 2397 | 2959 | ||
| 2960 | /* | ||
| 2961 | * restore dst type in case the decoding will be reused | ||
| 2962 | * (happens for string instructions) | ||
| 2963 | */ | ||
| 2964 | c->dst.type = saved_dst_type; | ||
| 2965 | |||
| 2966 | if ((c->d & SrcMask) == SrcSI) | ||
| 2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | ||
| 2968 | &c->src); | ||
| 2969 | |||
| 2970 | if ((c->d & DstMask) == DstDI) | ||
| 2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | ||
| 2972 | |||
| 2973 | if (c->rep_prefix && (c->d & String)) { | ||
| 2974 | struct read_cache *rc = &ctxt->decode.io_read; | ||
| 2975 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | ||
| 2976 | /* | ||
| 2977 | * Re-enter the guest when the pio read-ahead buffer is empty or, | ||
| 2978 | * if it is not used, after every 1024 iterations. | ||
| 2979 | */ | ||
| 2980 | if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || | ||
| 2981 | (rc->end != 0 && rc->end == rc->pos)) | ||
| 2982 | ctxt->restart = false; | ||
| 2983 | } | ||
| 2984 | |||
| 2398 | /* Commit shadow register state. */ | 2985 | /* Commit shadow register state. */ |
| 2399 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
| 2400 | kvm_rip_write(ctxt->vcpu, c->eip); | 2987 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | ||
| 2401 | 2989 | ||
| 2402 | done: | 2990 | done: |
| 2403 | if (rc == X86EMUL_UNHANDLEABLE) { | 2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
| 2404 | c->eip = saved_eip; | ||
| 2405 | return -1; | ||
| 2406 | } | ||
| 2407 | return 0; | ||
| 2408 | 2992 | ||
| 2409 | twobyte_insn: | 2993 | twobyte_insn: |
| 2410 | switch (c->b) { | 2994 | switch (c->b) { |
| @@ -2418,18 +3002,18 @@ twobyte_insn: | |||
| 2418 | goto cannot_emulate; | 3002 | goto cannot_emulate; |
| 2419 | 3003 | ||
| 2420 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3004 | rc = kvm_fix_hypercall(ctxt->vcpu); |
| 2421 | if (rc) | 3005 | if (rc != X86EMUL_CONTINUE) |
| 2422 | goto done; | 3006 | goto done; |
| 2423 | 3007 | ||
| 2424 | /* Let the processor re-execute the fixed hypercall */ | 3008 | /* Let the processor re-execute the fixed hypercall */ |
| 2425 | c->eip = kvm_rip_read(ctxt->vcpu); | 3009 | c->eip = ctxt->eip; |
| 2426 | /* Disable writeback. */ | 3010 | /* Disable writeback. */ |
| 2427 | c->dst.type = OP_NONE; | 3011 | c->dst.type = OP_NONE; |
| 2428 | break; | 3012 | break; |
| 2429 | case 2: /* lgdt */ | 3013 | case 2: /* lgdt */ |
| 2430 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3014 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
| 2431 | &size, &address, c->op_bytes); | 3015 | &size, &address, c->op_bytes); |
| 2432 | if (rc) | 3016 | if (rc != X86EMUL_CONTINUE) |
| 2433 | goto done; | 3017 | goto done; |
| 2434 | realmode_lgdt(ctxt->vcpu, size, address); | 3018 | realmode_lgdt(ctxt->vcpu, size, address); |
| 2435 | /* Disable writeback. */ | 3019 | /* Disable writeback. */ |
| @@ -2440,7 +3024,7 @@ twobyte_insn: | |||
| 2440 | switch (c->modrm_rm) { | 3024 | switch (c->modrm_rm) { |
| 2441 | case 1: | 3025 | case 1: |
| 2442 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3026 | rc = kvm_fix_hypercall(ctxt->vcpu); |
| 2443 | if (rc) | 3027 | if (rc != X86EMUL_CONTINUE) |
| 2444 | goto done; | 3028 | goto done; |
| 2445 | break; | 3029 | break; |
| 2446 | default: | 3030 | default: |
| @@ -2450,7 +3034,7 @@ twobyte_insn: | |||
| 2450 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3034 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
| 2451 | &size, &address, | 3035 | &size, &address, |
| 2452 | c->op_bytes); | 3036 | c->op_bytes); |
| 2453 | if (rc) | 3037 | if (rc != X86EMUL_CONTINUE) |
| 2454 | goto done; | 3038 | goto done; |
| 2455 | realmode_lidt(ctxt->vcpu, size, address); | 3039 | realmode_lidt(ctxt->vcpu, size, address); |
| 2456 | } | 3040 | } |
| @@ -2459,15 +3043,18 @@ twobyte_insn: | |||
| 2459 | break; | 3043 | break; |
| 2460 | case 4: /* smsw */ | 3044 | case 4: /* smsw */ |
| 2461 | c->dst.bytes = 2; | 3045 | c->dst.bytes = 2; |
| 2462 | c->dst.val = realmode_get_cr(ctxt->vcpu, 0); | 3046 | c->dst.val = ops->get_cr(0, ctxt->vcpu); |
| 2463 | break; | 3047 | break; |
| 2464 | case 6: /* lmsw */ | 3048 | case 6: /* lmsw */ |
| 2465 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 3049 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | |
| 2466 | &ctxt->eflags); | 3050 | (c->src.val & 0x0f), ctxt->vcpu); |
| 2467 | c->dst.type = OP_NONE; | 3051 | c->dst.type = OP_NONE; |
| 2468 | break; | 3052 | break; |
| 3053 | case 5: /* not defined */ | ||
| 3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 3055 | goto done; | ||
| 2469 | case 7: /* invlpg*/ | 3056 | case 7: /* invlpg*/ |
| 2470 | emulate_invlpg(ctxt->vcpu, memop); | 3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
| 2471 | /* Disable writeback. */ | 3058 | /* Disable writeback. */ |
| 2472 | c->dst.type = OP_NONE; | 3059 | c->dst.type = OP_NONE; |
| 2473 | break; | 3060 | break; |
| @@ -2493,54 +3080,54 @@ twobyte_insn: | |||
| 2493 | c->dst.type = OP_NONE; | 3080 | c->dst.type = OP_NONE; |
| 2494 | break; | 3081 | break; |
| 2495 | case 0x20: /* mov cr, reg */ | 3082 | case 0x20: /* mov cr, reg */ |
| 2496 | if (c->modrm_mod != 3) | 3083 | switch (c->modrm_reg) { |
| 2497 | goto cannot_emulate; | 3084 | case 1: |
| 2498 | c->regs[c->modrm_rm] = | 3085 | case 5 ... 7: |
| 2499 | realmode_get_cr(ctxt->vcpu, c->modrm_reg); | 3086 | case 9 ... 15: |
| 3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 3088 | goto done; | ||
| 3089 | } | ||
| 3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
| 2500 | c->dst.type = OP_NONE; /* no writeback */ | 3091 | c->dst.type = OP_NONE; /* no writeback */ |
| 2501 | break; | 3092 | break; |
| 2502 | case 0x21: /* mov from dr to reg */ | 3093 | case 0x21: /* mov from dr to reg */ |
| 2503 | if (c->modrm_mod != 3) | 3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
| 2504 | goto cannot_emulate; | 3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
| 2505 | rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 2506 | if (rc) | 3097 | goto done; |
| 2507 | goto cannot_emulate; | 3098 | } |
| 3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | ||
| 2508 | c->dst.type = OP_NONE; /* no writeback */ | 3100 | c->dst.type = OP_NONE; /* no writeback */ |
| 2509 | break; | 3101 | break; |
| 2510 | case 0x22: /* mov reg, cr */ | 3102 | case 0x22: /* mov reg, cr */ |
| 2511 | if (c->modrm_mod != 3) | 3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); |
| 2512 | goto cannot_emulate; | ||
| 2513 | realmode_set_cr(ctxt->vcpu, | ||
| 2514 | c->modrm_reg, c->modrm_val, &ctxt->eflags); | ||
| 2515 | c->dst.type = OP_NONE; | 3104 | c->dst.type = OP_NONE; |
| 2516 | break; | 3105 | break; |
| 2517 | case 0x23: /* mov from reg to dr */ | 3106 | case 0x23: /* mov from reg to dr */ |
| 2518 | if (c->modrm_mod != 3) | 3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
| 2519 | goto cannot_emulate; | 3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
| 2520 | rc = emulator_set_dr(ctxt, c->modrm_reg, | 3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 2521 | c->regs[c->modrm_rm]); | 3110 | goto done; |
| 2522 | if (rc) | 3111 | } |
| 2523 | goto cannot_emulate; | 3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); |
| 2524 | c->dst.type = OP_NONE; /* no writeback */ | 3113 | c->dst.type = OP_NONE; /* no writeback */ |
| 2525 | break; | 3114 | break; |
| 2526 | case 0x30: | 3115 | case 0x30: |
| 2527 | /* wrmsr */ | 3116 | /* wrmsr */ |
| 2528 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
| 2529 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
| 2530 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
| 2531 | if (rc) { | ||
| 2532 | kvm_inject_gp(ctxt->vcpu, 0); | 3120 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2533 | c->eip = kvm_rip_read(ctxt->vcpu); | 3121 | goto done; |
| 2534 | } | 3122 | } |
| 2535 | rc = X86EMUL_CONTINUE; | 3123 | rc = X86EMUL_CONTINUE; |
| 2536 | c->dst.type = OP_NONE; | 3124 | c->dst.type = OP_NONE; |
| 2537 | break; | 3125 | break; |
| 2538 | case 0x32: | 3126 | case 0x32: |
| 2539 | /* rdmsr */ | 3127 | /* rdmsr */ |
| 2540 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
| 2541 | if (rc) { | ||
| 2542 | kvm_inject_gp(ctxt->vcpu, 0); | 3129 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2543 | c->eip = kvm_rip_read(ctxt->vcpu); | 3130 | goto done; |
| 2544 | } else { | 3131 | } else { |
| 2545 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
| 2546 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 3133 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
| @@ -2577,7 +3164,7 @@ twobyte_insn: | |||
| 2577 | break; | 3164 | break; |
| 2578 | case 0xa1: /* pop fs */ | 3165 | case 0xa1: /* pop fs */ |
| 2579 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
| 2580 | if (rc != 0) | 3167 | if (rc != X86EMUL_CONTINUE) |
| 2581 | goto done; | 3168 | goto done; |
| 2582 | break; | 3169 | break; |
| 2583 | case 0xa3: | 3170 | case 0xa3: |
| @@ -2596,7 +3183,7 @@ twobyte_insn: | |||
| 2596 | break; | 3183 | break; |
| 2597 | case 0xa9: /* pop gs */ | 3184 | case 0xa9: /* pop gs */ |
| 2598 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
| 2599 | if (rc != 0) | 3186 | if (rc != X86EMUL_CONTINUE) |
| 2600 | goto done; | 3187 | goto done; |
| 2601 | break; | 3188 | break; |
| 2602 | case 0xab: | 3189 | case 0xab: |
| @@ -2668,16 +3255,14 @@ twobyte_insn: | |||
| 2668 | (u64) c->src.val; | 3255 | (u64) c->src.val; |
| 2669 | break; | 3256 | break; |
| 2670 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 3257 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
| 2671 | rc = emulate_grp9(ctxt, ops, memop); | 3258 | rc = emulate_grp9(ctxt, ops); |
| 2672 | if (rc != 0) | 3259 | if (rc != X86EMUL_CONTINUE) |
| 2673 | goto done; | 3260 | goto done; |
| 2674 | c->dst.type = OP_NONE; | ||
| 2675 | break; | 3261 | break; |
| 2676 | } | 3262 | } |
| 2677 | goto writeback; | 3263 | goto writeback; |
| 2678 | 3264 | ||
| 2679 | cannot_emulate: | 3265 | cannot_emulate: |
| 2680 | DPRINTF("Cannot emulate %02x\n", c->b); | 3266 | DPRINTF("Cannot emulate %02x\n", c->b); |
| 2681 | c->eip = saved_eip; | ||
| 2682 | return -1; | 3267 | return -1; |
| 2683 | } | 3268 | } |
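The new writeback tail added to x86_emulate_insn() above is what makes batched string emulation work: RSI/RDI are advanced by string_addr_inc() for SrcSI/DstDI operands, RCX is decremented once per iteration, and ctxt->restart is cleared either when the PIO read-ahead buffer has been drained or, when no buffer is in use, every 1024 iterations so the guest is re-entered periodically. A compact restatement of that check (sketch; field names taken from the hunk):

/*
 * Per-iteration bookkeeping for rep-prefixed string instructions,
 * as added in the writeback path above.
 */
static void sketch_string_iteration_done(struct x86_emulate_ctxt *ctxt)
{
        struct decode_cache *c = &ctxt->decode;
        struct read_cache *rc = &c->io_read;

        register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);

        /*
         * Stop looping inside the emulator (and go back to the guest) when
         * the PIO read-ahead buffer is empty, or every 1024 iterations when
         * no read-ahead buffer is in use.
         */
        if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
            (rc->end != 0 && rc->end == rc->pos))
                ctxt->restart = false;
}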
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index a790fa128a9f..93825ff3338f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -33,6 +33,29 @@ | |||
| 33 | #include <linux/kvm_host.h> | 33 | #include <linux/kvm_host.h> |
| 34 | #include "trace.h" | 34 | #include "trace.h" |
| 35 | 35 | ||
| 36 | static void pic_lock(struct kvm_pic *s) | ||
| 37 | __acquires(&s->lock) | ||
| 38 | { | ||
| 39 | raw_spin_lock(&s->lock); | ||
| 40 | } | ||
| 41 | |||
| 42 | static void pic_unlock(struct kvm_pic *s) | ||
| 43 | __releases(&s->lock) | ||
| 44 | { | ||
| 45 | bool wakeup = s->wakeup_needed; | ||
| 46 | struct kvm_vcpu *vcpu; | ||
| 47 | |||
| 48 | s->wakeup_needed = false; | ||
| 49 | |||
| 50 | raw_spin_unlock(&s->lock); | ||
| 51 | |||
| 52 | if (wakeup) { | ||
| 53 | vcpu = s->kvm->bsp_vcpu; | ||
| 54 | if (vcpu) | ||
| 55 | kvm_vcpu_kick(vcpu); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 36 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 59 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
| 37 | { | 60 | { |
| 38 | s->isr &= ~(1 << irq); | 61 | s->isr &= ~(1 << irq); |
| @@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
| 45 | * Other interrupt may be delivered to PIC while lock is dropped but | 68 | * Other interrupt may be delivered to PIC while lock is dropped but |
| 46 | * it should be safe since PIC state is already updated at this stage. | 69 | * it should be safe since PIC state is already updated at this stage. |
| 47 | */ | 70 | */ |
| 48 | raw_spin_unlock(&s->pics_state->lock); | 71 | pic_unlock(s->pics_state); |
| 49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 72 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
| 50 | raw_spin_lock(&s->pics_state->lock); | 73 | pic_lock(s->pics_state); |
| 51 | } | 74 | } |
| 52 | 75 | ||
| 53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 76 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
| 54 | { | 77 | { |
| 55 | struct kvm_pic *s = pic_irqchip(kvm); | 78 | struct kvm_pic *s = pic_irqchip(kvm); |
| 56 | 79 | ||
| 57 | raw_spin_lock(&s->lock); | 80 | pic_lock(s); |
| 58 | s->pics[0].isr_ack = 0xff; | 81 | s->pics[0].isr_ack = 0xff; |
| 59 | s->pics[1].isr_ack = 0xff; | 82 | s->pics[1].isr_ack = 0xff; |
| 60 | raw_spin_unlock(&s->lock); | 83 | pic_unlock(s); |
| 61 | } | 84 | } |
| 62 | 85 | ||
| 63 | /* | 86 | /* |
| @@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
| 158 | 181 | ||
| 159 | void kvm_pic_update_irq(struct kvm_pic *s) | 182 | void kvm_pic_update_irq(struct kvm_pic *s) |
| 160 | { | 183 | { |
| 161 | raw_spin_lock(&s->lock); | 184 | pic_lock(s); |
| 162 | pic_update_irq(s); | 185 | pic_update_irq(s); |
| 163 | raw_spin_unlock(&s->lock); | 186 | pic_unlock(s); |
| 164 | } | 187 | } |
| 165 | 188 | ||
| 166 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 189 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
| @@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
| 168 | struct kvm_pic *s = opaque; | 191 | struct kvm_pic *s = opaque; |
| 169 | int ret = -1; | 192 | int ret = -1; |
| 170 | 193 | ||
| 171 | raw_spin_lock(&s->lock); | 194 | pic_lock(s); |
| 172 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 195 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
| 173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 196 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
| 174 | pic_update_irq(s); | 197 | pic_update_irq(s); |
| 175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 198 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
| 176 | s->pics[irq >> 3].imr, ret == 0); | 199 | s->pics[irq >> 3].imr, ret == 0); |
| 177 | } | 200 | } |
| 178 | raw_spin_unlock(&s->lock); | 201 | pic_unlock(s); |
| 179 | 202 | ||
| 180 | return ret; | 203 | return ret; |
| 181 | } | 204 | } |
| @@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 205 | int irq, irq2, intno; | 228 | int irq, irq2, intno; |
| 206 | struct kvm_pic *s = pic_irqchip(kvm); | 229 | struct kvm_pic *s = pic_irqchip(kvm); |
| 207 | 230 | ||
| 208 | raw_spin_lock(&s->lock); | 231 | pic_lock(s); |
| 209 | irq = pic_get_irq(&s->pics[0]); | 232 | irq = pic_get_irq(&s->pics[0]); |
| 210 | if (irq >= 0) { | 233 | if (irq >= 0) { |
| 211 | pic_intack(&s->pics[0], irq); | 234 | pic_intack(&s->pics[0], irq); |
| @@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 230 | intno = s->pics[0].irq_base + irq; | 253 | intno = s->pics[0].irq_base + irq; |
| 231 | } | 254 | } |
| 232 | pic_update_irq(s); | 255 | pic_update_irq(s); |
| 233 | raw_spin_unlock(&s->lock); | 256 | pic_unlock(s); |
| 234 | 257 | ||
| 235 | return intno; | 258 | return intno; |
| 236 | } | 259 | } |
| @@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
| 444 | printk(KERN_ERR "PIC: non byte write\n"); | 467 | printk(KERN_ERR "PIC: non byte write\n"); |
| 445 | return 0; | 468 | return 0; |
| 446 | } | 469 | } |
| 447 | raw_spin_lock(&s->lock); | 470 | pic_lock(s); |
| 448 | switch (addr) { | 471 | switch (addr) { |
| 449 | case 0x20: | 472 | case 0x20: |
| 450 | case 0x21: | 473 | case 0x21: |
| @@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
| 457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 480 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
| 458 | break; | 481 | break; |
| 459 | } | 482 | } |
| 460 | raw_spin_unlock(&s->lock); | 483 | pic_unlock(s); |
| 461 | return 0; | 484 | return 0; |
| 462 | } | 485 | } |
| 463 | 486 | ||
| @@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
| 474 | printk(KERN_ERR "PIC: non byte read\n"); | 497 | printk(KERN_ERR "PIC: non byte read\n"); |
| 475 | return 0; | 498 | return 0; |
| 476 | } | 499 | } |
| 477 | raw_spin_lock(&s->lock); | 500 | pic_lock(s); |
| 478 | switch (addr) { | 501 | switch (addr) { |
| 479 | case 0x20: | 502 | case 0x20: |
| 480 | case 0x21: | 503 | case 0x21: |
| @@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
| 488 | break; | 511 | break; |
| 489 | } | 512 | } |
| 490 | *(unsigned char *)val = data; | 513 | *(unsigned char *)val = data; |
| 491 | raw_spin_unlock(&s->lock); | 514 | pic_unlock(s); |
| 492 | return 0; | 515 | return 0; |
| 493 | } | 516 | } |
| 494 | 517 | ||
| @@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level) | |||
| 505 | s->output = level; | 528 | s->output = level; |
| 506 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 529 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
| 507 | s->pics[0].isr_ack &= ~(1 << irq); | 530 | s->pics[0].isr_ack &= ~(1 << irq); |
| 508 | kvm_vcpu_kick(vcpu); | 531 | s->wakeup_needed = true; |
| 509 | } | 532 | } |
| 510 | } | 533 | } |
| 511 | 534 | ||
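Taken together, the i8259 changes above move the VCPU kick out of the raw spinlock's critical section: pic_irq_request() only records s->wakeup_needed while the lock is held, and pic_unlock() performs the kvm_vcpu_kick() after raw_spin_unlock(). A caller such as kvm_pic_set_irq() therefore follows this shape (sketch, using the helpers and fields introduced by the patch):

/* Deferred-wakeup pattern used by the PIC code above (sketch). */
static int sketch_pic_set_irq(struct kvm_pic *s, int irq, int level)
{
        int ret = -1;

        pic_lock(s);                    /* raw_spin_lock(&s->lock) */
        if (irq >= 0 && irq < PIC_NUM_PINS) {
                ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
                pic_update_irq(s);      /* may end in pic_irq_request(), which
                                         * now only sets s->wakeup_needed */
        }
        pic_unlock(s);                  /* drops the lock, then kicks
                                         * kvm->bsp_vcpu if a wakeup is pending */
        return ret;
}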
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 34b15915754d..cd1f362f413d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -63,6 +63,7 @@ struct kvm_kpic_state { | |||
| 63 | 63 | ||
| 64 | struct kvm_pic { | 64 | struct kvm_pic { |
| 65 | raw_spinlock_t lock; | 65 | raw_spinlock_t lock; |
| 66 | bool wakeup_needed; | ||
| 66 | unsigned pending_acks; | 67 | unsigned pending_acks; |
| 67 | struct kvm *kvm; | 68 | struct kvm *kvm; |
| 68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 55c7524dda54..64bc6ea78d90 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
| @@ -10,9 +10,7 @@ struct kvm_timer { | |||
| 10 | }; | 10 | }; |
| 11 | 11 | ||
| 12 | struct kvm_timer_ops { | 12 | struct kvm_timer_ops { |
| 13 | bool (*is_periodic)(struct kvm_timer *); | 13 | bool (*is_periodic)(struct kvm_timer *); |
| 14 | }; | 14 | }; |
| 15 | 15 | ||
| 16 | |||
| 17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | 16 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); |
| 18 | |||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 19a8906bcaa2..81563e76e28f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); | |||
| 148 | 148 | ||
| 149 | #include <trace/events/kvm.h> | 149 | #include <trace/events/kvm.h> |
| 150 | 150 | ||
| 151 | #undef TRACE_INCLUDE_FILE | ||
| 152 | #define CREATE_TRACE_POINTS | 151 | #define CREATE_TRACE_POINTS |
| 153 | #include "mmutrace.h" | 152 | #include "mmutrace.h" |
| 154 | 153 | ||
| @@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
| 174 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
| 175 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
| 176 | 175 | ||
| 177 | 176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | |
| 178 | struct kvm_unsync_walk { | ||
| 179 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
| 180 | }; | ||
| 181 | |||
| 182 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
| 183 | 177 | ||
| 184 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
| 185 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
| @@ -223,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
| 223 | } | 217 | } |
| 224 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 218 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
| 225 | 219 | ||
| 226 | static int is_write_protection(struct kvm_vcpu *vcpu) | 220 | static bool is_write_protection(struct kvm_vcpu *vcpu) |
| 227 | { | 221 | { |
| 228 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | 222 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
| 229 | } | 223 | } |
| @@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | |||
| 327 | page = alloc_page(GFP_KERNEL); | 321 | page = alloc_page(GFP_KERNEL); |
| 328 | if (!page) | 322 | if (!page) |
| 329 | return -ENOMEM; | 323 | return -ENOMEM; |
| 330 | set_page_private(page, 0); | ||
| 331 | cache->objects[cache->nobjs++] = page_address(page); | 324 | cache->objects[cache->nobjs++] = page_address(page); |
| 332 | } | 325 | } |
| 333 | return 0; | 326 | return 0; |
| @@ -438,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
| 438 | int i; | 431 | int i; |
| 439 | 432 | ||
| 440 | gfn = unalias_gfn(kvm, gfn); | 433 | gfn = unalias_gfn(kvm, gfn); |
| 434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 441 | for (i = PT_DIRECTORY_LEVEL; | 435 | for (i = PT_DIRECTORY_LEVEL; |
| 442 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
| 443 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 444 | write_count = slot_largepage_idx(gfn, slot, i); | 437 | write_count = slot_largepage_idx(gfn, slot, i); |
| 445 | *write_count -= 1; | 438 | *write_count -= 1; |
| 446 | WARN_ON(*write_count < 0); | 439 | WARN_ON(*write_count < 0); |
| @@ -654,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
| 654 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
| 655 | { | 648 | { |
| 656 | struct kvm_rmap_desc *desc; | 649 | struct kvm_rmap_desc *desc; |
| 657 | struct kvm_rmap_desc *prev_desc; | ||
| 658 | u64 *prev_spte; | 650 | u64 *prev_spte; |
| 659 | int i; | 651 | int i; |
| 660 | 652 | ||
| @@ -666,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
| 666 | return NULL; | 658 | return NULL; |
| 667 | } | 659 | } |
| 668 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 660 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
| 669 | prev_desc = NULL; | ||
| 670 | prev_spte = NULL; | 661 | prev_spte = NULL; |
| 671 | while (desc) { | 662 | while (desc) { |
| 672 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { | 663 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { |
| @@ -794,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
| 794 | int retval = 0; | 785 | int retval = 0; |
| 795 | struct kvm_memslots *slots; | 786 | struct kvm_memslots *slots; |
| 796 | 787 | ||
| 797 | slots = rcu_dereference(kvm->memslots); | 788 | slots = kvm_memslots(kvm); |
| 798 | 789 | ||
| 799 | for (i = 0; i < slots->nmemslots; i++) { | 790 | for (i = 0; i < slots->nmemslots; i++) { |
| 800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 791 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
| @@ -925,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
| 925 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
| 926 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
| 927 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
| 928 | INIT_LIST_HEAD(&sp->oos_link); | ||
| 929 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
| 930 | sp->multimapped = 0; | 920 | sp->multimapped = 0; |
| 931 | sp->parent_pte = parent_pte; | 921 | sp->parent_pte = parent_pte; |
| @@ -1009,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
| 1009 | } | 999 | } |
| 1010 | 1000 | ||
| 1011 | 1001 | ||
| 1012 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
| 1013 | mmu_parent_walk_fn fn) | ||
| 1014 | { | 1003 | { |
| 1015 | struct kvm_pte_chain *pte_chain; | 1004 | struct kvm_pte_chain *pte_chain; |
| 1016 | struct hlist_node *node; | 1005 | struct hlist_node *node; |
| @@ -1019,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
| 1019 | 1008 | ||
| 1020 | if (!sp->multimapped && sp->parent_pte) { | 1009 | if (!sp->multimapped && sp->parent_pte) { |
| 1021 | parent_sp = page_header(__pa(sp->parent_pte)); | 1010 | parent_sp = page_header(__pa(sp->parent_pte)); |
| 1022 | fn(vcpu, parent_sp); | 1011 | fn(parent_sp); |
| 1023 | mmu_parent_walk(vcpu, parent_sp, fn); | 1012 | mmu_parent_walk(parent_sp, fn); |
| 1024 | return; | 1013 | return; |
| 1025 | } | 1014 | } |
| 1026 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
| @@ -1028,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
| 1028 | if (!pte_chain->parent_ptes[i]) | 1017 | if (!pte_chain->parent_ptes[i]) |
| 1029 | break; | 1018 | break; |
| 1030 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); |
| 1031 | fn(vcpu, parent_sp); | 1020 | fn(parent_sp); |
| 1032 | mmu_parent_walk(vcpu, parent_sp, fn); | 1021 | mmu_parent_walk(parent_sp, fn); |
| 1033 | } | 1022 | } |
| 1034 | } | 1023 | } |
| 1035 | 1024 | ||
| @@ -1066,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
| 1066 | } | 1055 | } |
| 1067 | } | 1056 | } |
| 1068 | 1057 | ||
| 1069 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) |
| 1070 | { | 1059 | { |
| 1071 | kvm_mmu_update_parents_unsync(sp); | 1060 | kvm_mmu_update_parents_unsync(sp); |
| 1072 | return 1; | 1061 | return 1; |
| 1073 | } | 1062 | } |
| 1074 | 1063 | ||
| 1075 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | 1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
| 1076 | struct kvm_mmu_page *sp) | ||
| 1077 | { | 1065 | { |
| 1078 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | 1066 | mmu_parent_walk(sp, unsync_walk_fn); |
| 1079 | kvm_mmu_update_parents_unsync(sp); | 1067 | kvm_mmu_update_parents_unsync(sp); |
| 1080 | } | 1068 | } |
| 1081 | 1069 | ||
| @@ -1201,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
| 1201 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 1202 | { | 1190 | { |
| 1203 | WARN_ON(!sp->unsync); | 1191 | WARN_ON(!sp->unsync); |
| 1192 | trace_kvm_mmu_sync_page(sp); | ||
| 1204 | sp->unsync = 0; | 1193 | sp->unsync = 0; |
| 1205 | --kvm->stat.mmu_unsync; | 1194 | --kvm->stat.mmu_unsync; |
| 1206 | } | 1195 | } |
| @@ -1209,12 +1198,11 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | |||
| 1209 | 1198 | ||
| 1210 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
| 1211 | { | 1200 | { |
| 1212 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | 1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
| 1213 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1202 | kvm_mmu_zap_page(vcpu->kvm, sp); |
| 1214 | return 1; | 1203 | return 1; |
| 1215 | } | 1204 | } |
| 1216 | 1205 | ||
| 1217 | trace_kvm_mmu_sync_page(sp); | ||
| 1218 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) |
| 1219 | kvm_flush_remote_tlbs(vcpu->kvm); | 1207 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 1220 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
| @@ -1331,6 +1319,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1331 | role = vcpu->arch.mmu.base_role; | 1319 | role = vcpu->arch.mmu.base_role; |
| 1332 | role.level = level; | 1320 | role.level = level; |
| 1333 | role.direct = direct; | 1321 | role.direct = direct; |
| 1322 | if (role.direct) | ||
| 1323 | role.cr4_pae = 0; | ||
| 1334 | role.access = access; | 1324 | role.access = access; |
| 1335 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
| 1336 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
| @@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1351 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
| 1352 | if (sp->unsync_children) { | 1342 | if (sp->unsync_children) { |
| 1353 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); |
| 1354 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1344 | kvm_mmu_mark_parents_unsync(sp); |
| 1355 | } | 1345 | } |
| 1356 | trace_kvm_mmu_get_page(sp, false); | 1346 | trace_kvm_mmu_get_page(sp, false); |
| 1357 | return sp; | 1347 | return sp; |
| @@ -1573,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
| 1573 | r = 0; | 1563 | r = 0; |
| 1574 | index = kvm_page_table_hashfn(gfn); | 1564 | index = kvm_page_table_hashfn(gfn); |
| 1575 | bucket = &kvm->arch.mmu_page_hash[index]; | 1565 | bucket = &kvm->arch.mmu_page_hash[index]; |
| 1566 | restart: | ||
| 1576 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) |
| 1577 | if (sp->gfn == gfn && !sp->role.direct) { | 1568 | if (sp->gfn == gfn && !sp->role.direct) { |
| 1578 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
| 1579 | sp->role.word); | 1570 | sp->role.word); |
| 1580 | r = 1; | 1571 | r = 1; |
| 1581 | if (kvm_mmu_zap_page(kvm, sp)) | 1572 | if (kvm_mmu_zap_page(kvm, sp)) |
| 1582 | n = bucket->first; | 1573 | goto restart; |
| 1583 | } | 1574 | } |
| 1584 | return r; | 1575 | return r; |
| 1585 | } | 1576 | } |
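The hlist_for_each_entry_safe() loops that zap shadow pages are switched from patching the iterator ("n = bucket->first") to a restart label: kvm_mmu_zap_page() can remove more than one page from the bucket, so the only safe response when it reports that the list changed is to start the walk over. The same pattern is applied to mmu_unshadow(), kvm_mmu_pte_write() and kvm_mmu_zap_all() in the hunks below. The resulting loop shape, mirroring the hunk above (sketch):

/* Shape of kvm_mmu_unprotect_page() after the change (sketch). */
static int sketch_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
        struct kvm_mmu_page *sp;
        struct hlist_node *node, *n;
        struct hlist_head *bucket;
        int r = 0;

        bucket = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
restart:
        hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
                if (sp->gfn == gfn && !sp->role.direct) {
                        r = 1;
                        /*
                         * Zapping can free other pages in this bucket, which
                         * invalidates the safe-iteration cursor; start over.
                         */
                        if (kvm_mmu_zap_page(kvm, sp))
                                goto restart;
                }
        return r;
}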
| @@ -1593,13 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
| 1593 | 1584 | ||
| 1594 | index = kvm_page_table_hashfn(gfn); | 1585 | index = kvm_page_table_hashfn(gfn); |
| 1595 | bucket = &kvm->arch.mmu_page_hash[index]; | 1586 | bucket = &kvm->arch.mmu_page_hash[index]; |
| 1587 | restart: | ||
| 1596 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { |
| 1597 | if (sp->gfn == gfn && !sp->role.direct | 1589 | if (sp->gfn == gfn && !sp->role.direct |
| 1598 | && !sp->role.invalid) { | 1590 | && !sp->role.invalid) { |
| 1599 | pgprintk("%s: zap %lx %x\n", | 1591 | pgprintk("%s: zap %lx %x\n", |
| 1600 | __func__, gfn, sp->role.word); | 1592 | __func__, gfn, sp->role.word); |
| 1601 | if (kvm_mmu_zap_page(kvm, sp)) | 1593 | if (kvm_mmu_zap_page(kvm, sp)) |
| 1602 | nn = bucket->first; | 1594 | goto restart; |
| 1603 | } | 1595 | } |
| 1604 | } | 1596 | } |
| 1605 | } | 1597 | } |
| @@ -1626,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) | |||
| 1626 | } | 1618 | } |
| 1627 | } | 1619 | } |
| 1628 | 1620 | ||
| 1629 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 1630 | { | ||
| 1631 | struct page *page; | ||
| 1632 | |||
| 1633 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | ||
| 1634 | |||
| 1635 | if (gpa == UNMAPPED_GVA) | ||
| 1636 | return NULL; | ||
| 1637 | |||
| 1638 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
| 1639 | |||
| 1640 | return page; | ||
| 1641 | } | ||
| 1642 | |||
| 1643 | /* | 1621 | /* |
| 1644 | * The function is based on mtrr_type_lookup() in | 1622 | * The function is based on mtrr_type_lookup() in |
| 1645 | * arch/x86/kernel/cpu/mtrr/generic.c | 1623 | * arch/x86/kernel/cpu/mtrr/generic.c |
| @@ -1752,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 1752 | struct kvm_mmu_page *s; | 1730 | struct kvm_mmu_page *s; |
| 1753 | struct hlist_node *node, *n; | 1731 | struct hlist_node *node, *n; |
| 1754 | 1732 | ||
| 1755 | trace_kvm_mmu_unsync_page(sp); | ||
| 1756 | index = kvm_page_table_hashfn(sp->gfn); | 1733 | index = kvm_page_table_hashfn(sp->gfn); |
| 1757 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 1758 | /* don't unsync if pagetable is shadowed with multiple roles */ | 1735 | /* don't unsync if pagetable is shadowed with multiple roles */ |
| @@ -1762,10 +1739,11 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 1762 | if (s->role.word != sp->role.word) | 1739 | if (s->role.word != sp->role.word) |
| 1763 | return 1; | 1740 | return 1; |
| 1764 | } | 1741 | } |
| 1742 | trace_kvm_mmu_unsync_page(sp); | ||
| 1765 | ++vcpu->kvm->stat.mmu_unsync; | 1743 | ++vcpu->kvm->stat.mmu_unsync; |
| 1766 | sp->unsync = 1; | 1744 | sp->unsync = 1; |
| 1767 | 1745 | ||
| 1768 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1746 | kvm_mmu_mark_parents_unsync(sp); |
| 1769 | 1747 | ||
| 1770 | mmu_convert_notrap(sp); | 1748 | mmu_convert_notrap(sp); |
| 1771 | return 0; | 1749 | return 0; |
| @@ -2081,21 +2059,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 2081 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2059 | hpa_t root = vcpu->arch.mmu.root_hpa; |
| 2082 | 2060 | ||
| 2083 | ASSERT(!VALID_PAGE(root)); | 2061 | ASSERT(!VALID_PAGE(root)); |
| 2084 | if (tdp_enabled) | ||
| 2085 | direct = 1; | ||
| 2086 | if (mmu_check_root(vcpu, root_gfn)) | 2062 | if (mmu_check_root(vcpu, root_gfn)) |
| 2087 | return 1; | 2063 | return 1; |
| 2064 | if (tdp_enabled) { | ||
| 2065 | direct = 1; | ||
| 2066 | root_gfn = 0; | ||
| 2067 | } | ||
| 2068 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2088 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2069 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
| 2089 | PT64_ROOT_LEVEL, direct, | 2070 | PT64_ROOT_LEVEL, direct, |
| 2090 | ACC_ALL, NULL); | 2071 | ACC_ALL, NULL); |
| 2091 | root = __pa(sp->spt); | 2072 | root = __pa(sp->spt); |
| 2092 | ++sp->root_count; | 2073 | ++sp->root_count; |
| 2074 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2093 | vcpu->arch.mmu.root_hpa = root; | 2075 | vcpu->arch.mmu.root_hpa = root; |
| 2094 | return 0; | 2076 | return 0; |
| 2095 | } | 2077 | } |
| 2096 | direct = !is_paging(vcpu); | 2078 | direct = !is_paging(vcpu); |
| 2097 | if (tdp_enabled) | ||
| 2098 | direct = 1; | ||
| 2099 | for (i = 0; i < 4; ++i) { | 2079 | for (i = 0; i < 4; ++i) { |
| 2100 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2080 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
| 2101 | 2081 | ||
| @@ -2111,11 +2091,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 2111 | root_gfn = 0; | 2091 | root_gfn = 0; |
| 2112 | if (mmu_check_root(vcpu, root_gfn)) | 2092 | if (mmu_check_root(vcpu, root_gfn)) |
| 2113 | return 1; | 2093 | return 1; |
| 2094 | if (tdp_enabled) { | ||
| 2095 | direct = 1; | ||
| 2096 | root_gfn = i << 30; | ||
| 2097 | } | ||
| 2098 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2114 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2099 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
| 2115 | PT32_ROOT_LEVEL, direct, | 2100 | PT32_ROOT_LEVEL, direct, |
| 2116 | ACC_ALL, NULL); | 2101 | ACC_ALL, NULL); |
| 2117 | root = __pa(sp->spt); | 2102 | root = __pa(sp->spt); |
| 2118 | ++sp->root_count; | 2103 | ++sp->root_count; |
| 2104 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2105 | |||
| 2119 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 2106 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
| 2120 | } | 2107 | } |
| 2121 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 2108 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
| @@ -2299,13 +2286,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
| 2299 | /* no rsvd bits for 2 level 4K page table entries */ | 2286 | /* no rsvd bits for 2 level 4K page table entries */ |
| 2300 | context->rsvd_bits_mask[0][1] = 0; | 2287 | context->rsvd_bits_mask[0][1] = 0; |
| 2301 | context->rsvd_bits_mask[0][0] = 0; | 2288 | context->rsvd_bits_mask[0][0] = 0; |
| 2289 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
| 2290 | |||
| 2291 | if (!is_pse(vcpu)) { | ||
| 2292 | context->rsvd_bits_mask[1][1] = 0; | ||
| 2293 | break; | ||
| 2294 | } | ||
| 2295 | |||
| 2302 | if (is_cpuid_PSE36()) | 2296 | if (is_cpuid_PSE36()) |
| 2303 | /* 36bits PSE 4MB page */ | 2297 | /* 36bits PSE 4MB page */ |
| 2304 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | 2298 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); |
| 2305 | else | 2299 | else |
| 2306 | /* 32 bits PSE 4MB page */ | 2300 | /* 32 bits PSE 4MB page */ |
| 2307 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2301 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
| 2308 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | ||
| 2309 | break; | 2302 | break; |
| 2310 | case PT32E_ROOT_LEVEL: | 2303 | case PT32E_ROOT_LEVEL: |
| 2311 | context->rsvd_bits_mask[0][2] = | 2304 | context->rsvd_bits_mask[0][2] = |
| @@ -2318,7 +2311,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
| 2318 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2311 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
| 2319 | rsvd_bits(maxphyaddr, 62) | | 2312 | rsvd_bits(maxphyaddr, 62) | |
| 2320 | rsvd_bits(13, 20); /* large page */ | 2313 | rsvd_bits(13, 20); /* large page */ |
| 2321 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2314 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
| 2322 | break; | 2315 | break; |
| 2323 | case PT64_ROOT_LEVEL: | 2316 | case PT64_ROOT_LEVEL: |
| 2324 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2317 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
| @@ -2336,7 +2329,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
| 2336 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2329 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
| 2337 | rsvd_bits(maxphyaddr, 51) | | 2330 | rsvd_bits(maxphyaddr, 51) | |
| 2338 | rsvd_bits(13, 20); /* large page */ | 2331 | rsvd_bits(13, 20); /* large page */ |
| 2339 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2332 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
| 2340 | break; | 2333 | break; |
| 2341 | } | 2334 | } |
| 2342 | } | 2335 | } |
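All three reset_rsvds_bits_mask() hunks above replace a self-assignment (rsvd_bits_mask[1][0] = rsvd_bits_mask[1][0], a no-op) with a copy from the 4K-page mask in [0][0], so large-page entries with bit 7 clear are checked against the same reserved bits as ordinary PTEs; the 32-bit non-PAE case additionally clears the large-page mask when CR4.PSE is off. For reference, rsvd_bits(s, e) builds a mask with bits s..e set; its body is not shown in this diff, so treat the sketch below as an assumption about the helper rather than a quote of it:

static u64 sketch_rsvd_bits(int s, int e)
{
        return ((1ULL << (e - s + 1)) - 1) << s;        /* bits s..e inclusive */
}
/* e.g. sketch_rsvd_bits(13, 21) == 0x3fe000, the reserved bits of a
 * non-PSE36 4MB PDE as used in the PT32_ROOT_LEVEL case above. */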
| @@ -2438,7 +2431,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
| 2438 | else | 2431 | else |
| 2439 | r = paging32_init_context(vcpu); | 2432 | r = paging32_init_context(vcpu); |
| 2440 | 2433 | ||
| 2441 | vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; | 2434 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
| 2435 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | ||
| 2442 | 2436 | ||
| 2443 | return r; | 2437 | return r; |
| 2444 | } | 2438 | } |
| @@ -2478,7 +2472,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
| 2478 | goto out; | 2472 | goto out; |
| 2479 | spin_lock(&vcpu->kvm->mmu_lock); | 2473 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2480 | kvm_mmu_free_some_pages(vcpu); | 2474 | kvm_mmu_free_some_pages(vcpu); |
| 2475 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2481 | r = mmu_alloc_roots(vcpu); | 2476 | r = mmu_alloc_roots(vcpu); |
| 2477 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2482 | mmu_sync_roots(vcpu); | 2478 | mmu_sync_roots(vcpu); |
| 2483 | spin_unlock(&vcpu->kvm->mmu_lock); | 2479 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 2484 | if (r) | 2480 | if (r) |
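kvm_mmu_load() now drops mmu_lock before calling mmu_alloc_roots() and re-takes it only for mmu_sync_roots(); mmu_alloc_roots() in turn takes the lock itself just around each kvm_mmu_get_page() call, as shown in the two mmu_alloc_roots() hunks earlier. The resulting locking shape (sketch; error handling trimmed):

static int sketch_kvm_mmu_load(struct kvm_vcpu *vcpu)
{
        int r;

        spin_lock(&vcpu->kvm->mmu_lock);
        kvm_mmu_free_some_pages(vcpu);
        spin_unlock(&vcpu->kvm->mmu_lock);

        /* takes and releases mmu_lock internally around kvm_mmu_get_page() */
        r = mmu_alloc_roots(vcpu);

        spin_lock(&vcpu->kvm->mmu_lock);
        mmu_sync_roots(vcpu);
        spin_unlock(&vcpu->kvm->mmu_lock);

        return r;
}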
| @@ -2527,7 +2523,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
| 2527 | } | 2523 | } |
| 2528 | 2524 | ||
| 2529 | ++vcpu->kvm->stat.mmu_pte_updated; | 2525 | ++vcpu->kvm->stat.mmu_pte_updated; |
| 2530 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 2526 | if (!sp->role.cr4_pae) |
| 2531 | paging32_update_pte(vcpu, sp, spte, new); | 2527 | paging32_update_pte(vcpu, sp, spte, new); |
| 2532 | else | 2528 | else |
| 2533 | paging64_update_pte(vcpu, sp, spte, new); | 2529 | paging64_update_pte(vcpu, sp, spte, new); |
| @@ -2562,36 +2558,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
| 2562 | } | 2558 | } |
| 2563 | 2559 | ||
| 2564 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2560 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 2565 | const u8 *new, int bytes) | 2561 | u64 gpte) |
| 2566 | { | 2562 | { |
| 2567 | gfn_t gfn; | 2563 | gfn_t gfn; |
| 2568 | int r; | ||
| 2569 | u64 gpte = 0; | ||
| 2570 | pfn_t pfn; | 2564 | pfn_t pfn; |
| 2571 | 2565 | ||
| 2572 | if (bytes != 4 && bytes != 8) | ||
| 2573 | return; | ||
| 2574 | |||
| 2575 | /* | ||
| 2576 | * Assume that the pte write on a page table of the same type | ||
| 2577 | * as the current vcpu paging mode. This is nearly always true | ||
| 2578 | * (might be false while changing modes). Note it is verified later | ||
| 2579 | * by update_pte(). | ||
| 2580 | */ | ||
| 2581 | if (is_pae(vcpu)) { | ||
| 2582 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
| 2583 | if ((bytes == 4) && (gpa % 4 == 0)) { | ||
| 2584 | r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); | ||
| 2585 | if (r) | ||
| 2586 | return; | ||
| 2587 | memcpy((void *)&gpte + (gpa % 8), new, 4); | ||
| 2588 | } else if ((bytes == 8) && (gpa % 8 == 0)) { | ||
| 2589 | memcpy((void *)&gpte, new, 8); | ||
| 2590 | } | ||
| 2591 | } else { | ||
| 2592 | if ((bytes == 4) && (gpa % 4 == 0)) | ||
| 2593 | memcpy((void *)&gpte, new, 4); | ||
| 2594 | } | ||
| 2595 | if (!is_present_gpte(gpte)) | 2566 | if (!is_present_gpte(gpte)) |
| 2596 | return; | 2567 | return; |
| 2597 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2568 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| @@ -2640,10 +2611,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2640 | int flooded = 0; | 2611 | int flooded = 0; |
| 2641 | int npte; | 2612 | int npte; |
| 2642 | int r; | 2613 | int r; |
| 2614 | int invlpg_counter; | ||
| 2643 | 2615 | ||
| 2644 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2616 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
| 2645 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 2617 | |
| 2618 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | ||
| 2619 | |||
| 2620 | /* | ||
| 2621 | * Assume that the pte write on a page table of the same type | ||
| 2622 | * as the current vcpu paging mode. This is nearly always true | ||
| 2623 | * (might be false while changing modes). Note it is verified later | ||
| 2624 | * by update_pte(). | ||
| 2625 | */ | ||
| 2626 | if ((is_pae(vcpu) && bytes == 4) || !new) { | ||
| 2627 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
| 2628 | if (is_pae(vcpu)) { | ||
| 2629 | gpa &= ~(gpa_t)7; | ||
| 2630 | bytes = 8; | ||
| 2631 | } | ||
| 2632 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
| 2633 | if (r) | ||
| 2634 | gentry = 0; | ||
| 2635 | new = (const u8 *)&gentry; | ||
| 2636 | } | ||
| 2637 | |||
| 2638 | switch (bytes) { | ||
| 2639 | case 4: | ||
| 2640 | gentry = *(const u32 *)new; | ||
| 2641 | break; | ||
| 2642 | case 8: | ||
| 2643 | gentry = *(const u64 *)new; | ||
| 2644 | break; | ||
| 2645 | default: | ||
| 2646 | gentry = 0; | ||
| 2647 | break; | ||
| 2648 | } | ||
| 2649 | |||
| 2650 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | ||
| 2646 | spin_lock(&vcpu->kvm->mmu_lock); | 2651 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2652 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
| 2653 | gentry = 0; | ||
| 2647 | kvm_mmu_access_page(vcpu, gfn); | 2654 | kvm_mmu_access_page(vcpu, gfn); |
| 2648 | kvm_mmu_free_some_pages(vcpu); | 2655 | kvm_mmu_free_some_pages(vcpu); |
| 2649 | ++vcpu->kvm->stat.mmu_pte_write; | 2656 | ++vcpu->kvm->stat.mmu_pte_write; |
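The kvm_mmu_pte_write() hunk above reorganizes how the written guest PTE is obtained: kvm->arch.invlpg_counter is sampled before touching guest memory, the "32-bit guest writing half of a 64-bit PAE gpte" case (and the new == NULL case) is widened to a full 8-byte read, the bytes are folded into gentry, and after mmu_lock is taken the counter is re-checked; if an invlpg raced in between, gentry is treated as zero rather than risk installing a stale mapping. In outline (fragment sketch; names as in the hunk):

        /* sketch of the ordering in kvm_mmu_pte_write() above */
        invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);

        if ((is_pae(vcpu) && bytes == 4) || !new) {
                if (is_pae(vcpu)) {             /* widen to the full 64-bit gpte */
                        gpa &= ~(gpa_t)7;
                        bytes = 8;
                }
                if (kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)))
                        gentry = 0;
                new = (const u8 *)&gentry;
        }
        gentry = (bytes == 8) ? *(const u64 *)new
               : (bytes == 4) ? *(const u32 *)new : 0;

        spin_lock(&vcpu->kvm->mmu_lock);
        if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
                gentry = 0;     /* an invlpg ran concurrently; drop the value */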
| @@ -2662,10 +2669,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2662 | } | 2669 | } |
| 2663 | index = kvm_page_table_hashfn(gfn); | 2670 | index = kvm_page_table_hashfn(gfn); |
| 2664 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2671 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 2672 | |||
| 2673 | restart: | ||
| 2665 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2674 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
| 2666 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | 2675 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) |
| 2667 | continue; | 2676 | continue; |
| 2668 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2677 | pte_size = sp->role.cr4_pae ? 8 : 4; |
| 2669 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2678 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
| 2670 | misaligned |= bytes < 4; | 2679 | misaligned |= bytes < 4; |
| 2671 | if (misaligned || flooded) { | 2680 | if (misaligned || flooded) { |
| @@ -2682,14 +2691,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2682 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2691 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
| 2683 | gpa, bytes, sp->role.word); | 2692 | gpa, bytes, sp->role.word); |
| 2684 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2693 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
| 2685 | n = bucket->first; | 2694 | goto restart; |
| 2686 | ++vcpu->kvm->stat.mmu_flooded; | 2695 | ++vcpu->kvm->stat.mmu_flooded; |
| 2687 | continue; | 2696 | continue; |
| 2688 | } | 2697 | } |
| 2689 | page_offset = offset; | 2698 | page_offset = offset; |
| 2690 | level = sp->role.level; | 2699 | level = sp->role.level; |
| 2691 | npte = 1; | 2700 | npte = 1; |
| 2692 | if (sp->role.glevels == PT32_ROOT_LEVEL) { | 2701 | if (!sp->role.cr4_pae) { |
| 2693 | page_offset <<= 1; /* 32->64 */ | 2702 | page_offset <<= 1; /* 32->64 */ |
| 2694 | /* | 2703 | /* |
| 2695 | * A 32-bit pde maps 4MB while the shadow pdes map | 2704 | * A 32-bit pde maps 4MB while the shadow pdes map |
| @@ -2707,20 +2716,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2707 | continue; | 2716 | continue; |
| 2708 | } | 2717 | } |
| 2709 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2718 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
| 2710 | if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { | ||
| 2711 | gentry = 0; | ||
| 2712 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
| 2713 | gpa & ~(u64)(pte_size - 1), | ||
| 2714 | &gentry, pte_size); | ||
| 2715 | new = (const void *)&gentry; | ||
| 2716 | if (r < 0) | ||
| 2717 | new = NULL; | ||
| 2718 | } | ||
| 2719 | while (npte--) { | 2719 | while (npte--) { |
| 2720 | entry = *spte; | 2720 | entry = *spte; |
| 2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
| 2722 | if (new) | 2722 | if (gentry) |
| 2723 | mmu_pte_write_new_pte(vcpu, sp, spte, new); | 2723 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
| 2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); |
| 2725 | ++spte; | 2725 | ++spte; |
| 2726 | } | 2726 | } |
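The hunks above restructure kvm_mmu_pte_write() so the guest PTE is read exactly once, before mmu_lock is taken, and thrown away again if an invlpg ran in between (the invlpg_counter comparison under the lock). A minimal sketch of that counter-guard pattern, using the fields this patch introduces and a hypothetical helper name:

	static u64 read_gpte_guarded(struct kvm_vcpu *vcpu, gpa_t gpa)
	{
		/* snapshot the generation before the sleepable guest read */
		int gen = atomic_read(&vcpu->kvm->arch.invlpg_counter);
		u64 gentry = 0;

		if (kvm_read_guest(vcpu->kvm, gpa, &gentry, sizeof(gentry)))
			gentry = 0;

		spin_lock(&vcpu->kvm->mmu_lock);
		/* an invlpg raced with the read above: treat the PTE as unknown */
		if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != gen)
			gentry = 0;
		/* ... the real function updates the shadow PTEs here ... */
		spin_unlock(&vcpu->kvm->mmu_lock);

		return gentry;
	}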
| @@ -2900,22 +2900,23 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
| 2900 | struct kvm_mmu_page *sp, *node; | 2900 | struct kvm_mmu_page *sp, *node; |
| 2901 | 2901 | ||
| 2902 | spin_lock(&kvm->mmu_lock); | 2902 | spin_lock(&kvm->mmu_lock); |
| 2903 | restart: | ||
| 2903 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2904 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
| 2904 | if (kvm_mmu_zap_page(kvm, sp)) | 2905 | if (kvm_mmu_zap_page(kvm, sp)) |
| 2905 | node = container_of(kvm->arch.active_mmu_pages.next, | 2906 | goto restart; |
| 2906 | struct kvm_mmu_page, link); | 2907 | |
| 2907 | spin_unlock(&kvm->mmu_lock); | 2908 | spin_unlock(&kvm->mmu_lock); |
| 2908 | 2909 | ||
| 2909 | kvm_flush_remote_tlbs(kvm); | 2910 | kvm_flush_remote_tlbs(kvm); |
| 2910 | } | 2911 | } |
| 2911 | 2912 | ||
| 2912 | static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) | 2913 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) |
| 2913 | { | 2914 | { |
| 2914 | struct kvm_mmu_page *page; | 2915 | struct kvm_mmu_page *page; |
| 2915 | 2916 | ||
| 2916 | page = container_of(kvm->arch.active_mmu_pages.prev, | 2917 | page = container_of(kvm->arch.active_mmu_pages.prev, |
| 2917 | struct kvm_mmu_page, link); | 2918 | struct kvm_mmu_page, link); |
| 2918 | kvm_mmu_zap_page(kvm, page); | 2919 | return kvm_mmu_zap_page(kvm, page) + 1; |
| 2919 | } | 2920 | } |
| 2920 | 2921 | ||
| 2921 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | 2922 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) |
| @@ -2927,7 +2928,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
| 2927 | spin_lock(&kvm_lock); | 2928 | spin_lock(&kvm_lock); |
| 2928 | 2929 | ||
| 2929 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2930 | list_for_each_entry(kvm, &vm_list, vm_list) { |
| 2930 | int npages, idx; | 2931 | int npages, idx, freed_pages; |
| 2931 | 2932 | ||
| 2932 | idx = srcu_read_lock(&kvm->srcu); | 2933 | idx = srcu_read_lock(&kvm->srcu); |
| 2933 | spin_lock(&kvm->mmu_lock); | 2934 | spin_lock(&kvm->mmu_lock); |
| @@ -2935,8 +2936,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
| 2935 | kvm->arch.n_free_mmu_pages; | 2936 | kvm->arch.n_free_mmu_pages; |
| 2936 | cache_count += npages; | 2937 | cache_count += npages; |
| 2937 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 2938 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
| 2938 | kvm_mmu_remove_one_alloc_mmu_page(kvm); | 2939 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); |
| 2939 | cache_count--; | 2940 | cache_count -= freed_pages; |
| 2940 | kvm_freed = kvm; | 2941 | kvm_freed = kvm; |
| 2941 | } | 2942 | } |
| 2942 | nr_to_scan--; | 2943 | nr_to_scan--; |
| @@ -3011,7 +3012,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
| 3011 | unsigned int nr_pages = 0; | 3012 | unsigned int nr_pages = 0; |
| 3012 | struct kvm_memslots *slots; | 3013 | struct kvm_memslots *slots; |
| 3013 | 3014 | ||
| 3014 | slots = rcu_dereference(kvm->memslots); | 3015 | slots = kvm_memslots(kvm); |
| 3016 | |||
| 3015 | for (i = 0; i < slots->nmemslots; i++) | 3017 | for (i = 0; i < slots->nmemslots; i++) |
| 3016 | nr_pages += slots->memslots[i].npages; | 3018 | nr_pages += slots->memslots[i].npages; |
| 3017 | 3019 | ||
| @@ -3174,8 +3176,7 @@ static gva_t canonicalize(gva_t gva) | |||
| 3174 | } | 3176 | } |
| 3175 | 3177 | ||
| 3176 | 3178 | ||
| 3177 | typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, | 3179 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); |
| 3178 | u64 *sptep); | ||
| 3179 | 3180 | ||
| 3180 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | 3181 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, |
| 3181 | inspect_spte_fn fn) | 3182 | inspect_spte_fn fn) |
| @@ -3191,7 +3192,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 3191 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 3192 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
| 3192 | __mmu_spte_walk(kvm, child, fn); | 3193 | __mmu_spte_walk(kvm, child, fn); |
| 3193 | } else | 3194 | } else |
| 3194 | fn(kvm, sp, &sp->spt[i]); | 3195 | fn(kvm, &sp->spt[i]); |
| 3195 | } | 3196 | } |
| 3196 | } | 3197 | } |
| 3197 | } | 3198 | } |
| @@ -3282,11 +3283,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
| 3282 | 3283 | ||
| 3283 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3284 | static int count_rmaps(struct kvm_vcpu *vcpu) |
| 3284 | { | 3285 | { |
| 3286 | struct kvm *kvm = vcpu->kvm; | ||
| 3287 | struct kvm_memslots *slots; | ||
| 3285 | int nmaps = 0; | 3288 | int nmaps = 0; |
| 3286 | int i, j, k, idx; | 3289 | int i, j, k, idx; |
| 3287 | 3290 | ||
| 3288 | idx = srcu_read_lock(&kvm->srcu); | 3291 | idx = srcu_read_lock(&kvm->srcu); |
| 3289 | slots = rcu_dereference(kvm->memslots); | 3292 | slots = kvm_memslots(kvm); |
| 3290 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3293 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
| 3291 | struct kvm_memory_slot *m = &slots->memslots[i]; | 3294 | struct kvm_memory_slot *m = &slots->memslots[i]; |
| 3292 | struct kvm_rmap_desc *d; | 3295 | struct kvm_rmap_desc *d; |
| @@ -3315,7 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
| 3315 | return nmaps; | 3318 | return nmaps; |
| 3316 | } | 3319 | } |
| 3317 | 3320 | ||
| 3318 | void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | 3321 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
| 3319 | { | 3322 | { |
| 3320 | unsigned long *rmapp; | 3323 | unsigned long *rmapp; |
| 3321 | struct kvm_mmu_page *rev_sp; | 3324 | struct kvm_mmu_page *rev_sp; |
| @@ -3331,14 +3334,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | |||
| 3331 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | 3334 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", |
| 3332 | audit_msg, gfn); | 3335 | audit_msg, gfn); |
| 3333 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | 3336 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", |
| 3334 | audit_msg, sptep - rev_sp->spt, | 3337 | audit_msg, (long int)(sptep - rev_sp->spt), |
| 3335 | rev_sp->gfn); | 3338 | rev_sp->gfn); |
| 3336 | dump_stack(); | 3339 | dump_stack(); |
| 3337 | return; | 3340 | return; |
| 3338 | } | 3341 | } |
| 3339 | 3342 | ||
| 3340 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3343 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], |
| 3341 | is_large_pte(*sptep)); | 3344 | rev_sp->role.level); |
| 3342 | if (!*rmapp) { | 3345 | if (!*rmapp) { |
| 3343 | if (!printk_ratelimit()) | 3346 | if (!printk_ratelimit()) |
| 3344 | return; | 3347 | return; |
| @@ -3373,7 +3376,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
| 3373 | continue; | 3376 | continue; |
| 3374 | if (!(ent & PT_WRITABLE_MASK)) | 3377 | if (!(ent & PT_WRITABLE_MASK)) |
| 3375 | continue; | 3378 | continue; |
| 3376 | inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); | 3379 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
| 3377 | } | 3380 | } |
| 3378 | } | 3381 | } |
| 3379 | return; | 3382 | return; |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3e4a5c6ca2a9..42f07b1bfbc9 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
| @@ -6,14 +6,12 @@ | |||
| 6 | 6 | ||
| 7 | #undef TRACE_SYSTEM | 7 | #undef TRACE_SYSTEM |
| 8 | #define TRACE_SYSTEM kvmmmu | 8 | #define TRACE_SYSTEM kvmmmu |
| 9 | #define TRACE_INCLUDE_PATH . | ||
| 10 | #define TRACE_INCLUDE_FILE mmutrace | ||
| 11 | 9 | ||
| 12 | #define KVM_MMU_PAGE_FIELDS \ | 10 | #define KVM_MMU_PAGE_FIELDS \ |
| 13 | __field(__u64, gfn) \ | 11 | __field(__u64, gfn) \ |
| 14 | __field(__u32, role) \ | 12 | __field(__u32, role) \ |
| 15 | __field(__u32, root_count) \ | 13 | __field(__u32, root_count) \ |
| 16 | __field(__u32, unsync) | 14 | __field(bool, unsync) |
| 17 | 15 | ||
| 18 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 16 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
| 19 | __entry->gfn = sp->gfn; \ | 17 | __entry->gfn = sp->gfn; \ |
| @@ -30,14 +28,14 @@ | |||
| 30 | \ | 28 | \ |
| 31 | role.word = __entry->role; \ | 29 | role.word = __entry->role; \ |
| 32 | \ | 30 | \ |
| 33 | trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ | 31 | trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ |
| 34 | " %snxe root %u %s%c", \ | 32 | " %snxe root %u %s%c", \ |
| 35 | __entry->gfn, role.level, role.glevels, \ | 33 | __entry->gfn, role.level, \ |
| 34 | role.cr4_pae ? " pae" : "", \ | ||
| 36 | role.quadrant, \ | 35 | role.quadrant, \ |
| 37 | role.direct ? " direct" : "", \ | 36 | role.direct ? " direct" : "", \ |
| 38 | access_str[role.access], \ | 37 | access_str[role.access], \ |
| 39 | role.invalid ? " invalid" : "", \ | 38 | role.invalid ? " invalid" : "", \ |
| 40 | role.cr4_pge ? "" : "!", \ | ||
| 41 | role.nxe ? "" : "!", \ | 39 | role.nxe ? "" : "!", \ |
| 42 | __entry->root_count, \ | 40 | __entry->root_count, \ |
| 43 | __entry->unsync ? "unsync" : "sync", 0); \ | 41 | __entry->unsync ? "unsync" : "sync", 0); \ |
| @@ -94,15 +92,15 @@ TRACE_EVENT( | |||
| 94 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) | 92 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) |
| 95 | ); | 93 | ); |
| 96 | 94 | ||
| 97 | /* We set a pte accessed bit */ | 95 | DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, |
| 98 | TRACE_EVENT( | 96 | |
| 99 | kvm_mmu_set_accessed_bit, | ||
| 100 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 97 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
| 98 | |||
| 101 | TP_ARGS(table_gfn, index, size), | 99 | TP_ARGS(table_gfn, index, size), |
| 102 | 100 | ||
| 103 | TP_STRUCT__entry( | 101 | TP_STRUCT__entry( |
| 104 | __field(__u64, gpa) | 102 | __field(__u64, gpa) |
| 105 | ), | 103 | ), |
| 106 | 104 | ||
| 107 | TP_fast_assign( | 105 | TP_fast_assign( |
| 108 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 106 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) |
| @@ -112,22 +110,20 @@ TRACE_EVENT( | |||
| 112 | TP_printk("gpa %llx", __entry->gpa) | 110 | TP_printk("gpa %llx", __entry->gpa) |
| 113 | ); | 111 | ); |
| 114 | 112 | ||
| 115 | /* We set a pte dirty bit */ | 113 | /* We set a pte accessed bit */ |
| 116 | TRACE_EVENT( | 114 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, |
| 117 | kvm_mmu_set_dirty_bit, | 115 | |
| 118 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 116 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
| 119 | TP_ARGS(table_gfn, index, size), | ||
| 120 | 117 | ||
| 121 | TP_STRUCT__entry( | 118 | TP_ARGS(table_gfn, index, size) |
| 122 | __field(__u64, gpa) | 119 | ); |
| 123 | ), | ||
| 124 | 120 | ||
| 125 | TP_fast_assign( | 121 | /* We set a pte dirty bit */ |
| 126 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 122 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, |
| 127 | + index * size; | ||
| 128 | ), | ||
| 129 | 123 | ||
| 130 | TP_printk("gpa %llx", __entry->gpa) | 124 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
| 125 | |||
| 126 | TP_ARGS(table_gfn, index, size) | ||
| 131 | ); | 127 | ); |
| 132 | 128 | ||
| 133 | TRACE_EVENT( | 129 | TRACE_EVENT( |
| @@ -166,55 +162,45 @@ TRACE_EVENT( | |||
| 166 | __entry->created ? "new" : "existing") | 162 | __entry->created ? "new" : "existing") |
| 167 | ); | 163 | ); |
| 168 | 164 | ||
| 169 | TRACE_EVENT( | 165 | DECLARE_EVENT_CLASS(kvm_mmu_page_class, |
| 170 | kvm_mmu_sync_page, | 166 | |
| 171 | TP_PROTO(struct kvm_mmu_page *sp), | 167 | TP_PROTO(struct kvm_mmu_page *sp), |
| 172 | TP_ARGS(sp), | 168 | TP_ARGS(sp), |
| 173 | 169 | ||
| 174 | TP_STRUCT__entry( | 170 | TP_STRUCT__entry( |
| 175 | KVM_MMU_PAGE_FIELDS | 171 | KVM_MMU_PAGE_FIELDS |
| 176 | ), | 172 | ), |
| 177 | 173 | ||
| 178 | TP_fast_assign( | 174 | TP_fast_assign( |
| 179 | KVM_MMU_PAGE_ASSIGN(sp) | 175 | KVM_MMU_PAGE_ASSIGN(sp) |
| 180 | ), | 176 | ), |
| 181 | 177 | ||
| 182 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 178 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) |
| 183 | ); | 179 | ); |
| 184 | 180 | ||
| 185 | TRACE_EVENT( | 181 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, |
| 186 | kvm_mmu_unsync_page, | ||
| 187 | TP_PROTO(struct kvm_mmu_page *sp), | 182 | TP_PROTO(struct kvm_mmu_page *sp), |
| 188 | TP_ARGS(sp), | ||
| 189 | |||
| 190 | TP_STRUCT__entry( | ||
| 191 | KVM_MMU_PAGE_FIELDS | ||
| 192 | ), | ||
| 193 | 183 | ||
| 194 | TP_fast_assign( | 184 | TP_ARGS(sp) |
| 195 | KVM_MMU_PAGE_ASSIGN(sp) | ||
| 196 | ), | ||
| 197 | |||
| 198 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | ||
| 199 | ); | 185 | ); |
| 200 | 186 | ||
| 201 | TRACE_EVENT( | 187 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, |
| 202 | kvm_mmu_zap_page, | ||
| 203 | TP_PROTO(struct kvm_mmu_page *sp), | 188 | TP_PROTO(struct kvm_mmu_page *sp), |
| 204 | TP_ARGS(sp), | ||
| 205 | 189 | ||
| 206 | TP_STRUCT__entry( | 190 | TP_ARGS(sp) |
| 207 | KVM_MMU_PAGE_FIELDS | 191 | ); |
| 208 | ), | ||
| 209 | 192 | ||
| 210 | TP_fast_assign( | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, |
| 211 | KVM_MMU_PAGE_ASSIGN(sp) | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
| 212 | ), | ||
| 213 | 195 | ||
| 214 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 196 | TP_ARGS(sp) |
| 215 | ); | 197 | ); |
| 216 | |||
| 217 | #endif /* _TRACE_KVMMMU_H */ | 198 | #endif /* _TRACE_KVMMMU_H */ |
| 218 | 199 | ||
| 200 | #undef TRACE_INCLUDE_PATH | ||
| 201 | #define TRACE_INCLUDE_PATH . | ||
| 202 | #undef TRACE_INCLUDE_FILE | ||
| 203 | #define TRACE_INCLUDE_FILE mmutrace | ||
| 204 | |||
| 219 | /* This part must be outside protection */ | 205 | /* This part must be outside protection */ |
| 220 | #include <trace/define_trace.h> | 206 | #include <trace/define_trace.h> |
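The moves of TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE are not cosmetic: define_trace.h re-includes the header, so those macros must be (re)defined outside the multi-read guard, immediately before the final include. A minimal skeleton of the layout this file now follows (illustrative names):

	#if !defined(_TRACE_EXAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
	#define _TRACE_EXAMPLE_H

	#include <linux/tracepoint.h>

	#undef TRACE_SYSTEM
	#define TRACE_SYSTEM example

	/* DECLARE_EVENT_CLASS() / DEFINE_EVENT() pairs go here */

	#endif /* _TRACE_EXAMPLE_H */

	/* must sit outside the guard so each pass of define_trace.h sees them */
	#undef TRACE_INCLUDE_PATH
	#define TRACE_INCLUDE_PATH .
	#undef TRACE_INCLUDE_FILE
	#define TRACE_INCLUDE_FILE example

	/* This part must be outside protection */
	#include <trace/define_trace.h>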
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a50e6a..89d66ca4d87c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -170,7 +170,7 @@ walk: | |||
| 170 | goto access_error; | 170 | goto access_error; |
| 171 | 171 | ||
| 172 | #if PTTYPE == 64 | 172 | #if PTTYPE == 64 |
| 173 | if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) | 173 | if (fetch_fault && (pte & PT64_NX_MASK)) |
| 174 | goto access_error; | 174 | goto access_error; |
| 175 | #endif | 175 | #endif |
| 176 | 176 | ||
| @@ -190,10 +190,10 @@ walk: | |||
| 190 | 190 | ||
| 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || | 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || |
| 192 | ((walker->level == PT_DIRECTORY_LEVEL) && | 192 | ((walker->level == PT_DIRECTORY_LEVEL) && |
| 193 | (pte & PT_PAGE_SIZE_MASK) && | 193 | is_large_pte(pte) && |
| 194 | (PTTYPE == 64 || is_pse(vcpu))) || | 194 | (PTTYPE == 64 || is_pse(vcpu))) || |
| 195 | ((walker->level == PT_PDPE_LEVEL) && | 195 | ((walker->level == PT_PDPE_LEVEL) && |
| 196 | (pte & PT_PAGE_SIZE_MASK) && | 196 | is_large_pte(pte) && |
| 197 | is_long_mode(vcpu))) { | 197 | is_long_mode(vcpu))) { |
| 198 | int lvl = walker->level; | 198 | int lvl = walker->level; |
| 199 | 199 | ||
| @@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 258 | pt_element_t gpte; | 258 | pt_element_t gpte; |
| 259 | unsigned pte_access; | 259 | unsigned pte_access; |
| 260 | pfn_t pfn; | 260 | pfn_t pfn; |
| 261 | u64 new_spte; | ||
| 261 | 262 | ||
| 262 | gpte = *(const pt_element_t *)pte; | 263 | gpte = *(const pt_element_t *)pte; |
| 263 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
| 264 | if (!is_present_gpte(gpte)) | 265 | if (!is_present_gpte(gpte)) { |
| 265 | __set_spte(spte, shadow_notrap_nonpresent_pte); | 266 | if (page->unsync) |
| 267 | new_spte = shadow_trap_nonpresent_pte; | ||
| 268 | else | ||
| 269 | new_spte = shadow_notrap_nonpresent_pte; | ||
| 270 | __set_spte(spte, new_spte); | ||
| 271 | } | ||
| 266 | return; | 272 | return; |
| 267 | } | 273 | } |
| 268 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
| @@ -457,6 +463,7 @@ out_unlock: | |||
| 457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 463 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
| 458 | { | 464 | { |
| 459 | struct kvm_shadow_walk_iterator iterator; | 465 | struct kvm_shadow_walk_iterator iterator; |
| 466 | gpa_t pte_gpa = -1; | ||
| 460 | int level; | 467 | int level; |
| 461 | u64 *sptep; | 468 | u64 *sptep; |
| 462 | int need_flush = 0; | 469 | int need_flush = 0; |
| @@ -467,9 +474,16 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 467 | level = iterator.level; | 474 | level = iterator.level; |
| 468 | sptep = iterator.sptep; | 475 | sptep = iterator.sptep; |
| 469 | 476 | ||
| 470 | if (level == PT_PAGE_TABLE_LEVEL || | 477 | if (is_last_spte(*sptep, level)) { |
| 471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
| 472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 479 | int offset, shift; |
| 480 | |||
| 481 | shift = PAGE_SHIFT - | ||
| 482 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
| 483 | offset = sp->role.quadrant << shift; | ||
| 484 | |||
| 485 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
| 486 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
| 473 | 487 | ||
| 474 | if (is_shadow_present_pte(*sptep)) { | 488 | if (is_shadow_present_pte(*sptep)) { |
| 475 | rmap_remove(vcpu->kvm, sptep); | 489 | rmap_remove(vcpu->kvm, sptep); |
| @@ -487,7 +501,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 487 | 501 | ||
| 488 | if (need_flush) | 502 | if (need_flush) |
| 489 | kvm_flush_remote_tlbs(vcpu->kvm); | 503 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 504 | |||
| 505 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
| 506 | |||
| 490 | spin_unlock(&vcpu->kvm->mmu_lock); | 507 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 508 | |||
| 509 | if (pte_gpa == -1) | ||
| 510 | return; | ||
| 511 | |||
| 512 | if (mmu_topup_memory_caches(vcpu)) | ||
| 513 | return; | ||
| 514 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
| 491 | } | 515 | } |
| 492 | 516 | ||
| 493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 517 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
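FNAME(invlpg) no longer just drops the last-level spte: it reconstructs the guest PTE's physical address from the shadow page's gfn, quadrant and level, bumps invlpg_counter, and after releasing mmu_lock replays the entry through kvm_mmu_pte_write() with new == NULL, which is exactly the read-it-ourselves path added to mmu.c above. For the common 64-bit case (quadrant 0, so the computed shift is PAGE_SHIFT and the quadrant offset is zero) the address computation collapses to roughly:

	/* sketch, 64-bit guest, sp->role.quadrant == 0 */
	pte_gpa = ((gpa_t)sp->gfn << PAGE_SHIFT)
		+ (sptep - sp->spt) * sizeof(pt_element_t);	/* 8-byte entries */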
| @@ -551,12 +575,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 551 | { | 575 | { |
| 552 | int i, offset, nr_present; | 576 | int i, offset, nr_present; |
| 553 | bool reset_host_protection; | 577 | bool reset_host_protection; |
| 578 | gpa_t first_pte_gpa; | ||
| 554 | 579 | ||
| 555 | offset = nr_present = 0; | 580 | offset = nr_present = 0; |
| 556 | 581 | ||
| 557 | if (PTTYPE == 32) | 582 | if (PTTYPE == 32) |
| 558 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 583 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
| 559 | 584 | ||
| 585 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
| 586 | |||
| 560 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 587 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
| 561 | unsigned pte_access; | 588 | unsigned pte_access; |
| 562 | pt_element_t gpte; | 589 | pt_element_t gpte; |
| @@ -566,8 +593,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 566 | if (!is_shadow_present_pte(sp->spt[i])) | 593 | if (!is_shadow_present_pte(sp->spt[i])) |
| 567 | continue; | 594 | continue; |
| 568 | 595 | ||
| 569 | pte_gpa = gfn_to_gpa(sp->gfn); | 596 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
| 570 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
| 571 | 597 | ||
| 572 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 598 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, |
| 573 | sizeof(pt_element_t))) | 599 | sizeof(pt_element_t))) |
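The sync_page() change is plain loop-invariant hoisting; both forms name the same guest PTE:

	/* old: recomputed every iteration */
	pte_gpa = gfn_to_gpa(sp->gfn) + (i + offset) * sizeof(pt_element_t);

	/* new: the invariant part is computed once per shadow page */
	first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
	pte_gpa       = first_pte_gpa + i * sizeof(pt_element_t);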
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 737361fcd503..96dc232bfc56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -44,10 +44,11 @@ MODULE_LICENSE("GPL"); | |||
| 44 | #define SEG_TYPE_LDT 2 | 44 | #define SEG_TYPE_LDT 2 |
| 45 | #define SEG_TYPE_BUSY_TSS16 3 | 45 | #define SEG_TYPE_BUSY_TSS16 3 |
| 46 | 46 | ||
| 47 | #define SVM_FEATURE_NPT (1 << 0) | 47 | #define SVM_FEATURE_NPT (1 << 0) |
| 48 | #define SVM_FEATURE_LBRV (1 << 1) | 48 | #define SVM_FEATURE_LBRV (1 << 1) |
| 49 | #define SVM_FEATURE_SVML (1 << 2) | 49 | #define SVM_FEATURE_SVML (1 << 2) |
| 50 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 50 | #define SVM_FEATURE_NRIP (1 << 3) |
| 51 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
| 51 | 52 | ||
| 52 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 53 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
| 53 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 54 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
| @@ -70,6 +71,7 @@ struct kvm_vcpu; | |||
| 70 | struct nested_state { | 71 | struct nested_state { |
| 71 | struct vmcb *hsave; | 72 | struct vmcb *hsave; |
| 72 | u64 hsave_msr; | 73 | u64 hsave_msr; |
| 74 | u64 vm_cr_msr; | ||
| 73 | u64 vmcb; | 75 | u64 vmcb; |
| 74 | 76 | ||
| 75 | /* These are the merged vectors */ | 77 | /* These are the merged vectors */ |
| @@ -77,6 +79,7 @@ struct nested_state { | |||
| 77 | 79 | ||
| 78 | /* gpa pointers to the real vectors */ | 80 | /* gpa pointers to the real vectors */ |
| 79 | u64 vmcb_msrpm; | 81 | u64 vmcb_msrpm; |
| 82 | u64 vmcb_iopm; | ||
| 80 | 83 | ||
| 81 | /* A VMEXIT is required but not yet emulated */ | 84 | /* A VMEXIT is required but not yet emulated */ |
| 82 | bool exit_required; | 85 | bool exit_required; |
| @@ -91,6 +94,9 @@ struct nested_state { | |||
| 91 | 94 | ||
| 92 | }; | 95 | }; |
| 93 | 96 | ||
| 97 | #define MSRPM_OFFSETS 16 | ||
| 98 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | ||
| 99 | |||
| 94 | struct vcpu_svm { | 100 | struct vcpu_svm { |
| 95 | struct kvm_vcpu vcpu; | 101 | struct kvm_vcpu vcpu; |
| 96 | struct vmcb *vmcb; | 102 | struct vmcb *vmcb; |
| @@ -110,13 +116,39 @@ struct vcpu_svm { | |||
| 110 | struct nested_state nested; | 116 | struct nested_state nested; |
| 111 | 117 | ||
| 112 | bool nmi_singlestep; | 118 | bool nmi_singlestep; |
| 119 | |||
| 120 | unsigned int3_injected; | ||
| 121 | unsigned long int3_rip; | ||
| 122 | }; | ||
| 123 | |||
| 124 | #define MSR_INVALID 0xffffffffU | ||
| 125 | |||
| 126 | static struct svm_direct_access_msrs { | ||
| 127 | u32 index; /* Index of the MSR */ | ||
| 128 | bool always; /* True if intercept is always on */ | ||
| 129 | } direct_access_msrs[] = { | ||
| 130 | { .index = MSR_K6_STAR, .always = true }, | ||
| 131 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, | ||
| 132 | #ifdef CONFIG_X86_64 | ||
| 133 | { .index = MSR_GS_BASE, .always = true }, | ||
| 134 | { .index = MSR_FS_BASE, .always = true }, | ||
| 135 | { .index = MSR_KERNEL_GS_BASE, .always = true }, | ||
| 136 | { .index = MSR_LSTAR, .always = true }, | ||
| 137 | { .index = MSR_CSTAR, .always = true }, | ||
| 138 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
| 139 | #endif | ||
| 140 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
| 141 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
| 142 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
| 143 | { .index = MSR_IA32_LASTINTTOIP, .always = false }, | ||
| 144 | { .index = MSR_INVALID, .always = false }, | ||
| 113 | }; | 145 | }; |
| 114 | 146 | ||
| 115 | /* enable NPT for AMD64 and X86 with PAE */ | 147 | /* enable NPT for AMD64 and X86 with PAE */ |
| 116 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 148 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
| 117 | static bool npt_enabled = true; | 149 | static bool npt_enabled = true; |
| 118 | #else | 150 | #else |
| 119 | static bool npt_enabled = false; | 151 | static bool npt_enabled; |
| 120 | #endif | 152 | #endif |
| 121 | static int npt = 1; | 153 | static int npt = 1; |
| 122 | 154 | ||
| @@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); | |||
| 129 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 161 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
| 130 | 162 | ||
| 131 | static int nested_svm_exit_handled(struct vcpu_svm *svm); | 163 | static int nested_svm_exit_handled(struct vcpu_svm *svm); |
| 164 | static int nested_svm_intercept(struct vcpu_svm *svm); | ||
| 132 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 165 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
| 133 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 166 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
| 134 | bool has_error_code, u32 error_code); | 167 | bool has_error_code, u32 error_code); |
| @@ -163,8 +196,8 @@ static unsigned long iopm_base; | |||
| 163 | struct kvm_ldttss_desc { | 196 | struct kvm_ldttss_desc { |
| 164 | u16 limit0; | 197 | u16 limit0; |
| 165 | u16 base0; | 198 | u16 base0; |
| 166 | unsigned base1 : 8, type : 5, dpl : 2, p : 1; | 199 | unsigned base1:8, type:5, dpl:2, p:1; |
| 167 | unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | 200 | unsigned limit1:4, zero0:3, g:1, base2:8; |
| 168 | u32 base3; | 201 | u32 base3; |
| 169 | u32 zero1; | 202 | u32 zero1; |
| 170 | } __attribute__((packed)); | 203 | } __attribute__((packed)); |
| @@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | |||
| 194 | #define MSRS_RANGE_SIZE 2048 | 227 | #define MSRS_RANGE_SIZE 2048 |
| 195 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) | 228 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) |
| 196 | 229 | ||
| 230 | static u32 svm_msrpm_offset(u32 msr) | ||
| 231 | { | ||
| 232 | u32 offset; | ||
| 233 | int i; | ||
| 234 | |||
| 235 | for (i = 0; i < NUM_MSR_MAPS; i++) { | ||
| 236 | if (msr < msrpm_ranges[i] || | ||
| 237 | msr >= msrpm_ranges[i] + MSRS_IN_RANGE) | ||
| 238 | continue; | ||
| 239 | |||
| 240 | offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ | ||
| 241 | offset += (i * MSRS_RANGE_SIZE); /* add range offset */ | ||
| 242 | |||
| 243 | /* Now we have the u8 offset - but need the u32 offset */ | ||
| 244 | return offset / 4; | ||
| 245 | } | ||
| 246 | |||
| 247 | /* MSR not in any range */ | ||
| 248 | return MSR_INVALID; | ||
| 249 | } | ||
| 250 | |||
| 197 | #define MAX_INST_SIZE 15 | 251 | #define MAX_INST_SIZE 15 |
| 198 | 252 | ||
| 199 | static inline u32 svm_has(u32 feat) | 253 | static inline u32 svm_has(u32 feat) |
| @@ -213,7 +267,7 @@ static inline void stgi(void) | |||
| 213 | 267 | ||
| 214 | static inline void invlpga(unsigned long addr, u32 asid) | 268 | static inline void invlpga(unsigned long addr, u32 asid) |
| 215 | { | 269 | { |
| 216 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); | 270 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
| 217 | } | 271 | } |
| 218 | 272 | ||
| 219 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | 273 | static inline void force_new_asid(struct kvm_vcpu *vcpu) |
| @@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 235 | vcpu->arch.efer = efer; | 289 | vcpu->arch.efer = efer; |
| 236 | } | 290 | } |
| 237 | 291 | ||
| 238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
| 239 | bool has_error_code, u32 error_code) | ||
| 240 | { | ||
| 241 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 242 | |||
| 243 | /* If we are within a nested VM we'd better #VMEXIT and let the | ||
| 244 | guest handle the exception */ | ||
| 245 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
| 246 | return; | ||
| 247 | |||
| 248 | svm->vmcb->control.event_inj = nr | ||
| 249 | | SVM_EVTINJ_VALID | ||
| 250 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
| 251 | | SVM_EVTINJ_TYPE_EXEPT; | ||
| 252 | svm->vmcb->control.event_inj_err = error_code; | ||
| 253 | } | ||
| 254 | |||
| 255 | static int is_external_interrupt(u32 info) | 292 | static int is_external_interrupt(u32 info) |
| 256 | { | 293 | { |
| 257 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 294 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; |
| @@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
| 264 | u32 ret = 0; | 301 | u32 ret = 0; |
| 265 | 302 | ||
| 266 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) | 303 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
| 267 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | 304 | ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; |
| 268 | return ret & mask; | 305 | return ret & mask; |
| 269 | } | 306 | } |
| 270 | 307 | ||
| @@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 283 | { | 320 | { |
| 284 | struct vcpu_svm *svm = to_svm(vcpu); | 321 | struct vcpu_svm *svm = to_svm(vcpu); |
| 285 | 322 | ||
| 323 | if (svm->vmcb->control.next_rip != 0) | ||
| 324 | svm->next_rip = svm->vmcb->control.next_rip; | ||
| 325 | |||
| 286 | if (!svm->next_rip) { | 326 | if (!svm->next_rip) { |
| 287 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 327 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
| 288 | EMULATE_DONE) | 328 | EMULATE_DONE) |
| @@ -297,6 +337,43 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 297 | svm_set_interrupt_shadow(vcpu, 0); | 337 | svm_set_interrupt_shadow(vcpu, 0); |
| 298 | } | 338 | } |
| 299 | 339 | ||
| 340 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
| 341 | bool has_error_code, u32 error_code, | ||
| 342 | bool reinject) | ||
| 343 | { | ||
| 344 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 345 | |||
| 346 | /* | ||
| 347 | * If we are within a nested VM we'd better #VMEXIT and let the guest | ||
| 348 | * handle the exception | ||
| 349 | */ | ||
| 350 | if (!reinject && | ||
| 351 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
| 352 | return; | ||
| 353 | |||
| 354 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | ||
| 355 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | ||
| 356 | |||
| 357 | /* | ||
| 358 | * For guest debugging where we have to reinject #BP if some | ||
| 359 | * INT3 is guest-owned: | ||
| 360 | * Emulate nRIP by moving RIP forward. Will fail if injection | ||
| 361 | * raises a fault that is not intercepted. Still better than | ||
| 362 | * failing in all cases. | ||
| 363 | */ | ||
| 364 | skip_emulated_instruction(&svm->vcpu); | ||
| 365 | rip = kvm_rip_read(&svm->vcpu); | ||
| 366 | svm->int3_rip = rip + svm->vmcb->save.cs.base; | ||
| 367 | svm->int3_injected = rip - old_rip; | ||
| 368 | } | ||
| 369 | |||
| 370 | svm->vmcb->control.event_inj = nr | ||
| 371 | | SVM_EVTINJ_VALID | ||
| 372 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
| 373 | | SVM_EVTINJ_TYPE_EXEPT; | ||
| 374 | svm->vmcb->control.event_inj_err = error_code; | ||
| 375 | } | ||
| 376 | |||
| 300 | static int has_svm(void) | 377 | static int has_svm(void) |
| 301 | { | 378 | { |
| 302 | const char *msg; | 379 | const char *msg; |
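Without the NRIP feature the CPU cannot tell KVM how long an injected INT3 was, so svm_queue_exception() now advances RIP itself before injecting the #BP, remembering the delta in int3_injected and the expected linear RIP in int3_rip. A sketch of how the exit path can use those fields to undo the advance if the breakpoint was never delivered — this is an assumption about svm_complete_interrupts(), which is outside the hunks shown here:

	/* hypothetical rollback on #VMEXIT, assuming the fields added above */
	if (svm->int3_injected &&
	    kvm_rip_read(&svm->vcpu) + svm->vmcb->save.cs.base == svm->int3_rip)
		kvm_rip_write(&svm->vcpu,
			      kvm_rip_read(&svm->vcpu) - svm->int3_injected);
	svm->int3_injected = 0;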
| @@ -319,7 +396,7 @@ static int svm_hardware_enable(void *garbage) | |||
| 319 | 396 | ||
| 320 | struct svm_cpu_data *sd; | 397 | struct svm_cpu_data *sd; |
| 321 | uint64_t efer; | 398 | uint64_t efer; |
| 322 | struct descriptor_table gdt_descr; | 399 | struct desc_ptr gdt_descr; |
| 323 | struct desc_struct *gdt; | 400 | struct desc_struct *gdt; |
| 324 | int me = raw_smp_processor_id(); | 401 | int me = raw_smp_processor_id(); |
| 325 | 402 | ||
| @@ -344,8 +421,8 @@ static int svm_hardware_enable(void *garbage) | |||
| 344 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 421 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
| 345 | sd->next_asid = sd->max_asid + 1; | 422 | sd->next_asid = sd->max_asid + 1; |
| 346 | 423 | ||
| 347 | kvm_get_gdt(&gdt_descr); | 424 | native_store_gdt(&gdt_descr); |
| 348 | gdt = (struct desc_struct *)gdt_descr.base; | 425 | gdt = (struct desc_struct *)gdt_descr.address; |
| 349 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 426 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
| 350 | 427 | ||
| 351 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 428 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
| @@ -391,42 +468,98 @@ err_1: | |||
| 391 | 468 | ||
| 392 | } | 469 | } |
| 393 | 470 | ||
| 471 | static bool valid_msr_intercept(u32 index) | ||
| 472 | { | ||
| 473 | int i; | ||
| 474 | |||
| 475 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) | ||
| 476 | if (direct_access_msrs[i].index == index) | ||
| 477 | return true; | ||
| 478 | |||
| 479 | return false; | ||
| 480 | } | ||
| 481 | |||
| 394 | static void set_msr_interception(u32 *msrpm, unsigned msr, | 482 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
| 395 | int read, int write) | 483 | int read, int write) |
| 396 | { | 484 | { |
| 485 | u8 bit_read, bit_write; | ||
| 486 | unsigned long tmp; | ||
| 487 | u32 offset; | ||
| 488 | |||
| 489 | /* | ||
| 490 | * If this warning triggers extend the direct_access_msrs list at the | ||
| 491 | * beginning of the file | ||
| 492 | */ | ||
| 493 | WARN_ON(!valid_msr_intercept(msr)); | ||
| 494 | |||
| 495 | offset = svm_msrpm_offset(msr); | ||
| 496 | bit_read = 2 * (msr & 0x0f); | ||
| 497 | bit_write = 2 * (msr & 0x0f) + 1; | ||
| 498 | tmp = msrpm[offset]; | ||
| 499 | |||
| 500 | BUG_ON(offset == MSR_INVALID); | ||
| 501 | |||
| 502 | read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); | ||
| 503 | write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); | ||
| 504 | |||
| 505 | msrpm[offset] = tmp; | ||
| 506 | } | ||
| 507 | |||
| 508 | static void svm_vcpu_init_msrpm(u32 *msrpm) | ||
| 509 | { | ||
| 397 | int i; | 510 | int i; |
| 398 | 511 | ||
| 399 | for (i = 0; i < NUM_MSR_MAPS; i++) { | 512 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); |
| 400 | if (msr >= msrpm_ranges[i] && | 513 | |
| 401 | msr < msrpm_ranges[i] + MSRS_IN_RANGE) { | 514 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
| 402 | u32 msr_offset = (i * MSRS_IN_RANGE + msr - | 515 | if (!direct_access_msrs[i].always) |
| 403 | msrpm_ranges[i]) * 2; | 516 | continue; |
| 404 | 517 | ||
| 405 | u32 *base = msrpm + (msr_offset / 32); | 518 | set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); |
| 406 | u32 msr_shift = msr_offset % 32; | 519 | } |
| 407 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); | 520 | } |
| 408 | *base = (*base & ~(0x3 << msr_shift)) | | 521 | |
| 409 | (mask << msr_shift); | 522 | static void add_msr_offset(u32 offset) |
| 523 | { | ||
| 524 | int i; | ||
| 525 | |||
| 526 | for (i = 0; i < MSRPM_OFFSETS; ++i) { | ||
| 527 | |||
| 528 | /* Offset already in list? */ | ||
| 529 | if (msrpm_offsets[i] == offset) | ||
| 410 | return; | 530 | return; |
| 411 | } | 531 | |
| 532 | /* Slot used by another offset? */ | ||
| 533 | if (msrpm_offsets[i] != MSR_INVALID) | ||
| 534 | continue; | ||
| 535 | |||
| 536 | /* Add offset to list */ | ||
| 537 | msrpm_offsets[i] = offset; | ||
| 538 | |||
| 539 | return; | ||
| 412 | } | 540 | } |
| 541 | |||
| 542 | /* | ||
| 543 | * If this BUG triggers the msrpm_offsets table has an overflow. Just | ||
| 544 | * increase MSRPM_OFFSETS in this case. | ||
| 545 | */ | ||
| 413 | BUG(); | 546 | BUG(); |
| 414 | } | 547 | } |
| 415 | 548 | ||
| 416 | static void svm_vcpu_init_msrpm(u32 *msrpm) | 549 | static void init_msrpm_offsets(void) |
| 417 | { | 550 | { |
| 418 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | 551 | int i; |
| 419 | 552 | ||
| 420 | #ifdef CONFIG_X86_64 | 553 | memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); |
| 421 | set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); | 554 | |
| 422 | set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); | 555 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
| 423 | set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); | 556 | u32 offset; |
| 424 | set_msr_interception(msrpm, MSR_LSTAR, 1, 1); | 557 | |
| 425 | set_msr_interception(msrpm, MSR_CSTAR, 1, 1); | 558 | offset = svm_msrpm_offset(direct_access_msrs[i].index); |
| 426 | set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); | 559 | BUG_ON(offset == MSR_INVALID); |
| 427 | #endif | 560 | |
| 428 | set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); | 561 | add_msr_offset(offset); |
| 429 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); | 562 | } |
| 430 | } | 563 | } |
| 431 | 564 | ||
| 432 | static void svm_enable_lbrv(struct vcpu_svm *svm) | 565 | static void svm_enable_lbrv(struct vcpu_svm *svm) |
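The MSR permission map packs two intercept bits per MSR (read, then write), four MSRs per byte, in three 2 KiB ranges; svm_msrpm_offset() returns the index of the u32 that holds an MSR's bits and set_msr_interception() derives the bit positions from the MSR's low four bits. A worked example, assuming the architectural value 0xc0000081 for MSR_K6_STAR:

	/*
	 * 0xc0000081 falls into range 1 (base 0xc0000000):
	 *
	 *   byte offset in range  = (0xc0000081 - 0xc0000000) / 4 =   32
	 *   + range offset        = 1 * MSRS_RANGE_SIZE (2048)    = 2080
	 *   u32 offset (returned) = 2080 / 4                       =  520
	 *   bit_read              = 2 * (0x81 & 0x0f)              =    2
	 *   bit_write             = bit_read + 1                   =    3
	 *
	 * so the read/write intercepts for STAR live in bits 2 and 3 of
	 * msrpm[520].
	 */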
| @@ -467,6 +600,8 @@ static __init int svm_hardware_setup(void) | |||
| 467 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | 600 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); |
| 468 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | 601 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; |
| 469 | 602 | ||
| 603 | init_msrpm_offsets(); | ||
| 604 | |||
| 470 | if (boot_cpu_has(X86_FEATURE_NX)) | 605 | if (boot_cpu_has(X86_FEATURE_NX)) |
| 471 | kvm_enable_efer_bits(EFER_NX); | 606 | kvm_enable_efer_bits(EFER_NX); |
| 472 | 607 | ||
| @@ -523,7 +658,7 @@ static void init_seg(struct vmcb_seg *seg) | |||
| 523 | { | 658 | { |
| 524 | seg->selector = 0; | 659 | seg->selector = 0; |
| 525 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | | 660 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | |
| 526 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ | 661 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ |
| 527 | seg->limit = 0xffff; | 662 | seg->limit = 0xffff; |
| 528 | seg->base = 0; | 663 | seg->base = 0; |
| 529 | } | 664 | } |
| @@ -543,16 +678,16 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 543 | 678 | ||
| 544 | svm->vcpu.fpu_active = 1; | 679 | svm->vcpu.fpu_active = 1; |
| 545 | 680 | ||
| 546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 681 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
| 547 | INTERCEPT_CR3_MASK | | 682 | INTERCEPT_CR3_MASK | |
| 548 | INTERCEPT_CR4_MASK; | 683 | INTERCEPT_CR4_MASK; |
| 549 | 684 | ||
| 550 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 685 | control->intercept_cr_write = INTERCEPT_CR0_MASK | |
| 551 | INTERCEPT_CR3_MASK | | 686 | INTERCEPT_CR3_MASK | |
| 552 | INTERCEPT_CR4_MASK | | 687 | INTERCEPT_CR4_MASK | |
| 553 | INTERCEPT_CR8_MASK; | 688 | INTERCEPT_CR8_MASK; |
| 554 | 689 | ||
| 555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 690 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
| 556 | INTERCEPT_DR1_MASK | | 691 | INTERCEPT_DR1_MASK | |
| 557 | INTERCEPT_DR2_MASK | | 692 | INTERCEPT_DR2_MASK | |
| 558 | INTERCEPT_DR3_MASK | | 693 | INTERCEPT_DR3_MASK | |
| @@ -561,7 +696,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 561 | INTERCEPT_DR6_MASK | | 696 | INTERCEPT_DR6_MASK | |
| 562 | INTERCEPT_DR7_MASK; | 697 | INTERCEPT_DR7_MASK; |
| 563 | 698 | ||
| 564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 699 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
| 565 | INTERCEPT_DR1_MASK | | 700 | INTERCEPT_DR1_MASK | |
| 566 | INTERCEPT_DR2_MASK | | 701 | INTERCEPT_DR2_MASK | |
| 567 | INTERCEPT_DR3_MASK | | 702 | INTERCEPT_DR3_MASK | |
| @@ -575,7 +710,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 575 | (1 << MC_VECTOR); | 710 | (1 << MC_VECTOR); |
| 576 | 711 | ||
| 577 | 712 | ||
| 578 | control->intercept = (1ULL << INTERCEPT_INTR) | | 713 | control->intercept = (1ULL << INTERCEPT_INTR) | |
| 579 | (1ULL << INTERCEPT_NMI) | | 714 | (1ULL << INTERCEPT_NMI) | |
| 580 | (1ULL << INTERCEPT_SMI) | | 715 | (1ULL << INTERCEPT_SMI) | |
| 581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 716 | (1ULL << INTERCEPT_SELECTIVE_CR0) | |
| @@ -636,7 +771,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 636 | save->rip = 0x0000fff0; | 771 | save->rip = 0x0000fff0; |
| 637 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 772 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
| 638 | 773 | ||
| 639 | /* This is the guest-visible cr0 value. | 774 | /* |
| 775 | * This is the guest-visible cr0 value. | ||
| 640 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 776 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
| 641 | */ | 777 | */ |
| 642 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 778 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
| @@ -729,6 +865,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 729 | svm_vcpu_init_msrpm(svm->msrpm); | 865 | svm_vcpu_init_msrpm(svm->msrpm); |
| 730 | 866 | ||
| 731 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 867 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
| 868 | svm_vcpu_init_msrpm(svm->nested.msrpm); | ||
| 732 | 869 | ||
| 733 | svm->vmcb = page_address(page); | 870 | svm->vmcb = page_address(page); |
| 734 | clear_page(svm->vmcb); | 871 | clear_page(svm->vmcb); |
| @@ -882,7 +1019,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
| 882 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 1019 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
| 883 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 1020 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
| 884 | 1021 | ||
| 885 | /* AMD's VMCB does not have an explicit unusable field, so emulate it | 1022 | /* |
| 1023 | * AMD's VMCB does not have an explicit unusable field, so emulate it | ||
| 886 | * for cross vendor migration purposes by "not present" | 1024 | * for cross vendor migration purposes by "not present" |
| 887 | */ | 1025 | */ |
| 888 | var->unusable = !var->present || (var->type == 0); | 1026 | var->unusable = !var->present || (var->type == 0); |
| @@ -918,7 +1056,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
| 918 | var->type |= 0x1; | 1056 | var->type |= 0x1; |
| 919 | break; | 1057 | break; |
| 920 | case VCPU_SREG_SS: | 1058 | case VCPU_SREG_SS: |
| 921 | /* On AMD CPUs sometimes the DB bit in the segment | 1059 | /* |
| 1060 | * On AMD CPUs sometimes the DB bit in the segment | ||
| 922 | * descriptor is left as 1, although the whole segment has | 1061 | * descriptor is left as 1, although the whole segment has |
| 923 | * been made unusable. Clear it here to pass an Intel VMX | 1062 | * been made unusable. Clear it here to pass an Intel VMX |
| 924 | * entry check when cross vendor migrating. | 1063 | * entry check when cross vendor migrating. |
| @@ -936,36 +1075,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) | |||
| 936 | return save->cpl; | 1075 | return save->cpl; |
| 937 | } | 1076 | } |
| 938 | 1077 | ||
| 939 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1078 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 940 | { | 1079 | { |
| 941 | struct vcpu_svm *svm = to_svm(vcpu); | 1080 | struct vcpu_svm *svm = to_svm(vcpu); |
| 942 | 1081 | ||
| 943 | dt->limit = svm->vmcb->save.idtr.limit; | 1082 | dt->size = svm->vmcb->save.idtr.limit; |
| 944 | dt->base = svm->vmcb->save.idtr.base; | 1083 | dt->address = svm->vmcb->save.idtr.base; |
| 945 | } | 1084 | } |
| 946 | 1085 | ||
| 947 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1086 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 948 | { | 1087 | { |
| 949 | struct vcpu_svm *svm = to_svm(vcpu); | 1088 | struct vcpu_svm *svm = to_svm(vcpu); |
| 950 | 1089 | ||
| 951 | svm->vmcb->save.idtr.limit = dt->limit; | 1090 | svm->vmcb->save.idtr.limit = dt->size; |
| 952 | svm->vmcb->save.idtr.base = dt->base ; | 1091 | svm->vmcb->save.idtr.base = dt->address ; |
| 953 | } | 1092 | } |
| 954 | 1093 | ||
| 955 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1094 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 956 | { | 1095 | { |
| 957 | struct vcpu_svm *svm = to_svm(vcpu); | 1096 | struct vcpu_svm *svm = to_svm(vcpu); |
| 958 | 1097 | ||
| 959 | dt->limit = svm->vmcb->save.gdtr.limit; | 1098 | dt->size = svm->vmcb->save.gdtr.limit; |
| 960 | dt->base = svm->vmcb->save.gdtr.base; | 1099 | dt->address = svm->vmcb->save.gdtr.base; |
| 961 | } | 1100 | } |
| 962 | 1101 | ||
| 963 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1102 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 964 | { | 1103 | { |
| 965 | struct vcpu_svm *svm = to_svm(vcpu); | 1104 | struct vcpu_svm *svm = to_svm(vcpu); |
| 966 | 1105 | ||
| 967 | svm->vmcb->save.gdtr.limit = dt->limit; | 1106 | svm->vmcb->save.gdtr.limit = dt->size; |
| 968 | svm->vmcb->save.gdtr.base = dt->base ; | 1107 | svm->vmcb->save.gdtr.base = dt->address ; |
| 969 | } | 1108 | } |
| 970 | 1109 | ||
| 971 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1110 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
| @@ -978,6 +1117,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
| 978 | 1117 | ||
| 979 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1118 | static void update_cr0_intercept(struct vcpu_svm *svm) |
| 980 | { | 1119 | { |
| 1120 | struct vmcb *vmcb = svm->vmcb; | ||
| 981 | ulong gcr0 = svm->vcpu.arch.cr0; | 1121 | ulong gcr0 = svm->vcpu.arch.cr0; |
| 982 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1122 | u64 *hcr0 = &svm->vmcb->save.cr0; |
| 983 | 1123 | ||
| @@ -989,11 +1129,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
| 989 | 1129 | ||
| 990 | 1130 | ||
| 991 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1131 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
| 992 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1132 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; |
| 993 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1133 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; |
| 1134 | if (is_nested(svm)) { | ||
| 1135 | struct vmcb *hsave = svm->nested.hsave; | ||
| 1136 | |||
| 1137 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
| 1138 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
| 1139 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
| 1140 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
| 1141 | } | ||
| 994 | } else { | 1142 | } else { |
| 995 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1143 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; |
| 996 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1144 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; |
| 1145 | if (is_nested(svm)) { | ||
| 1146 | struct vmcb *hsave = svm->nested.hsave; | ||
| 1147 | |||
| 1148 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
| 1149 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
| 1150 | } | ||
| 997 | } | 1151 | } |
| 998 | } | 1152 | } |
| 999 | 1153 | ||
| @@ -1001,6 +1155,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 1001 | { | 1155 | { |
| 1002 | struct vcpu_svm *svm = to_svm(vcpu); | 1156 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1003 | 1157 | ||
| 1158 | if (is_nested(svm)) { | ||
| 1159 | /* | ||
| 1160 | * We are here because we run in nested mode, the host kvm | ||
| 1161 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
| 1162 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
| 1163 | * This needs to be checked here. | ||
| 1164 | */ | ||
| 1165 | unsigned long old, new; | ||
| 1166 | |||
| 1167 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
| 1168 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
| 1169 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
| 1170 | |||
| 1171 | if (old == new) { | ||
| 1172 | /* cr0 write with ts and mp unchanged */ | ||
| 1173 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
| 1174 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) | ||
| 1175 | return; | ||
| 1176 | } | ||
| 1177 | } | ||
| 1178 | |||
| 1004 | #ifdef CONFIG_X86_64 | 1179 | #ifdef CONFIG_X86_64 |
| 1005 | if (vcpu->arch.efer & EFER_LME) { | 1180 | if (vcpu->arch.efer & EFER_LME) { |
| 1006 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1181 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
| @@ -1134,70 +1309,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
| 1134 | svm->vmcb->control.asid = sd->next_asid++; | 1309 | svm->vmcb->control.asid = sd->next_asid++; |
| 1135 | } | 1310 | } |
| 1136 | 1311 | ||
| 1137 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) | 1312 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
| 1138 | { | 1313 | { |
| 1139 | struct vcpu_svm *svm = to_svm(vcpu); | 1314 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1140 | 1315 | ||
| 1141 | switch (dr) { | 1316 | svm->vmcb->save.dr7 = value; |
| 1142 | case 0 ... 3: | ||
| 1143 | *dest = vcpu->arch.db[dr]; | ||
| 1144 | break; | ||
| 1145 | case 4: | ||
| 1146 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1147 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1148 | /* fall through */ | ||
| 1149 | case 6: | ||
| 1150 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
| 1151 | *dest = vcpu->arch.dr6; | ||
| 1152 | else | ||
| 1153 | *dest = svm->vmcb->save.dr6; | ||
| 1154 | break; | ||
| 1155 | case 5: | ||
| 1156 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1157 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1158 | /* fall through */ | ||
| 1159 | case 7: | ||
| 1160 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
| 1161 | *dest = vcpu->arch.dr7; | ||
| 1162 | else | ||
| 1163 | *dest = svm->vmcb->save.dr7; | ||
| 1164 | break; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | return EMULATE_DONE; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) | ||
| 1171 | { | ||
| 1172 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 1173 | |||
| 1174 | switch (dr) { | ||
| 1175 | case 0 ... 3: | ||
| 1176 | vcpu->arch.db[dr] = value; | ||
| 1177 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
| 1178 | vcpu->arch.eff_db[dr] = value; | ||
| 1179 | break; | ||
| 1180 | case 4: | ||
| 1181 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1182 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1183 | /* fall through */ | ||
| 1184 | case 6: | ||
| 1185 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | ||
| 1186 | break; | ||
| 1187 | case 5: | ||
| 1188 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1189 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1190 | /* fall through */ | ||
| 1191 | case 7: | ||
| 1192 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | ||
| 1193 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
| 1194 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | ||
| 1195 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | ||
| 1196 | } | ||
| 1197 | break; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | return EMULATE_DONE; | ||
| 1201 | } | 1317 | } |
| 1202 | 1318 | ||
| 1203 | static int pf_interception(struct vcpu_svm *svm) | 1319 | static int pf_interception(struct vcpu_svm *svm) |
| @@ -1234,7 +1350,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
| 1234 | } | 1350 | } |
| 1235 | 1351 | ||
| 1236 | if (svm->vcpu.guest_debug & | 1352 | if (svm->vcpu.guest_debug & |
| 1237 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ | 1353 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { |
| 1238 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1354 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
| 1239 | kvm_run->debug.arch.pc = | 1355 | kvm_run->debug.arch.pc = |
| 1240 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1356 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
| @@ -1268,7 +1384,22 @@ static int ud_interception(struct vcpu_svm *svm) | |||
| 1268 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1384 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
| 1269 | { | 1385 | { |
| 1270 | struct vcpu_svm *svm = to_svm(vcpu); | 1386 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1271 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1387 | u32 excp; |
| 1388 | |||
| 1389 | if (is_nested(svm)) { | ||
| 1390 | u32 h_excp, n_excp; | ||
| 1391 | |||
| 1392 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
| 1393 | n_excp = svm->nested.intercept_exceptions; | ||
| 1394 | h_excp &= ~(1 << NM_VECTOR); | ||
| 1395 | excp = h_excp | n_excp; | ||
| 1396 | } else { | ||
| 1397 | excp = svm->vmcb->control.intercept_exceptions; | ||
| 1398 | excp &= ~(1 << NM_VECTOR); | ||
| 1399 | } | ||
| 1400 | |||
| 1401 | svm->vmcb->control.intercept_exceptions = excp; | ||
| 1402 | |||
| 1272 | svm->vcpu.fpu_active = 1; | 1403 | svm->vcpu.fpu_active = 1; |
| 1273 | update_cr0_intercept(svm); | 1404 | update_cr0_intercept(svm); |
| 1274 | } | 1405 | } |
| @@ -1309,29 +1440,23 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
| 1309 | 1440 | ||
| 1310 | static int io_interception(struct vcpu_svm *svm) | 1441 | static int io_interception(struct vcpu_svm *svm) |
| 1311 | { | 1442 | { |
| 1443 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
| 1312 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1444 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
| 1313 | int size, in, string; | 1445 | int size, in, string; |
| 1314 | unsigned port; | 1446 | unsigned port; |
| 1315 | 1447 | ||
| 1316 | ++svm->vcpu.stat.io_exits; | 1448 | ++svm->vcpu.stat.io_exits; |
| 1317 | |||
| 1318 | svm->next_rip = svm->vmcb->control.exit_info_2; | ||
| 1319 | |||
| 1320 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1449 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
| 1321 | |||
| 1322 | if (string) { | ||
| 1323 | if (emulate_instruction(&svm->vcpu, | ||
| 1324 | 0, 0, 0) == EMULATE_DO_MMIO) | ||
| 1325 | return 0; | ||
| 1326 | return 1; | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1450 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
| 1451 | if (string || in) | ||
| 1452 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | ||
| 1453 | |||
| 1330 | port = io_info >> 16; | 1454 | port = io_info >> 16; |
| 1331 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1455 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
| 1332 | 1456 | svm->next_rip = svm->vmcb->control.exit_info_2; | |
| 1333 | skip_emulated_instruction(&svm->vcpu); | 1457 | skip_emulated_instruction(&svm->vcpu); |
| 1334 | return kvm_emulate_pio(&svm->vcpu, in, size, port); | 1458 | |
| 1459 | return kvm_fast_pio_out(vcpu, size, port); | ||
| 1335 | } | 1460 | } |
| 1336 | 1461 | ||
| 1337 | static int nmi_interception(struct vcpu_svm *svm) | 1462 | static int nmi_interception(struct vcpu_svm *svm) |
| @@ -1384,6 +1509,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
| 1384 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 1509 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
| 1385 | bool has_error_code, u32 error_code) | 1510 | bool has_error_code, u32 error_code) |
| 1386 | { | 1511 | { |
| 1512 | int vmexit; | ||
| 1513 | |||
| 1387 | if (!is_nested(svm)) | 1514 | if (!is_nested(svm)) |
| 1388 | return 0; | 1515 | return 0; |
| 1389 | 1516 | ||
| @@ -1392,21 +1519,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
| 1392 | svm->vmcb->control.exit_info_1 = error_code; | 1519 | svm->vmcb->control.exit_info_1 = error_code; |
| 1393 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; | 1520 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; |
| 1394 | 1521 | ||
| 1395 | return nested_svm_exit_handled(svm); | 1522 | vmexit = nested_svm_intercept(svm); |
| 1523 | if (vmexit == NESTED_EXIT_DONE) | ||
| 1524 | svm->nested.exit_required = true; | ||
| 1525 | |||
| 1526 | return vmexit; | ||
| 1396 | } | 1527 | } |
| 1397 | 1528 | ||
| 1398 | static inline int nested_svm_intr(struct vcpu_svm *svm) | 1529 | /* This function returns true if it is safe to enable the irq window */ |
| 1530 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | ||
| 1399 | { | 1531 | { |
| 1400 | if (!is_nested(svm)) | 1532 | if (!is_nested(svm)) |
| 1401 | return 0; | 1533 | return true; |
| 1402 | 1534 | ||
| 1403 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1535 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
| 1404 | return 0; | 1536 | return true; |
| 1405 | 1537 | ||
| 1406 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) | 1538 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) |
| 1407 | return 0; | 1539 | return false; |
| 1408 | 1540 | ||
| 1409 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1541 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
| 1542 | svm->vmcb->control.exit_info_1 = 0; | ||
| 1543 | svm->vmcb->control.exit_info_2 = 0; | ||
| 1410 | 1544 | ||
| 1411 | if (svm->nested.intercept & 1ULL) { | 1545 | if (svm->nested.intercept & 1ULL) { |
| 1412 | /* | 1546 | /* |
| @@ -1417,21 +1551,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
| 1417 | */ | 1551 | */ |
| 1418 | svm->nested.exit_required = true; | 1552 | svm->nested.exit_required = true; |
| 1419 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | 1553 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); |
| 1420 | return 1; | 1554 | return false; |
| 1421 | } | 1555 | } |
| 1422 | 1556 | ||
| 1423 | return 0; | 1557 | return true; |
| 1558 | } | ||
| 1559 | |||
| 1560 | /* This function returns true if it is safe to enable the nmi window */ | ||
| 1561 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | ||
| 1562 | { | ||
| 1563 | if (!is_nested(svm)) | ||
| 1564 | return true; | ||
| 1565 | |||
| 1566 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | ||
| 1567 | return true; | ||
| 1568 | |||
| 1569 | svm->vmcb->control.exit_code = SVM_EXIT_NMI; | ||
| 1570 | svm->nested.exit_required = true; | ||
| 1571 | |||
| 1572 | return false; | ||
| 1424 | } | 1573 | } |
| 1425 | 1574 | ||
| 1426 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | 1575 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) |
| 1427 | { | 1576 | { |
| 1428 | struct page *page; | 1577 | struct page *page; |
| 1429 | 1578 | ||
| 1579 | might_sleep(); | ||
| 1580 | |||
| 1430 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1581 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
| 1431 | if (is_error_page(page)) | 1582 | if (is_error_page(page)) |
| 1432 | goto error; | 1583 | goto error; |
| 1433 | 1584 | ||
| 1434 | return kmap_atomic(page, idx); | 1585 | *_page = page; |
| 1586 | |||
| 1587 | return kmap(page); | ||
| 1435 | 1588 | ||
| 1436 | error: | 1589 | error: |
| 1437 | kvm_release_page_clean(page); | 1590 | kvm_release_page_clean(page); |
| @@ -1440,61 +1593,55 @@ error: | |||
| 1440 | return NULL; | 1593 | return NULL; |
| 1441 | } | 1594 | } |
| 1442 | 1595 | ||
| 1443 | static void nested_svm_unmap(void *addr, enum km_type idx) | 1596 | static void nested_svm_unmap(struct page *page) |
| 1444 | { | 1597 | { |
| 1445 | struct page *page; | 1598 | kunmap(page); |
| 1599 | kvm_release_page_dirty(page); | ||
| 1600 | } | ||
| 1446 | 1601 | ||
| 1447 | if (!addr) | 1602 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
| 1448 | return; | 1603 | { |
| 1604 | unsigned port; | ||
| 1605 | u8 val, bit; | ||
| 1606 | u64 gpa; | ||
| 1449 | 1607 | ||
| 1450 | page = kmap_atomic_to_page(addr); | 1608 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) |
| 1609 | return NESTED_EXIT_HOST; | ||
| 1451 | 1610 | ||
| 1452 | kunmap_atomic(addr, idx); | 1611 | port = svm->vmcb->control.exit_info_1 >> 16; |
| 1453 | kvm_release_page_dirty(page); | 1612 | gpa = svm->nested.vmcb_iopm + (port / 8); |
| 1613 | bit = port % 8; | ||
| 1614 | val = 0; | ||
| 1615 | |||
| 1616 | if (!kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) | ||
| 1617 | val &= (1 << bit); | ||
| 1618 | |||
| 1619 | return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | ||
| 1454 | } | 1620 | } |
| 1455 | 1621 | ||
| 1456 | static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) | 1622 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) |
| 1457 | { | 1623 | { |
| 1458 | u32 param = svm->vmcb->control.exit_info_1 & 1; | 1624 | u32 offset, msr, value; |
| 1459 | u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1625 | int write, mask; |
| 1460 | bool ret = false; | ||
| 1461 | u32 t0, t1; | ||
| 1462 | u8 *msrpm; | ||
| 1463 | 1626 | ||
| 1464 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) | 1627 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
| 1465 | return false; | 1628 | return NESTED_EXIT_HOST; |
| 1466 | 1629 | ||
| 1467 | msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1630 | msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
| 1631 | offset = svm_msrpm_offset(msr); | ||
| 1632 | write = svm->vmcb->control.exit_info_1 & 1; | ||
| 1633 | mask = 1 << ((2 * (msr & 0xf)) + write); | ||
| 1468 | 1634 | ||
| 1469 | if (!msrpm) | 1635 | if (offset == MSR_INVALID) |
| 1470 | goto out; | 1636 | return NESTED_EXIT_DONE; |
| 1471 | 1637 | ||
| 1472 | switch (msr) { | 1638 | /* Offset is in 32 bit units but we need it in 8 bit units */ |
| 1473 | case 0 ... 0x1fff: | 1639 | offset *= 4; |
| 1474 | t0 = (msr * 2) % 8; | ||
| 1475 | t1 = msr / 8; | ||
| 1476 | break; | ||
| 1477 | case 0xc0000000 ... 0xc0001fff: | ||
| 1478 | t0 = (8192 + msr - 0xc0000000) * 2; | ||
| 1479 | t1 = (t0 / 8); | ||
| 1480 | t0 %= 8; | ||
| 1481 | break; | ||
| 1482 | case 0xc0010000 ... 0xc0011fff: | ||
| 1483 | t0 = (16384 + msr - 0xc0010000) * 2; | ||
| 1484 | t1 = (t0 / 8); | ||
| 1485 | t0 %= 8; | ||
| 1486 | break; | ||
| 1487 | default: | ||
| 1488 | ret = true; | ||
| 1489 | goto out; | ||
| 1490 | } | ||
| 1491 | 1640 | ||
| 1492 | ret = msrpm[t1] & ((1 << param) << t0); | 1641 | if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) |
| 1493 | 1642 | return NESTED_EXIT_DONE; | |
| 1494 | out: | ||
| 1495 | nested_svm_unmap(msrpm, KM_USER0); | ||
| 1496 | 1643 | ||
| 1497 | return ret; | 1644 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
| 1498 | } | 1645 | } |
| 1499 | 1646 | ||
| 1500 | static int nested_svm_exit_special(struct vcpu_svm *svm) | 1647 | static int nested_svm_exit_special(struct vcpu_svm *svm) |
| @@ -1504,17 +1651,21 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
| 1504 | switch (exit_code) { | 1651 | switch (exit_code) { |
| 1505 | case SVM_EXIT_INTR: | 1652 | case SVM_EXIT_INTR: |
| 1506 | case SVM_EXIT_NMI: | 1653 | case SVM_EXIT_NMI: |
| 1654 | case SVM_EXIT_EXCP_BASE + MC_VECTOR: | ||
| 1507 | return NESTED_EXIT_HOST; | 1655 | return NESTED_EXIT_HOST; |
| 1508 | /* For now we are always handling NPFs when using them */ | ||
| 1509 | case SVM_EXIT_NPF: | 1656 | case SVM_EXIT_NPF: |
| 1657 | /* For now we are always handling NPFs when using them */ | ||
| 1510 | if (npt_enabled) | 1658 | if (npt_enabled) |
| 1511 | return NESTED_EXIT_HOST; | 1659 | return NESTED_EXIT_HOST; |
| 1512 | break; | 1660 | break; |
| 1513 | /* When we're shadowing, trap PFs */ | ||
| 1514 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1661 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
| 1662 | /* When we're shadowing, trap PFs */ | ||
| 1515 | if (!npt_enabled) | 1663 | if (!npt_enabled) |
| 1516 | return NESTED_EXIT_HOST; | 1664 | return NESTED_EXIT_HOST; |
| 1517 | break; | 1665 | break; |
| 1666 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
| 1667 | nm_interception(svm); | ||
| 1668 | break; | ||
| 1518 | default: | 1669 | default: |
| 1519 | break; | 1670 | break; |
| 1520 | } | 1671 | } |
| @@ -1525,7 +1676,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
| 1525 | /* | 1676 | /* |
| 1526 | * If this function returns true, this #vmexit was already handled | 1677 | * If this function returns true, this #vmexit was already handled |
| 1527 | */ | 1678 | */ |
| 1528 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | 1679 | static int nested_svm_intercept(struct vcpu_svm *svm) |
| 1529 | { | 1680 | { |
| 1530 | u32 exit_code = svm->vmcb->control.exit_code; | 1681 | u32 exit_code = svm->vmcb->control.exit_code; |
| 1531 | int vmexit = NESTED_EXIT_HOST; | 1682 | int vmexit = NESTED_EXIT_HOST; |
| @@ -1534,6 +1685,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
| 1534 | case SVM_EXIT_MSR: | 1685 | case SVM_EXIT_MSR: |
| 1535 | vmexit = nested_svm_exit_handled_msr(svm); | 1686 | vmexit = nested_svm_exit_handled_msr(svm); |
| 1536 | break; | 1687 | break; |
| 1688 | case SVM_EXIT_IOIO: | ||
| 1689 | vmexit = nested_svm_intercept_ioio(svm); | ||
| 1690 | break; | ||
| 1537 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 1691 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { |
| 1538 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 1692 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); |
| 1539 | if (svm->nested.intercept_cr_read & cr_bits) | 1693 | if (svm->nested.intercept_cr_read & cr_bits) |
| @@ -1564,6 +1718,10 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
| 1564 | vmexit = NESTED_EXIT_DONE; | 1718 | vmexit = NESTED_EXIT_DONE; |
| 1565 | break; | 1719 | break; |
| 1566 | } | 1720 | } |
| 1721 | case SVM_EXIT_ERR: { | ||
| 1722 | vmexit = NESTED_EXIT_DONE; | ||
| 1723 | break; | ||
| 1724 | } | ||
| 1567 | default: { | 1725 | default: { |
| 1568 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1726 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
| 1569 | if (svm->nested.intercept & exit_bits) | 1727 | if (svm->nested.intercept & exit_bits) |
| @@ -1571,9 +1729,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
| 1571 | } | 1729 | } |
| 1572 | } | 1730 | } |
| 1573 | 1731 | ||
| 1574 | if (vmexit == NESTED_EXIT_DONE) { | 1732 | return vmexit; |
| 1733 | } | ||
| 1734 | |||
| 1735 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | ||
| 1736 | { | ||
| 1737 | int vmexit; | ||
| 1738 | |||
| 1739 | vmexit = nested_svm_intercept(svm); | ||
| 1740 | |||
| 1741 | if (vmexit == NESTED_EXIT_DONE) | ||
| 1575 | nested_svm_vmexit(svm); | 1742 | nested_svm_vmexit(svm); |
| 1576 | } | ||
| 1577 | 1743 | ||
| 1578 | return vmexit; | 1744 | return vmexit; |
| 1579 | } | 1745 | } |
| @@ -1615,6 +1781,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1615 | struct vmcb *nested_vmcb; | 1781 | struct vmcb *nested_vmcb; |
| 1616 | struct vmcb *hsave = svm->nested.hsave; | 1782 | struct vmcb *hsave = svm->nested.hsave; |
| 1617 | struct vmcb *vmcb = svm->vmcb; | 1783 | struct vmcb *vmcb = svm->vmcb; |
| 1784 | struct page *page; | ||
| 1618 | 1785 | ||
| 1619 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | 1786 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, |
| 1620 | vmcb->control.exit_info_1, | 1787 | vmcb->control.exit_info_1, |
| @@ -1622,10 +1789,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1622 | vmcb->control.exit_int_info, | 1789 | vmcb->control.exit_int_info, |
| 1623 | vmcb->control.exit_int_info_err); | 1790 | vmcb->control.exit_int_info_err); |
| 1624 | 1791 | ||
| 1625 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1792 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
| 1626 | if (!nested_vmcb) | 1793 | if (!nested_vmcb) |
| 1627 | return 1; | 1794 | return 1; |
| 1628 | 1795 | ||
| 1796 | /* Exit nested SVM mode */ | ||
| 1797 | svm->nested.vmcb = 0; | ||
| 1798 | |||
| 1629 | /* Give the current vmcb to the guest */ | 1799 | /* Give the current vmcb to the guest */ |
| 1630 | disable_gif(svm); | 1800 | disable_gif(svm); |
| 1631 | 1801 | ||
| @@ -1635,9 +1805,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1635 | nested_vmcb->save.ds = vmcb->save.ds; | 1805 | nested_vmcb->save.ds = vmcb->save.ds; |
| 1636 | nested_vmcb->save.gdtr = vmcb->save.gdtr; | 1806 | nested_vmcb->save.gdtr = vmcb->save.gdtr; |
| 1637 | nested_vmcb->save.idtr = vmcb->save.idtr; | 1807 | nested_vmcb->save.idtr = vmcb->save.idtr; |
| 1638 | if (npt_enabled) | 1808 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); |
| 1639 | nested_vmcb->save.cr3 = vmcb->save.cr3; | 1809 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; |
| 1640 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 1810 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
| 1811 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | ||
| 1641 | nested_vmcb->save.rflags = vmcb->save.rflags; | 1812 | nested_vmcb->save.rflags = vmcb->save.rflags; |
| 1642 | nested_vmcb->save.rip = vmcb->save.rip; | 1813 | nested_vmcb->save.rip = vmcb->save.rip; |
| 1643 | nested_vmcb->save.rsp = vmcb->save.rsp; | 1814 | nested_vmcb->save.rsp = vmcb->save.rsp; |
| @@ -1709,10 +1880,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1709 | svm->vmcb->save.cpl = 0; | 1880 | svm->vmcb->save.cpl = 0; |
| 1710 | svm->vmcb->control.exit_int_info = 0; | 1881 | svm->vmcb->control.exit_int_info = 0; |
| 1711 | 1882 | ||
| 1712 | /* Exit nested SVM mode */ | 1883 | nested_svm_unmap(page); |
| 1713 | svm->nested.vmcb = 0; | ||
| 1714 | |||
| 1715 | nested_svm_unmap(nested_vmcb, KM_USER0); | ||
| 1716 | 1884 | ||
| 1717 | kvm_mmu_reset_context(&svm->vcpu); | 1885 | kvm_mmu_reset_context(&svm->vcpu); |
| 1718 | kvm_mmu_load(&svm->vcpu); | 1886 | kvm_mmu_load(&svm->vcpu); |
| @@ -1722,19 +1890,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1722 | 1890 | ||
| 1723 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | 1891 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) |
| 1724 | { | 1892 | { |
| 1725 | u32 *nested_msrpm; | 1893 | /* |
| 1894 | * This function merges the msr permission bitmaps of kvm and the | ||
| 1895 | * nested vmcb. It is omptimized in that it only merges the parts where | ||
| 1896 | * the kvm msr permission bitmap may contain zero bits | ||
| 1897 | */ | ||
| 1726 | int i; | 1898 | int i; |
| 1727 | 1899 | ||
| 1728 | nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1900 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
| 1729 | if (!nested_msrpm) | 1901 | return true; |
| 1730 | return false; | ||
| 1731 | 1902 | ||
| 1732 | for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) | 1903 | for (i = 0; i < MSRPM_OFFSETS; i++) { |
| 1733 | svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; | 1904 | u32 value, p; |
| 1905 | u64 offset; | ||
| 1734 | 1906 | ||
| 1735 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); | 1907 | if (msrpm_offsets[i] == 0xffffffff) |
| 1908 | break; | ||
| 1909 | |||
| 1910 | p = msrpm_offsets[i]; | ||
| 1911 | offset = svm->nested.vmcb_msrpm + (p * 4); | ||
| 1912 | |||
| 1913 | if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) | ||
| 1914 | return false; | ||
| 1915 | |||
| 1916 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | ||
| 1917 | } | ||
| 1736 | 1918 | ||
| 1737 | nested_svm_unmap(nested_msrpm, KM_USER0); | 1919 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); |
| 1738 | 1920 | ||
| 1739 | return true; | 1921 | return true; |
| 1740 | } | 1922 | } |
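The rewritten nested_svm_vmrun_msrpm no longer maps and ORs the entire nested MSR permission bitmap. KVM's own bitmap is almost entirely ones (intercept), so only the 32-bit words listed in msrpm_offsets[] can end up with pass-through bits and need merging; each of those words is fetched from guest memory with kvm_read_guest and ORed into the shadow bitmap. A sketch of the same sparse merge over plain arrays (standalone illustration, not kernel API) might look like:

#include <stdint.h>
#include <stddef.h>

#define OFFSETS_END 0xffffffffu   /* terminator, as in msrpm_offsets[] */

/* Merge only the words where the host bitmap may contain zero (pass-through)
 * bits; every other word is already all-ones and needs no work. */
static void merge_msrpm(uint32_t *shadow, const uint32_t *host,
			const uint32_t *nested, const uint32_t *offsets)
{
	size_t i;

	for (i = 0; offsets[i] != OFFSETS_END; i++) {
		uint32_t p = offsets[i];

		shadow[p] = host[p] | nested[p];
	}
}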
| @@ -1744,26 +1926,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1744 | struct vmcb *nested_vmcb; | 1926 | struct vmcb *nested_vmcb; |
| 1745 | struct vmcb *hsave = svm->nested.hsave; | 1927 | struct vmcb *hsave = svm->nested.hsave; |
| 1746 | struct vmcb *vmcb = svm->vmcb; | 1928 | struct vmcb *vmcb = svm->vmcb; |
| 1929 | struct page *page; | ||
| 1930 | u64 vmcb_gpa; | ||
| 1747 | 1931 | ||
| 1748 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 1932 | vmcb_gpa = svm->vmcb->save.rax; |
| 1933 | |||
| 1934 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | ||
| 1749 | if (!nested_vmcb) | 1935 | if (!nested_vmcb) |
| 1750 | return false; | 1936 | return false; |
| 1751 | 1937 | ||
| 1752 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1938 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, |
| 1753 | svm->nested.vmcb = svm->vmcb->save.rax; | ||
| 1754 | |||
| 1755 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
| 1756 | nested_vmcb->save.rip, | 1939 | nested_vmcb->save.rip, |
| 1757 | nested_vmcb->control.int_ctl, | 1940 | nested_vmcb->control.int_ctl, |
| 1758 | nested_vmcb->control.event_inj, | 1941 | nested_vmcb->control.event_inj, |
| 1759 | nested_vmcb->control.nested_ctl); | 1942 | nested_vmcb->control.nested_ctl); |
| 1760 | 1943 | ||
| 1944 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | ||
| 1945 | nested_vmcb->control.intercept_cr_write, | ||
| 1946 | nested_vmcb->control.intercept_exceptions, | ||
| 1947 | nested_vmcb->control.intercept); | ||
| 1948 | |||
| 1761 | /* Clear internal status */ | 1949 | /* Clear internal status */ |
| 1762 | kvm_clear_exception_queue(&svm->vcpu); | 1950 | kvm_clear_exception_queue(&svm->vcpu); |
| 1763 | kvm_clear_interrupt_queue(&svm->vcpu); | 1951 | kvm_clear_interrupt_queue(&svm->vcpu); |
| 1764 | 1952 | ||
| 1765 | /* Save the old vmcb, so we don't need to pick what we save, but | 1953 | /* |
| 1766 | can restore everything when a VMEXIT occurs */ | 1954 | * Save the old vmcb, so we don't need to pick what we save, but can |
| 1955 | * restore everything when a VMEXIT occurs | ||
| 1956 | */ | ||
| 1767 | hsave->save.es = vmcb->save.es; | 1957 | hsave->save.es = vmcb->save.es; |
| 1768 | hsave->save.cs = vmcb->save.cs; | 1958 | hsave->save.cs = vmcb->save.cs; |
| 1769 | hsave->save.ss = vmcb->save.ss; | 1959 | hsave->save.ss = vmcb->save.ss; |
| @@ -1803,14 +1993,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1803 | if (npt_enabled) { | 1993 | if (npt_enabled) { |
| 1804 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 1994 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
| 1805 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 1995 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
| 1806 | } else { | 1996 | } else |
| 1807 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 1997 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
| 1808 | kvm_mmu_reset_context(&svm->vcpu); | 1998 | |
| 1809 | } | 1999 | /* Guest paging mode is active - reset mmu */ |
| 2000 | kvm_mmu_reset_context(&svm->vcpu); | ||
| 2001 | |||
| 1810 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; | 2002 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; |
| 1811 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); | 2003 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); |
| 1812 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); | 2004 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); |
| 1813 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); | 2005 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); |
| 2006 | |||
| 1814 | /* In case we don't even reach vcpu_run, the fields are not updated */ | 2007 | /* In case we don't even reach vcpu_run, the fields are not updated */ |
| 1815 | svm->vmcb->save.rax = nested_vmcb->save.rax; | 2008 | svm->vmcb->save.rax = nested_vmcb->save.rax; |
| 1816 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; | 2009 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; |
| @@ -1819,22 +2012,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1819 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; | 2012 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; |
| 1820 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; | 2013 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; |
| 1821 | 2014 | ||
| 1822 | /* We don't want a nested guest to be more powerful than the guest, | 2015 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; |
| 1823 | so all intercepts are ORed */ | 2016 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
| 1824 | svm->vmcb->control.intercept_cr_read |= | ||
| 1825 | nested_vmcb->control.intercept_cr_read; | ||
| 1826 | svm->vmcb->control.intercept_cr_write |= | ||
| 1827 | nested_vmcb->control.intercept_cr_write; | ||
| 1828 | svm->vmcb->control.intercept_dr_read |= | ||
| 1829 | nested_vmcb->control.intercept_dr_read; | ||
| 1830 | svm->vmcb->control.intercept_dr_write |= | ||
| 1831 | nested_vmcb->control.intercept_dr_write; | ||
| 1832 | svm->vmcb->control.intercept_exceptions |= | ||
| 1833 | nested_vmcb->control.intercept_exceptions; | ||
| 1834 | |||
| 1835 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
| 1836 | |||
| 1837 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; | ||
| 1838 | 2017 | ||
| 1839 | /* cache intercepts */ | 2018 | /* cache intercepts */ |
| 1840 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2019 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; |
| @@ -1851,13 +2030,43 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1851 | else | 2030 | else |
| 1852 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 2031 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
| 1853 | 2032 | ||
| 2033 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | ||
| 2034 | /* We only want the cr8 intercept bits of the guest */ | ||
| 2035 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | ||
| 2036 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
| 2037 | } | ||
| 2038 | |||
| 2039 | /* We don't want to see VMMCALLs from a nested guest */ | ||
| 2040 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); | ||
| 2041 | |||
| 2042 | /* | ||
| 2043 | * We don't want a nested guest to be more powerful than the guest, so | ||
| 2044 | * all intercepts are ORed | ||
| 2045 | */ | ||
| 2046 | svm->vmcb->control.intercept_cr_read |= | ||
| 2047 | nested_vmcb->control.intercept_cr_read; | ||
| 2048 | svm->vmcb->control.intercept_cr_write |= | ||
| 2049 | nested_vmcb->control.intercept_cr_write; | ||
| 2050 | svm->vmcb->control.intercept_dr_read |= | ||
| 2051 | nested_vmcb->control.intercept_dr_read; | ||
| 2052 | svm->vmcb->control.intercept_dr_write |= | ||
| 2053 | nested_vmcb->control.intercept_dr_write; | ||
| 2054 | svm->vmcb->control.intercept_exceptions |= | ||
| 2055 | nested_vmcb->control.intercept_exceptions; | ||
| 2056 | |||
| 2057 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
| 2058 | |||
| 2059 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | ||
| 1854 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2060 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
| 1855 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 2061 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
| 1856 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 2062 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
| 1857 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 2063 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
| 1858 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 2064 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
| 1859 | 2065 | ||
| 1860 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2066 | nested_svm_unmap(page); |
| 2067 | |||
| 2068 | /* nested_vmcb is our indicator if nested SVM is activated */ | ||
| 2069 | svm->nested.vmcb = vmcb_gpa; | ||
| 1861 | 2070 | ||
| 1862 | enable_gif(svm); | 2071 | enable_gif(svm); |
| 1863 | 2072 | ||
| @@ -1883,6 +2092,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
| 1883 | static int vmload_interception(struct vcpu_svm *svm) | 2092 | static int vmload_interception(struct vcpu_svm *svm) |
| 1884 | { | 2093 | { |
| 1885 | struct vmcb *nested_vmcb; | 2094 | struct vmcb *nested_vmcb; |
| 2095 | struct page *page; | ||
| 1886 | 2096 | ||
| 1887 | if (nested_svm_check_permissions(svm)) | 2097 | if (nested_svm_check_permissions(svm)) |
| 1888 | return 1; | 2098 | return 1; |
| @@ -1890,12 +2100,12 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
| 1890 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2100 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 1891 | skip_emulated_instruction(&svm->vcpu); | 2101 | skip_emulated_instruction(&svm->vcpu); |
| 1892 | 2102 | ||
| 1893 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2103 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
| 1894 | if (!nested_vmcb) | 2104 | if (!nested_vmcb) |
| 1895 | return 1; | 2105 | return 1; |
| 1896 | 2106 | ||
| 1897 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2107 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
| 1898 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2108 | nested_svm_unmap(page); |
| 1899 | 2109 | ||
| 1900 | return 1; | 2110 | return 1; |
| 1901 | } | 2111 | } |
| @@ -1903,6 +2113,7 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
| 1903 | static int vmsave_interception(struct vcpu_svm *svm) | 2113 | static int vmsave_interception(struct vcpu_svm *svm) |
| 1904 | { | 2114 | { |
| 1905 | struct vmcb *nested_vmcb; | 2115 | struct vmcb *nested_vmcb; |
| 2116 | struct page *page; | ||
| 1906 | 2117 | ||
| 1907 | if (nested_svm_check_permissions(svm)) | 2118 | if (nested_svm_check_permissions(svm)) |
| 1908 | return 1; | 2119 | return 1; |
| @@ -1910,12 +2121,12 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
| 1910 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2121 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 1911 | skip_emulated_instruction(&svm->vcpu); | 2122 | skip_emulated_instruction(&svm->vcpu); |
| 1912 | 2123 | ||
| 1913 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2124 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
| 1914 | if (!nested_vmcb) | 2125 | if (!nested_vmcb) |
| 1915 | return 1; | 2126 | return 1; |
| 1916 | 2127 | ||
| 1917 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2128 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
| 1918 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2129 | nested_svm_unmap(page); |
| 1919 | 2130 | ||
| 1920 | return 1; | 2131 | return 1; |
| 1921 | } | 2132 | } |
| @@ -2018,6 +2229,8 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
| 2018 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | 2229 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; |
| 2019 | uint32_t idt_v = | 2230 | uint32_t idt_v = |
| 2020 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | 2231 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; |
| 2232 | bool has_error_code = false; | ||
| 2233 | u32 error_code = 0; | ||
| 2021 | 2234 | ||
| 2022 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 2235 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
| 2023 | 2236 | ||
| @@ -2038,6 +2251,12 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
| 2038 | svm->vcpu.arch.nmi_injected = false; | 2251 | svm->vcpu.arch.nmi_injected = false; |
| 2039 | break; | 2252 | break; |
| 2040 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2253 | case SVM_EXITINTINFO_TYPE_EXEPT: |
| 2254 | if (svm->vmcb->control.exit_info_2 & | ||
| 2255 | (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { | ||
| 2256 | has_error_code = true; | ||
| 2257 | error_code = | ||
| 2258 | (u32)svm->vmcb->control.exit_info_2; | ||
| 2259 | } | ||
| 2041 | kvm_clear_exception_queue(&svm->vcpu); | 2260 | kvm_clear_exception_queue(&svm->vcpu); |
| 2042 | break; | 2261 | break; |
| 2043 | case SVM_EXITINTINFO_TYPE_INTR: | 2262 | case SVM_EXITINTINFO_TYPE_INTR: |
| @@ -2054,7 +2273,14 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
| 2054 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2273 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
| 2055 | skip_emulated_instruction(&svm->vcpu); | 2274 | skip_emulated_instruction(&svm->vcpu); |
| 2056 | 2275 | ||
| 2057 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2276 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, |
| 2277 | has_error_code, error_code) == EMULATE_FAIL) { | ||
| 2278 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 2279 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 2280 | svm->vcpu.run->internal.ndata = 0; | ||
| 2281 | return 0; | ||
| 2282 | } | ||
| 2283 | return 1; | ||
| 2058 | } | 2284 | } |
| 2059 | 2285 | ||
| 2060 | static int cpuid_interception(struct vcpu_svm *svm) | 2286 | static int cpuid_interception(struct vcpu_svm *svm) |
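task_switch_interception now extracts the pushed error code from exit_info_2 when the switch was caused by an exception, passes it to kvm_task_switch, and, if the emulator cannot complete the switch, reports that to userspace as KVM_EXIT_INTERNAL_ERROR. On the userspace side this shows up as an additional exit reason from KVM_RUN; a minimal, hypothetical handler (vcpu setup not shown) might look like:

#include <stdio.h>
#include <linux/kvm.h>

/* 'run' is the mmap()ed struct kvm_run of the vcpu that just returned
 * from the KVM_RUN ioctl. Sketch only; all other handling elsewhere. */
static int check_internal_error(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_INTERNAL_ERROR)
		return 0;

	fprintf(stderr, "kvm: internal error, suberror %u (%u data words)\n",
		run->internal.suberror, run->internal.ndata);
	return -1;
}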
| @@ -2145,9 +2371,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
| 2145 | case MSR_IA32_SYSENTER_ESP: | 2371 | case MSR_IA32_SYSENTER_ESP: |
| 2146 | *data = svm->sysenter_esp; | 2372 | *data = svm->sysenter_esp; |
| 2147 | break; | 2373 | break; |
| 2148 | /* Nobody will change the following 5 values in the VMCB so | 2374 | /* |
| 2149 | we can safely return them on rdmsr. They will always be 0 | 2375 | * Nobody will change the following 5 values in the VMCB so we can |
| 2150 | until LBRV is implemented. */ | 2376 | * safely return them on rdmsr. They will always be 0 until LBRV is |
| 2377 | * implemented. | ||
| 2378 | */ | ||
| 2151 | case MSR_IA32_DEBUGCTLMSR: | 2379 | case MSR_IA32_DEBUGCTLMSR: |
| 2152 | *data = svm->vmcb->save.dbgctl; | 2380 | *data = svm->vmcb->save.dbgctl; |
| 2153 | break; | 2381 | break; |
| @@ -2167,7 +2395,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
| 2167 | *data = svm->nested.hsave_msr; | 2395 | *data = svm->nested.hsave_msr; |
| 2168 | break; | 2396 | break; |
| 2169 | case MSR_VM_CR: | 2397 | case MSR_VM_CR: |
| 2170 | *data = 0; | 2398 | *data = svm->nested.vm_cr_msr; |
| 2171 | break; | 2399 | break; |
| 2172 | case MSR_IA32_UCODE_REV: | 2400 | case MSR_IA32_UCODE_REV: |
| 2173 | *data = 0x01000065; | 2401 | *data = 0x01000065; |
| @@ -2197,6 +2425,31 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
| 2197 | return 1; | 2425 | return 1; |
| 2198 | } | 2426 | } |
| 2199 | 2427 | ||
| 2428 | static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) | ||
| 2429 | { | ||
| 2430 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 2431 | int svm_dis, chg_mask; | ||
| 2432 | |||
| 2433 | if (data & ~SVM_VM_CR_VALID_MASK) | ||
| 2434 | return 1; | ||
| 2435 | |||
| 2436 | chg_mask = SVM_VM_CR_VALID_MASK; | ||
| 2437 | |||
| 2438 | if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) | ||
| 2439 | chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); | ||
| 2440 | |||
| 2441 | svm->nested.vm_cr_msr &= ~chg_mask; | ||
| 2442 | svm->nested.vm_cr_msr |= (data & chg_mask); | ||
| 2443 | |||
| 2444 | svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; | ||
| 2445 | |||
| 2446 | /* check for svm_disable while efer.svme is set */ | ||
| 2447 | if (svm_dis && (vcpu->arch.efer & EFER_SVME)) | ||
| 2448 | return 1; | ||
| 2449 | |||
| 2450 | return 0; | ||
| 2451 | } | ||
| 2452 | |||
| 2200 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | 2453 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) |
| 2201 | { | 2454 | { |
| 2202 | struct vcpu_svm *svm = to_svm(vcpu); | 2455 | struct vcpu_svm *svm = to_svm(vcpu); |
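svm_set_vm_cr above gives the guest a writable VM_CR MSR with the architectural semantics: writes to reserved bits fault, once SVMDIS is set neither it nor the LOCK bit can be changed again, and SVM cannot be disabled while the guest still has EFER.SVME set. A pure-function sketch of that rule, with constants mirroring the SVM_VM_CR_* masks (bit 3 = LOCK, bit 4 = SVMDIS), might be:

#include <stdint.h>
#include <stdbool.h>

#define VM_CR_LOCK   (1u << 3)   /* mirrors SVM_VM_CR_SVM_LOCK_MASK */
#define VM_CR_SVMDIS (1u << 4)   /* mirrors SVM_VM_CR_SVM_DIS_MASK  */
#define VM_CR_VALID  0x1fu       /* mirrors SVM_VM_CR_VALID_MASK    */

/* Emulated VM_CR write as a pure function: returns false where the real
 * code makes the wrmsr fail. Illustration only, not kernel code. */
static bool vm_cr_write(uint64_t *vm_cr, uint64_t data, bool efer_svme)
{
	uint64_t chg_mask = VM_CR_VALID;

	if (data & ~(uint64_t)VM_CR_VALID)
		return false;				/* reserved bits set */

	if (*vm_cr & VM_CR_SVMDIS)			/* SVMDIS already set: */
		chg_mask &= ~(VM_CR_LOCK | VM_CR_SVMDIS); /* freeze both bits */

	*vm_cr = (*vm_cr & ~chg_mask) | (data & chg_mask);

	if ((*vm_cr & VM_CR_SVMDIS) && efer_svme)
		return false;		/* can't disable SVM while in use */

	return true;
}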
| @@ -2263,6 +2516,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
| 2263 | svm->nested.hsave_msr = data; | 2516 | svm->nested.hsave_msr = data; |
| 2264 | break; | 2517 | break; |
| 2265 | case MSR_VM_CR: | 2518 | case MSR_VM_CR: |
| 2519 | return svm_set_vm_cr(vcpu, data); | ||
| 2266 | case MSR_VM_IGNNE: | 2520 | case MSR_VM_IGNNE: |
| 2267 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 2521 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
| 2268 | break; | 2522 | break; |
| @@ -2326,16 +2580,16 @@ static int pause_interception(struct vcpu_svm *svm) | |||
| 2326 | } | 2580 | } |
| 2327 | 2581 | ||
| 2328 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 2582 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
| 2329 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2583 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
| 2330 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2584 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
| 2331 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2585 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
| 2332 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2586 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
| 2333 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 2587 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
| 2334 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2588 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
| 2335 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2589 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
| 2336 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2590 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
| 2337 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 2591 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
| 2338 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 2592 | [SVM_EXIT_READ_DR0] = emulate_on_interception, |
| 2339 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2593 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
| 2340 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2594 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
| 2341 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2595 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
| @@ -2354,15 +2608,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 2354 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2608 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
| 2355 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2609 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
| 2356 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 2610 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
| 2357 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 2611 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
| 2358 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 2612 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
| 2359 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 2613 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
| 2360 | [SVM_EXIT_INTR] = intr_interception, | 2614 | [SVM_EXIT_INTR] = intr_interception, |
| 2361 | [SVM_EXIT_NMI] = nmi_interception, | 2615 | [SVM_EXIT_NMI] = nmi_interception, |
| 2362 | [SVM_EXIT_SMI] = nop_on_interception, | 2616 | [SVM_EXIT_SMI] = nop_on_interception, |
| 2363 | [SVM_EXIT_INIT] = nop_on_interception, | 2617 | [SVM_EXIT_INIT] = nop_on_interception, |
| 2364 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2618 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
| 2365 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | ||
| 2366 | [SVM_EXIT_CPUID] = cpuid_interception, | 2619 | [SVM_EXIT_CPUID] = cpuid_interception, |
| 2367 | [SVM_EXIT_IRET] = iret_interception, | 2620 | [SVM_EXIT_IRET] = iret_interception, |
| 2368 | [SVM_EXIT_INVD] = emulate_on_interception, | 2621 | [SVM_EXIT_INVD] = emulate_on_interception, |
| @@ -2370,7 +2623,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 2370 | [SVM_EXIT_HLT] = halt_interception, | 2623 | [SVM_EXIT_HLT] = halt_interception, |
| 2371 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2624 | [SVM_EXIT_INVLPG] = invlpg_interception, |
| 2372 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2625 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
| 2373 | [SVM_EXIT_IOIO] = io_interception, | 2626 | [SVM_EXIT_IOIO] = io_interception, |
| 2374 | [SVM_EXIT_MSR] = msr_interception, | 2627 | [SVM_EXIT_MSR] = msr_interception, |
| 2375 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, | 2628 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, |
| 2376 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, | 2629 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, |
| @@ -2393,7 +2646,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 2393 | struct kvm_run *kvm_run = vcpu->run; | 2646 | struct kvm_run *kvm_run = vcpu->run; |
| 2394 | u32 exit_code = svm->vmcb->control.exit_code; | 2647 | u32 exit_code = svm->vmcb->control.exit_code; |
| 2395 | 2648 | ||
| 2396 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2649 | trace_kvm_exit(exit_code, vcpu); |
| 2650 | |||
| 2651 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
| 2652 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
| 2653 | if (npt_enabled) | ||
| 2654 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
| 2397 | 2655 | ||
| 2398 | if (unlikely(svm->nested.exit_required)) { | 2656 | if (unlikely(svm->nested.exit_required)) { |
| 2399 | nested_svm_vmexit(svm); | 2657 | nested_svm_vmexit(svm); |
| @@ -2422,11 +2680,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 2422 | 2680 | ||
| 2423 | svm_complete_interrupts(svm); | 2681 | svm_complete_interrupts(svm); |
| 2424 | 2682 | ||
| 2425 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
| 2426 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
| 2427 | if (npt_enabled) | ||
| 2428 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
| 2429 | |||
| 2430 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2683 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
| 2431 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2684 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| 2432 | kvm_run->fail_entry.hardware_entry_failure_reason | 2685 | kvm_run->fail_entry.hardware_entry_failure_reason |
| @@ -2511,6 +2764,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
| 2511 | { | 2764 | { |
| 2512 | struct vcpu_svm *svm = to_svm(vcpu); | 2765 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2513 | 2766 | ||
| 2767 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
| 2768 | return; | ||
| 2769 | |||
| 2514 | if (irr == -1) | 2770 | if (irr == -1) |
| 2515 | return; | 2771 | return; |
| 2516 | 2772 | ||
| @@ -2522,8 +2778,12 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
| 2522 | { | 2778 | { |
| 2523 | struct vcpu_svm *svm = to_svm(vcpu); | 2779 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2524 | struct vmcb *vmcb = svm->vmcb; | 2780 | struct vmcb *vmcb = svm->vmcb; |
| 2525 | return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2781 | int ret; |
| 2526 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2782 | ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
| 2783 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
| 2784 | ret = ret && gif_set(svm) && nested_svm_nmi(svm); | ||
| 2785 | |||
| 2786 | return ret; | ||
| 2527 | } | 2787 | } |
| 2528 | 2788 | ||
| 2529 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | 2789 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) |
| @@ -2568,13 +2828,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
| 2568 | { | 2828 | { |
| 2569 | struct vcpu_svm *svm = to_svm(vcpu); | 2829 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2570 | 2830 | ||
| 2571 | nested_svm_intr(svm); | 2831 | /* |
| 2572 | 2832 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | |
| 2573 | /* In case GIF=0 we can't rely on the CPU to tell us when | 2833 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
| 2574 | * GIF becomes 1, because that's a separate STGI/VMRUN intercept. | 2834 | * get that intercept, this function will be called again though and |
| 2575 | * The next time we get that intercept, this function will be | 2835 | * we'll get the vintr intercept. |
| 2576 | * called again though and we'll get the vintr intercept. */ | 2836 | */ |
| 2577 | if (gif_set(svm)) { | 2837 | if (gif_set(svm) && nested_svm_intr(svm)) { |
| 2578 | svm_set_vintr(svm); | 2838 | svm_set_vintr(svm); |
| 2579 | svm_inject_irq(svm, 0x0); | 2839 | svm_inject_irq(svm, 0x0); |
| 2580 | } | 2840 | } |
| @@ -2588,9 +2848,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
| 2588 | == HF_NMI_MASK) | 2848 | == HF_NMI_MASK) |
| 2589 | return; /* IRET will cause a vm exit */ | 2849 | return; /* IRET will cause a vm exit */ |
| 2590 | 2850 | ||
| 2591 | /* Something prevents NMI from been injected. Single step over | 2851 | /* |
| 2592 | possible problem (IRET or exception injection or interrupt | 2852 | * Something prevents NMI from being injected. Single step over possible |
| 2593 | shadow) */ | 2853 | * problem (IRET or exception injection or interrupt shadow) |
| 2854 | */ | ||
| 2594 | svm->nmi_singlestep = true; | 2855 | svm->nmi_singlestep = true; |
| 2595 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2856 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
| 2596 | update_db_intercept(vcpu); | 2857 | update_db_intercept(vcpu); |
| @@ -2614,6 +2875,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
| 2614 | { | 2875 | { |
| 2615 | struct vcpu_svm *svm = to_svm(vcpu); | 2876 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2616 | 2877 | ||
| 2878 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
| 2879 | return; | ||
| 2880 | |||
| 2617 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2881 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
| 2618 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2882 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
| 2619 | kvm_set_cr8(vcpu, cr8); | 2883 | kvm_set_cr8(vcpu, cr8); |
| @@ -2625,6 +2889,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
| 2625 | struct vcpu_svm *svm = to_svm(vcpu); | 2889 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2626 | u64 cr8; | 2890 | u64 cr8; |
| 2627 | 2891 | ||
| 2892 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
| 2893 | return; | ||
| 2894 | |||
| 2628 | cr8 = kvm_get_cr8(vcpu); | 2895 | cr8 = kvm_get_cr8(vcpu); |
| 2629 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2896 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
| 2630 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2897 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
| @@ -2635,6 +2902,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
| 2635 | u8 vector; | 2902 | u8 vector; |
| 2636 | int type; | 2903 | int type; |
| 2637 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | 2904 | u32 exitintinfo = svm->vmcb->control.exit_int_info; |
| 2905 | unsigned int3_injected = svm->int3_injected; | ||
| 2906 | |||
| 2907 | svm->int3_injected = 0; | ||
| 2638 | 2908 | ||
| 2639 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | 2909 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) |
| 2640 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 2910 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
| @@ -2654,18 +2924,25 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
| 2654 | svm->vcpu.arch.nmi_injected = true; | 2924 | svm->vcpu.arch.nmi_injected = true; |
| 2655 | break; | 2925 | break; |
| 2656 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2926 | case SVM_EXITINTINFO_TYPE_EXEPT: |
| 2657 | /* In case of software exception do not reinject an exception | 2927 | /* |
| 2658 | vector, but re-execute and instruction instead */ | 2928 | * In case of software exceptions, do not reinject the vector, |
| 2659 | if (is_nested(svm)) | 2929 | * but re-execute the instruction instead. Rewind RIP first |
| 2660 | break; | 2930 | * if we emulated INT3 before. |
| 2661 | if (kvm_exception_is_soft(vector)) | 2931 | */ |
| 2932 | if (kvm_exception_is_soft(vector)) { | ||
| 2933 | if (vector == BP_VECTOR && int3_injected && | ||
| 2934 | kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) | ||
| 2935 | kvm_rip_write(&svm->vcpu, | ||
| 2936 | kvm_rip_read(&svm->vcpu) - | ||
| 2937 | int3_injected); | ||
| 2662 | break; | 2938 | break; |
| 2939 | } | ||
| 2663 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | 2940 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { |
| 2664 | u32 err = svm->vmcb->control.exit_int_info_err; | 2941 | u32 err = svm->vmcb->control.exit_int_info_err; |
| 2665 | kvm_queue_exception_e(&svm->vcpu, vector, err); | 2942 | kvm_requeue_exception_e(&svm->vcpu, vector, err); |
| 2666 | 2943 | ||
| 2667 | } else | 2944 | } else |
| 2668 | kvm_queue_exception(&svm->vcpu, vector); | 2945 | kvm_requeue_exception(&svm->vcpu, vector); |
| 2669 | break; | 2946 | break; |
| 2670 | case SVM_EXITINTINFO_TYPE_INTR: | 2947 | case SVM_EXITINTINFO_TYPE_INTR: |
| 2671 | kvm_queue_interrupt(&svm->vcpu, vector, false); | 2948 | kvm_queue_interrupt(&svm->vcpu, vector, false); |
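Without the NRIPS feature, injecting an INT3 means RIP was already advanced past the instruction; if the VMRUN exits before the #BP is actually delivered, exit_int_info still reports it and the advance has to be undone before re-execution. The code above does that only when int3_injected is non-zero and the current linear RIP (CS.base + RIP, as kvm_is_linear_rip compares it) still matches the one recorded at injection time. A small standalone sketch of that decision might be:

/* Rewind RIP by the length of a previously injected INT3 when the exit
 * raced with its delivery. Hypothetical pure helper; 'int3_rip' is the
 * linear RIP recorded at injection time. */
static unsigned long rip_after_lost_int3(unsigned long rip,
					 unsigned long cs_base,
					 unsigned long int3_rip,
					 unsigned int int3_injected)
{
	if (int3_injected && rip + cs_base == int3_rip)
		return rip - int3_injected;

	return rip;	/* RIP has moved on; nothing to undo */
}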
| @@ -2688,6 +2965,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 2688 | u16 gs_selector; | 2965 | u16 gs_selector; |
| 2689 | u16 ldt_selector; | 2966 | u16 ldt_selector; |
| 2690 | 2967 | ||
| 2968 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 2969 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 2970 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
| 2971 | |||
| 2691 | /* | 2972 | /* |
| 2692 | * A vmexit emulation is required before the vcpu can be executed | 2973 | * A vmexit emulation is required before the vcpu can be executed |
| 2693 | * again. | 2974 | * again. |
| @@ -2695,10 +2976,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 2695 | if (unlikely(svm->nested.exit_required)) | 2976 | if (unlikely(svm->nested.exit_required)) |
| 2696 | return; | 2977 | return; |
| 2697 | 2978 | ||
| 2698 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 2699 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 2700 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
| 2701 | |||
| 2702 | pre_svm_run(svm); | 2979 | pre_svm_run(svm); |
| 2703 | 2980 | ||
| 2704 | sync_lapic_to_cr8(vcpu); | 2981 | sync_lapic_to_cr8(vcpu); |
| @@ -2879,25 +3156,39 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
| 2879 | { | 3156 | { |
| 2880 | } | 3157 | } |
| 2881 | 3158 | ||
| 3159 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
| 3160 | { | ||
| 3161 | switch (func) { | ||
| 3162 | case 0x8000000A: | ||
| 3163 | entry->eax = 1; /* SVM revision 1 */ | ||
| 3164 | entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper | ||
| 3165 | ASID emulation to nested SVM */ | ||
| 3166 | entry->ecx = 0; /* Reserved */ | ||
| 3167 | entry->edx = 0; /* Do not support any additional features */ | ||
| 3168 | |||
| 3169 | break; | ||
| 3170 | } | ||
| 3171 | } | ||
| 3172 | |||
| 2882 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 3173 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
| 2883 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 3174 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
| 2884 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 3175 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
| 2885 | { SVM_EXIT_READ_CR4, "read_cr4" }, | 3176 | { SVM_EXIT_READ_CR4, "read_cr4" }, |
| 2886 | { SVM_EXIT_READ_CR8, "read_cr8" }, | 3177 | { SVM_EXIT_READ_CR8, "read_cr8" }, |
| 2887 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | 3178 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, |
| 2888 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | 3179 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, |
| 2889 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | 3180 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, |
| 2890 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | 3181 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, |
| 2891 | { SVM_EXIT_READ_DR0, "read_dr0" }, | 3182 | { SVM_EXIT_READ_DR0, "read_dr0" }, |
| 2892 | { SVM_EXIT_READ_DR1, "read_dr1" }, | 3183 | { SVM_EXIT_READ_DR1, "read_dr1" }, |
| 2893 | { SVM_EXIT_READ_DR2, "read_dr2" }, | 3184 | { SVM_EXIT_READ_DR2, "read_dr2" }, |
| 2894 | { SVM_EXIT_READ_DR3, "read_dr3" }, | 3185 | { SVM_EXIT_READ_DR3, "read_dr3" }, |
| 2895 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | 3186 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, |
| 2896 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | 3187 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, |
| 2897 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | 3188 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, |
| 2898 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | 3189 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, |
| 2899 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | 3190 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, |
| 2900 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | 3191 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, |
| 2901 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | 3192 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, |
| 2902 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | 3193 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, |
| 2903 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | 3194 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, |
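svm_set_supported_cpuid fills in CPUID leaf 0x8000000A for the guest: EAX carries the SVM revision, EBX the number of ASIDs, and EDX the advertised SVM feature flags (none, for now). A hypothetical userspace probe of the same leaf, runnable on an AMD host or inside a guest that exposes it, could look like:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() returns 0 if the leaf is not supported. */
	if (!__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x8000000A not available");
		return 1;
	}

	printf("SVM revision:    %u\n", eax & 0xff);
	printf("Number of ASIDs: %u\n", ebx);
	printf("SVM features:    0x%08x\n", edx);
	return 0;
}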
| @@ -2946,8 +3237,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
| 2946 | { | 3237 | { |
| 2947 | struct vcpu_svm *svm = to_svm(vcpu); | 3238 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2948 | 3239 | ||
| 2949 | update_cr0_intercept(svm); | ||
| 2950 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3240 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; |
| 3241 | if (is_nested(svm)) | ||
| 3242 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
| 3243 | update_cr0_intercept(svm); | ||
| 2951 | } | 3244 | } |
| 2952 | 3245 | ||
| 2953 | static struct kvm_x86_ops svm_x86_ops = { | 3246 | static struct kvm_x86_ops svm_x86_ops = { |
| @@ -2986,8 +3279,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 2986 | .set_idt = svm_set_idt, | 3279 | .set_idt = svm_set_idt, |
| 2987 | .get_gdt = svm_get_gdt, | 3280 | .get_gdt = svm_get_gdt, |
| 2988 | .set_gdt = svm_set_gdt, | 3281 | .set_gdt = svm_set_gdt, |
| 2989 | .get_dr = svm_get_dr, | 3282 | .set_dr7 = svm_set_dr7, |
| 2990 | .set_dr = svm_set_dr, | ||
| 2991 | .cache_reg = svm_cache_reg, | 3283 | .cache_reg = svm_cache_reg, |
| 2992 | .get_rflags = svm_get_rflags, | 3284 | .get_rflags = svm_get_rflags, |
| 2993 | .set_rflags = svm_set_rflags, | 3285 | .set_rflags = svm_set_rflags, |
| @@ -3023,12 +3315,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 3023 | .cpuid_update = svm_cpuid_update, | 3315 | .cpuid_update = svm_cpuid_update, |
| 3024 | 3316 | ||
| 3025 | .rdtscp_supported = svm_rdtscp_supported, | 3317 | .rdtscp_supported = svm_rdtscp_supported, |
| 3318 | |||
| 3319 | .set_supported_cpuid = svm_set_supported_cpuid, | ||
| 3026 | }; | 3320 | }; |
| 3027 | 3321 | ||
| 3028 | static int __init svm_init(void) | 3322 | static int __init svm_init(void) |
| 3029 | { | 3323 | { |
| 3030 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), | 3324 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), |
| 3031 | THIS_MODULE); | 3325 | __alignof__(struct vcpu_svm), THIS_MODULE); |
| 3032 | } | 3326 | } |
| 3033 | 3327 | ||
| 3034 | static void __exit svm_exit(void) | 3328 | static void __exit svm_exit(void) |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index eea40439066c..4ddadb1a5ffe 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
| @@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
| 12 | /* | 12 | /* |
| 13 | * There is a race window between reading and incrementing, but we do | 13 | * There is a race window between reading and incrementing, but we do |
| 14 | * not care about potentially losing timer events in the !reinject | 14 | * not care about potentially losing timer events in the !reinject |
| 15 | * case anyway. | 15 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
| 16 | * in vcpu_enter_guest. | ||
| 16 | */ | 17 | */ |
| 17 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
| 18 | atomic_inc(&ktimer->pending); | 19 | atomic_inc(&ktimer->pending); |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6ad30a29f044..a6544b8e7c0f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | 5 | ||
| 6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
| 7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
| 8 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
| 9 | #define TRACE_INCLUDE_FILE trace | ||
| 10 | 8 | ||
| 11 | /* | 9 | /* |
| 12 | * Tracepoint for guest mode entry. | 10 | * Tracepoint for guest mode entry. |
| @@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic, | |||
| 184 | * Tracepoint for kvm guest exit: | 182 | * Tracepoint for kvm guest exit: |
| 185 | */ | 183 | */ |
| 186 | TRACE_EVENT(kvm_exit, | 184 | TRACE_EVENT(kvm_exit, |
| 187 | TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), | 185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), |
| 188 | TP_ARGS(exit_reason, guest_rip), | 186 | TP_ARGS(exit_reason, vcpu), |
| 189 | 187 | ||
| 190 | TP_STRUCT__entry( | 188 | TP_STRUCT__entry( |
| 191 | __field( unsigned int, exit_reason ) | 189 | __field( unsigned int, exit_reason ) |
| @@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit, | |||
| 194 | 192 | ||
| 195 | TP_fast_assign( | 193 | TP_fast_assign( |
| 196 | __entry->exit_reason = exit_reason; | 194 | __entry->exit_reason = exit_reason; |
| 197 | __entry->guest_rip = guest_rip; | 195 | __entry->guest_rip = kvm_rip_read(vcpu); |
| 198 | ), | 196 | ), |
| 199 | 197 | ||
| 200 | TP_printk("reason %s rip 0x%lx", | 198 | TP_printk("reason %s rip 0x%lx", |
| @@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq, | |||
| 221 | TP_printk("irq %u", __entry->irq) | 219 | TP_printk("irq %u", __entry->irq) |
| 222 | ); | 220 | ); |
| 223 | 221 | ||
| 222 | #define EXS(x) { x##_VECTOR, "#" #x } | ||
| 223 | |||
| 224 | #define kvm_trace_sym_exc \ | ||
| 225 | EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ | ||
| 226 | EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ | ||
| 227 | EXS(MF), EXS(MC) | ||
| 228 | |||
| 229 | /* | ||
| 230 | * Tracepoint for kvm interrupt injection: | ||
| 231 | */ | ||
| 232 | TRACE_EVENT(kvm_inj_exception, | ||
| 233 | TP_PROTO(unsigned exception, bool has_error, unsigned error_code), | ||
| 234 | TP_ARGS(exception, has_error, error_code), | ||
| 235 | |||
| 236 | TP_STRUCT__entry( | ||
| 237 | __field( u8, exception ) | ||
| 238 | __field( u8, has_error ) | ||
| 239 | __field( u32, error_code ) | ||
| 240 | ), | ||
| 241 | |||
| 242 | TP_fast_assign( | ||
| 243 | __entry->exception = exception; | ||
| 244 | __entry->has_error = has_error; | ||
| 245 | __entry->error_code = error_code; | ||
| 246 | ), | ||
| 247 | |||
| 248 | TP_printk("%s (0x%x)", | ||
| 249 | __print_symbolic(__entry->exception, kvm_trace_sym_exc), | ||
| 250 | /* FIXME: don't print error_code if not present */ | ||
| 251 | __entry->has_error ? __entry->error_code : 0) | ||
| 252 | ); | ||
| 253 | |||
| 224 | /* | 254 | /* |
| 225 | * Tracepoint for page fault. | 255 | * Tracepoint for page fault. |
| 226 | */ | 256 | */ |
| @@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun, | |||
| 413 | ), | 443 | ), |
| 414 | 444 | ||
| 415 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | 445 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " |
| 416 | "event_inj: 0x%08x npt: %s\n", | 446 | "event_inj: 0x%08x npt: %s", |
| 417 | __entry->rip, __entry->vmcb, __entry->nested_rip, | 447 | __entry->rip, __entry->vmcb, __entry->nested_rip, |
| 418 | __entry->int_ctl, __entry->event_inj, | 448 | __entry->int_ctl, __entry->event_inj, |
| 419 | __entry->npt ? "on" : "off") | 449 | __entry->npt ? "on" : "off") |
| 420 | ); | 450 | ); |
| 421 | 451 | ||
| 452 | TRACE_EVENT(kvm_nested_intercepts, | ||
| 453 | TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), | ||
| 454 | TP_ARGS(cr_read, cr_write, exceptions, intercept), | ||
| 455 | |||
| 456 | TP_STRUCT__entry( | ||
| 457 | __field( __u16, cr_read ) | ||
| 458 | __field( __u16, cr_write ) | ||
| 459 | __field( __u32, exceptions ) | ||
| 460 | __field( __u64, intercept ) | ||
| 461 | ), | ||
| 462 | |||
| 463 | TP_fast_assign( | ||
| 464 | __entry->cr_read = cr_read; | ||
| 465 | __entry->cr_write = cr_write; | ||
| 466 | __entry->exceptions = exceptions; | ||
| 467 | __entry->intercept = intercept; | ||
| 468 | ), | ||
| 469 | |||
| 470 | TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", | ||
| 471 | __entry->cr_read, __entry->cr_write, __entry->exceptions, | ||
| 472 | __entry->intercept) | ||
| 473 | ); | ||
| 422 | /* | 474 | /* |
| 423 | * Tracepoint for #VMEXIT while nested | 475 | * Tracepoint for #VMEXIT while nested |
| 424 | */ | 476 | */ |
| @@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
| 447 | __entry->exit_int_info_err = exit_int_info_err; | 499 | __entry->exit_int_info_err = exit_int_info_err; |
| 448 | ), | 500 | ), |
| 449 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 501 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
| 450 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 502 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
| 451 | __entry->rip, | 503 | __entry->rip, |
| 452 | ftrace_print_symbols_seq(p, __entry->exit_code, | 504 | ftrace_print_symbols_seq(p, __entry->exit_code, |
| 453 | kvm_x86_ops->exit_reasons_str), | 505 | kvm_x86_ops->exit_reasons_str), |
| @@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
| 482 | ), | 534 | ), |
| 483 | 535 | ||
| 484 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 536 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
| 485 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 537 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
| 486 | ftrace_print_symbols_seq(p, __entry->exit_code, | 538 | ftrace_print_symbols_seq(p, __entry->exit_code, |
| 487 | kvm_x86_ops->exit_reasons_str), | 539 | kvm_x86_ops->exit_reasons_str), |
| 488 | __entry->exit_info1, __entry->exit_info2, | 540 | __entry->exit_info1, __entry->exit_info2, |
| @@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, | |||
| 504 | __entry->rip = rip | 556 | __entry->rip = rip |
| 505 | ), | 557 | ), |
| 506 | 558 | ||
| 507 | TP_printk("rip: 0x%016llx\n", __entry->rip) | 559 | TP_printk("rip: 0x%016llx", __entry->rip) |
| 508 | ); | 560 | ); |
| 509 | 561 | ||
| 510 | /* | 562 | /* |
| @@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga, | |||
| 526 | __entry->address = address; | 578 | __entry->address = address; |
| 527 | ), | 579 | ), |
| 528 | 580 | ||
| 529 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | 581 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", |
| 530 | __entry->rip, __entry->asid, __entry->address) | 582 | __entry->rip, __entry->asid, __entry->address) |
| 531 | ); | 583 | ); |
| 532 | 584 | ||
| @@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit, | |||
| 547 | __entry->slb = slb; | 599 | __entry->slb = slb; |
| 548 | ), | 600 | ), |
| 549 | 601 | ||
| 550 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | 602 | TP_printk("rip: 0x%016llx slb: 0x%08x", |
| 551 | __entry->rip, __entry->slb) | 603 | __entry->rip, __entry->slb) |
| 552 | ); | 604 | ); |
| 553 | 605 | ||
| 606 | #define __print_insn(insn, ilen) ({ \ | ||
| 607 | int i; \ | ||
| 608 | const char *ret = p->buffer + p->len; \ | ||
| 609 | \ | ||
| 610 | for (i = 0; i < ilen; ++i) \ | ||
| 611 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
| 612 | trace_seq_printf(p, "%c", 0); \ | ||
| 613 | ret; \ | ||
| 614 | }) | ||
| 615 | |||
| 616 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | ||
| 617 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | ||
| 618 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | ||
| 619 | #define KVM_EMUL_INSN_F_CS_L (1 << 3) | ||
| 620 | |||
| 621 | #define kvm_trace_symbol_emul_flags \ | ||
| 622 | { 0, "real" }, \ | ||
| 623 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
| 624 | | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ | ||
| 625 | { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ | ||
| 626 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
| 627 | | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ | ||
| 628 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
| 629 | | KVM_EMUL_INSN_F_CS_L, "prot64" } | ||
| 630 | |||
| 631 | #define kei_decode_mode(mode) ({ \ | ||
| 632 | u8 flags = 0xff; \ | ||
| 633 | switch (mode) { \ | ||
| 634 | case X86EMUL_MODE_REAL: \ | ||
| 635 | flags = 0; \ | ||
| 636 | break; \ | ||
| 637 | case X86EMUL_MODE_VM86: \ | ||
| 638 | flags = KVM_EMUL_INSN_F_EFL_VM; \ | ||
| 639 | break; \ | ||
| 640 | case X86EMUL_MODE_PROT16: \ | ||
| 641 | flags = KVM_EMUL_INSN_F_CR0_PE; \ | ||
| 642 | break; \ | ||
| 643 | case X86EMUL_MODE_PROT32: \ | ||
| 644 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
| 645 | | KVM_EMUL_INSN_F_CS_D; \ | ||
| 646 | break; \ | ||
| 647 | case X86EMUL_MODE_PROT64: \ | ||
| 648 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
| 649 | | KVM_EMUL_INSN_F_CS_L; \ | ||
| 650 | break; \ | ||
| 651 | } \ | ||
| 652 | flags; \ | ||
| 653 | }) | ||
| 654 | |||
| 655 | TRACE_EVENT(kvm_emulate_insn, | ||
| 656 | TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), | ||
| 657 | TP_ARGS(vcpu, failed), | ||
| 658 | |||
| 659 | TP_STRUCT__entry( | ||
| 660 | __field( __u64, rip ) | ||
| 661 | __field( __u32, csbase ) | ||
| 662 | __field( __u8, len ) | ||
| 663 | __array( __u8, insn, 15 ) | ||
| 664 | __field( __u8, flags ) | ||
| 665 | __field( __u8, failed ) | ||
| 666 | ), | ||
| 667 | |||
| 668 | TP_fast_assign( | ||
| 669 | __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
| 670 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | ||
| 671 | __entry->len = vcpu->arch.emulate_ctxt.decode.eip | ||
| 672 | - vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
| 673 | memcpy(__entry->insn, | ||
| 674 | vcpu->arch.emulate_ctxt.decode.fetch.data, | ||
| 675 | 15); | ||
| 676 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); | ||
| 677 | __entry->failed = failed; | ||
| 678 | ), | ||
| 679 | |||
| 680 | TP_printk("%x:%llx:%s (%s)%s", | ||
| 681 | __entry->csbase, __entry->rip, | ||
| 682 | __print_insn(__entry->insn, __entry->len), | ||
| 683 | __print_symbolic(__entry->flags, | ||
| 684 | kvm_trace_symbol_emul_flags), | ||
| 685 | __entry->failed ? " failed" : "" | ||
| 686 | ) | ||
| 687 | ); | ||
| 688 | |||
| 689 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) | ||
| 690 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) | ||
| 691 | |||
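The new kvm_emulate_insn tracepoint records up to 15 raw opcode bytes (hex-dumped by __print_insn) together with a small mode cookie built by kei_decode_mode(). The sketch below shows how that cookie is expected to map back to a mode name, keyed to the kvm_trace_symbol_emul_flags table above; the helper is illustrative only, since the real decoding is done by __print_symbolic() in the trace formatter.

	/* Sketch, assuming the usual x86 meaning of CR0.PE, EFLAGS.VM,
	 * CS.D and CS.L; not part of the patch. */
	enum {
		F_CR0_PE = 1 << 0,
		F_EFL_VM = 1 << 1,
		F_CS_D   = 1 << 2,
		F_CS_L   = 1 << 3,
	};

	static const char *emul_mode_name(unsigned int flags)
	{
		switch (flags) {
		case 0:				return "real";
		case F_CR0_PE | F_EFL_VM:	return "vm16";
		case F_CR0_PE:			return "prot16";
		case F_CR0_PE | F_CS_D:		return "prot32";
		case F_CR0_PE | F_CS_L:		return "prot64";
		default:			return "unknown";
		}
	}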
| 554 | #endif /* _TRACE_KVM_H */ | 692 | #endif /* _TRACE_KVM_H */ |
| 555 | 693 | ||
| 694 | #undef TRACE_INCLUDE_PATH | ||
| 695 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
| 696 | #undef TRACE_INCLUDE_FILE | ||
| 697 | #define TRACE_INCLUDE_FILE trace | ||
| 698 | |||
| 556 | /* This part must be outside protection */ | 699 | /* This part must be outside protection */ |
| 557 | #include <trace/define_trace.h> | 700 | #include <trace/define_trace.h> |
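Moving TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE below the #endif follows the usual ftrace convention: define_trace.h re-reads this header with TRACE_HEADER_MULTI_READ and CREATE_TRACE_POINTS in effect, so the include-path/file macros must stay visible on every pass, outside the normal guard. A sketch of the conventional layout this matches (guard and system names are illustrative):

	#undef TRACE_SYSTEM
	#define TRACE_SYSTEM foo

	#if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
	#define _TRACE_FOO_H

	#include <linux/tracepoint.h>
	/* TRACE_EVENT(...) definitions live here */

	#endif /* _TRACE_FOO_H */

	/* Kept outside the guard so define_trace.h can re-read them
	 * on every expansion pass. */
	#undef TRACE_INCLUDE_PATH
	#define TRACE_INCLUDE_PATH arch/x86/kvm
	#undef TRACE_INCLUDE_FILE
	#define TRACE_INCLUDE_FILE foo

	#include <trace/define_trace.h>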
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index edca080407a5..859a01a07dbf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
| 28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/tboot.h> | ||
| 30 | #include "kvm_cache_regs.h" | 31 | #include "kvm_cache_regs.h" |
| 31 | #include "x86.h" | 32 | #include "x86.h" |
| 32 | 33 | ||
| @@ -98,6 +99,8 @@ module_param(ple_gap, int, S_IRUGO); | |||
| 98 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 99 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
| 99 | module_param(ple_window, int, S_IRUGO); | 100 | module_param(ple_window, int, S_IRUGO); |
| 100 | 101 | ||
| 102 | #define NR_AUTOLOAD_MSRS 1 | ||
| 103 | |||
| 101 | struct vmcs { | 104 | struct vmcs { |
| 102 | u32 revision_id; | 105 | u32 revision_id; |
| 103 | u32 abort; | 106 | u32 abort; |
| @@ -125,6 +128,11 @@ struct vcpu_vmx { | |||
| 125 | u64 msr_guest_kernel_gs_base; | 128 | u64 msr_guest_kernel_gs_base; |
| 126 | #endif | 129 | #endif |
| 127 | struct vmcs *vmcs; | 130 | struct vmcs *vmcs; |
| 131 | struct msr_autoload { | ||
| 132 | unsigned nr; | ||
| 133 | struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; | ||
| 134 | struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; | ||
| 135 | } msr_autoload; | ||
| 128 | struct { | 136 | struct { |
| 129 | int loaded; | 137 | int loaded; |
| 130 | u16 fs_sel, gs_sel, ldt_sel; | 138 | u16 fs_sel, gs_sel, ldt_sel; |
| @@ -234,56 +242,56 @@ static const u32 vmx_msr_index[] = { | |||
| 234 | }; | 242 | }; |
| 235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 243 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
| 236 | 244 | ||
| 237 | static inline int is_page_fault(u32 intr_info) | 245 | static inline bool is_page_fault(u32 intr_info) |
| 238 | { | 246 | { |
| 239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 247 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 240 | INTR_INFO_VALID_MASK)) == | 248 | INTR_INFO_VALID_MASK)) == |
| 241 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 249 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); |
| 242 | } | 250 | } |
| 243 | 251 | ||
| 244 | static inline int is_no_device(u32 intr_info) | 252 | static inline bool is_no_device(u32 intr_info) |
| 245 | { | 253 | { |
| 246 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 254 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 247 | INTR_INFO_VALID_MASK)) == | 255 | INTR_INFO_VALID_MASK)) == |
| 248 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | 256 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); |
| 249 | } | 257 | } |
| 250 | 258 | ||
| 251 | static inline int is_invalid_opcode(u32 intr_info) | 259 | static inline bool is_invalid_opcode(u32 intr_info) |
| 252 | { | 260 | { |
| 253 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 261 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 254 | INTR_INFO_VALID_MASK)) == | 262 | INTR_INFO_VALID_MASK)) == |
| 255 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); | 263 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); |
| 256 | } | 264 | } |
| 257 | 265 | ||
| 258 | static inline int is_external_interrupt(u32 intr_info) | 266 | static inline bool is_external_interrupt(u32 intr_info) |
| 259 | { | 267 | { |
| 260 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 268 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
| 261 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 269 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
| 262 | } | 270 | } |
| 263 | 271 | ||
| 264 | static inline int is_machine_check(u32 intr_info) | 272 | static inline bool is_machine_check(u32 intr_info) |
| 265 | { | 273 | { |
| 266 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 274 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 267 | INTR_INFO_VALID_MASK)) == | 275 | INTR_INFO_VALID_MASK)) == |
| 268 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | 276 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); |
| 269 | } | 277 | } |
| 270 | 278 | ||
| 271 | static inline int cpu_has_vmx_msr_bitmap(void) | 279 | static inline bool cpu_has_vmx_msr_bitmap(void) |
| 272 | { | 280 | { |
| 273 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; | 281 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
| 274 | } | 282 | } |
| 275 | 283 | ||
| 276 | static inline int cpu_has_vmx_tpr_shadow(void) | 284 | static inline bool cpu_has_vmx_tpr_shadow(void) |
| 277 | { | 285 | { |
| 278 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 286 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
| 279 | } | 287 | } |
| 280 | 288 | ||
| 281 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 289 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) |
| 282 | { | 290 | { |
| 283 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 291 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
| 284 | } | 292 | } |
| 285 | 293 | ||
| 286 | static inline int cpu_has_secondary_exec_ctrls(void) | 294 | static inline bool cpu_has_secondary_exec_ctrls(void) |
| 287 | { | 295 | { |
| 288 | return vmcs_config.cpu_based_exec_ctrl & | 296 | return vmcs_config.cpu_based_exec_ctrl & |
| 289 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 297 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| @@ -303,80 +311,80 @@ static inline bool cpu_has_vmx_flexpriority(void) | |||
| 303 | 311 | ||
| 304 | static inline bool cpu_has_vmx_ept_execute_only(void) | 312 | static inline bool cpu_has_vmx_ept_execute_only(void) |
| 305 | { | 313 | { |
| 306 | return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); | 314 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; |
| 307 | } | 315 | } |
| 308 | 316 | ||
| 309 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | 317 | static inline bool cpu_has_vmx_eptp_uncacheable(void) |
| 310 | { | 318 | { |
| 311 | return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); | 319 | return vmx_capability.ept & VMX_EPTP_UC_BIT; |
| 312 | } | 320 | } |
| 313 | 321 | ||
| 314 | static inline bool cpu_has_vmx_eptp_writeback(void) | 322 | static inline bool cpu_has_vmx_eptp_writeback(void) |
| 315 | { | 323 | { |
| 316 | return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); | 324 | return vmx_capability.ept & VMX_EPTP_WB_BIT; |
| 317 | } | 325 | } |
| 318 | 326 | ||
| 319 | static inline bool cpu_has_vmx_ept_2m_page(void) | 327 | static inline bool cpu_has_vmx_ept_2m_page(void) |
| 320 | { | 328 | { |
| 321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 329 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; |
| 322 | } | 330 | } |
| 323 | 331 | ||
| 324 | static inline bool cpu_has_vmx_ept_1g_page(void) | 332 | static inline bool cpu_has_vmx_ept_1g_page(void) |
| 325 | { | 333 | { |
| 326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | 334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
| 327 | } | 335 | } |
| 328 | 336 | ||
| 329 | static inline int cpu_has_vmx_invept_individual_addr(void) | 337 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
| 330 | { | 338 | { |
| 331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
| 332 | } | 340 | } |
| 333 | 341 | ||
| 334 | static inline int cpu_has_vmx_invept_context(void) | 342 | static inline bool cpu_has_vmx_invept_context(void) |
| 335 | { | 343 | { |
| 336 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); | 344 | return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; |
| 337 | } | 345 | } |
| 338 | 346 | ||
| 339 | static inline int cpu_has_vmx_invept_global(void) | 347 | static inline bool cpu_has_vmx_invept_global(void) |
| 340 | { | 348 | { |
| 341 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); | 349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
| 342 | } | 350 | } |
| 343 | 351 | ||
| 344 | static inline int cpu_has_vmx_ept(void) | 352 | static inline bool cpu_has_vmx_ept(void) |
| 345 | { | 353 | { |
| 346 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 354 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 347 | SECONDARY_EXEC_ENABLE_EPT; | 355 | SECONDARY_EXEC_ENABLE_EPT; |
| 348 | } | 356 | } |
| 349 | 357 | ||
| 350 | static inline int cpu_has_vmx_unrestricted_guest(void) | 358 | static inline bool cpu_has_vmx_unrestricted_guest(void) |
| 351 | { | 359 | { |
| 352 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 360 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 361 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 354 | } | 362 | } |
| 355 | 363 | ||
| 356 | static inline int cpu_has_vmx_ple(void) | 364 | static inline bool cpu_has_vmx_ple(void) |
| 357 | { | 365 | { |
| 358 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 366 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 367 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
| 360 | } | 368 | } |
| 361 | 369 | ||
| 362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 370 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) |
| 363 | { | 371 | { |
| 364 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 372 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
| 365 | } | 373 | } |
| 366 | 374 | ||
| 367 | static inline int cpu_has_vmx_vpid(void) | 375 | static inline bool cpu_has_vmx_vpid(void) |
| 368 | { | 376 | { |
| 369 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 377 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 370 | SECONDARY_EXEC_ENABLE_VPID; | 378 | SECONDARY_EXEC_ENABLE_VPID; |
| 371 | } | 379 | } |
| 372 | 380 | ||
| 373 | static inline int cpu_has_vmx_rdtscp(void) | 381 | static inline bool cpu_has_vmx_rdtscp(void) |
| 374 | { | 382 | { |
| 375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 383 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 376 | SECONDARY_EXEC_RDTSCP; | 384 | SECONDARY_EXEC_RDTSCP; |
| 377 | } | 385 | } |
| 378 | 386 | ||
| 379 | static inline int cpu_has_virtual_nmis(void) | 387 | static inline bool cpu_has_virtual_nmis(void) |
| 380 | { | 388 | { |
| 381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
| 382 | } | 390 | } |
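The capability predicates switch from int to bool, so the implicit bool conversion does the non-zero test that the dropped !! used to do. It also avoids the general pitfall of returning a masked value through int, where a flag at or above bit 31 of a wider word is lost. A standalone illustration, with a hypothetical 64-bit capability word rather than anything from this file:

	#include <stdbool.h>
	#include <stdint.h>

	static uint64_t cap = 1ULL << 32;	/* hypothetical feature bit */

	/* typically truncates to 0: bit 32 does not survive the int return */
	static int  has_feature_int(void)  { return cap & (1ULL << 32); }
	/* true: bool conversion tests "non-zero" before narrowing */
	static bool has_feature_bool(void) { return cap & (1ULL << 32); }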
| @@ -595,16 +603,56 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 595 | vmcs_write32(EXCEPTION_BITMAP, eb); | 603 | vmcs_write32(EXCEPTION_BITMAP, eb); |
| 596 | } | 604 | } |
| 597 | 605 | ||
| 606 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | ||
| 607 | { | ||
| 608 | unsigned i; | ||
| 609 | struct msr_autoload *m = &vmx->msr_autoload; | ||
| 610 | |||
| 611 | for (i = 0; i < m->nr; ++i) | ||
| 612 | if (m->guest[i].index == msr) | ||
| 613 | break; | ||
| 614 | |||
| 615 | if (i == m->nr) | ||
| 616 | return; | ||
| 617 | --m->nr; | ||
| 618 | m->guest[i] = m->guest[m->nr]; | ||
| 619 | m->host[i] = m->host[m->nr]; | ||
| 620 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
| 621 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
| 622 | } | ||
| 623 | |||
| 624 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | ||
| 625 | u64 guest_val, u64 host_val) | ||
| 626 | { | ||
| 627 | unsigned i; | ||
| 628 | struct msr_autoload *m = &vmx->msr_autoload; | ||
| 629 | |||
| 630 | for (i = 0; i < m->nr; ++i) | ||
| 631 | if (m->guest[i].index == msr) | ||
| 632 | break; | ||
| 633 | |||
| 634 | if (i == m->nr) { | ||
| 635 | ++m->nr; | ||
| 636 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
| 637 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
| 638 | } | ||
| 639 | |||
| 640 | m->guest[i].index = msr; | ||
| 641 | m->guest[i].value = guest_val; | ||
| 642 | m->host[i].index = msr; | ||
| 643 | m->host[i].value = host_val; | ||
| 644 | } | ||
| 645 | |||
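add_atomic_switch_msr()/clear_atomic_switch_msr() maintain the paired guest/host vmx_msr_entry tables that the processor loads automatically on VM entry and VM exit; the counts live in VM_ENTRY_MSR_LOAD_COUNT/VM_EXIT_MSR_LOAD_COUNT and the table addresses are written into the VMCS later in this patch (vmx_vcpu_setup). A usage sketch, mirroring the EFER case added to update_transition_efer() below; the helper name is illustrative and assumes the surrounding vmx.c context:

	/* Illustrative helper (not in the patch): install guest/host values of
	 * one MSR on the atomic switch lists so it flips with the world switch. */
	static void example_autoload_efer(struct vcpu_vmx *vmx,
					  u64 guest_efer, u64 host_efer)
	{
		clear_atomic_switch_msr(vmx, MSR_EFER);		/* drop any stale slot */
		add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
	}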
| 598 | static void reload_tss(void) | 646 | static void reload_tss(void) |
| 599 | { | 647 | { |
| 600 | /* | 648 | /* |
| 601 | * VT restores TR but not its size. Useless. | 649 | * VT restores TR but not its size. Useless. |
| 602 | */ | 650 | */ |
| 603 | struct descriptor_table gdt; | 651 | struct desc_ptr gdt; |
| 604 | struct desc_struct *descs; | 652 | struct desc_struct *descs; |
| 605 | 653 | ||
| 606 | kvm_get_gdt(&gdt); | 654 | native_store_gdt(&gdt); |
| 607 | descs = (void *)gdt.base; | 655 | descs = (void *)gdt.address; |
| 608 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | 656 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ |
| 609 | load_TR_desc(); | 657 | load_TR_desc(); |
| 610 | } | 658 | } |
| @@ -631,9 +679,57 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
| 631 | guest_efer |= host_efer & ignore_bits; | 679 | guest_efer |= host_efer & ignore_bits; |
| 632 | vmx->guest_msrs[efer_offset].data = guest_efer; | 680 | vmx->guest_msrs[efer_offset].data = guest_efer; |
| 633 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; | 681 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
| 682 | |||
| 683 | clear_atomic_switch_msr(vmx, MSR_EFER); | ||
| 684 | /* On ept, can't emulate nx, and must switch nx atomically */ | ||
| 685 | if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { | ||
| 686 | guest_efer = vmx->vcpu.arch.efer; | ||
| 687 | if (!(guest_efer & EFER_LMA)) | ||
| 688 | guest_efer &= ~EFER_LME; | ||
| 689 | add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); | ||
| 690 | return false; | ||
| 691 | } | ||
| 692 | |||
| 634 | return true; | 693 | return true; |
| 635 | } | 694 | } |
| 636 | 695 | ||
| 696 | static unsigned long segment_base(u16 selector) | ||
| 697 | { | ||
| 698 | struct desc_ptr gdt; | ||
| 699 | struct desc_struct *d; | ||
| 700 | unsigned long table_base; | ||
| 701 | unsigned long v; | ||
| 702 | |||
| 703 | if (!(selector & ~3)) | ||
| 704 | return 0; | ||
| 705 | |||
| 706 | native_store_gdt(&gdt); | ||
| 707 | table_base = gdt.address; | ||
| 708 | |||
| 709 | if (selector & 4) { /* from ldt */ | ||
| 710 | u16 ldt_selector = kvm_read_ldt(); | ||
| 711 | |||
| 712 | if (!(ldt_selector & ~3)) | ||
| 713 | return 0; | ||
| 714 | |||
| 715 | table_base = segment_base(ldt_selector); | ||
| 716 | } | ||
| 717 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
| 718 | v = get_desc_base(d); | ||
| 719 | #ifdef CONFIG_X86_64 | ||
| 720 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
| 721 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
| 722 | #endif | ||
| 723 | return v; | ||
| 724 | } | ||
| 725 | |||
| 726 | static inline unsigned long kvm_read_tr_base(void) | ||
| 727 | { | ||
| 728 | u16 tr; | ||
| 729 | asm("str %0" : "=g"(tr)); | ||
| 730 | return segment_base(tr); | ||
| 731 | } | ||
| 732 | |||
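segment_base() moves into vmx.c (the x86.c copy is deleted further down in this diff) since only VMX needs to resolve the host TR base via kvm_read_tr_base(). Null and LDT-relative null selectors return 0; otherwise the descriptor is fetched from the GDT, or from the LDT whose own base is resolved recursively, and the base address is rebuilt from its split fields. A sketch of that reconstruction on a legacy 8-byte descriptor, with illustrative field names (this is essentially what get_desc_base() does):

	struct seg_desc_sketch {
		unsigned short limit0;
		unsigned short base0;		/* base bits  0..15 */
		unsigned char  base1;		/* base bits 16..23 */
		unsigned char  type_s_dpl_p;
		unsigned char  limit1_flags;
		unsigned char  base2;		/* base bits 24..31 */
	};

	static unsigned long desc_base_sketch(const struct seg_desc_sketch *d)
	{
		return (unsigned long)d->base0 |
		       ((unsigned long)d->base1 << 16) |
		       ((unsigned long)d->base2 << 24);
	}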
| 637 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 733 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
| 638 | { | 734 | { |
| 639 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 735 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| @@ -758,7 +854,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 758 | } | 854 | } |
| 759 | 855 | ||
| 760 | if (vcpu->cpu != cpu) { | 856 | if (vcpu->cpu != cpu) { |
| 761 | struct descriptor_table dt; | 857 | struct desc_ptr dt; |
| 762 | unsigned long sysenter_esp; | 858 | unsigned long sysenter_esp; |
| 763 | 859 | ||
| 764 | vcpu->cpu = cpu; | 860 | vcpu->cpu = cpu; |
| @@ -767,8 +863,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 767 | * processors. | 863 | * processors. |
| 768 | */ | 864 | */ |
| 769 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ | 865 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ |
| 770 | kvm_get_gdt(&dt); | 866 | native_store_gdt(&dt); |
| 771 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ | 867 | vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ |
| 772 | 868 | ||
| 773 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 869 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
| 774 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 870 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
| @@ -846,9 +942,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
| 846 | int ret = 0; | 942 | int ret = 0; |
| 847 | 943 | ||
| 848 | if (interruptibility & GUEST_INTR_STATE_STI) | 944 | if (interruptibility & GUEST_INTR_STATE_STI) |
| 849 | ret |= X86_SHADOW_INT_STI; | 945 | ret |= KVM_X86_SHADOW_INT_STI; |
| 850 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | 946 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) |
| 851 | ret |= X86_SHADOW_INT_MOV_SS; | 947 | ret |= KVM_X86_SHADOW_INT_MOV_SS; |
| 852 | 948 | ||
| 853 | return ret & mask; | 949 | return ret & mask; |
| 854 | } | 950 | } |
| @@ -860,9 +956,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
| 860 | 956 | ||
| 861 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | 957 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); |
| 862 | 958 | ||
| 863 | if (mask & X86_SHADOW_INT_MOV_SS) | 959 | if (mask & KVM_X86_SHADOW_INT_MOV_SS) |
| 864 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | 960 | interruptibility |= GUEST_INTR_STATE_MOV_SS; |
| 865 | if (mask & X86_SHADOW_INT_STI) | 961 | else if (mask & KVM_X86_SHADOW_INT_STI) |
| 866 | interruptibility |= GUEST_INTR_STATE_STI; | 962 | interruptibility |= GUEST_INTR_STATE_STI; |
| 867 | 963 | ||
| 868 | if ((interruptibility != interruptibility_old)) | 964 | if ((interruptibility != interruptibility_old)) |
| @@ -882,7 +978,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 882 | } | 978 | } |
| 883 | 979 | ||
| 884 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 980 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
| 885 | bool has_error_code, u32 error_code) | 981 | bool has_error_code, u32 error_code, |
| 982 | bool reinject) | ||
| 886 | { | 983 | { |
| 887 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 984 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 888 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 985 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
| @@ -1176,9 +1273,16 @@ static __init int vmx_disabled_by_bios(void) | |||
| 1176 | u64 msr; | 1273 | u64 msr; |
| 1177 | 1274 | ||
| 1178 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1275 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
| 1179 | return (msr & (FEATURE_CONTROL_LOCKED | | 1276 | if (msr & FEATURE_CONTROL_LOCKED) { |
| 1180 | FEATURE_CONTROL_VMXON_ENABLED)) | 1277 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
| 1181 | == FEATURE_CONTROL_LOCKED; | 1278 | && tboot_enabled()) |
| 1279 | return 1; | ||
| 1280 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
| 1281 | && !tboot_enabled()) | ||
| 1282 | return 1; | ||
| 1283 | } | ||
| 1284 | |||
| 1285 | return 0; | ||
| 1182 | /* locked but not enabled */ | 1286 | /* locked but not enabled */ |
| 1183 | } | 1287 | } |
| 1184 | 1288 | ||
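With tboot (Intel TXT) taken into account, MSR_IA32_FEATURE_CONTROL carries separate VMXON enable bits for inside and outside SMX operation, so "disabled by BIOS" becomes: the MSR is locked and the enable bit matching the current launch type is clear. hardware_enable() in the next hunk checks and, if still unlocked, sets the same bit combination before VMXON. A condensed sketch of the check; the helper is illustrative, the real code is open-coded in vmx_disabled_by_bios():

	static bool vmx_enabled_by_feature_control(u64 msr, bool in_smx)
	{
		if (!(msr & FEATURE_CONTROL_LOCKED))
			return true;	/* unlocked: hardware_enable() may set and lock it */
		if (in_smx)
			return msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
		return msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
	}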
| @@ -1186,21 +1290,23 @@ static int hardware_enable(void *garbage) | |||
| 1186 | { | 1290 | { |
| 1187 | int cpu = raw_smp_processor_id(); | 1291 | int cpu = raw_smp_processor_id(); |
| 1188 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1292 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
| 1189 | u64 old; | 1293 | u64 old, test_bits; |
| 1190 | 1294 | ||
| 1191 | if (read_cr4() & X86_CR4_VMXE) | 1295 | if (read_cr4() & X86_CR4_VMXE) |
| 1192 | return -EBUSY; | 1296 | return -EBUSY; |
| 1193 | 1297 | ||
| 1194 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1298 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
| 1195 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1299 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
| 1196 | if ((old & (FEATURE_CONTROL_LOCKED | | 1300 | |
| 1197 | FEATURE_CONTROL_VMXON_ENABLED)) | 1301 | test_bits = FEATURE_CONTROL_LOCKED; |
| 1198 | != (FEATURE_CONTROL_LOCKED | | 1302 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
| 1199 | FEATURE_CONTROL_VMXON_ENABLED)) | 1303 | if (tboot_enabled()) |
| 1304 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; | ||
| 1305 | |||
| 1306 | if ((old & test_bits) != test_bits) { | ||
| 1200 | /* enable and lock */ | 1307 | /* enable and lock */ |
| 1201 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | | 1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
| 1202 | FEATURE_CONTROL_LOCKED | | 1309 | } |
| 1203 | FEATURE_CONTROL_VMXON_ENABLED); | ||
| 1204 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
| 1205 | asm volatile (ASM_VMX_VMXON_RAX | 1311 | asm volatile (ASM_VMX_VMXON_RAX |
| 1206 | : : "a"(&phys_addr), "m"(phys_addr) | 1312 | : : "a"(&phys_addr), "m"(phys_addr) |
| @@ -1521,7 +1627,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
| 1521 | struct kvm_memslots *slots; | 1627 | struct kvm_memslots *slots; |
| 1522 | gfn_t base_gfn; | 1628 | gfn_t base_gfn; |
| 1523 | 1629 | ||
| 1524 | slots = rcu_dereference(kvm->memslots); | 1630 | slots = kvm_memslots(kvm); |
| 1525 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + |
| 1526 | kvm->memslots->memslots[0].npages - 3; | 1632 | kvm->memslots->memslots[0].npages - 3; |
| 1527 | return base_gfn << PAGE_SHIFT; | 1633 | return base_gfn << PAGE_SHIFT; |
| @@ -1649,6 +1755,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
| 1649 | vmcs_write32(VM_ENTRY_CONTROLS, | 1755 | vmcs_write32(VM_ENTRY_CONTROLS, |
| 1650 | vmcs_read32(VM_ENTRY_CONTROLS) | 1756 | vmcs_read32(VM_ENTRY_CONTROLS) |
| 1651 | & ~VM_ENTRY_IA32E_MODE); | 1757 | & ~VM_ENTRY_IA32E_MODE); |
| 1758 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
| 1652 | } | 1759 | } |
| 1653 | 1760 | ||
| 1654 | #endif | 1761 | #endif |
| @@ -1934,28 +2041,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
| 1934 | *l = (ar >> 13) & 1; | 2041 | *l = (ar >> 13) & 1; |
| 1935 | } | 2042 | } |
| 1936 | 2043 | ||
| 1937 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2044 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1938 | { | 2045 | { |
| 1939 | dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); | 2046 | dt->size = vmcs_read32(GUEST_IDTR_LIMIT); |
| 1940 | dt->base = vmcs_readl(GUEST_IDTR_BASE); | 2047 | dt->address = vmcs_readl(GUEST_IDTR_BASE); |
| 1941 | } | 2048 | } |
| 1942 | 2049 | ||
| 1943 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2050 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1944 | { | 2051 | { |
| 1945 | vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); | 2052 | vmcs_write32(GUEST_IDTR_LIMIT, dt->size); |
| 1946 | vmcs_writel(GUEST_IDTR_BASE, dt->base); | 2053 | vmcs_writel(GUEST_IDTR_BASE, dt->address); |
| 1947 | } | 2054 | } |
| 1948 | 2055 | ||
| 1949 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2056 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1950 | { | 2057 | { |
| 1951 | dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); | 2058 | dt->size = vmcs_read32(GUEST_GDTR_LIMIT); |
| 1952 | dt->base = vmcs_readl(GUEST_GDTR_BASE); | 2059 | dt->address = vmcs_readl(GUEST_GDTR_BASE); |
| 1953 | } | 2060 | } |
| 1954 | 2061 | ||
| 1955 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2062 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1956 | { | 2063 | { |
| 1957 | vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); | 2064 | vmcs_write32(GUEST_GDTR_LIMIT, dt->size); |
| 1958 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 2065 | vmcs_writel(GUEST_GDTR_BASE, dt->address); |
| 1959 | } | 2066 | } |
| 1960 | 2067 | ||
| 1961 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | 2068 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) |
| @@ -2296,6 +2403,16 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
| 2296 | spin_unlock(&vmx_vpid_lock); | 2403 | spin_unlock(&vmx_vpid_lock); |
| 2297 | } | 2404 | } |
| 2298 | 2405 | ||
| 2406 | static void free_vpid(struct vcpu_vmx *vmx) | ||
| 2407 | { | ||
| 2408 | if (!enable_vpid) | ||
| 2409 | return; | ||
| 2410 | spin_lock(&vmx_vpid_lock); | ||
| 2411 | if (vmx->vpid != 0) | ||
| 2412 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
| 2413 | spin_unlock(&vmx_vpid_lock); | ||
| 2414 | } | ||
| 2415 | |||
| 2299 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 2416 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
| 2300 | { | 2417 | { |
| 2301 | int f = sizeof(unsigned long); | 2418 | int f = sizeof(unsigned long); |
| @@ -2334,7 +2451,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2334 | u32 junk; | 2451 | u32 junk; |
| 2335 | u64 host_pat, tsc_this, tsc_base; | 2452 | u64 host_pat, tsc_this, tsc_base; |
| 2336 | unsigned long a; | 2453 | unsigned long a; |
| 2337 | struct descriptor_table dt; | 2454 | struct desc_ptr dt; |
| 2338 | int i; | 2455 | int i; |
| 2339 | unsigned long kvm_vmx_return; | 2456 | unsigned long kvm_vmx_return; |
| 2340 | u32 exec_control; | 2457 | u32 exec_control; |
| @@ -2415,14 +2532,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2415 | 2532 | ||
| 2416 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 2533 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
| 2417 | 2534 | ||
| 2418 | kvm_get_idt(&dt); | 2535 | native_store_idt(&dt); |
| 2419 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 2536 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
| 2420 | 2537 | ||
| 2421 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | 2538 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); |
| 2422 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ | 2539 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ |
| 2423 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 2540 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
| 2424 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 2541 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
| 2542 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | ||
| 2425 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | 2543 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); |
| 2544 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); | ||
| 2426 | 2545 | ||
| 2427 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); | 2546 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); |
| 2428 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); | 2547 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); |
| @@ -2947,22 +3066,20 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
| 2947 | int size, in, string; | 3066 | int size, in, string; |
| 2948 | unsigned port; | 3067 | unsigned port; |
| 2949 | 3068 | ||
| 2950 | ++vcpu->stat.io_exits; | ||
| 2951 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3069 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 2952 | string = (exit_qualification & 16) != 0; | 3070 | string = (exit_qualification & 16) != 0; |
| 3071 | in = (exit_qualification & 8) != 0; | ||
| 2953 | 3072 | ||
| 2954 | if (string) { | 3073 | ++vcpu->stat.io_exits; |
| 2955 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) | ||
| 2956 | return 0; | ||
| 2957 | return 1; | ||
| 2958 | } | ||
| 2959 | 3074 | ||
| 2960 | size = (exit_qualification & 7) + 1; | 3075 | if (string || in) |
| 2961 | in = (exit_qualification & 8) != 0; | 3076 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); |
| 2962 | port = exit_qualification >> 16; | ||
| 2963 | 3077 | ||
| 3078 | port = exit_qualification >> 16; | ||
| 3079 | size = (exit_qualification & 7) + 1; | ||
| 2964 | skip_emulated_instruction(vcpu); | 3080 | skip_emulated_instruction(vcpu); |
| 2965 | return kvm_emulate_pio(vcpu, in, size, port); | 3081 | |
| 3082 | return kvm_fast_pio_out(vcpu, size, port); | ||
| 2966 | } | 3083 | } |
| 2967 | 3084 | ||
| 2968 | static void | 3085 | static void |
| @@ -3053,19 +3170,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
| 3053 | return 0; | 3170 | return 0; |
| 3054 | } | 3171 | } |
| 3055 | 3172 | ||
| 3056 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
| 3057 | { | ||
| 3058 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 3059 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 3060 | return -1; | ||
| 3061 | } | ||
| 3062 | return 0; | ||
| 3063 | } | ||
| 3064 | |||
| 3065 | static int handle_dr(struct kvm_vcpu *vcpu) | 3173 | static int handle_dr(struct kvm_vcpu *vcpu) |
| 3066 | { | 3174 | { |
| 3067 | unsigned long exit_qualification; | 3175 | unsigned long exit_qualification; |
| 3068 | unsigned long val; | ||
| 3069 | int dr, reg; | 3176 | int dr, reg; |
| 3070 | 3177 | ||
| 3071 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ | 3178 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ |
| @@ -3100,67 +3207,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
| 3100 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; | 3207 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; |
| 3101 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 3208 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
| 3102 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 3209 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
| 3103 | switch (dr) { | 3210 | unsigned long val; |
| 3104 | case 0 ... 3: | 3211 | if (!kvm_get_dr(vcpu, dr, &val)) |
| 3105 | val = vcpu->arch.db[dr]; | 3212 | kvm_register_write(vcpu, reg, val); |
| 3106 | break; | 3213 | } else |
| 3107 | case 4: | 3214 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); |
| 3108 | if (check_dr_alias(vcpu) < 0) | ||
| 3109 | return 1; | ||
| 3110 | /* fall through */ | ||
| 3111 | case 6: | ||
| 3112 | val = vcpu->arch.dr6; | ||
| 3113 | break; | ||
| 3114 | case 5: | ||
| 3115 | if (check_dr_alias(vcpu) < 0) | ||
| 3116 | return 1; | ||
| 3117 | /* fall through */ | ||
| 3118 | default: /* 7 */ | ||
| 3119 | val = vcpu->arch.dr7; | ||
| 3120 | break; | ||
| 3121 | } | ||
| 3122 | kvm_register_write(vcpu, reg, val); | ||
| 3123 | } else { | ||
| 3124 | val = vcpu->arch.regs[reg]; | ||
| 3125 | switch (dr) { | ||
| 3126 | case 0 ... 3: | ||
| 3127 | vcpu->arch.db[dr] = val; | ||
| 3128 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
| 3129 | vcpu->arch.eff_db[dr] = val; | ||
| 3130 | break; | ||
| 3131 | case 4: | ||
| 3132 | if (check_dr_alias(vcpu) < 0) | ||
| 3133 | return 1; | ||
| 3134 | /* fall through */ | ||
| 3135 | case 6: | ||
| 3136 | if (val & 0xffffffff00000000ULL) { | ||
| 3137 | kvm_inject_gp(vcpu, 0); | ||
| 3138 | return 1; | ||
| 3139 | } | ||
| 3140 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
| 3141 | break; | ||
| 3142 | case 5: | ||
| 3143 | if (check_dr_alias(vcpu) < 0) | ||
| 3144 | return 1; | ||
| 3145 | /* fall through */ | ||
| 3146 | default: /* 7 */ | ||
| 3147 | if (val & 0xffffffff00000000ULL) { | ||
| 3148 | kvm_inject_gp(vcpu, 0); | ||
| 3149 | return 1; | ||
| 3150 | } | ||
| 3151 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
| 3152 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
| 3153 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | ||
| 3154 | vcpu->arch.switch_db_regs = | ||
| 3155 | (val & DR7_BP_EN_MASK); | ||
| 3156 | } | ||
| 3157 | break; | ||
| 3158 | } | ||
| 3159 | } | ||
| 3160 | skip_emulated_instruction(vcpu); | 3215 | skip_emulated_instruction(vcpu); |
| 3161 | return 1; | 3216 | return 1; |
| 3162 | } | 3217 | } |
| 3163 | 3218 | ||
| 3219 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | ||
| 3220 | { | ||
| 3221 | vmcs_writel(GUEST_DR7, val); | ||
| 3222 | } | ||
| 3223 | |||
| 3164 | static int handle_cpuid(struct kvm_vcpu *vcpu) | 3224 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
| 3165 | { | 3225 | { |
| 3166 | kvm_emulate_cpuid(vcpu); | 3226 | kvm_emulate_cpuid(vcpu); |
| @@ -3292,6 +3352,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
| 3292 | { | 3352 | { |
| 3293 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3353 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3294 | unsigned long exit_qualification; | 3354 | unsigned long exit_qualification; |
| 3355 | bool has_error_code = false; | ||
| 3356 | u32 error_code = 0; | ||
| 3295 | u16 tss_selector; | 3357 | u16 tss_selector; |
| 3296 | int reason, type, idt_v; | 3358 | int reason, type, idt_v; |
| 3297 | 3359 | ||
| @@ -3314,6 +3376,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
| 3314 | kvm_clear_interrupt_queue(vcpu); | 3376 | kvm_clear_interrupt_queue(vcpu); |
| 3315 | break; | 3377 | break; |
| 3316 | case INTR_TYPE_HARD_EXCEPTION: | 3378 | case INTR_TYPE_HARD_EXCEPTION: |
| 3379 | if (vmx->idt_vectoring_info & | ||
| 3380 | VECTORING_INFO_DELIVER_CODE_MASK) { | ||
| 3381 | has_error_code = true; | ||
| 3382 | error_code = | ||
| 3383 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
| 3384 | } | ||
| 3385 | /* fall through */ | ||
| 3317 | case INTR_TYPE_SOFT_EXCEPTION: | 3386 | case INTR_TYPE_SOFT_EXCEPTION: |
| 3318 | kvm_clear_exception_queue(vcpu); | 3387 | kvm_clear_exception_queue(vcpu); |
| 3319 | break; | 3388 | break; |
| @@ -3328,8 +3397,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
| 3328 | type != INTR_TYPE_NMI_INTR)) | 3397 | type != INTR_TYPE_NMI_INTR)) |
| 3329 | skip_emulated_instruction(vcpu); | 3398 | skip_emulated_instruction(vcpu); |
| 3330 | 3399 | ||
| 3331 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3400 | if (kvm_task_switch(vcpu, tss_selector, reason, |
| 3401 | has_error_code, error_code) == EMULATE_FAIL) { | ||
| 3402 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 3403 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 3404 | vcpu->run->internal.ndata = 0; | ||
| 3332 | return 0; | 3405 | return 0; |
| 3406 | } | ||
| 3333 | 3407 | ||
| 3334 | /* clear all local breakpoint enable flags */ | 3408 | /* clear all local breakpoint enable flags */ |
| 3335 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); | 3409 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); |
| @@ -3574,7 +3648,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
| 3574 | u32 exit_reason = vmx->exit_reason; | 3648 | u32 exit_reason = vmx->exit_reason; |
| 3575 | u32 vectoring_info = vmx->idt_vectoring_info; | 3649 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 3576 | 3650 | ||
| 3577 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3651 | trace_kvm_exit(exit_reason, vcpu); |
| 3578 | 3652 | ||
| 3579 | /* If guest state is invalid, start emulating */ | 3653 | /* If guest state is invalid, start emulating */ |
| 3580 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3654 | if (vmx->emulation_required && emulate_invalid_guest_state) |
| @@ -3923,10 +3997,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 3923 | { | 3997 | { |
| 3924 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3998 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3925 | 3999 | ||
| 3926 | spin_lock(&vmx_vpid_lock); | 4000 | free_vpid(vmx); |
| 3927 | if (vmx->vpid != 0) | ||
| 3928 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
| 3929 | spin_unlock(&vmx_vpid_lock); | ||
| 3930 | vmx_free_vmcs(vcpu); | 4001 | vmx_free_vmcs(vcpu); |
| 3931 | kfree(vmx->guest_msrs); | 4002 | kfree(vmx->guest_msrs); |
| 3932 | kvm_vcpu_uninit(vcpu); | 4003 | kvm_vcpu_uninit(vcpu); |
| @@ -3988,6 +4059,7 @@ free_msrs: | |||
| 3988 | uninit_vcpu: | 4059 | uninit_vcpu: |
| 3989 | kvm_vcpu_uninit(&vmx->vcpu); | 4060 | kvm_vcpu_uninit(&vmx->vcpu); |
| 3990 | free_vcpu: | 4061 | free_vcpu: |
| 4062 | free_vpid(vmx); | ||
| 3991 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4063 | kmem_cache_free(kvm_vcpu_cache, vmx); |
| 3992 | return ERR_PTR(err); | 4064 | return ERR_PTR(err); |
| 3993 | } | 4065 | } |
| @@ -4118,6 +4190,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
| 4118 | } | 4190 | } |
| 4119 | } | 4191 | } |
| 4120 | 4192 | ||
| 4193 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
| 4194 | { | ||
| 4195 | } | ||
| 4196 | |||
| 4121 | static struct kvm_x86_ops vmx_x86_ops = { | 4197 | static struct kvm_x86_ops vmx_x86_ops = { |
| 4122 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4198 | .cpu_has_kvm_support = cpu_has_kvm_support, |
| 4123 | .disabled_by_bios = vmx_disabled_by_bios, | 4199 | .disabled_by_bios = vmx_disabled_by_bios, |
| @@ -4154,6 +4230,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 4154 | .set_idt = vmx_set_idt, | 4230 | .set_idt = vmx_set_idt, |
| 4155 | .get_gdt = vmx_get_gdt, | 4231 | .get_gdt = vmx_get_gdt, |
| 4156 | .set_gdt = vmx_set_gdt, | 4232 | .set_gdt = vmx_set_gdt, |
| 4233 | .set_dr7 = vmx_set_dr7, | ||
| 4157 | .cache_reg = vmx_cache_reg, | 4234 | .cache_reg = vmx_cache_reg, |
| 4158 | .get_rflags = vmx_get_rflags, | 4235 | .get_rflags = vmx_get_rflags, |
| 4159 | .set_rflags = vmx_set_rflags, | 4236 | .set_rflags = vmx_set_rflags, |
| @@ -4189,6 +4266,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 4189 | .cpuid_update = vmx_cpuid_update, | 4266 | .cpuid_update = vmx_cpuid_update, |
| 4190 | 4267 | ||
| 4191 | .rdtscp_supported = vmx_rdtscp_supported, | 4268 | .rdtscp_supported = vmx_rdtscp_supported, |
| 4269 | |||
| 4270 | .set_supported_cpuid = vmx_set_supported_cpuid, | ||
| 4192 | }; | 4271 | }; |
| 4193 | 4272 | ||
| 4194 | static int __init vmx_init(void) | 4273 | static int __init vmx_init(void) |
| @@ -4236,7 +4315,8 @@ static int __init vmx_init(void) | |||
| 4236 | 4315 | ||
| 4237 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 4316 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
| 4238 | 4317 | ||
| 4239 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 4318 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
| 4319 | __alignof__(struct vcpu_vmx), THIS_MODULE); | ||
| 4240 | if (r) | 4320 | if (r) |
| 4241 | goto out3; | 4321 | goto out3; |
| 4242 | 4322 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dd9bc8fb81ab..05d571f6f196 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -42,7 +42,7 @@ | |||
| 42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
| 43 | #include <linux/perf_event.h> | 43 | #include <linux/perf_event.h> |
| 44 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
| 45 | #undef TRACE_INCLUDE_FILE | 45 | |
| 46 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
| 47 | #include "trace.h" | 47 | #include "trace.h" |
| 48 | 48 | ||
| @@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore) | |||
| 224 | kvm_on_user_return(&smsr->urn); | 224 | kvm_on_user_return(&smsr->urn); |
| 225 | } | 225 | } |
| 226 | 226 | ||
| 227 | unsigned long segment_base(u16 selector) | ||
| 228 | { | ||
| 229 | struct descriptor_table gdt; | ||
| 230 | struct desc_struct *d; | ||
| 231 | unsigned long table_base; | ||
| 232 | unsigned long v; | ||
| 233 | |||
| 234 | if (selector == 0) | ||
| 235 | return 0; | ||
| 236 | |||
| 237 | kvm_get_gdt(&gdt); | ||
| 238 | table_base = gdt.base; | ||
| 239 | |||
| 240 | if (selector & 4) { /* from ldt */ | ||
| 241 | u16 ldt_selector = kvm_read_ldt(); | ||
| 242 | |||
| 243 | table_base = segment_base(ldt_selector); | ||
| 244 | } | ||
| 245 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
| 246 | v = get_desc_base(d); | ||
| 247 | #ifdef CONFIG_X86_64 | ||
| 248 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
| 249 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
| 250 | #endif | ||
| 251 | return v; | ||
| 252 | } | ||
| 253 | EXPORT_SYMBOL_GPL(segment_base); | ||
| 254 | |||
| 255 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | 227 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) |
| 256 | { | 228 | { |
| 257 | if (irqchip_in_kernel(vcpu->kvm)) | 229 | if (irqchip_in_kernel(vcpu->kvm)) |
| @@ -293,7 +265,8 @@ static int exception_class(int vector) | |||
| 293 | } | 265 | } |
| 294 | 266 | ||
| 295 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | 267 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, |
| 296 | unsigned nr, bool has_error, u32 error_code) | 268 | unsigned nr, bool has_error, u32 error_code, |
| 269 | bool reinject) | ||
| 297 | { | 270 | { |
| 298 | u32 prev_nr; | 271 | u32 prev_nr; |
| 299 | int class1, class2; | 272 | int class1, class2; |
| @@ -304,6 +277,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
| 304 | vcpu->arch.exception.has_error_code = has_error; | 277 | vcpu->arch.exception.has_error_code = has_error; |
| 305 | vcpu->arch.exception.nr = nr; | 278 | vcpu->arch.exception.nr = nr; |
| 306 | vcpu->arch.exception.error_code = error_code; | 279 | vcpu->arch.exception.error_code = error_code; |
| 280 | vcpu->arch.exception.reinject = reinject; | ||
| 307 | return; | 281 | return; |
| 308 | } | 282 | } |
| 309 | 283 | ||
| @@ -332,10 +306,16 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
| 332 | 306 | ||
| 333 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 307 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
| 334 | { | 308 | { |
| 335 | kvm_multiple_exception(vcpu, nr, false, 0); | 309 | kvm_multiple_exception(vcpu, nr, false, 0, false); |
| 336 | } | 310 | } |
| 337 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 311 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
| 338 | 312 | ||
| 313 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | ||
| 314 | { | ||
| 315 | kvm_multiple_exception(vcpu, nr, false, 0, true); | ||
| 316 | } | ||
| 317 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | ||
| 318 | |||
| 339 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 319 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, |
| 340 | u32 error_code) | 320 | u32 error_code) |
| 341 | { | 321 | { |
| @@ -352,10 +332,16 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
| 352 | 332 | ||
| 353 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 333 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
| 354 | { | 334 | { |
| 355 | kvm_multiple_exception(vcpu, nr, true, error_code); | 335 | kvm_multiple_exception(vcpu, nr, true, error_code, false); |
| 356 | } | 336 | } |
| 357 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 337 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
| 358 | 338 | ||
| 339 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | ||
| 340 | { | ||
| 341 | kvm_multiple_exception(vcpu, nr, true, error_code, true); | ||
| 342 | } | ||
| 343 | EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); | ||
| 344 | |||
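kvm_queue_exception*() and the new kvm_requeue_exception*() variants all funnel into kvm_multiple_exception(); the extra reinject flag marks exceptions that were already being delivered when the VM exit happened, so they are pushed back verbatim on the next entry instead of being merged into a contributory or double fault. A sketch of the intended split at a call site (illustrative, not lifted from this patch):

	/* Illustrative helper: re-reflect an exception recorded in the exit
	 * information rather than queue it as a brand new fault. */
	static void sketch_reflect_exception(struct kvm_vcpu *vcpu, unsigned nr,
					     bool has_error_code, u32 error_code)
	{
		if (has_error_code)
			kvm_requeue_exception_e(vcpu, nr, error_code);
		else
			kvm_requeue_exception(vcpu, nr);
	}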
| 359 | /* | 345 | /* |
| 360 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue | 346 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue |
| 361 | * a #GP and return false. | 347 | * a #GP and return false. |
| @@ -476,7 +462,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 476 | } | 462 | } |
| 477 | 463 | ||
| 478 | kvm_x86_ops->set_cr0(vcpu, cr0); | 464 | kvm_x86_ops->set_cr0(vcpu, cr0); |
| 479 | vcpu->arch.cr0 = cr0; | ||
| 480 | 465 | ||
| 481 | kvm_mmu_reset_context(vcpu); | 466 | kvm_mmu_reset_context(vcpu); |
| 482 | return; | 467 | return; |
| @@ -485,7 +470,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
| 485 | 470 | ||
| 486 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
| 487 | { | 472 | { |
| 488 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); | 473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
| 489 | } | 474 | } |
| 490 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 475 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
| 491 | 476 | ||
| @@ -517,7 +502,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 517 | } | 502 | } |
| 518 | kvm_x86_ops->set_cr4(vcpu, cr4); | 503 | kvm_x86_ops->set_cr4(vcpu, cr4); |
| 519 | vcpu->arch.cr4 = cr4; | 504 | vcpu->arch.cr4 = cr4; |
| 520 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | ||
| 521 | kvm_mmu_reset_context(vcpu); | 505 | kvm_mmu_reset_context(vcpu); |
| 522 | } | 506 | } |
| 523 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
| @@ -592,6 +576,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
| 592 | } | 576 | } |
| 593 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
| 594 | 578 | ||
| 579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
| 580 | { | ||
| 581 | switch (dr) { | ||
| 582 | case 0 ... 3: | ||
| 583 | vcpu->arch.db[dr] = val; | ||
| 584 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
| 585 | vcpu->arch.eff_db[dr] = val; | ||
| 586 | break; | ||
| 587 | case 4: | ||
| 588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 589 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 590 | return 1; | ||
| 591 | } | ||
| 592 | /* fall through */ | ||
| 593 | case 6: | ||
| 594 | if (val & 0xffffffff00000000ULL) { | ||
| 595 | kvm_inject_gp(vcpu, 0); | ||
| 596 | return 1; | ||
| 597 | } | ||
| 598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
| 599 | break; | ||
| 600 | case 5: | ||
| 601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 602 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 603 | return 1; | ||
| 604 | } | ||
| 605 | /* fall through */ | ||
| 606 | default: /* 7 */ | ||
| 607 | if (val & 0xffffffff00000000ULL) { | ||
| 608 | kvm_inject_gp(vcpu, 0); | ||
| 609 | return 1; | ||
| 610 | } | ||
| 611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
| 612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
| 613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | ||
| 614 | vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); | ||
| 615 | } | ||
| 616 | break; | ||
| 617 | } | ||
| 618 | |||
| 619 | return 0; | ||
| 620 | } | ||
| 621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | ||
| 622 | |||
| 623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | ||
| 624 | { | ||
| 625 | switch (dr) { | ||
| 626 | case 0 ... 3: | ||
| 627 | *val = vcpu->arch.db[dr]; | ||
| 628 | break; | ||
| 629 | case 4: | ||
| 630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 632 | return 1; | ||
| 633 | } | ||
| 634 | /* fall through */ | ||
| 635 | case 6: | ||
| 636 | *val = vcpu->arch.dr6; | ||
| 637 | break; | ||
| 638 | case 5: | ||
| 639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 641 | return 1; | ||
| 642 | } | ||
| 643 | /* fall through */ | ||
| 644 | default: /* 7 */ | ||
| 645 | *val = vcpu->arch.dr7; | ||
| 646 | break; | ||
| 647 | } | ||
| 648 | |||
| 649 | return 0; | ||
| 650 | } | ||
| 651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
| 652 | |||
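These two helpers absorb the debug-register switch statements that vmx.c's handle_dr() used to carry (removed earlier in this diff): DR4/DR5 alias DR6/DR7 unless CR4.DE is set, in which case they raise #UD; writes with bits 63:32 set take a #GP; and a nonzero return tells the caller the exception has already been queued. A caller-side sketch following the shape of the new handle_dr(); the helper name is illustrative:

	/* Nonzero return from kvm_get_dr()/kvm_set_dr() means the fault was
	 * already injected, so the caller just avoids using the value. */
	static void sketch_mov_from_dr(struct kvm_vcpu *vcpu, int dr, int reg)
	{
		unsigned long val;

		if (!kvm_get_dr(vcpu, dr, &val))
			kvm_register_write(vcpu, reg, val);
		/* on failure: #UD (DR4/DR5 with CR4.DE) was already queued */
	}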
| 595 | static inline u32 bit(int bitno) | 653 | static inline u32 bit(int bitno) |
| 596 | { | 654 | { |
| 597 | return 1 << (bitno & 31); | 655 | return 1 << (bitno & 31); |
| @@ -606,9 +664,10 @@ static inline u32 bit(int bitno) | |||
| 606 | * kvm-specific. Those are put in the beginning of the list. | 664 | * kvm-specific. Those are put in the beginning of the list. |
| 607 | */ | 665 | */ |
| 608 | 666 | ||
| 609 | #define KVM_SAVE_MSRS_BEGIN 5 | 667 | #define KVM_SAVE_MSRS_BEGIN 7 |
| 610 | static u32 msrs_to_save[] = { | 668 | static u32 msrs_to_save[] = { |
| 611 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 669 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
| 670 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | ||
| 612 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 671 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
| 613 | HV_X64_MSR_APIC_ASSIST_PAGE, | 672 | HV_X64_MSR_APIC_ASSIST_PAGE, |
| 614 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 673 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
| @@ -625,48 +684,42 @@ static u32 emulated_msrs[] = { | |||
| 625 | MSR_IA32_MISC_ENABLE, | 684 | MSR_IA32_MISC_ENABLE, |
| 626 | }; | 685 | }; |
| 627 | 686 | ||
| 628 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
| 629 | { | 688 | { |
| 630 | if (efer & efer_reserved_bits) { | 689 | if (efer & efer_reserved_bits) |
| 631 | kvm_inject_gp(vcpu, 0); | 690 | return 1; |
| 632 | return; | ||
| 633 | } | ||
| 634 | 691 | ||
| 635 | if (is_paging(vcpu) | 692 | if (is_paging(vcpu) |
| 636 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { | 693 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) |
| 637 | kvm_inject_gp(vcpu, 0); | 694 | return 1; |
| 638 | return; | ||
| 639 | } | ||
| 640 | 695 | ||
| 641 | if (efer & EFER_FFXSR) { | 696 | if (efer & EFER_FFXSR) { |
| 642 | struct kvm_cpuid_entry2 *feat; | 697 | struct kvm_cpuid_entry2 *feat; |
| 643 | 698 | ||
| 644 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 699 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
| 645 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 700 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
| 646 | kvm_inject_gp(vcpu, 0); | 701 | return 1; |
| 647 | return; | ||
| 648 | } | ||
| 649 | } | 702 | } |
| 650 | 703 | ||
| 651 | if (efer & EFER_SVME) { | 704 | if (efer & EFER_SVME) { |
| 652 | struct kvm_cpuid_entry2 *feat; | 705 | struct kvm_cpuid_entry2 *feat; |
| 653 | 706 | ||
| 654 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 707 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
| 655 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 708 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
| 656 | kvm_inject_gp(vcpu, 0); | 709 | return 1; |
| 657 | return; | ||
| 658 | } | ||
| 659 | } | 710 | } |
| 660 | 711 | ||
| 661 | kvm_x86_ops->set_efer(vcpu, efer); | ||
| 662 | |||
| 663 | efer &= ~EFER_LMA; | 712 | efer &= ~EFER_LMA; |
| 664 | efer |= vcpu->arch.efer & EFER_LMA; | 713 | efer |= vcpu->arch.efer & EFER_LMA; |
| 665 | 714 | ||
| 715 | kvm_x86_ops->set_efer(vcpu, efer); | ||
| 716 | |||
| 666 | vcpu->arch.efer = efer; | 717 | vcpu->arch.efer = efer; |
| 667 | 718 | ||
| 668 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
| 669 | kvm_mmu_reset_context(vcpu); | 720 | kvm_mmu_reset_context(vcpu); |
| 721 | |||
| 722 | return 0; | ||
| 670 | } | 723 | } |
| 671 | 724 | ||
| 672 | void kvm_enable_efer_bits(u64 mask) | 725 | void kvm_enable_efer_bits(u64 mask) |
| @@ -696,14 +749,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
| 696 | 749 | ||
| 697 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 750 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
| 698 | { | 751 | { |
| 699 | static int version; | 752 | int version; |
| 753 | int r; | ||
| 700 | struct pvclock_wall_clock wc; | 754 | struct pvclock_wall_clock wc; |
| 701 | struct timespec boot; | 755 | struct timespec boot; |
| 702 | 756 | ||
| 703 | if (!wall_clock) | 757 | if (!wall_clock) |
| 704 | return; | 758 | return; |
| 705 | 759 | ||
| 706 | version++; | 760 | r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); |
| 761 | if (r) | ||
| 762 | return; | ||
| 763 | |||
| 764 | if (version & 1) | ||
| 765 | ++version; /* first time write, random junk */ | ||
| 766 | |||
| 767 | ++version; | ||
| 707 | 768 | ||
| 708 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 769 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
| 709 | 770 | ||
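The version dance above (read the guest's word, force it even if it was odd junk, then bump it to odd before filling in the data) is one half of a seqcount-style protocol; the comment in kvm_write_guest_time() below spells out the other half, writing an even number once the update is finished. A guest reader would therefore loop roughly as in this sketch (field names are illustrative, not the exact pvclock ABI, and real code also needs read barriers between the loads):

```c
#include <stdio.h>

/* Simplified stand-in for the shared wall-clock page. */
struct wall_clock_area {
	volatile unsigned int version;
	unsigned int sec;
	unsigned int nsec;
};

/* Retry until a consistent snapshot is seen: the version must be even
 * (no update in flight) and unchanged across the copy. */
static void read_wall_clock(const struct wall_clock_area *wc,
			    unsigned int *sec, unsigned int *nsec)
{
	unsigned int v;

	do {
		v = wc->version;
		*sec = wc->sec;
		*nsec = wc->nsec;
	} while ((v & 1) || v != wc->version);
}

int main(void)
{
	struct wall_clock_area wc = { .version = 2, .sec = 1274000000, .nsec = 0 };
	unsigned int sec, nsec;

	read_wall_clock(&wc, &sec, &nsec);
	printf("wall clock: %u.%09u\n", sec, nsec);
	return 0;
}
```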
| @@ -796,6 +857,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
| 796 | vcpu->hv_clock.system_time = ts.tv_nsec + | 857 | vcpu->hv_clock.system_time = ts.tv_nsec + |
| 797 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; | 858 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
| 798 | 859 | ||
| 860 | vcpu->hv_clock.flags = 0; | ||
| 861 | |||
| 799 | /* | 862 | /* |
| 800 | * The interface expects us to write an even number signaling that the | 863 | * The interface expects us to write an even number signaling that the |
| 801 | * update is finished. Since the guest won't see the intermediate | 864 | * update is finished. Since the guest won't see the intermediate |
| @@ -1087,10 +1150,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1087 | { | 1150 | { |
| 1088 | switch (msr) { | 1151 | switch (msr) { |
| 1089 | case MSR_EFER: | 1152 | case MSR_EFER: |
| 1090 | set_efer(vcpu, data); | 1153 | return set_efer(vcpu, data); |
| 1091 | break; | ||
| 1092 | case MSR_K7_HWCR: | 1154 | case MSR_K7_HWCR: |
| 1093 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1155 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
| 1156 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | ||
| 1094 | if (data != 0) { | 1157 | if (data != 0) { |
| 1095 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1158 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
| 1096 | data); | 1159 | data); |
| @@ -1133,10 +1196,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1133 | case MSR_IA32_MISC_ENABLE: | 1196 | case MSR_IA32_MISC_ENABLE: |
| 1134 | vcpu->arch.ia32_misc_enable_msr = data; | 1197 | vcpu->arch.ia32_misc_enable_msr = data; |
| 1135 | break; | 1198 | break; |
| 1199 | case MSR_KVM_WALL_CLOCK_NEW: | ||
| 1136 | case MSR_KVM_WALL_CLOCK: | 1200 | case MSR_KVM_WALL_CLOCK: |
| 1137 | vcpu->kvm->arch.wall_clock = data; | 1201 | vcpu->kvm->arch.wall_clock = data; |
| 1138 | kvm_write_wall_clock(vcpu->kvm, data); | 1202 | kvm_write_wall_clock(vcpu->kvm, data); |
| 1139 | break; | 1203 | break; |
| 1204 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
| 1140 | case MSR_KVM_SYSTEM_TIME: { | 1205 | case MSR_KVM_SYSTEM_TIME: { |
| 1141 | if (vcpu->arch.time_page) { | 1206 | if (vcpu->arch.time_page) { |
| 1142 | kvm_release_page_dirty(vcpu->arch.time_page); | 1207 | kvm_release_page_dirty(vcpu->arch.time_page); |
| @@ -1408,9 +1473,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1408 | data = vcpu->arch.efer; | 1473 | data = vcpu->arch.efer; |
| 1409 | break; | 1474 | break; |
| 1410 | case MSR_KVM_WALL_CLOCK: | 1475 | case MSR_KVM_WALL_CLOCK: |
| 1476 | case MSR_KVM_WALL_CLOCK_NEW: | ||
| 1411 | data = vcpu->kvm->arch.wall_clock; | 1477 | data = vcpu->kvm->arch.wall_clock; |
| 1412 | break; | 1478 | break; |
| 1413 | case MSR_KVM_SYSTEM_TIME: | 1479 | case MSR_KVM_SYSTEM_TIME: |
| 1480 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
| 1414 | data = vcpu->arch.time; | 1481 | data = vcpu->arch.time; |
| 1415 | break; | 1482 | break; |
| 1416 | case MSR_IA32_P5_MC_ADDR: | 1483 | case MSR_IA32_P5_MC_ADDR: |
| @@ -1549,6 +1616,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 1549 | case KVM_CAP_HYPERV_VAPIC: | 1616 | case KVM_CAP_HYPERV_VAPIC: |
| 1550 | case KVM_CAP_HYPERV_SPIN: | 1617 | case KVM_CAP_HYPERV_SPIN: |
| 1551 | case KVM_CAP_PCI_SEGMENT: | 1618 | case KVM_CAP_PCI_SEGMENT: |
| 1619 | case KVM_CAP_DEBUGREGS: | ||
| 1552 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
| 1553 | r = 1; | 1621 | r = 1; |
| 1554 | break; | 1622 | break; |
| @@ -1769,6 +1837,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1769 | { | 1837 | { |
| 1770 | int r; | 1838 | int r; |
| 1771 | 1839 | ||
| 1840 | vcpu_load(vcpu); | ||
| 1772 | r = -E2BIG; | 1841 | r = -E2BIG; |
| 1773 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
| 1774 | goto out; | 1843 | goto out; |
| @@ -1780,6 +1849,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1780 | 1849 | ||
| 1781 | out: | 1850 | out: |
| 1782 | cpuid->nent = vcpu->arch.cpuid_nent; | 1851 | cpuid->nent = vcpu->arch.cpuid_nent; |
| 1852 | vcpu_put(vcpu); | ||
| 1783 | return r; | 1853 | return r; |
| 1784 | } | 1854 | } |
| 1785 | 1855 | ||
| @@ -1910,6 +1980,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1910 | } | 1980 | } |
| 1911 | break; | 1981 | break; |
| 1912 | } | 1982 | } |
| 1983 | case KVM_CPUID_SIGNATURE: { | ||
| 1984 | char signature[12] = "KVMKVMKVM\0\0"; | ||
| 1985 | u32 *sigptr = (u32 *)signature; | ||
| 1986 | entry->eax = 0; | ||
| 1987 | entry->ebx = sigptr[0]; | ||
| 1988 | entry->ecx = sigptr[1]; | ||
| 1989 | entry->edx = sigptr[2]; | ||
| 1990 | break; | ||
| 1991 | } | ||
| 1992 | case KVM_CPUID_FEATURES: | ||
| 1993 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
| 1994 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
| 1995 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
| 1996 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
| 1997 | entry->ebx = 0; | ||
| 1998 | entry->ecx = 0; | ||
| 1999 | entry->edx = 0; | ||
| 2000 | break; | ||
| 1913 | case 0x80000000: | 2001 | case 0x80000000: |
| 1914 | entry->eax = min(entry->eax, 0x8000001a); | 2002 | entry->eax = min(entry->eax, 0x8000001a); |
| 1915 | break; | 2003 | break; |
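The two new leaves provide paravirt discovery: KVM_CPUID_SIGNATURE returns the "KVMKVMKVM" string split across EBX/ECX/EDX, and KVM_CPUID_FEATURES advertises the clocksource and NOP-IO-delay bits. As a rough illustration of the guest side, assuming the signature leaf sits at the conventional hypervisor base 0x40000000 (an assumption of this sketch, not something the hunk states):

```c
#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Probe the hypervisor CPUID range for the KVM signature. __cpuid() is the
 * raw GCC helper, so no max-leaf check filters out the 0x40000000 range. */
static bool running_on_kvm(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13];

	__cpuid(0x40000000, eax, ebx, ecx, edx);
	(void)eax;		/* max leaf of the range; unused here */

	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';

	return strcmp(sig, "KVMKVMKVM") == 0;
}

int main(void)
{
	printf("KVM signature %s\n", running_on_kvm() ? "found" : "not found");
	return 0;
}
```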
| @@ -1918,6 +2006,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1918 | entry->ecx &= kvm_supported_word6_x86_features; | 2006 | entry->ecx &= kvm_supported_word6_x86_features; |
| 1919 | break; | 2007 | break; |
| 1920 | } | 2008 | } |
| 2009 | |||
| 2010 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
| 2011 | |||
| 1921 | put_cpu(); | 2012 | put_cpu(); |
| 1922 | } | 2013 | } |
| 1923 | 2014 | ||
| @@ -1953,6 +2044,23 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
| 1953 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | 2044 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) |
| 1954 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | 2045 | do_cpuid_ent(&cpuid_entries[nent], func, 0, |
| 1955 | &nent, cpuid->nent); | 2046 | &nent, cpuid->nent); |
| 2047 | |||
| 2048 | |||
| 2049 | |||
| 2050 | r = -E2BIG; | ||
| 2051 | if (nent >= cpuid->nent) | ||
| 2052 | goto out_free; | ||
| 2053 | |||
| 2054 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
| 2055 | cpuid->nent); | ||
| 2056 | |||
| 2057 | r = -E2BIG; | ||
| 2058 | if (nent >= cpuid->nent) | ||
| 2059 | goto out_free; | ||
| 2060 | |||
| 2061 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
| 2062 | cpuid->nent); | ||
| 2063 | |||
| 1956 | r = -E2BIG; | 2064 | r = -E2BIG; |
| 1957 | if (nent >= cpuid->nent) | 2065 | if (nent >= cpuid->nent) |
| 1958 | goto out_free; | 2066 | goto out_free; |
| @@ -2032,6 +2140,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
| 2032 | int r; | 2140 | int r; |
| 2033 | unsigned bank_num = mcg_cap & 0xff, bank; | 2141 | unsigned bank_num = mcg_cap & 0xff, bank; |
| 2034 | 2142 | ||
| 2143 | vcpu_load(vcpu); | ||
| 2035 | r = -EINVAL; | 2144 | r = -EINVAL; |
| 2036 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
| 2037 | goto out; | 2146 | goto out; |
| @@ -2046,6 +2155,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
| 2046 | for (bank = 0; bank < bank_num; bank++) | 2155 | for (bank = 0; bank < bank_num; bank++) |
| 2047 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
| 2048 | out: | 2157 | out: |
| 2158 | vcpu_put(vcpu); | ||
| 2049 | return r; | 2159 | return r; |
| 2050 | } | 2160 | } |
| 2051 | 2161 | ||
| @@ -2105,14 +2215,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2105 | { | 2215 | { |
| 2106 | vcpu_load(vcpu); | 2216 | vcpu_load(vcpu); |
| 2107 | 2217 | ||
| 2108 | events->exception.injected = vcpu->arch.exception.pending; | 2218 | events->exception.injected = |
| 2219 | vcpu->arch.exception.pending && | ||
| 2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | ||
| 2109 | events->exception.nr = vcpu->arch.exception.nr; | 2221 | events->exception.nr = vcpu->arch.exception.nr; |
| 2110 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 2222 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
| 2111 | events->exception.error_code = vcpu->arch.exception.error_code; | 2223 | events->exception.error_code = vcpu->arch.exception.error_code; |
| 2112 | 2224 | ||
| 2113 | events->interrupt.injected = vcpu->arch.interrupt.pending; | 2225 | events->interrupt.injected = |
| 2226 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | ||
| 2114 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 2227 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
| 2115 | events->interrupt.soft = vcpu->arch.interrupt.soft; | 2228 | events->interrupt.soft = 0; |
| 2229 | events->interrupt.shadow = | ||
| 2230 | kvm_x86_ops->get_interrupt_shadow(vcpu, | ||
| 2231 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | ||
| 2116 | 2232 | ||
| 2117 | events->nmi.injected = vcpu->arch.nmi_injected; | 2233 | events->nmi.injected = vcpu->arch.nmi_injected; |
| 2118 | events->nmi.pending = vcpu->arch.nmi_pending; | 2234 | events->nmi.pending = vcpu->arch.nmi_pending; |
| @@ -2121,7 +2237,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2121 | events->sipi_vector = vcpu->arch.sipi_vector; | 2237 | events->sipi_vector = vcpu->arch.sipi_vector; |
| 2122 | 2238 | ||
| 2123 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
| 2124 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | 2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
| 2241 | | KVM_VCPUEVENT_VALID_SHADOW); | ||
| 2125 | 2242 | ||
| 2126 | vcpu_put(vcpu); | 2243 | vcpu_put(vcpu); |
| 2127 | } | 2244 | } |
| @@ -2130,7 +2247,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2130 | struct kvm_vcpu_events *events) | 2247 | struct kvm_vcpu_events *events) |
| 2131 | { | 2248 | { |
| 2132 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | 2249 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
| 2133 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | 2250 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
| 2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | ||
| 2134 | return -EINVAL; | 2252 | return -EINVAL; |
| 2135 | 2253 | ||
| 2136 | vcpu_load(vcpu); | 2254 | vcpu_load(vcpu); |
| @@ -2145,6 +2263,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2145 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2263 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
| 2146 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | 2264 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) |
| 2147 | kvm_pic_clear_isr_ack(vcpu->kvm); | 2265 | kvm_pic_clear_isr_ack(vcpu->kvm); |
| 2266 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | ||
| 2267 | kvm_x86_ops->set_interrupt_shadow(vcpu, | ||
| 2268 | events->interrupt.shadow); | ||
| 2148 | 2269 | ||
| 2149 | vcpu->arch.nmi_injected = events->nmi.injected; | 2270 | vcpu->arch.nmi_injected = events->nmi.injected; |
| 2150 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | 2271 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) |
| @@ -2159,6 +2280,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2159 | return 0; | 2280 | return 0; |
| 2160 | } | 2281 | } |
| 2161 | 2282 | ||
| 2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | ||
| 2284 | struct kvm_debugregs *dbgregs) | ||
| 2285 | { | ||
| 2286 | vcpu_load(vcpu); | ||
| 2287 | |||
| 2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | ||
| 2289 | dbgregs->dr6 = vcpu->arch.dr6; | ||
| 2290 | dbgregs->dr7 = vcpu->arch.dr7; | ||
| 2291 | dbgregs->flags = 0; | ||
| 2292 | |||
| 2293 | vcpu_put(vcpu); | ||
| 2294 | } | ||
| 2295 | |||
| 2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | ||
| 2297 | struct kvm_debugregs *dbgregs) | ||
| 2298 | { | ||
| 2299 | if (dbgregs->flags) | ||
| 2300 | return -EINVAL; | ||
| 2301 | |||
| 2302 | vcpu_load(vcpu); | ||
| 2303 | |||
| 2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | ||
| 2305 | vcpu->arch.dr6 = dbgregs->dr6; | ||
| 2306 | vcpu->arch.dr7 = dbgregs->dr7; | ||
| 2307 | |||
| 2308 | vcpu_put(vcpu); | ||
| 2309 | |||
| 2310 | return 0; | ||
| 2311 | } | ||
| 2312 | |||
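These two helpers back the KVM_GET_DEBUGREGS/KVM_SET_DEBUGREGS cases added to kvm_arch_vcpu_ioctl() further down. A minimal userspace round-trip might look like the sketch below; vcpu_fd is assumed to come from KVM_CREATE_VCPU, the DR7 bit chosen is purely illustrative, and error handling is trimmed:

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Read the debug registers, point breakpoint 0 at addr, write them back.
 * Requires headers from a kernel that has KVM_CAP_DEBUGREGS. */
static int set_hw_breakpoint(int vcpu_fd, __u64 addr)
{
	struct kvm_debugregs dbg;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
		return -1;

	dbg.db[0] = addr;
	dbg.dr7 |= 0x1;		/* L0: locally enable breakpoint 0 */
	dbg.flags = 0;		/* must be clear on KVM_SET_DEBUGREGS */

	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
}
```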
| 2162 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2313 | long kvm_arch_vcpu_ioctl(struct file *filp, |
| 2163 | unsigned int ioctl, unsigned long arg) | 2314 | unsigned int ioctl, unsigned long arg) |
| 2164 | { | 2315 | { |
| @@ -2313,7 +2464,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 2313 | r = -EFAULT; | 2464 | r = -EFAULT; |
| 2314 | if (copy_from_user(&mce, argp, sizeof mce)) | 2465 | if (copy_from_user(&mce, argp, sizeof mce)) |
| 2315 | goto out; | 2466 | goto out; |
| 2467 | vcpu_load(vcpu); | ||
| 2316 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
| 2469 | vcpu_put(vcpu); | ||
| 2317 | break; | 2470 | break; |
| 2318 | } | 2471 | } |
| 2319 | case KVM_GET_VCPU_EVENTS: { | 2472 | case KVM_GET_VCPU_EVENTS: { |
| @@ -2337,6 +2490,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 2337 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | 2490 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); |
| 2338 | break; | 2491 | break; |
| 2339 | } | 2492 | } |
| 2493 | case KVM_GET_DEBUGREGS: { | ||
| 2494 | struct kvm_debugregs dbgregs; | ||
| 2495 | |||
| 2496 | kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); | ||
| 2497 | |||
| 2498 | r = -EFAULT; | ||
| 2499 | if (copy_to_user(argp, &dbgregs, | ||
| 2500 | sizeof(struct kvm_debugregs))) | ||
| 2501 | break; | ||
| 2502 | r = 0; | ||
| 2503 | break; | ||
| 2504 | } | ||
| 2505 | case KVM_SET_DEBUGREGS: { | ||
| 2506 | struct kvm_debugregs dbgregs; | ||
| 2507 | |||
| 2508 | r = -EFAULT; | ||
| 2509 | if (copy_from_user(&dbgregs, argp, | ||
| 2510 | sizeof(struct kvm_debugregs))) | ||
| 2511 | break; | ||
| 2512 | |||
| 2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | ||
| 2514 | break; | ||
| 2515 | } | ||
| 2340 | default: | 2516 | default: |
| 2341 | r = -EINVAL; | 2517 | r = -EINVAL; |
| 2342 | } | 2518 | } |
| @@ -2390,7 +2566,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | |||
| 2390 | struct kvm_mem_alias *alias; | 2566 | struct kvm_mem_alias *alias; |
| 2391 | struct kvm_mem_aliases *aliases; | 2567 | struct kvm_mem_aliases *aliases; |
| 2392 | 2568 | ||
| 2393 | aliases = rcu_dereference(kvm->arch.aliases); | 2569 | aliases = kvm_aliases(kvm); |
| 2394 | 2570 | ||
| 2395 | for (i = 0; i < aliases->naliases; ++i) { | 2571 | for (i = 0; i < aliases->naliases; ++i) { |
| 2396 | alias = &aliases->aliases[i]; | 2572 | alias = &aliases->aliases[i]; |
| @@ -2409,7 +2585,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
| 2409 | struct kvm_mem_alias *alias; | 2585 | struct kvm_mem_alias *alias; |
| 2410 | struct kvm_mem_aliases *aliases; | 2586 | struct kvm_mem_aliases *aliases; |
| 2411 | 2587 | ||
| 2412 | aliases = rcu_dereference(kvm->arch.aliases); | 2588 | aliases = kvm_aliases(kvm); |
| 2413 | 2589 | ||
| 2414 | for (i = 0; i < aliases->naliases; ++i) { | 2590 | for (i = 0; i < aliases->naliases; ++i) { |
| 2415 | alias = &aliases->aliases[i]; | 2591 | alias = &aliases->aliases[i]; |
| @@ -2804,11 +2980,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2804 | r = -EFAULT; | 2980 | r = -EFAULT; |
| 2805 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 2981 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
| 2806 | goto out; | 2982 | goto out; |
| 2983 | r = -ENXIO; | ||
| 2807 | if (irqchip_in_kernel(kvm)) { | 2984 | if (irqchip_in_kernel(kvm)) { |
| 2808 | __s32 status; | 2985 | __s32 status; |
| 2809 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2986 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
| 2810 | irq_event.irq, irq_event.level); | 2987 | irq_event.irq, irq_event.level); |
| 2811 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2988 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
| 2989 | r = -EFAULT; | ||
| 2812 | irq_event.status = status; | 2990 | irq_event.status = status; |
| 2813 | if (copy_to_user(argp, &irq_event, | 2991 | if (copy_to_user(argp, &irq_event, |
| 2814 | sizeof irq_event)) | 2992 | sizeof irq_event)) |
| @@ -3024,6 +3202,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
| 3024 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3202 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
| 3025 | } | 3203 | } |
| 3026 | 3204 | ||
| 3205 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | ||
| 3206 | struct kvm_segment *var, int seg) | ||
| 3207 | { | ||
| 3208 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
| 3209 | } | ||
| 3210 | |||
| 3211 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
| 3212 | struct kvm_segment *var, int seg) | ||
| 3213 | { | ||
| 3214 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
| 3215 | } | ||
| 3216 | |||
| 3027 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3217 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
| 3028 | { | 3218 | { |
| 3029 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3219 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
| @@ -3104,14 +3294,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | |||
| 3104 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3294 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); |
| 3105 | } | 3295 | } |
| 3106 | 3296 | ||
| 3107 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3297 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
| 3108 | struct kvm_vcpu *vcpu, u32 *error) | 3298 | unsigned int bytes, |
| 3299 | struct kvm_vcpu *vcpu, | ||
| 3300 | u32 *error) | ||
| 3109 | { | 3301 | { |
| 3110 | void *data = val; | 3302 | void *data = val; |
| 3111 | int r = X86EMUL_CONTINUE; | 3303 | int r = X86EMUL_CONTINUE; |
| 3112 | 3304 | ||
| 3113 | while (bytes) { | 3305 | while (bytes) { |
| 3114 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); | 3306 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, |
| 3307 | PFERR_WRITE_MASK, error); | ||
| 3115 | unsigned offset = addr & (PAGE_SIZE-1); | 3308 | unsigned offset = addr & (PAGE_SIZE-1); |
| 3116 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3309 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
| 3117 | int ret; | 3310 | int ret; |
| @@ -3134,7 +3327,6 @@ out: | |||
| 3134 | return r; | 3327 | return r; |
| 3135 | } | 3328 | } |
| 3136 | 3329 | ||
| 3137 | |||
| 3138 | static int emulator_read_emulated(unsigned long addr, | 3330 | static int emulator_read_emulated(unsigned long addr, |
| 3139 | void *val, | 3331 | void *val, |
| 3140 | unsigned int bytes, | 3332 | unsigned int bytes, |
| @@ -3237,9 +3429,9 @@ mmio: | |||
| 3237 | } | 3429 | } |
| 3238 | 3430 | ||
| 3239 | int emulator_write_emulated(unsigned long addr, | 3431 | int emulator_write_emulated(unsigned long addr, |
| 3240 | const void *val, | 3432 | const void *val, |
| 3241 | unsigned int bytes, | 3433 | unsigned int bytes, |
| 3242 | struct kvm_vcpu *vcpu) | 3434 | struct kvm_vcpu *vcpu) |
| 3243 | { | 3435 | { |
| 3244 | /* Crossing a page boundary? */ | 3436 | /* Crossing a page boundary? */ |
| 3245 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3437 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
| @@ -3257,45 +3449,150 @@ int emulator_write_emulated(unsigned long addr, | |||
| 3257 | } | 3449 | } |
| 3258 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | 3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); |
| 3259 | 3451 | ||
| 3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | ||
| 3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | ||
| 3454 | |||
| 3455 | #ifdef CONFIG_X86_64 | ||
| 3456 | # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) | ||
| 3457 | #else | ||
| 3458 | # define CMPXCHG64(ptr, old, new) \ | ||
| 3459 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | ||
| 3460 | #endif | ||
| 3461 | |||
| 3260 | static int emulator_cmpxchg_emulated(unsigned long addr, | 3462 | static int emulator_cmpxchg_emulated(unsigned long addr, |
| 3261 | const void *old, | 3463 | const void *old, |
| 3262 | const void *new, | 3464 | const void *new, |
| 3263 | unsigned int bytes, | 3465 | unsigned int bytes, |
| 3264 | struct kvm_vcpu *vcpu) | 3466 | struct kvm_vcpu *vcpu) |
| 3265 | { | 3467 | { |
| 3266 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3468 | gpa_t gpa; |
| 3267 | #ifndef CONFIG_X86_64 | 3469 | struct page *page; |
| 3268 | /* guests cmpxchg8b have to be emulated atomically */ | 3470 | char *kaddr; |
| 3269 | if (bytes == 8) { | 3471 | bool exchanged; |
| 3270 | gpa_t gpa; | ||
| 3271 | struct page *page; | ||
| 3272 | char *kaddr; | ||
| 3273 | u64 val; | ||
| 3274 | 3472 | ||
| 3275 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); | 3473 | /* guests cmpxchg8b have to be emulated atomically */ |
| 3474 | if (bytes > 8 || (bytes & (bytes - 1))) | ||
| 3475 | goto emul_write; | ||
| 3276 | 3476 | ||
| 3277 | if (gpa == UNMAPPED_GVA || | 3477 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
| 3278 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
| 3279 | goto emul_write; | ||
| 3280 | 3478 | ||
| 3281 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) | 3479 | if (gpa == UNMAPPED_GVA || |
| 3282 | goto emul_write; | 3480 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
| 3481 | goto emul_write; | ||
| 3283 | 3482 | ||
| 3284 | val = *(u64 *)new; | 3483 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) |
| 3484 | goto emul_write; | ||
| 3285 | 3485 | ||
| 3286 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 3287 | 3487 | ||
| 3288 | kaddr = kmap_atomic(page, KM_USER0); | 3488 | kaddr = kmap_atomic(page, KM_USER0); |
| 3289 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 3489 | kaddr += offset_in_page(gpa); |
| 3290 | kunmap_atomic(kaddr, KM_USER0); | 3490 | switch (bytes) { |
| 3291 | kvm_release_page_dirty(page); | 3491 | case 1: |
| 3492 | exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); | ||
| 3493 | break; | ||
| 3494 | case 2: | ||
| 3495 | exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); | ||
| 3496 | break; | ||
| 3497 | case 4: | ||
| 3498 | exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); | ||
| 3499 | break; | ||
| 3500 | case 8: | ||
| 3501 | exchanged = CMPXCHG64(kaddr, old, new); | ||
| 3502 | break; | ||
| 3503 | default: | ||
| 3504 | BUG(); | ||
| 3292 | } | 3505 | } |
| 3506 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3507 | kvm_release_page_dirty(page); | ||
| 3508 | |||
| 3509 | if (!exchanged) | ||
| 3510 | return X86EMUL_CMPXCHG_FAILED; | ||
| 3511 | |||
| 3512 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | ||
| 3513 | |||
| 3514 | return X86EMUL_CONTINUE; | ||
| 3515 | |||
| 3293 | emul_write: | 3516 | emul_write: |
| 3294 | #endif | 3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
| 3295 | 3518 | ||
| 3296 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3519 | return emulator_write_emulated(addr, new, bytes, vcpu); |
| 3297 | } | 3520 | } |
| 3298 | 3521 | ||
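The CMPXCHG_TYPE()/CMPXCHG64() dispatch above amounts to: map the guest page, perform a host-side atomic compare-and-swap of the same width the guest used, and report X86EMUL_CMPXCHG_FAILED when the old value no longer matches. A standalone model of the size dispatch using GCC's __atomic builtins (purely illustrative; the kernel path uses its own cmpxchg()/cmpxchg64()):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CAS_SIZED(type, ptr, oldp, newp) __extension__ ({		\
	type __expected, __desired;					\
	memcpy(&__expected, (oldp), sizeof(type));			\
	memcpy(&__desired, (newp), sizeof(type));			\
	__atomic_compare_exchange_n((type *)(ptr), &__expected,		\
				    __desired, false,			\
				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);\
})

/* Returns true when *ptr matched *oldp and *newp was stored atomically.
 * Only power-of-two sizes up to 8 bytes are handled, mirroring the
 * "bytes > 8 || (bytes & (bytes - 1))" guard above. */
static bool cmpxchg_sized(void *ptr, const void *oldp, const void *newp,
			  unsigned int bytes)
{
	switch (bytes) {
	case 1: return CAS_SIZED(uint8_t,  ptr, oldp, newp);
	case 2: return CAS_SIZED(uint16_t, ptr, oldp, newp);
	case 4: return CAS_SIZED(uint32_t, ptr, oldp, newp);
	case 8: return CAS_SIZED(uint64_t, ptr, oldp, newp);
	default: return false;
	}
}

int main(void)
{
	uint32_t word = 0x11223344, old = 0x11223344, repl = 0x55667788;

	printf("exchanged: %d, word now %#x\n",
	       cmpxchg_sized(&word, &old, &repl, sizeof(word)), word);
	return 0;
}
```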
| 3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
| 3523 | { | ||
| 3524 | /* TODO: String I/O for in-kernel device */ | ||
| 3525 | int r; | ||
| 3526 | |||
| 3527 | if (vcpu->arch.pio.in) | ||
| 3528 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
| 3529 | vcpu->arch.pio.size, pd); | ||
| 3530 | else | ||
| 3531 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
| 3532 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
| 3533 | pd); | ||
| 3534 | return r; | ||
| 3535 | } | ||
| 3536 | |||
| 3537 | |||
| 3538 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | ||
| 3539 | unsigned int count, struct kvm_vcpu *vcpu) | ||
| 3540 | { | ||
| 3541 | if (vcpu->arch.pio.count) | ||
| 3542 | goto data_avail; | ||
| 3543 | |||
| 3544 | trace_kvm_pio(1, port, size, 1); | ||
| 3545 | |||
| 3546 | vcpu->arch.pio.port = port; | ||
| 3547 | vcpu->arch.pio.in = 1; | ||
| 3548 | vcpu->arch.pio.count = count; | ||
| 3549 | vcpu->arch.pio.size = size; | ||
| 3550 | |||
| 3551 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
| 3552 | data_avail: | ||
| 3553 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
| 3554 | vcpu->arch.pio.count = 0; | ||
| 3555 | return 1; | ||
| 3556 | } | ||
| 3557 | |||
| 3558 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 3559 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | ||
| 3560 | vcpu->run->io.size = size; | ||
| 3561 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3562 | vcpu->run->io.count = count; | ||
| 3563 | vcpu->run->io.port = port; | ||
| 3564 | |||
| 3565 | return 0; | ||
| 3566 | } | ||
| 3567 | |||
| 3568 | static int emulator_pio_out_emulated(int size, unsigned short port, | ||
| 3569 | const void *val, unsigned int count, | ||
| 3570 | struct kvm_vcpu *vcpu) | ||
| 3571 | { | ||
| 3572 | trace_kvm_pio(0, port, size, 1); | ||
| 3573 | |||
| 3574 | vcpu->arch.pio.port = port; | ||
| 3575 | vcpu->arch.pio.in = 0; | ||
| 3576 | vcpu->arch.pio.count = count; | ||
| 3577 | vcpu->arch.pio.size = size; | ||
| 3578 | |||
| 3579 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
| 3580 | |||
| 3581 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
| 3582 | vcpu->arch.pio.count = 0; | ||
| 3583 | return 1; | ||
| 3584 | } | ||
| 3585 | |||
| 3586 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 3587 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
| 3588 | vcpu->run->io.size = size; | ||
| 3589 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3590 | vcpu->run->io.count = count; | ||
| 3591 | vcpu->run->io.port = port; | ||
| 3592 | |||
| 3593 | return 0; | ||
| 3594 | } | ||
| 3595 | |||
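When kernel_pio() cannot complete the access, emulator_pio_in_emulated()/emulator_pio_out_emulated() above fill in vcpu->run->io and return 0, producing a KVM_EXIT_IO exit with the payload staged in the shared pio page at io.data_offset. The userspace half of that contract looks roughly like this (handle_port_read()/handle_port_write() are placeholder device-model hooks, not KVM API):

```c
#include <linux/kvm.h>
#include <stdint.h>

/* Placeholder device-model hooks for this sketch. */
void handle_port_read(uint16_t port, void *data, int size);
void handle_port_write(uint16_t port, const void *data, int size);

/* run points at the mmap()ed kvm_run region of the vcpu fd; the PIO data
 * lives inside that same mapping at io.data_offset. */
static void handle_io_exit(struct kvm_run *run)
{
	uint8_t *data = (uint8_t *)run + run->io.data_offset;
	uint32_t i;

	for (i = 0; i < run->io.count; i++, data += run->io.size) {
		if (run->io.direction == KVM_EXIT_IO_OUT)
			handle_port_write(run->io.port, data, run->io.size);
		else
			handle_port_read(run->io.port, data, run->io.size);
	}
}
```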
| 3299 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3596 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
| 3300 | { | 3597 | { |
| 3301 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3598 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
| @@ -3316,14 +3613,14 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
| 3316 | 3613 | ||
| 3317 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
| 3318 | { | 3615 | { |
| 3319 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); | 3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); |
| 3320 | } | 3617 | } |
| 3321 | 3618 | ||
| 3322 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
| 3323 | { | 3620 | { |
| 3324 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
| 3325 | 3622 | ||
| 3326 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); | 3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); |
| 3327 | } | 3624 | } |
| 3328 | 3625 | ||
| 3329 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
| @@ -3344,12 +3641,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
| 3344 | } | 3641 | } |
| 3345 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
| 3346 | 3643 | ||
| 3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
| 3645 | { | ||
| 3646 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
| 3647 | } | ||
| 3648 | |||
| 3649 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | ||
| 3650 | { | ||
| 3651 | unsigned long value; | ||
| 3652 | |||
| 3653 | switch (cr) { | ||
| 3654 | case 0: | ||
| 3655 | value = kvm_read_cr0(vcpu); | ||
| 3656 | break; | ||
| 3657 | case 2: | ||
| 3658 | value = vcpu->arch.cr2; | ||
| 3659 | break; | ||
| 3660 | case 3: | ||
| 3661 | value = vcpu->arch.cr3; | ||
| 3662 | break; | ||
| 3663 | case 4: | ||
| 3664 | value = kvm_read_cr4(vcpu); | ||
| 3665 | break; | ||
| 3666 | case 8: | ||
| 3667 | value = kvm_get_cr8(vcpu); | ||
| 3668 | break; | ||
| 3669 | default: | ||
| 3670 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 3671 | return 0; | ||
| 3672 | } | ||
| 3673 | |||
| 3674 | return value; | ||
| 3675 | } | ||
| 3676 | |||
| 3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | ||
| 3678 | { | ||
| 3679 | switch (cr) { | ||
| 3680 | case 0: | ||
| 3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
| 3682 | break; | ||
| 3683 | case 2: | ||
| 3684 | vcpu->arch.cr2 = val; | ||
| 3685 | break; | ||
| 3686 | case 3: | ||
| 3687 | kvm_set_cr3(vcpu, val); | ||
| 3688 | break; | ||
| 3689 | case 4: | ||
| 3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
| 3691 | break; | ||
| 3692 | case 8: | ||
| 3693 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
| 3694 | break; | ||
| 3695 | default: | ||
| 3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 3697 | } | ||
| 3698 | } | ||
| 3699 | |||
| 3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | ||
| 3701 | { | ||
| 3702 | return kvm_x86_ops->get_cpl(vcpu); | ||
| 3703 | } | ||
| 3704 | |||
| 3705 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | ||
| 3706 | { | ||
| 3707 | kvm_x86_ops->get_gdt(vcpu, dt); | ||
| 3708 | } | ||
| 3709 | |||
| 3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | ||
| 3711 | struct kvm_vcpu *vcpu) | ||
| 3712 | { | ||
| 3713 | struct kvm_segment var; | ||
| 3714 | |||
| 3715 | kvm_get_segment(vcpu, &var, seg); | ||
| 3716 | |||
| 3717 | if (var.unusable) | ||
| 3718 | return false; | ||
| 3719 | |||
| 3720 | if (var.g) | ||
| 3721 | var.limit >>= 12; | ||
| 3722 | set_desc_limit(desc, var.limit); | ||
| 3723 | set_desc_base(desc, (unsigned long)var.base); | ||
| 3724 | desc->type = var.type; | ||
| 3725 | desc->s = var.s; | ||
| 3726 | desc->dpl = var.dpl; | ||
| 3727 | desc->p = var.present; | ||
| 3728 | desc->avl = var.avl; | ||
| 3729 | desc->l = var.l; | ||
| 3730 | desc->d = var.db; | ||
| 3731 | desc->g = var.g; | ||
| 3732 | |||
| 3733 | return true; | ||
| 3734 | } | ||
| 3735 | |||
| 3736 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | ||
| 3737 | struct kvm_vcpu *vcpu) | ||
| 3738 | { | ||
| 3739 | struct kvm_segment var; | ||
| 3740 | |||
| 3741 | /* needed to preserve selector */ | ||
| 3742 | kvm_get_segment(vcpu, &var, seg); | ||
| 3743 | |||
| 3744 | var.base = get_desc_base(desc); | ||
| 3745 | var.limit = get_desc_limit(desc); | ||
| 3746 | if (desc->g) | ||
| 3747 | var.limit = (var.limit << 12) | 0xfff; | ||
| 3748 | var.type = desc->type; | ||
| 3749 | var.present = desc->p; | ||
| 3750 | var.dpl = desc->dpl; | ||
| 3751 | var.db = desc->d; | ||
| 3752 | var.s = desc->s; | ||
| 3753 | var.l = desc->l; | ||
| 3754 | var.g = desc->g; | ||
| 3755 | var.avl = desc->avl; | ||
| 3756 | var.present = desc->p; | ||
| 3757 | var.unusable = !var.present; | ||
| 3758 | var.padding = 0; | ||
| 3759 | |||
| 3760 | kvm_set_segment(vcpu, &var, seg); | ||
| 3761 | return; | ||
| 3762 | } | ||
| 3763 | |||
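emulator_get_cached_descriptor()/emulator_set_cached_descriptor() above translate between struct kvm_segment, whose limit is byte-granular, and the packed descriptor layout, where a set G bit scales the 20-bit limit by 4 KiB pages. The conversion in isolation:

```c
#include <stdint.h>
#include <stdio.h>

/* Descriptor limit field (20 bits) -> byte-granular segment limit. */
static uint32_t desc_limit_to_bytes(uint32_t limit, int g)
{
	return g ? (limit << 12) | 0xfff : limit;
}

/* Byte-granular segment limit -> descriptor limit field. */
static uint32_t bytes_to_desc_limit(uint32_t bytes, int g)
{
	return g ? bytes >> 12 : bytes;
}

int main(void)
{
	/* A flat segment: limit 0xfffff with G=1 covers 0xffffffff bytes. */
	printf("%#x\n", desc_limit_to_bytes(0xfffff, 1));
	printf("%#x\n", bytes_to_desc_limit(0xffffffff, 1));
	return 0;
}
```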
| 3764 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | ||
| 3765 | { | ||
| 3766 | struct kvm_segment kvm_seg; | ||
| 3767 | |||
| 3768 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 3769 | return kvm_seg.selector; | ||
| 3770 | } | ||
| 3771 | |||
| 3772 | static void emulator_set_segment_selector(u16 sel, int seg, | ||
| 3773 | struct kvm_vcpu *vcpu) | ||
| 3774 | { | ||
| 3775 | struct kvm_segment kvm_seg; | ||
| 3776 | |||
| 3777 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 3778 | kvm_seg.selector = sel; | ||
| 3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
| 3780 | } | ||
| 3781 | |||
| 3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
| 3783 | { | ||
| 3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
| 3785 | } | ||
| 3786 | |||
| 3347 | static struct x86_emulate_ops emulate_ops = { | 3787 | static struct x86_emulate_ops emulate_ops = { |
| 3348 | .read_std = kvm_read_guest_virt_system, | 3788 | .read_std = kvm_read_guest_virt_system, |
| 3789 | .write_std = kvm_write_guest_virt_system, | ||
| 3349 | .fetch = kvm_fetch_guest_virt, | 3790 | .fetch = kvm_fetch_guest_virt, |
| 3350 | .read_emulated = emulator_read_emulated, | 3791 | .read_emulated = emulator_read_emulated, |
| 3351 | .write_emulated = emulator_write_emulated, | 3792 | .write_emulated = emulator_write_emulated, |
| 3352 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3793 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
| 3794 | .pio_in_emulated = emulator_pio_in_emulated, | ||
| 3795 | .pio_out_emulated = emulator_pio_out_emulated, | ||
| 3796 | .get_cached_descriptor = emulator_get_cached_descriptor, | ||
| 3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | ||
| 3798 | .get_segment_selector = emulator_get_segment_selector, | ||
| 3799 | .set_segment_selector = emulator_set_segment_selector, | ||
| 3800 | .get_gdt = emulator_get_gdt, | ||
| 3801 | .get_cr = emulator_get_cr, | ||
| 3802 | .set_cr = emulator_set_cr, | ||
| 3803 | .cpl = emulator_get_cpl, | ||
| 3804 | .set_rflags = emulator_set_rflags, | ||
| 3353 | }; | 3805 | }; |
| 3354 | 3806 | ||
| 3355 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
| @@ -3380,14 +3832,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3380 | cache_all_regs(vcpu); | 3832 | cache_all_regs(vcpu); |
| 3381 | 3833 | ||
| 3382 | vcpu->mmio_is_write = 0; | 3834 | vcpu->mmio_is_write = 0; |
| 3383 | vcpu->arch.pio.string = 0; | ||
| 3384 | 3835 | ||
| 3385 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
| 3386 | int cs_db, cs_l; | 3837 | int cs_db, cs_l; |
| 3387 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| 3388 | 3839 | ||
| 3389 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3840 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
| 3390 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3841 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
| 3842 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
| 3391 | vcpu->arch.emulate_ctxt.mode = | 3843 | vcpu->arch.emulate_ctxt.mode = |
| 3392 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 3844 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
| 3393 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3845 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
| @@ -3396,6 +3848,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3396 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
| 3397 | 3849 | ||
| 3398 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
| 3851 | trace_kvm_emulate_insn_start(vcpu); | ||
| 3399 | 3852 | ||
| 3400 | /* Only allow emulation of specific instructions on #UD | 3853 | /* Only allow emulation of specific instructions on #UD |
| 3401 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
| @@ -3428,6 +3881,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3428 | ++vcpu->stat.insn_emulation; | 3881 | ++vcpu->stat.insn_emulation; |
| 3429 | if (r) { | 3882 | if (r) { |
| 3430 | ++vcpu->stat.insn_emulation_fail; | 3883 | ++vcpu->stat.insn_emulation_fail; |
| 3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
| 3431 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
| 3432 | return EMULATE_DONE; | 3886 | return EMULATE_DONE; |
| 3433 | return EMULATE_FAIL; | 3887 | return EMULATE_FAIL; |
| @@ -3439,16 +3893,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3439 | return EMULATE_DONE; | 3893 | return EMULATE_DONE; |
| 3440 | } | 3894 | } |
| 3441 | 3895 | ||
| 3896 | restart: | ||
| 3442 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
| 3443 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | 3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; |
| 3444 | 3899 | ||
| 3445 | if (r == 0) | 3900 | if (r == 0) |
| 3446 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); |
| 3447 | 3902 | ||
| 3448 | if (vcpu->arch.pio.string) | 3903 | if (vcpu->arch.pio.count) { |
| 3904 | if (!vcpu->arch.pio.in) | ||
| 3905 | vcpu->arch.pio.count = 0; | ||
| 3449 | return EMULATE_DO_MMIO; | 3906 | return EMULATE_DO_MMIO; |
| 3907 | } | ||
| 3450 | 3908 | ||
| 3451 | if ((r || vcpu->mmio_is_write) && run) { | 3909 | if (r || vcpu->mmio_is_write) { |
| 3452 | run->exit_reason = KVM_EXIT_MMIO; | 3910 | run->exit_reason = KVM_EXIT_MMIO; |
| 3453 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
| 3454 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
| @@ -3458,222 +3916,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3458 | 3916 | ||
| 3459 | if (r) { | 3917 | if (r) { |
| 3460 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
| 3461 | return EMULATE_DONE; | 3919 | goto done; |
| 3462 | if (!vcpu->mmio_needed) { | 3920 | if (!vcpu->mmio_needed) { |
| 3921 | ++vcpu->stat.insn_emulation_fail; | ||
| 3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
| 3463 | kvm_report_emulation_failure(vcpu, "mmio"); | 3923 | kvm_report_emulation_failure(vcpu, "mmio"); |
| 3464 | return EMULATE_FAIL; | 3924 | return EMULATE_FAIL; |
| 3465 | } | 3925 | } |
| 3466 | return EMULATE_DO_MMIO; | 3926 | return EMULATE_DO_MMIO; |
| 3467 | } | 3927 | } |
| 3468 | 3928 | ||
| 3469 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
| 3470 | |||
| 3471 | if (vcpu->mmio_is_write) { | 3929 | if (vcpu->mmio_is_write) { |
| 3472 | vcpu->mmio_needed = 0; | 3930 | vcpu->mmio_needed = 0; |
| 3473 | return EMULATE_DO_MMIO; | 3931 | return EMULATE_DO_MMIO; |
| 3474 | } | 3932 | } |
| 3475 | 3933 | ||
| 3476 | return EMULATE_DONE; | 3934 | done: |
| 3477 | } | 3935 | if (vcpu->arch.exception.pending) |
| 3478 | EXPORT_SYMBOL_GPL(emulate_instruction); | 3936 | vcpu->arch.emulate_ctxt.restart = false; |
| 3479 | |||
| 3480 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
| 3481 | { | ||
| 3482 | void *p = vcpu->arch.pio_data; | ||
| 3483 | gva_t q = vcpu->arch.pio.guest_gva; | ||
| 3484 | unsigned bytes; | ||
| 3485 | int ret; | ||
| 3486 | u32 error_code; | ||
| 3487 | |||
| 3488 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | ||
| 3489 | if (vcpu->arch.pio.in) | ||
| 3490 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); | ||
| 3491 | else | ||
| 3492 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); | ||
| 3493 | |||
| 3494 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 3495 | kvm_inject_page_fault(vcpu, q, error_code); | ||
| 3496 | |||
| 3497 | return ret; | ||
| 3498 | } | ||
| 3499 | |||
| 3500 | int complete_pio(struct kvm_vcpu *vcpu) | ||
| 3501 | { | ||
| 3502 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
| 3503 | long delta; | ||
| 3504 | int r; | ||
| 3505 | unsigned long val; | ||
| 3506 | |||
| 3507 | if (!io->string) { | ||
| 3508 | if (io->in) { | ||
| 3509 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 3510 | memcpy(&val, vcpu->arch.pio_data, io->size); | ||
| 3511 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
| 3512 | } | ||
| 3513 | } else { | ||
| 3514 | if (io->in) { | ||
| 3515 | r = pio_copy_data(vcpu); | ||
| 3516 | if (r) | ||
| 3517 | goto out; | ||
| 3518 | } | ||
| 3519 | |||
| 3520 | delta = 1; | ||
| 3521 | if (io->rep) { | ||
| 3522 | delta *= io->cur_count; | ||
| 3523 | /* | ||
| 3524 | * The size of the register should really depend on | ||
| 3525 | * current address size. | ||
| 3526 | */ | ||
| 3527 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 3528 | val -= delta; | ||
| 3529 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
| 3530 | } | ||
| 3531 | if (io->down) | ||
| 3532 | delta = -delta; | ||
| 3533 | delta *= io->size; | ||
| 3534 | if (io->in) { | ||
| 3535 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
| 3536 | val += delta; | ||
| 3537 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); | ||
| 3538 | } else { | ||
| 3539 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 3540 | val += delta; | ||
| 3541 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
| 3542 | } | ||
| 3543 | } | ||
| 3544 | out: | ||
| 3545 | io->count -= io->cur_count; | ||
| 3546 | io->cur_count = 0; | ||
| 3547 | |||
| 3548 | return 0; | ||
| 3549 | } | ||
| 3550 | |||
| 3551 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
| 3552 | { | ||
| 3553 | /* TODO: String I/O for in kernel device */ | ||
| 3554 | int r; | ||
| 3555 | |||
| 3556 | if (vcpu->arch.pio.in) | ||
| 3557 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
| 3558 | vcpu->arch.pio.size, pd); | ||
| 3559 | else | ||
| 3560 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
| 3561 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
| 3562 | pd); | ||
| 3563 | return r; | ||
| 3564 | } | ||
| 3565 | 3937 | ||
| 3566 | static int pio_string_write(struct kvm_vcpu *vcpu) | 3938 | if (vcpu->arch.emulate_ctxt.restart) |
| 3567 | { | 3939 | goto restart; |
| 3568 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
| 3569 | void *pd = vcpu->arch.pio_data; | ||
| 3570 | int i, r = 0; | ||
| 3571 | 3940 | ||
| 3572 | for (i = 0; i < io->cur_count; i++) { | 3941 | return EMULATE_DONE; |
| 3573 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
| 3574 | io->port, io->size, pd)) { | ||
| 3575 | r = -EOPNOTSUPP; | ||
| 3576 | break; | ||
| 3577 | } | ||
| 3578 | pd += io->size; | ||
| 3579 | } | ||
| 3580 | return r; | ||
| 3581 | } | ||
| 3582 | |||
| 3583 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | ||
| 3584 | { | ||
| 3585 | unsigned long val; | ||
| 3586 | |||
| 3587 | trace_kvm_pio(!in, port, size, 1); | ||
| 3588 | |||
| 3589 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 3590 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
| 3591 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
| 3592 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3593 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; | ||
| 3594 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
| 3595 | vcpu->arch.pio.in = in; | ||
| 3596 | vcpu->arch.pio.string = 0; | ||
| 3597 | vcpu->arch.pio.down = 0; | ||
| 3598 | vcpu->arch.pio.rep = 0; | ||
| 3599 | |||
| 3600 | if (!vcpu->arch.pio.in) { | ||
| 3601 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 3602 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
| 3603 | } | ||
| 3604 | |||
| 3605 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
| 3606 | complete_pio(vcpu); | ||
| 3607 | return 1; | ||
| 3608 | } | ||
| 3609 | return 0; | ||
| 3610 | } | 3942 | } |
| 3611 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3943 | EXPORT_SYMBOL_GPL(emulate_instruction); |
| 3612 | 3944 | ||
| 3613 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | 3945 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
| 3614 | int size, unsigned long count, int down, | ||
| 3615 | gva_t address, int rep, unsigned port) | ||
| 3616 | { | 3946 | { |
| 3617 | unsigned now, in_page; | 3947 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3618 | int ret = 0; | 3948 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); |
| 3619 | 3949 | /* do not return to emulator after return from userspace */ | |
| 3620 | trace_kvm_pio(!in, port, size, count); | 3950 | vcpu->arch.pio.count = 0; |
| 3621 | |||
| 3622 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 3623 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
| 3624 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
| 3625 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3626 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; | ||
| 3627 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
| 3628 | vcpu->arch.pio.in = in; | ||
| 3629 | vcpu->arch.pio.string = 1; | ||
| 3630 | vcpu->arch.pio.down = down; | ||
| 3631 | vcpu->arch.pio.rep = rep; | ||
| 3632 | |||
| 3633 | if (!count) { | ||
| 3634 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
| 3635 | return 1; | ||
| 3636 | } | ||
| 3637 | |||
| 3638 | if (!down) | ||
| 3639 | in_page = PAGE_SIZE - offset_in_page(address); | ||
| 3640 | else | ||
| 3641 | in_page = offset_in_page(address) + size; | ||
| 3642 | now = min(count, (unsigned long)in_page / size); | ||
| 3643 | if (!now) | ||
| 3644 | now = 1; | ||
| 3645 | if (down) { | ||
| 3646 | /* | ||
| 3647 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
| 3648 | */ | ||
| 3649 | pr_unimpl(vcpu, "guest string pio down\n"); | ||
| 3650 | kvm_inject_gp(vcpu, 0); | ||
| 3651 | return 1; | ||
| 3652 | } | ||
| 3653 | vcpu->run->io.count = now; | ||
| 3654 | vcpu->arch.pio.cur_count = now; | ||
| 3655 | |||
| 3656 | if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) | ||
| 3657 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
| 3658 | |||
| 3659 | vcpu->arch.pio.guest_gva = address; | ||
| 3660 | |||
| 3661 | if (!vcpu->arch.pio.in) { | ||
| 3662 | /* string PIO write */ | ||
| 3663 | ret = pio_copy_data(vcpu); | ||
| 3664 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 3665 | return 1; | ||
| 3666 | if (ret == 0 && !pio_string_write(vcpu)) { | ||
| 3667 | complete_pio(vcpu); | ||
| 3668 | if (vcpu->arch.pio.count == 0) | ||
| 3669 | ret = 1; | ||
| 3670 | } | ||
| 3671 | } | ||
| 3672 | /* no string PIO read support yet */ | ||
| 3673 | |||
| 3674 | return ret; | 3951 | return ret; |
| 3675 | } | 3952 | } |
| 3676 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | 3953 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); |
| 3677 | 3954 | ||
| 3678 | static void bounce_off(void *info) | 3955 | static void bounce_off(void *info) |
| 3679 | { | 3956 | { |
| @@ -3996,85 +4273,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
| 3996 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
| 3997 | } | 4274 | } |
| 3998 | 4275 | ||
| 3999 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
| 4000 | { | ||
| 4001 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
| 4002 | } | ||
| 4003 | |||
| 4004 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
| 4005 | { | 4277 | { |
| 4006 | struct descriptor_table dt = { limit, base }; | 4278 | struct desc_ptr dt = { limit, base }; |
| 4007 | 4279 | ||
| 4008 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4280 | kvm_x86_ops->set_gdt(vcpu, &dt); |
| 4009 | } | 4281 | } |
| 4010 | 4282 | ||
| 4011 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4283 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
| 4012 | { | 4284 | { |
| 4013 | struct descriptor_table dt = { limit, base }; | 4285 | struct desc_ptr dt = { limit, base }; |
| 4014 | 4286 | ||
| 4015 | kvm_x86_ops->set_idt(vcpu, &dt); | 4287 | kvm_x86_ops->set_idt(vcpu, &dt); |
| 4016 | } | 4288 | } |
| 4017 | 4289 | ||
| 4018 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
| 4019 | unsigned long *rflags) | ||
| 4020 | { | ||
| 4021 | kvm_lmsw(vcpu, msw); | ||
| 4022 | *rflags = kvm_get_rflags(vcpu); | ||
| 4023 | } | ||
| 4024 | |||
| 4025 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | ||
| 4026 | { | ||
| 4027 | unsigned long value; | ||
| 4028 | |||
| 4029 | switch (cr) { | ||
| 4030 | case 0: | ||
| 4031 | value = kvm_read_cr0(vcpu); | ||
| 4032 | break; | ||
| 4033 | case 2: | ||
| 4034 | value = vcpu->arch.cr2; | ||
| 4035 | break; | ||
| 4036 | case 3: | ||
| 4037 | value = vcpu->arch.cr3; | ||
| 4038 | break; | ||
| 4039 | case 4: | ||
| 4040 | value = kvm_read_cr4(vcpu); | ||
| 4041 | break; | ||
| 4042 | case 8: | ||
| 4043 | value = kvm_get_cr8(vcpu); | ||
| 4044 | break; | ||
| 4045 | default: | ||
| 4046 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 4047 | return 0; | ||
| 4048 | } | ||
| 4049 | |||
| 4050 | return value; | ||
| 4051 | } | ||
| 4052 | |||
| 4053 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | ||
| 4054 | unsigned long *rflags) | ||
| 4055 | { | ||
| 4056 | switch (cr) { | ||
| 4057 | case 0: | ||
| 4058 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
| 4059 | *rflags = kvm_get_rflags(vcpu); | ||
| 4060 | break; | ||
| 4061 | case 2: | ||
| 4062 | vcpu->arch.cr2 = val; | ||
| 4063 | break; | ||
| 4064 | case 3: | ||
| 4065 | kvm_set_cr3(vcpu, val); | ||
| 4066 | break; | ||
| 4067 | case 4: | ||
| 4068 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
| 4069 | break; | ||
| 4070 | case 8: | ||
| 4071 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
| 4072 | break; | ||
| 4073 | default: | ||
| 4074 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 4075 | } | ||
| 4076 | } | ||
| 4077 | |||
| 4078 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 4290 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
| 4079 | { | 4291 | { |
| 4080 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | 4292 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; |
| @@ -4138,9 +4350,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
| 4138 | { | 4350 | { |
| 4139 | struct kvm_cpuid_entry2 *best; | 4351 | struct kvm_cpuid_entry2 *best; |
| 4140 | 4352 | ||
| 4353 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
| 4354 | if (!best || best->eax < 0x80000008) | ||
| 4355 | goto not_found; | ||
| 4141 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 4356 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
| 4142 | if (best) | 4357 | if (best) |
| 4143 | return best->eax & 0xff; | 4358 | return best->eax & 0xff; |
| 4359 | not_found: | ||
| 4144 | return 36; | 4360 | return 36; |
| 4145 | } | 4361 | } |
| 4146 | 4362 | ||
| @@ -4254,9 +4470,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
| 4254 | { | 4470 | { |
| 4255 | /* try to reinject previous events if any */ | 4471 | /* try to reinject previous events if any */ |
| 4256 | if (vcpu->arch.exception.pending) { | 4472 | if (vcpu->arch.exception.pending) { |
| 4473 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | ||
| 4474 | vcpu->arch.exception.has_error_code, | ||
| 4475 | vcpu->arch.exception.error_code); | ||
| 4257 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, | 4476 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, |
| 4258 | vcpu->arch.exception.has_error_code, | 4477 | vcpu->arch.exception.has_error_code, |
| 4259 | vcpu->arch.exception.error_code); | 4478 | vcpu->arch.exception.error_code, |
| 4479 | vcpu->arch.exception.reinject); | ||
| 4260 | return; | 4480 | return; |
| 4261 | } | 4481 | } |
| 4262 | 4482 | ||
| @@ -4486,7 +4706,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
| 4486 | } | 4706 | } |
| 4487 | 4707 | ||
| 4488 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4708 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
| 4489 | post_kvm_run_save(vcpu); | ||
| 4490 | 4709 | ||
| 4491 | vapic_exit(vcpu); | 4710 | vapic_exit(vcpu); |
| 4492 | 4711 | ||
| @@ -4514,26 +4733,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 4514 | if (!irqchip_in_kernel(vcpu->kvm)) | 4733 | if (!irqchip_in_kernel(vcpu->kvm)) |
| 4515 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4734 | kvm_set_cr8(vcpu, kvm_run->cr8); |
| 4516 | 4735 | ||
| 4517 | if (vcpu->arch.pio.cur_count) { | 4736 | if (vcpu->arch.pio.count || vcpu->mmio_needed || |
| 4518 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4737 | vcpu->arch.emulate_ctxt.restart) { |
| 4519 | r = complete_pio(vcpu); | 4738 | if (vcpu->mmio_needed) { |
| 4520 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4739 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
| 4521 | if (r) | 4740 | vcpu->mmio_read_completed = 1; |
| 4522 | goto out; | 4741 | vcpu->mmio_needed = 0; |
| 4523 | } | 4742 | } |
| 4524 | if (vcpu->mmio_needed) { | ||
| 4525 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | ||
| 4526 | vcpu->mmio_read_completed = 1; | ||
| 4527 | vcpu->mmio_needed = 0; | ||
| 4528 | |||
| 4529 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 4530 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
| 4531 | EMULTYPE_NO_DECODE); | ||
| 4532 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
| 4533 | if (r == EMULATE_DO_MMIO) { | 4746 | if (r == EMULATE_DO_MMIO) { |
| 4534 | /* | ||
| 4535 | * Read-modify-write. Back to userspace. | ||
| 4536 | */ | ||
| 4537 | r = 0; | 4747 | r = 0; |
| 4538 | goto out; | 4748 | goto out; |
| 4539 | } | 4749 | } |
| @@ -4545,6 +4755,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 4545 | r = __vcpu_run(vcpu); | 4755 | r = __vcpu_run(vcpu); |
| 4546 | 4756 | ||
| 4547 | out: | 4757 | out: |
| 4758 | post_kvm_run_save(vcpu); | ||
| 4548 | if (vcpu->sigset_active) | 4759 | if (vcpu->sigset_active) |
| 4549 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 4550 | 4761 | ||
| @@ -4616,12 +4827,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 4616 | return 0; | 4827 | return 0; |
| 4617 | } | 4828 | } |
| 4618 | 4829 | ||
| 4619 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
| 4620 | struct kvm_segment *var, int seg) | ||
| 4621 | { | ||
| 4622 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
| 4623 | } | ||
| 4624 | |||
| 4625 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 4830 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
| 4626 | { | 4831 | { |
| 4627 | struct kvm_segment cs; | 4832 | struct kvm_segment cs; |
| @@ -4635,7 +4840,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | |||
| 4635 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 4840 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
| 4636 | struct kvm_sregs *sregs) | 4841 | struct kvm_sregs *sregs) |
| 4637 | { | 4842 | { |
| 4638 | struct descriptor_table dt; | 4843 | struct desc_ptr dt; |
| 4639 | 4844 | ||
| 4640 | vcpu_load(vcpu); | 4845 | vcpu_load(vcpu); |
| 4641 | 4846 | ||
| @@ -4650,11 +4855,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 4650 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 4855 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
| 4651 | 4856 | ||
| 4652 | kvm_x86_ops->get_idt(vcpu, &dt); | 4857 | kvm_x86_ops->get_idt(vcpu, &dt); |
| 4653 | sregs->idt.limit = dt.limit; | 4858 | sregs->idt.limit = dt.size; |
| 4654 | sregs->idt.base = dt.base; | 4859 | sregs->idt.base = dt.address; |
| 4655 | kvm_x86_ops->get_gdt(vcpu, &dt); | 4860 | kvm_x86_ops->get_gdt(vcpu, &dt); |
| 4656 | sregs->gdt.limit = dt.limit; | 4861 | sregs->gdt.limit = dt.size; |
| 4657 | sregs->gdt.base = dt.base; | 4862 | sregs->gdt.base = dt.address; |
| 4658 | 4863 | ||
| 4659 | sregs->cr0 = kvm_read_cr0(vcpu); | 4864 | sregs->cr0 = kvm_read_cr0(vcpu); |
| 4660 | sregs->cr2 = vcpu->arch.cr2; | 4865 | sregs->cr2 = vcpu->arch.cr2; |
| @@ -4693,563 +4898,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
| 4693 | return 0; | 4898 | return 0; |
| 4694 | } | 4899 | } |
| 4695 | 4900 | ||
| 4696 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
| 4697 | struct kvm_segment *var, int seg) | 4902 | bool has_error_code, u32 error_code) |
| 4698 | { | ||
| 4699 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
| 4700 | } | ||
| 4701 | |||
| 4702 | static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | ||
| 4703 | struct kvm_segment *kvm_desct) | ||
| 4704 | { | ||
| 4705 | kvm_desct->base = get_desc_base(seg_desc); | ||
| 4706 | kvm_desct->limit = get_desc_limit(seg_desc); | ||
| 4707 | if (seg_desc->g) { | ||
| 4708 | kvm_desct->limit <<= 12; | ||
| 4709 | kvm_desct->limit |= 0xfff; | ||
| 4710 | } | ||
| 4711 | kvm_desct->selector = selector; | ||
| 4712 | kvm_desct->type = seg_desc->type; | ||
| 4713 | kvm_desct->present = seg_desc->p; | ||
| 4714 | kvm_desct->dpl = seg_desc->dpl; | ||
| 4715 | kvm_desct->db = seg_desc->d; | ||
| 4716 | kvm_desct->s = seg_desc->s; | ||
| 4717 | kvm_desct->l = seg_desc->l; | ||
| 4718 | kvm_desct->g = seg_desc->g; | ||
| 4719 | kvm_desct->avl = seg_desc->avl; | ||
| 4720 | if (!selector) | ||
| 4721 | kvm_desct->unusable = 1; | ||
| 4722 | else | ||
| 4723 | kvm_desct->unusable = 0; | ||
| 4724 | kvm_desct->padding = 0; | ||
| 4725 | } | ||
| 4726 | |||
| 4727 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, | ||
| 4728 | u16 selector, | ||
| 4729 | struct descriptor_table *dtable) | ||
| 4730 | { | ||
| 4731 | if (selector & 1 << 2) { | ||
| 4732 | struct kvm_segment kvm_seg; | ||
| 4733 | |||
| 4734 | kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | ||
| 4735 | |||
| 4736 | if (kvm_seg.unusable) | ||
| 4737 | dtable->limit = 0; | ||
| 4738 | else | ||
| 4739 | dtable->limit = kvm_seg.limit; | ||
| 4740 | dtable->base = kvm_seg.base; | ||
| 4741 | } | ||
| 4742 | else | ||
| 4743 | kvm_x86_ops->get_gdt(vcpu, dtable); | ||
| 4744 | } | ||
| 4745 | |||
| 4746 | /* allowed just for 8-byte segments */ ||
| 4747 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
| 4748 | struct desc_struct *seg_desc) | ||
| 4749 | { | ||
| 4750 | struct descriptor_table dtable; | ||
| 4751 | u16 index = selector >> 3; | ||
| 4752 | int ret; | ||
| 4753 | u32 err; | ||
| 4754 | gva_t addr; | ||
| 4755 | |||
| 4756 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
| 4757 | |||
| 4758 | if (dtable.limit < index * 8 + 7) { | ||
| 4759 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | ||
| 4760 | return X86EMUL_PROPAGATE_FAULT; | ||
| 4761 | } | ||
| 4762 | addr = dtable.base + index * 8; | ||
| 4763 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
| 4764 | vcpu, &err); | ||
| 4765 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 4766 | kvm_inject_page_fault(vcpu, addr, err); | ||
| 4767 | |||
| 4768 | return ret; | ||
| 4769 | } | ||
| 4770 | |||
| 4771 | /* allowed just for 8-byte segments */ ||
| 4772 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
| 4773 | struct desc_struct *seg_desc) | ||
| 4774 | { | ||
| 4775 | struct descriptor_table dtable; | ||
| 4776 | u16 index = selector >> 3; | ||
| 4777 | |||
| 4778 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
| 4779 | |||
| 4780 | if (dtable.limit < index * 8 + 7) | ||
| 4781 | return 1; | ||
| 4782 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); | ||
| 4783 | } | ||
| 4784 | |||
| 4785 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
| 4786 | struct desc_struct *seg_desc) | ||
| 4787 | { | ||
| 4788 | u32 base_addr = get_desc_base(seg_desc); | ||
| 4789 | |||
| 4790 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
| 4791 | } | ||
| 4792 | |||
| 4793 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, | ||
| 4794 | struct desc_struct *seg_desc) | ||
| 4795 | { | ||
| 4796 | u32 base_addr = get_desc_base(seg_desc); | ||
| 4797 | |||
| 4798 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); | ||
| 4799 | } | ||
| 4800 | |||
| 4801 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | ||
| 4802 | { | ||
| 4803 | struct kvm_segment kvm_seg; | ||
| 4804 | |||
| 4805 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 4806 | return kvm_seg.selector; | ||
| 4807 | } | ||
| 4808 | |||
| 4809 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
| 4810 | { | ||
| 4811 | struct kvm_segment segvar = { | ||
| 4812 | .base = selector << 4, | ||
| 4813 | .limit = 0xffff, | ||
| 4814 | .selector = selector, | ||
| 4815 | .type = 3, | ||
| 4816 | .present = 1, | ||
| 4817 | .dpl = 3, | ||
| 4818 | .db = 0, | ||
| 4819 | .s = 1, | ||
| 4820 | .l = 0, | ||
| 4821 | .g = 0, | ||
| 4822 | .avl = 0, | ||
| 4823 | .unusable = 0, | ||
| 4824 | }; | ||
| 4825 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
| 4826 | return X86EMUL_CONTINUE; | ||
| 4827 | } | ||
| 4828 | |||
| 4829 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | ||
| 4830 | { | 4903 | { |
| 4831 | return (seg != VCPU_SREG_LDTR) && | 4904 | int cs_db, cs_l, ret; |
| 4832 | (seg != VCPU_SREG_TR) && | 4905 | cache_all_regs(vcpu); |
| 4833 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | ||
| 4834 | } | ||
| 4835 | |||
| 4836 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
| 4837 | { | ||
| 4838 | struct kvm_segment kvm_seg; | ||
| 4839 | struct desc_struct seg_desc; | ||
| 4840 | u8 dpl, rpl, cpl; | ||
| 4841 | unsigned err_vec = GP_VECTOR; | ||
| 4842 | u32 err_code = 0; | ||
| 4843 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
| 4844 | int ret; | ||
| 4845 | 4906 | ||
| 4846 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) | 4907 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| 4847 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
| 4848 | 4908 | ||
| 4849 | /* NULL selector is not valid for TR, CS and SS */ | 4909 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
| 4850 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 4910 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
| 4851 | && null_selector) | 4911 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
| 4852 | goto exception; | 4912 | vcpu->arch.emulate_ctxt.mode = |
| 4913 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
| 4914 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
| 4915 | ? X86EMUL_MODE_VM86 : cs_l | ||
| 4916 | ? X86EMUL_MODE_PROT64 : cs_db | ||
| 4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
| 4853 | 4918 | ||
| 4854 | /* TR should be in GDT only */ | 4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
| 4855 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | 4920 | tss_selector, reason, has_error_code, |
| 4856 | goto exception; | 4921 | error_code); |
| 4857 | 4922 | ||
| 4858 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
| 4859 | if (ret) | 4923 | if (ret) |
| 4860 | return ret; | 4924 | return EMULATE_FAIL; |
| 4861 | |||
| 4862 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
| 4863 | |||
| 4864 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
| 4865 | kvm_seg.unusable = 1; | ||
| 4866 | goto load; | ||
| 4867 | } | ||
| 4868 | |||
| 4869 | err_code = selector & 0xfffc; | ||
| 4870 | err_vec = GP_VECTOR; | ||
| 4871 | |||
| 4872 | /* can't load system descriptor into segment selector */ ||
| 4873 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
| 4874 | goto exception; | ||
| 4875 | |||
| 4876 | if (!kvm_seg.present) { | ||
| 4877 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
| 4878 | goto exception; | ||
| 4879 | } | ||
| 4880 | |||
| 4881 | rpl = selector & 3; | ||
| 4882 | dpl = kvm_seg.dpl; | ||
| 4883 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
| 4884 | |||
| 4885 | switch (seg) { | ||
| 4886 | case VCPU_SREG_SS: | ||
| 4887 | /* | ||
| 4888 | * segment is not a writable data segment, or the segment ||
| 4889 | * selector's RPL != CPL, or the descriptor's DPL != CPL ||
| 4890 | */ | ||
| 4891 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
| 4892 | goto exception; | ||
| 4893 | break; | ||
| 4894 | case VCPU_SREG_CS: | ||
| 4895 | if (!(kvm_seg.type & 8)) | ||
| 4896 | goto exception; | ||
| 4897 | |||
| 4898 | if (kvm_seg.type & 4) { | ||
| 4899 | /* conforming */ | ||
| 4900 | if (dpl > cpl) | ||
| 4901 | goto exception; | ||
| 4902 | } else { | ||
| 4903 | /* nonconforming */ | ||
| 4904 | if (rpl > cpl || dpl != cpl) | ||
| 4905 | goto exception; | ||
| 4906 | } | ||
| 4907 | /* CS(RPL) <- CPL */ | ||
| 4908 | selector = (selector & 0xfffc) | cpl; | ||
| 4909 | break; | ||
| 4910 | case VCPU_SREG_TR: | ||
| 4911 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
| 4912 | goto exception; | ||
| 4913 | break; | ||
| 4914 | case VCPU_SREG_LDTR: | ||
| 4915 | if (kvm_seg.s || kvm_seg.type != 2) | ||
| 4916 | goto exception; | ||
| 4917 | break; | ||
| 4918 | default: /* DS, ES, FS, or GS */ | ||
| 4919 | /* | ||
| 4920 | * segment is not a data or readable code segment or | ||
| 4921 | * ((segment is a data or nonconforming code segment) | ||
| 4922 | * and (both RPL and CPL > DPL)) | ||
| 4923 | */ | ||
| 4924 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
| 4925 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
| 4926 | goto exception; | ||
| 4927 | break; | ||
| 4928 | } | ||
| 4929 | |||
| 4930 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
| 4931 | /* mark segment as accessed */ | ||
| 4932 | kvm_seg.type |= 1; | ||
| 4933 | seg_desc.type |= 1; | ||
| 4934 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
| 4935 | } | ||
| 4936 | load: | ||
| 4937 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
| 4938 | return X86EMUL_CONTINUE; | ||
| 4939 | exception: | ||
| 4940 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
| 4941 | return X86EMUL_PROPAGATE_FAULT; | ||
| 4942 | } | ||
| 4943 | |||
| 4944 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | ||
| 4945 | struct tss_segment_32 *tss) | ||
| 4946 | { | ||
| 4947 | tss->cr3 = vcpu->arch.cr3; | ||
| 4948 | tss->eip = kvm_rip_read(vcpu); | ||
| 4949 | tss->eflags = kvm_get_rflags(vcpu); | ||
| 4950 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 4951 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 4952 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
| 4953 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
| 4954 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
| 4955 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
| 4956 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 4957 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
| 4958 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
| 4959 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
| 4960 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
| 4961 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
| 4962 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | ||
| 4963 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | ||
| 4964 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
| 4965 | } | ||
| 4966 | |||
| 4967 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
| 4968 | { | ||
| 4969 | struct kvm_segment kvm_seg; | ||
| 4970 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 4971 | kvm_seg.selector = sel; | ||
| 4972 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
| 4973 | } | ||
| 4974 | |||
| 4975 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | ||
| 4976 | struct tss_segment_32 *tss) | ||
| 4977 | { | ||
| 4978 | kvm_set_cr3(vcpu, tss->cr3); | ||
| 4979 | |||
| 4980 | kvm_rip_write(vcpu, tss->eip); | ||
| 4981 | kvm_set_rflags(vcpu, tss->eflags | 2); | ||
| 4982 | |||
| 4983 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | ||
| 4984 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | ||
| 4985 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); | ||
| 4986 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); | ||
| 4987 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); | ||
| 4988 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); | ||
| 4989 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | ||
| 4990 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | ||
| 4991 | |||
| 4992 | /* | ||
| 4993 | * SDM says that segment selectors are loaded before segment | ||
| 4994 | * descriptors | ||
| 4995 | */ | ||
| 4996 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
| 4997 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
| 4998 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
| 4999 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
| 5000 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
| 5001 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
| 5002 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
| 5003 | |||
| 5004 | /* | ||
| 5005 | * Now load segment descriptors. If a fault happens at this stage, ||
| 5006 | * it is handled in the context of the new task ||
| 5007 | */ | ||
| 5008 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
| 5009 | return 1; | ||
| 5010 | |||
| 5011 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
| 5012 | return 1; | ||
| 5013 | 4925 | ||
| 5014 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | 4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
| 5015 | return 1; | 4927 | return EMULATE_DONE; |
| 5016 | |||
| 5017 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
| 5018 | return 1; | ||
| 5019 | |||
| 5020 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
| 5021 | return 1; | ||
| 5022 | |||
| 5023 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) | ||
| 5024 | return 1; | ||
| 5025 | |||
| 5026 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) | ||
| 5027 | return 1; | ||
| 5028 | return 0; | ||
| 5029 | } | ||
| 5030 | |||
| 5031 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | ||
| 5032 | struct tss_segment_16 *tss) | ||
| 5033 | { | ||
| 5034 | tss->ip = kvm_rip_read(vcpu); | ||
| 5035 | tss->flag = kvm_get_rflags(vcpu); | ||
| 5036 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 5037 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 5038 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
| 5039 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
| 5040 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
| 5041 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
| 5042 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 5043 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
| 5044 | |||
| 5045 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
| 5046 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
| 5047 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
| 5048 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
| 5049 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
| 5050 | } | ||
| 5051 | |||
| 5052 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | ||
| 5053 | struct tss_segment_16 *tss) | ||
| 5054 | { | ||
| 5055 | kvm_rip_write(vcpu, tss->ip); | ||
| 5056 | kvm_set_rflags(vcpu, tss->flag | 2); | ||
| 5057 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | ||
| 5058 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | ||
| 5059 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | ||
| 5060 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); | ||
| 5061 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); | ||
| 5062 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); | ||
| 5063 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | ||
| 5064 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | ||
| 5065 | |||
| 5066 | /* | ||
| 5067 | * SDM says that segment selectors are loaded before segment | ||
| 5068 | * descriptors | ||
| 5069 | */ | ||
| 5070 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
| 5071 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
| 5072 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
| 5073 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
| 5074 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
| 5075 | |||
| 5076 | /* | ||
| 5077 | * Now load segment descriptors. If a fault happens at this stage, ||
| 5078 | * it is handled in the context of the new task ||
| 5079 | */ | ||
| 5080 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
| 5081 | return 1; | ||
| 5082 | |||
| 5083 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
| 5084 | return 1; | ||
| 5085 | |||
| 5086 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
| 5087 | return 1; | ||
| 5088 | |||
| 5089 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
| 5090 | return 1; | ||
| 5091 | |||
| 5092 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
| 5093 | return 1; | ||
| 5094 | return 0; | ||
| 5095 | } | ||
| 5096 | |||
| 5097 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
| 5098 | u16 old_tss_sel, u32 old_tss_base, | ||
| 5099 | struct desc_struct *nseg_desc) | ||
| 5100 | { | ||
| 5101 | struct tss_segment_16 tss_segment_16; | ||
| 5102 | int ret = 0; | ||
| 5103 | |||
| 5104 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
| 5105 | sizeof tss_segment_16)) | ||
| 5106 | goto out; | ||
| 5107 | |||
| 5108 | save_state_to_tss16(vcpu, &tss_segment_16); | ||
| 5109 | |||
| 5110 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
| 5111 | sizeof tss_segment_16)) | ||
| 5112 | goto out; | ||
| 5113 | |||
| 5114 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
| 5115 | &tss_segment_16, sizeof tss_segment_16)) | ||
| 5116 | goto out; | ||
| 5117 | |||
| 5118 | if (old_tss_sel != 0xffff) { | ||
| 5119 | tss_segment_16.prev_task_link = old_tss_sel; | ||
| 5120 | |||
| 5121 | if (kvm_write_guest(vcpu->kvm, | ||
| 5122 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
| 5123 | &tss_segment_16.prev_task_link, | ||
| 5124 | sizeof tss_segment_16.prev_task_link)) | ||
| 5125 | goto out; | ||
| 5126 | } | ||
| 5127 | |||
| 5128 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | ||
| 5129 | goto out; | ||
| 5130 | |||
| 5131 | ret = 1; | ||
| 5132 | out: | ||
| 5133 | return ret; | ||
| 5134 | } | ||
| 5135 | |||
| 5136 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
| 5137 | u16 old_tss_sel, u32 old_tss_base, | ||
| 5138 | struct desc_struct *nseg_desc) | ||
| 5139 | { | ||
| 5140 | struct tss_segment_32 tss_segment_32; | ||
| 5141 | int ret = 0; | ||
| 5142 | |||
| 5143 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
| 5144 | sizeof tss_segment_32)) | ||
| 5145 | goto out; | ||
| 5146 | |||
| 5147 | save_state_to_tss32(vcpu, &tss_segment_32); | ||
| 5148 | |||
| 5149 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
| 5150 | sizeof tss_segment_32)) | ||
| 5151 | goto out; | ||
| 5152 | |||
| 5153 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
| 5154 | &tss_segment_32, sizeof tss_segment_32)) | ||
| 5155 | goto out; | ||
| 5156 | |||
| 5157 | if (old_tss_sel != 0xffff) { | ||
| 5158 | tss_segment_32.prev_task_link = old_tss_sel; | ||
| 5159 | |||
| 5160 | if (kvm_write_guest(vcpu->kvm, | ||
| 5161 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
| 5162 | &tss_segment_32.prev_task_link, | ||
| 5163 | sizeof tss_segment_32.prev_task_link)) | ||
| 5164 | goto out; | ||
| 5165 | } | ||
| 5166 | |||
| 5167 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | ||
| 5168 | goto out; | ||
| 5169 | |||
| 5170 | ret = 1; | ||
| 5171 | out: | ||
| 5172 | return ret; | ||
| 5173 | } | ||
| 5174 | |||
| 5175 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | ||
| 5176 | { | ||
| 5177 | struct kvm_segment tr_seg; | ||
| 5178 | struct desc_struct cseg_desc; | ||
| 5179 | struct desc_struct nseg_desc; | ||
| 5180 | int ret = 0; | ||
| 5181 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
| 5182 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
| 5183 | u32 desc_limit; | ||
| 5184 | |||
| 5185 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); | ||
| 5186 | |||
| 5187 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
| 5188 | * descriptors should generate a pagefault. | ||
| 5189 | */ | ||
| 5190 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | ||
| 5191 | goto out; | ||
| 5192 | |||
| 5193 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) | ||
| 5194 | goto out; | ||
| 5195 | |||
| 5196 | if (reason != TASK_SWITCH_IRET) { | ||
| 5197 | int cpl; | ||
| 5198 | |||
| 5199 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
| 5200 | if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { | ||
| 5201 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
| 5202 | return 1; | ||
| 5203 | } | ||
| 5204 | } | ||
| 5205 | |||
| 5206 | desc_limit = get_desc_limit(&nseg_desc); | ||
| 5207 | if (!nseg_desc.p || | ||
| 5208 | ((desc_limit < 0x67 && (nseg_desc.type & 8)) || | ||
| 5209 | desc_limit < 0x2b)) { | ||
| 5210 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | ||
| 5211 | return 1; | ||
| 5212 | } | ||
| 5213 | |||
| 5214 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
| 5215 | cseg_desc.type &= ~(1 << 1); //clear the B flag | ||
| 5216 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); | ||
| 5217 | } | ||
| 5218 | |||
| 5219 | if (reason == TASK_SWITCH_IRET) { | ||
| 5220 | u32 eflags = kvm_get_rflags(vcpu); | ||
| 5221 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | ||
| 5222 | } | ||
| 5223 | |||
| 5224 | /* set back link to prev task only if NT bit is set in eflags; ||
| 5225 | note that old_tss_sel is not used after this point */ ||
| 5226 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
| 5227 | old_tss_sel = 0xffff; | ||
| 5228 | |||
| 5229 | if (nseg_desc.type & 8) | ||
| 5230 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | ||
| 5231 | old_tss_base, &nseg_desc); | ||
| 5232 | else | ||
| 5233 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, | ||
| 5234 | old_tss_base, &nseg_desc); | ||
| 5235 | |||
| 5236 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | ||
| 5237 | u32 eflags = kvm_get_rflags(vcpu); | ||
| 5238 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); | ||
| 5239 | } | ||
| 5240 | |||
| 5241 | if (reason != TASK_SWITCH_IRET) { | ||
| 5242 | nseg_desc.type |= (1 << 1); | ||
| 5243 | save_guest_segment_descriptor(vcpu, tss_selector, | ||
| 5244 | &nseg_desc); | ||
| 5245 | } | ||
| 5246 | |||
| 5247 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); | ||
| 5248 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | ||
| 5249 | tr_seg.type = 11; | ||
| 5250 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
| 5251 | out: | ||
| 5252 | return ret; | ||
| 5253 | } | 4928 | } |
| 5254 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 4929 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
| 5255 | 4930 | ||
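As an aside, the nested conditional that kvm_task_switch() now uses to pick the emulator mode is hard to read in diff form; the helper below is only an illustrative restatement of that selection and is not added by this patch.

/* Illustrative restatement of the mode selection above; protmode,
 * eflags, cs_l and cs_db come from the vcpu state exactly as in
 * kvm_task_switch(). */
static int emul_mode(bool protmode, unsigned long eflags, int cs_l, int cs_db)
{
	if (!protmode)
		return X86EMUL_MODE_REAL;
	if (eflags & X86_EFLAGS_VM)
		return X86EMUL_MODE_VM86;
	if (cs_l)
		return X86EMUL_MODE_PROT64;
	return cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
}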
| @@ -5258,15 +4933,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 5258 | { | 4933 | { |
| 5259 | int mmu_reset_needed = 0; | 4934 | int mmu_reset_needed = 0; |
| 5260 | int pending_vec, max_bits; | 4935 | int pending_vec, max_bits; |
| 5261 | struct descriptor_table dt; | 4936 | struct desc_ptr dt; |
| 5262 | 4937 | ||
| 5263 | vcpu_load(vcpu); | 4938 | vcpu_load(vcpu); |
| 5264 | 4939 | ||
| 5265 | dt.limit = sregs->idt.limit; | 4940 | dt.size = sregs->idt.limit; |
| 5266 | dt.base = sregs->idt.base; | 4941 | dt.address = sregs->idt.base; |
| 5267 | kvm_x86_ops->set_idt(vcpu, &dt); | 4942 | kvm_x86_ops->set_idt(vcpu, &dt); |
| 5268 | dt.limit = sregs->gdt.limit; | 4943 | dt.size = sregs->gdt.limit; |
| 5269 | dt.base = sregs->gdt.base; | 4944 | dt.address = sregs->gdt.base; |
| 5270 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4945 | kvm_x86_ops->set_gdt(vcpu, &dt); |
| 5271 | 4946 | ||
| 5272 | vcpu->arch.cr2 = sregs->cr2; | 4947 | vcpu->arch.cr2 = sregs->cr2; |
| @@ -5365,11 +5040,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
| 5365 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 5040 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
| 5366 | } | 5041 | } |
| 5367 | 5042 | ||
| 5368 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 5043 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
| 5369 | vcpu->arch.singlestep_cs = | 5044 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + |
| 5370 | get_segment_selector(vcpu, VCPU_SREG_CS); | 5045 | get_segment_base(vcpu, VCPU_SREG_CS); |
| 5371 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
| 5372 | } | ||
| 5373 | 5046 | ||
| 5374 | /* | 5047 | /* |
| 5375 | * Trigger an rflags update that will inject or remove the trace | 5048 | * Trigger an rflags update that will inject or remove the trace |
| @@ -5860,13 +5533,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
| 5860 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5533 | return kvm_x86_ops->interrupt_allowed(vcpu); |
| 5861 | } | 5534 | } |
| 5862 | 5535 | ||
| 5536 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) | ||
| 5537 | { | ||
| 5538 | unsigned long current_rip = kvm_rip_read(vcpu) + | ||
| 5539 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
| 5540 | |||
| 5541 | return current_rip == linear_rip; | ||
| 5542 | } | ||
| 5543 | EXPORT_SYMBOL_GPL(kvm_is_linear_rip); | ||
| 5544 | |||
| 5863 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | 5545 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) |
| 5864 | { | 5546 | { |
| 5865 | unsigned long rflags; | 5547 | unsigned long rflags; |
| 5866 | 5548 | ||
| 5867 | rflags = kvm_x86_ops->get_rflags(vcpu); | 5549 | rflags = kvm_x86_ops->get_rflags(vcpu); |
| 5868 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5550 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
| 5869 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | 5551 | rflags &= ~X86_EFLAGS_TF; |
| 5870 | return rflags; | 5552 | return rflags; |
| 5871 | } | 5553 | } |
| 5872 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | 5554 | EXPORT_SYMBOL_GPL(kvm_get_rflags); |
| @@ -5874,10 +5556,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); | |||
| 5874 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 5556 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
| 5875 | { | 5557 | { |
| 5876 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | 5558 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && |
| 5877 | vcpu->arch.singlestep_cs == | 5559 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
| 5878 | get_segment_selector(vcpu, VCPU_SREG_CS) && | 5560 | rflags |= X86_EFLAGS_TF; |
| 5879 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
| 5880 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
| 5881 | kvm_x86_ops->set_rflags(vcpu, rflags); | 5561 | kvm_x86_ops->set_rflags(vcpu, rflags); |
| 5882 | } | 5562 | } |
| 5883 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 5563 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
| @@ -5893,3 +5573,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | |||
| 5893 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | 5573 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); |
| 5894 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | 5574 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); |
| 5895 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 5575 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
| 5576 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b7a404722d2b..f4b54458285b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -65,6 +65,13 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
| 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
| 69 | { | ||
| 70 | return rcu_dereference_check(kvm->arch.aliases, | ||
| 71 | srcu_read_lock_held(&kvm->srcu) | ||
| 72 | || lockdep_is_held(&kvm->slots_lock)); | ||
| 73 | } | ||
| 74 | |||
| 68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
| 69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
| 70 | 77 | ||
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 60df9c84ecae..23ea02253900 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -160,6 +160,7 @@ struct kvm_pit_config { | |||
| 160 | #define KVM_EXIT_DCR 15 | 160 | #define KVM_EXIT_DCR 15 |
| 161 | #define KVM_EXIT_NMI 16 | 161 | #define KVM_EXIT_NMI 16 |
| 162 | #define KVM_EXIT_INTERNAL_ERROR 17 | 162 | #define KVM_EXIT_INTERNAL_ERROR 17 |
| 163 | #define KVM_EXIT_OSI 18 | ||
| 163 | 164 | ||
| 164 | /* For KVM_EXIT_INTERNAL_ERROR */ | 165 | /* For KVM_EXIT_INTERNAL_ERROR */ |
| 165 | #define KVM_INTERNAL_ERROR_EMULATION 1 | 166 | #define KVM_INTERNAL_ERROR_EMULATION 1 |
| @@ -259,6 +260,10 @@ struct kvm_run { | |||
| 259 | __u32 ndata; | 260 | __u32 ndata; |
| 260 | __u64 data[16]; | 261 | __u64 data[16]; |
| 261 | } internal; | 262 | } internal; |
| 263 | /* KVM_EXIT_OSI */ | ||
| 264 | struct { | ||
| 265 | __u64 gprs[32]; | ||
| 266 | } osi; | ||
| 262 | /* Fix the size of the union. */ | 267 | /* Fix the size of the union. */ |
| 263 | char padding[256]; | 268 | char padding[256]; |
| 264 | }; | 269 | }; |
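A hedged sketch of how a PowerPC userspace run loop might consume the new exit reason follows; handle_osi_call() is a hypothetical VMM helper, and run is the mmap'ed struct kvm_run.

#include <linux/kvm.h>

int handle_osi_call(const __u64 *gprs);	/* hypothetical VMM helper */

/* Sketch only: dispatch the new KVM_EXIT_OSI reason after KVM_RUN. */
static int handle_exit(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_OSI:
		/* osi.gprs[] snapshots the 32 guest GPRs at the hypercall */
		return handle_osi_call(run->osi.gprs);
	default:
		return -1;	/* other exit reasons omitted */
	}
}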
| @@ -400,6 +405,15 @@ struct kvm_ioeventfd { | |||
| 400 | __u8 pad[36]; | 405 | __u8 pad[36]; |
| 401 | }; | 406 | }; |
| 402 | 407 | ||
| 408 | /* for KVM_ENABLE_CAP */ | ||
| 409 | struct kvm_enable_cap { | ||
| 410 | /* in */ | ||
| 411 | __u32 cap; | ||
| 412 | __u32 flags; | ||
| 413 | __u64 args[4]; | ||
| 414 | __u8 pad[64]; | ||
| 415 | }; | ||
| 416 | |||
| 403 | #define KVMIO 0xAE | 417 | #define KVMIO 0xAE |
| 404 | 418 | ||
| 405 | /* | 419 | /* |
| @@ -501,7 +515,15 @@ struct kvm_ioeventfd { | |||
| 501 | #define KVM_CAP_HYPERV_VAPIC 45 | 515 | #define KVM_CAP_HYPERV_VAPIC 45 |
| 502 | #define KVM_CAP_HYPERV_SPIN 46 | 516 | #define KVM_CAP_HYPERV_SPIN 46 |
| 503 | #define KVM_CAP_PCI_SEGMENT 47 | 517 | #define KVM_CAP_PCI_SEGMENT 47 |
| 518 | #define KVM_CAP_PPC_PAIRED_SINGLES 48 | ||
| 519 | #define KVM_CAP_INTR_SHADOW 49 | ||
| 520 | #ifdef __KVM_HAVE_DEBUGREGS | ||
| 521 | #define KVM_CAP_DEBUGREGS 50 | ||
| 522 | #endif | ||
| 504 | #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 | 523 | #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 |
| 524 | #define KVM_CAP_PPC_OSI 52 | ||
| 525 | #define KVM_CAP_PPC_UNSET_IRQ 53 | ||
| 526 | #define KVM_CAP_ENABLE_CAP 54 | ||
| 505 | 527 | ||
| 506 | #ifdef KVM_CAP_IRQ_ROUTING | 528 | #ifdef KVM_CAP_IRQ_ROUTING |
| 507 | 529 | ||
| @@ -688,6 +710,10 @@ struct kvm_clock_data { | |||
| 688 | /* Available with KVM_CAP_VCPU_EVENTS */ | 710 | /* Available with KVM_CAP_VCPU_EVENTS */ |
| 689 | #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) | 711 | #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) |
| 690 | #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) | 712 | #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) |
| 713 | /* Available with KVM_CAP_DEBUGREGS */ | ||
| 714 | #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) | ||
| 715 | #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) | ||
| 716 | #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) | ||
| 691 | 717 | ||
| 692 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 718 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
| 693 | 719 | ||
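For orientation, a hedged userspace sketch of the new ioctls follows. It assumes vcpu_fd is an open vcpu file descriptor and that KVM_CHECK_EXTENSION has already reported KVM_CAP_DEBUGREGS (x86) respectively KVM_CAP_ENABLE_CAP and KVM_CAP_PPC_OSI (PPC); nothing here is taken from the patch itself.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* x86: read the guest's DR7 via the new KVM_GET_DEBUGREGS. */
static int read_guest_dr7(int vcpu_fd, unsigned long long *dr7)
{
	struct kvm_debugregs regs;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &regs) < 0)
		return -1;
	*dr7 = regs.dr7;
	return 0;
}

/* PPC: opt in to OSI hypercall exits via the new KVM_ENABLE_CAP. */
static int enable_ppc_osi(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_PPC_OSI;	/* flags and args stay zero */
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}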
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 169d07758ee5..7cb116afa1cd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -105,6 +105,12 @@ struct kvm_vcpu { | |||
| 105 | struct kvm_vcpu_arch arch; | 105 | struct kvm_vcpu_arch arch; |
| 106 | }; | 106 | }; |
| 107 | 107 | ||
| 108 | /* | ||
| 109 | * Some of the bitops functions do not support too long bitmaps. | ||
| 110 | * This number must be determined not to exceed such limits. | ||
| 111 | */ | ||
| 112 | #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) | ||
| 113 | |||
| 108 | struct kvm_memory_slot { | 114 | struct kvm_memory_slot { |
| 109 | gfn_t base_gfn; | 115 | gfn_t base_gfn; |
| 110 | unsigned long npages; | 116 | unsigned long npages; |
| @@ -237,17 +243,23 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); | |||
| 237 | void vcpu_load(struct kvm_vcpu *vcpu); | 243 | void vcpu_load(struct kvm_vcpu *vcpu); |
| 238 | void vcpu_put(struct kvm_vcpu *vcpu); | 244 | void vcpu_put(struct kvm_vcpu *vcpu); |
| 239 | 245 | ||
| 240 | int kvm_init(void *opaque, unsigned int vcpu_size, | 246 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
| 241 | struct module *module); | 247 | struct module *module); |
| 242 | void kvm_exit(void); | 248 | void kvm_exit(void); |
| 243 | 249 | ||
| 244 | void kvm_get_kvm(struct kvm *kvm); | 250 | void kvm_get_kvm(struct kvm *kvm); |
| 245 | void kvm_put_kvm(struct kvm *kvm); | 251 | void kvm_put_kvm(struct kvm *kvm); |
| 246 | 252 | ||
| 253 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) | ||
| 254 | { | ||
| 255 | return rcu_dereference_check(kvm->memslots, | ||
| 256 | srcu_read_lock_held(&kvm->srcu) | ||
| 257 | || lockdep_is_held(&kvm->slots_lock)); | ||
| 258 | } | ||
| 259 | |||
| 247 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) | 260 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) |
| 248 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) | 261 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) |
| 249 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | 262 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } |
| 250 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); | ||
| 251 | 263 | ||
| 252 | extern struct page *bad_page; | 264 | extern struct page *bad_page; |
| 253 | extern pfn_t bad_pfn; | 265 | extern pfn_t bad_pfn; |
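The new kvm_memslots() accessor encodes the locking rule directly: callers must sit inside an SRCU read-side section or hold slots_lock, which is what the rcu_dereference_check() above verifies under lockdep. The caller below is a hedged illustration of that rule, not part of the patch.

/* Illustrative only: iterate the memslots under SRCU, as kvm_memslots()
 * expects. */
static void walk_memslots(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	int i, idx;

	idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	for (i = 0; i < slots->nmemslots; i++)
		pr_debug("slot %d: %lu pages\n", i,
			 slots->memslots[i].npages);
	srcu_read_unlock(&kvm->srcu, idx);
}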
diff --git a/include/linux/tboot.h b/include/linux/tboot.h index bf2a0c748878..1dba6ee55203 100644 --- a/include/linux/tboot.h +++ b/include/linux/tboot.h | |||
| @@ -150,6 +150,7 @@ extern int tboot_force_iommu(void); | |||
| 150 | 150 | ||
| 151 | #else | 151 | #else |
| 152 | 152 | ||
| 153 | #define tboot_enabled() 0 | ||
| 153 | #define tboot_probe() do { } while (0) | 154 | #define tboot_probe() do { } while (0) |
| 154 | #define tboot_shutdown(shutdown_type) do { } while (0) | 155 | #define tboot_shutdown(shutdown_type) do { } while (0) |
| 155 | #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ | 156 | #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ |
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index b17d49dfc3ef..6dd3a51ab1cb 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | 5 | ||
| 6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
| 7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
| 8 | #define TRACE_INCLUDE_FILE kvm | ||
| 9 | 8 | ||
| 10 | #if defined(__KVM_HAVE_IOAPIC) | 9 | #if defined(__KVM_HAVE_IOAPIC) |
| 11 | TRACE_EVENT(kvm_set_irq, | 10 | TRACE_EVENT(kvm_set_irq, |
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 02ff2b19dbe2..4d10b1e047f4 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
| @@ -316,12 +316,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, | |||
| 316 | kvm_assigned_dev_intr, 0, | 316 | kvm_assigned_dev_intr, 0, |
| 317 | "kvm_assigned_msix_device", | 317 | "kvm_assigned_msix_device", |
| 318 | (void *)dev); | 318 | (void *)dev); |
| 319 | /* FIXME: free requested_irq's on failure */ | ||
| 320 | if (r) | 319 | if (r) |
| 321 | return r; | 320 | goto err; |
| 322 | } | 321 | } |
| 323 | 322 | ||
| 324 | return 0; | 323 | return 0; |
| 324 | err: | ||
| 325 | for (i -= 1; i >= 0; i--) | ||
| 326 | free_irq(dev->host_msix_entries[i].vector, (void *)dev); | ||
| 327 | pci_disable_msix(dev->dev); | ||
| 328 | return r; | ||
| 325 | } | 329 | } |
| 326 | 330 | ||
| 327 | #endif | 331 | #endif |
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 36e258029649..53850177163f 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
| @@ -120,8 +120,10 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) | |||
| 120 | return ret; | 120 | return ret; |
| 121 | 121 | ||
| 122 | out_free_dev: | 122 | out_free_dev: |
| 123 | kvm->coalesced_mmio_dev = NULL; | ||
| 123 | kfree(dev); | 124 | kfree(dev); |
| 124 | out_free_page: | 125 | out_free_page: |
| 126 | kvm->coalesced_mmio_ring = NULL; | ||
| 125 | __free_page(page); | 127 | __free_page(page); |
| 126 | out_err: | 128 | out_err: |
| 127 | return ret; | 129 | return ret; |
| @@ -139,7 +141,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | |||
| 139 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | 141 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; |
| 140 | 142 | ||
| 141 | if (dev == NULL) | 143 | if (dev == NULL) |
| 142 | return -EINVAL; | 144 | return -ENXIO; |
| 143 | 145 | ||
| 144 | mutex_lock(&kvm->slots_lock); | 146 | mutex_lock(&kvm->slots_lock); |
| 145 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 147 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { |
| @@ -162,7 +164,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
| 162 | struct kvm_coalesced_mmio_zone *z; | 164 | struct kvm_coalesced_mmio_zone *z; |
| 163 | 165 | ||
| 164 | if (dev == NULL) | 166 | if (dev == NULL) |
| 165 | return -EINVAL; | 167 | return -ENXIO; |
| 166 | 168 | ||
| 167 | mutex_lock(&kvm->slots_lock); | 169 | mutex_lock(&kvm->slots_lock); |
| 168 | 170 | ||
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 11692b9e8830..d2f06be63354 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
| @@ -127,7 +127,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) | |||
| 127 | int i, r = 0; | 127 | int i, r = 0; |
| 128 | struct kvm_memslots *slots; | 128 | struct kvm_memslots *slots; |
| 129 | 129 | ||
| 130 | slots = rcu_dereference(kvm->memslots); | 130 | slots = kvm_memslots(kvm); |
| 131 | 131 | ||
| 132 | for (i = 0; i < slots->nmemslots; i++) { | 132 | for (i = 0; i < slots->nmemslots; i++) { |
| 133 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); | 133 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); |
| @@ -286,7 +286,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) | |||
| 286 | int i; | 286 | int i; |
| 287 | struct kvm_memslots *slots; | 287 | struct kvm_memslots *slots; |
| 288 | 288 | ||
| 289 | slots = rcu_dereference(kvm->memslots); | 289 | slots = kvm_memslots(kvm); |
| 290 | 290 | ||
| 291 | for (i = 0; i < slots->nmemslots; i++) { | 291 | for (i = 0; i < slots->nmemslots; i++) { |
| 292 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, | 292 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c82ae2492634..f032806a212f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -422,9 +422,6 @@ static struct kvm *kvm_create_vm(void) | |||
| 422 | spin_lock(&kvm_lock); | 422 | spin_lock(&kvm_lock); |
| 423 | list_add(&kvm->vm_list, &vm_list); | 423 | list_add(&kvm->vm_list, &vm_list); |
| 424 | spin_unlock(&kvm_lock); | 424 | spin_unlock(&kvm_lock); |
| 425 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
| 426 | kvm_coalesced_mmio_init(kvm); | ||
| 427 | #endif | ||
| 428 | out: | 425 | out: |
| 429 | return kvm; | 426 | return kvm; |
| 430 | 427 | ||
| @@ -560,6 +557,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 560 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 557 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
| 561 | npages = mem->memory_size >> PAGE_SHIFT; | 558 | npages = mem->memory_size >> PAGE_SHIFT; |
| 562 | 559 | ||
| 560 | r = -EINVAL; | ||
| 561 | if (npages > KVM_MEM_MAX_NR_PAGES) | ||
| 562 | goto out; | ||
| 563 | |||
| 563 | if (!npages) | 564 | if (!npages) |
| 564 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; | 565 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; |
| 565 | 566 | ||
| @@ -833,7 +834,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); | |||
| 833 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) | 834 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) |
| 834 | { | 835 | { |
| 835 | int i; | 836 | int i; |
| 836 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | 837 | struct kvm_memslots *slots = kvm_memslots(kvm); |
| 837 | 838 | ||
| 838 | for (i = 0; i < slots->nmemslots; ++i) { | 839 | for (i = 0; i < slots->nmemslots; ++i) { |
| 839 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 840 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
| @@ -855,7 +856,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
| 855 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 856 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
| 856 | { | 857 | { |
| 857 | int i; | 858 | int i; |
| 858 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | 859 | struct kvm_memslots *slots = kvm_memslots(kvm); |
| 859 | 860 | ||
| 860 | gfn = unalias_gfn_instantiation(kvm, gfn); | 861 | gfn = unalias_gfn_instantiation(kvm, gfn); |
| 861 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 862 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
| @@ -899,7 +900,7 @@ out: | |||
| 899 | int memslot_id(struct kvm *kvm, gfn_t gfn) | 900 | int memslot_id(struct kvm *kvm, gfn_t gfn) |
| 900 | { | 901 | { |
| 901 | int i; | 902 | int i; |
| 902 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | 903 | struct kvm_memslots *slots = kvm_memslots(kvm); |
| 903 | struct kvm_memory_slot *memslot = NULL; | 904 | struct kvm_memory_slot *memslot = NULL; |
| 904 | 905 | ||
| 905 | gfn = unalias_gfn(kvm, gfn); | 906 | gfn = unalias_gfn(kvm, gfn); |
| @@ -914,6 +915,11 @@ int memslot_id(struct kvm *kvm, gfn_t gfn) | |||
| 914 | return memslot - slots->memslots; | 915 | return memslot - slots->memslots; |
| 915 | } | 916 | } |
| 916 | 917 | ||
| 918 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | ||
| 919 | { | ||
| 920 | return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; | ||
| 921 | } | ||
| 922 | |||
| 917 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 923 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
| 918 | { | 924 | { |
| 919 | struct kvm_memory_slot *slot; | 925 | struct kvm_memory_slot *slot; |
| @@ -922,7 +928,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
| 922 | slot = gfn_to_memslot_unaliased(kvm, gfn); | 928 | slot = gfn_to_memslot_unaliased(kvm, gfn); |
| 923 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 929 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
| 924 | return bad_hva(); | 930 | return bad_hva(); |
| 925 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | 931 | return gfn_to_hva_memslot(slot, gfn); |
| 926 | } | 932 | } |
| 927 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 933 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
| 928 | 934 | ||
| @@ -972,11 +978,6 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | |||
| 972 | } | 978 | } |
| 973 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | 979 | EXPORT_SYMBOL_GPL(gfn_to_pfn); |
| 974 | 980 | ||
| 975 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | ||
| 976 | { | ||
| 977 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | ||
| 978 | } | ||
| 979 | |||
| 980 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 981 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, |
| 981 | struct kvm_memory_slot *slot, gfn_t gfn) | 982 | struct kvm_memory_slot *slot, gfn_t gfn) |
| 982 | { | 983 | { |
| @@ -1190,13 +1191,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
| 1190 | memslot = gfn_to_memslot_unaliased(kvm, gfn); | 1191 | memslot = gfn_to_memslot_unaliased(kvm, gfn); |
| 1191 | if (memslot && memslot->dirty_bitmap) { | 1192 | if (memslot && memslot->dirty_bitmap) { |
| 1192 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1193 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
| 1193 | unsigned long *p = memslot->dirty_bitmap + | ||
| 1194 | rel_gfn / BITS_PER_LONG; | ||
| 1195 | int offset = rel_gfn % BITS_PER_LONG; | ||
| 1196 | 1194 | ||
| 1197 | /* avoid RMW */ | 1195 | generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); |
| 1198 | if (!generic_test_le_bit(offset, p)) | ||
| 1199 | generic___set_le_bit(offset, p); | ||
| 1200 | } | 1196 | } |
| 1201 | } | 1197 | } |
| 1202 | 1198 | ||
| @@ -1609,7 +1605,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 1609 | r = -EFAULT; | 1605 | r = -EFAULT; |
| 1610 | if (copy_from_user(&zone, argp, sizeof zone)) | 1606 | if (copy_from_user(&zone, argp, sizeof zone)) |
| 1611 | goto out; | 1607 | goto out; |
| 1612 | r = -ENXIO; | ||
| 1613 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); | 1608 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); |
| 1614 | if (r) | 1609 | if (r) |
| 1615 | goto out; | 1610 | goto out; |
| @@ -1621,7 +1616,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 1621 | r = -EFAULT; | 1616 | r = -EFAULT; |
| 1622 | if (copy_from_user(&zone, argp, sizeof zone)) | 1617 | if (copy_from_user(&zone, argp, sizeof zone)) |
| 1623 | goto out; | 1618 | goto out; |
| 1624 | r = -ENXIO; | ||
| 1625 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); | 1619 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); |
| 1626 | if (r) | 1620 | if (r) |
| 1627 | goto out; | 1621 | goto out; |
| @@ -1755,12 +1749,19 @@ static struct file_operations kvm_vm_fops = { | |||
| 1755 | 1749 | ||
| 1756 | static int kvm_dev_ioctl_create_vm(void) | 1750 | static int kvm_dev_ioctl_create_vm(void) |
| 1757 | { | 1751 | { |
| 1758 | int fd; | 1752 | int fd, r; |
| 1759 | struct kvm *kvm; | 1753 | struct kvm *kvm; |
| 1760 | 1754 | ||
| 1761 | kvm = kvm_create_vm(); | 1755 | kvm = kvm_create_vm(); |
| 1762 | if (IS_ERR(kvm)) | 1756 | if (IS_ERR(kvm)) |
| 1763 | return PTR_ERR(kvm); | 1757 | return PTR_ERR(kvm); |
| 1758 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
| 1759 | r = kvm_coalesced_mmio_init(kvm); | ||
| 1760 | if (r < 0) { | ||
| 1761 | kvm_put_kvm(kvm); | ||
| 1762 | return r; | ||
| 1763 | } | ||
| 1764 | #endif | ||
| 1764 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); | 1765 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); |
| 1765 | if (fd < 0) | 1766 | if (fd < 0) |
| 1766 | kvm_put_kvm(kvm); | 1767 | kvm_put_kvm(kvm); |
| @@ -1928,11 +1929,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
| 1928 | cpu); | 1929 | cpu); |
| 1929 | hardware_disable(NULL); | 1930 | hardware_disable(NULL); |
| 1930 | break; | 1931 | break; |
| 1931 | case CPU_UP_CANCELED: | ||
| 1932 | printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", | ||
| 1933 | cpu); | ||
| 1934 | smp_call_function_single(cpu, hardware_disable, NULL, 1); | ||
| 1935 | break; | ||
| 1936 | case CPU_ONLINE: | 1932 | case CPU_ONLINE: |
| 1937 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", | 1933 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", |
| 1938 | cpu); | 1934 | cpu); |
| @@ -1991,7 +1987,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 1991 | int len, const void *val) | 1987 | int len, const void *val) |
| 1992 | { | 1988 | { |
| 1993 | int i; | 1989 | int i; |
| 1994 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | 1990 | struct kvm_io_bus *bus; |
| 1991 | |||
| 1992 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||
| 1995 | for (i = 0; i < bus->dev_count; i++) | 1993 | for (i = 0; i < bus->dev_count; i++) |
| 1996 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | 1994 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) |
| 1997 | return 0; | 1995 | return 0; |
| @@ -2003,8 +2001,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 2003 | int len, void *val) | 2001 | int len, void *val) |
| 2004 | { | 2002 | { |
| 2005 | int i; | 2003 | int i; |
| 2006 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | 2004 | struct kvm_io_bus *bus; |
| 2007 | 2005 | ||
| 2006 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||
| 2008 | for (i = 0; i < bus->dev_count; i++) | 2007 | for (i = 0; i < bus->dev_count; i++) |
| 2009 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | 2008 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) |
| 2010 | return 0; | 2009 | return 0; |
| @@ -2179,7 +2178,7 @@ static void kvm_sched_out(struct preempt_notifier *pn, | |||
| 2179 | kvm_arch_vcpu_put(vcpu); | 2178 | kvm_arch_vcpu_put(vcpu); |
| 2180 | } | 2179 | } |
| 2181 | 2180 | ||
| 2182 | int kvm_init(void *opaque, unsigned int vcpu_size, | 2181 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
| 2183 | struct module *module) | 2182 | struct module *module) |
| 2184 | { | 2183 | { |
| 2185 | int r; | 2184 | int r; |
| @@ -2229,8 +2228,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
| 2229 | goto out_free_4; | 2228 | goto out_free_4; |
| 2230 | 2229 | ||
| 2231 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ | 2230 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ |
| 2232 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, | 2231 | if (!vcpu_align) |
| 2233 | __alignof__(struct kvm_vcpu), | 2232 | vcpu_align = __alignof__(struct kvm_vcpu); |
| 2233 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, | ||
| 2234 | 0, NULL); | 2234 | 0, NULL); |
| 2235 | if (!kvm_vcpu_cache) { | 2235 | if (!kvm_vcpu_cache) { |
| 2236 | r = -ENOMEM; | 2236 | r = -ENOMEM; |
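With the extra vcpu_align parameter, an architecture module's init path would now look roughly like the hedged sketch below; the names are placeholders, and passing 0 keeps the old __alignof__(struct kvm_vcpu) behaviour.

/* Hedged sketch of a caller of the new kvm_init() signature; struct
 * my_arch_vcpu and my_arch_ops are placeholders for an arch backend
 * whose vcpu structure needs stricter alignment (e.g. for fxsave). */
static int __init my_arch_module_init(void)
{
	return kvm_init(&my_arch_ops, sizeof(struct my_arch_vcpu),
			__alignof__(struct my_arch_vcpu), THIS_MODULE);
}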
| @@ -2279,7 +2279,6 @@ EXPORT_SYMBOL_GPL(kvm_init); | |||
| 2279 | 2279 | ||
| 2280 | void kvm_exit(void) | 2280 | void kvm_exit(void) |
| 2281 | { | 2281 | { |
| 2282 | tracepoint_synchronize_unregister(); | ||
| 2283 | kvm_exit_debug(); | 2282 | kvm_exit_debug(); |
| 2284 | misc_deregister(&kvm_dev); | 2283 | misc_deregister(&kvm_dev); |
| 2285 | kmem_cache_destroy(kvm_vcpu_cache); | 2284 | kmem_cache_destroy(kvm_vcpu_cache); |
