| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-04 13:43:01 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-04 13:43:01 -0400 |
| commit | 5e83f6fbdb020b70c0e413312801424d13c58d68 (patch) | |
| tree | ca270178fa891813dbc47751c331fed975d3766c | |
| parent | fe445c6e2cb62a566e1a89f8798de11459975710 (diff) | |
| parent | 3444d7da1839b851eefedd372978d8a982316c36 (diff) | |
Merge branch 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (198 commits)
KVM: VMX: Fix host GDT.LIMIT corruption
KVM: MMU: using __xchg_spte more smarter
KVM: MMU: cleanup spte set and accssed/dirty tracking
KVM: MMU: don't atomicly set spte if it's not present
KVM: MMU: fix page dirty tracking lost while sync page
KVM: MMU: fix broken page accessed tracking with ept enabled
KVM: MMU: add missing reserved bits check in speculative path
KVM: MMU: fix mmu notifier invalidate handler for huge spte
KVM: x86 emulator: fix xchg instruction emulation
KVM: x86: Call mask notifiers from pic
KVM: x86: never re-execute instruction with enabled tdp
KVM: Document KVM_GET_SUPPORTED_CPUID2 ioctl
KVM: x86: emulator: inc/dec can have lock prefix
KVM: MMU: Eliminate redundant temporaries in FNAME(fetch)
KVM: MMU: Validate all gptes during fetch, not just those used for new pages
KVM: MMU: Simplify spte fetch() function
KVM: MMU: Add gpte_valid() helper
KVM: MMU: Add validate_direct_spte() helper
KVM: MMU: Add drop_large_spte() helper
KVM: MMU: Use __set_spte to link shadow pages
...
63 files changed, 3328 insertions, 2103 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 79cb554761af..b273d35039ed 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
| @@ -487,17 +487,6 @@ Who: Jan Kiszka <jan.kiszka@web.de> | |||
| 487 | 487 | ||
| 488 | ---------------------------- | 488 | ---------------------------- |
| 489 | 489 | ||
| 490 | What: KVM memory aliases support | ||
| 491 | When: July 2010 | ||
| 492 | Why: Memory aliasing support is used for speeding up guest vga access | ||
| 493 | through the vga windows. | ||
| 494 | |||
| 495 | Modern userspace no longer uses this feature, so it's just bitrotted | ||
| 496 | code and can be removed with no impact. | ||
| 497 | Who: Avi Kivity <avi@redhat.com> | ||
| 498 | |||
| 499 | ---------------------------- | ||
| 500 | |||
| 501 | What: xtime, wall_to_monotonic | 490 | What: xtime, wall_to_monotonic |
| 502 | When: 2.6.36+ | 491 | When: 2.6.36+ |
| 503 | Files: kernel/time/timekeeping.c include/linux/time.h | 492 | Files: kernel/time/timekeeping.c include/linux/time.h |
| @@ -508,16 +497,6 @@ Who: John Stultz <johnstul@us.ibm.com> | |||
| 508 | 497 | ||
| 509 | ---------------------------- | 498 | ---------------------------- |
| 510 | 499 | ||
| 511 | What: KVM kernel-allocated memory slots | ||
| 512 | When: July 2010 | ||
| 513 | Why: Since 2.6.25, kvm supports user-allocated memory slots, which are | ||
| 514 | much more flexible than kernel-allocated slots. All current userspace | ||
| 515 | supports the newer interface and this code can be removed with no | ||
| 516 | impact. | ||
| 517 | Who: Avi Kivity <avi@redhat.com> | ||
| 518 | |||
| 519 | ---------------------------- | ||
| 520 | |||
| 521 | What: KVM paravirt mmu host support | 500 | What: KVM paravirt mmu host support |
| 522 | When: January 2011 | 501 | When: January 2011 |
| 523 | Why: The paravirt mmu host support is slower than non-paravirt mmu, both | 502 | Why: The paravirt mmu host support is slower than non-paravirt mmu, both |
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index a237518e51b9..5f5b64982b1a 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt | |||
| @@ -126,6 +126,10 @@ user fills in the size of the indices array in nmsrs, and in return | |||
| 126 | kvm adjusts nmsrs to reflect the actual number of msrs and fills in | 126 | kvm adjusts nmsrs to reflect the actual number of msrs and fills in |
| 127 | the indices array with their numbers. | 127 | the indices array with their numbers. |
| 128 | 128 | ||
| 129 | Note: if kvm indicates support for MCE (KVM_CAP_MCE), then the MCE bank MSRs are | ||
| 130 | not returned in the MSR list, as different vcpus can have a different number | ||
| 131 | of banks, as set via the KVM_X86_SETUP_MCE ioctl. | ||
| 132 | |||
| 129 | 4.4 KVM_CHECK_EXTENSION | 133 | 4.4 KVM_CHECK_EXTENSION |
| 130 | 134 | ||
| 131 | Capability: basic | 135 | Capability: basic |
| @@ -160,29 +164,7 @@ Type: vm ioctl | |||
| 160 | Parameters: struct kvm_memory_region (in) | 164 | Parameters: struct kvm_memory_region (in) |
| 161 | Returns: 0 on success, -1 on error | 165 | Returns: 0 on success, -1 on error |
| 162 | 166 | ||
| 163 | struct kvm_memory_region { | 167 | This ioctl is obsolete and has been removed. |
| 164 | __u32 slot; | ||
| 165 | __u32 flags; | ||
| 166 | __u64 guest_phys_addr; | ||
| 167 | __u64 memory_size; /* bytes */ | ||
| 168 | }; | ||
| 169 | |||
| 170 | /* for kvm_memory_region::flags */ | ||
| 171 | #define KVM_MEM_LOG_DIRTY_PAGES 1UL | ||
| 172 | |||
| 173 | This ioctl allows the user to create or modify a guest physical memory | ||
| 174 | slot. When changing an existing slot, it may be moved in the guest | ||
| 175 | physical memory space, or its flags may be modified. It may not be | ||
| 176 | resized. Slots may not overlap. | ||
| 177 | |||
| 178 | The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which | ||
| 179 | instructs kvm to keep track of writes to memory within the slot. See | ||
| 180 | the KVM_GET_DIRTY_LOG ioctl. | ||
| 181 | |||
| 182 | It is recommended to use the KVM_SET_USER_MEMORY_REGION ioctl instead | ||
| 183 | of this API, if available. This newer API allows placing guest memory | ||
| 184 | at specified locations in the host address space, yielding better | ||
| 185 | control and easy access. | ||
| 186 | 168 | ||
| 187 | 4.6 KVM_CREATE_VCPU | 169 | 4.6 KVM_CREATE_VCPU |
| 188 | 170 | ||
| @@ -226,17 +208,7 @@ Type: vm ioctl | |||
| 226 | Parameters: struct kvm_memory_alias (in) | 208 | Parameters: struct kvm_memory_alias (in) |
| 227 | Returns: 0 (success), -1 (error) | 209 | Returns: 0 (success), -1 (error) |
| 228 | 210 | ||
| 229 | struct kvm_memory_alias { | 211 | This ioctl is obsolete and has been removed. |
| 230 | __u32 slot; /* this has a different namespace than memory slots */ | ||
| 231 | __u32 flags; | ||
| 232 | __u64 guest_phys_addr; | ||
| 233 | __u64 memory_size; | ||
| 234 | __u64 target_phys_addr; | ||
| 235 | }; | ||
| 236 | |||
| 237 | Defines a guest physical address space region as an alias to another | ||
| 238 | region. Useful for aliased address, for example the VGA low memory | ||
| 239 | window. Should not be used with userspace memory. | ||
| 240 | 212 | ||
| 241 | 4.9 KVM_RUN | 213 | 4.9 KVM_RUN |
| 242 | 214 | ||
| @@ -892,6 +864,174 @@ arguments. | |||
| 892 | This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel | 864 | This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel |
| 893 | irqchip, the multiprocessing state must be maintained by userspace. | 865 | irqchip, the multiprocessing state must be maintained by userspace. |
| 894 | 866 | ||
| 867 | 4.39 KVM_SET_IDENTITY_MAP_ADDR | ||
| 868 | |||
| 869 | Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR | ||
| 870 | Architectures: x86 | ||
| 871 | Type: vm ioctl | ||
| 872 | Parameters: unsigned long identity (in) | ||
| 873 | Returns: 0 on success, -1 on error | ||
| 874 | |||
| 875 | This ioctl defines the physical address of a one-page region in the guest | ||
| 876 | physical address space. The region must be within the first 4GB of the | ||
| 877 | guest physical address space and must not conflict with any memory slot | ||
| 878 | or any mmio address. The guest may malfunction if it accesses this memory | ||
| 879 | region. | ||
| 880 | |||
| 881 | This ioctl is required on Intel-based hosts. This is needed on Intel hardware | ||
| 882 | because of a quirk in the virtualization implementation (see the internals | ||
| 883 | documentation when it pops into existence). | ||
| 884 | |||
| 885 | 4.40 KVM_SET_BOOT_CPU_ID | ||
| 886 | |||
| 887 | Capability: KVM_CAP_SET_BOOT_CPU_ID | ||
| 888 | Architectures: x86, ia64 | ||
| 889 | Type: vm ioctl | ||
| 890 | Parameters: unsigned long vcpu_id | ||
| 891 | Returns: 0 on success, -1 on error | ||
| 892 | |||
| 893 | Define which vcpu is the Bootstrap Processor (BSP). Values are the same | ||
| 894 | as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default | ||
| 895 | is vcpu 0. | ||
| 896 | |||
| 897 | 4.41 KVM_GET_XSAVE | ||
| 898 | |||
| 899 | Capability: KVM_CAP_XSAVE | ||
| 900 | Architectures: x86 | ||
| 901 | Type: vcpu ioctl | ||
| 902 | Parameters: struct kvm_xsave (out) | ||
| 903 | Returns: 0 on success, -1 on error | ||
| 904 | |||
| 905 | struct kvm_xsave { | ||
| 906 | __u32 region[1024]; | ||
| 907 | }; | ||
| 908 | |||
| 909 | This ioctl copies the current vcpu's xsave struct to userspace. | ||
| 910 | |||
| 911 | 4.42 KVM_SET_XSAVE | ||
| 912 | |||
| 913 | Capability: KVM_CAP_XSAVE | ||
| 914 | Architectures: x86 | ||
| 915 | Type: vcpu ioctl | ||
| 916 | Parameters: struct kvm_xsave (in) | ||
| 917 | Returns: 0 on success, -1 on error | ||
| 918 | |||
| 919 | struct kvm_xsave { | ||
| 920 | __u32 region[1024]; | ||
| 921 | }; | ||
| 922 | |||
| 923 | This ioctl copies userspace's xsave struct to the kernel. | ||
| 924 | |||
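As a usage sketch, assuming vcpu file descriptors obtained via KVM_CREATE_VCPU and the definitions from <linux/kvm.h>, copying a vcpu's extended state through this ioctl pair looks roughly like:

        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Copy extended state (x87/SSE/AVX as exposed by XSAVE) from one
         * vcpu to another; error handling is reduced to the bare minimum. */
        static int copy_xsave(int src_vcpu_fd, int dst_vcpu_fd)
        {
                struct kvm_xsave xsave;

                if (ioctl(src_vcpu_fd, KVM_GET_XSAVE, &xsave) < 0)
                        return -1;      /* the host may lack KVM_CAP_XSAVE */
                return ioctl(dst_vcpu_fd, KVM_SET_XSAVE, &xsave);
        }

KVM_GET_XCRS and KVM_SET_XCRS below follow the same pattern with struct kvm_xcrs.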
| 925 | 4.43 KVM_GET_XCRS | ||
| 926 | |||
| 927 | Capability: KVM_CAP_XCRS | ||
| 928 | Architectures: x86 | ||
| 929 | Type: vcpu ioctl | ||
| 930 | Parameters: struct kvm_xcrs (out) | ||
| 931 | Returns: 0 on success, -1 on error | ||
| 932 | |||
| 933 | struct kvm_xcr { | ||
| 934 | __u32 xcr; | ||
| 935 | __u32 reserved; | ||
| 936 | __u64 value; | ||
| 937 | }; | ||
| 938 | |||
| 939 | struct kvm_xcrs { | ||
| 940 | __u32 nr_xcrs; | ||
| 941 | __u32 flags; | ||
| 942 | struct kvm_xcr xcrs[KVM_MAX_XCRS]; | ||
| 943 | __u64 padding[16]; | ||
| 944 | }; | ||
| 945 | |||
| 946 | This ioctl copies the current vcpu's xcrs to userspace. | ||
| 947 | |||
| 948 | 4.44 KVM_SET_XCRS | ||
| 949 | |||
| 950 | Capability: KVM_CAP_XCRS | ||
| 951 | Architectures: x86 | ||
| 952 | Type: vcpu ioctl | ||
| 953 | Parameters: struct kvm_xcrs (in) | ||
| 954 | Returns: 0 on success, -1 on error | ||
| 955 | |||
| 956 | struct kvm_xcr { | ||
| 957 | __u32 xcr; | ||
| 958 | __u32 reserved; | ||
| 959 | __u64 value; | ||
| 960 | }; | ||
| 961 | |||
| 962 | struct kvm_xcrs { | ||
| 963 | __u32 nr_xcrs; | ||
| 964 | __u32 flags; | ||
| 965 | struct kvm_xcr xcrs[KVM_MAX_XCRS]; | ||
| 966 | __u64 padding[16]; | ||
| 967 | }; | ||
| 968 | |||
| 969 | This ioctl sets the vcpu's xcrs to the values userspace specified. | ||
| 970 | |||
| 971 | 4.45 KVM_GET_SUPPORTED_CPUID | ||
| 972 | |||
| 973 | Capability: KVM_CAP_EXT_CPUID | ||
| 974 | Architectures: x86 | ||
| 975 | Type: system ioctl | ||
| 976 | Parameters: struct kvm_cpuid2 (in/out) | ||
| 977 | Returns: 0 on success, -1 on error | ||
| 978 | |||
| 979 | struct kvm_cpuid2 { | ||
| 980 | __u32 nent; | ||
| 981 | __u32 padding; | ||
| 982 | struct kvm_cpuid_entry2 entries[0]; | ||
| 983 | }; | ||
| 984 | |||
| 985 | #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 | ||
| 986 | #define KVM_CPUID_FLAG_STATEFUL_FUNC 2 | ||
| 987 | #define KVM_CPUID_FLAG_STATE_READ_NEXT 4 | ||
| 988 | |||
| 989 | struct kvm_cpuid_entry2 { | ||
| 990 | __u32 function; | ||
| 991 | __u32 index; | ||
| 992 | __u32 flags; | ||
| 993 | __u32 eax; | ||
| 994 | __u32 ebx; | ||
| 995 | __u32 ecx; | ||
| 996 | __u32 edx; | ||
| 997 | __u32 padding[3]; | ||
| 998 | }; | ||
| 999 | |||
| 1000 | This ioctl returns x86 cpuid features which are supported by both the hardware | ||
| 1001 | and kvm. Userspace can use the information returned by this ioctl to | ||
| 1002 | construct cpuid information (for KVM_SET_CPUID2) that is consistent with | ||
| 1003 | hardware, kernel, and userspace capabilities, and with user requirements (for | ||
| 1004 | example, the user may wish to constrain cpuid to emulate older hardware, | ||
| 1005 | or for feature consistency across a cluster). | ||
| 1006 | |||
| 1007 | Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure | ||
| 1008 | with the 'nent' field indicating the number of entries in the variable-size | ||
| 1009 | array 'entries'. If the number of entries is too low to describe the cpu | ||
| 1010 | capabilities, an error (E2BIG) is returned. If the number is too high, | ||
| 1011 | the 'nent' field is adjusted and an error (ENOMEM) is returned. If the | ||
| 1012 | number is just right, the 'nent' field is adjusted to the number of valid | ||
| 1013 | entries in the 'entries' array, which is then filled. | ||
| 1014 | |||
| 1015 | The entries returned are the host cpuid as returned by the cpuid instruction, | ||
| 1016 | with unknown or unsupported features masked out. The fields in each entry | ||
| 1017 | are defined as follows: | ||
| 1018 | |||
| 1019 | function: the eax value used to obtain the entry | ||
| 1020 | index: the ecx value used to obtain the entry (for entries that are | ||
| 1021 | affected by ecx) | ||
| 1022 | flags: an OR of zero or more of the following: | ||
| 1023 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX: | ||
| 1024 | if the index field is valid | ||
| 1025 | KVM_CPUID_FLAG_STATEFUL_FUNC: | ||
| 1026 | if cpuid for this function returns different values for successive | ||
| 1027 | invocations; there will be several entries with the same function, | ||
| 1028 | all with this flag set | ||
| 1029 | KVM_CPUID_FLAG_STATE_READ_NEXT: | ||
| 1030 | for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is | ||
| 1031 | the first entry to be read by a cpu | ||
| 1032 | eax, ebx, ecx, edx: the values returned by the cpuid instruction for | ||
| 1033 | this function/index combination | ||
| 1034 | |||
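A rough userspace sketch of the nent negotiation described above, assuming a /dev/kvm file descriptor and the definitions from <linux/kvm.h>; the initial size is arbitrary:

        #include <errno.h>
        #include <stdlib.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Query KVM_GET_SUPPORTED_CPUID, growing the entries array until it
         * is large enough (E2BIG means "too small", as described above). */
        static struct kvm_cpuid2 *get_supported_cpuid(int kvm_fd)
        {
                int nent = 32;
                struct kvm_cpuid2 *cpuid;

                for (;;) {
                        cpuid = calloc(1, sizeof(*cpuid) +
                                          nent * sizeof(struct kvm_cpuid_entry2));
                        if (!cpuid)
                                return NULL;
                        cpuid->nent = nent;
                        if (ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid) == 0)
                                return cpuid;   /* nent now holds the valid count */
                        free(cpuid);
                        if (errno != E2BIG)
                                return NULL;
                        nent *= 2;              /* too small: retry with more room */
                }
        }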
| 895 | 5. The kvm_run structure | 1035 | 5. The kvm_run structure |
| 896 | 1036 | ||
| 897 | Application code obtains a pointer to the kvm_run structure by | 1037 | Application code obtains a pointer to the kvm_run structure by |
diff --git a/Documentation/kvm/mmu.txt b/Documentation/kvm/mmu.txt index aaed6ab9d7ab..142cc5136650 100644 --- a/Documentation/kvm/mmu.txt +++ b/Documentation/kvm/mmu.txt | |||
| @@ -77,10 +77,10 @@ Memory | |||
| 77 | 77 | ||
| 78 | Guest memory (gpa) is part of the user address space of the process that is | 78 | Guest memory (gpa) is part of the user address space of the process that is |
| 79 | using kvm. Userspace defines the translation between guest addresses and user | 79 | using kvm. Userspace defines the translation between guest addresses and user |
| 80 | addresses (gpa->hva); note that two gpas may alias to the same gva, but not | 80 | addresses (gpa->hva); note that two gpas may alias to the same hva, but not |
| 81 | vice versa. | 81 | vice versa. |
| 82 | 82 | ||
| 83 | These gvas may be backed using any method available to the host: anonymous | 83 | These hvas may be backed using any method available to the host: anonymous |
| 84 | memory, file backed memory, and device memory. Memory might be paged by the | 84 | memory, file backed memory, and device memory. Memory might be paged by the |
| 85 | host at any time. | 85 | host at any time. |
| 86 | 86 | ||
| @@ -161,7 +161,7 @@ Shadow pages contain the following information: | |||
| 161 | role.cr4_pae: | 161 | role.cr4_pae: |
| 162 | Contains the value of cr4.pae for which the page is valid (e.g. whether | 162 | Contains the value of cr4.pae for which the page is valid (e.g. whether |
| 163 | 32-bit or 64-bit gptes are in use). | 163 | 32-bit or 64-bit gptes are in use). |
| 164 | role.cr4_nxe: | 164 | role.nxe: |
| 165 | Contains the value of efer.nxe for which the page is valid. | 165 | Contains the value of efer.nxe for which the page is valid. |
| 166 | role.cr0_wp: | 166 | role.cr0_wp: |
| 167 | Contains the value of cr0.wp for which the page is valid. | 167 | Contains the value of cr0.wp for which the page is valid. |
| @@ -180,7 +180,9 @@ Shadow pages contain the following information: | |||
| 180 | guest pages as leaves. | 180 | guest pages as leaves. |
| 181 | gfns: | 181 | gfns: |
| 182 | An array of 512 guest frame numbers, one for each present pte. Used to | 182 | An array of 512 guest frame numbers, one for each present pte. Used to |
| 183 | perform a reverse map from a pte to a gfn. | 183 | perform a reverse map from a pte to a gfn. When role.direct is set, any |
| 184 | element of this array can be calculated from the gfn field when used; in | ||
| 185 | this case, the array of gfns is not allocated. See role.direct and gfn. | ||
| 184 | slot_bitmap: | 186 | slot_bitmap: |
| 185 | A bitmap containing one bit per memory slot. If the page contains a pte | 187 | A bitmap containing one bit per memory slot. If the page contains a pte |
| 186 | mapping a page from memory slot n, then bit n of slot_bitmap will be set | 188 | mapping a page from memory slot n, then bit n of slot_bitmap will be set |
| @@ -296,6 +298,48 @@ Host translation updates: | |||
| 296 | - look up affected sptes through reverse map | 298 | - look up affected sptes through reverse map |
| 297 | - drop (or update) translations | 299 | - drop (or update) translations |
| 298 | 300 | ||
| 301 | Emulating cr0.wp | ||
| 302 | ================ | ||
| 303 | |||
| 304 | If tdp is not enabled, the host must keep cr0.wp=1 so page write protection | ||
| 305 | works for the guest kernel, not guest userspace. When the guest | ||
| 306 | cr0.wp=1, this does not present a problem. However when the guest cr0.wp=0, | ||
| 307 | we cannot map the permissions for gpte.u=1, gpte.w=0 to any spte (the | ||
| 308 | semantics require allowing any guest kernel access plus user read access). | ||
| 309 | |||
| 310 | We handle this by mapping the permissions to two possible sptes, depending | ||
| 311 | on fault type: | ||
| 312 | |||
| 313 | - kernel write fault: spte.u=0, spte.w=1 (allows full kernel access, | ||
| 314 | disallows user access) | ||
| 315 | - read fault: spte.u=1, spte.w=0 (allows full read access, disallows kernel | ||
| 316 | write access) | ||
| 317 | |||
| 318 | (user write faults generate a #PF) | ||
| 319 | |||
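A purely illustrative helper (hypothetical names, not the kernel's actual code) that applies the two cases above for a gpte with u=1, w=0 while the guest has cr0.wp=0:

        #include <stdbool.h>

        struct spte_perms { bool user; bool write; };

        /* User write faults never reach this point: they already raise #PF. */
        static struct spte_perms wp0_spte_perms(bool fault_is_write, bool fault_is_user)
        {
                struct spte_perms p;

                if (fault_is_write && !fault_is_user) {
                        /* kernel write fault: full kernel access, no user access */
                        p.user = false;
                        p.write = true;
                } else {
                        /* read fault: read-only mapping, user readable */
                        p.user = true;
                        p.write = false;
                }
                return p;
        }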
| 320 | Large pages | ||
| 321 | =========== | ||
| 322 | |||
| 323 | The mmu supports all combinations of large and small guest and host pages. | ||
| 324 | Supported page sizes include 4k, 2M, 4M, and 1G. 4M pages are treated as | ||
| 325 | two separate 2M pages, on both guest and host, since the mmu always uses PAE | ||
| 326 | paging. | ||
| 327 | |||
| 328 | To instantiate a large spte, four constraints must be satisfied: | ||
| 329 | |||
| 330 | - the spte must point to a large host page | ||
| 331 | - the guest pte must be a large pte of at least equivalent size (if tdp is | ||
| 332 | enabled, there is no guest pte and this condition is satisfied) | ||
| 333 | - if the spte will be writeable, the large page frame may not overlap any | ||
| 334 | write-protected pages | ||
| 335 | - the guest page must be wholly contained by a single memory slot | ||
| 336 | |||
| 337 | To check the last two conditions, the mmu maintains a ->write_count set of | ||
| 338 | arrays for each memory slot and large page size. Every write protected page | ||
| 339 | causes its write_count to be incremented, thus preventing instantiation of | ||
| 340 | a large spte. The frames at the end of an unaligned memory slot have | ||
| 341 | artificially inflated ->write_counts so they can never be instantiated. | ||
| 342 | |||
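A sketch of the resulting check, with placeholder field and helper names (the in-kernel layout differs in detail): a large spte is only allowed when the large frame it covers has a zero write_count.

        /* Illustrative only; "write_count" indexing is a placeholder for the
         * kernel's per-slot, per-page-size arrays described above. */
        static bool slot_can_map_large(struct kvm_memory_slot *slot,
                                       gfn_t gfn, int level)
        {
                unsigned long idx = (gfn - slot->base_gfn) >>
                                        KVM_HPAGE_GFN_SHIFT(level);

                /* non-zero means some frame in this large page is
                 * write-protected, or the slot edge was inflated */
                return slot->write_count[level][idx] == 0;
        }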
| 299 | Further reading | 343 | Further reading |
| 300 | =============== | 344 | =============== |
| 301 | 345 | ||
diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt new file mode 100644 index 000000000000..8ddcfe84c09a --- /dev/null +++ b/Documentation/kvm/msr.txt | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | KVM-specific MSRs. | ||
| 2 | Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 | ||
| 3 | ===================================================== | ||
| 4 | |||
| 5 | KVM makes use of some custom MSRs to service some requests. | ||
| 6 | At present, this facility is only used by kvmclock. | ||
| 7 | |||
| 8 | Custom MSRs have a range reserved for them, which goes from | ||
| 9 | 0x4b564d00 to 0x4b564dff. There are MSRs outside this area, | ||
| 10 | but they are deprecated and their use is discouraged. | ||
| 11 | |||
| 12 | Custom MSR list | ||
| 13 | --------------- | ||
| 14 | |||
| 15 | The currently supported Custom MSR list is: | ||
| 16 | |||
| 17 | MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00 | ||
| 18 | |||
| 19 | data: 4-byte aligned physical address of a memory area which must be | ||
| 20 | in guest RAM. This memory is expected to hold a copy of the following | ||
| 21 | structure: | ||
| 22 | |||
| 23 | struct pvclock_wall_clock { | ||
| 24 | u32 version; | ||
| 25 | u32 sec; | ||
| 26 | u32 nsec; | ||
| 27 | } __attribute__((__packed__)); | ||
| 28 | |||
| 29 | whose data will be filled in by the hypervisor. The hypervisor is only | ||
| 30 | guaranteed to update this data at the moment of MSR write. | ||
| 31 | Users that want to reliably query this information more than once have | ||
| 32 | to write more than once to this MSR. Fields have the following meanings: | ||
| 33 | |||
| 34 | version: guest has to check version before and after grabbing | ||
| 35 | time information and check that they are both equal and even. | ||
| 36 | An odd version indicates an in-progress update. | ||
| 37 | |||
| 38 | sec: number of seconds for wallclock. | ||
| 39 | |||
| 40 | nsec: number of nanoseconds for wallclock. | ||
| 41 | |||
| 42 | Note that although MSRs are per-CPU entities, the effect of this | ||
| 43 | particular MSR is global. | ||
| 44 | |||
| 45 | Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid | ||
| 46 | leaf prior to usage. | ||
| 47 | |||
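A guest-side sketch of this protocol, assuming Linux-style helpers (wrmsrl, virt_to_phys, rmb) in the guest kernel and the structure above:

        /* Request a wall-clock update, then read it using the even/odd
         * version check described above.  Re-writing the MSR refreshes
         * the data if it needs to be queried again later. */
        static struct pvclock_wall_clock wall_clock;

        static void read_wall_clock(u32 *sec, u32 *nsec)
        {
                u32 version;

                wrmsrl(MSR_KVM_WALL_CLOCK_NEW, virt_to_phys(&wall_clock));
                do {
                        version = wall_clock.version;
                        rmb();          /* read fields only after version */
                        *sec = wall_clock.sec;
                        *nsec = wall_clock.nsec;
                        rmb();
                } while ((version & 1) || version != wall_clock.version);
        }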
| 48 | MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01 | ||
| 49 | |||
| 50 | data: 4-byte aligned physical address of a memory area which must be in | ||
| 51 | guest RAM, plus an enable bit in bit 0. This memory is expected to hold | ||
| 52 | a copy of the following structure: | ||
| 53 | |||
| 54 | struct pvclock_vcpu_time_info { | ||
| 55 | u32 version; | ||
| 56 | u32 pad0; | ||
| 57 | u64 tsc_timestamp; | ||
| 58 | u64 system_time; | ||
| 59 | u32 tsc_to_system_mul; | ||
| 60 | s8 tsc_shift; | ||
| 61 | u8 flags; | ||
| 62 | u8 pad[2]; | ||
| 63 | } __attribute__((__packed__)); /* 32 bytes */ | ||
| 64 | |||
| 65 | whose data will be filled in by the hypervisor periodically. Only one | ||
| 66 | write, or registration, is needed for each VCPU. The interval between | ||
| 67 | updates of this structure is arbitrary and implementation-dependent. | ||
| 68 | The hypervisor may update this structure at any time it sees fit until | ||
| 69 | anything with bit0 == 0 is written to it. | ||
| 70 | |||
| 71 | Fields have the following meanings: | ||
| 72 | |||
| 73 | version: guest has to check version before and after grabbing | ||
| 74 | time information and check that they are both equal and even. | ||
| 75 | An odd version indicates an in-progress update. | ||
| 76 | |||
| 77 | tsc_timestamp: the tsc value at the current VCPU at the time | ||
| 78 | of the update of this structure. Guests can subtract this value | ||
| 79 | from current tsc to derive a notion of elapsed time since the | ||
| 80 | structure update. | ||
| 81 | |||
| 82 | system_time: a host notion of monotonic time, including sleep | ||
| 83 | time at the time this structure was last updated. Unit is | ||
| 84 | nanoseconds. | ||
| 85 | |||
| 86 | tsc_to_system_mul: a function of the tsc frequency. One has | ||
| 87 | to multiply any tsc-related quantity by this value to get | ||
| 88 | a value in nanoseconds, besides dividing by 2^tsc_shift | ||
| 89 | |||
| 90 | tsc_shift: cycle to nanosecond divider, as a power of two, to | ||
| 91 | allow for shift rights. One has to shift right any tsc-related | ||
| 92 | quantity by this value to get a value in nanoseconds, besides | ||
| 93 | multiplying by tsc_to_system_mul. | ||
| 94 | |||
| 95 | With this information, guests can derive per-CPU time by | ||
| 96 | doing: | ||
| 97 | |||
| 98 | time = (current_tsc - tsc_timestamp) | ||
| 99 | time = (time * tsc_to_system_mul) >> tsc_shift | ||
| 100 | time = time + system_time | ||
| 101 | |||
| 102 | flags: bits in this field indicate extended capabilities | ||
| 103 | coordinated between the guest and the hypervisor. Availability | ||
| 104 | of specific flags has to be checked in 0x40000001 cpuid leaf. | ||
| 105 | Current flags are: | ||
| 106 | |||
| 107 | flag bit | cpuid bit | meaning | ||
| 108 | ------------------------------------------------------------- | ||
| 109 | | | time measures taken across | ||
| 110 | 0 | 24 | multiple cpus are guaranteed to | ||
| 111 | | | be monotonic | ||
| 112 | ------------------------------------------------------------- | ||
| 113 | |||
| 114 | Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid | ||
| 115 | leaf prior to usage. | ||
| 116 | |||
| 117 | |||
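Combining the version check with the formula above, a guest-side sketch: registration is one MSR write with the enable bit, rdtsc() stands in for a raw TSC read, and the multiply/shift follows the simplified recipe above (the real kvmclock code does the scaling in fixed point and handles negative shift values):

        static struct pvclock_vcpu_time_info hv_clock;

        static void kvmclock_register(void)
        {
                /* bit 0 set = enable periodic updates of hv_clock */
                wrmsrl(MSR_KVM_SYSTEM_TIME_NEW, virt_to_phys(&hv_clock) | 1);
        }

        static u64 kvmclock_read_ns(void)
        {
                u32 version;
                u64 ns;

                do {
                        version = hv_clock.version;
                        rmb();
                        ns = rdtsc() - hv_clock.tsc_timestamp;
                        ns = (ns * hv_clock.tsc_to_system_mul) >> hv_clock.tsc_shift;
                        ns += hv_clock.system_time;
                        rmb();
                } while ((version & 1) || version != hv_clock.version);

                return ns;      /* nanoseconds since host epoch of system_time */
        }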
| 118 | MSR_KVM_WALL_CLOCK: 0x11 | ||
| 119 | |||
| 120 | data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead. | ||
| 121 | |||
| 122 | This MSR falls outside the reserved KVM range and may be removed in the | ||
| 123 | future. Its usage is deprecated. | ||
| 124 | |||
| 125 | Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid | ||
| 126 | leaf prior to usage. | ||
| 127 | |||
| 128 | MSR_KVM_SYSTEM_TIME: 0x12 | ||
| 129 | |||
| 130 | data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead. | ||
| 131 | |||
| 132 | This MSR falls outside the reserved KVM range and may be removed in the | ||
| 133 | future. Its usage is deprecated. | ||
| 134 | |||
| 135 | Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid | ||
| 136 | leaf prior to usage. | ||
| 137 | |||
| 138 | The suggested algorithm for detecting kvmclock presence is then: | ||
| 139 | |||
| 140 | if (!kvm_para_available()) /* refer to cpuid.txt */ | ||
| 141 | return NON_PRESENT; | ||
| 142 | |||
| 143 | flags = cpuid_eax(0x40000001); | ||
| 144 | if (flags & (1 << 3)) { | ||
| 145 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; | ||
| 146 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; | ||
| 147 | return PRESENT; | ||
| 148 | } else if (flags & (1 << 0)) { | ||
| 149 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | ||
| 150 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | ||
| 151 | return PRESENT; | ||
| 152 | } else | ||
| 153 | return NON_PRESENT; | ||
diff --git a/Documentation/kvm/review-checklist.txt b/Documentation/kvm/review-checklist.txt new file mode 100644 index 000000000000..730475ae1b8d --- /dev/null +++ b/Documentation/kvm/review-checklist.txt | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | Review checklist for kvm patches | ||
| 2 | ================================ | ||
| 3 | |||
| 4 | 1. The patch must follow Documentation/CodingStyle and | ||
| 5 | Documentation/SubmittingPatches. | ||
| 6 | |||
| 7 | 2. Patches should be against kvm.git master branch. | ||
| 8 | |||
| 9 | 3. If the patch introduces or modifies a new userspace API: | ||
| 10 | - the API must be documented in Documentation/kvm/api.txt | ||
| 11 | - the API must be discoverable using KVM_CHECK_EXTENSION | ||
| 12 | |||
| 13 | 4. New state must include support for save/restore. | ||
| 14 | |||
| 15 | 5. New features must default to off (userspace should explicitly request them). | ||
| 16 | Performance improvements can and should default to on. | ||
| 17 | |||
| 18 | 6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2 | ||
| 19 | |||
| 20 | 7. Emulator changes should be accompanied by unit tests in the qemu-kvm.git | ||
| 21 | kvm/test directory. | ||
| 22 | |||
| 23 | 8. Changes should be vendor neutral when possible. Changes to common code | ||
| 24 | are better than duplicating changes to vendor code. | ||
| 25 | |||
| 26 | 9. Similarly, prefer changes to arch independent code over arch dependent | ||
| 27 | code. | ||
| 28 | |||
| 29 | 10. User/kernel interfaces and guest/host interfaces must be 64-bit clean | ||
| 30 | (all variables and sizes naturally aligned on 64-bit; use specific types | ||
| 31 | only - u64 rather than ulong). | ||
| 32 | |||
| 33 | 11. New guest visible features must either be documented in a hardware manual | ||
| 34 | or be accompanied by documentation. | ||
| 35 | |||
| 36 | 12. Features must be robust against reset and kexec - for example, shared | ||
| 37 | host/guest memory must be unshared to prevent the host from writing to | ||
| 38 | guest memory that the guest has not reserved for this purpose. | ||
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index a362e67e0ca6..2f229e5de498 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h | |||
| @@ -235,6 +235,7 @@ struct kvm_vm_data { | |||
| 235 | #define KVM_REQ_PTC_G 32 | 235 | #define KVM_REQ_PTC_G 32 |
| 236 | #define KVM_REQ_RESUME 33 | 236 | #define KVM_REQ_RESUME 33 |
| 237 | 237 | ||
| 238 | #define KVM_HPAGE_GFN_SHIFT(x) 0 | ||
| 238 | #define KVM_NR_PAGE_SIZES 1 | 239 | #define KVM_NR_PAGE_SIZES 1 |
| 239 | #define KVM_PAGES_PER_HPAGE(x) 1 | 240 | #define KVM_PAGES_PER_HPAGE(x) 1 |
| 240 | 241 | ||
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 21b701374f72..5cb58655cd5f 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
| @@ -725,8 +725,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 725 | int r; | 725 | int r; |
| 726 | sigset_t sigsaved; | 726 | sigset_t sigsaved; |
| 727 | 727 | ||
| 728 | vcpu_load(vcpu); | ||
| 729 | |||
| 730 | if (vcpu->sigset_active) | 728 | if (vcpu->sigset_active) |
| 731 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 729 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
| 732 | 730 | ||
| @@ -748,7 +746,6 @@ out: | |||
| 748 | if (vcpu->sigset_active) | 746 | if (vcpu->sigset_active) |
| 749 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 747 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 750 | 748 | ||
| 751 | vcpu_put(vcpu); | ||
| 752 | return r; | 749 | return r; |
| 753 | } | 750 | } |
| 754 | 751 | ||
| @@ -883,8 +880,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 883 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | 880 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); |
| 884 | int i; | 881 | int i; |
| 885 | 882 | ||
| 886 | vcpu_load(vcpu); | ||
| 887 | |||
| 888 | for (i = 0; i < 16; i++) { | 883 | for (i = 0; i < 16; i++) { |
| 889 | vpd->vgr[i] = regs->vpd.vgr[i]; | 884 | vpd->vgr[i] = regs->vpd.vgr[i]; |
| 890 | vpd->vbgr[i] = regs->vpd.vbgr[i]; | 885 | vpd->vbgr[i] = regs->vpd.vbgr[i]; |
| @@ -931,8 +926,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 931 | vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); | 926 | vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); |
| 932 | set_bit(KVM_REQ_RESUME, &vcpu->requests); | 927 | set_bit(KVM_REQ_RESUME, &vcpu->requests); |
| 933 | 928 | ||
| 934 | vcpu_put(vcpu); | ||
| 935 | |||
| 936 | return 0; | 929 | return 0; |
| 937 | } | 930 | } |
| 938 | 931 | ||
| @@ -1802,35 +1795,24 @@ void kvm_arch_exit(void) | |||
| 1802 | kvm_vmm_info = NULL; | 1795 | kvm_vmm_info = NULL; |
| 1803 | } | 1796 | } |
| 1804 | 1797 | ||
| 1805 | static int kvm_ia64_sync_dirty_log(struct kvm *kvm, | 1798 | static void kvm_ia64_sync_dirty_log(struct kvm *kvm, |
| 1806 | struct kvm_dirty_log *log) | 1799 | struct kvm_memory_slot *memslot) |
| 1807 | { | 1800 | { |
| 1808 | struct kvm_memory_slot *memslot; | 1801 | int i; |
| 1809 | int r, i; | ||
| 1810 | long base; | 1802 | long base; |
| 1811 | unsigned long n; | 1803 | unsigned long n; |
| 1812 | unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + | 1804 | unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + |
| 1813 | offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); | 1805 | offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); |
| 1814 | 1806 | ||
| 1815 | r = -EINVAL; | ||
| 1816 | if (log->slot >= KVM_MEMORY_SLOTS) | ||
| 1817 | goto out; | ||
| 1818 | |||
| 1819 | memslot = &kvm->memslots->memslots[log->slot]; | ||
| 1820 | r = -ENOENT; | ||
| 1821 | if (!memslot->dirty_bitmap) | ||
| 1822 | goto out; | ||
| 1823 | |||
| 1824 | n = kvm_dirty_bitmap_bytes(memslot); | 1807 | n = kvm_dirty_bitmap_bytes(memslot); |
| 1825 | base = memslot->base_gfn / BITS_PER_LONG; | 1808 | base = memslot->base_gfn / BITS_PER_LONG; |
| 1826 | 1809 | ||
| 1810 | spin_lock(&kvm->arch.dirty_log_lock); | ||
| 1827 | for (i = 0; i < n/sizeof(long); ++i) { | 1811 | for (i = 0; i < n/sizeof(long); ++i) { |
| 1828 | memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; | 1812 | memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; |
| 1829 | dirty_bitmap[base + i] = 0; | 1813 | dirty_bitmap[base + i] = 0; |
| 1830 | } | 1814 | } |
| 1831 | r = 0; | 1815 | spin_unlock(&kvm->arch.dirty_log_lock); |
| 1832 | out: | ||
| 1833 | return r; | ||
| 1834 | } | 1816 | } |
| 1835 | 1817 | ||
| 1836 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 1818 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
| @@ -1842,12 +1824,17 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 1842 | int is_dirty = 0; | 1824 | int is_dirty = 0; |
| 1843 | 1825 | ||
| 1844 | mutex_lock(&kvm->slots_lock); | 1826 | mutex_lock(&kvm->slots_lock); |
| 1845 | spin_lock(&kvm->arch.dirty_log_lock); | ||
| 1846 | 1827 | ||
| 1847 | r = kvm_ia64_sync_dirty_log(kvm, log); | 1828 | r = -EINVAL; |
| 1848 | if (r) | 1829 | if (log->slot >= KVM_MEMORY_SLOTS) |
| 1830 | goto out; | ||
| 1831 | |||
| 1832 | memslot = &kvm->memslots->memslots[log->slot]; | ||
| 1833 | r = -ENOENT; | ||
| 1834 | if (!memslot->dirty_bitmap) | ||
| 1849 | goto out; | 1835 | goto out; |
| 1850 | 1836 | ||
| 1837 | kvm_ia64_sync_dirty_log(kvm, memslot); | ||
| 1851 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 1838 | r = kvm_get_dirty_log(kvm, log, &is_dirty); |
| 1852 | if (r) | 1839 | if (r) |
| 1853 | goto out; | 1840 | goto out; |
| @@ -1855,14 +1842,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 1855 | /* If nothing is dirty, don't bother messing with page tables. */ | 1842 | /* If nothing is dirty, don't bother messing with page tables. */ |
| 1856 | if (is_dirty) { | 1843 | if (is_dirty) { |
| 1857 | kvm_flush_remote_tlbs(kvm); | 1844 | kvm_flush_remote_tlbs(kvm); |
| 1858 | memslot = &kvm->memslots->memslots[log->slot]; | ||
| 1859 | n = kvm_dirty_bitmap_bytes(memslot); | 1845 | n = kvm_dirty_bitmap_bytes(memslot); |
| 1860 | memset(memslot->dirty_bitmap, 0, n); | 1846 | memset(memslot->dirty_bitmap, 0, n); |
| 1861 | } | 1847 | } |
| 1862 | r = 0; | 1848 | r = 0; |
| 1863 | out: | 1849 | out: |
| 1864 | mutex_unlock(&kvm->slots_lock); | 1850 | mutex_unlock(&kvm->slots_lock); |
| 1865 | spin_unlock(&kvm->arch.dirty_log_lock); | ||
| 1866 | return r; | 1851 | return r; |
| 1867 | } | 1852 | } |
| 1868 | 1853 | ||
| @@ -1953,11 +1938,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 1953 | return vcpu->arch.timer_fired; | 1938 | return vcpu->arch.timer_fired; |
| 1954 | } | 1939 | } |
| 1955 | 1940 | ||
| 1956 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
| 1957 | { | ||
| 1958 | return gfn; | ||
| 1959 | } | ||
| 1960 | |||
| 1961 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 1941 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
| 1962 | { | 1942 | { |
| 1963 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || | 1943 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || |
| @@ -1967,9 +1947,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 1967 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 1947 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
| 1968 | struct kvm_mp_state *mp_state) | 1948 | struct kvm_mp_state *mp_state) |
| 1969 | { | 1949 | { |
| 1970 | vcpu_load(vcpu); | ||
| 1971 | mp_state->mp_state = vcpu->arch.mp_state; | 1950 | mp_state->mp_state = vcpu->arch.mp_state; |
| 1972 | vcpu_put(vcpu); | ||
| 1973 | return 0; | 1951 | return 0; |
| 1974 | } | 1952 | } |
| 1975 | 1953 | ||
| @@ -2000,10 +1978,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
| 2000 | { | 1978 | { |
| 2001 | int r = 0; | 1979 | int r = 0; |
| 2002 | 1980 | ||
| 2003 | vcpu_load(vcpu); | ||
| 2004 | vcpu->arch.mp_state = mp_state->mp_state; | 1981 | vcpu->arch.mp_state = mp_state->mp_state; |
| 2005 | if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) | 1982 | if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) |
| 2006 | r = vcpu_reset(vcpu); | 1983 | r = vcpu_reset(vcpu); |
| 2007 | vcpu_put(vcpu); | ||
| 2008 | return r; | 1984 | return r; |
| 2009 | } | 1985 | } |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6f74d93725a0..8274a2d43925 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
| @@ -115,7 +115,15 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); | |||
| 115 | extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); | 115 | extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); |
| 116 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); | 116 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); |
| 117 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); | 117 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); |
| 118 | extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); | 118 | |
| 119 | extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); | ||
| 120 | extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); | ||
| 121 | extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); | ||
| 122 | extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); | ||
| 123 | extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); | ||
| 124 | extern int kvmppc_mmu_hpte_sysinit(void); | ||
| 125 | extern void kvmppc_mmu_hpte_sysexit(void); | ||
| 126 | |||
| 119 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); | 127 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
| 120 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); | 128 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
| 121 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); | 129 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); |
diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h index 94f05de9ad04..c3d4f0518a67 100644 --- a/arch/powerpc/include/asm/kvm_fpu.h +++ b/arch/powerpc/include/asm/kvm_fpu.h | |||
| @@ -22,24 +22,24 @@ | |||
| 22 | 22 | ||
| 23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
| 24 | 24 | ||
| 25 | extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); | 25 | extern void fps_fres(u64 *fpscr, u32 *dst, u32 *src1); |
| 26 | extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); | 26 | extern void fps_frsqrte(u64 *fpscr, u32 *dst, u32 *src1); |
| 27 | extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); | 27 | extern void fps_fsqrts(u64 *fpscr, u32 *dst, u32 *src1); |
| 28 | 28 | ||
| 29 | extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | 29 | extern void fps_fadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); |
| 30 | extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | 30 | extern void fps_fdivs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); |
| 31 | extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | 31 | extern void fps_fmuls(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); |
| 32 | extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | 32 | extern void fps_fsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2); |
| 33 | 33 | ||
| 34 | extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | 34 | extern void fps_fmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, |
| 35 | u32 *src3); | 35 | u32 *src3); |
| 36 | extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | 36 | extern void fps_fmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, |
| 37 | u32 *src3); | 37 | u32 *src3); |
| 38 | extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | 38 | extern void fps_fnmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, |
| 39 | u32 *src3); | 39 | u32 *src3); |
| 40 | extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | 40 | extern void fps_fnmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, |
| 41 | u32 *src3); | 41 | u32 *src3); |
| 42 | extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | 42 | extern void fps_fsel(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2, |
| 43 | u32 *src3); | 43 | u32 *src3); |
| 44 | 44 | ||
| 45 | #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ | 45 | #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ |
| @@ -82,4 +82,7 @@ FPD_THREE_IN(fmadd) | |||
| 82 | FPD_THREE_IN(fnmsub) | 82 | FPD_THREE_IN(fnmsub) |
| 83 | FPD_THREE_IN(fnmadd) | 83 | FPD_THREE_IN(fnmadd) |
| 84 | 84 | ||
| 85 | extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr); | ||
| 86 | extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr); | ||
| 87 | |||
| 85 | #endif | 88 | #endif |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0c9ad869decd..b0b23c007d6e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
| @@ -35,10 +35,17 @@ | |||
| 35 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 35 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
| 36 | 36 | ||
| 37 | /* We don't currently support large pages. */ | 37 | /* We don't currently support large pages. */ |
| 38 | #define KVM_HPAGE_GFN_SHIFT(x) 0 | ||
| 38 | #define KVM_NR_PAGE_SIZES 1 | 39 | #define KVM_NR_PAGE_SIZES 1 |
| 39 | #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) | 40 | #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) |
| 40 | 41 | ||
| 41 | #define HPTEG_CACHE_NUM 1024 | 42 | #define HPTEG_CACHE_NUM (1 << 15) |
| 43 | #define HPTEG_HASH_BITS_PTE 13 | ||
| 44 | #define HPTEG_HASH_BITS_VPTE 13 | ||
| 45 | #define HPTEG_HASH_BITS_VPTE_LONG 5 | ||
| 46 | #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) | ||
| 47 | #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) | ||
| 48 | #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) | ||
| 42 | 49 | ||
| 43 | struct kvm; | 50 | struct kvm; |
| 44 | struct kvm_run; | 51 | struct kvm_run; |
| @@ -151,6 +158,9 @@ struct kvmppc_mmu { | |||
| 151 | }; | 158 | }; |
| 152 | 159 | ||
| 153 | struct hpte_cache { | 160 | struct hpte_cache { |
| 161 | struct hlist_node list_pte; | ||
| 162 | struct hlist_node list_vpte; | ||
| 163 | struct hlist_node list_vpte_long; | ||
| 154 | u64 host_va; | 164 | u64 host_va; |
| 155 | u64 pfn; | 165 | u64 pfn; |
| 156 | ulong slot; | 166 | ulong slot; |
| @@ -282,8 +292,10 @@ struct kvm_vcpu_arch { | |||
| 282 | unsigned long pending_exceptions; | 292 | unsigned long pending_exceptions; |
| 283 | 293 | ||
| 284 | #ifdef CONFIG_PPC_BOOK3S | 294 | #ifdef CONFIG_PPC_BOOK3S |
| 285 | struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; | 295 | struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; |
| 286 | int hpte_cache_offset; | 296 | struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; |
| 297 | struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; | ||
| 298 | int hpte_cache_count; | ||
| 287 | #endif | 299 | #endif |
| 288 | }; | 300 | }; |
| 289 | 301 | ||
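The hash list heads above replace the old linear hpte_cache[] array. A sketch of how an entry might be filed by effective address, assuming hash_64() from <linux/hash.h>; the real helpers live in the new arch/powerpc/kvm/book3s_mmu_hpte.c, also maintain the vpte lists, and bound the total number of cached entries (HPTEG_CACHE_NUM):

        #include <linux/hash.h>
        #include <linux/list.h>

        /* Illustrative helper names; the hash input and the per-list
         * maintenance differ in the actual book3s_mmu_hpte.c code. */
        static inline u64 hpte_hash_pte(ulong eaddr)
        {
                return hash_64(eaddr >> PAGE_SHIFT, HPTEG_HASH_BITS_PTE);
        }

        static void hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
        {
                u64 index = hpte_hash_pte(pte->pte.eaddr);

                hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
                vcpu->arch.hpte_cache_count++;
        }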
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 3b4dcc82a4c1..ab3e392ac63c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c | |||
| @@ -101,10 +101,6 @@ EXPORT_SYMBOL(pci_dram_offset); | |||
| 101 | EXPORT_SYMBOL(start_thread); | 101 | EXPORT_SYMBOL(start_thread); |
| 102 | EXPORT_SYMBOL(kernel_thread); | 102 | EXPORT_SYMBOL(kernel_thread); |
| 103 | 103 | ||
| 104 | #ifdef CONFIG_PPC_FPU | ||
| 105 | EXPORT_SYMBOL_GPL(cvt_df); | ||
| 106 | EXPORT_SYMBOL_GPL(cvt_fd); | ||
| 107 | #endif | ||
| 108 | EXPORT_SYMBOL(giveup_fpu); | 104 | EXPORT_SYMBOL(giveup_fpu); |
| 109 | #ifdef CONFIG_ALTIVEC | 105 | #ifdef CONFIG_ALTIVEC |
| 110 | EXPORT_SYMBOL(giveup_altivec); | 106 | EXPORT_SYMBOL(giveup_altivec); |
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 812312542e50..9b9b5cdea840 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c | |||
| @@ -316,7 +316,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, | |||
| 316 | gfn = gpaddr >> PAGE_SHIFT; | 316 | gfn = gpaddr >> PAGE_SHIFT; |
| 317 | new_page = gfn_to_page(vcpu->kvm, gfn); | 317 | new_page = gfn_to_page(vcpu->kvm, gfn); |
| 318 | if (is_error_page(new_page)) { | 318 | if (is_error_page(new_page)) { |
| 319 | printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); | 319 | printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", |
| 320 | (unsigned long long)gfn); | ||
| 320 | kvm_release_page_clean(new_page); | 321 | kvm_release_page_clean(new_page); |
| 321 | return; | 322 | return; |
| 322 | } | 323 | } |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ff436066bf77..d45c818a384c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
| @@ -45,6 +45,7 @@ kvm-book3s_64-objs := \ | |||
| 45 | book3s.o \ | 45 | book3s.o \ |
| 46 | book3s_emulate.o \ | 46 | book3s_emulate.o \ |
| 47 | book3s_interrupts.o \ | 47 | book3s_interrupts.o \ |
| 48 | book3s_mmu_hpte.o \ | ||
| 48 | book3s_64_mmu_host.o \ | 49 | book3s_64_mmu_host.o \ |
| 49 | book3s_64_mmu.o \ | 50 | book3s_64_mmu.o \ |
| 50 | book3s_32_mmu.o | 51 | book3s_32_mmu.o |
| @@ -57,6 +58,7 @@ kvm-book3s_32-objs := \ | |||
| 57 | book3s.o \ | 58 | book3s.o \ |
| 58 | book3s_emulate.o \ | 59 | book3s_emulate.o \ |
| 59 | book3s_interrupts.o \ | 60 | book3s_interrupts.o \ |
| 61 | book3s_mmu_hpte.o \ | ||
| 60 | book3s_32_mmu_host.o \ | 62 | book3s_32_mmu_host.o \ |
| 61 | book3s_32_mmu.o | 63 | book3s_32_mmu.o |
| 62 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) | 64 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b998abf1a63d..a3cef30d1d42 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
| @@ -1047,8 +1047,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 1047 | { | 1047 | { |
| 1048 | int i; | 1048 | int i; |
| 1049 | 1049 | ||
| 1050 | vcpu_load(vcpu); | ||
| 1051 | |||
| 1052 | regs->pc = kvmppc_get_pc(vcpu); | 1050 | regs->pc = kvmppc_get_pc(vcpu); |
| 1053 | regs->cr = kvmppc_get_cr(vcpu); | 1051 | regs->cr = kvmppc_get_cr(vcpu); |
| 1054 | regs->ctr = kvmppc_get_ctr(vcpu); | 1052 | regs->ctr = kvmppc_get_ctr(vcpu); |
| @@ -1069,8 +1067,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 1069 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1067 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 1070 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 1068 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
| 1071 | 1069 | ||
| 1072 | vcpu_put(vcpu); | ||
| 1073 | |||
| 1074 | return 0; | 1070 | return 0; |
| 1075 | } | 1071 | } |
| 1076 | 1072 | ||
| @@ -1078,8 +1074,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 1078 | { | 1074 | { |
| 1079 | int i; | 1075 | int i; |
| 1080 | 1076 | ||
| 1081 | vcpu_load(vcpu); | ||
| 1082 | |||
| 1083 | kvmppc_set_pc(vcpu, regs->pc); | 1077 | kvmppc_set_pc(vcpu, regs->pc); |
| 1084 | kvmppc_set_cr(vcpu, regs->cr); | 1078 | kvmppc_set_cr(vcpu, regs->cr); |
| 1085 | kvmppc_set_ctr(vcpu, regs->ctr); | 1079 | kvmppc_set_ctr(vcpu, regs->ctr); |
| @@ -1099,8 +1093,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 1099 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1093 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 1100 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 1094 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
| 1101 | 1095 | ||
| 1102 | vcpu_put(vcpu); | ||
| 1103 | |||
| 1104 | return 0; | 1096 | return 0; |
| 1105 | } | 1097 | } |
| 1106 | 1098 | ||
| @@ -1110,8 +1102,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 1110 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | 1102 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); |
| 1111 | int i; | 1103 | int i; |
| 1112 | 1104 | ||
| 1113 | vcpu_load(vcpu); | ||
| 1114 | |||
| 1115 | sregs->pvr = vcpu->arch.pvr; | 1105 | sregs->pvr = vcpu->arch.pvr; |
| 1116 | 1106 | ||
| 1117 | sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; | 1107 | sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; |
| @@ -1131,8 +1121,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 1131 | } | 1121 | } |
| 1132 | } | 1122 | } |
| 1133 | 1123 | ||
| 1134 | vcpu_put(vcpu); | ||
| 1135 | |||
| 1136 | return 0; | 1124 | return 0; |
| 1137 | } | 1125 | } |
| 1138 | 1126 | ||
| @@ -1142,8 +1130,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 1142 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | 1130 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); |
| 1143 | int i; | 1131 | int i; |
| 1144 | 1132 | ||
| 1145 | vcpu_load(vcpu); | ||
| 1146 | |||
| 1147 | kvmppc_set_pvr(vcpu, sregs->pvr); | 1133 | kvmppc_set_pvr(vcpu, sregs->pvr); |
| 1148 | 1134 | ||
| 1149 | vcpu3s->sdr1 = sregs->u.s.sdr1; | 1135 | vcpu3s->sdr1 = sregs->u.s.sdr1; |
| @@ -1171,8 +1157,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 1171 | /* Flush the MMU after messing with the segments */ | 1157 | /* Flush the MMU after messing with the segments */ |
| 1172 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 1158 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
| 1173 | 1159 | ||
| 1174 | vcpu_put(vcpu); | ||
| 1175 | |||
| 1176 | return 0; | 1160 | return 0; |
| 1177 | } | 1161 | } |
| 1178 | 1162 | ||
| @@ -1309,12 +1293,17 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | |||
| 1309 | int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 1293 | int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
| 1310 | { | 1294 | { |
| 1311 | int ret; | 1295 | int ret; |
| 1312 | struct thread_struct ext_bkp; | 1296 | double fpr[32][TS_FPRWIDTH]; |
| 1297 | unsigned int fpscr; | ||
| 1298 | int fpexc_mode; | ||
| 1313 | #ifdef CONFIG_ALTIVEC | 1299 | #ifdef CONFIG_ALTIVEC |
| 1314 | bool save_vec = current->thread.used_vr; | 1300 | vector128 vr[32]; |
| 1301 | vector128 vscr; | ||
| 1302 | unsigned long uninitialized_var(vrsave); | ||
| 1303 | int used_vr; | ||
| 1315 | #endif | 1304 | #endif |
| 1316 | #ifdef CONFIG_VSX | 1305 | #ifdef CONFIG_VSX |
| 1317 | bool save_vsx = current->thread.used_vsr; | 1306 | int used_vsr; |
| 1318 | #endif | 1307 | #endif |
| 1319 | ulong ext_msr; | 1308 | ulong ext_msr; |
| 1320 | 1309 | ||
| @@ -1327,27 +1316,27 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1327 | /* Save FPU state in stack */ | 1316 | /* Save FPU state in stack */ |
| 1328 | if (current->thread.regs->msr & MSR_FP) | 1317 | if (current->thread.regs->msr & MSR_FP) |
| 1329 | giveup_fpu(current); | 1318 | giveup_fpu(current); |
| 1330 | memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); | 1319 | memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); |
| 1331 | ext_bkp.fpscr = current->thread.fpscr; | 1320 | fpscr = current->thread.fpscr.val; |
| 1332 | ext_bkp.fpexc_mode = current->thread.fpexc_mode; | 1321 | fpexc_mode = current->thread.fpexc_mode; |
| 1333 | 1322 | ||
| 1334 | #ifdef CONFIG_ALTIVEC | 1323 | #ifdef CONFIG_ALTIVEC |
| 1335 | /* Save Altivec state in stack */ | 1324 | /* Save Altivec state in stack */ |
| 1336 | if (save_vec) { | 1325 | used_vr = current->thread.used_vr; |
| 1326 | if (used_vr) { | ||
| 1337 | if (current->thread.regs->msr & MSR_VEC) | 1327 | if (current->thread.regs->msr & MSR_VEC) |
| 1338 | giveup_altivec(current); | 1328 | giveup_altivec(current); |
| 1339 | memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); | 1329 | memcpy(vr, current->thread.vr, sizeof(current->thread.vr)); |
| 1340 | ext_bkp.vscr = current->thread.vscr; | 1330 | vscr = current->thread.vscr; |
| 1341 | ext_bkp.vrsave = current->thread.vrsave; | 1331 | vrsave = current->thread.vrsave; |
| 1342 | } | 1332 | } |
| 1343 | ext_bkp.used_vr = current->thread.used_vr; | ||
| 1344 | #endif | 1333 | #endif |
| 1345 | 1334 | ||
| 1346 | #ifdef CONFIG_VSX | 1335 | #ifdef CONFIG_VSX |
| 1347 | /* Save VSX state in stack */ | 1336 | /* Save VSX state in stack */ |
| 1348 | if (save_vsx && (current->thread.regs->msr & MSR_VSX)) | 1337 | used_vsr = current->thread.used_vsr; |
| 1338 | if (used_vsr && (current->thread.regs->msr & MSR_VSX)) | ||
| 1349 | __giveup_vsx(current); | 1339 | __giveup_vsx(current); |
| 1350 | ext_bkp.used_vsr = current->thread.used_vsr; | ||
| 1351 | #endif | 1340 | #endif |
| 1352 | 1341 | ||
| 1353 | /* Remember the MSR with disabled extensions */ | 1342 | /* Remember the MSR with disabled extensions */ |
| @@ -1372,22 +1361,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1372 | kvmppc_giveup_ext(vcpu, MSR_VSX); | 1361 | kvmppc_giveup_ext(vcpu, MSR_VSX); |
| 1373 | 1362 | ||
| 1374 | /* Restore FPU state from stack */ | 1363 | /* Restore FPU state from stack */ |
| 1375 | memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr)); | 1364 | memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); |
| 1376 | current->thread.fpscr = ext_bkp.fpscr; | 1365 | current->thread.fpscr.val = fpscr; |
| 1377 | current->thread.fpexc_mode = ext_bkp.fpexc_mode; | 1366 | current->thread.fpexc_mode = fpexc_mode; |
| 1378 | 1367 | ||
| 1379 | #ifdef CONFIG_ALTIVEC | 1368 | #ifdef CONFIG_ALTIVEC |
| 1380 | /* Restore Altivec state from stack */ | 1369 | /* Restore Altivec state from stack */ |
| 1381 | if (save_vec && current->thread.used_vr) { | 1370 | if (used_vr && current->thread.used_vr) { |
| 1382 | memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); | 1371 | memcpy(current->thread.vr, vr, sizeof(current->thread.vr)); |
| 1383 | current->thread.vscr = ext_bkp.vscr; | 1372 | current->thread.vscr = vscr; |
| 1384 | current->thread.vrsave= ext_bkp.vrsave; | 1373 | current->thread.vrsave = vrsave; |
| 1385 | } | 1374 | } |
| 1386 | current->thread.used_vr = ext_bkp.used_vr; | 1375 | current->thread.used_vr = used_vr; |
| 1387 | #endif | 1376 | #endif |
| 1388 | 1377 | ||
| 1389 | #ifdef CONFIG_VSX | 1378 | #ifdef CONFIG_VSX |
| 1390 | current->thread.used_vsr = ext_bkp.used_vsr; | 1379 | current->thread.used_vsr = used_vsr; |
| 1391 | #endif | 1380 | #endif |
| 1392 | 1381 | ||
| 1393 | return ret; | 1382 | return ret; |
| @@ -1395,12 +1384,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1395 | 1384 | ||
| 1396 | static int kvmppc_book3s_init(void) | 1385 | static int kvmppc_book3s_init(void) |
| 1397 | { | 1386 | { |
| 1398 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, | 1387 | int r; |
| 1399 | THIS_MODULE); | 1388 | |
| 1389 | r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, | ||
| 1390 | THIS_MODULE); | ||
| 1391 | |||
| 1392 | if (r) | ||
| 1393 | return r; | ||
| 1394 | |||
| 1395 | r = kvmppc_mmu_hpte_sysinit(); | ||
| 1396 | |||
| 1397 | return r; | ||
| 1400 | } | 1398 | } |
| 1401 | 1399 | ||
| 1402 | static void kvmppc_book3s_exit(void) | 1400 | static void kvmppc_book3s_exit(void) |
| 1403 | { | 1401 | { |
| 1402 | kvmppc_mmu_hpte_sysexit(); | ||
| 1404 | kvm_exit(); | 1403 | kvm_exit(); |
| 1405 | } | 1404 | } |
| 1406 | 1405 | ||
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 0b10503c8a4a..3292d76101d2 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c | |||
| @@ -354,10 +354,10 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, | |||
| 354 | *vsid = VSID_REAL_DR | gvsid; | 354 | *vsid = VSID_REAL_DR | gvsid; |
| 355 | break; | 355 | break; |
| 356 | case MSR_DR|MSR_IR: | 356 | case MSR_DR|MSR_IR: |
| 357 | if (!sr->valid) | 357 | if (sr->valid) |
| 358 | return -1; | 358 | *vsid = sr->vsid; |
| 359 | 359 | else | |
| 360 | *vsid = sr->vsid; | 360 | *vsid = VSID_BAT | gvsid; |
| 361 | break; | 361 | break; |
| 362 | default: | 362 | default: |
| 363 | BUG(); | 363 | BUG(); |
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 0bb66005338f..0b51ef872c1e 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | */ | 19 | */ |
| 20 | 20 | ||
| 21 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
| 22 | #include <linux/hash.h> | ||
| 22 | 23 | ||
| 23 | #include <asm/kvm_ppc.h> | 24 | #include <asm/kvm_ppc.h> |
| 24 | #include <asm/kvm_book3s.h> | 25 | #include <asm/kvm_book3s.h> |
| @@ -57,139 +58,26 @@ | |||
| 57 | static ulong htab; | 58 | static ulong htab; |
| 58 | static u32 htabmask; | 59 | static u32 htabmask; |
| 59 | 60 | ||
| 60 | static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) | 61 | void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) |
| 61 | { | 62 | { |
| 62 | volatile u32 *pteg; | 63 | volatile u32 *pteg; |
| 63 | 64 | ||
| 64 | dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n", | 65 | /* Remove from host HTAB */ |
| 65 | pte->pte.eaddr, pte->pte.vpage, pte->host_va); | ||
| 66 | |||
| 67 | pteg = (u32*)pte->slot; | 66 | pteg = (u32*)pte->slot; |
| 68 | |||
| 69 | pteg[0] = 0; | 67 | pteg[0] = 0; |
| 68 | |||
| 69 | /* And make sure it's gone from the TLB too */ | ||
| 70 | asm volatile ("sync"); | 70 | asm volatile ("sync"); |
| 71 | asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); | 71 | asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); |
| 72 | asm volatile ("sync"); | 72 | asm volatile ("sync"); |
| 73 | asm volatile ("tlbsync"); | 73 | asm volatile ("tlbsync"); |
| 74 | |||
| 75 | pte->host_va = 0; | ||
| 76 | |||
| 77 | if (pte->pte.may_write) | ||
| 78 | kvm_release_pfn_dirty(pte->pfn); | ||
| 79 | else | ||
| 80 | kvm_release_pfn_clean(pte->pfn); | ||
| 81 | } | ||
| 82 | |||
| 83 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) | ||
| 84 | { | ||
| 85 | int i; | ||
| 86 | |||
| 87 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n", | ||
| 88 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); | ||
| 89 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 90 | |||
| 91 | guest_ea &= ea_mask; | ||
| 92 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 93 | struct hpte_cache *pte; | ||
| 94 | |||
| 95 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 96 | if (!pte->host_va) | ||
| 97 | continue; | ||
| 98 | |||
| 99 | if ((pte->pte.eaddr & ea_mask) == guest_ea) { | ||
| 100 | invalidate_pte(vcpu, pte); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | /* Doing a complete flush -> start from scratch */ | ||
| 105 | if (!ea_mask) | ||
| 106 | vcpu->arch.hpte_cache_offset = 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) | ||
| 110 | { | ||
| 111 | int i; | ||
| 112 | |||
| 113 | dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", | ||
| 114 | vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); | ||
| 115 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 116 | |||
| 117 | guest_vp &= vp_mask; | ||
| 118 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 119 | struct hpte_cache *pte; | ||
| 120 | |||
| 121 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 122 | if (!pte->host_va) | ||
| 123 | continue; | ||
| 124 | |||
| 125 | if ((pte->pte.vpage & vp_mask) == guest_vp) { | ||
| 126 | invalidate_pte(vcpu, pte); | ||
| 127 | } | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) | ||
| 132 | { | ||
| 133 | int i; | ||
| 134 | |||
| 135 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", | ||
| 136 | vcpu->arch.hpte_cache_offset, pa_start, pa_end); | ||
| 137 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 138 | |||
| 139 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 140 | struct hpte_cache *pte; | ||
| 141 | |||
| 142 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 143 | if (!pte->host_va) | ||
| 144 | continue; | ||
| 145 | |||
| 146 | if ((pte->pte.raddr >= pa_start) && | ||
| 147 | (pte->pte.raddr < pa_end)) { | ||
| 148 | invalidate_pte(vcpu, pte); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } | ||
| 152 | |||
| 153 | struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) | ||
| 154 | { | ||
| 155 | int i; | ||
| 156 | u64 guest_vp; | ||
| 157 | |||
| 158 | guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); | ||
| 159 | for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { | ||
| 160 | struct hpte_cache *pte; | ||
| 161 | |||
| 162 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 163 | if (!pte->host_va) | ||
| 164 | continue; | ||
| 165 | |||
| 166 | if (pte->pte.vpage == guest_vp) | ||
| 167 | return &pte->pte; | ||
| 168 | } | ||
| 169 | |||
| 170 | return NULL; | ||
| 171 | } | ||
| 172 | |||
| 173 | static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) | ||
| 174 | { | ||
| 175 | if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) | ||
| 176 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
| 177 | |||
| 178 | return vcpu->arch.hpte_cache_offset++; | ||
| 179 | } | 74 | } |
| 180 | 75 | ||
| 181 | /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using | 76 | /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using |
| 182 | * a hash, so we don't waste cycles on looping */ | 77 | * a hash, so we don't waste cycles on looping */ |
| 183 | static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) | 78 | static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) |
| 184 | { | 79 | { |
| 185 | return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ | 80 | return hash_64(gvsid, SID_MAP_BITS); |
| 186 | ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ | ||
| 187 | ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ | ||
| 188 | ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ | ||
| 189 | ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ | ||
| 190 | ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ | ||
| 191 | ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ | ||
| 192 | ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); | ||
| 193 | } | 81 | } |
| 194 | 82 | ||
| 195 | 83 | ||
| @@ -256,7 +144,6 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
| 256 | register int rr = 0; | 144 | register int rr = 0; |
| 257 | bool primary = false; | 145 | bool primary = false; |
| 258 | bool evict = false; | 146 | bool evict = false; |
| 259 | int hpte_id; | ||
| 260 | struct hpte_cache *pte; | 147 | struct hpte_cache *pte; |
| 261 | 148 | ||
| 262 | /* Get host physical address for gpa */ | 149 | /* Get host physical address for gpa */ |
| @@ -341,8 +228,7 @@ next_pteg: | |||
| 341 | 228 | ||
| 342 | /* Now tell our Shadow PTE code about the new page */ | 229 | /* Now tell our Shadow PTE code about the new page */ |
| 343 | 230 | ||
| 344 | hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); | 231 | pte = kvmppc_mmu_hpte_cache_next(vcpu); |
| 345 | pte = &vcpu->arch.hpte_cache[hpte_id]; | ||
| 346 | 232 | ||
| 347 | dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", | 233 | dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", |
| 348 | orig_pte->may_write ? 'w' : '-', | 234 | orig_pte->may_write ? 'w' : '-', |
| @@ -355,6 +241,8 @@ next_pteg: | |||
| 355 | pte->pte = *orig_pte; | 241 | pte->pte = *orig_pte; |
| 356 | pte->pfn = hpaddr >> PAGE_SHIFT; | 242 | pte->pfn = hpaddr >> PAGE_SHIFT; |
| 357 | 243 | ||
| 244 | kvmppc_mmu_hpte_cache_map(vcpu, pte); | ||
| 245 | |||
| 358 | return 0; | 246 | return 0; |
| 359 | } | 247 | } |
| 360 | 248 | ||
| @@ -439,7 +327,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | |||
| 439 | 327 | ||
| 440 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | 328 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) |
| 441 | { | 329 | { |
| 442 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 330 | kvmppc_mmu_hpte_destroy(vcpu); |
| 443 | preempt_disable(); | 331 | preempt_disable(); |
| 444 | __destroy_context(to_book3s(vcpu)->context_id); | 332 | __destroy_context(to_book3s(vcpu)->context_id); |
| 445 | preempt_enable(); | 333 | preempt_enable(); |
| @@ -479,5 +367,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) | |||
| 479 | htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; | 367 | htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; |
| 480 | htab = (ulong)__va(sdr1 & 0xffff0000); | 368 | htab = (ulong)__va(sdr1 & 0xffff0000); |
| 481 | 369 | ||
| 370 | kvmppc_mmu_hpte_init(vcpu); | ||
| 371 | |||
| 482 | return 0; | 372 | return 0; |
| 483 | } | 373 | } |
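Both host MMU backends (the 32-bit one above and the 64-bit one below) drop the hand-rolled eight-way XOR fold over the guest VSID and call hash_64() from the newly included <linux/hash.h> instead, reducing the 64-bit gvsid to a SID_MAP_BITS-wide table index in one step. The snippet below is only an illustrative stand-in for what that call computes, assuming the GOLDEN_RATIO_PRIME_64 multiplier of that kernel era; the helper name is made up for the example.

#include <stdint.h>

#define SID_MAP_BITS 9   /* "512 gvsid->hvsid entries", per the comment above */

/* Rough equivalent of hash_64(gvsid, SID_MAP_BITS): multiply by a large odd
 * constant and keep the top SID_MAP_BITS bits, giving an index in 0..511. */
static uint16_t example_sid_hash(uint64_t gvsid)
{
        return (uint16_t)((gvsid * 0x9e37fffffffc0001ULL) >> (64 - SID_MAP_BITS));
}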
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e4b5744977f6..384179a5002b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | */ | 20 | */ |
| 21 | 21 | ||
| 22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
| 23 | #include <linux/hash.h> | ||
| 23 | 24 | ||
| 24 | #include <asm/kvm_ppc.h> | 25 | #include <asm/kvm_ppc.h> |
| 25 | #include <asm/kvm_book3s.h> | 26 | #include <asm/kvm_book3s.h> |
| @@ -46,135 +47,20 @@ | |||
| 46 | #define dprintk_slb(a, ...) do { } while(0) | 47 | #define dprintk_slb(a, ...) do { } while(0) |
| 47 | #endif | 48 | #endif |
| 48 | 49 | ||
| 49 | static void invalidate_pte(struct hpte_cache *pte) | 50 | void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) |
| 50 | { | 51 | { |
| 51 | dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", | ||
| 52 | pte->pte.eaddr, pte->pte.vpage, pte->host_va); | ||
| 53 | |||
| 54 | ppc_md.hpte_invalidate(pte->slot, pte->host_va, | 52 | ppc_md.hpte_invalidate(pte->slot, pte->host_va, |
| 55 | MMU_PAGE_4K, MMU_SEGSIZE_256M, | 53 | MMU_PAGE_4K, MMU_SEGSIZE_256M, |
| 56 | false); | 54 | false); |
| 57 | pte->host_va = 0; | ||
| 58 | |||
| 59 | if (pte->pte.may_write) | ||
| 60 | kvm_release_pfn_dirty(pte->pfn); | ||
| 61 | else | ||
| 62 | kvm_release_pfn_clean(pte->pfn); | ||
| 63 | } | ||
| 64 | |||
| 65 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) | ||
| 66 | { | ||
| 67 | int i; | ||
| 68 | |||
| 69 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", | ||
| 70 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); | ||
| 71 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 72 | |||
| 73 | guest_ea &= ea_mask; | ||
| 74 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 75 | struct hpte_cache *pte; | ||
| 76 | |||
| 77 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 78 | if (!pte->host_va) | ||
| 79 | continue; | ||
| 80 | |||
| 81 | if ((pte->pte.eaddr & ea_mask) == guest_ea) { | ||
| 82 | invalidate_pte(pte); | ||
| 83 | } | ||
| 84 | } | ||
| 85 | |||
| 86 | /* Doing a complete flush -> start from scratch */ | ||
| 87 | if (!ea_mask) | ||
| 88 | vcpu->arch.hpte_cache_offset = 0; | ||
| 89 | } | ||
| 90 | |||
| 91 | void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) | ||
| 92 | { | ||
| 93 | int i; | ||
| 94 | |||
| 95 | dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", | ||
| 96 | vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); | ||
| 97 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 98 | |||
| 99 | guest_vp &= vp_mask; | ||
| 100 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 101 | struct hpte_cache *pte; | ||
| 102 | |||
| 103 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 104 | if (!pte->host_va) | ||
| 105 | continue; | ||
| 106 | |||
| 107 | if ((pte->pte.vpage & vp_mask) == guest_vp) { | ||
| 108 | invalidate_pte(pte); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) | ||
| 114 | { | ||
| 115 | int i; | ||
| 116 | |||
| 117 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n", | ||
| 118 | vcpu->arch.hpte_cache_offset, pa_start, pa_end); | ||
| 119 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
| 120 | |||
| 121 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
| 122 | struct hpte_cache *pte; | ||
| 123 | |||
| 124 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 125 | if (!pte->host_va) | ||
| 126 | continue; | ||
| 127 | |||
| 128 | if ((pte->pte.raddr >= pa_start) && | ||
| 129 | (pte->pte.raddr < pa_end)) { | ||
| 130 | invalidate_pte(pte); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) | ||
| 136 | { | ||
| 137 | int i; | ||
| 138 | u64 guest_vp; | ||
| 139 | |||
| 140 | guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); | ||
| 141 | for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { | ||
| 142 | struct hpte_cache *pte; | ||
| 143 | |||
| 144 | pte = &vcpu->arch.hpte_cache[i]; | ||
| 145 | if (!pte->host_va) | ||
| 146 | continue; | ||
| 147 | |||
| 148 | if (pte->pte.vpage == guest_vp) | ||
| 149 | return &pte->pte; | ||
| 150 | } | ||
| 151 | |||
| 152 | return NULL; | ||
| 153 | } | ||
| 154 | |||
| 155 | static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) | ||
| 156 | { | ||
| 157 | if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) | ||
| 158 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
| 159 | |||
| 160 | return vcpu->arch.hpte_cache_offset++; | ||
| 161 | } | 55 | } |
| 162 | 56 | ||
| 163 | /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using | 57 | /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using |
| 164 | * a hash, so we don't waste cycles on looping */ | 58 | * a hash, so we don't waste cycles on looping */ |
| 165 | static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) | 59 | static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) |
| 166 | { | 60 | { |
| 167 | return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ | 61 | return hash_64(gvsid, SID_MAP_BITS); |
| 168 | ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ | ||
| 169 | ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ | ||
| 170 | ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ | ||
| 171 | ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ | ||
| 172 | ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ | ||
| 173 | ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ | ||
| 174 | ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); | ||
| 175 | } | 62 | } |
| 176 | 63 | ||
| 177 | |||
| 178 | static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) | 64 | static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) |
| 179 | { | 65 | { |
| 180 | struct kvmppc_sid_map *map; | 66 | struct kvmppc_sid_map *map; |
| @@ -273,8 +159,7 @@ map_again: | |||
| 273 | attempt++; | 159 | attempt++; |
| 274 | goto map_again; | 160 | goto map_again; |
| 275 | } else { | 161 | } else { |
| 276 | int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); | 162 | struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); |
| 277 | struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; | ||
| 278 | 163 | ||
| 279 | dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", | 164 | dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", |
| 280 | ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', | 165 | ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', |
| @@ -292,6 +177,8 @@ map_again: | |||
| 292 | pte->host_va = va; | 177 | pte->host_va = va; |
| 293 | pte->pte = *orig_pte; | 178 | pte->pte = *orig_pte; |
| 294 | pte->pfn = hpaddr >> PAGE_SHIFT; | 179 | pte->pfn = hpaddr >> PAGE_SHIFT; |
| 180 | |||
| 181 | kvmppc_mmu_hpte_cache_map(vcpu, pte); | ||
| 295 | } | 182 | } |
| 296 | 183 | ||
| 297 | return 0; | 184 | return 0; |
| @@ -418,7 +305,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | |||
| 418 | 305 | ||
| 419 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | 306 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) |
| 420 | { | 307 | { |
| 421 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 308 | kvmppc_mmu_hpte_destroy(vcpu); |
| 422 | __destroy_context(to_book3s(vcpu)->context_id); | 309 | __destroy_context(to_book3s(vcpu)->context_id); |
| 423 | } | 310 | } |
| 424 | 311 | ||
| @@ -436,5 +323,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) | |||
| 436 | vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; | 323 | vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; |
| 437 | vcpu3s->vsid_next = vcpu3s->vsid_first; | 324 | vcpu3s->vsid_next = vcpu3s->vsid_first; |
| 438 | 325 | ||
| 326 | kvmppc_mmu_hpte_init(vcpu); | ||
| 327 | |||
| 439 | return 0; | 328 | return 0; |
| 440 | } | 329 | } |
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c new file mode 100644 index 000000000000..4868d4a7ebc5 --- /dev/null +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c | |||
| @@ -0,0 +1,277 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. | ||
| 3 | * | ||
| 4 | * Authors: | ||
| 5 | * Alexander Graf <agraf@suse.de> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License, version 2, as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | * GNU General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License | ||
| 17 | * along with this program; if not, write to the Free Software | ||
| 18 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <linux/kvm_host.h> | ||
| 22 | #include <linux/hash.h> | ||
| 23 | #include <linux/slab.h> | ||
| 24 | |||
| 25 | #include <asm/kvm_ppc.h> | ||
| 26 | #include <asm/kvm_book3s.h> | ||
| 27 | #include <asm/machdep.h> | ||
| 28 | #include <asm/mmu_context.h> | ||
| 29 | #include <asm/hw_irq.h> | ||
| 30 | |||
| 31 | #define PTE_SIZE 12 | ||
| 32 | |||
| 33 | /* #define DEBUG_MMU */ | ||
| 34 | |||
| 35 | #ifdef DEBUG_MMU | ||
| 36 | #define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) | ||
| 37 | #else | ||
| 38 | #define dprintk_mmu(a, ...) do { } while(0) | ||
| 39 | #endif | ||
| 40 | |||
| 41 | static struct kmem_cache *hpte_cache; | ||
| 42 | |||
| 43 | static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) | ||
| 44 | { | ||
| 45 | return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE); | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) | ||
| 49 | { | ||
| 50 | return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE); | ||
| 51 | } | ||
| 52 | |||
| 53 | static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage) | ||
| 54 | { | ||
| 55 | return hash_64((vpage & 0xffffff000ULL) >> 12, | ||
| 56 | HPTEG_HASH_BITS_VPTE_LONG); | ||
| 57 | } | ||
| 58 | |||
| 59 | void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) | ||
| 60 | { | ||
| 61 | u64 index; | ||
| 62 | |||
| 63 | /* Add to ePTE list */ | ||
| 64 | index = kvmppc_mmu_hash_pte(pte->pte.eaddr); | ||
| 65 | hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); | ||
| 66 | |||
| 67 | /* Add to vPTE list */ | ||
| 68 | index = kvmppc_mmu_hash_vpte(pte->pte.vpage); | ||
| 69 | hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); | ||
| 70 | |||
| 71 | /* Add to vPTE_long list */ | ||
| 72 | index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); | ||
| 73 | hlist_add_head(&pte->list_vpte_long, | ||
| 74 | &vcpu->arch.hpte_hash_vpte_long[index]); | ||
| 75 | } | ||
| 76 | |||
| 77 | static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) | ||
| 78 | { | ||
| 79 | dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", | ||
| 80 | pte->pte.eaddr, pte->pte.vpage, pte->host_va); | ||
| 81 | |||
| 82 | /* Different for 32 and 64 bit */ | ||
| 83 | kvmppc_mmu_invalidate_pte(vcpu, pte); | ||
| 84 | |||
| 85 | if (pte->pte.may_write) | ||
| 86 | kvm_release_pfn_dirty(pte->pfn); | ||
| 87 | else | ||
| 88 | kvm_release_pfn_clean(pte->pfn); | ||
| 89 | |||
| 90 | hlist_del(&pte->list_pte); | ||
| 91 | hlist_del(&pte->list_vpte); | ||
| 92 | hlist_del(&pte->list_vpte_long); | ||
| 93 | |||
| 94 | vcpu->arch.hpte_cache_count--; | ||
| 95 | kmem_cache_free(hpte_cache, pte); | ||
| 96 | } | ||
| 97 | |||
| 98 | static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) | ||
| 99 | { | ||
| 100 | struct hpte_cache *pte; | ||
| 101 | struct hlist_node *node, *tmp; | ||
| 102 | int i; | ||
| 103 | |||
| 104 | for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { | ||
| 105 | struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; | ||
| 106 | |||
| 107 | hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) | ||
| 108 | invalidate_pte(vcpu, pte); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) | ||
| 113 | { | ||
| 114 | struct hlist_head *list; | ||
| 115 | struct hlist_node *node, *tmp; | ||
| 116 | struct hpte_cache *pte; | ||
| 117 | |||
| 118 | /* Find the list of entries in the map */ | ||
| 119 | list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; | ||
| 120 | |||
| 121 | /* Check the list for matching entries and invalidate */ | ||
| 122 | hlist_for_each_entry_safe(pte, node, tmp, list, list_pte) | ||
| 123 | if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) | ||
| 124 | invalidate_pte(vcpu, pte); | ||
| 125 | } | ||
| 126 | |||
| 127 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) | ||
| 128 | { | ||
| 129 | u64 i; | ||
| 130 | |||
| 131 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", | ||
| 132 | vcpu->arch.hpte_cache_count, guest_ea, ea_mask); | ||
| 133 | |||
| 134 | guest_ea &= ea_mask; | ||
| 135 | |||
| 136 | switch (ea_mask) { | ||
| 137 | case ~0xfffUL: | ||
| 138 | kvmppc_mmu_pte_flush_page(vcpu, guest_ea); | ||
| 139 | break; | ||
| 140 | case 0x0ffff000: | ||
| 141 | /* 32-bit flush w/o segment, go through all possible segments */ | ||
| 142 | for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL) | ||
| 143 | kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL); | ||
| 144 | break; | ||
| 145 | case 0: | ||
| 146 | /* Doing a complete flush -> start from scratch */ | ||
| 147 | kvmppc_mmu_pte_flush_all(vcpu); | ||
| 148 | break; | ||
| 149 | default: | ||
| 150 | WARN_ON(1); | ||
| 151 | break; | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 155 | /* Flush with mask 0xfffffffff */ | ||
| 156 | static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) | ||
| 157 | { | ||
| 158 | struct hlist_head *list; | ||
| 159 | struct hlist_node *node, *tmp; | ||
| 160 | struct hpte_cache *pte; | ||
| 161 | u64 vp_mask = 0xfffffffffULL; | ||
| 162 | |||
| 163 | list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; | ||
| 164 | |||
| 165 | /* Check the list for matching entries and invalidate */ | ||
| 166 | hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte) | ||
| 167 | if ((pte->pte.vpage & vp_mask) == guest_vp) | ||
| 168 | invalidate_pte(vcpu, pte); | ||
| 169 | } | ||
| 170 | |||
| 171 | /* Flush with mask 0xffffff000 */ | ||
| 172 | static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) | ||
| 173 | { | ||
| 174 | struct hlist_head *list; | ||
| 175 | struct hlist_node *node, *tmp; | ||
| 176 | struct hpte_cache *pte; | ||
| 177 | u64 vp_mask = 0xffffff000ULL; | ||
| 178 | |||
| 179 | list = &vcpu->arch.hpte_hash_vpte_long[ | ||
| 180 | kvmppc_mmu_hash_vpte_long(guest_vp)]; | ||
| 181 | |||
| 182 | /* Check the list for matching entries and invalidate */ | ||
| 183 | hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) | ||
| 184 | if ((pte->pte.vpage & vp_mask) == guest_vp) | ||
| 185 | invalidate_pte(vcpu, pte); | ||
| 186 | } | ||
| 187 | |||
| 188 | void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) | ||
| 189 | { | ||
| 190 | dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", | ||
| 191 | vcpu->arch.hpte_cache_count, guest_vp, vp_mask); | ||
| 192 | guest_vp &= vp_mask; | ||
| 193 | |||
| 194 | switch(vp_mask) { | ||
| 195 | case 0xfffffffffULL: | ||
| 196 | kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); | ||
| 197 | break; | ||
| 198 | case 0xffffff000ULL: | ||
| 199 | kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); | ||
| 200 | break; | ||
| 201 | default: | ||
| 202 | WARN_ON(1); | ||
| 203 | return; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) | ||
| 208 | { | ||
| 209 | struct hlist_node *node, *tmp; | ||
| 210 | struct hpte_cache *pte; | ||
| 211 | int i; | ||
| 212 | |||
| 213 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n", | ||
| 214 | vcpu->arch.hpte_cache_count, pa_start, pa_end); | ||
| 215 | |||
| 216 | for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { | ||
| 217 | struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; | ||
| 218 | |||
| 219 | hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) | ||
| 220 | if ((pte->pte.raddr >= pa_start) && | ||
| 221 | (pte->pte.raddr < pa_end)) | ||
| 222 | invalidate_pte(vcpu, pte); | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) | ||
| 227 | { | ||
| 228 | struct hpte_cache *pte; | ||
| 229 | |||
| 230 | pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); | ||
| 231 | vcpu->arch.hpte_cache_count++; | ||
| 232 | |||
| 233 | if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM) | ||
| 234 | kvmppc_mmu_pte_flush_all(vcpu); | ||
| 235 | |||
| 236 | return pte; | ||
| 237 | } | ||
| 238 | |||
| 239 | void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) | ||
| 240 | { | ||
| 241 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
| 242 | } | ||
| 243 | |||
| 244 | static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len) | ||
| 245 | { | ||
| 246 | int i; | ||
| 247 | |||
| 248 | for (i = 0; i < len; i++) | ||
| 249 | INIT_HLIST_HEAD(&hash_list[i]); | ||
| 250 | } | ||
| 251 | |||
| 252 | int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) | ||
| 253 | { | ||
| 254 | /* init hpte lookup hashes */ | ||
| 255 | kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, | ||
| 256 | ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); | ||
| 257 | kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, | ||
| 258 | ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); | ||
| 259 | kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, | ||
| 260 | ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); | ||
| 261 | |||
| 262 | return 0; | ||
| 263 | } | ||
| 264 | |||
| 265 | int kvmppc_mmu_hpte_sysinit(void) | ||
| 266 | { | ||
| 267 | /* init hpte slab cache */ | ||
| 268 | hpte_cache = kmem_cache_create("kvm-spt", sizeof(struct hpte_cache), | ||
| 269 | sizeof(struct hpte_cache), 0, NULL); | ||
| 270 | |||
| 271 | return 0; | ||
| 272 | } | ||
| 273 | |||
| 274 | void kvmppc_mmu_hpte_sysexit(void) | ||
| 275 | { | ||
| 276 | kmem_cache_destroy(hpte_cache); | ||
| 277 | } | ||
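The new book3s_mmu_hpte.c keeps every shadow PTE on three hash lists at once (by effective address, by virtual page, and by a coarser virtual-page mask), so each flush variant walks one short bucket instead of scanning the old fixed HPTEG_CACHE_NUM array linearly. Below is a small stand-alone sketch of that multi-index pattern; the names, bucket sizes, and the plain singly-linked chains are illustrative assumptions, not the kernel's hlist-based code.

#include <stdint.h>
#include <stddef.h>

#define EA_HASH_BITS 5
#define VP_HASH_BITS 5

struct spte {
        uint64_t eaddr;                /* guest effective address */
        uint64_t vpage;                /* guest virtual page */
        struct spte *ea_next;          /* chain for the eaddr index */
        struct spte *vp_next;          /* chain for the vpage index */
};

static struct spte *ea_hash[1 << EA_HASH_BITS];
static struct spte *vp_hash[1 << VP_HASH_BITS];

static unsigned int bucket(uint64_t val, unsigned int bits)
{
        return (unsigned int)(val % (1u << bits));   /* hash_64() in the kernel */
}

/* analogue of kvmppc_mmu_hpte_cache_map(): insert into every index */
static void cache_map(struct spte *pte)
{
        unsigned int e = bucket(pte->eaddr >> 12, EA_HASH_BITS);
        unsigned int v = bucket(pte->vpage, VP_HASH_BITS);

        pte->ea_next = ea_hash[e];
        ea_hash[e] = pte;
        pte->vp_next = vp_hash[v];
        vp_hash[v] = pte;
}

/* analogue of the vflush lookup: only the matching bucket is walked.
 * (The real invalidate_pte() additionally unlinks the entry from all
 * three lists and returns it to the hpte_cache slab.) */
static struct spte *find_by_vpage(uint64_t guest_vp)
{
        struct spte *pte = vp_hash[bucket(guest_vp, VP_HASH_BITS)];

        while (pte && pte->vpage != guest_vp)
                pte = pte->vp_next;
        return pte;
}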
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index a9f66abafcb3..474f2e24050a 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c | |||
| @@ -159,10 +159,7 @@ | |||
| 159 | 159 | ||
| 160 | static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) | 160 | static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) |
| 161 | { | 161 | { |
| 162 | struct thread_struct t; | 162 | kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr); |
| 163 | |||
| 164 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 165 | cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); | ||
| 166 | } | 163 | } |
| 167 | 164 | ||
| 168 | static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) | 165 | static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) |
| @@ -183,7 +180,6 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 183 | int rs, ulong addr, int ls_type) | 180 | int rs, ulong addr, int ls_type) |
| 184 | { | 181 | { |
| 185 | int emulated = EMULATE_FAIL; | 182 | int emulated = EMULATE_FAIL; |
| 186 | struct thread_struct t; | ||
| 187 | int r; | 183 | int r; |
| 188 | char tmp[8]; | 184 | char tmp[8]; |
| 189 | int len = sizeof(u32); | 185 | int len = sizeof(u32); |
| @@ -191,8 +187,6 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 191 | if (ls_type == FPU_LS_DOUBLE) | 187 | if (ls_type == FPU_LS_DOUBLE) |
| 192 | len = sizeof(u64); | 188 | len = sizeof(u64); |
| 193 | 189 | ||
| 194 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 195 | |||
| 196 | /* read from memory */ | 190 | /* read from memory */ |
| 197 | r = kvmppc_ld(vcpu, &addr, len, tmp, true); | 191 | r = kvmppc_ld(vcpu, &addr, len, tmp, true); |
| 198 | vcpu->arch.paddr_accessed = addr; | 192 | vcpu->arch.paddr_accessed = addr; |
| @@ -210,7 +204,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 210 | /* put in registers */ | 204 | /* put in registers */ |
| 211 | switch (ls_type) { | 205 | switch (ls_type) { |
| 212 | case FPU_LS_SINGLE: | 206 | case FPU_LS_SINGLE: |
| 213 | cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); | 207 | kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); |
| 214 | vcpu->arch.qpr[rs] = *((u32*)tmp); | 208 | vcpu->arch.qpr[rs] = *((u32*)tmp); |
| 215 | break; | 209 | break; |
| 216 | case FPU_LS_DOUBLE: | 210 | case FPU_LS_DOUBLE: |
| @@ -229,17 +223,14 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 229 | int rs, ulong addr, int ls_type) | 223 | int rs, ulong addr, int ls_type) |
| 230 | { | 224 | { |
| 231 | int emulated = EMULATE_FAIL; | 225 | int emulated = EMULATE_FAIL; |
| 232 | struct thread_struct t; | ||
| 233 | int r; | 226 | int r; |
| 234 | char tmp[8]; | 227 | char tmp[8]; |
| 235 | u64 val; | 228 | u64 val; |
| 236 | int len; | 229 | int len; |
| 237 | 230 | ||
| 238 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 239 | |||
| 240 | switch (ls_type) { | 231 | switch (ls_type) { |
| 241 | case FPU_LS_SINGLE: | 232 | case FPU_LS_SINGLE: |
| 242 | cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); | 233 | kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr); |
| 243 | val = *((u32*)tmp); | 234 | val = *((u32*)tmp); |
| 244 | len = sizeof(u32); | 235 | len = sizeof(u32); |
| 245 | break; | 236 | break; |
| @@ -278,13 +269,10 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 278 | int rs, ulong addr, bool w, int i) | 269 | int rs, ulong addr, bool w, int i) |
| 279 | { | 270 | { |
| 280 | int emulated = EMULATE_FAIL; | 271 | int emulated = EMULATE_FAIL; |
| 281 | struct thread_struct t; | ||
| 282 | int r; | 272 | int r; |
| 283 | float one = 1.0; | 273 | float one = 1.0; |
| 284 | u32 tmp[2]; | 274 | u32 tmp[2]; |
| 285 | 275 | ||
| 286 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 287 | |||
| 288 | /* read from memory */ | 276 | /* read from memory */ |
| 289 | if (w) { | 277 | if (w) { |
| 290 | r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); | 278 | r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); |
| @@ -308,7 +296,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 308 | emulated = EMULATE_DONE; | 296 | emulated = EMULATE_DONE; |
| 309 | 297 | ||
| 310 | /* put in registers */ | 298 | /* put in registers */ |
| 311 | cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); | 299 | kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); |
| 312 | vcpu->arch.qpr[rs] = tmp[1]; | 300 | vcpu->arch.qpr[rs] = tmp[1]; |
| 313 | 301 | ||
| 314 | dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], | 302 | dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], |
| @@ -322,14 +310,11 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 322 | int rs, ulong addr, bool w, int i) | 310 | int rs, ulong addr, bool w, int i) |
| 323 | { | 311 | { |
| 324 | int emulated = EMULATE_FAIL; | 312 | int emulated = EMULATE_FAIL; |
| 325 | struct thread_struct t; | ||
| 326 | int r; | 313 | int r; |
| 327 | u32 tmp[2]; | 314 | u32 tmp[2]; |
| 328 | int len = w ? sizeof(u32) : sizeof(u64); | 315 | int len = w ? sizeof(u32) : sizeof(u64); |
| 329 | 316 | ||
| 330 | t.fpscr.val = vcpu->arch.fpscr; | 317 | kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr); |
| 331 | |||
| 332 | cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); | ||
| 333 | tmp[1] = vcpu->arch.qpr[rs]; | 318 | tmp[1] = vcpu->arch.qpr[rs]; |
| 334 | 319 | ||
| 335 | r = kvmppc_st(vcpu, &addr, len, tmp, true); | 320 | r = kvmppc_st(vcpu, &addr, len, tmp, true); |
| @@ -517,7 +502,7 @@ static int get_d_signext(u32 inst) | |||
| 517 | static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | 502 | static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, |
| 518 | int reg_out, int reg_in1, int reg_in2, | 503 | int reg_out, int reg_in1, int reg_in2, |
| 519 | int reg_in3, int scalar, | 504 | int reg_in3, int scalar, |
| 520 | void (*func)(struct thread_struct *t, | 505 | void (*func)(u64 *fpscr, |
| 521 | u32 *dst, u32 *src1, | 506 | u32 *dst, u32 *src1, |
| 522 | u32 *src2, u32 *src3)) | 507 | u32 *src2, u32 *src3)) |
| 523 | { | 508 | { |
| @@ -526,27 +511,25 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | |||
| 526 | u32 ps0_out; | 511 | u32 ps0_out; |
| 527 | u32 ps0_in1, ps0_in2, ps0_in3; | 512 | u32 ps0_in1, ps0_in2, ps0_in3; |
| 528 | u32 ps1_in1, ps1_in2, ps1_in3; | 513 | u32 ps1_in1, ps1_in2, ps1_in3; |
| 529 | struct thread_struct t; | ||
| 530 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 531 | 514 | ||
| 532 | /* RC */ | 515 | /* RC */ |
| 533 | WARN_ON(rc); | 516 | WARN_ON(rc); |
| 534 | 517 | ||
| 535 | /* PS0 */ | 518 | /* PS0 */ |
| 536 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | 519 | kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); |
| 537 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | 520 | kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); |
| 538 | cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); | 521 | kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr); |
| 539 | 522 | ||
| 540 | if (scalar & SCALAR_LOW) | 523 | if (scalar & SCALAR_LOW) |
| 541 | ps0_in2 = qpr[reg_in2]; | 524 | ps0_in2 = qpr[reg_in2]; |
| 542 | 525 | ||
| 543 | func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); | 526 | func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); |
| 544 | 527 | ||
| 545 | dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | 528 | dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", |
| 546 | ps0_in1, ps0_in2, ps0_in3, ps0_out); | 529 | ps0_in1, ps0_in2, ps0_in3, ps0_out); |
| 547 | 530 | ||
| 548 | if (!(scalar & SCALAR_NO_PS0)) | 531 | if (!(scalar & SCALAR_NO_PS0)) |
| 549 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | 532 | kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); |
| 550 | 533 | ||
| 551 | /* PS1 */ | 534 | /* PS1 */ |
| 552 | ps1_in1 = qpr[reg_in1]; | 535 | ps1_in1 = qpr[reg_in1]; |
| @@ -557,7 +540,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | |||
| 557 | ps1_in2 = ps0_in2; | 540 | ps1_in2 = ps0_in2; |
| 558 | 541 | ||
| 559 | if (!(scalar & SCALAR_NO_PS1)) | 542 | if (!(scalar & SCALAR_NO_PS1)) |
| 560 | func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); | 543 | func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); |
| 561 | 544 | ||
| 562 | dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | 545 | dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", |
| 563 | ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); | 546 | ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); |
| @@ -568,7 +551,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | |||
| 568 | static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | 551 | static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, |
| 569 | int reg_out, int reg_in1, int reg_in2, | 552 | int reg_out, int reg_in1, int reg_in2, |
| 570 | int scalar, | 553 | int scalar, |
| 571 | void (*func)(struct thread_struct *t, | 554 | void (*func)(u64 *fpscr, |
| 572 | u32 *dst, u32 *src1, | 555 | u32 *dst, u32 *src1, |
| 573 | u32 *src2)) | 556 | u32 *src2)) |
| 574 | { | 557 | { |
| @@ -578,27 +561,25 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | |||
| 578 | u32 ps0_in1, ps0_in2; | 561 | u32 ps0_in1, ps0_in2; |
| 579 | u32 ps1_out; | 562 | u32 ps1_out; |
| 580 | u32 ps1_in1, ps1_in2; | 563 | u32 ps1_in1, ps1_in2; |
| 581 | struct thread_struct t; | ||
| 582 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 583 | 564 | ||
| 584 | /* RC */ | 565 | /* RC */ |
| 585 | WARN_ON(rc); | 566 | WARN_ON(rc); |
| 586 | 567 | ||
| 587 | /* PS0 */ | 568 | /* PS0 */ |
| 588 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | 569 | kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); |
| 589 | 570 | ||
| 590 | if (scalar & SCALAR_LOW) | 571 | if (scalar & SCALAR_LOW) |
| 591 | ps0_in2 = qpr[reg_in2]; | 572 | ps0_in2 = qpr[reg_in2]; |
| 592 | else | 573 | else |
| 593 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | 574 | kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); |
| 594 | 575 | ||
| 595 | func(&t, &ps0_out, &ps0_in1, &ps0_in2); | 576 | func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); |
| 596 | 577 | ||
| 597 | if (!(scalar & SCALAR_NO_PS0)) { | 578 | if (!(scalar & SCALAR_NO_PS0)) { |
| 598 | dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", | 579 | dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", |
| 599 | ps0_in1, ps0_in2, ps0_out); | 580 | ps0_in1, ps0_in2, ps0_out); |
| 600 | 581 | ||
| 601 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | 582 | kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); |
| 602 | } | 583 | } |
| 603 | 584 | ||
| 604 | /* PS1 */ | 585 | /* PS1 */ |
| @@ -608,7 +589,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | |||
| 608 | if (scalar & SCALAR_HIGH) | 589 | if (scalar & SCALAR_HIGH) |
| 609 | ps1_in2 = ps0_in2; | 590 | ps1_in2 = ps0_in2; |
| 610 | 591 | ||
| 611 | func(&t, &ps1_out, &ps1_in1, &ps1_in2); | 592 | func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); |
| 612 | 593 | ||
| 613 | if (!(scalar & SCALAR_NO_PS1)) { | 594 | if (!(scalar & SCALAR_NO_PS1)) { |
| 614 | qpr[reg_out] = ps1_out; | 595 | qpr[reg_out] = ps1_out; |
| @@ -622,31 +603,29 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | |||
| 622 | 603 | ||
| 623 | static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, | 604 | static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, |
| 624 | int reg_out, int reg_in, | 605 | int reg_out, int reg_in, |
| 625 | void (*func)(struct thread_struct *t, | 606 | void (*func)(u64 *t, |
| 626 | u32 *dst, u32 *src1)) | 607 | u32 *dst, u32 *src1)) |
| 627 | { | 608 | { |
| 628 | u32 *qpr = vcpu->arch.qpr; | 609 | u32 *qpr = vcpu->arch.qpr; |
| 629 | u64 *fpr = vcpu->arch.fpr; | 610 | u64 *fpr = vcpu->arch.fpr; |
| 630 | u32 ps0_out, ps0_in; | 611 | u32 ps0_out, ps0_in; |
| 631 | u32 ps1_in; | 612 | u32 ps1_in; |
| 632 | struct thread_struct t; | ||
| 633 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 634 | 613 | ||
| 635 | /* RC */ | 614 | /* RC */ |
| 636 | WARN_ON(rc); | 615 | WARN_ON(rc); |
| 637 | 616 | ||
| 638 | /* PS0 */ | 617 | /* PS0 */ |
| 639 | cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); | 618 | kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr); |
| 640 | func(&t, &ps0_out, &ps0_in); | 619 | func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); |
| 641 | 620 | ||
| 642 | dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", | 621 | dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", |
| 643 | ps0_in, ps0_out); | 622 | ps0_in, ps0_out); |
| 644 | 623 | ||
| 645 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | 624 | kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); |
| 646 | 625 | ||
| 647 | /* PS1 */ | 626 | /* PS1 */ |
| 648 | ps1_in = qpr[reg_in]; | 627 | ps1_in = qpr[reg_in]; |
| 649 | func(&t, &qpr[reg_out], &ps1_in); | 628 | func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); |
| 650 | 629 | ||
| 651 | dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", | 630 | dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", |
| 652 | ps1_in, qpr[reg_out]); | 631 | ps1_in, qpr[reg_out]); |
| @@ -672,13 +651,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 672 | 651 | ||
| 673 | bool rcomp = (inst & 1) ? true : false; | 652 | bool rcomp = (inst & 1) ? true : false; |
| 674 | u32 cr = kvmppc_get_cr(vcpu); | 653 | u32 cr = kvmppc_get_cr(vcpu); |
| 675 | struct thread_struct t; | ||
| 676 | #ifdef DEBUG | 654 | #ifdef DEBUG |
| 677 | int i; | 655 | int i; |
| 678 | #endif | 656 | #endif |
| 679 | 657 | ||
| 680 | t.fpscr.val = vcpu->arch.fpscr; | ||
| 681 | |||
| 682 | if (!kvmppc_inst_is_paired_single(vcpu, inst)) | 658 | if (!kvmppc_inst_is_paired_single(vcpu, inst)) |
| 683 | return EMULATE_FAIL; | 659 | return EMULATE_FAIL; |
| 684 | 660 | ||
| @@ -695,7 +671,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 695 | #ifdef DEBUG | 671 | #ifdef DEBUG |
| 696 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | 672 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { |
| 697 | u32 f; | 673 | u32 f; |
| 698 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | 674 | kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); |
| 699 | dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", | 675 | dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", |
| 700 | i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); | 676 | i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); |
| 701 | } | 677 | } |
| @@ -819,8 +795,9 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 819 | WARN_ON(rcomp); | 795 | WARN_ON(rcomp); |
| 820 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; | 796 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; |
| 821 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | 797 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ |
| 822 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | 798 | kvm_cvt_df(&vcpu->arch.fpr[ax_rb], |
| 823 | (float*)&vcpu->arch.qpr[ax_rd], &t); | 799 | &vcpu->arch.qpr[ax_rd], |
| 800 | &vcpu->arch.fpscr); | ||
| 824 | break; | 801 | break; |
| 825 | case OP_4X_PS_MERGE01: | 802 | case OP_4X_PS_MERGE01: |
| 826 | WARN_ON(rcomp); | 803 | WARN_ON(rcomp); |
| @@ -830,17 +807,20 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 830 | case OP_4X_PS_MERGE10: | 807 | case OP_4X_PS_MERGE10: |
| 831 | WARN_ON(rcomp); | 808 | WARN_ON(rcomp); |
| 832 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | 809 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ |
| 833 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | 810 | kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], |
| 834 | (double*)&vcpu->arch.fpr[ax_rd], &t); | 811 | &vcpu->arch.fpr[ax_rd], |
| 812 | &vcpu->arch.fpscr); | ||
| 835 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | 813 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ |
| 836 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | 814 | kvm_cvt_df(&vcpu->arch.fpr[ax_rb], |
| 837 | (float*)&vcpu->arch.qpr[ax_rd], &t); | 815 | &vcpu->arch.qpr[ax_rd], |
| 816 | &vcpu->arch.fpscr); | ||
| 838 | break; | 817 | break; |
| 839 | case OP_4X_PS_MERGE11: | 818 | case OP_4X_PS_MERGE11: |
| 840 | WARN_ON(rcomp); | 819 | WARN_ON(rcomp); |
| 841 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | 820 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ |
| 842 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | 821 | kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], |
| 843 | (double*)&vcpu->arch.fpr[ax_rd], &t); | 822 | &vcpu->arch.fpr[ax_rd], |
| 823 | &vcpu->arch.fpscr); | ||
| 844 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | 824 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; |
| 845 | break; | 825 | break; |
| 846 | } | 826 | } |
| @@ -1275,7 +1255,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 1275 | #ifdef DEBUG | 1255 | #ifdef DEBUG |
| 1276 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | 1256 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { |
| 1277 | u32 f; | 1257 | u32 f; |
| 1278 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | 1258 | kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); |
| 1279 | dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); | 1259 | dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); |
| 1280 | } | 1260 | } |
| 1281 | #endif | 1261 | #endif |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a33ab8cc2ccc..8d4e35f5372c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
| @@ -144,7 +144,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
| 144 | unsigned int priority) | 144 | unsigned int priority) |
| 145 | { | 145 | { |
| 146 | int allowed = 0; | 146 | int allowed = 0; |
| 147 | ulong msr_mask; | 147 | ulong uninitialized_var(msr_mask); |
| 148 | bool update_esr = false, update_dear = false; | 148 | bool update_esr = false, update_dear = false; |
| 149 | 149 | ||
| 150 | switch (priority) { | 150 | switch (priority) { |
| @@ -485,8 +485,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 485 | { | 485 | { |
| 486 | int i; | 486 | int i; |
| 487 | 487 | ||
| 488 | vcpu_load(vcpu); | ||
| 489 | |||
| 490 | regs->pc = vcpu->arch.pc; | 488 | regs->pc = vcpu->arch.pc; |
| 491 | regs->cr = kvmppc_get_cr(vcpu); | 489 | regs->cr = kvmppc_get_cr(vcpu); |
| 492 | regs->ctr = vcpu->arch.ctr; | 490 | regs->ctr = vcpu->arch.ctr; |
| @@ -507,8 +505,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 507 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 505 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 508 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 506 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
| 509 | 507 | ||
| 510 | vcpu_put(vcpu); | ||
| 511 | |||
| 512 | return 0; | 508 | return 0; |
| 513 | } | 509 | } |
| 514 | 510 | ||
| @@ -516,8 +512,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 516 | { | 512 | { |
| 517 | int i; | 513 | int i; |
| 518 | 514 | ||
| 519 | vcpu_load(vcpu); | ||
| 520 | |||
| 521 | vcpu->arch.pc = regs->pc; | 515 | vcpu->arch.pc = regs->pc; |
| 522 | kvmppc_set_cr(vcpu, regs->cr); | 516 | kvmppc_set_cr(vcpu, regs->cr); |
| 523 | vcpu->arch.ctr = regs->ctr; | 517 | vcpu->arch.ctr = regs->ctr; |
| @@ -537,8 +531,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 537 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 531 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
| 538 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 532 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
| 539 | 533 | ||
| 540 | vcpu_put(vcpu); | ||
| 541 | |||
| 542 | return 0; | 534 | return 0; |
| 543 | } | 535 | } |
| 544 | 536 | ||
| @@ -569,9 +561,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
| 569 | { | 561 | { |
| 570 | int r; | 562 | int r; |
| 571 | 563 | ||
| 572 | vcpu_load(vcpu); | ||
| 573 | r = kvmppc_core_vcpu_translate(vcpu, tr); | 564 | r = kvmppc_core_vcpu_translate(vcpu, tr); |
| 574 | vcpu_put(vcpu); | ||
| 575 | return r; | 565 | return r; |
| 576 | } | 566 | } |
| 577 | 567 | ||
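The vcpu_load()/vcpu_put() pairs disappear from these per-architecture ioctl handlers (and from the s390 ones further down) because this update moves the locking into the generic vcpu ioctl dispatcher, which loads the vcpu once before calling into arch code. A simplified, hedged sketch of that dispatcher shape follows; the real function is kvm_vcpu_ioctl() in virt/kvm/kvm_main.c and handles far more cases, so treat this as an outline rather than the actual code.

static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        long r;

        vcpu_load(vcpu);        /* take the vcpu once, up front */
        switch (ioctl) {
        default:
                /* arch handlers such as kvm_arch_vcpu_ioctl_get_regs() now run
                 * with the vcpu already loaded, so their own load/put is gone */
                r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
                break;
        }
        vcpu_put(vcpu);
        return r;
}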
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index 2b340a3eee90..cb34bbe16113 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S | |||
| @@ -271,3 +271,21 @@ FPD_THREE_IN(fmsub) | |||
| 271 | FPD_THREE_IN(fmadd) | 271 | FPD_THREE_IN(fmadd) |
| 272 | FPD_THREE_IN(fnmsub) | 272 | FPD_THREE_IN(fnmsub) |
| 273 | FPD_THREE_IN(fnmadd) | 273 | FPD_THREE_IN(fnmadd) |
| 274 | |||
| 275 | _GLOBAL(kvm_cvt_fd) | ||
| 276 | lfd 0,0(r5) /* load up fpscr value */ | ||
| 277 | MTFSF_L(0) | ||
| 278 | lfs 0,0(r3) | ||
| 279 | stfd 0,0(r4) | ||
| 280 | mffs 0 | ||
| 281 | stfd 0,0(r5) /* save new fpscr value */ | ||
| 282 | blr | ||
| 283 | |||
| 284 | _GLOBAL(kvm_cvt_df) | ||
| 285 | lfd 0,0(r5) /* load up fpscr value */ | ||
| 286 | MTFSF_L(0) | ||
| 287 | lfd 0,0(r3) | ||
| 288 | stfs 0,0(r4) | ||
| 289 | mffs 0 | ||
| 290 | stfd 0,0(r5) /* save new fpscr value */ | ||
| 291 | blr | ||
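The new kvm_cvt_fd/kvm_cvt_df helpers replace the earlier cvt_fd()/cvt_df() calls that needed a scratch struct thread_struct just to carry the guest FPSCR: each routine loads the FPSCR into the FPU, performs one load/store round trip to convert between single and double precision, and writes the possibly updated FPSCR back. The prototypes below are inferred from the call sites in book3s_paired_singles.c above, not quoted from the header, so treat them as an assumption.

#include <linux/types.h>        /* u32, u64 */

/* single -> double: *to = (double)*from, using and updating *fpscr */
extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr);
/* double -> single: *to = (float)*from, using and updating *fpscr */
extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr);

Typical use mirrors kvmppc_sync_qpr(): kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr) converts the double-precision FPR into the single-precision shadow QPR through the guest's FPSCR.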
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9b8683f39e05..72a4ad86ee91 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -36,11 +36,6 @@ | |||
| 36 | #define CREATE_TRACE_POINTS | 36 | #define CREATE_TRACE_POINTS |
| 37 | #include "trace.h" | 37 | #include "trace.h" |
| 38 | 38 | ||
| 39 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
| 40 | { | ||
| 41 | return gfn; | ||
| 42 | } | ||
| 43 | |||
| 44 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 39 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
| 45 | { | 40 | { |
| 46 | return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); | 41 | return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); |
| @@ -287,7 +282,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | |||
| 287 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | 282 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, |
| 288 | struct kvm_run *run) | 283 | struct kvm_run *run) |
| 289 | { | 284 | { |
| 290 | u64 gpr; | 285 | u64 uninitialized_var(gpr); |
| 291 | 286 | ||
| 292 | if (run->mmio.len > sizeof(gpr)) { | 287 | if (run->mmio.len > sizeof(gpr)) { |
| 293 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); | 288 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); |
| @@ -423,8 +418,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 423 | int r; | 418 | int r; |
| 424 | sigset_t sigsaved; | 419 | sigset_t sigsaved; |
| 425 | 420 | ||
| 426 | vcpu_load(vcpu); | ||
| 427 | |||
| 428 | if (vcpu->sigset_active) | 421 | if (vcpu->sigset_active) |
| 429 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 422 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
| 430 | 423 | ||
| @@ -456,8 +449,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 456 | if (vcpu->sigset_active) | 449 | if (vcpu->sigset_active) |
| 457 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 450 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 458 | 451 | ||
| 459 | vcpu_put(vcpu); | ||
| 460 | |||
| 461 | return r; | 452 | return r; |
| 462 | } | 453 | } |
| 463 | 454 | ||
| @@ -523,8 +514,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 523 | if (copy_from_user(&irq, argp, sizeof(irq))) | 514 | if (copy_from_user(&irq, argp, sizeof(irq))) |
| 524 | goto out; | 515 | goto out; |
| 525 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); | 516 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); |
| 526 | break; | 517 | goto out; |
| 527 | } | 518 | } |
| 519 | |||
| 528 | case KVM_ENABLE_CAP: | 520 | case KVM_ENABLE_CAP: |
| 529 | { | 521 | { |
| 530 | struct kvm_enable_cap cap; | 522 | struct kvm_enable_cap cap; |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 27605b62b980..cef7dbf69dfc 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | 26 | ||
| 27 | struct sca_entry { | 27 | struct sca_entry { |
| 28 | atomic_t scn; | 28 | atomic_t scn; |
| 29 | __u64 reserved; | 29 | __u32 reserved; |
| 30 | __u64 sda; | 30 | __u64 sda; |
| 31 | __u64 reserved2[2]; | 31 | __u64 reserved2[2]; |
| 32 | } __attribute__((packed)); | 32 | } __attribute__((packed)); |
| @@ -41,7 +41,8 @@ struct sca_block { | |||
| 41 | } __attribute__((packed)); | 41 | } __attribute__((packed)); |
| 42 | 42 | ||
| 43 | #define KVM_NR_PAGE_SIZES 2 | 43 | #define KVM_NR_PAGE_SIZES 2 |
| 44 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + ((x) - 1) * 8) | 44 | #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) |
| 45 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) | ||
| 45 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) | 46 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) |
| 46 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) | 47 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) |
| 47 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) | 48 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) |
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 3ddc30895e31..f7b6df45d8be 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
| @@ -135,7 +135,7 @@ static int handle_stop(struct kvm_vcpu *vcpu) | |||
| 135 | spin_lock_bh(&vcpu->arch.local_int.lock); | 135 | spin_lock_bh(&vcpu->arch.local_int.lock); |
| 136 | if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { | 136 | if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { |
| 137 | vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; | 137 | vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; |
| 138 | rc = __kvm_s390_vcpu_store_status(vcpu, | 138 | rc = kvm_s390_vcpu_store_status(vcpu, |
| 139 | KVM_S390_STORE_STATUS_NOADDR); | 139 | KVM_S390_STORE_STATUS_NOADDR); |
| 140 | if (rc >= 0) | 140 | if (rc >= 0) |
| 141 | rc = -EOPNOTSUPP; | 141 | rc = -EOPNOTSUPP; |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ae3705816878..4fe68650535c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -207,6 +207,7 @@ out_nokvm: | |||
| 207 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 207 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
| 208 | { | 208 | { |
| 209 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); | 209 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); |
| 210 | clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); | ||
| 210 | if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == | 211 | if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == |
| 211 | (__u64) vcpu->arch.sie_block) | 212 | (__u64) vcpu->arch.sie_block) |
| 212 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; | 213 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; |
| @@ -296,7 +297,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 296 | { | 297 | { |
| 297 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); | 298 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); |
| 298 | set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); | 299 | set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); |
| 299 | vcpu->arch.sie_block->ecb = 2; | 300 | vcpu->arch.sie_block->ecb = 6; |
| 300 | vcpu->arch.sie_block->eca = 0xC1002001U; | 301 | vcpu->arch.sie_block->eca = 0xC1002001U; |
| 301 | vcpu->arch.sie_block->fac = (int) (long) facilities; | 302 | vcpu->arch.sie_block->fac = (int) (long) facilities; |
| 302 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 303 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
| @@ -329,6 +330,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
| 329 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; | 330 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; |
| 330 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); | 331 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); |
| 331 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; | 332 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; |
| 333 | set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); | ||
| 332 | 334 | ||
| 333 | spin_lock_init(&vcpu->arch.local_int.lock); | 335 | spin_lock_init(&vcpu->arch.local_int.lock); |
| 334 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); | 336 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); |
| @@ -363,63 +365,49 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 363 | 365 | ||
| 364 | static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) | 366 | static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) |
| 365 | { | 367 | { |
| 366 | vcpu_load(vcpu); | ||
| 367 | kvm_s390_vcpu_initial_reset(vcpu); | 368 | kvm_s390_vcpu_initial_reset(vcpu); |
| 368 | vcpu_put(vcpu); | ||
| 369 | return 0; | 369 | return 0; |
| 370 | } | 370 | } |
| 371 | 371 | ||
| 372 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 372 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
| 373 | { | 373 | { |
| 374 | vcpu_load(vcpu); | ||
| 375 | memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs)); | 374 | memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs)); |
| 376 | vcpu_put(vcpu); | ||
| 377 | return 0; | 375 | return 0; |
| 378 | } | 376 | } |
| 379 | 377 | ||
| 380 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 378 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
| 381 | { | 379 | { |
| 382 | vcpu_load(vcpu); | ||
| 383 | memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); | 380 | memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); |
| 384 | vcpu_put(vcpu); | ||
| 385 | return 0; | 381 | return 0; |
| 386 | } | 382 | } |
| 387 | 383 | ||
| 388 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 384 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
| 389 | struct kvm_sregs *sregs) | 385 | struct kvm_sregs *sregs) |
| 390 | { | 386 | { |
| 391 | vcpu_load(vcpu); | ||
| 392 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); | 387 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); |
| 393 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); | 388 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); |
| 394 | vcpu_put(vcpu); | ||
| 395 | return 0; | 389 | return 0; |
| 396 | } | 390 | } |
| 397 | 391 | ||
| 398 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 392 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
| 399 | struct kvm_sregs *sregs) | 393 | struct kvm_sregs *sregs) |
| 400 | { | 394 | { |
| 401 | vcpu_load(vcpu); | ||
| 402 | memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); | 395 | memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); |
| 403 | memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); | 396 | memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); |
| 404 | vcpu_put(vcpu); | ||
| 405 | return 0; | 397 | return 0; |
| 406 | } | 398 | } |
| 407 | 399 | ||
| 408 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 400 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
| 409 | { | 401 | { |
| 410 | vcpu_load(vcpu); | ||
| 411 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); | 402 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); |
| 412 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; | 403 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; |
| 413 | vcpu_put(vcpu); | ||
| 414 | return 0; | 404 | return 0; |
| 415 | } | 405 | } |
| 416 | 406 | ||
| 417 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 407 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
| 418 | { | 408 | { |
| 419 | vcpu_load(vcpu); | ||
| 420 | memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); | 409 | memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); |
| 421 | fpu->fpc = vcpu->arch.guest_fpregs.fpc; | 410 | fpu->fpc = vcpu->arch.guest_fpregs.fpc; |
| 422 | vcpu_put(vcpu); | ||
| 423 | return 0; | 411 | return 0; |
| 424 | } | 412 | } |
| 425 | 413 | ||
| @@ -427,14 +415,12 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) | |||
| 427 | { | 415 | { |
| 428 | int rc = 0; | 416 | int rc = 0; |
| 429 | 417 | ||
| 430 | vcpu_load(vcpu); | ||
| 431 | if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) | 418 | if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) |
| 432 | rc = -EBUSY; | 419 | rc = -EBUSY; |
| 433 | else { | 420 | else { |
| 434 | vcpu->run->psw_mask = psw.mask; | 421 | vcpu->run->psw_mask = psw.mask; |
| 435 | vcpu->run->psw_addr = psw.addr; | 422 | vcpu->run->psw_addr = psw.addr; |
| 436 | } | 423 | } |
| 437 | vcpu_put(vcpu); | ||
| 438 | return rc; | 424 | return rc; |
| 439 | } | 425 | } |
| 440 | 426 | ||
| @@ -498,8 +484,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 498 | int rc; | 484 | int rc; |
| 499 | sigset_t sigsaved; | 485 | sigset_t sigsaved; |
| 500 | 486 | ||
| 501 | vcpu_load(vcpu); | ||
| 502 | |||
| 503 | rerun_vcpu: | 487 | rerun_vcpu: |
| 504 | if (vcpu->requests) | 488 | if (vcpu->requests) |
| 505 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 489 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
| @@ -568,8 +552,6 @@ rerun_vcpu: | |||
| 568 | if (vcpu->sigset_active) | 552 | if (vcpu->sigset_active) |
| 569 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 553 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 570 | 554 | ||
| 571 | vcpu_put(vcpu); | ||
| 572 | |||
| 573 | vcpu->stat.exit_userspace++; | 555 | vcpu->stat.exit_userspace++; |
| 574 | return rc; | 556 | return rc; |
| 575 | } | 557 | } |
| @@ -589,7 +571,7 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, | |||
| 589 | * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit | 571 | * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit |
| 590 | * KVM_S390_STORE_STATUS_PREFIXED: -> prefix | 572 | * KVM_S390_STORE_STATUS_PREFIXED: -> prefix |
| 591 | */ | 573 | */ |
| 592 | int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | 574 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) |
| 593 | { | 575 | { |
| 594 | const unsigned char archmode = 1; | 576 | const unsigned char archmode = 1; |
| 595 | int prefix; | 577 | int prefix; |
| @@ -651,45 +633,42 @@ int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
| 651 | return 0; | 633 | return 0; |
| 652 | } | 634 | } |
| 653 | 635 | ||
| 654 | static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | ||
| 655 | { | ||
| 656 | int rc; | ||
| 657 | |||
| 658 | vcpu_load(vcpu); | ||
| 659 | rc = __kvm_s390_vcpu_store_status(vcpu, addr); | ||
| 660 | vcpu_put(vcpu); | ||
| 661 | return rc; | ||
| 662 | } | ||
| 663 | |||
| 664 | long kvm_arch_vcpu_ioctl(struct file *filp, | 636 | long kvm_arch_vcpu_ioctl(struct file *filp, |
| 665 | unsigned int ioctl, unsigned long arg) | 637 | unsigned int ioctl, unsigned long arg) |
| 666 | { | 638 | { |
| 667 | struct kvm_vcpu *vcpu = filp->private_data; | 639 | struct kvm_vcpu *vcpu = filp->private_data; |
| 668 | void __user *argp = (void __user *)arg; | 640 | void __user *argp = (void __user *)arg; |
| 641 | long r; | ||
| 669 | 642 | ||
| 670 | switch (ioctl) { | 643 | switch (ioctl) { |
| 671 | case KVM_S390_INTERRUPT: { | 644 | case KVM_S390_INTERRUPT: { |
| 672 | struct kvm_s390_interrupt s390int; | 645 | struct kvm_s390_interrupt s390int; |
| 673 | 646 | ||
| 647 | r = -EFAULT; | ||
| 674 | if (copy_from_user(&s390int, argp, sizeof(s390int))) | 648 | if (copy_from_user(&s390int, argp, sizeof(s390int))) |
| 675 | return -EFAULT; | 649 | break; |
| 676 | return kvm_s390_inject_vcpu(vcpu, &s390int); | 650 | r = kvm_s390_inject_vcpu(vcpu, &s390int); |
| 651 | break; | ||
| 677 | } | 652 | } |
| 678 | case KVM_S390_STORE_STATUS: | 653 | case KVM_S390_STORE_STATUS: |
| 679 | return kvm_s390_vcpu_store_status(vcpu, arg); | 654 | r = kvm_s390_vcpu_store_status(vcpu, arg); |
| 655 | break; | ||
| 680 | case KVM_S390_SET_INITIAL_PSW: { | 656 | case KVM_S390_SET_INITIAL_PSW: { |
| 681 | psw_t psw; | 657 | psw_t psw; |
| 682 | 658 | ||
| 659 | r = -EFAULT; | ||
| 683 | if (copy_from_user(&psw, argp, sizeof(psw))) | 660 | if (copy_from_user(&psw, argp, sizeof(psw))) |
| 684 | return -EFAULT; | 661 | break; |
| 685 | return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); | 662 | r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); |
| 663 | break; | ||
| 686 | } | 664 | } |
| 687 | case KVM_S390_INITIAL_RESET: | 665 | case KVM_S390_INITIAL_RESET: |
| 688 | return kvm_arch_vcpu_ioctl_initial_reset(vcpu); | 666 | r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); |
| 667 | break; | ||
| 689 | default: | 668 | default: |
| 690 | ; | 669 | r = -EINVAL; |
| 691 | } | 670 | } |
| 692 | return -EINVAL; | 671 | return r; |
| 693 | } | 672 | } |
| 694 | 673 | ||
| 695 | /* Section: memory related */ | 674 | /* Section: memory related */ |
| @@ -744,11 +723,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
| 744 | { | 723 | { |
| 745 | } | 724 | } |
| 746 | 725 | ||
| 747 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
| 748 | { | ||
| 749 | return gfn; | ||
| 750 | } | ||
| 751 | |||
| 752 | static int __init kvm_s390_init(void) | 726 | static int __init kvm_s390_init(void) |
| 753 | { | 727 | { |
| 754 | int ret; | 728 | int ret; |
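The kvm-s390.c hunk above reworks kvm_arch_vcpu_ioctl() from early returns into the single `r`-and-`break` exit path used by the other KVM ioctl dispatchers, so every command leaves through one `return r`. A minimal stand-alone sketch of that pattern follows; the command numbers, request struct and handler are illustrative placeholders, not kernel APIs.

```c
/* Stand-alone sketch of the single-exit ioctl dispatch pattern; the
 * command values, request struct and "handler" below are illustrative
 * placeholders, not kernel code. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

struct demo_req { int arg; };

static long demo_ioctl(unsigned int cmd, const void *user_buf)
{
	struct demo_req req;
	long r;

	switch (cmd) {
	case 1:                         /* command that copies in an argument */
		r = -EFAULT;
		if (user_buf == NULL)   /* stands in for copy_from_user() failing */
			break;
		memcpy(&req, user_buf, sizeof(req));
		r = req.arg;            /* stands in for the real handler */
		break;
	case 2:                         /* command without an argument */
		r = 0;
		break;
	default:
		r = -EINVAL;
	}
	return r;                       /* one exit point for every command */
}

int main(void)
{
	struct demo_req req = { .arg = 42 };

	printf("%ld %ld %ld\n", demo_ioctl(1, &req), demo_ioctl(1, NULL),
	       demo_ioctl(9, NULL));   /* prints "42 -14 -22" */
	return 0;
}
```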
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index cfa9d1777457..a7b7586626db 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -92,7 +92,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); | |||
| 92 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | 92 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); |
| 93 | 93 | ||
| 94 | /* implemented in kvm-s390.c */ | 94 | /* implemented in kvm-s390.c */ |
| 95 | int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, | 95 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, |
| 96 | unsigned long addr); | 96 | unsigned long addr); |
| 97 | /* implemented in diag.c */ | 97 | /* implemented in diag.c */ |
| 98 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | 98 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b904..815c5b2b9f57 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
| @@ -482,6 +482,8 @@ static inline void fpu_copy(struct fpu *dst, struct fpu *src) | |||
| 482 | memcpy(dst->state, src->state, xstate_size); | 482 | memcpy(dst->state, src->state, xstate_size); |
| 483 | } | 483 | } |
| 484 | 484 | ||
| 485 | extern void fpu_finit(struct fpu *fpu); | ||
| 486 | |||
| 485 | #endif /* __ASSEMBLY__ */ | 487 | #endif /* __ASSEMBLY__ */ |
| 486 | 488 | ||
| 487 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | 489 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ff90055c7f0b..4d8dcbdfc120 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
| @@ -22,6 +22,8 @@ | |||
| 22 | #define __KVM_HAVE_XEN_HVM | 22 | #define __KVM_HAVE_XEN_HVM |
| 23 | #define __KVM_HAVE_VCPU_EVENTS | 23 | #define __KVM_HAVE_VCPU_EVENTS |
| 24 | #define __KVM_HAVE_DEBUGREGS | 24 | #define __KVM_HAVE_DEBUGREGS |
| 25 | #define __KVM_HAVE_XSAVE | ||
| 26 | #define __KVM_HAVE_XCRS | ||
| 25 | 27 | ||
| 26 | /* Architectural interrupt line count. */ | 28 | /* Architectural interrupt line count. */ |
| 27 | #define KVM_NR_INTERRUPTS 256 | 29 | #define KVM_NR_INTERRUPTS 256 |
| @@ -299,4 +301,24 @@ struct kvm_debugregs { | |||
| 299 | __u64 reserved[9]; | 301 | __u64 reserved[9]; |
| 300 | }; | 302 | }; |
| 301 | 303 | ||
| 304 | /* for KVM_CAP_XSAVE */ | ||
| 305 | struct kvm_xsave { | ||
| 306 | __u32 region[1024]; | ||
| 307 | }; | ||
| 308 | |||
| 309 | #define KVM_MAX_XCRS 16 | ||
| 310 | |||
| 311 | struct kvm_xcr { | ||
| 312 | __u32 xcr; | ||
| 313 | __u32 reserved; | ||
| 314 | __u64 value; | ||
| 315 | }; | ||
| 316 | |||
| 317 | struct kvm_xcrs { | ||
| 318 | __u32 nr_xcrs; | ||
| 319 | __u32 flags; | ||
| 320 | struct kvm_xcr xcrs[KVM_MAX_XCRS]; | ||
| 321 | __u64 padding[16]; | ||
| 322 | }; | ||
| 323 | |||
| 302 | #endif /* _ASM_X86_KVM_H */ | 324 | #endif /* _ASM_X86_KVM_H */ |
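struct kvm_xsave and struct kvm_xcrs back the new KVM_CAP_XSAVE and KVM_CAP_XCRS vcpu-state ioctls. A hedged userspace sketch, assuming a vcpu file descriptor already obtained through the usual /dev/kvm, KVM_CREATE_VM and KVM_CREATE_VCPU sequence (not shown here) and a kernel that exposes KVM_GET_XSAVE and KVM_GET_XCRS:

```c
/* Hedged userspace sketch: read a vcpu's extended state through the new
 * structures.  Assumes a vcpu fd set up elsewhere and a kernel with
 * KVM_GET_XSAVE / KVM_GET_XCRS support. */
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int dump_xstate(int vcpu_fd)
{
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;

	if (ioctl(vcpu_fd, KVM_GET_XSAVE, &xsave) < 0)
		return -1;
	if (ioctl(vcpu_fd, KVM_GET_XCRS, &xcrs) < 0)
		return -1;

	/* region[] mirrors the 4 KiB kernel xsave area; word 0 begins the
	 * legacy FXSAVE image. */
	printf("xsave word 0: %#x, xcr0: %#llx\n", xsave.region[0],
	       xcrs.nr_xcrs ? (unsigned long long)xcrs.xcrs[0].value : 0ULL);
	return 0;
}
```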
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0b2729bf2070..51cfd730ac5d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
| @@ -51,8 +51,10 @@ struct x86_emulate_ctxt; | |||
| 51 | #define X86EMUL_UNHANDLEABLE 1 | 51 | #define X86EMUL_UNHANDLEABLE 1 |
| 52 | /* Terminate emulation but return success to the caller. */ | 52 | /* Terminate emulation but return success to the caller. */ |
| 53 | #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ | 53 | #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ |
| 54 | #define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ | 54 | #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ |
| 55 | #define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ | 55 | #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ |
| 56 | #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ | ||
| 57 | |||
| 56 | struct x86_emulate_ops { | 58 | struct x86_emulate_ops { |
| 57 | /* | 59 | /* |
| 58 | * read_std: Read bytes of standard (non-emulated/special) memory. | 60 | * read_std: Read bytes of standard (non-emulated/special) memory. |
| @@ -92,6 +94,7 @@ struct x86_emulate_ops { | |||
| 92 | int (*read_emulated)(unsigned long addr, | 94 | int (*read_emulated)(unsigned long addr, |
| 93 | void *val, | 95 | void *val, |
| 94 | unsigned int bytes, | 96 | unsigned int bytes, |
| 97 | unsigned int *error, | ||
| 95 | struct kvm_vcpu *vcpu); | 98 | struct kvm_vcpu *vcpu); |
| 96 | 99 | ||
| 97 | /* | 100 | /* |
| @@ -104,6 +107,7 @@ struct x86_emulate_ops { | |||
| 104 | int (*write_emulated)(unsigned long addr, | 107 | int (*write_emulated)(unsigned long addr, |
| 105 | const void *val, | 108 | const void *val, |
| 106 | unsigned int bytes, | 109 | unsigned int bytes, |
| 110 | unsigned int *error, | ||
| 107 | struct kvm_vcpu *vcpu); | 111 | struct kvm_vcpu *vcpu); |
| 108 | 112 | ||
| 109 | /* | 113 | /* |
| @@ -118,6 +122,7 @@ struct x86_emulate_ops { | |||
| 118 | const void *old, | 122 | const void *old, |
| 119 | const void *new, | 123 | const void *new, |
| 120 | unsigned int bytes, | 124 | unsigned int bytes, |
| 125 | unsigned int *error, | ||
| 121 | struct kvm_vcpu *vcpu); | 126 | struct kvm_vcpu *vcpu); |
| 122 | 127 | ||
| 123 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | 128 | int (*pio_in_emulated)(int size, unsigned short port, void *val, |
| @@ -132,18 +137,26 @@ struct x86_emulate_ops { | |||
| 132 | int seg, struct kvm_vcpu *vcpu); | 137 | int seg, struct kvm_vcpu *vcpu); |
| 133 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | 138 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); |
| 134 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | 139 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); |
| 140 | unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); | ||
| 135 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | 141 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); |
| 136 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | 142 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); |
| 137 | void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | 143 | int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); |
| 138 | int (*cpl)(struct kvm_vcpu *vcpu); | 144 | int (*cpl)(struct kvm_vcpu *vcpu); |
| 139 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 145 | int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); |
| 146 | int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); | ||
| 147 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | ||
| 148 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | ||
| 140 | }; | 149 | }; |
| 141 | 150 | ||
| 142 | /* Type, address-of, and value of an instruction's operand. */ | 151 | /* Type, address-of, and value of an instruction's operand. */ |
| 143 | struct operand { | 152 | struct operand { |
| 144 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; | 153 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; |
| 145 | unsigned int bytes; | 154 | unsigned int bytes; |
| 146 | unsigned long val, orig_val, *ptr; | 155 | unsigned long orig_val, *ptr; |
| 156 | union { | ||
| 157 | unsigned long val; | ||
| 158 | char valptr[sizeof(unsigned long) + 2]; | ||
| 159 | }; | ||
| 147 | }; | 160 | }; |
| 148 | 161 | ||
| 149 | struct fetch_cache { | 162 | struct fetch_cache { |
| @@ -186,6 +199,7 @@ struct decode_cache { | |||
| 186 | unsigned long modrm_val; | 199 | unsigned long modrm_val; |
| 187 | struct fetch_cache fetch; | 200 | struct fetch_cache fetch; |
| 188 | struct read_cache io_read; | 201 | struct read_cache io_read; |
| 202 | struct read_cache mem_read; | ||
| 189 | }; | 203 | }; |
| 190 | 204 | ||
| 191 | struct x86_emulate_ctxt { | 205 | struct x86_emulate_ctxt { |
| @@ -202,6 +216,12 @@ struct x86_emulate_ctxt { | |||
| 202 | int interruptibility; | 216 | int interruptibility; |
| 203 | 217 | ||
| 204 | bool restart; /* restart string instruction after writeback */ | 218 | bool restart; /* restart string instruction after writeback */ |
| 219 | |||
| 220 | int exception; /* exception that happens during emulation or -1 */ | ||
| 221 | u32 error_code; /* error code for exception */ | ||
| 222 | bool error_code_valid; | ||
| 223 | unsigned long cr2; /* faulted address in case of #PF */ | ||
| 224 | |||
| 205 | /* decode cache */ | 225 | /* decode cache */ |
| 206 | struct decode_cache decode; | 226 | struct decode_cache decode; |
| 207 | }; | 227 | }; |
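In the emulator interface above, read_emulated, write_emulated and cmpxchg_emulated now take an `unsigned int *error` out-parameter, and the context gains exception, error_code and cr2 fields so faults can be queued and injected after emulation instead of mid-callback. Below is a minimal sketch of a backend callback written against that convention; the flat guest_ram array and the error-code bit are illustrative stand-ins, not KVM's x86.c implementation.

```c
/* Minimal sketch of a read_emulated-style backend that reports a fault
 * through the new error out-parameter.  The flat "guest_ram" backing
 * store stands in for real MMU translation and is not kernel code. */
#include <stdint.h>
#include <string.h>

#define X86EMUL_CONTINUE         0
#define X86EMUL_PROPAGATE_FAULT  2
#define DEMO_PFERR_USER          (1u << 2)   /* illustrative error-code bit */

static uint8_t guest_ram[1 << 20];           /* 1 MiB toy guest memory */

static int toy_read_emulated(unsigned long addr, void *val,
			     unsigned int bytes, unsigned int *error)
{
	if (addr + bytes > sizeof(guest_ram)) {
		*error = DEMO_PFERR_USER;    /* caller raises #PF with this code */
		return X86EMUL_PROPAGATE_FAULT;
	}
	memcpy(val, guest_ram + addr, bytes);
	return X86EMUL_CONTINUE;
}
```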
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 76f5483cffec..502e53f999cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
| 16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
| 17 | #include <linux/tracepoint.h> | 17 | #include <linux/tracepoint.h> |
| 18 | #include <linux/cpumask.h> | ||
| 18 | 19 | ||
| 19 | #include <linux/kvm.h> | 20 | #include <linux/kvm.h> |
| 20 | #include <linux/kvm_para.h> | 21 | #include <linux/kvm_para.h> |
| @@ -39,11 +40,14 @@ | |||
| 39 | 0xFFFFFF0000000000ULL) | 40 | 0xFFFFFF0000000000ULL) |
| 40 | 41 | ||
| 41 | #define INVALID_PAGE (~(hpa_t)0) | 42 | #define INVALID_PAGE (~(hpa_t)0) |
| 43 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | ||
| 44 | |||
| 42 | #define UNMAPPED_GVA (~(gpa_t)0) | 45 | #define UNMAPPED_GVA (~(gpa_t)0) |
| 43 | 46 | ||
| 44 | /* KVM Hugepage definitions for x86 */ | 47 | /* KVM Hugepage definitions for x86 */ |
| 45 | #define KVM_NR_PAGE_SIZES 3 | 48 | #define KVM_NR_PAGE_SIZES 3 |
| 46 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) | 49 | #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) |
| 50 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) | ||
| 47 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) | 51 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) |
| 48 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) | 52 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) |
| 49 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) | 53 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) |
| @@ -69,8 +73,6 @@ | |||
| 69 | 73 | ||
| 70 | #define IOPL_SHIFT 12 | 74 | #define IOPL_SHIFT 12 |
| 71 | 75 | ||
| 72 | #define KVM_ALIAS_SLOTS 4 | ||
| 73 | |||
| 74 | #define KVM_PERMILLE_MMU_PAGES 20 | 76 | #define KVM_PERMILLE_MMU_PAGES 20 |
| 75 | #define KVM_MIN_ALLOC_MMU_PAGES 64 | 77 | #define KVM_MIN_ALLOC_MMU_PAGES 64 |
| 76 | #define KVM_MMU_HASH_SHIFT 10 | 78 | #define KVM_MMU_HASH_SHIFT 10 |
| @@ -241,7 +243,7 @@ struct kvm_mmu { | |||
| 241 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | 243 | void (*prefetch_page)(struct kvm_vcpu *vcpu, |
| 242 | struct kvm_mmu_page *page); | 244 | struct kvm_mmu_page *page); |
| 243 | int (*sync_page)(struct kvm_vcpu *vcpu, | 245 | int (*sync_page)(struct kvm_vcpu *vcpu, |
| 244 | struct kvm_mmu_page *sp); | 246 | struct kvm_mmu_page *sp, bool clear_unsync); |
| 245 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 247 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
| 246 | hpa_t root_hpa; | 248 | hpa_t root_hpa; |
| 247 | int root_level; | 249 | int root_level; |
| @@ -301,8 +303,8 @@ struct kvm_vcpu_arch { | |||
| 301 | unsigned long mmu_seq; | 303 | unsigned long mmu_seq; |
| 302 | } update_pte; | 304 | } update_pte; |
| 303 | 305 | ||
| 304 | struct i387_fxsave_struct host_fx_image; | 306 | struct fpu guest_fpu; |
| 305 | struct i387_fxsave_struct guest_fx_image; | 307 | u64 xcr0; |
| 306 | 308 | ||
| 307 | gva_t mmio_fault_cr2; | 309 | gva_t mmio_fault_cr2; |
| 308 | struct kvm_pio_request pio; | 310 | struct kvm_pio_request pio; |
| @@ -360,26 +362,11 @@ struct kvm_vcpu_arch { | |||
| 360 | 362 | ||
| 361 | /* fields used by HYPER-V emulation */ | 363 | /* fields used by HYPER-V emulation */ |
| 362 | u64 hv_vapic; | 364 | u64 hv_vapic; |
| 363 | }; | ||
| 364 | |||
| 365 | struct kvm_mem_alias { | ||
| 366 | gfn_t base_gfn; | ||
| 367 | unsigned long npages; | ||
| 368 | gfn_t target_gfn; | ||
| 369 | #define KVM_ALIAS_INVALID 1UL | ||
| 370 | unsigned long flags; | ||
| 371 | }; | ||
| 372 | 365 | ||
| 373 | #define KVM_ARCH_HAS_UNALIAS_INSTANTIATION | 366 | cpumask_var_t wbinvd_dirty_mask; |
| 374 | |||
| 375 | struct kvm_mem_aliases { | ||
| 376 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; | ||
| 377 | int naliases; | ||
| 378 | }; | 367 | }; |
| 379 | 368 | ||
| 380 | struct kvm_arch { | 369 | struct kvm_arch { |
| 381 | struct kvm_mem_aliases *aliases; | ||
| 382 | |||
| 383 | unsigned int n_free_mmu_pages; | 370 | unsigned int n_free_mmu_pages; |
| 384 | unsigned int n_requested_mmu_pages; | 371 | unsigned int n_requested_mmu_pages; |
| 385 | unsigned int n_alloc_mmu_pages; | 372 | unsigned int n_alloc_mmu_pages; |
| @@ -533,6 +520,8 @@ struct kvm_x86_ops { | |||
| 533 | 520 | ||
| 534 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); | 521 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); |
| 535 | 522 | ||
| 523 | bool (*has_wbinvd_exit)(void); | ||
| 524 | |||
| 536 | const struct trace_print_flags *exit_reasons_str; | 525 | const struct trace_print_flags *exit_reasons_str; |
| 537 | }; | 526 | }; |
| 538 | 527 | ||
| @@ -576,7 +565,6 @@ enum emulation_result { | |||
| 576 | #define EMULTYPE_SKIP (1 << 2) | 565 | #define EMULTYPE_SKIP (1 << 2) |
| 577 | int emulate_instruction(struct kvm_vcpu *vcpu, | 566 | int emulate_instruction(struct kvm_vcpu *vcpu, |
| 578 | unsigned long cr2, u16 error_code, int emulation_type); | 567 | unsigned long cr2, u16 error_code, int emulation_type); |
| 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | ||
| 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 568 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
| 581 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 569 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
| 582 | 570 | ||
| @@ -591,10 +579,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | |||
| 591 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 579 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
| 592 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 580 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
| 593 | int emulate_clts(struct kvm_vcpu *vcpu); | 581 | int emulate_clts(struct kvm_vcpu *vcpu); |
| 594 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, | 582 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
| 595 | unsigned long *dest); | ||
| 596 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | ||
| 597 | unsigned long value); | ||
| 598 | 583 | ||
| 599 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 584 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
| 600 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 585 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
| @@ -602,15 +587,16 @@ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | |||
| 602 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 587 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
| 603 | bool has_error_code, u32 error_code); | 588 | bool has_error_code, u32 error_code); |
| 604 | 589 | ||
| 605 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 590 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
| 606 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 591 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
| 607 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 592 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
| 608 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 593 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
| 609 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | 594 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); |
| 610 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | 595 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); |
| 611 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 596 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
| 612 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 597 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
| 613 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 598 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
| 599 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); | ||
| 614 | 600 | ||
| 615 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 601 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
| 616 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 602 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
| @@ -630,12 +616,7 @@ int kvm_pic_set_irq(void *opaque, int irq, int level); | |||
| 630 | 616 | ||
| 631 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 617 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
| 632 | 618 | ||
| 633 | void fx_init(struct kvm_vcpu *vcpu); | 619 | int fx_init(struct kvm_vcpu *vcpu); |
| 634 | |||
| 635 | int emulator_write_emulated(unsigned long addr, | ||
| 636 | const void *val, | ||
| 637 | unsigned int bytes, | ||
| 638 | struct kvm_vcpu *vcpu); | ||
| 639 | 620 | ||
| 640 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 621 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
| 641 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 622 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| @@ -664,8 +645,6 @@ void kvm_disable_tdp(void); | |||
| 664 | int complete_pio(struct kvm_vcpu *vcpu); | 645 | int complete_pio(struct kvm_vcpu *vcpu); |
| 665 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 646 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
| 666 | 647 | ||
| 667 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); | ||
| 668 | |||
| 669 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 648 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
| 670 | { | 649 | { |
| 671 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 650 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
| @@ -719,21 +698,6 @@ static inline unsigned long read_msr(unsigned long msr) | |||
| 719 | } | 698 | } |
| 720 | #endif | 699 | #endif |
| 721 | 700 | ||
| 722 | static inline void kvm_fx_save(struct i387_fxsave_struct *image) | ||
| 723 | { | ||
| 724 | asm("fxsave (%0)":: "r" (image)); | ||
| 725 | } | ||
| 726 | |||
| 727 | static inline void kvm_fx_restore(struct i387_fxsave_struct *image) | ||
| 728 | { | ||
| 729 | asm("fxrstor (%0)":: "r" (image)); | ||
| 730 | } | ||
| 731 | |||
| 732 | static inline void kvm_fx_finit(void) | ||
| 733 | { | ||
| 734 | asm("finit"); | ||
| 735 | } | ||
| 736 | |||
| 737 | static inline u32 get_rdx_init_val(void) | 701 | static inline u32 get_rdx_init_val(void) |
| 738 | { | 702 | { |
| 739 | return 0x600; /* P6 family */ | 703 | return 0x600; /* P6 family */ |
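The hugepage macros in kvm_host.h are now expressed through KVM_HPAGE_GFN_SHIFT. Assuming the usual x86 PAGE_SHIFT of 12 (PAGE_SHIFT itself is not part of this diff), the three supported levels work out to 4 KiB, 2 MiB and 1 GiB pages; a quick check:

```c
/* Quick check of the reworked hugepage macros, assuming PAGE_SHIFT == 12
 * as on x86.  Prints 4096, 2097152 and 1073741824 bytes for levels 1..3. */
#include <stdio.h>

#define PAGE_SHIFT                12
#define PAGE_SIZE                 (1UL << PAGE_SHIFT)
#define KVM_HPAGE_GFN_SHIFT(x)    (((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x)        (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x)         (1UL << KVM_HPAGE_SHIFT(x))
#define KVM_PAGES_PER_HPAGE(x)    (KVM_HPAGE_SIZE(x) / PAGE_SIZE)

int main(void)
{
	for (int level = 1; level <= 3; level++)
		printf("level %d: %lu bytes, %lu base pages\n",
		       level, KVM_HPAGE_SIZE(level), KVM_PAGES_PER_HPAGE(level));
	return 0;
}
```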
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8c7ae4318629..509a42187dc2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #define _EFER_LMA 10 /* Long mode active (read-only) */ | 20 | #define _EFER_LMA 10 /* Long mode active (read-only) */ |
| 21 | #define _EFER_NX 11 /* No execute enable */ | 21 | #define _EFER_NX 11 /* No execute enable */ |
| 22 | #define _EFER_SVME 12 /* Enable virtualization */ | 22 | #define _EFER_SVME 12 /* Enable virtualization */ |
| 23 | #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ | ||
| 23 | #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ | 24 | #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ |
| 24 | 25 | ||
| 25 | #define EFER_SCE (1<<_EFER_SCE) | 26 | #define EFER_SCE (1<<_EFER_SCE) |
| @@ -27,6 +28,7 @@ | |||
| 27 | #define EFER_LMA (1<<_EFER_LMA) | 28 | #define EFER_LMA (1<<_EFER_LMA) |
| 28 | #define EFER_NX (1<<_EFER_NX) | 29 | #define EFER_NX (1<<_EFER_NX) |
| 29 | #define EFER_SVME (1<<_EFER_SVME) | 30 | #define EFER_SVME (1<<_EFER_SVME) |
| 31 | #define EFER_LMSLE (1<<_EFER_LMSLE) | ||
| 30 | #define EFER_FFXSR (1<<_EFER_FFXSR) | 32 | #define EFER_FFXSR (1<<_EFER_FFXSR) |
| 31 | 33 | ||
| 32 | /* Intel MSRs. Some also available on other CPUs */ | 34 | /* Intel MSRs. Some also available on other CPUs */ |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9e6779f7cf2d..9f0cbd987d50 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -257,6 +257,7 @@ enum vmcs_field { | |||
| 257 | #define EXIT_REASON_IO_INSTRUCTION 30 | 257 | #define EXIT_REASON_IO_INSTRUCTION 30 |
| 258 | #define EXIT_REASON_MSR_READ 31 | 258 | #define EXIT_REASON_MSR_READ 31 |
| 259 | #define EXIT_REASON_MSR_WRITE 32 | 259 | #define EXIT_REASON_MSR_WRITE 32 |
| 260 | #define EXIT_REASON_INVALID_STATE 33 | ||
| 260 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 261 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
| 261 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 | 262 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 |
| 262 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | 263 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 |
| @@ -266,6 +267,7 @@ enum vmcs_field { | |||
| 266 | #define EXIT_REASON_EPT_VIOLATION 48 | 267 | #define EXIT_REASON_EPT_VIOLATION 48 |
| 267 | #define EXIT_REASON_EPT_MISCONFIG 49 | 268 | #define EXIT_REASON_EPT_MISCONFIG 49 |
| 268 | #define EXIT_REASON_WBINVD 54 | 269 | #define EXIT_REASON_WBINVD 54 |
| 270 | #define EXIT_REASON_XSETBV 55 | ||
| 269 | 271 | ||
| 270 | /* | 272 | /* |
| 271 | * Interruption-information format | 273 | * Interruption-information format |
| @@ -375,6 +377,9 @@ enum vmcs_field { | |||
| 375 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 377 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
| 376 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 378 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
| 377 | 379 | ||
| 380 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ | ||
| 381 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ | ||
| 382 | |||
| 378 | #define VMX_EPT_DEFAULT_GAW 3 | 383 | #define VMX_EPT_DEFAULT_GAW 3 |
| 379 | #define VMX_EPT_MAX_GAW 0x4 | 384 | #define VMX_EPT_MAX_GAW 0x4 |
| 380 | #define VMX_EPT_MT_EPTE_SHIFT 3 | 385 | #define VMX_EPT_MT_EPTE_SHIFT 3 |
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae228..32c36668fa7b 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
| @@ -13,6 +13,12 @@ | |||
| 13 | 13 | ||
| 14 | #define FXSAVE_SIZE 512 | 14 | #define FXSAVE_SIZE 512 |
| 15 | 15 | ||
| 16 | #define XSAVE_HDR_SIZE 64 | ||
| 17 | #define XSAVE_HDR_OFFSET FXSAVE_SIZE | ||
| 18 | |||
| 19 | #define XSAVE_YMM_SIZE 256 | ||
| 20 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | ||
| 21 | |||
| 16 | /* | 22 | /* |
| 17 | * These are the features that the OS can handle currently. | 23 | * These are the features that the OS can handle currently. |
| 18 | */ | 24 | */ |
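The new xsave.h constants fix the layout of the extended-state image that the KVM_GET_XSAVE path copies out: the legacy FXSAVE area occupies bytes 0 to 511, the xsave header the next 64 bytes, and the YMM save area the 256 bytes after that. A small self-check of that arithmetic:

```c
/* Layout implied by the constants above: legacy FXSAVE image at 0..511,
 * xsave header at 512..575, YMM save area at 576..831. */
#include <assert.h>

#define FXSAVE_SIZE       512
#define XSAVE_HDR_SIZE    64
#define XSAVE_HDR_OFFSET  FXSAVE_SIZE
#define XSAVE_YMM_SIZE    256
#define XSAVE_YMM_OFFSET  (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)

int main(void)
{
	assert(XSAVE_HDR_OFFSET == 512);
	assert(XSAVE_YMM_OFFSET == 576);
	assert(XSAVE_YMM_OFFSET + XSAVE_YMM_SIZE == 832);
	return 0;
}
```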
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 86cef6b32253..c4444bce8469 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -107,7 +107,7 @@ void __cpuinit fpu_init(void) | |||
| 107 | } | 107 | } |
| 108 | #endif /* CONFIG_X86_64 */ | 108 | #endif /* CONFIG_X86_64 */ |
| 109 | 109 | ||
| 110 | static void fpu_finit(struct fpu *fpu) | 110 | void fpu_finit(struct fpu *fpu) |
| 111 | { | 111 | { |
| 112 | #ifdef CONFIG_X86_32 | 112 | #ifdef CONFIG_X86_32 |
| 113 | if (!HAVE_HWFP) { | 113 | if (!HAVE_HWFP) { |
| @@ -132,6 +132,7 @@ static void fpu_finit(struct fpu *fpu) | |||
| 132 | fp->fos = 0xffff0000u; | 132 | fp->fos = 0xffff0000u; |
| 133 | } | 133 | } |
| 134 | } | 134 | } |
| 135 | EXPORT_SYMBOL_GPL(fpu_finit); | ||
| 135 | 136 | ||
| 136 | /* | 137 | /* |
| 137 | * The _current_ task is using the FPU for the first time | 138 | * The _current_ task is using the FPU for the first time |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e35219b32f..ebcfcceccc72 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -28,6 +28,7 @@ unsigned long idle_nomwait; | |||
| 28 | EXPORT_SYMBOL(idle_nomwait); | 28 | EXPORT_SYMBOL(idle_nomwait); |
| 29 | 29 | ||
| 30 | struct kmem_cache *task_xstate_cachep; | 30 | struct kmem_cache *task_xstate_cachep; |
| 31 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | ||
| 31 | 32 | ||
| 32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
| 33 | { | 34 | { |
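Exporting fpu_finit() from i387.c and task_xstate_cachep from process.c lets a module such as KVM manage a per-vcpu struct fpu the way the core kernel manages task FPU state. A hedged kernel-style sketch follows, with the allocation done directly from task_xstate_cachep for illustration; the demo_ names are placeholders and the real KVM fx_init() path is not shown in this hunk.

```c
/* Hedged kernel-module-style sketch (not KVM's actual fx_init): use the
 * newly exported task_xstate_cachep and fpu_finit() to set up and reset
 * a guest FPU image. */
#include <linux/errno.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/i387.h>

static struct fpu demo_guest_fpu;

static int demo_fpu_setup(void)
{
	demo_guest_fpu.state = kmem_cache_zalloc(task_xstate_cachep, GFP_KERNEL);
	if (!demo_guest_fpu.state)
		return -ENOMEM;
	fpu_finit(&demo_guest_fpu);     /* reset to architectural FPU defaults */
	return 0;
}

static void demo_fpu_teardown(void)
{
	kmem_cache_free(task_xstate_cachep, demo_guest_fpu.state);
	demo_guest_fpu.state = NULL;
}
```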
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5ac0bb465ed6..b38bd8b92aa6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | * privileged instructions: | 9 | * privileged instructions: |
| 10 | * | 10 | * |
| 11 | * Copyright (C) 2006 Qumranet | 11 | * Copyright (C) 2006 Qumranet |
| 12 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 12 | * | 13 | * |
| 13 | * Avi Kivity <avi@qumranet.com> | 14 | * Avi Kivity <avi@qumranet.com> |
| 14 | * Yaniv Kamay <yaniv@qumranet.com> | 15 | * Yaniv Kamay <yaniv@qumranet.com> |
| @@ -67,6 +68,9 @@ | |||
| 67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 68 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
| 68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 69 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
| 69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | 70 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ |
| 71 | #define SrcImmFAddr (0xb<<4) /* Source is immediate far address */ | ||
| 72 | #define SrcMemFAddr (0xc<<4) /* Source is far address in memory */ | ||
| 73 | #define SrcAcc (0xd<<4) /* Source Accumulator */ | ||
| 70 | #define SrcMask (0xf<<4) | 74 | #define SrcMask (0xf<<4) |
| 71 | /* Generic ModRM decode. */ | 75 | /* Generic ModRM decode. */ |
| 72 | #define ModRM (1<<8) | 76 | #define ModRM (1<<8) |
| @@ -88,10 +92,6 @@ | |||
| 88 | #define Src2CL (1<<29) | 92 | #define Src2CL (1<<29) |
| 89 | #define Src2ImmByte (2<<29) | 93 | #define Src2ImmByte (2<<29) |
| 90 | #define Src2One (3<<29) | 94 | #define Src2One (3<<29) |
| 91 | #define Src2Imm16 (4<<29) | ||
| 92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
| 93 | in memory and second argument is located | ||
| 94 | immediately after the first one in memory. */ | ||
| 95 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
| 96 | 96 | ||
| 97 | enum { | 97 | enum { |
| @@ -124,15 +124,15 @@ static u32 opcode_table[256] = { | |||
| 124 | /* 0x20 - 0x27 */ | 124 | /* 0x20 - 0x27 */ |
| 125 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 125 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
| 126 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 126 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 127 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, | 127 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
| 128 | /* 0x28 - 0x2F */ | 128 | /* 0x28 - 0x2F */ |
| 129 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 129 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
| 130 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 130 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 131 | 0, 0, 0, 0, | 131 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
| 132 | /* 0x30 - 0x37 */ | 132 | /* 0x30 - 0x37 */ |
| 133 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 133 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
| 134 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 134 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 135 | 0, 0, 0, 0, | 135 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
| 136 | /* 0x38 - 0x3F */ | 136 | /* 0x38 - 0x3F */ |
| 137 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 137 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 138 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 138 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| @@ -170,20 +170,20 @@ static u32 opcode_table[256] = { | |||
| 170 | /* 0x88 - 0x8F */ | 170 | /* 0x88 - 0x8F */ |
| 171 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 171 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
| 172 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 172 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
| 173 | DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, | 173 | DstMem | SrcNone | ModRM | Mov, ModRM | DstReg, |
| 174 | DstReg | SrcMem | ModRM | Mov, Group | Group1A, | 174 | ImplicitOps | SrcMem16 | ModRM, Group | Group1A, |
| 175 | /* 0x90 - 0x97 */ | 175 | /* 0x90 - 0x97 */ |
| 176 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 176 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
| 177 | /* 0x98 - 0x9F */ | 177 | /* 0x98 - 0x9F */ |
| 178 | 0, 0, SrcImm | Src2Imm16 | No64, 0, | 178 | 0, 0, SrcImmFAddr | No64, 0, |
| 179 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 179 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
| 180 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
| 181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstAcc | SrcMem | Mov | MemAbs, DstAcc | SrcMem | Mov | MemAbs, |
| 182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcAcc | Mov | MemAbs, DstMem | SrcAcc | Mov | MemAbs, |
| 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
| 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
| 185 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
| 186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, | 186 | DstAcc | SrcImmByte | ByteOp, DstAcc | SrcImm, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
| 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
| 188 | ByteOp | DstDI | String, DstDI | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
| 189 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
| @@ -215,7 +215,7 @@ static u32 opcode_table[256] = { | |||
| 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
| 216 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
| 217 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
| 218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmFAddr | No64, SrcImmByte | ImplicitOps, |
| 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
| 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
| 221 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
| @@ -337,20 +337,20 @@ static u32 group_table[] = { | |||
| 337 | [Group1A*8] = | 337 | [Group1A*8] = |
| 338 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, | 338 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, |
| 339 | [Group3_Byte*8] = | 339 | [Group3_Byte*8] = |
| 340 | ByteOp | SrcImm | DstMem | ModRM, 0, | 340 | ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM, |
| 341 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 341 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, |
| 342 | 0, 0, 0, 0, | 342 | 0, 0, 0, 0, |
| 343 | [Group3*8] = | 343 | [Group3*8] = |
| 344 | DstMem | SrcImm | ModRM, 0, | 344 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, |
| 345 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 345 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
| 346 | 0, 0, 0, 0, | 346 | 0, 0, 0, 0, |
| 347 | [Group4*8] = | 347 | [Group4*8] = |
| 348 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 348 | ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock, |
| 349 | 0, 0, 0, 0, 0, 0, | 349 | 0, 0, 0, 0, 0, 0, |
| 350 | [Group5*8] = | 350 | [Group5*8] = |
| 351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM | Lock, DstMem | SrcNone | ModRM | Lock, |
| 352 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
| 353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, | 353 | SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps, |
| 354 | SrcMem | ModRM | Stack, 0, | 354 | SrcMem | ModRM | Stack, 0, |
| 355 | [Group7*8] = | 355 | [Group7*8] = |
| 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
| @@ -576,6 +576,13 @@ static u32 group2_table[] = { | |||
| 576 | (_type)_x; \ | 576 | (_type)_x; \ |
| 577 | }) | 577 | }) |
| 578 | 578 | ||
| 579 | #define insn_fetch_arr(_arr, _size, _eip) \ | ||
| 580 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ | ||
| 581 | if (rc != X86EMUL_CONTINUE) \ | ||
| 582 | goto done; \ | ||
| 583 | (_eip) += (_size); \ | ||
| 584 | }) | ||
| 585 | |||
| 579 | static inline unsigned long ad_mask(struct decode_cache *c) | 586 | static inline unsigned long ad_mask(struct decode_cache *c) |
| 580 | { | 587 | { |
| 581 | return (1UL << (c->ad_bytes << 3)) - 1; | 588 | return (1UL << (c->ad_bytes << 3)) - 1; |
| @@ -617,31 +624,66 @@ static void set_seg_override(struct decode_cache *c, int seg) | |||
| 617 | c->seg_override = seg; | 624 | c->seg_override = seg; |
| 618 | } | 625 | } |
| 619 | 626 | ||
| 620 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) | 627 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, |
| 628 | struct x86_emulate_ops *ops, int seg) | ||
| 621 | { | 629 | { |
| 622 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) | 630 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) |
| 623 | return 0; | 631 | return 0; |
| 624 | 632 | ||
| 625 | return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); | 633 | return ops->get_cached_segment_base(seg, ctxt->vcpu); |
| 626 | } | 634 | } |
| 627 | 635 | ||
| 628 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, | 636 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, |
| 637 | struct x86_emulate_ops *ops, | ||
| 629 | struct decode_cache *c) | 638 | struct decode_cache *c) |
| 630 | { | 639 | { |
| 631 | if (!c->has_seg_override) | 640 | if (!c->has_seg_override) |
| 632 | return 0; | 641 | return 0; |
| 633 | 642 | ||
| 634 | return seg_base(ctxt, c->seg_override); | 643 | return seg_base(ctxt, ops, c->seg_override); |
| 644 | } | ||
| 645 | |||
| 646 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt, | ||
| 647 | struct x86_emulate_ops *ops) | ||
| 648 | { | ||
| 649 | return seg_base(ctxt, ops, VCPU_SREG_ES); | ||
| 650 | } | ||
| 651 | |||
| 652 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, | ||
| 653 | struct x86_emulate_ops *ops) | ||
| 654 | { | ||
| 655 | return seg_base(ctxt, ops, VCPU_SREG_SS); | ||
| 656 | } | ||
| 657 | |||
| 658 | static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | ||
| 659 | u32 error, bool valid) | ||
| 660 | { | ||
| 661 | ctxt->exception = vec; | ||
| 662 | ctxt->error_code = error; | ||
| 663 | ctxt->error_code_valid = valid; | ||
| 664 | ctxt->restart = false; | ||
| 665 | } | ||
| 666 | |||
| 667 | static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) | ||
| 668 | { | ||
| 669 | emulate_exception(ctxt, GP_VECTOR, err, true); | ||
| 635 | } | 670 | } |
| 636 | 671 | ||
| 637 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt) | 672 | static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr, |
| 673 | int err) | ||
| 638 | { | 674 | { |
| 639 | return seg_base(ctxt, VCPU_SREG_ES); | 675 | ctxt->cr2 = addr; |
| 676 | emulate_exception(ctxt, PF_VECTOR, err, true); | ||
| 640 | } | 677 | } |
| 641 | 678 | ||
| 642 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | 679 | static void emulate_ud(struct x86_emulate_ctxt *ctxt) |
| 643 | { | 680 | { |
| 644 | return seg_base(ctxt, VCPU_SREG_SS); | 681 | emulate_exception(ctxt, UD_VECTOR, 0, false); |
| 682 | } | ||
| 683 | |||
| 684 | static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err) | ||
| 685 | { | ||
| 686 | emulate_exception(ctxt, TS_VECTOR, err, true); | ||
| 645 | } | 687 | } |
| 646 | 688 | ||
| 647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 689 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
| @@ -932,12 +974,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 932 | /* we cannot decode insn before we complete previous rep insn */ | 974 | /* we cannot decode insn before we complete previous rep insn */ |
| 933 | WARN_ON(ctxt->restart); | 975 | WARN_ON(ctxt->restart); |
| 934 | 976 | ||
| 935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
| 936 | memset(c, 0, sizeof(struct decode_cache)); | ||
| 937 | c->eip = ctxt->eip; | 977 | c->eip = ctxt->eip; |
| 938 | c->fetch.start = c->fetch.end = c->eip; | 978 | c->fetch.start = c->fetch.end = c->eip; |
| 939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 979 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); |
| 940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
| 941 | 980 | ||
| 942 | switch (mode) { | 981 | switch (mode) { |
| 943 | case X86EMUL_MODE_REAL: | 982 | case X86EMUL_MODE_REAL: |
| @@ -1060,7 +1099,7 @@ done_prefixes: | |||
| 1060 | set_seg_override(c, VCPU_SREG_DS); | 1099 | set_seg_override(c, VCPU_SREG_DS); |
| 1061 | 1100 | ||
| 1062 | if (!(!c->twobyte && c->b == 0x8d)) | 1101 | if (!(!c->twobyte && c->b == 0x8d)) |
| 1063 | c->modrm_ea += seg_override_base(ctxt, c); | 1102 | c->modrm_ea += seg_override_base(ctxt, ops, c); |
| 1064 | 1103 | ||
| 1065 | if (c->ad_bytes != 8) | 1104 | if (c->ad_bytes != 8) |
| 1066 | c->modrm_ea = (u32)c->modrm_ea; | 1105 | c->modrm_ea = (u32)c->modrm_ea; |
| @@ -1148,6 +1187,25 @@ done_prefixes: | |||
| 1148 | else | 1187 | else |
| 1149 | c->src.val = insn_fetch(u8, 1, c->eip); | 1188 | c->src.val = insn_fetch(u8, 1, c->eip); |
| 1150 | break; | 1189 | break; |
| 1190 | case SrcAcc: | ||
| 1191 | c->src.type = OP_REG; | ||
| 1192 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 1193 | c->src.ptr = &c->regs[VCPU_REGS_RAX]; | ||
| 1194 | switch (c->src.bytes) { | ||
| 1195 | case 1: | ||
| 1196 | c->src.val = *(u8 *)c->src.ptr; | ||
| 1197 | break; | ||
| 1198 | case 2: | ||
| 1199 | c->src.val = *(u16 *)c->src.ptr; | ||
| 1200 | break; | ||
| 1201 | case 4: | ||
| 1202 | c->src.val = *(u32 *)c->src.ptr; | ||
| 1203 | break; | ||
| 1204 | case 8: | ||
| 1205 | c->src.val = *(u64 *)c->src.ptr; | ||
| 1206 | break; | ||
| 1207 | } | ||
| 1208 | break; | ||
| 1151 | case SrcOne: | 1209 | case SrcOne: |
| 1152 | c->src.bytes = 1; | 1210 | c->src.bytes = 1; |
| 1153 | c->src.val = 1; | 1211 | c->src.val = 1; |
| @@ -1156,10 +1214,21 @@ done_prefixes: | |||
| 1156 | c->src.type = OP_MEM; | 1214 | c->src.type = OP_MEM; |
| 1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1215 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
| 1158 | c->src.ptr = (unsigned long *) | 1216 | c->src.ptr = (unsigned long *) |
| 1159 | register_address(c, seg_override_base(ctxt, c), | 1217 | register_address(c, seg_override_base(ctxt, ops, c), |
| 1160 | c->regs[VCPU_REGS_RSI]); | 1218 | c->regs[VCPU_REGS_RSI]); |
| 1161 | c->src.val = 0; | 1219 | c->src.val = 0; |
| 1162 | break; | 1220 | break; |
| 1221 | case SrcImmFAddr: | ||
| 1222 | c->src.type = OP_IMM; | ||
| 1223 | c->src.ptr = (unsigned long *)c->eip; | ||
| 1224 | c->src.bytes = c->op_bytes + 2; | ||
| 1225 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); | ||
| 1226 | break; | ||
| 1227 | case SrcMemFAddr: | ||
| 1228 | c->src.type = OP_MEM; | ||
| 1229 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
| 1230 | c->src.bytes = c->op_bytes + 2; | ||
| 1231 | break; | ||
| 1163 | } | 1232 | } |
| 1164 | 1233 | ||
| 1165 | /* | 1234 | /* |
| @@ -1179,22 +1248,10 @@ done_prefixes: | |||
| 1179 | c->src2.bytes = 1; | 1248 | c->src2.bytes = 1; |
| 1180 | c->src2.val = insn_fetch(u8, 1, c->eip); | 1249 | c->src2.val = insn_fetch(u8, 1, c->eip); |
| 1181 | break; | 1250 | break; |
| 1182 | case Src2Imm16: | ||
| 1183 | c->src2.type = OP_IMM; | ||
| 1184 | c->src2.ptr = (unsigned long *)c->eip; | ||
| 1185 | c->src2.bytes = 2; | ||
| 1186 | c->src2.val = insn_fetch(u16, 2, c->eip); | ||
| 1187 | break; | ||
| 1188 | case Src2One: | 1251 | case Src2One: |
| 1189 | c->src2.bytes = 1; | 1252 | c->src2.bytes = 1; |
| 1190 | c->src2.val = 1; | 1253 | c->src2.val = 1; |
| 1191 | break; | 1254 | break; |
| 1192 | case Src2Mem16: | ||
| 1193 | c->src2.type = OP_MEM; | ||
| 1194 | c->src2.bytes = 2; | ||
| 1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
| 1196 | c->src2.val = 0; | ||
| 1197 | break; | ||
| 1198 | } | 1255 | } |
| 1199 | 1256 | ||
| 1200 | /* Decode and fetch the destination operand: register or memory. */ | 1257 | /* Decode and fetch the destination operand: register or memory. */ |
| @@ -1253,7 +1310,7 @@ done_prefixes: | |||
| 1253 | c->dst.type = OP_MEM; | 1310 | c->dst.type = OP_MEM; |
| 1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1311 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
| 1255 | c->dst.ptr = (unsigned long *) | 1312 | c->dst.ptr = (unsigned long *) |
| 1256 | register_address(c, es_base(ctxt), | 1313 | register_address(c, es_base(ctxt, ops), |
| 1257 | c->regs[VCPU_REGS_RDI]); | 1314 | c->regs[VCPU_REGS_RDI]); |
| 1258 | c->dst.val = 0; | 1315 | c->dst.val = 0; |
| 1259 | break; | 1316 | break; |
| @@ -1263,6 +1320,37 @@ done: | |||
| 1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1320 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
| 1264 | } | 1321 | } |
| 1265 | 1322 | ||
| 1323 | static int read_emulated(struct x86_emulate_ctxt *ctxt, | ||
| 1324 | struct x86_emulate_ops *ops, | ||
| 1325 | unsigned long addr, void *dest, unsigned size) | ||
| 1326 | { | ||
| 1327 | int rc; | ||
| 1328 | struct read_cache *mc = &ctxt->decode.mem_read; | ||
| 1329 | u32 err; | ||
| 1330 | |||
| 1331 | while (size) { | ||
| 1332 | int n = min(size, 8u); | ||
| 1333 | size -= n; | ||
| 1334 | if (mc->pos < mc->end) | ||
| 1335 | goto read_cached; | ||
| 1336 | |||
| 1337 | rc = ops->read_emulated(addr, mc->data + mc->end, n, &err, | ||
| 1338 | ctxt->vcpu); | ||
| 1339 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
| 1340 | emulate_pf(ctxt, addr, err); | ||
| 1341 | if (rc != X86EMUL_CONTINUE) | ||
| 1342 | return rc; | ||
| 1343 | mc->end += n; | ||
| 1344 | |||
| 1345 | read_cached: | ||
| 1346 | memcpy(dest, mc->data + mc->pos, n); | ||
| 1347 | mc->pos += n; | ||
| 1348 | dest += n; | ||
| 1349 | addr += n; | ||
| 1350 | } | ||
| 1351 | return X86EMUL_CONTINUE; | ||
| 1352 | } | ||
| 1353 | |||
| 1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 1354 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
| 1267 | struct x86_emulate_ops *ops, | 1355 | struct x86_emulate_ops *ops, |
| 1268 | unsigned int size, unsigned short port, | 1356 | unsigned int size, unsigned short port, |
| @@ -1330,13 +1418,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1418 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
| 1331 | 1419 | ||
| 1332 | if (dt.size < index * 8 + 7) { | 1420 | if (dt.size < index * 8 + 7) { |
| 1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | 1421 | emulate_gp(ctxt, selector & 0xfffc); |
| 1334 | return X86EMUL_PROPAGATE_FAULT; | 1422 | return X86EMUL_PROPAGATE_FAULT; |
| 1335 | } | 1423 | } |
| 1336 | addr = dt.address + index * 8; | 1424 | addr = dt.address + index * 8; |
| 1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 1425 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); |
| 1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | 1426 | if (ret == X86EMUL_PROPAGATE_FAULT) |
| 1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | 1427 | emulate_pf(ctxt, addr, err); |
| 1340 | 1428 | ||
| 1341 | return ret; | 1429 | return ret; |
| 1342 | } | 1430 | } |
| @@ -1355,14 +1443,14 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1443 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
| 1356 | 1444 | ||
| 1357 | if (dt.size < index * 8 + 7) { | 1445 | if (dt.size < index * 8 + 7) { |
| 1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | 1446 | emulate_gp(ctxt, selector & 0xfffc); |
| 1359 | return X86EMUL_PROPAGATE_FAULT; | 1447 | return X86EMUL_PROPAGATE_FAULT; |
| 1360 | } | 1448 | } |
| 1361 | 1449 | ||
| 1362 | addr = dt.address + index * 8; | 1450 | addr = dt.address + index * 8; |
| 1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 1451 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); |
| 1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | 1452 | if (ret == X86EMUL_PROPAGATE_FAULT) |
| 1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | 1453 | emulate_pf(ctxt, addr, err); |
| 1366 | 1454 | ||
| 1367 | return ret; | 1455 | return ret; |
| 1368 | } | 1456 | } |
| @@ -1481,11 +1569,70 @@ load: | |||
| 1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | 1569 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); |
| 1482 | return X86EMUL_CONTINUE; | 1570 | return X86EMUL_CONTINUE; |
| 1483 | exception: | 1571 | exception: |
| 1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | 1572 | emulate_exception(ctxt, err_vec, err_code, true); |
| 1485 | return X86EMUL_PROPAGATE_FAULT; | 1573 | return X86EMUL_PROPAGATE_FAULT; |
| 1486 | } | 1574 | } |
| 1487 | 1575 | ||
| 1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1576 | static inline int writeback(struct x86_emulate_ctxt *ctxt, |
| 1577 | struct x86_emulate_ops *ops) | ||
| 1578 | { | ||
| 1579 | int rc; | ||
| 1580 | struct decode_cache *c = &ctxt->decode; | ||
| 1581 | u32 err; | ||
| 1582 | |||
| 1583 | switch (c->dst.type) { | ||
| 1584 | case OP_REG: | ||
| 1585 | /* The 4-byte case *is* correct: | ||
| 1586 | * in 64-bit mode we zero-extend. | ||
| 1587 | */ | ||
| 1588 | switch (c->dst.bytes) { | ||
| 1589 | case 1: | ||
| 1590 | *(u8 *)c->dst.ptr = (u8)c->dst.val; | ||
| 1591 | break; | ||
| 1592 | case 2: | ||
| 1593 | *(u16 *)c->dst.ptr = (u16)c->dst.val; | ||
| 1594 | break; | ||
| 1595 | case 4: | ||
| 1596 | *c->dst.ptr = (u32)c->dst.val; | ||
| 1597 | break; /* 64b: zero-ext */ | ||
| 1598 | case 8: | ||
| 1599 | *c->dst.ptr = c->dst.val; | ||
| 1600 | break; | ||
| 1601 | } | ||
| 1602 | break; | ||
| 1603 | case OP_MEM: | ||
| 1604 | if (c->lock_prefix) | ||
| 1605 | rc = ops->cmpxchg_emulated( | ||
| 1606 | (unsigned long)c->dst.ptr, | ||
| 1607 | &c->dst.orig_val, | ||
| 1608 | &c->dst.val, | ||
| 1609 | c->dst.bytes, | ||
| 1610 | &err, | ||
| 1611 | ctxt->vcpu); | ||
| 1612 | else | ||
| 1613 | rc = ops->write_emulated( | ||
| 1614 | (unsigned long)c->dst.ptr, | ||
| 1615 | &c->dst.val, | ||
| 1616 | c->dst.bytes, | ||
| 1617 | &err, | ||
| 1618 | ctxt->vcpu); | ||
| 1619 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
| 1620 | emulate_pf(ctxt, | ||
| 1621 | (unsigned long)c->dst.ptr, err); | ||
| 1622 | if (rc != X86EMUL_CONTINUE) | ||
| 1623 | return rc; | ||
| 1624 | break; | ||
| 1625 | case OP_NONE: | ||
| 1626 | /* no writeback */ | ||
| 1627 | break; | ||
| 1628 | default: | ||
| 1629 | break; | ||
| 1630 | } | ||
| 1631 | return X86EMUL_CONTINUE; | ||
| 1632 | } | ||
| 1633 | |||
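The OP_REG case of writeback() above leans on an x86-64 detail called out in its comment: a 4-byte destination overwrites the whole 64-bit register slot with a zero-extended value, while 1- and 2-byte destinations only touch the low bits. A small host-side check of what those casts do (plain C on a little-endian machine, nothing KVM-specific):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t reg = 0xffffffffffffffffULL;   /* stands in for a 64-bit GPR */
        unsigned long val = 0x12345678;

        /* 2-byte destination: only the low word of the slot changes
         * (little-endian layout assumed, as on x86). */
        *(uint16_t *)&reg = (uint16_t)val;
        printf("16-bit write: %016llx\n", (unsigned long long)reg);

        /* 4-byte destination: assigning a zero-extended 32-bit value
         * overwrites the whole slot, like *c->dst.ptr = (u32)c->dst.val. */
        reg = 0xffffffffffffffffULL;
        reg = (uint32_t)val;
        printf("32-bit write: %016llx\n", (unsigned long long)reg);
        return 0;
    }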
| 1634 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt, | ||
| 1635 | struct x86_emulate_ops *ops) | ||
| 1489 | { | 1636 | { |
| 1490 | struct decode_cache *c = &ctxt->decode; | 1637 | struct decode_cache *c = &ctxt->decode; |
| 1491 | 1638 | ||
| @@ -1493,7 +1640,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | |||
| 1493 | c->dst.bytes = c->op_bytes; | 1640 | c->dst.bytes = c->op_bytes; |
| 1494 | c->dst.val = c->src.val; | 1641 | c->dst.val = c->src.val; |
| 1495 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1642 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
| 1496 | c->dst.ptr = (void *) register_address(c, ss_base(ctxt), | 1643 | c->dst.ptr = (void *) register_address(c, ss_base(ctxt, ops), |
| 1497 | c->regs[VCPU_REGS_RSP]); | 1644 | c->regs[VCPU_REGS_RSP]); |
| 1498 | } | 1645 | } |
| 1499 | 1646 | ||
| @@ -1504,9 +1651,9 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
| 1504 | struct decode_cache *c = &ctxt->decode; | 1651 | struct decode_cache *c = &ctxt->decode; |
| 1505 | int rc; | 1652 | int rc; |
| 1506 | 1653 | ||
| 1507 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), | 1654 | rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops), |
| 1508 | c->regs[VCPU_REGS_RSP]), | 1655 | c->regs[VCPU_REGS_RSP]), |
| 1509 | dest, len, ctxt->vcpu); | 1656 | dest, len); |
| 1510 | if (rc != X86EMUL_CONTINUE) | 1657 | if (rc != X86EMUL_CONTINUE) |
| 1511 | return rc; | 1658 | return rc; |
| 1512 | 1659 | ||
| @@ -1541,7 +1688,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
| 1541 | break; | 1688 | break; |
| 1542 | case X86EMUL_MODE_VM86: | 1689 | case X86EMUL_MODE_VM86: |
| 1543 | if (iopl < 3) { | 1690 | if (iopl < 3) { |
| 1544 | kvm_inject_gp(ctxt->vcpu, 0); | 1691 | emulate_gp(ctxt, 0); |
| 1545 | return X86EMUL_PROPAGATE_FAULT; | 1692 | return X86EMUL_PROPAGATE_FAULT; |
| 1546 | } | 1693 | } |
| 1547 | change_mask |= EFLG_IF; | 1694 | change_mask |= EFLG_IF; |
| @@ -1557,15 +1704,14 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
| 1557 | return rc; | 1704 | return rc; |
| 1558 | } | 1705 | } |
| 1559 | 1706 | ||
| 1560 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1707 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, |
| 1708 | struct x86_emulate_ops *ops, int seg) | ||
| 1561 | { | 1709 | { |
| 1562 | struct decode_cache *c = &ctxt->decode; | 1710 | struct decode_cache *c = &ctxt->decode; |
| 1563 | struct kvm_segment segment; | ||
| 1564 | 1711 | ||
| 1565 | kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); | 1712 | c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); |
| 1566 | 1713 | ||
| 1567 | c->src.val = segment.selector; | 1714 | emulate_push(ctxt, ops); |
| 1568 | emulate_push(ctxt); | ||
| 1569 | } | 1715 | } |
| 1570 | 1716 | ||
| 1571 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | 1717 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, |
| @@ -1583,19 +1729,31 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
| 1583 | return rc; | 1729 | return rc; |
| 1584 | } | 1730 | } |
| 1585 | 1731 | ||
| 1586 | static void emulate_pusha(struct x86_emulate_ctxt *ctxt) | 1732 | static int emulate_pusha(struct x86_emulate_ctxt *ctxt, |
| 1733 | struct x86_emulate_ops *ops) | ||
| 1587 | { | 1734 | { |
| 1588 | struct decode_cache *c = &ctxt->decode; | 1735 | struct decode_cache *c = &ctxt->decode; |
| 1589 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | 1736 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; |
| 1737 | int rc = X86EMUL_CONTINUE; | ||
| 1590 | int reg = VCPU_REGS_RAX; | 1738 | int reg = VCPU_REGS_RAX; |
| 1591 | 1739 | ||
| 1592 | while (reg <= VCPU_REGS_RDI) { | 1740 | while (reg <= VCPU_REGS_RDI) { |
| 1593 | (reg == VCPU_REGS_RSP) ? | 1741 | (reg == VCPU_REGS_RSP) ? |
| 1594 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | 1742 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); |
| 1595 | 1743 | ||
| 1596 | emulate_push(ctxt); | 1744 | emulate_push(ctxt, ops); |
| 1745 | |||
| 1746 | rc = writeback(ctxt, ops); | ||
| 1747 | if (rc != X86EMUL_CONTINUE) | ||
| 1748 | return rc; | ||
| 1749 | |||
| 1597 | ++reg; | 1750 | ++reg; |
| 1598 | } | 1751 | } |
| 1752 | |||
| 1753 | /* Disable writeback. */ | ||
| 1754 | c->dst.type = OP_NONE; | ||
| 1755 | |||
| 1756 | return rc; | ||
| 1599 | } | 1757 | } |
| 1600 | 1758 | ||
| 1601 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | 1759 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, |
| @@ -1695,14 +1853,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
| 1695 | old_eip = c->eip; | 1853 | old_eip = c->eip; |
| 1696 | c->eip = c->src.val; | 1854 | c->eip = c->src.val; |
| 1697 | c->src.val = old_eip; | 1855 | c->src.val = old_eip; |
| 1698 | emulate_push(ctxt); | 1856 | emulate_push(ctxt, ops); |
| 1699 | break; | 1857 | break; |
| 1700 | } | 1858 | } |
| 1701 | case 4: /* jmp abs */ | 1859 | case 4: /* jmp abs */ |
| 1702 | c->eip = c->src.val; | 1860 | c->eip = c->src.val; |
| 1703 | break; | 1861 | break; |
| 1704 | case 6: /* push */ | 1862 | case 6: /* push */ |
| 1705 | emulate_push(ctxt); | 1863 | emulate_push(ctxt, ops); |
| 1706 | break; | 1864 | break; |
| 1707 | } | 1865 | } |
| 1708 | return X86EMUL_CONTINUE; | 1866 | return X86EMUL_CONTINUE; |
| @@ -1748,145 +1906,82 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
| 1748 | return rc; | 1906 | return rc; |
| 1749 | } | 1907 | } |
| 1750 | 1908 | ||
| 1751 | static inline int writeback(struct x86_emulate_ctxt *ctxt, | ||
| 1752 | struct x86_emulate_ops *ops) | ||
| 1753 | { | ||
| 1754 | int rc; | ||
| 1755 | struct decode_cache *c = &ctxt->decode; | ||
| 1756 | |||
| 1757 | switch (c->dst.type) { | ||
| 1758 | case OP_REG: | ||
| 1759 | /* The 4-byte case *is* correct: | ||
| 1760 | * in 64-bit mode we zero-extend. | ||
| 1761 | */ | ||
| 1762 | switch (c->dst.bytes) { | ||
| 1763 | case 1: | ||
| 1764 | *(u8 *)c->dst.ptr = (u8)c->dst.val; | ||
| 1765 | break; | ||
| 1766 | case 2: | ||
| 1767 | *(u16 *)c->dst.ptr = (u16)c->dst.val; | ||
| 1768 | break; | ||
| 1769 | case 4: | ||
| 1770 | *c->dst.ptr = (u32)c->dst.val; | ||
| 1771 | break; /* 64b: zero-ext */ | ||
| 1772 | case 8: | ||
| 1773 | *c->dst.ptr = c->dst.val; | ||
| 1774 | break; | ||
| 1775 | } | ||
| 1776 | break; | ||
| 1777 | case OP_MEM: | ||
| 1778 | if (c->lock_prefix) | ||
| 1779 | rc = ops->cmpxchg_emulated( | ||
| 1780 | (unsigned long)c->dst.ptr, | ||
| 1781 | &c->dst.orig_val, | ||
| 1782 | &c->dst.val, | ||
| 1783 | c->dst.bytes, | ||
| 1784 | ctxt->vcpu); | ||
| 1785 | else | ||
| 1786 | rc = ops->write_emulated( | ||
| 1787 | (unsigned long)c->dst.ptr, | ||
| 1788 | &c->dst.val, | ||
| 1789 | c->dst.bytes, | ||
| 1790 | ctxt->vcpu); | ||
| 1791 | if (rc != X86EMUL_CONTINUE) | ||
| 1792 | return rc; | ||
| 1793 | break; | ||
| 1794 | case OP_NONE: | ||
| 1795 | /* no writeback */ | ||
| 1796 | break; | ||
| 1797 | default: | ||
| 1798 | break; | ||
| 1799 | } | ||
| 1800 | return X86EMUL_CONTINUE; | ||
| 1801 | } | ||
| 1802 | |||
| 1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | ||
| 1804 | { | ||
| 1805 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); | ||
| 1806 | /* | ||
| 1807 | * an sti; sti; sequence only disable interrupts for the first | ||
| 1808 | * instruction. So, if the last instruction, be it emulated or | ||
| 1809 | * not, left the system with the INT_STI flag enabled, it | ||
| 1810 | * means that the last instruction is an sti. We should not | ||
| 1811 | * leave the flag on in this case. The same goes for mov ss | ||
| 1812 | */ | ||
| 1813 | if (!(int_shadow & mask)) | ||
| 1814 | ctxt->interruptibility = mask; | ||
| 1815 | } | ||
| 1816 | |||
| 1817 | static inline void | 1909 | static inline void |
| 1818 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | 1910 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, |
| 1819 | struct kvm_segment *cs, struct kvm_segment *ss) | 1911 | struct x86_emulate_ops *ops, struct desc_struct *cs, |
| 1912 | struct desc_struct *ss) | ||
| 1820 | { | 1913 | { |
| 1821 | memset(cs, 0, sizeof(struct kvm_segment)); | 1914 | memset(cs, 0, sizeof(struct desc_struct)); |
| 1822 | kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS); | 1915 | ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); |
| 1823 | memset(ss, 0, sizeof(struct kvm_segment)); | 1916 | memset(ss, 0, sizeof(struct desc_struct)); |
| 1824 | 1917 | ||
| 1825 | cs->l = 0; /* will be adjusted later */ | 1918 | cs->l = 0; /* will be adjusted later */ |
| 1826 | cs->base = 0; /* flat segment */ | 1919 | set_desc_base(cs, 0); /* flat segment */ |
| 1827 | cs->g = 1; /* 4kb granularity */ | 1920 | cs->g = 1; /* 4kb granularity */ |
| 1828 | cs->limit = 0xffffffff; /* 4GB limit */ | 1921 | set_desc_limit(cs, 0xfffff); /* 4GB limit */ |
| 1829 | cs->type = 0x0b; /* Read, Execute, Accessed */ | 1922 | cs->type = 0x0b; /* Read, Execute, Accessed */ |
| 1830 | cs->s = 1; | 1923 | cs->s = 1; |
| 1831 | cs->dpl = 0; /* will be adjusted later */ | 1924 | cs->dpl = 0; /* will be adjusted later */ |
| 1832 | cs->present = 1; | 1925 | cs->p = 1; |
| 1833 | cs->db = 1; | 1926 | cs->d = 1; |
| 1834 | 1927 | ||
| 1835 | ss->unusable = 0; | 1928 | set_desc_base(ss, 0); /* flat segment */ |
| 1836 | ss->base = 0; /* flat segment */ | 1929 | set_desc_limit(ss, 0xfffff); /* 4GB limit */ |
| 1837 | ss->limit = 0xffffffff; /* 4GB limit */ | ||
| 1838 | ss->g = 1; /* 4kb granularity */ | 1930 | ss->g = 1; /* 4kb granularity */ |
| 1839 | ss->s = 1; | 1931 | ss->s = 1; |
| 1840 | ss->type = 0x03; /* Read/Write, Accessed */ | 1932 | ss->type = 0x03; /* Read/Write, Accessed */ |
| 1841 | ss->db = 1; /* 32bit stack segment */ | 1933 | ss->d = 1; /* 32bit stack segment */ |
| 1842 | ss->dpl = 0; | 1934 | ss->dpl = 0; |
| 1843 | ss->present = 1; | 1935 | ss->p = 1; |
| 1844 | } | 1936 | } |
| 1845 | 1937 | ||
| 1846 | static int | 1938 | static int |
| 1847 | emulate_syscall(struct x86_emulate_ctxt *ctxt) | 1939 | emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 1848 | { | 1940 | { |
| 1849 | struct decode_cache *c = &ctxt->decode; | 1941 | struct decode_cache *c = &ctxt->decode; |
| 1850 | struct kvm_segment cs, ss; | 1942 | struct desc_struct cs, ss; |
| 1851 | u64 msr_data; | 1943 | u64 msr_data; |
| 1944 | u16 cs_sel, ss_sel; | ||
| 1852 | 1945 | ||
| 1853 | /* syscall is not available in real mode */ | 1946 | /* syscall is not available in real mode */ |
| 1854 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1947 | if (ctxt->mode == X86EMUL_MODE_REAL || |
| 1855 | ctxt->mode == X86EMUL_MODE_VM86) { | 1948 | ctxt->mode == X86EMUL_MODE_VM86) { |
| 1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 1949 | emulate_ud(ctxt); |
| 1857 | return X86EMUL_PROPAGATE_FAULT; | 1950 | return X86EMUL_PROPAGATE_FAULT; |
| 1858 | } | 1951 | } |
| 1859 | 1952 | ||
| 1860 | setup_syscalls_segments(ctxt, &cs, &ss); | 1953 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
| 1861 | 1954 | ||
| 1862 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1955 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); |
| 1863 | msr_data >>= 32; | 1956 | msr_data >>= 32; |
| 1864 | cs.selector = (u16)(msr_data & 0xfffc); | 1957 | cs_sel = (u16)(msr_data & 0xfffc); |
| 1865 | ss.selector = (u16)(msr_data + 8); | 1958 | ss_sel = (u16)(msr_data + 8); |
| 1866 | 1959 | ||
| 1867 | if (is_long_mode(ctxt->vcpu)) { | 1960 | if (is_long_mode(ctxt->vcpu)) { |
| 1868 | cs.db = 0; | 1961 | cs.d = 0; |
| 1869 | cs.l = 1; | 1962 | cs.l = 1; |
| 1870 | } | 1963 | } |
| 1871 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 1964 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
| 1872 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 1965 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
| 1966 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
| 1967 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
| 1873 | 1968 | ||
| 1874 | c->regs[VCPU_REGS_RCX] = c->eip; | 1969 | c->regs[VCPU_REGS_RCX] = c->eip; |
| 1875 | if (is_long_mode(ctxt->vcpu)) { | 1970 | if (is_long_mode(ctxt->vcpu)) { |
| 1876 | #ifdef CONFIG_X86_64 | 1971 | #ifdef CONFIG_X86_64 |
| 1877 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; | 1972 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; |
| 1878 | 1973 | ||
| 1879 | kvm_x86_ops->get_msr(ctxt->vcpu, | 1974 | ops->get_msr(ctxt->vcpu, |
| 1880 | ctxt->mode == X86EMUL_MODE_PROT64 ? | 1975 | ctxt->mode == X86EMUL_MODE_PROT64 ? |
| 1881 | MSR_LSTAR : MSR_CSTAR, &msr_data); | 1976 | MSR_LSTAR : MSR_CSTAR, &msr_data); |
| 1882 | c->eip = msr_data; | 1977 | c->eip = msr_data; |
| 1883 | 1978 | ||
| 1884 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); | 1979 | ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); |
| 1885 | ctxt->eflags &= ~(msr_data | EFLG_RF); | 1980 | ctxt->eflags &= ~(msr_data | EFLG_RF); |
| 1886 | #endif | 1981 | #endif |
| 1887 | } else { | 1982 | } else { |
| 1888 | /* legacy mode */ | 1983 | /* legacy mode */ |
| 1889 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1984 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); |
| 1890 | c->eip = (u32)msr_data; | 1985 | c->eip = (u32)msr_data; |
| 1891 | 1986 | ||
| 1892 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1987 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
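In the hunk above emulate_syscall() derives both selectors from MSR_STAR via the ops table: bits 63:32 of the MSR are shifted down, the CS selector is that value with its RPL bits cleared, and SS is defined as CS + 8. The arithmetic is easy to sanity-check in isolation (the STAR value below is an arbitrary example):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t star = 0x0023001000000000ULL;      /* example IA32_STAR value */
        uint64_t msr_data = star >> 32;             /* SYSCALL selectors: bits 63:32 */

        uint16_t cs_sel = (uint16_t)(msr_data & 0xfffc);   /* force RPL to 0 */
        uint16_t ss_sel = (uint16_t)(msr_data + 8);         /* SS is CS + 8 */

        printf("cs = %#06x, ss = %#06x\n", cs_sel, ss_sel); /* 0x0010 and 0x0018 */
        return 0;
    }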
| @@ -1896,15 +1991,16 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
| 1896 | } | 1991 | } |
| 1897 | 1992 | ||
| 1898 | static int | 1993 | static int |
| 1899 | emulate_sysenter(struct x86_emulate_ctxt *ctxt) | 1994 | emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 1900 | { | 1995 | { |
| 1901 | struct decode_cache *c = &ctxt->decode; | 1996 | struct decode_cache *c = &ctxt->decode; |
| 1902 | struct kvm_segment cs, ss; | 1997 | struct desc_struct cs, ss; |
| 1903 | u64 msr_data; | 1998 | u64 msr_data; |
| 1999 | u16 cs_sel, ss_sel; | ||
| 1904 | 2000 | ||
| 1905 | /* inject #GP if in real mode */ | 2001 | /* inject #GP if in real mode */ |
| 1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 2002 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
| 1907 | kvm_inject_gp(ctxt->vcpu, 0); | 2003 | emulate_gp(ctxt, 0); |
| 1908 | return X86EMUL_PROPAGATE_FAULT; | 2004 | return X86EMUL_PROPAGATE_FAULT; |
| 1909 | } | 2005 | } |
| 1910 | 2006 | ||
| @@ -1912,67 +2008,70 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
| 1912 | * Therefore, we inject an #UD. | 2008 | * Therefore, we inject an #UD. |
| 1913 | */ | 2009 | */ |
| 1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { | 2010 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
| 1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2011 | emulate_ud(ctxt); |
| 1916 | return X86EMUL_PROPAGATE_FAULT; | 2012 | return X86EMUL_PROPAGATE_FAULT; |
| 1917 | } | 2013 | } |
| 1918 | 2014 | ||
| 1919 | setup_syscalls_segments(ctxt, &cs, &ss); | 2015 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
| 1920 | 2016 | ||
| 1921 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2017 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
| 1922 | switch (ctxt->mode) { | 2018 | switch (ctxt->mode) { |
| 1923 | case X86EMUL_MODE_PROT32: | 2019 | case X86EMUL_MODE_PROT32: |
| 1924 | if ((msr_data & 0xfffc) == 0x0) { | 2020 | if ((msr_data & 0xfffc) == 0x0) { |
| 1925 | kvm_inject_gp(ctxt->vcpu, 0); | 2021 | emulate_gp(ctxt, 0); |
| 1926 | return X86EMUL_PROPAGATE_FAULT; | 2022 | return X86EMUL_PROPAGATE_FAULT; |
| 1927 | } | 2023 | } |
| 1928 | break; | 2024 | break; |
| 1929 | case X86EMUL_MODE_PROT64: | 2025 | case X86EMUL_MODE_PROT64: |
| 1930 | if (msr_data == 0x0) { | 2026 | if (msr_data == 0x0) { |
| 1931 | kvm_inject_gp(ctxt->vcpu, 0); | 2027 | emulate_gp(ctxt, 0); |
| 1932 | return X86EMUL_PROPAGATE_FAULT; | 2028 | return X86EMUL_PROPAGATE_FAULT; |
| 1933 | } | 2029 | } |
| 1934 | break; | 2030 | break; |
| 1935 | } | 2031 | } |
| 1936 | 2032 | ||
| 1937 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 2033 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
| 1938 | cs.selector = (u16)msr_data; | 2034 | cs_sel = (u16)msr_data; |
| 1939 | cs.selector &= ~SELECTOR_RPL_MASK; | 2035 | cs_sel &= ~SELECTOR_RPL_MASK; |
| 1940 | ss.selector = cs.selector + 8; | 2036 | ss_sel = cs_sel + 8; |
| 1941 | ss.selector &= ~SELECTOR_RPL_MASK; | 2037 | ss_sel &= ~SELECTOR_RPL_MASK; |
| 1942 | if (ctxt->mode == X86EMUL_MODE_PROT64 | 2038 | if (ctxt->mode == X86EMUL_MODE_PROT64 |
| 1943 | || is_long_mode(ctxt->vcpu)) { | 2039 | || is_long_mode(ctxt->vcpu)) { |
| 1944 | cs.db = 0; | 2040 | cs.d = 0; |
| 1945 | cs.l = 1; | 2041 | cs.l = 1; |
| 1946 | } | 2042 | } |
| 1947 | 2043 | ||
| 1948 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 2044 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
| 1949 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 2045 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
| 2046 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
| 2047 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
| 1950 | 2048 | ||
| 1951 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 2049 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); |
| 1952 | c->eip = msr_data; | 2050 | c->eip = msr_data; |
| 1953 | 2051 | ||
| 1954 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 2052 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); |
| 1955 | c->regs[VCPU_REGS_RSP] = msr_data; | 2053 | c->regs[VCPU_REGS_RSP] = msr_data; |
| 1956 | 2054 | ||
| 1957 | return X86EMUL_CONTINUE; | 2055 | return X86EMUL_CONTINUE; |
| 1958 | } | 2056 | } |
| 1959 | 2057 | ||
| 1960 | static int | 2058 | static int |
| 1961 | emulate_sysexit(struct x86_emulate_ctxt *ctxt) | 2059 | emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 1962 | { | 2060 | { |
| 1963 | struct decode_cache *c = &ctxt->decode; | 2061 | struct decode_cache *c = &ctxt->decode; |
| 1964 | struct kvm_segment cs, ss; | 2062 | struct desc_struct cs, ss; |
| 1965 | u64 msr_data; | 2063 | u64 msr_data; |
| 1966 | int usermode; | 2064 | int usermode; |
| 2065 | u16 cs_sel, ss_sel; | ||
| 1967 | 2066 | ||
| 1968 | /* inject #GP if in real mode or Virtual 8086 mode */ | 2067 | /* inject #GP if in real mode or Virtual 8086 mode */ |
| 1969 | if (ctxt->mode == X86EMUL_MODE_REAL || | 2068 | if (ctxt->mode == X86EMUL_MODE_REAL || |
| 1970 | ctxt->mode == X86EMUL_MODE_VM86) { | 2069 | ctxt->mode == X86EMUL_MODE_VM86) { |
| 1971 | kvm_inject_gp(ctxt->vcpu, 0); | 2070 | emulate_gp(ctxt, 0); |
| 1972 | return X86EMUL_PROPAGATE_FAULT; | 2071 | return X86EMUL_PROPAGATE_FAULT; |
| 1973 | } | 2072 | } |
| 1974 | 2073 | ||
| 1975 | setup_syscalls_segments(ctxt, &cs, &ss); | 2074 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
| 1976 | 2075 | ||
| 1977 | if ((c->rex_prefix & 0x8) != 0x0) | 2076 | if ((c->rex_prefix & 0x8) != 0x0) |
| 1978 | usermode = X86EMUL_MODE_PROT64; | 2077 | usermode = X86EMUL_MODE_PROT64; |
| @@ -1981,35 +2080,37 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
| 1981 | 2080 | ||
| 1982 | cs.dpl = 3; | 2081 | cs.dpl = 3; |
| 1983 | ss.dpl = 3; | 2082 | ss.dpl = 3; |
| 1984 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2083 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
| 1985 | switch (usermode) { | 2084 | switch (usermode) { |
| 1986 | case X86EMUL_MODE_PROT32: | 2085 | case X86EMUL_MODE_PROT32: |
| 1987 | cs.selector = (u16)(msr_data + 16); | 2086 | cs_sel = (u16)(msr_data + 16); |
| 1988 | if ((msr_data & 0xfffc) == 0x0) { | 2087 | if ((msr_data & 0xfffc) == 0x0) { |
| 1989 | kvm_inject_gp(ctxt->vcpu, 0); | 2088 | emulate_gp(ctxt, 0); |
| 1990 | return X86EMUL_PROPAGATE_FAULT; | 2089 | return X86EMUL_PROPAGATE_FAULT; |
| 1991 | } | 2090 | } |
| 1992 | ss.selector = (u16)(msr_data + 24); | 2091 | ss_sel = (u16)(msr_data + 24); |
| 1993 | break; | 2092 | break; |
| 1994 | case X86EMUL_MODE_PROT64: | 2093 | case X86EMUL_MODE_PROT64: |
| 1995 | cs.selector = (u16)(msr_data + 32); | 2094 | cs_sel = (u16)(msr_data + 32); |
| 1996 | if (msr_data == 0x0) { | 2095 | if (msr_data == 0x0) { |
| 1997 | kvm_inject_gp(ctxt->vcpu, 0); | 2096 | emulate_gp(ctxt, 0); |
| 1998 | return X86EMUL_PROPAGATE_FAULT; | 2097 | return X86EMUL_PROPAGATE_FAULT; |
| 1999 | } | 2098 | } |
| 2000 | ss.selector = cs.selector + 8; | 2099 | ss_sel = cs_sel + 8; |
| 2001 | cs.db = 0; | 2100 | cs.d = 0; |
| 2002 | cs.l = 1; | 2101 | cs.l = 1; |
| 2003 | break; | 2102 | break; |
| 2004 | } | 2103 | } |
| 2005 | cs.selector |= SELECTOR_RPL_MASK; | 2104 | cs_sel |= SELECTOR_RPL_MASK; |
| 2006 | ss.selector |= SELECTOR_RPL_MASK; | 2105 | ss_sel |= SELECTOR_RPL_MASK; |
| 2007 | 2106 | ||
| 2008 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 2107 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
| 2009 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 2108 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
| 2109 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
| 2110 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
| 2010 | 2111 | ||
| 2011 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; | 2112 | c->eip = c->regs[VCPU_REGS_RDX]; |
| 2012 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; | 2113 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; |
| 2013 | 2114 | ||
| 2014 | return X86EMUL_CONTINUE; | 2115 | return X86EMUL_CONTINUE; |
| 2015 | } | 2116 | } |
| @@ -2030,25 +2131,25 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
| 2030 | struct x86_emulate_ops *ops, | 2131 | struct x86_emulate_ops *ops, |
| 2031 | u16 port, u16 len) | 2132 | u16 port, u16 len) |
| 2032 | { | 2133 | { |
| 2033 | struct kvm_segment tr_seg; | 2134 | struct desc_struct tr_seg; |
| 2034 | int r; | 2135 | int r; |
| 2035 | u16 io_bitmap_ptr; | 2136 | u16 io_bitmap_ptr; |
| 2036 | u8 perm, bit_idx = port & 0x7; | 2137 | u8 perm, bit_idx = port & 0x7; |
| 2037 | unsigned mask = (1 << len) - 1; | 2138 | unsigned mask = (1 << len) - 1; |
| 2038 | 2139 | ||
| 2039 | kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); | 2140 | ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); |
| 2040 | if (tr_seg.unusable) | 2141 | if (!tr_seg.p) |
| 2041 | return false; | 2142 | return false; |
| 2042 | if (tr_seg.limit < 103) | 2143 | if (desc_limit_scaled(&tr_seg) < 103) |
| 2043 | return false; | 2144 | return false; |
| 2044 | r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, | 2145 | r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, |
| 2045 | NULL); | 2146 | ctxt->vcpu, NULL); |
| 2046 | if (r != X86EMUL_CONTINUE) | 2147 | if (r != X86EMUL_CONTINUE) |
| 2047 | return false; | 2148 | return false; |
| 2048 | if (io_bitmap_ptr + port/8 > tr_seg.limit) | 2149 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
| 2049 | return false; | 2150 | return false; |
| 2050 | r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, | 2151 | r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, |
| 2051 | ctxt->vcpu, NULL); | 2152 | &perm, 1, ctxt->vcpu, NULL); |
| 2052 | if (r != X86EMUL_CONTINUE) | 2153 | if (r != X86EMUL_CONTINUE) |
| 2053 | return false; | 2154 | return false; |
| 2054 | if ((perm >> bit_idx) & mask) | 2155 | if ((perm >> bit_idx) & mask) |
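The rewritten emulator_io_port_access_allowed() above walks the TSS through the cached descriptor: it loads the 16-bit I/O-map base from offset 102, reads the permission byte covering the port, and refuses the access if any of the len bits starting at bit port & 7 are set. The bit test itself can be exercised on its own; the sketch below fabricates a bitmap in ordinary memory and, like the single-byte read shown in the diff, ignores a span that would cross into the next bitmap byte:

    #include <stdio.h>
    #include <stdint.h>

    /* One-byte bitmap test as in the diff: any set bit in the range denies access. */
    static int io_permitted(const uint8_t *io_bitmap, uint16_t port, uint16_t len)
    {
        uint8_t perm = io_bitmap[port / 8];
        uint8_t bit_idx = port & 0x7;
        unsigned mask = (1u << len) - 1;

        return ((perm >> bit_idx) & mask) == 0;
    }

    int main(void)
    {
        uint8_t bitmap[8192] = { 0 };

        bitmap[0x61 / 8] |= 1 << (0x61 % 8);    /* block port 0x61 only */

        printf("port 0x60, 1 byte : %d\n", io_permitted(bitmap, 0x60, 1));  /* 1 */
        printf("port 0x60, 2 bytes: %d\n", io_permitted(bitmap, 0x60, 2));  /* 0 */
        return 0;
    }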
| @@ -2066,17 +2167,6 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
| 2066 | return true; | 2167 | return true; |
| 2067 | } | 2168 | } |
| 2068 | 2169 | ||
| 2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
| 2070 | struct x86_emulate_ops *ops, | ||
| 2071 | int seg) | ||
| 2072 | { | ||
| 2073 | struct desc_struct desc; | ||
| 2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
| 2075 | return get_desc_base(&desc); | ||
| 2076 | else | ||
| 2077 | return ~0; | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | 2170 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, |
| 2081 | struct x86_emulate_ops *ops, | 2171 | struct x86_emulate_ops *ops, |
| 2082 | struct tss_segment_16 *tss) | 2172 | struct tss_segment_16 *tss) |
| @@ -2165,7 +2255,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
| 2165 | &err); | 2255 | &err); |
| 2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2256 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2167 | /* FIXME: need to provide precise fault address */ | 2257 | /* FIXME: need to provide precise fault address */ |
| 2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2258 | emulate_pf(ctxt, old_tss_base, err); |
| 2169 | return ret; | 2259 | return ret; |
| 2170 | } | 2260 | } |
| 2171 | 2261 | ||
| @@ -2175,7 +2265,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
| 2175 | &err); | 2265 | &err); |
| 2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2266 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2177 | /* FIXME: need to provide precise fault address */ | 2267 | /* FIXME: need to provide precise fault address */ |
| 2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2268 | emulate_pf(ctxt, old_tss_base, err); |
| 2179 | return ret; | 2269 | return ret; |
| 2180 | } | 2270 | } |
| 2181 | 2271 | ||
| @@ -2183,7 +2273,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
| 2183 | &err); | 2273 | &err); |
| 2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2274 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2185 | /* FIXME: need to provide precise fault address */ | 2275 | /* FIXME: need to provide precise fault address */ |
| 2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2276 | emulate_pf(ctxt, new_tss_base, err); |
| 2187 | return ret; | 2277 | return ret; |
| 2188 | } | 2278 | } |
| 2189 | 2279 | ||
| @@ -2196,7 +2286,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
| 2196 | ctxt->vcpu, &err); | 2286 | ctxt->vcpu, &err); |
| 2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2287 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2198 | /* FIXME: need to provide precise fault address */ | 2288 | /* FIXME: need to provide precise fault address */ |
| 2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2289 | emulate_pf(ctxt, new_tss_base, err); |
| 2200 | return ret; | 2290 | return ret; |
| 2201 | } | 2291 | } |
| 2202 | } | 2292 | } |
| @@ -2238,7 +2328,10 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
| 2238 | struct decode_cache *c = &ctxt->decode; | 2328 | struct decode_cache *c = &ctxt->decode; |
| 2239 | int ret; | 2329 | int ret; |
| 2240 | 2330 | ||
| 2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | 2331 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) { |
| 2332 | emulate_gp(ctxt, 0); | ||
| 2333 | return X86EMUL_PROPAGATE_FAULT; | ||
| 2334 | } | ||
| 2242 | c->eip = tss->eip; | 2335 | c->eip = tss->eip; |
| 2243 | ctxt->eflags = tss->eflags | 2; | 2336 | ctxt->eflags = tss->eflags | 2; |
| 2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | 2337 | c->regs[VCPU_REGS_RAX] = tss->eax; |
| @@ -2304,7 +2397,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
| 2304 | &err); | 2397 | &err); |
| 2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2398 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2306 | /* FIXME: need to provide precise fault address */ | 2399 | /* FIXME: need to provide precise fault address */ |
| 2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2400 | emulate_pf(ctxt, old_tss_base, err); |
| 2308 | return ret; | 2401 | return ret; |
| 2309 | } | 2402 | } |
| 2310 | 2403 | ||
| @@ -2314,7 +2407,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
| 2314 | &err); | 2407 | &err); |
| 2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2408 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2316 | /* FIXME: need to provide precise fault address */ | 2409 | /* FIXME: need to provide precise fault address */ |
| 2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2410 | emulate_pf(ctxt, old_tss_base, err); |
| 2318 | return ret; | 2411 | return ret; |
| 2319 | } | 2412 | } |
| 2320 | 2413 | ||
| @@ -2322,7 +2415,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
| 2322 | &err); | 2415 | &err); |
| 2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2416 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2324 | /* FIXME: need to provide precise fault address */ | 2417 | /* FIXME: need to provide precise fault address */ |
| 2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2418 | emulate_pf(ctxt, new_tss_base, err); |
| 2326 | return ret; | 2419 | return ret; |
| 2327 | } | 2420 | } |
| 2328 | 2421 | ||
| @@ -2335,7 +2428,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
| 2335 | ctxt->vcpu, &err); | 2428 | ctxt->vcpu, &err); |
| 2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2429 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
| 2337 | /* FIXME: need to provide precise fault address */ | 2430 | /* FIXME: need to provide precise fault address */ |
| 2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2431 | emulate_pf(ctxt, new_tss_base, err); |
| 2339 | return ret; | 2432 | return ret; |
| 2340 | } | 2433 | } |
| 2341 | } | 2434 | } |
| @@ -2352,7 +2445,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
| 2352 | int ret; | 2445 | int ret; |
| 2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | 2446 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); |
| 2354 | ulong old_tss_base = | 2447 | ulong old_tss_base = |
| 2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | 2448 | ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); |
| 2356 | u32 desc_limit; | 2449 | u32 desc_limit; |
| 2357 | 2450 | ||
| 2358 | /* FIXME: old_tss_base == ~0 ? */ | 2451 | /* FIXME: old_tss_base == ~0 ? */ |
| @@ -2369,7 +2462,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
| 2369 | if (reason != TASK_SWITCH_IRET) { | 2462 | if (reason != TASK_SWITCH_IRET) { |
| 2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2463 | if ((tss_selector & 3) > next_tss_desc.dpl || |
| 2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | 2464 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { |
| 2372 | kvm_inject_gp(ctxt->vcpu, 0); | 2465 | emulate_gp(ctxt, 0); |
| 2373 | return X86EMUL_PROPAGATE_FAULT; | 2466 | return X86EMUL_PROPAGATE_FAULT; |
| 2374 | } | 2467 | } |
| 2375 | } | 2468 | } |
| @@ -2378,8 +2471,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
| 2378 | if (!next_tss_desc.p || | 2471 | if (!next_tss_desc.p || |
| 2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | 2472 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || |
| 2380 | desc_limit < 0x2b)) { | 2473 | desc_limit < 0x2b)) { |
| 2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | 2474 | emulate_ts(ctxt, tss_selector & 0xfffc); |
| 2382 | tss_selector & 0xfffc); | ||
| 2383 | return X86EMUL_PROPAGATE_FAULT; | 2475 | return X86EMUL_PROPAGATE_FAULT; |
| 2384 | } | 2476 | } |
| 2385 | 2477 | ||
| @@ -2425,7 +2517,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
| 2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | 2517 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; |
| 2426 | c->lock_prefix = 0; | 2518 | c->lock_prefix = 0; |
| 2427 | c->src.val = (unsigned long) error_code; | 2519 | c->src.val = (unsigned long) error_code; |
| 2428 | emulate_push(ctxt); | 2520 | emulate_push(ctxt, ops); |
| 2429 | } | 2521 | } |
| 2430 | 2522 | ||
| 2431 | return ret; | 2523 | return ret; |
| @@ -2439,18 +2531,16 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
| 2439 | struct decode_cache *c = &ctxt->decode; | 2531 | struct decode_cache *c = &ctxt->decode; |
| 2440 | int rc; | 2532 | int rc; |
| 2441 | 2533 | ||
| 2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
| 2443 | c->eip = ctxt->eip; | 2534 | c->eip = ctxt->eip; |
| 2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
| 2445 | c->dst.type = OP_NONE; | 2535 | c->dst.type = OP_NONE; |
| 2446 | 2536 | ||
| 2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | 2537 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, |
| 2448 | has_error_code, error_code); | 2538 | has_error_code, error_code); |
| 2449 | 2539 | ||
| 2450 | if (rc == X86EMUL_CONTINUE) { | 2540 | if (rc == X86EMUL_CONTINUE) { |
| 2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
| 2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
| 2453 | rc = writeback(ctxt, ops); | 2541 | rc = writeback(ctxt, ops); |
| 2542 | if (rc == X86EMUL_CONTINUE) | ||
| 2543 | ctxt->eip = c->eip; | ||
| 2454 | } | 2544 | } |
| 2455 | 2545 | ||
| 2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 2546 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
| @@ -2474,29 +2564,22 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 2474 | int rc = X86EMUL_CONTINUE; | 2564 | int rc = X86EMUL_CONTINUE; |
| 2475 | int saved_dst_type = c->dst.type; | 2565 | int saved_dst_type = c->dst.type; |
| 2476 | 2566 | ||
| 2477 | ctxt->interruptibility = 0; | 2567 | ctxt->decode.mem_read.pos = 0; |
| 2478 | |||
| 2479 | /* Shadow copy of register state. Committed on successful emulation. | ||
| 2480 | * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't | ||
| 2481 | * modify them. | ||
| 2482 | */ | ||
| 2483 | |||
| 2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
| 2485 | 2568 | ||
| 2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 2569 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { |
| 2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2570 | emulate_ud(ctxt); |
| 2488 | goto done; | 2571 | goto done; |
| 2489 | } | 2572 | } |
| 2490 | 2573 | ||
| 2491 | /* LOCK prefix is allowed only with some instructions */ | 2574 | /* LOCK prefix is allowed only with some instructions */ |
| 2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { | 2575 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
| 2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2576 | emulate_ud(ctxt); |
| 2494 | goto done; | 2577 | goto done; |
| 2495 | } | 2578 | } |
| 2496 | 2579 | ||
| 2497 | /* Privileged instruction can be executed only in CPL=0 */ | 2580 | /* Privileged instruction can be executed only in CPL=0 */ |
| 2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { | 2581 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
| 2499 | kvm_inject_gp(ctxt->vcpu, 0); | 2582 | emulate_gp(ctxt, 0); |
| 2500 | goto done; | 2583 | goto done; |
| 2501 | } | 2584 | } |
| 2502 | 2585 | ||
| @@ -2506,7 +2589,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { | 2589 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
| 2507 | string_done: | 2590 | string_done: |
| 2508 | ctxt->restart = false; | 2591 | ctxt->restart = false; |
| 2509 | kvm_rip_write(ctxt->vcpu, c->eip); | 2592 | ctxt->eip = c->eip; |
| 2510 | goto done; | 2593 | goto done; |
| 2511 | } | 2594 | } |
| 2512 | /* The second termination condition only applies for REPE | 2595 | /* The second termination condition only applies for REPE |
| @@ -2529,20 +2612,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 2529 | } | 2612 | } |
| 2530 | 2613 | ||
| 2531 | if (c->src.type == OP_MEM) { | 2614 | if (c->src.type == OP_MEM) { |
| 2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2615 | rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr, |
| 2533 | &c->src.val, | 2616 | c->src.valptr, c->src.bytes); |
| 2534 | c->src.bytes, | ||
| 2535 | ctxt->vcpu); | ||
| 2536 | if (rc != X86EMUL_CONTINUE) | 2617 | if (rc != X86EMUL_CONTINUE) |
| 2537 | goto done; | 2618 | goto done; |
| 2538 | c->src.orig_val = c->src.val; | 2619 | c->src.orig_val = c->src.val; |
| 2539 | } | 2620 | } |
| 2540 | 2621 | ||
| 2541 | if (c->src2.type == OP_MEM) { | 2622 | if (c->src2.type == OP_MEM) { |
| 2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | 2623 | rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr, |
| 2543 | &c->src2.val, | 2624 | &c->src2.val, c->src2.bytes); |
| 2544 | c->src2.bytes, | ||
| 2545 | ctxt->vcpu); | ||
| 2546 | if (rc != X86EMUL_CONTINUE) | 2625 | if (rc != X86EMUL_CONTINUE) |
| 2547 | goto done; | 2626 | goto done; |
| 2548 | } | 2627 | } |
| @@ -2553,8 +2632,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 2553 | 2632 | ||
| 2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 2633 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
| 2555 | /* optimisation - avoid slow emulated read if Mov */ | 2634 | /* optimisation - avoid slow emulated read if Mov */ |
| 2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, | 2635 | rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr, |
| 2557 | c->dst.bytes, ctxt->vcpu); | 2636 | &c->dst.val, c->dst.bytes); |
| 2558 | if (rc != X86EMUL_CONTINUE) | 2637 | if (rc != X86EMUL_CONTINUE) |
| 2559 | goto done; | 2638 | goto done; |
| 2560 | } | 2639 | } |
| @@ -2571,7 +2650,7 @@ special_insn: | |||
| 2571 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 2650 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); |
| 2572 | break; | 2651 | break; |
| 2573 | case 0x06: /* push es */ | 2652 | case 0x06: /* push es */ |
| 2574 | emulate_push_sreg(ctxt, VCPU_SREG_ES); | 2653 | emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); |
| 2575 | break; | 2654 | break; |
| 2576 | case 0x07: /* pop es */ | 2655 | case 0x07: /* pop es */ |
| 2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2656 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
| @@ -2583,14 +2662,14 @@ special_insn: | |||
| 2583 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 2662 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); |
| 2584 | break; | 2663 | break; |
| 2585 | case 0x0e: /* push cs */ | 2664 | case 0x0e: /* push cs */ |
| 2586 | emulate_push_sreg(ctxt, VCPU_SREG_CS); | 2665 | emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); |
| 2587 | break; | 2666 | break; |
| 2588 | case 0x10 ... 0x15: | 2667 | case 0x10 ... 0x15: |
| 2589 | adc: /* adc */ | 2668 | adc: /* adc */ |
| 2590 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | 2669 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); |
| 2591 | break; | 2670 | break; |
| 2592 | case 0x16: /* push ss */ | 2671 | case 0x16: /* push ss */ |
| 2593 | emulate_push_sreg(ctxt, VCPU_SREG_SS); | 2672 | emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); |
| 2594 | break; | 2673 | break; |
| 2595 | case 0x17: /* pop ss */ | 2674 | case 0x17: /* pop ss */ |
| 2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2675 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
| @@ -2602,7 +2681,7 @@ special_insn: | |||
| 2602 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 2681 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
| 2603 | break; | 2682 | break; |
| 2604 | case 0x1e: /* push ds */ | 2683 | case 0x1e: /* push ds */ |
| 2605 | emulate_push_sreg(ctxt, VCPU_SREG_DS); | 2684 | emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); |
| 2606 | break; | 2685 | break; |
| 2607 | case 0x1f: /* pop ds */ | 2686 | case 0x1f: /* pop ds */ |
| 2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2687 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
| @@ -2632,7 +2711,7 @@ special_insn: | |||
| 2632 | emulate_1op("dec", c->dst, ctxt->eflags); | 2711 | emulate_1op("dec", c->dst, ctxt->eflags); |
| 2633 | break; | 2712 | break; |
| 2634 | case 0x50 ... 0x57: /* push reg */ | 2713 | case 0x50 ... 0x57: /* push reg */ |
| 2635 | emulate_push(ctxt); | 2714 | emulate_push(ctxt, ops); |
| 2636 | break; | 2715 | break; |
| 2637 | case 0x58 ... 0x5f: /* pop reg */ | 2716 | case 0x58 ... 0x5f: /* pop reg */ |
| 2638 | pop_instruction: | 2717 | pop_instruction: |
| @@ -2641,7 +2720,9 @@ special_insn: | |||
| 2641 | goto done; | 2720 | goto done; |
| 2642 | break; | 2721 | break; |
| 2643 | case 0x60: /* pusha */ | 2722 | case 0x60: /* pusha */ |
| 2644 | emulate_pusha(ctxt); | 2723 | rc = emulate_pusha(ctxt, ops); |
| 2724 | if (rc != X86EMUL_CONTINUE) | ||
| 2725 | goto done; | ||
| 2645 | break; | 2726 | break; |
| 2646 | case 0x61: /* popa */ | 2727 | case 0x61: /* popa */ |
| 2647 | rc = emulate_popa(ctxt, ops); | 2728 | rc = emulate_popa(ctxt, ops); |
| @@ -2655,14 +2736,14 @@ special_insn: | |||
| 2655 | break; | 2736 | break; |
| 2656 | case 0x68: /* push imm */ | 2737 | case 0x68: /* push imm */ |
| 2657 | case 0x6a: /* push imm8 */ | 2738 | case 0x6a: /* push imm8 */ |
| 2658 | emulate_push(ctxt); | 2739 | emulate_push(ctxt, ops); |
| 2659 | break; | 2740 | break; |
| 2660 | case 0x6c: /* insb */ | 2741 | case 0x6c: /* insb */ |
| 2661 | case 0x6d: /* insw/insd */ | 2742 | case 0x6d: /* insw/insd */ |
| 2662 | c->dst.bytes = min(c->dst.bytes, 4u); | 2743 | c->dst.bytes = min(c->dst.bytes, 4u); |
| 2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2744 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
| 2664 | c->dst.bytes)) { | 2745 | c->dst.bytes)) { |
| 2665 | kvm_inject_gp(ctxt->vcpu, 0); | 2746 | emulate_gp(ctxt, 0); |
| 2666 | goto done; | 2747 | goto done; |
| 2667 | } | 2748 | } |
| 2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, | 2749 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
| @@ -2674,7 +2755,7 @@ special_insn: | |||
| 2674 | c->src.bytes = min(c->src.bytes, 4u); | 2755 | c->src.bytes = min(c->src.bytes, 4u); |
| 2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2756 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
| 2676 | c->src.bytes)) { | 2757 | c->src.bytes)) { |
| 2677 | kvm_inject_gp(ctxt->vcpu, 0); | 2758 | emulate_gp(ctxt, 0); |
| 2678 | goto done; | 2759 | goto done; |
| 2679 | } | 2760 | } |
| 2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], | 2761 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
| @@ -2707,6 +2788,7 @@ special_insn: | |||
| 2707 | } | 2788 | } |
| 2708 | break; | 2789 | break; |
| 2709 | case 0x84 ... 0x85: | 2790 | case 0x84 ... 0x85: |
| 2791 | test: | ||
| 2710 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 2792 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); |
| 2711 | break; | 2793 | break; |
| 2712 | case 0x86 ... 0x87: /* xchg */ | 2794 | case 0x86 ... 0x87: /* xchg */ |
| @@ -2735,18 +2817,13 @@ special_insn: | |||
| 2735 | break; | 2817 | break; |
| 2736 | case 0x88 ... 0x8b: /* mov */ | 2818 | case 0x88 ... 0x8b: /* mov */ |
| 2737 | goto mov; | 2819 | goto mov; |
| 2738 | case 0x8c: { /* mov r/m, sreg */ | 2820 | case 0x8c: /* mov r/m, sreg */ |
| 2739 | struct kvm_segment segreg; | 2821 | if (c->modrm_reg > VCPU_SREG_GS) { |
| 2740 | 2822 | emulate_ud(ctxt); | |
| 2741 | if (c->modrm_reg <= VCPU_SREG_GS) | ||
| 2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | ||
| 2743 | else { | ||
| 2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 2745 | goto done; | 2823 | goto done; |
| 2746 | } | 2824 | } |
| 2747 | c->dst.val = segreg.selector; | 2825 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); |
| 2748 | break; | 2826 | break; |
| 2749 | } | ||
| 2750 | case 0x8d: /* lea r16/r32, m */ | 2827 | case 0x8d: /* lea r16/r32, m */ |
| 2751 | c->dst.val = c->modrm_ea; | 2828 | c->dst.val = c->modrm_ea; |
| 2752 | break; | 2829 | break; |
| @@ -2757,12 +2834,12 @@ special_insn: | |||
| 2757 | 2834 | ||
| 2758 | if (c->modrm_reg == VCPU_SREG_CS || | 2835 | if (c->modrm_reg == VCPU_SREG_CS || |
| 2759 | c->modrm_reg > VCPU_SREG_GS) { | 2836 | c->modrm_reg > VCPU_SREG_GS) { |
| 2760 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2837 | emulate_ud(ctxt); |
| 2761 | goto done; | 2838 | goto done; |
| 2762 | } | 2839 | } |
| 2763 | 2840 | ||
| 2764 | if (c->modrm_reg == VCPU_SREG_SS) | 2841 | if (c->modrm_reg == VCPU_SREG_SS) |
| 2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); | 2842 | ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; |
| 2766 | 2843 | ||
| 2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); | 2844 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
| 2768 | 2845 | ||
| @@ -2775,19 +2852,19 @@ special_insn: | |||
| 2775 | goto done; | 2852 | goto done; |
| 2776 | break; | 2853 | break; |
| 2777 | case 0x90: /* nop / xchg r8,rax */ | 2854 | case 0x90: /* nop / xchg r8,rax */ |
| 2778 | if (!(c->rex_prefix & 1)) { /* nop */ | 2855 | if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) { |
| 2779 | c->dst.type = OP_NONE; | 2856 | c->dst.type = OP_NONE; /* nop */ |
| 2780 | break; | 2857 | break; |
| 2781 | } | 2858 | } |
| 2782 | case 0x91 ... 0x97: /* xchg reg,rax */ | 2859 | case 0x91 ... 0x97: /* xchg reg,rax */ |
| 2783 | c->src.type = c->dst.type = OP_REG; | 2860 | c->src.type = OP_REG; |
| 2784 | c->src.bytes = c->dst.bytes = c->op_bytes; | 2861 | c->src.bytes = c->op_bytes; |
| 2785 | c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; | 2862 | c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; |
| 2786 | c->src.val = *(c->src.ptr); | 2863 | c->src.val = *(c->src.ptr); |
| 2787 | goto xchg; | 2864 | goto xchg; |
| 2788 | case 0x9c: /* pushf */ | 2865 | case 0x9c: /* pushf */ |
| 2789 | c->src.val = (unsigned long) ctxt->eflags; | 2866 | c->src.val = (unsigned long) ctxt->eflags; |
| 2790 | emulate_push(ctxt); | 2867 | emulate_push(ctxt, ops); |
| 2791 | break; | 2868 | break; |
| 2792 | case 0x9d: /* popf */ | 2869 | case 0x9d: /* popf */ |
| 2793 | c->dst.type = OP_REG; | 2870 | c->dst.type = OP_REG; |
| @@ -2797,19 +2874,15 @@ special_insn: | |||
| 2797 | if (rc != X86EMUL_CONTINUE) | 2874 | if (rc != X86EMUL_CONTINUE) |
| 2798 | goto done; | 2875 | goto done; |
| 2799 | break; | 2876 | break; |
| 2800 | case 0xa0 ... 0xa1: /* mov */ | 2877 | case 0xa0 ... 0xa3: /* mov */ |
| 2801 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
| 2802 | c->dst.val = c->src.val; | ||
| 2803 | break; | ||
| 2804 | case 0xa2 ... 0xa3: /* mov */ | ||
| 2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | ||
| 2806 | break; | ||
| 2807 | case 0xa4 ... 0xa5: /* movs */ | 2878 | case 0xa4 ... 0xa5: /* movs */ |
| 2808 | goto mov; | 2879 | goto mov; |
| 2809 | case 0xa6 ... 0xa7: /* cmps */ | 2880 | case 0xa6 ... 0xa7: /* cmps */ |
| 2810 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2881 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2882 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
| 2812 | goto cmp; | 2883 | goto cmp; |
| 2884 | case 0xa8 ... 0xa9: /* test ax, imm */ | ||
| 2885 | goto test; | ||
| 2813 | case 0xaa ... 0xab: /* stos */ | 2886 | case 0xaa ... 0xab: /* stos */ |
| 2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2887 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
| 2815 | break; | 2888 | break; |
| @@ -2855,19 +2928,23 @@ special_insn: | |||
| 2855 | long int rel = c->src.val; | 2928 | long int rel = c->src.val; |
| 2856 | c->src.val = (unsigned long) c->eip; | 2929 | c->src.val = (unsigned long) c->eip; |
| 2857 | jmp_rel(c, rel); | 2930 | jmp_rel(c, rel); |
| 2858 | emulate_push(ctxt); | 2931 | emulate_push(ctxt, ops); |
| 2859 | break; | 2932 | break; |
| 2860 | } | 2933 | } |
| 2861 | case 0xe9: /* jmp rel */ | 2934 | case 0xe9: /* jmp rel */ |
| 2862 | goto jmp; | 2935 | goto jmp; |
| 2863 | case 0xea: /* jmp far */ | 2936 | case 0xea: { /* jmp far */ |
| 2937 | unsigned short sel; | ||
| 2864 | jump_far: | 2938 | jump_far: |
| 2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, | 2939 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); |
| 2866 | VCPU_SREG_CS)) | 2940 | |
| 2941 | if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) | ||
| 2867 | goto done; | 2942 | goto done; |
| 2868 | 2943 | ||
| 2869 | c->eip = c->src.val; | 2944 | c->eip = 0; |
| 2945 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | ||
| 2870 | break; | 2946 | break; |
| 2947 | } | ||
| 2871 | case 0xeb: | 2948 | case 0xeb: |
| 2872 | jmp: /* jmp rel short */ | 2949 | jmp: /* jmp rel short */ |
| 2873 | jmp_rel(c, c->src.val); | 2950 | jmp_rel(c, c->src.val); |
| @@ -2879,20 +2956,20 @@ special_insn: | |||
| 2879 | do_io_in: | 2956 | do_io_in: |
| 2880 | c->dst.bytes = min(c->dst.bytes, 4u); | 2957 | c->dst.bytes = min(c->dst.bytes, 4u); |
| 2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 2958 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
| 2882 | kvm_inject_gp(ctxt->vcpu, 0); | 2959 | emulate_gp(ctxt, 0); |
| 2883 | goto done; | 2960 | goto done; |
| 2884 | } | 2961 | } |
| 2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 2962 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, |
| 2886 | &c->dst.val)) | 2963 | &c->dst.val)) |
| 2887 | goto done; /* IO is needed */ | 2964 | goto done; /* IO is needed */ |
| 2888 | break; | 2965 | break; |
| 2889 | case 0xee: /* out al,dx */ | 2966 | case 0xee: /* out dx,al */ |
| 2890 | case 0xef: /* out (e/r)ax,dx */ | 2967 | case 0xef: /* out dx,(e/r)ax */ |
| 2891 | c->src.val = c->regs[VCPU_REGS_RDX]; | 2968 | c->src.val = c->regs[VCPU_REGS_RDX]; |
| 2892 | do_io_out: | 2969 | do_io_out: |
| 2893 | c->dst.bytes = min(c->dst.bytes, 4u); | 2970 | c->dst.bytes = min(c->dst.bytes, 4u); |
| 2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 2971 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
| 2895 | kvm_inject_gp(ctxt->vcpu, 0); | 2972 | emulate_gp(ctxt, 0); |
| 2896 | goto done; | 2973 | goto done; |
| 2897 | } | 2974 | } |
| 2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, | 2975 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
| @@ -2916,18 +2993,20 @@ special_insn: | |||
| 2916 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2993 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2917 | break; | 2994 | break; |
| 2918 | case 0xfa: /* cli */ | 2995 | case 0xfa: /* cli */ |
| 2919 | if (emulator_bad_iopl(ctxt, ops)) | 2996 | if (emulator_bad_iopl(ctxt, ops)) { |
| 2920 | kvm_inject_gp(ctxt->vcpu, 0); | 2997 | emulate_gp(ctxt, 0); |
| 2921 | else { | 2998 | goto done; |
| 2999 | } else { | ||
| 2922 | ctxt->eflags &= ~X86_EFLAGS_IF; | 3000 | ctxt->eflags &= ~X86_EFLAGS_IF; |
| 2923 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3001 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2924 | } | 3002 | } |
| 2925 | break; | 3003 | break; |
| 2926 | case 0xfb: /* sti */ | 3004 | case 0xfb: /* sti */ |
| 2927 | if (emulator_bad_iopl(ctxt, ops)) | 3005 | if (emulator_bad_iopl(ctxt, ops)) { |
| 2928 | kvm_inject_gp(ctxt->vcpu, 0); | 3006 | emulate_gp(ctxt, 0); |
| 2929 | else { | 3007 | goto done; |
| 2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); | 3008 | } else { |
| 3009 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; | ||
| 2931 | ctxt->eflags |= X86_EFLAGS_IF; | 3010 | ctxt->eflags |= X86_EFLAGS_IF; |
| 2932 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3011 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2933 | } | 3012 | } |
| @@ -2964,11 +3043,12 @@ writeback: | |||
| 2964 | c->dst.type = saved_dst_type; | 3043 | c->dst.type = saved_dst_type; |
| 2965 | 3044 | ||
| 2966 | if ((c->d & SrcMask) == SrcSI) | 3045 | if ((c->d & SrcMask) == SrcSI) |
| 2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | 3046 | string_addr_inc(ctxt, seg_override_base(ctxt, ops, c), |
| 2968 | &c->src); | 3047 | VCPU_REGS_RSI, &c->src); |
| 2969 | 3048 | ||
| 2970 | if ((c->d & DstMask) == DstDI) | 3049 | if ((c->d & DstMask) == DstDI) |
| 2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | 3050 | string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI, |
| 3051 | &c->dst); | ||
| 2972 | 3052 | ||
| 2973 | if (c->rep_prefix && (c->d & String)) { | 3053 | if (c->rep_prefix && (c->d & String)) { |
| 2974 | struct read_cache *rc = &ctxt->decode.io_read; | 3054 | struct read_cache *rc = &ctxt->decode.io_read; |
| @@ -2981,11 +3061,12 @@ writeback: | |||
| 2981 | (rc->end != 0 && rc->end == rc->pos)) | 3061 | (rc->end != 0 && rc->end == rc->pos)) |
| 2982 | ctxt->restart = false; | 3062 | ctxt->restart = false; |
| 2983 | } | 3063 | } |
| 2984 | 3064 | /* | |
| 2985 | /* Commit shadow register state. */ | 3065 | * reset read cache here in case string instruction is restarted |
| 2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 3066 | * without decoding |
| 2987 | kvm_rip_write(ctxt->vcpu, c->eip); | 3067 | */ |
| 2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | 3068 | ctxt->decode.mem_read.end = 0; |
| 3069 | ctxt->eip = c->eip; | ||
| 2989 | 3070 | ||
| 2990 | done: | 3071 | done: |
| 2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 3072 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
| @@ -3051,7 +3132,7 @@ twobyte_insn: | |||
| 3051 | c->dst.type = OP_NONE; | 3132 | c->dst.type = OP_NONE; |
| 3052 | break; | 3133 | break; |
| 3053 | case 5: /* not defined */ | 3134 | case 5: /* not defined */ |
| 3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3135 | emulate_ud(ctxt); |
| 3055 | goto done; | 3136 | goto done; |
| 3056 | case 7: /* invlpg*/ | 3137 | case 7: /* invlpg*/ |
| 3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); | 3138 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
| @@ -3063,7 +3144,7 @@ twobyte_insn: | |||
| 3063 | } | 3144 | } |
| 3064 | break; | 3145 | break; |
| 3065 | case 0x05: /* syscall */ | 3146 | case 0x05: /* syscall */ |
| 3066 | rc = emulate_syscall(ctxt); | 3147 | rc = emulate_syscall(ctxt, ops); |
| 3067 | if (rc != X86EMUL_CONTINUE) | 3148 | if (rc != X86EMUL_CONTINUE) |
| 3068 | goto done; | 3149 | goto done; |
| 3069 | else | 3150 | else |
| @@ -3073,8 +3154,11 @@ twobyte_insn: | |||
| 3073 | emulate_clts(ctxt->vcpu); | 3154 | emulate_clts(ctxt->vcpu); |
| 3074 | c->dst.type = OP_NONE; | 3155 | c->dst.type = OP_NONE; |
| 3075 | break; | 3156 | break; |
| 3076 | case 0x08: /* invd */ | ||
| 3077 | case 0x09: /* wbinvd */ | 3157 | case 0x09: /* wbinvd */ |
| 3158 | kvm_emulate_wbinvd(ctxt->vcpu); | ||
| 3159 | c->dst.type = OP_NONE; | ||
| 3160 | break; | ||
| 3161 | case 0x08: /* invd */ | ||
| 3078 | case 0x0d: /* GrpP (prefetch) */ | 3162 | case 0x0d: /* GrpP (prefetch) */ |
| 3079 | case 0x18: /* Grp16 (prefetch/nop) */ | 3163 | case 0x18: /* Grp16 (prefetch/nop) */ |
| 3080 | c->dst.type = OP_NONE; | 3164 | c->dst.type = OP_NONE; |
| @@ -3084,7 +3168,7 @@ twobyte_insn: | |||
| 3084 | case 1: | 3168 | case 1: |
| 3085 | case 5 ... 7: | 3169 | case 5 ... 7: |
| 3086 | case 9 ... 15: | 3170 | case 9 ... 15: |
| 3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3171 | emulate_ud(ctxt); |
| 3088 | goto done; | 3172 | goto done; |
| 3089 | } | 3173 | } |
| 3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | 3174 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); |
| @@ -3093,31 +3177,42 @@ twobyte_insn: | |||
| 3093 | case 0x21: /* mov from dr to reg */ | 3177 | case 0x21: /* mov from dr to reg */ |
| 3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3178 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
| 3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3179 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
| 3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3180 | emulate_ud(ctxt); |
| 3097 | goto done; | 3181 | goto done; |
| 3098 | } | 3182 | } |
| 3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3183 | ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu); |
| 3100 | c->dst.type = OP_NONE; /* no writeback */ | 3184 | c->dst.type = OP_NONE; /* no writeback */ |
| 3101 | break; | 3185 | break; |
| 3102 | case 0x22: /* mov reg, cr */ | 3186 | case 0x22: /* mov reg, cr */ |
| 3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); | 3187 | if (ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu)) { |
| 3188 | emulate_gp(ctxt, 0); | ||
| 3189 | goto done; | ||
| 3190 | } | ||
| 3104 | c->dst.type = OP_NONE; | 3191 | c->dst.type = OP_NONE; |
| 3105 | break; | 3192 | break; |
| 3106 | case 0x23: /* mov from reg to dr */ | 3193 | case 0x23: /* mov from reg to dr */ |
| 3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3194 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
| 3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3195 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
| 3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3196 | emulate_ud(ctxt); |
| 3197 | goto done; | ||
| 3198 | } | ||
| 3199 | |||
| 3200 | if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] & | ||
| 3201 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | ||
| 3202 | ~0ULL : ~0U), ctxt->vcpu) < 0) { | ||
| 3203 | /* #UD condition is already handled by the code above */ | ||
| 3204 | emulate_gp(ctxt, 0); | ||
| 3110 | goto done; | 3205 | goto done; |
| 3111 | } | 3206 | } |
| 3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); | 3207 | |
| 3113 | c->dst.type = OP_NONE; /* no writeback */ | 3208 | c->dst.type = OP_NONE; /* no writeback */ |
| 3114 | break; | 3209 | break; |
| 3115 | case 0x30: | 3210 | case 0x30: |
| 3116 | /* wrmsr */ | 3211 | /* wrmsr */ |
| 3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3212 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
| 3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3213 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
| 3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { | 3214 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
| 3120 | kvm_inject_gp(ctxt->vcpu, 0); | 3215 | emulate_gp(ctxt, 0); |
| 3121 | goto done; | 3216 | goto done; |
| 3122 | } | 3217 | } |
| 3123 | rc = X86EMUL_CONTINUE; | 3218 | rc = X86EMUL_CONTINUE; |
| @@ -3125,8 +3220,8 @@ twobyte_insn: | |||
| 3125 | break; | 3220 | break; |
| 3126 | case 0x32: | 3221 | case 0x32: |
| 3127 | /* rdmsr */ | 3222 | /* rdmsr */ |
| 3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { | 3223 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
| 3129 | kvm_inject_gp(ctxt->vcpu, 0); | 3224 | emulate_gp(ctxt, 0); |
| 3130 | goto done; | 3225 | goto done; |
| 3131 | } else { | 3226 | } else { |
| 3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3227 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
| @@ -3136,14 +3231,14 @@ twobyte_insn: | |||
| 3136 | c->dst.type = OP_NONE; | 3231 | c->dst.type = OP_NONE; |
| 3137 | break; | 3232 | break; |
| 3138 | case 0x34: /* sysenter */ | 3233 | case 0x34: /* sysenter */ |
| 3139 | rc = emulate_sysenter(ctxt); | 3234 | rc = emulate_sysenter(ctxt, ops); |
| 3140 | if (rc != X86EMUL_CONTINUE) | 3235 | if (rc != X86EMUL_CONTINUE) |
| 3141 | goto done; | 3236 | goto done; |
| 3142 | else | 3237 | else |
| 3143 | goto writeback; | 3238 | goto writeback; |
| 3144 | break; | 3239 | break; |
| 3145 | case 0x35: /* sysexit */ | 3240 | case 0x35: /* sysexit */ |
| 3146 | rc = emulate_sysexit(ctxt); | 3241 | rc = emulate_sysexit(ctxt, ops); |
| 3147 | if (rc != X86EMUL_CONTINUE) | 3242 | if (rc != X86EMUL_CONTINUE) |
| 3148 | goto done; | 3243 | goto done; |
| 3149 | else | 3244 | else |
| @@ -3160,7 +3255,7 @@ twobyte_insn: | |||
| 3160 | c->dst.type = OP_NONE; | 3255 | c->dst.type = OP_NONE; |
| 3161 | break; | 3256 | break; |
| 3162 | case 0xa0: /* push fs */ | 3257 | case 0xa0: /* push fs */ |
| 3163 | emulate_push_sreg(ctxt, VCPU_SREG_FS); | 3258 | emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); |
| 3164 | break; | 3259 | break; |
| 3165 | case 0xa1: /* pop fs */ | 3260 | case 0xa1: /* pop fs */ |
| 3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3261 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
| @@ -3179,7 +3274,7 @@ twobyte_insn: | |||
| 3179 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 3274 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
| 3180 | break; | 3275 | break; |
| 3181 | case 0xa8: /* push gs */ | 3276 | case 0xa8: /* push gs */ |
| 3182 | emulate_push_sreg(ctxt, VCPU_SREG_GS); | 3277 | emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); |
| 3183 | break; | 3278 | break; |
| 3184 | case 0xa9: /* pop gs */ | 3279 | case 0xa9: /* pop gs */ |
| 3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3280 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
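The emulate.c changes above route exception injection through the emulate_ud()/emulate_gp() helpers, reorder the 0x0f 0x08/0x09 cases so wbinvd is actually executed (invd still falls through to the nop group), and make the mov-to-DR path (0x0f 0x23) truncate the source register to 32 bits outside 64-bit mode before handing it to ops->set_dr(), injecting #GP if the write is rejected. A minimal stand-alone sketch of that operand truncation, assuming only a two-value mode enum (names are illustrative, not the emulator's own types):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the emulator's mode field. */
enum emul_mode { MODE_PROT32, MODE_PROT64 };

/* Outside 64-bit mode only the low 32 bits of the source GPR are
 * architecturally meaningful for a mov to a debug register. */
static uint64_t dr_write_value(enum emul_mode mode, uint64_t reg)
{
	return reg & ((mode == MODE_PROT64) ? ~0ULL : ~0U);
}

int main(void)
{
	uint64_t reg = 0x123456789abcdef0ULL;

	printf("64-bit mode: %#llx\n",
	       (unsigned long long)dr_write_value(MODE_PROT64, reg));
	printf("32-bit mode: %#llx\n",
	       (unsigned long long)dr_write_value(MODE_PROT32, reg));
	return 0;
}
```

The same ~0ULL/~0U expression appears verbatim in the hunk above; a rejected ops->set_dr() or ops->set_cr() now raises #GP through emulate_gp(ctxt, 0) instead of calling into kvm directly.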
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 0150affad25d..0fd6378981f4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | * Copyright (c) 2006 Intel Corporation | 5 | * Copyright (c) 2006 Intel Corporation |
| 6 | * Copyright (c) 2007 Keir Fraser, XenSource Inc | 6 | * Copyright (c) 2007 Keir Fraser, XenSource Inc |
| 7 | * Copyright (c) 2008 Intel Corporation | 7 | * Copyright (c) 2008 Intel Corporation |
| 8 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
| 8 | * | 9 | * |
| 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 10 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 10 | * of this software and associated documentation files (the "Software"), to deal | 11 | * of this software and associated documentation files (the "Software"), to deal |
| @@ -33,6 +34,7 @@ | |||
| 33 | 34 | ||
| 34 | #include <linux/kvm_host.h> | 35 | #include <linux/kvm_host.h> |
| 35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
| 37 | #include <linux/workqueue.h> | ||
| 36 | 38 | ||
| 37 | #include "irq.h" | 39 | #include "irq.h" |
| 38 | #include "i8254.h" | 40 | #include "i8254.h" |
| @@ -243,11 +245,22 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
| 243 | { | 245 | { |
| 244 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | 246 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, |
| 245 | irq_ack_notifier); | 247 | irq_ack_notifier); |
| 246 | raw_spin_lock(&ps->inject_lock); | 248 | int value; |
| 247 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | 249 | |
| 250 | spin_lock(&ps->inject_lock); | ||
| 251 | value = atomic_dec_return(&ps->pit_timer.pending); | ||
| 252 | if (value < 0) | ||
| 253 | /* spurious acks can be generated if, for example, the | ||
| 254 | * PIC is being reset. Handle it gracefully here | ||
| 255 | */ | ||
| 248 | atomic_inc(&ps->pit_timer.pending); | 256 | atomic_inc(&ps->pit_timer.pending); |
| 257 | else if (value > 0) | ||
| 258 | /* in this case, we had multiple outstanding pit interrupts | ||
| 259 | * that we needed to inject. Reinject | ||
| 260 | */ | ||
| 261 | queue_work(ps->pit->wq, &ps->pit->expired); | ||
| 249 | ps->irq_ack = 1; | 262 | ps->irq_ack = 1; |
| 250 | raw_spin_unlock(&ps->inject_lock); | 263 | spin_unlock(&ps->inject_lock); |
| 251 | } | 264 | } |
| 252 | 265 | ||
| 253 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 266 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
| @@ -263,10 +276,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
| 263 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 276 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
| 264 | } | 277 | } |
| 265 | 278 | ||
| 266 | static void destroy_pit_timer(struct kvm_timer *pt) | 279 | static void destroy_pit_timer(struct kvm_pit *pit) |
| 267 | { | 280 | { |
| 268 | pr_debug("execute del timer!\n"); | 281 | hrtimer_cancel(&pit->pit_state.pit_timer.timer); |
| 269 | hrtimer_cancel(&pt->timer); | 282 | cancel_work_sync(&pit->expired); |
| 270 | } | 283 | } |
| 271 | 284 | ||
| 272 | static bool kpit_is_periodic(struct kvm_timer *ktimer) | 285 | static bool kpit_is_periodic(struct kvm_timer *ktimer) |
| @@ -280,6 +293,60 @@ static struct kvm_timer_ops kpit_ops = { | |||
| 280 | .is_periodic = kpit_is_periodic, | 293 | .is_periodic = kpit_is_periodic, |
| 281 | }; | 294 | }; |
| 282 | 295 | ||
| 296 | static void pit_do_work(struct work_struct *work) | ||
| 297 | { | ||
| 298 | struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); | ||
| 299 | struct kvm *kvm = pit->kvm; | ||
| 300 | struct kvm_vcpu *vcpu; | ||
| 301 | int i; | ||
| 302 | struct kvm_kpit_state *ps = &pit->pit_state; | ||
| 303 | int inject = 0; | ||
| 304 | |||
| 305 | /* Try to inject pending interrupts when | ||
| 306 | * last one has been acked. | ||
| 307 | */ | ||
| 308 | spin_lock(&ps->inject_lock); | ||
| 309 | if (ps->irq_ack) { | ||
| 310 | ps->irq_ack = 0; | ||
| 311 | inject = 1; | ||
| 312 | } | ||
| 313 | spin_unlock(&ps->inject_lock); | ||
| 314 | if (inject) { | ||
| 315 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | ||
| 316 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | ||
| 317 | |||
| 318 | /* | ||
| 319 | * Provides NMI watchdog support via Virtual Wire mode. | ||
| 320 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
| 321 | * | ||
| 322 | * Note: Our Virtual Wire implementation is simplified, only | ||
| 323 | * propagating PIT interrupts to all VCPUs when they have set | ||
| 324 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
| 325 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
| 326 | */ | ||
| 327 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
| 328 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
| 329 | kvm_apic_nmi_wd_deliver(vcpu); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | |||
| 333 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | ||
| 334 | { | ||
| 335 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
| 336 | struct kvm_pit *pt = ktimer->kvm->arch.vpit; | ||
| 337 | |||
| 338 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | ||
| 339 | atomic_inc(&ktimer->pending); | ||
| 340 | queue_work(pt->wq, &pt->expired); | ||
| 341 | } | ||
| 342 | |||
| 343 | if (ktimer->t_ops->is_periodic(ktimer)) { | ||
| 344 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | ||
| 345 | return HRTIMER_RESTART; | ||
| 346 | } else | ||
| 347 | return HRTIMER_NORESTART; | ||
| 348 | } | ||
| 349 | |||
| 283 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | 350 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) |
| 284 | { | 351 | { |
| 285 | struct kvm_timer *pt = &ps->pit_timer; | 352 | struct kvm_timer *pt = &ps->pit_timer; |
| @@ -291,13 +358,13 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | |||
| 291 | 358 | ||
| 292 | /* TODO The new value only affected after the retriggered */ | 359 | /* TODO The new value only affected after the retriggered */ |
| 293 | hrtimer_cancel(&pt->timer); | 360 | hrtimer_cancel(&pt->timer); |
| 361 | cancel_work_sync(&ps->pit->expired); | ||
| 294 | pt->period = interval; | 362 | pt->period = interval; |
| 295 | ps->is_periodic = is_period; | 363 | ps->is_periodic = is_period; |
| 296 | 364 | ||
| 297 | pt->timer.function = kvm_timer_fn; | 365 | pt->timer.function = pit_timer_fn; |
| 298 | pt->t_ops = &kpit_ops; | 366 | pt->t_ops = &kpit_ops; |
| 299 | pt->kvm = ps->pit->kvm; | 367 | pt->kvm = ps->pit->kvm; |
| 300 | pt->vcpu = pt->kvm->bsp_vcpu; | ||
| 301 | 368 | ||
| 302 | atomic_set(&pt->pending, 0); | 369 | atomic_set(&pt->pending, 0); |
| 303 | ps->irq_ack = 1; | 370 | ps->irq_ack = 1; |
| @@ -346,7 +413,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
| 346 | } | 413 | } |
| 347 | break; | 414 | break; |
| 348 | default: | 415 | default: |
| 349 | destroy_pit_timer(&ps->pit_timer); | 416 | destroy_pit_timer(kvm->arch.vpit); |
| 350 | } | 417 | } |
| 351 | } | 418 | } |
| 352 | 419 | ||
| @@ -625,7 +692,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
| 625 | 692 | ||
| 626 | mutex_init(&pit->pit_state.lock); | 693 | mutex_init(&pit->pit_state.lock); |
| 627 | mutex_lock(&pit->pit_state.lock); | 694 | mutex_lock(&pit->pit_state.lock); |
| 628 | raw_spin_lock_init(&pit->pit_state.inject_lock); | 695 | spin_lock_init(&pit->pit_state.inject_lock); |
| 696 | |||
| 697 | pit->wq = create_singlethread_workqueue("kvm-pit-wq"); | ||
| 698 | if (!pit->wq) { | ||
| 699 | mutex_unlock(&pit->pit_state.lock); | ||
| 700 | kfree(pit); | ||
| 701 | return NULL; | ||
| 702 | } | ||
| 703 | INIT_WORK(&pit->expired, pit_do_work); | ||
| 629 | 704 | ||
| 630 | kvm->arch.vpit = pit; | 705 | kvm->arch.vpit = pit; |
| 631 | pit->kvm = kvm; | 706 | pit->kvm = kvm; |
| @@ -677,6 +752,9 @@ void kvm_free_pit(struct kvm *kvm) | |||
| 677 | struct hrtimer *timer; | 752 | struct hrtimer *timer; |
| 678 | 753 | ||
| 679 | if (kvm->arch.vpit) { | 754 | if (kvm->arch.vpit) { |
| 755 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev); | ||
| 756 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
| 757 | &kvm->arch.vpit->speaker_dev); | ||
| 680 | kvm_unregister_irq_mask_notifier(kvm, 0, | 758 | kvm_unregister_irq_mask_notifier(kvm, 0, |
| 681 | &kvm->arch.vpit->mask_notifier); | 759 | &kvm->arch.vpit->mask_notifier); |
| 682 | kvm_unregister_irq_ack_notifier(kvm, | 760 | kvm_unregister_irq_ack_notifier(kvm, |
| @@ -684,54 +762,10 @@ void kvm_free_pit(struct kvm *kvm) | |||
| 684 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 762 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
| 685 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; | 763 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; |
| 686 | hrtimer_cancel(timer); | 764 | hrtimer_cancel(timer); |
| 765 | cancel_work_sync(&kvm->arch.vpit->expired); | ||
| 687 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); | 766 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); |
| 688 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 767 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
| 768 | destroy_workqueue(kvm->arch.vpit->wq); | ||
| 689 | kfree(kvm->arch.vpit); | 769 | kfree(kvm->arch.vpit); |
| 690 | } | 770 | } |
| 691 | } | 771 | } |
| 692 | |||
| 693 | static void __inject_pit_timer_intr(struct kvm *kvm) | ||
| 694 | { | ||
| 695 | struct kvm_vcpu *vcpu; | ||
| 696 | int i; | ||
| 697 | |||
| 698 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | ||
| 699 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | ||
| 700 | |||
| 701 | /* | ||
| 702 | * Provides NMI watchdog support via Virtual Wire mode. | ||
| 703 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
| 704 | * | ||
| 705 | * Note: Our Virtual Wire implementation is simplified, only | ||
| 706 | * propagating PIT interrupts to all VCPUs when they have set | ||
| 707 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
| 708 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
| 709 | */ | ||
| 710 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
| 711 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
| 712 | kvm_apic_nmi_wd_deliver(vcpu); | ||
| 713 | } | ||
| 714 | |||
| 715 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | ||
| 716 | { | ||
| 717 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | ||
| 718 | struct kvm *kvm = vcpu->kvm; | ||
| 719 | struct kvm_kpit_state *ps; | ||
| 720 | |||
| 721 | if (pit) { | ||
| 722 | int inject = 0; | ||
| 723 | ps = &pit->pit_state; | ||
| 724 | |||
| 725 | /* Try to inject pending interrupts when | ||
| 726 | * last one has been acked. | ||
| 727 | */ | ||
| 728 | raw_spin_lock(&ps->inject_lock); | ||
| 729 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { | ||
| 730 | ps->irq_ack = 0; | ||
| 731 | inject = 1; | ||
| 732 | } | ||
| 733 | raw_spin_unlock(&ps->inject_lock); | ||
| 734 | if (inject) | ||
| 735 | __inject_pit_timer_intr(kvm); | ||
| 736 | } | ||
| 737 | } | ||
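The i8254.c rework moves PIT interrupt injection off the vcpu entry path and onto a per-PIT workqueue: pit_timer_fn() accounts the tick and queues pit->expired, pit_do_work() injects when the previous interrupt was acked, and kvm_pit_ack_irq() re-queues the work if further ticks are still owed; the old __inject_pit_timer_intr()/kvm_inject_pit_timer_irqs() pair is removed above, and irq.c below stops calling it. The pending/ack handshake can be modelled with plain C, with everything collapsed to synchronous calls (a toy model, not the kernel code; locking and the workqueue are omitted):

```c
#include <stdio.h>

struct pit_model {
	int pending;   /* timer ticks not yet delivered */
	int irq_ack;   /* last injected interrupt was acknowledged */
};

static void do_work(struct pit_model *p)      /* pit_do_work() analogue */
{
	if (p->irq_ack) {
		p->irq_ack = 0;
		printf("inject PIT irq (pending=%d)\n", p->pending);
	}
}

static void timer_fired(struct pit_model *p)  /* pit_timer_fn() analogue */
{
	p->pending++;          /* account the tick ... */
	do_work(p);            /* ... and defer injection to the "work" */
}

static void guest_acked(struct pit_model *p)  /* kvm_pit_ack_irq() analogue */
{
	int value = --p->pending;

	if (value < 0)
		p->pending++;  /* spurious ack, e.g. the PIC being reset */
	p->irq_ack = 1;
	if (value > 0)
		do_work(p);    /* more ticks still owed: reinject */
}

int main(void)
{
	struct pit_model p = { .pending = 0, .irq_ack = 1 };

	timer_fired(&p);   /* injected immediately */
	timer_fired(&p);   /* stays pending until the first ack */
	guest_acked(&p);   /* reinjects the second tick */
	guest_acked(&p);
	return 0;
}
```

The same series also downgrades pit_state.inject_lock from raw_spinlock_t to spinlock_t, as the i8254.h hunk below shows.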
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 900d6b0ba7c2..46d08ca0b48f 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
| @@ -27,7 +27,7 @@ struct kvm_kpit_state { | |||
| 27 | u32 speaker_data_on; | 27 | u32 speaker_data_on; |
| 28 | struct mutex lock; | 28 | struct mutex lock; |
| 29 | struct kvm_pit *pit; | 29 | struct kvm_pit *pit; |
| 30 | raw_spinlock_t inject_lock; | 30 | spinlock_t inject_lock; |
| 31 | unsigned long irq_ack; | 31 | unsigned long irq_ack; |
| 32 | struct kvm_irq_ack_notifier irq_ack_notifier; | 32 | struct kvm_irq_ack_notifier irq_ack_notifier; |
| 33 | }; | 33 | }; |
| @@ -40,6 +40,8 @@ struct kvm_pit { | |||
| 40 | struct kvm_kpit_state pit_state; | 40 | struct kvm_kpit_state pit_state; |
| 41 | int irq_source_id; | 41 | int irq_source_id; |
| 42 | struct kvm_irq_mask_notifier mask_notifier; | 42 | struct kvm_irq_mask_notifier mask_notifier; |
| 43 | struct workqueue_struct *wq; | ||
| 44 | struct work_struct expired; | ||
| 43 | }; | 45 | }; |
| 44 | 46 | ||
| 45 | #define KVM_PIT_BASE_ADDRESS 0x40 | 47 | #define KVM_PIT_BASE_ADDRESS 0x40 |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 93825ff3338f..8d10c063d7f2 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | * | 3 | * |
| 4 | * Copyright (c) 2003-2004 Fabrice Bellard | 4 | * Copyright (c) 2003-2004 Fabrice Bellard |
| 5 | * Copyright (c) 2007 Intel Corporation | 5 | * Copyright (c) 2007 Intel Corporation |
| 6 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
| 6 | * | 7 | * |
| 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 8 | * of this software and associated documentation files (the "Software"), to deal | 9 | * of this software and associated documentation files (the "Software"), to deal |
| @@ -33,6 +34,8 @@ | |||
| 33 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
| 34 | #include "trace.h" | 35 | #include "trace.h" |
| 35 | 36 | ||
| 37 | static void pic_irq_request(struct kvm *kvm, int level); | ||
| 38 | |||
| 36 | static void pic_lock(struct kvm_pic *s) | 39 | static void pic_lock(struct kvm_pic *s) |
| 37 | __acquires(&s->lock) | 40 | __acquires(&s->lock) |
| 38 | { | 41 | { |
| @@ -43,16 +46,25 @@ static void pic_unlock(struct kvm_pic *s) | |||
| 43 | __releases(&s->lock) | 46 | __releases(&s->lock) |
| 44 | { | 47 | { |
| 45 | bool wakeup = s->wakeup_needed; | 48 | bool wakeup = s->wakeup_needed; |
| 46 | struct kvm_vcpu *vcpu; | 49 | struct kvm_vcpu *vcpu, *found = NULL; |
| 50 | int i; | ||
| 47 | 51 | ||
| 48 | s->wakeup_needed = false; | 52 | s->wakeup_needed = false; |
| 49 | 53 | ||
| 50 | raw_spin_unlock(&s->lock); | 54 | raw_spin_unlock(&s->lock); |
| 51 | 55 | ||
| 52 | if (wakeup) { | 56 | if (wakeup) { |
| 53 | vcpu = s->kvm->bsp_vcpu; | 57 | kvm_for_each_vcpu(i, vcpu, s->kvm) { |
| 54 | if (vcpu) | 58 | if (kvm_apic_accept_pic_intr(vcpu)) { |
| 55 | kvm_vcpu_kick(vcpu); | 59 | found = vcpu; |
| 60 | break; | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | if (!found) | ||
| 65 | found = s->kvm->bsp_vcpu; | ||
| 66 | |||
| 67 | kvm_vcpu_kick(found); | ||
| 56 | } | 68 | } |
| 57 | } | 69 | } |
| 58 | 70 | ||
| @@ -173,10 +185,7 @@ static void pic_update_irq(struct kvm_pic *s) | |||
| 173 | pic_set_irq1(&s->pics[0], 2, 0); | 185 | pic_set_irq1(&s->pics[0], 2, 0); |
| 174 | } | 186 | } |
| 175 | irq = pic_get_irq(&s->pics[0]); | 187 | irq = pic_get_irq(&s->pics[0]); |
| 176 | if (irq >= 0) | 188 | pic_irq_request(s->kvm, irq >= 0); |
| 177 | s->irq_request(s->irq_request_opaque, 1); | ||
| 178 | else | ||
| 179 | s->irq_request(s->irq_request_opaque, 0); | ||
| 180 | } | 189 | } |
| 181 | 190 | ||
| 182 | void kvm_pic_update_irq(struct kvm_pic *s) | 191 | void kvm_pic_update_irq(struct kvm_pic *s) |
| @@ -261,8 +270,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 261 | void kvm_pic_reset(struct kvm_kpic_state *s) | 270 | void kvm_pic_reset(struct kvm_kpic_state *s) |
| 262 | { | 271 | { |
| 263 | int irq; | 272 | int irq; |
| 264 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 273 | struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu; |
| 265 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; | ||
| 266 | u8 irr = s->irr, isr = s->imr; | 274 | u8 irr = s->irr, isr = s->imr; |
| 267 | 275 | ||
| 268 | s->last_irr = 0; | 276 | s->last_irr = 0; |
| @@ -301,8 +309,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 301 | /* | 309 | /* |
| 302 | * deassert a pending interrupt | 310 | * deassert a pending interrupt |
| 303 | */ | 311 | */ |
| 304 | s->pics_state->irq_request(s->pics_state-> | 312 | pic_irq_request(s->pics_state->kvm, 0); |
| 305 | irq_request_opaque, 0); | ||
| 306 | s->init_state = 1; | 313 | s->init_state = 1; |
| 307 | s->init4 = val & 1; | 314 | s->init4 = val & 1; |
| 308 | if (val & 0x02) | 315 | if (val & 0x02) |
| @@ -356,10 +363,20 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 356 | } | 363 | } |
| 357 | } else | 364 | } else |
| 358 | switch (s->init_state) { | 365 | switch (s->init_state) { |
| 359 | case 0: /* normal mode */ | 366 | case 0: { /* normal mode */ |
| 367 | u8 imr_diff = s->imr ^ val, | ||
| 368 | off = (s == &s->pics_state->pics[0]) ? 0 : 8; | ||
| 360 | s->imr = val; | 369 | s->imr = val; |
| 370 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | ||
| 371 | if (imr_diff & (1 << irq)) | ||
| 372 | kvm_fire_mask_notifiers( | ||
| 373 | s->pics_state->kvm, | ||
| 374 | SELECT_PIC(irq + off), | ||
| 375 | irq + off, | ||
| 376 | !!(s->imr & (1 << irq))); | ||
| 361 | pic_update_irq(s->pics_state); | 377 | pic_update_irq(s->pics_state); |
| 362 | break; | 378 | break; |
| 379 | } | ||
| 363 | case 1: | 380 | case 1: |
| 364 | s->irq_base = val & 0xf8; | 381 | s->irq_base = val & 0xf8; |
| 365 | s->init_state = 2; | 382 | s->init_state = 2; |
| @@ -518,9 +535,8 @@ static int picdev_read(struct kvm_io_device *this, | |||
| 518 | /* | 535 | /* |
| 519 | * callback when PIC0 irq status changed | 536 | * callback when PIC0 irq status changed |
| 520 | */ | 537 | */ |
| 521 | static void pic_irq_request(void *opaque, int level) | 538 | static void pic_irq_request(struct kvm *kvm, int level) |
| 522 | { | 539 | { |
| 523 | struct kvm *kvm = opaque; | ||
| 524 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; | 540 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; |
| 525 | struct kvm_pic *s = pic_irqchip(kvm); | 541 | struct kvm_pic *s = pic_irqchip(kvm); |
| 526 | int irq = pic_get_irq(&s->pics[0]); | 542 | int irq = pic_get_irq(&s->pics[0]); |
| @@ -549,8 +565,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
| 549 | s->kvm = kvm; | 565 | s->kvm = kvm; |
| 550 | s->pics[0].elcr_mask = 0xf8; | 566 | s->pics[0].elcr_mask = 0xf8; |
| 551 | s->pics[1].elcr_mask = 0xde; | 567 | s->pics[1].elcr_mask = 0xde; |
| 552 | s->irq_request = pic_irq_request; | ||
| 553 | s->irq_request_opaque = kvm; | ||
| 554 | s->pics[0].pics_state = s; | 568 | s->pics[0].pics_state = s; |
| 555 | s->pics[1].pics_state = s; | 569 | s->pics[1].pics_state = s; |
| 556 | 570 | ||
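Two behavioural changes in i8259.c above: pic_unlock() now kicks the first vcpu that can actually accept a PIC interrupt (falling back to the BSP) instead of unconditionally kicking the BSP, and a write to OCW1 fires kvm_fire_mask_notifiers() for every IMR bit that toggled, offsetting the pin number by 8 for the slave PIC. The mask-notifier part reduces to a diff-and-iterate over the IMR byte; a small self-contained sketch (the notifier hook here is a printf stand-in, not the kernel API):

```c
#include <stdint.h>
#include <stdio.h>

#define PIC_NUM_PINS 16

/* Stand-in for kvm_fire_mask_notifiers(kvm, SELECT_PIC(gsi), gsi, masked). */
static void fire_mask_notifier(int gsi, int masked)
{
	printf("gsi %2d -> %s\n", gsi, masked ? "masked" : "unmasked");
}

/* Compare old and new IMR and notify once per pin whose mask bit changed. */
static void imr_write(uint8_t *imr, uint8_t val, int is_slave)
{
	uint8_t diff = *imr ^ val;
	int off = is_slave ? 8 : 0;
	int irq;

	*imr = val;
	for (irq = 0; irq < PIC_NUM_PINS / 2; irq++)
		if (diff & (1 << irq))
			fire_mask_notifier(irq + off, !!(*imr & (1 << irq)));
}

int main(void)
{
	uint8_t master_imr = 0x00;

	imr_write(&master_imr, 0x05, 0);   /* pins 0 and 2 become masked */
	imr_write(&master_imr, 0x04, 0);   /* pin 0 becomes unmasked */
	return 0;
}
```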
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 96dfbb6ad2a9..2095a049835e 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * irq.c: API for in kernel interrupt controller | 2 | * irq.c: API for in kernel interrupt controller |
| 3 | * Copyright (c) 2007, Intel Corporation. | 3 | * Copyright (c) 2007, Intel Corporation. |
| 4 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
| 4 | * | 5 | * |
| 5 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
| 6 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
| @@ -89,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | |||
| 89 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 90 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
| 90 | { | 91 | { |
| 91 | kvm_inject_apic_timer_irqs(vcpu); | 92 | kvm_inject_apic_timer_irqs(vcpu); |
| 92 | kvm_inject_pit_timer_irqs(vcpu); | ||
| 93 | /* TODO: PIT, RTC etc. */ | 93 | /* TODO: PIT, RTC etc. */ |
| 94 | } | 94 | } |
| 95 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | 95 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index cd1f362f413d..ffed06871c5c 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -38,8 +38,6 @@ | |||
| 38 | struct kvm; | 38 | struct kvm; |
| 39 | struct kvm_vcpu; | 39 | struct kvm_vcpu; |
| 40 | 40 | ||
| 41 | typedef void irq_request_func(void *opaque, int level); | ||
| 42 | |||
| 43 | struct kvm_kpic_state { | 41 | struct kvm_kpic_state { |
| 44 | u8 last_irr; /* edge detection */ | 42 | u8 last_irr; /* edge detection */ |
| 45 | u8 irr; /* interrupt request register */ | 43 | u8 irr; /* interrupt request register */ |
| @@ -67,8 +65,6 @@ struct kvm_pic { | |||
| 67 | unsigned pending_acks; | 65 | unsigned pending_acks; |
| 68 | struct kvm *kvm; | 66 | struct kvm *kvm; |
| 69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
| 70 | irq_request_func *irq_request; | ||
| 71 | void *irq_request_opaque; | ||
| 72 | int output; /* intr from master PIC */ | 68 | int output; /* intr from master PIC */ |
| 73 | struct kvm_io_device dev; | 69 | struct kvm_io_device dev; |
| 74 | void (*ack_notifier)(void *opaque, int irq); | 70 | void (*ack_notifier)(void *opaque, int irq); |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index cff851cf5322..6491ac8e755b 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
| @@ -36,6 +36,8 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) | |||
| 36 | 36 | ||
| 37 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | 37 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) |
| 38 | { | 38 | { |
| 39 | might_sleep(); /* on svm */ | ||
| 40 | |||
| 39 | if (!test_bit(VCPU_EXREG_PDPTR, | 41 | if (!test_bit(VCPU_EXREG_PDPTR, |
| 40 | (unsigned long *)&vcpu->arch.regs_avail)) | 42 | (unsigned long *)&vcpu->arch.regs_avail)) |
| 41 | kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); | 43 | kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); |
| @@ -69,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | |||
| 69 | return kvm_read_cr4_bits(vcpu, ~0UL); | 71 | return kvm_read_cr4_bits(vcpu, ~0UL); |
| 70 | } | 72 | } |
| 71 | 73 | ||
| 74 | static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | ||
| 75 | { | ||
| 76 | return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) | ||
| 77 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); | ||
| 78 | } | ||
| 79 | |||
| 72 | #endif | 80 | #endif |
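The new kvm_read_edx_eax() helper packs the low 32 bits of RAX and RDX into one 64-bit value, the EDX:EAX register pairing used by instructions such as wrmsr and xsetbv. The arithmetic in isolation (plain C, names are illustrative):

```c
#include <stdint.h>
#include <stdio.h>

/* EDX supplies the high half, EAX the low half, regardless of what the
 * upper 32 bits of the 64-bit registers contain. */
static uint64_t read_edx_eax(uint64_t rax, uint64_t rdx)
{
	return (rax & 0xffffffffu) | ((uint64_t)(rdx & 0xffffffffu) << 32);
}

int main(void)
{
	printf("%#llx\n",
	       (unsigned long long)read_edx_eax(0xdeadbeefcafef00dULL,
						0x0123456789abcdefULL));
	/* prints 0x89abcdefcafef00d */
	return 0;
}
```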
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1eb7a4ae0c9c..77d8c0f4817d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | * Copyright (C) 2006 Qumranet, Inc. | 5 | * Copyright (C) 2006 Qumranet, Inc. |
| 6 | * Copyright (C) 2007 Novell | 6 | * Copyright (C) 2007 Novell |
| 7 | * Copyright (C) 2007 Intel | 7 | * Copyright (C) 2007 Intel |
| 8 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
| 8 | * | 9 | * |
| 9 | * Authors: | 10 | * Authors: |
| 10 | * Dor Laor <dor.laor@qumranet.com> | 11 | * Dor Laor <dor.laor@qumranet.com> |
| @@ -328,7 +329,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
| 328 | "dest_mode 0x%x, short_hand 0x%x\n", | 329 | "dest_mode 0x%x, short_hand 0x%x\n", |
| 329 | target, source, dest, dest_mode, short_hand); | 330 | target, source, dest, dest_mode, short_hand); |
| 330 | 331 | ||
| 331 | ASSERT(!target); | 332 | ASSERT(target); |
| 332 | switch (short_hand) { | 333 | switch (short_hand) { |
| 333 | case APIC_DEST_NOSHORT: | 334 | case APIC_DEST_NOSHORT: |
| 334 | if (dest_mode == 0) | 335 | if (dest_mode == 0) |
| @@ -533,7 +534,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write) | |||
| 533 | struct kvm_vcpu *vcpu = apic->vcpu; | 534 | struct kvm_vcpu *vcpu = apic->vcpu; |
| 534 | struct kvm_run *run = vcpu->run; | 535 | struct kvm_run *run = vcpu->run; |
| 535 | 536 | ||
| 536 | set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); | 537 | kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); |
| 537 | run->tpr_access.rip = kvm_rip_read(vcpu); | 538 | run->tpr_access.rip = kvm_rip_read(vcpu); |
| 538 | run->tpr_access.is_write = write; | 539 | run->tpr_access.is_write = write; |
| 539 | } | 540 | } |
| @@ -1106,13 +1107,11 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) | |||
| 1106 | u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); | 1107 | u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); |
| 1107 | int r = 0; | 1108 | int r = 0; |
| 1108 | 1109 | ||
| 1109 | if (kvm_vcpu_is_bsp(vcpu)) { | 1110 | if (!apic_hw_enabled(vcpu->arch.apic)) |
| 1110 | if (!apic_hw_enabled(vcpu->arch.apic)) | 1111 | r = 1; |
| 1111 | r = 1; | 1112 | if ((lvt0 & APIC_LVT_MASKED) == 0 && |
| 1112 | if ((lvt0 & APIC_LVT_MASKED) == 0 && | 1113 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) |
| 1113 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) | 1114 | r = 1; |
| 1114 | r = 1; | ||
| 1115 | } | ||
| 1116 | return r; | 1115 | return r; |
| 1117 | } | 1116 | } |
| 1118 | 1117 | ||
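kvm_apic_accept_pic_intr() above drops its kvm_vcpu_is_bsp() guard: any vcpu now reports that it can take a PIC interrupt if its local APIC is hardware-disabled or if LVT0 is unmasked and programmed for ExtINT delivery. That is the predicate the new pic_unlock() loop in i8259.c relies on when choosing which vcpu to kick. A compact model of the check (the bit layout mirrors LVT0; constants are written out here rather than taken from kernel headers):

```c
#include <stdio.h>

#define LVT_MASKED        (1u << 16)            /* LVT0 mask bit */
#define DELIVERY_MODE(x)  (((x) >> 8) & 0x7)    /* bits 10:8 */
#define MODE_EXTINT       0x7

static int accepts_pic_intr(int apic_hw_enabled, unsigned int lvt0)
{
	if (!apic_hw_enabled)
		return 1;                        /* APIC off: legacy PIC wiring */
	if (!(lvt0 & LVT_MASKED) && DELIVERY_MODE(lvt0) == MODE_EXTINT)
		return 1;                        /* LVT0 routed as ExtINT */
	return 0;
}

int main(void)
{
	printf("%d\n", accepts_pic_intr(1, MODE_EXTINT << 8));  /* 1 */
	printf("%d\n", accepts_pic_intr(1, LVT_MASKED));        /* 0 */
	printf("%d\n", accepts_pic_intr(0, 0));                 /* 1 */
	return 0;
}
```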
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b1ed0a1a5913..0dcc95e09876 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | * MMU support | 7 | * MMU support |
| 8 | * | 8 | * |
| 9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
| 10 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 10 | * | 11 | * |
| 11 | * Authors: | 12 | * Authors: |
| 12 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
| @@ -32,6 +33,7 @@ | |||
| 32 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
| 33 | #include <linux/srcu.h> | 34 | #include <linux/srcu.h> |
| 34 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
| 36 | #include <linux/uaccess.h> | ||
| 35 | 37 | ||
| 36 | #include <asm/page.h> | 38 | #include <asm/page.h> |
| 37 | #include <asm/cmpxchg.h> | 39 | #include <asm/cmpxchg.h> |
| @@ -90,8 +92,6 @@ module_param(oos_shadow, bool, 0644); | |||
| 90 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 92 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 |
| 91 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 93 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
| 92 | 94 | ||
| 93 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | ||
| 94 | |||
| 95 | #define PT64_LEVEL_BITS 9 | 95 | #define PT64_LEVEL_BITS 9 |
| 96 | 96 | ||
| 97 | #define PT64_LEVEL_SHIFT(level) \ | 97 | #define PT64_LEVEL_SHIFT(level) \ |
| @@ -173,7 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
| 173 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
| 174 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
| 175 | 175 | ||
| 176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | 176 | typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte); |
| 177 | 177 | ||
| 178 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
| 179 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
| @@ -288,6 +288,35 @@ static void __set_spte(u64 *sptep, u64 spte) | |||
| 288 | #endif | 288 | #endif |
| 289 | } | 289 | } |
| 290 | 290 | ||
| 291 | static u64 __xchg_spte(u64 *sptep, u64 new_spte) | ||
| 292 | { | ||
| 293 | #ifdef CONFIG_X86_64 | ||
| 294 | return xchg(sptep, new_spte); | ||
| 295 | #else | ||
| 296 | u64 old_spte; | ||
| 297 | |||
| 298 | do { | ||
| 299 | old_spte = *sptep; | ||
| 300 | } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte); | ||
| 301 | |||
| 302 | return old_spte; | ||
| 303 | #endif | ||
| 304 | } | ||
| 305 | |||
| 306 | static void update_spte(u64 *sptep, u64 new_spte) | ||
| 307 | { | ||
| 308 | u64 old_spte; | ||
| 309 | |||
| 310 | if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) || | ||
| 311 | !is_rmap_spte(*sptep)) | ||
| 312 | __set_spte(sptep, new_spte); | ||
| 313 | else { | ||
| 314 | old_spte = __xchg_spte(sptep, new_spte); | ||
| 315 | if (old_spte & shadow_accessed_mask) | ||
| 316 | mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte))); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 291 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 320 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
| 292 | struct kmem_cache *base_cache, int min) | 321 | struct kmem_cache *base_cache, int min) |
| 293 | { | 322 | { |
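update_spte() above exists because the CPU can set the accessed bit in a shadow PTE concurrently with the host rewriting the entry: a plain read-modify-write could lose that bit, so when it matters the entry is swapped with an atomic exchange and the old value's accessed bit is forwarded to the backing page via mark_page_accessed(). The race-avoidance idea in isolation, using C11 atomics rather than the kernel's xchg/cmpxchg64 (bit position and names are illustrative only, and the extra shadow_accessed_mask/is_rmap_spte checks are left out):

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define ACCESSED_BIT (1ULL << 5)   /* illustrative position, not EPT/NPT layout */

static void update_entry(_Atomic uint64_t *sptep, uint64_t new_spte)
{
	uint64_t old_spte;

	if (new_spte & ACCESSED_BIT) {
		/* nothing can be lost: a plain store is enough */
		atomic_store(sptep, new_spte);
		return;
	}
	/* swap atomically so an accessed bit set meanwhile is observed */
	old_spte = atomic_exchange(sptep, new_spte);
	if (old_spte & ACCESSED_BIT)
		printf("carry accessed state to the backing page\n");
}

int main(void)
{
	_Atomic uint64_t spte = 0x1000 | ACCESSED_BIT;

	update_entry(&spte, 0x2000);   /* clears A and reports the old bit */
	return 0;
}
```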
| @@ -304,10 +333,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
| 304 | return 0; | 333 | return 0; |
| 305 | } | 334 | } |
| 306 | 335 | ||
| 307 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) | 336 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, |
| 337 | struct kmem_cache *cache) | ||
| 308 | { | 338 | { |
| 309 | while (mc->nobjs) | 339 | while (mc->nobjs) |
| 310 | kfree(mc->objects[--mc->nobjs]); | 340 | kmem_cache_free(cache, mc->objects[--mc->nobjs]); |
| 311 | } | 341 | } |
| 312 | 342 | ||
| 313 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | 343 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
| @@ -355,10 +385,11 @@ out: | |||
| 355 | 385 | ||
| 356 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | 386 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) |
| 357 | { | 387 | { |
| 358 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache); | 388 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache); |
| 359 | mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache); | 389 | mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache); |
| 360 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); | 390 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); |
| 361 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); | 391 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, |
| 392 | mmu_page_header_cache); | ||
| 362 | } | 393 | } |
| 363 | 394 | ||
| 364 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | 395 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, |
| @@ -379,7 +410,7 @@ static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) | |||
| 379 | 410 | ||
| 380 | static void mmu_free_pte_chain(struct kvm_pte_chain *pc) | 411 | static void mmu_free_pte_chain(struct kvm_pte_chain *pc) |
| 381 | { | 412 | { |
| 382 | kfree(pc); | 413 | kmem_cache_free(pte_chain_cache, pc); |
| 383 | } | 414 | } |
| 384 | 415 | ||
| 385 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | 416 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) |
| @@ -390,7 +421,23 @@ static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | |||
| 390 | 421 | ||
| 391 | static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) | 422 | static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) |
| 392 | { | 423 | { |
| 393 | kfree(rd); | 424 | kmem_cache_free(rmap_desc_cache, rd); |
| 425 | } | ||
| 426 | |||
| 427 | static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) | ||
| 428 | { | ||
| 429 | if (!sp->role.direct) | ||
| 430 | return sp->gfns[index]; | ||
| 431 | |||
| 432 | return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS)); | ||
| 433 | } | ||
| 434 | |||
| 435 | static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) | ||
| 436 | { | ||
| 437 | if (sp->role.direct) | ||
| 438 | BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); | ||
| 439 | else | ||
| 440 | sp->gfns[index] = gfn; | ||
| 394 | } | 441 | } |
| 395 | 442 | ||
| 396 | /* | 443 | /* |
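kvm_mmu_page_get_gfn()/kvm_mmu_page_set_gfn() above let direct-mapped shadow pages stop carrying a gfns[] array (the allocation and freeing hunks further down skip it): for a direct page the gfn behind slot index is computed from the page's base gfn and its level. The index-to-gfn arithmetic on its own:

```c
#include <stdio.h>

#define PT64_LEVEL_BITS 9   /* 512 entries per table level */

/* For a direct-mapped shadow page, entry 'index' at 'level' covers the gfn
 * range starting here (level 1 = 4K leaf entries, level 2 = 2M, ...). */
static unsigned long direct_gfn(unsigned long base_gfn, int level, int index)
{
	return base_gfn + ((unsigned long)index << ((level - 1) * PT64_LEVEL_BITS));
}

int main(void)
{
	/* entry 3 of a level-2 direct page based at gfn 0x100000 */
	printf("%#lx\n", direct_gfn(0x100000, 2, 3));   /* 0x100600 */
	return 0;
}
```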
| @@ -403,8 +450,8 @@ static int *slot_largepage_idx(gfn_t gfn, | |||
| 403 | { | 450 | { |
| 404 | unsigned long idx; | 451 | unsigned long idx; |
| 405 | 452 | ||
| 406 | idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - | 453 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
| 407 | (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); | 454 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
| 408 | return &slot->lpage_info[level - 2][idx].write_count; | 455 | return &slot->lpage_info[level - 2][idx].write_count; |
| 409 | } | 456 | } |
| 410 | 457 | ||
| @@ -414,9 +461,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) | |||
| 414 | int *write_count; | 461 | int *write_count; |
| 415 | int i; | 462 | int i; |
| 416 | 463 | ||
| 417 | gfn = unalias_gfn(kvm, gfn); | 464 | slot = gfn_to_memslot(kvm, gfn); |
| 418 | |||
| 419 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 420 | for (i = PT_DIRECTORY_LEVEL; | 465 | for (i = PT_DIRECTORY_LEVEL; |
| 421 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 466 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
| 422 | write_count = slot_largepage_idx(gfn, slot, i); | 467 | write_count = slot_largepage_idx(gfn, slot, i); |
| @@ -430,8 +475,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
| 430 | int *write_count; | 475 | int *write_count; |
| 431 | int i; | 476 | int i; |
| 432 | 477 | ||
| 433 | gfn = unalias_gfn(kvm, gfn); | 478 | slot = gfn_to_memslot(kvm, gfn); |
| 434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 435 | for (i = PT_DIRECTORY_LEVEL; | 479 | for (i = PT_DIRECTORY_LEVEL; |
| 436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 480 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
| 437 | write_count = slot_largepage_idx(gfn, slot, i); | 481 | write_count = slot_largepage_idx(gfn, slot, i); |
| @@ -447,8 +491,7 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
| 447 | struct kvm_memory_slot *slot; | 491 | struct kvm_memory_slot *slot; |
| 448 | int *largepage_idx; | 492 | int *largepage_idx; |
| 449 | 493 | ||
| 450 | gfn = unalias_gfn(kvm, gfn); | 494 | slot = gfn_to_memslot(kvm, gfn); |
| 451 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 452 | if (slot) { | 495 | if (slot) { |
| 453 | largepage_idx = slot_largepage_idx(gfn, slot, level); | 496 | largepage_idx = slot_largepage_idx(gfn, slot, level); |
| 454 | return *largepage_idx; | 497 | return *largepage_idx; |
| @@ -501,7 +544,6 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
| 501 | 544 | ||
| 502 | /* | 545 | /* |
| 503 | * Take gfn and return the reverse mapping to it. | 546 | * Take gfn and return the reverse mapping to it. |
| 504 | * Note: gfn must be unaliased before this function get called | ||
| 505 | */ | 547 | */ |
| 506 | 548 | ||
| 507 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 549 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
| @@ -513,8 +555,8 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | |||
| 513 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 555 | if (likely(level == PT_PAGE_TABLE_LEVEL)) |
| 514 | return &slot->rmap[gfn - slot->base_gfn]; | 556 | return &slot->rmap[gfn - slot->base_gfn]; |
| 515 | 557 | ||
| 516 | idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - | 558 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
| 517 | (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); | 559 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
| 518 | 560 | ||
| 519 | return &slot->lpage_info[level - 2][idx].rmap_pde; | 561 | return &slot->lpage_info[level - 2][idx].rmap_pde; |
| 520 | } | 562 | } |
| @@ -541,9 +583,8 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
| 541 | 583 | ||
| 542 | if (!is_rmap_spte(*spte)) | 584 | if (!is_rmap_spte(*spte)) |
| 543 | return count; | 585 | return count; |
| 544 | gfn = unalias_gfn(vcpu->kvm, gfn); | ||
| 545 | sp = page_header(__pa(spte)); | 586 | sp = page_header(__pa(spte)); |
| 546 | sp->gfns[spte - sp->spt] = gfn; | 587 | kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); |
| 547 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 588 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
| 548 | if (!*rmapp) { | 589 | if (!*rmapp) { |
| 549 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); | 590 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); |
| @@ -600,19 +641,13 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
| 600 | struct kvm_rmap_desc *desc; | 641 | struct kvm_rmap_desc *desc; |
| 601 | struct kvm_rmap_desc *prev_desc; | 642 | struct kvm_rmap_desc *prev_desc; |
| 602 | struct kvm_mmu_page *sp; | 643 | struct kvm_mmu_page *sp; |
| 603 | pfn_t pfn; | 644 | gfn_t gfn; |
| 604 | unsigned long *rmapp; | 645 | unsigned long *rmapp; |
| 605 | int i; | 646 | int i; |
| 606 | 647 | ||
| 607 | if (!is_rmap_spte(*spte)) | ||
| 608 | return; | ||
| 609 | sp = page_header(__pa(spte)); | 648 | sp = page_header(__pa(spte)); |
| 610 | pfn = spte_to_pfn(*spte); | 649 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); |
| 611 | if (*spte & shadow_accessed_mask) | 650 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); |
| 612 | kvm_set_pfn_accessed(pfn); | ||
| 613 | if (is_writable_pte(*spte)) | ||
| 614 | kvm_set_pfn_dirty(pfn); | ||
| 615 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | ||
| 616 | if (!*rmapp) { | 651 | if (!*rmapp) { |
| 617 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 652 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
| 618 | BUG(); | 653 | BUG(); |
| @@ -644,6 +679,32 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
| 644 | } | 679 | } |
| 645 | } | 680 | } |
| 646 | 681 | ||
| 682 | static void set_spte_track_bits(u64 *sptep, u64 new_spte) | ||
| 683 | { | ||
| 684 | pfn_t pfn; | ||
| 685 | u64 old_spte = *sptep; | ||
| 686 | |||
| 687 | if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) || | ||
| 688 | old_spte & shadow_accessed_mask) { | ||
| 689 | __set_spte(sptep, new_spte); | ||
| 690 | } else | ||
| 691 | old_spte = __xchg_spte(sptep, new_spte); | ||
| 692 | |||
| 693 | if (!is_rmap_spte(old_spte)) | ||
| 694 | return; | ||
| 695 | pfn = spte_to_pfn(old_spte); | ||
| 696 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | ||
| 697 | kvm_set_pfn_accessed(pfn); | ||
| 698 | if (is_writable_pte(old_spte)) | ||
| 699 | kvm_set_pfn_dirty(pfn); | ||
| 700 | } | ||
| 701 | |||
| 702 | static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) | ||
| 703 | { | ||
| 704 | set_spte_track_bits(sptep, new_spte); | ||
| 705 | rmap_remove(kvm, sptep); | ||
| 706 | } | ||
| 707 | |||
| 647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 708 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
| 648 | { | 709 | { |
| 649 | struct kvm_rmap_desc *desc; | 710 | struct kvm_rmap_desc *desc; |
| @@ -676,7 +737,6 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 676 | u64 *spte; | 737 | u64 *spte; |
| 677 | int i, write_protected = 0; | 738 | int i, write_protected = 0; |
| 678 | 739 | ||
| 679 | gfn = unalias_gfn(kvm, gfn); | ||
| 680 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); | 740 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); |
| 681 | 741 | ||
| 682 | spte = rmap_next(kvm, rmapp, NULL); | 742 | spte = rmap_next(kvm, rmapp, NULL); |
| @@ -685,7 +745,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 685 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 745 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
| 686 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 746 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
| 687 | if (is_writable_pte(*spte)) { | 747 | if (is_writable_pte(*spte)) { |
| 688 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); | 748 | update_spte(spte, *spte & ~PT_WRITABLE_MASK); |
| 689 | write_protected = 1; | 749 | write_protected = 1; |
| 690 | } | 750 | } |
| 691 | spte = rmap_next(kvm, rmapp, spte); | 751 | spte = rmap_next(kvm, rmapp, spte); |
| @@ -709,9 +769,9 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 709 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 769 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); |
| 710 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 770 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
| 711 | if (is_writable_pte(*spte)) { | 771 | if (is_writable_pte(*spte)) { |
| 712 | rmap_remove(kvm, spte); | 772 | drop_spte(kvm, spte, |
| 773 | shadow_trap_nonpresent_pte); | ||
| 713 | --kvm->stat.lpages; | 774 | --kvm->stat.lpages; |
| 714 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
| 715 | spte = NULL; | 775 | spte = NULL; |
| 716 | write_protected = 1; | 776 | write_protected = 1; |
| 717 | } | 777 | } |
| @@ -731,8 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 731 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | 791 | while ((spte = rmap_next(kvm, rmapp, NULL))) { |
| 732 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 792 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
| 733 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | 793 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); |
| 734 | rmap_remove(kvm, spte); | 794 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); |
| 735 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
| 736 | need_tlb_flush = 1; | 795 | need_tlb_flush = 1; |
| 737 | } | 796 | } |
| 738 | return need_tlb_flush; | 797 | return need_tlb_flush; |
| @@ -754,8 +813,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 754 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | 813 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); |
| 755 | need_flush = 1; | 814 | need_flush = 1; |
| 756 | if (pte_write(*ptep)) { | 815 | if (pte_write(*ptep)) { |
| 757 | rmap_remove(kvm, spte); | 816 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); |
| 758 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
| 759 | spte = rmap_next(kvm, rmapp, NULL); | 817 | spte = rmap_next(kvm, rmapp, NULL); |
| 760 | } else { | 818 | } else { |
| 761 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | 819 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); |
| @@ -763,9 +821,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 763 | 821 | ||
| 764 | new_spte &= ~PT_WRITABLE_MASK; | 822 | new_spte &= ~PT_WRITABLE_MASK; |
| 765 | new_spte &= ~SPTE_HOST_WRITEABLE; | 823 | new_spte &= ~SPTE_HOST_WRITEABLE; |
| 766 | if (is_writable_pte(*spte)) | 824 | new_spte &= ~shadow_accessed_mask; |
| 767 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | 825 | set_spte_track_bits(spte, new_spte); |
| 768 | __set_spte(spte, new_spte); | ||
| 769 | spte = rmap_next(kvm, rmapp, spte); | 826 | spte = rmap_next(kvm, rmapp, spte); |
| 770 | } | 827 | } |
| 771 | } | 828 | } |
| @@ -799,8 +856,12 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
| 799 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); | 856 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
| 800 | 857 | ||
| 801 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 858 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
| 802 | int idx = gfn_offset; | 859 | unsigned long idx; |
| 803 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 860 | int sh; |
| 861 | |||
| 862 | sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j); | ||
| 863 | idx = ((memslot->base_gfn+gfn_offset) >> sh) - | ||
| 864 | (memslot->base_gfn >> sh); | ||
| 804 | ret |= handler(kvm, | 865 | ret |= handler(kvm, |
| 805 | &memslot->lpage_info[j][idx].rmap_pde, | 866 | &memslot->lpage_info[j][idx].rmap_pde, |
| 806 | data); | 867 | data); |
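The kvm_handle_hva() hunk above recomputes the large-page rmap index from shifted absolute gfns rather than by dividing the offset into the slot. The two differ whenever the slot's base gfn is not aligned to the large-page size, which is exactly when the old arithmetic could disagree with the indexing used elsewhere (slot_largepage_idx() and gfn_to_rmap(), both switched to the same shifted-difference form in this patch). A worked comparison, with numbers chosen only to show the mismatch:

```c
#include <stdio.h>

#define HPAGE_GFN_SHIFT 9   /* 2M pages: 512 small pages per large page */

int main(void)
{
	unsigned long base_gfn   = 0x1234;   /* slot base, not 2M aligned */
	unsigned long gfn_offset = 0x1e0;    /* offset of the faulting gfn */

	/* old: divide the offset alone */
	unsigned long old_idx = gfn_offset >> HPAGE_GFN_SHIFT;

	/* new: large page holding base+offset, relative to the large page
	 * holding the slot base */
	unsigned long new_idx = ((base_gfn + gfn_offset) >> HPAGE_GFN_SHIFT)
			      - (base_gfn >> HPAGE_GFN_SHIFT);

	printf("old=%lu new=%lu\n", old_idx, new_idx);   /* old=0 new=1 */
	return 0;
}
```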
| @@ -863,7 +924,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
| 863 | 924 | ||
| 864 | sp = page_header(__pa(spte)); | 925 | sp = page_header(__pa(spte)); |
| 865 | 926 | ||
| 866 | gfn = unalias_gfn(vcpu->kvm, gfn); | ||
| 867 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 927 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
| 868 | 928 | ||
| 869 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); | 929 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); |
| @@ -894,10 +954,12 @@ static int is_empty_shadow_page(u64 *spt) | |||
| 894 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 954 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 895 | { | 955 | { |
| 896 | ASSERT(is_empty_shadow_page(sp->spt)); | 956 | ASSERT(is_empty_shadow_page(sp->spt)); |
| 957 | hlist_del(&sp->hash_link); | ||
| 897 | list_del(&sp->link); | 958 | list_del(&sp->link); |
| 898 | __free_page(virt_to_page(sp->spt)); | 959 | __free_page(virt_to_page(sp->spt)); |
| 899 | __free_page(virt_to_page(sp->gfns)); | 960 | if (!sp->role.direct) |
| 900 | kfree(sp); | 961 | __free_page(virt_to_page(sp->gfns)); |
| 962 | kmem_cache_free(mmu_page_header_cache, sp); | ||
| 901 | ++kvm->arch.n_free_mmu_pages; | 963 | ++kvm->arch.n_free_mmu_pages; |
| 902 | } | 964 | } |
| 903 | 965 | ||
| @@ -907,13 +969,15 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn) | |||
| 907 | } | 969 | } |
| 908 | 970 | ||
| 909 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | 971 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
| 910 | u64 *parent_pte) | 972 | u64 *parent_pte, int direct) |
| 911 | { | 973 | { |
| 912 | struct kvm_mmu_page *sp; | 974 | struct kvm_mmu_page *sp; |
| 913 | 975 | ||
| 914 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); | 976 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); |
| 915 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 977 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
| 916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 978 | if (!direct) |
| 979 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, | ||
| 980 | PAGE_SIZE); | ||
| 917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 981 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
| 918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 982 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
| 919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 983 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
| @@ -998,7 +1062,6 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
| 998 | BUG(); | 1062 | BUG(); |
| 999 | } | 1063 | } |
| 1000 | 1064 | ||
| 1001 | |||
| 1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | 1065 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
| 1003 | { | 1066 | { |
| 1004 | struct kvm_pte_chain *pte_chain; | 1067 | struct kvm_pte_chain *pte_chain; |
| @@ -1008,63 +1071,37 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | |||
| 1008 | 1071 | ||
| 1009 | if (!sp->multimapped && sp->parent_pte) { | 1072 | if (!sp->multimapped && sp->parent_pte) { |
| 1010 | parent_sp = page_header(__pa(sp->parent_pte)); | 1073 | parent_sp = page_header(__pa(sp->parent_pte)); |
| 1011 | fn(parent_sp); | 1074 | fn(parent_sp, sp->parent_pte); |
| 1012 | mmu_parent_walk(parent_sp, fn); | ||
| 1013 | return; | 1075 | return; |
| 1014 | } | 1076 | } |
| 1077 | |||
| 1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1078 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
| 1016 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | 1079 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { |
| 1017 | if (!pte_chain->parent_ptes[i]) | 1080 | u64 *spte = pte_chain->parent_ptes[i]; |
| 1081 | |||
| 1082 | if (!spte) | ||
| 1018 | break; | 1083 | break; |
| 1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1084 | parent_sp = page_header(__pa(spte)); |
| 1020 | fn(parent_sp); | 1085 | fn(parent_sp, spte); |
| 1021 | mmu_parent_walk(parent_sp, fn); | ||
| 1022 | } | 1086 | } |
| 1023 | } | 1087 | } |
| 1024 | 1088 | ||
| 1025 | static void kvm_mmu_update_unsync_bitmap(u64 *spte) | 1089 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); |
| 1090 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | ||
| 1026 | { | 1091 | { |
| 1027 | unsigned int index; | 1092 | mmu_parent_walk(sp, mark_unsync); |
| 1028 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
| 1029 | |||
| 1030 | index = spte - sp->spt; | ||
| 1031 | if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) | ||
| 1032 | sp->unsync_children++; | ||
| 1033 | WARN_ON(!sp->unsync_children); | ||
| 1034 | } | 1093 | } |
| 1035 | 1094 | ||
| 1036 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | 1095 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) |
| 1037 | { | 1096 | { |
| 1038 | struct kvm_pte_chain *pte_chain; | 1097 | unsigned int index; |
| 1039 | struct hlist_node *node; | ||
| 1040 | int i; | ||
| 1041 | 1098 | ||
| 1042 | if (!sp->parent_pte) | 1099 | index = spte - sp->spt; |
| 1100 | if (__test_and_set_bit(index, sp->unsync_child_bitmap)) | ||
| 1043 | return; | 1101 | return; |
| 1044 | 1102 | if (sp->unsync_children++) | |
| 1045 | if (!sp->multimapped) { | ||
| 1046 | kvm_mmu_update_unsync_bitmap(sp->parent_pte); | ||
| 1047 | return; | 1103 | return; |
| 1048 | } | 1104 | kvm_mmu_mark_parents_unsync(sp); |
| 1049 | |||
| 1050 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
| 1051 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
| 1052 | if (!pte_chain->parent_ptes[i]) | ||
| 1053 | break; | ||
| 1054 | kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]); | ||
| 1055 | } | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) | ||
| 1059 | { | ||
| 1060 | kvm_mmu_update_parents_unsync(sp); | ||
| 1061 | return 1; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | ||
| 1065 | { | ||
| 1066 | mmu_parent_walk(sp, unsync_walk_fn); | ||
| 1067 | kvm_mmu_update_parents_unsync(sp); | ||
| 1068 | } | 1105 | } |
| 1069 | 1106 | ||
| 1070 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 1107 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, |
| @@ -1077,7 +1114,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | |||
| 1077 | } | 1114 | } |
| 1078 | 1115 | ||
| 1079 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1116 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
| 1080 | struct kvm_mmu_page *sp) | 1117 | struct kvm_mmu_page *sp, bool clear_unsync) |
| 1081 | { | 1118 | { |
| 1082 | return 1; | 1119 | return 1; |
| 1083 | } | 1120 | } |
| @@ -1123,35 +1160,40 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
| 1123 | int i, ret, nr_unsync_leaf = 0; | 1160 | int i, ret, nr_unsync_leaf = 0; |
| 1124 | 1161 | ||
| 1125 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1162 | for_each_unsync_children(sp->unsync_child_bitmap, i) { |
| 1163 | struct kvm_mmu_page *child; | ||
| 1126 | u64 ent = sp->spt[i]; | 1164 | u64 ent = sp->spt[i]; |
| 1127 | 1165 | ||
| 1128 | if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { | 1166 | if (!is_shadow_present_pte(ent) || is_large_pte(ent)) |
| 1129 | struct kvm_mmu_page *child; | 1167 | goto clear_child_bitmap; |
| 1130 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 1168 | |
| 1131 | 1169 | child = page_header(ent & PT64_BASE_ADDR_MASK); | |
| 1132 | if (child->unsync_children) { | 1170 | |
| 1133 | if (mmu_pages_add(pvec, child, i)) | 1171 | if (child->unsync_children) { |
| 1134 | return -ENOSPC; | 1172 | if (mmu_pages_add(pvec, child, i)) |
| 1135 | 1173 | return -ENOSPC; | |
| 1136 | ret = __mmu_unsync_walk(child, pvec); | 1174 | |
| 1137 | if (!ret) | 1175 | ret = __mmu_unsync_walk(child, pvec); |
| 1138 | __clear_bit(i, sp->unsync_child_bitmap); | 1176 | if (!ret) |
| 1139 | else if (ret > 0) | 1177 | goto clear_child_bitmap; |
| 1140 | nr_unsync_leaf += ret; | 1178 | else if (ret > 0) |
| 1141 | else | 1179 | nr_unsync_leaf += ret; |
| 1142 | return ret; | 1180 | else |
| 1143 | } | 1181 | return ret; |
| 1182 | } else if (child->unsync) { | ||
| 1183 | nr_unsync_leaf++; | ||
| 1184 | if (mmu_pages_add(pvec, child, i)) | ||
| 1185 | return -ENOSPC; | ||
| 1186 | } else | ||
| 1187 | goto clear_child_bitmap; | ||
| 1144 | 1188 | ||
| 1145 | if (child->unsync) { | 1189 | continue; |
| 1146 | nr_unsync_leaf++; | 1190 | |
| 1147 | if (mmu_pages_add(pvec, child, i)) | 1191 | clear_child_bitmap: |
| 1148 | return -ENOSPC; | 1192 | __clear_bit(i, sp->unsync_child_bitmap); |
| 1149 | } | 1193 | sp->unsync_children--; |
| 1150 | } | 1194 | WARN_ON((int)sp->unsync_children < 0); |
| 1151 | } | 1195 | } |
| 1152 | 1196 | ||
| 1153 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) | ||
| 1154 | sp->unsync_children = 0; | ||
| 1155 | 1197 | ||
| 1156 | return nr_unsync_leaf; | 1198 | return nr_unsync_leaf; |
| 1157 | } | 1199 | } |
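Editorial note: the rewritten walk funnels every "this child no longer needs a visit" case through the clear_child_bitmap label, which clears the bit and decrements sp->unsync_children on the spot, so the old tail scan (find_first_bit over all 512 bits to detect an empty bitmap) can be dropped. The invariant being maintained is simply that unsync_children equals the number of set bits; a small self-contained sketch of that bookkeeping, with illustrative names:

#include <assert.h>

#define ENTRIES 512
#define BITS_PER_LONG (8 * sizeof(unsigned long))

struct toy_sp {
	unsigned long unsync_child_bitmap[ENTRIES / BITS_PER_LONG];
	unsigned int unsync_children;
};

/* What the clear_child_bitmap label does: drop the bit and the counter
 * together, so "unsync_children == number of set bits" always holds and
 * no whole-bitmap rescan is needed at the end of the walk. */
static void clear_child(struct toy_sp *sp, int i)
{
	unsigned long mask = 1UL << (i % BITS_PER_LONG);
	unsigned long *p = &sp->unsync_child_bitmap[i / BITS_PER_LONG];

	if (*p & mask) {
		*p &= ~mask;
		sp->unsync_children--;
	}
}

int main(void)
{
	struct toy_sp sp = { .unsync_child_bitmap = { 0x5 }, .unsync_children = 2 };

	clear_child(&sp, 0);
	clear_child(&sp, 2);
	assert(sp.unsync_children == 0);
	return 0;
}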
| @@ -1166,26 +1208,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
| 1166 | return __mmu_unsync_walk(sp, pvec); | 1208 | return __mmu_unsync_walk(sp, pvec); |
| 1167 | } | 1209 | } |
| 1168 | 1210 | ||
| 1169 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | ||
| 1170 | { | ||
| 1171 | unsigned index; | ||
| 1172 | struct hlist_head *bucket; | ||
| 1173 | struct kvm_mmu_page *sp; | ||
| 1174 | struct hlist_node *node; | ||
| 1175 | |||
| 1176 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | ||
| 1177 | index = kvm_page_table_hashfn(gfn); | ||
| 1178 | bucket = &kvm->arch.mmu_page_hash[index]; | ||
| 1179 | hlist_for_each_entry(sp, node, bucket, hash_link) | ||
| 1180 | if (sp->gfn == gfn && !sp->role.direct | ||
| 1181 | && !sp->role.invalid) { | ||
| 1182 | pgprintk("%s: found role %x\n", | ||
| 1183 | __func__, sp->role.word); | ||
| 1184 | return sp; | ||
| 1185 | } | ||
| 1186 | return NULL; | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1211 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 1190 | { | 1212 | { |
| 1191 | WARN_ON(!sp->unsync); | 1213 | WARN_ON(!sp->unsync); |
| @@ -1194,20 +1216,36 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
| 1194 | --kvm->stat.mmu_unsync; | 1216 | --kvm->stat.mmu_unsync; |
| 1195 | } | 1217 | } |
| 1196 | 1218 | ||
| 1197 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | 1219 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
| 1220 | struct list_head *invalid_list); | ||
| 1221 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | ||
| 1222 | struct list_head *invalid_list); | ||
| 1223 | |||
| 1224 | #define for_each_gfn_sp(kvm, sp, gfn, pos) \ | ||
| 1225 | hlist_for_each_entry(sp, pos, \ | ||
| 1226 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | ||
| 1227 | if ((sp)->gfn != (gfn)) {} else | ||
| 1228 | |||
| 1229 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ | ||
| 1230 | hlist_for_each_entry(sp, pos, \ | ||
| 1231 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | ||
| 1232 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | ||
| 1233 | (sp)->role.invalid) {} else | ||
| 1198 | 1234 | ||
| 1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1235 | /* @sp->gfn should be write-protected at the call site */ |
| 1236 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
| 1237 | struct list_head *invalid_list, bool clear_unsync) | ||
| 1200 | { | 1238 | { |
| 1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { | 1239 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
| 1202 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1240 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
| 1203 | return 1; | 1241 | return 1; |
| 1204 | } | 1242 | } |
| 1205 | 1243 | ||
| 1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1244 | if (clear_unsync) |
| 1207 | kvm_flush_remote_tlbs(vcpu->kvm); | 1245 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
| 1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1246 | |
| 1209 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | 1247 | if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { |
| 1210 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1248 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
| 1211 | return 1; | 1249 | return 1; |
| 1212 | } | 1250 | } |
| 1213 | 1251 | ||
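Editorial note: kvm_mmu_lookup_page() and the open-coded bucket scans are replaced by two iterator macros that walk one gfn hash bucket and skip non-matching entries with an "if (...) {} else" filter, so the caller's body and continue statements attach to the surviving entries only. A minimal sketch of the same macro pattern over a plain array, with hypothetical names:

#include <stdio.h>

struct page { unsigned long gfn; int direct; int invalid; };

/* Same shape as for_each_gfn_indirect_valid_sp(): the filter is an
 * if/else so the statement following the macro runs only for matches. */
#define for_each_matching(p, tbl, n, want_gfn)				\
	for ((p) = (tbl); (p) < (tbl) + (n); (p)++)			\
		if ((p)->gfn != (want_gfn) || (p)->direct ||		\
		    (p)->invalid) {} else

int main(void)
{
	struct page tbl[] = {
		{ 0x100, 0, 0 }, { 0x100, 1, 0 }, { 0x200, 0, 0 }, { 0x100, 0, 1 },
	};
	struct page *p;

	for_each_matching(p, tbl, 4, 0x100)
		printf("visiting entry %ld\n", (long)(p - tbl));  /* only entry 0 */
	return 0;
}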
| @@ -1215,6 +1253,52 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 1215 | return 0; | 1253 | return 0; |
| 1216 | } | 1254 | } |
| 1217 | 1255 | ||
| 1256 | static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | ||
| 1257 | struct kvm_mmu_page *sp) | ||
| 1258 | { | ||
| 1259 | LIST_HEAD(invalid_list); | ||
| 1260 | int ret; | ||
| 1261 | |||
| 1262 | ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); | ||
| 1263 | if (ret) | ||
| 1264 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
| 1265 | |||
| 1266 | return ret; | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
| 1270 | struct list_head *invalid_list) | ||
| 1271 | { | ||
| 1272 | return __kvm_sync_page(vcpu, sp, invalid_list, true); | ||
| 1273 | } | ||
| 1274 | |||
| 1275 | /* @gfn should be write-protected at the call site */ | ||
| 1276 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
| 1277 | { | ||
| 1278 | struct kvm_mmu_page *s; | ||
| 1279 | struct hlist_node *node; | ||
| 1280 | LIST_HEAD(invalid_list); | ||
| 1281 | bool flush = false; | ||
| 1282 | |||
| 1283 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | ||
| 1284 | if (!s->unsync) | ||
| 1285 | continue; | ||
| 1286 | |||
| 1287 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | ||
| 1288 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || | ||
| 1289 | (vcpu->arch.mmu.sync_page(vcpu, s, true))) { | ||
| 1290 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); | ||
| 1291 | continue; | ||
| 1292 | } | ||
| 1293 | kvm_unlink_unsync_page(vcpu->kvm, s); | ||
| 1294 | flush = true; | ||
| 1295 | } | ||
| 1296 | |||
| 1297 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
| 1298 | if (flush) | ||
| 1299 | kvm_mmu_flush_tlb(vcpu); | ||
| 1300 | } | ||
| 1301 | |||
| 1218 | struct mmu_page_path { | 1302 | struct mmu_page_path { |
| 1219 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; | 1303 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; |
| 1220 | unsigned int idx[PT64_ROOT_LEVEL-1]; | 1304 | unsigned int idx[PT64_ROOT_LEVEL-1]; |
| @@ -1281,6 +1365,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
| 1281 | struct kvm_mmu_page *sp; | 1365 | struct kvm_mmu_page *sp; |
| 1282 | struct mmu_page_path parents; | 1366 | struct mmu_page_path parents; |
| 1283 | struct kvm_mmu_pages pages; | 1367 | struct kvm_mmu_pages pages; |
| 1368 | LIST_HEAD(invalid_list); | ||
| 1284 | 1369 | ||
| 1285 | kvm_mmu_pages_init(parent, &parents, &pages); | 1370 | kvm_mmu_pages_init(parent, &parents, &pages); |
| 1286 | while (mmu_unsync_walk(parent, &pages)) { | 1371 | while (mmu_unsync_walk(parent, &pages)) { |
| @@ -1293,9 +1378,10 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
| 1293 | kvm_flush_remote_tlbs(vcpu->kvm); | 1378 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 1294 | 1379 | ||
| 1295 | for_each_sp(pages, sp, parents, i) { | 1380 | for_each_sp(pages, sp, parents, i) { |
| 1296 | kvm_sync_page(vcpu, sp); | 1381 | kvm_sync_page(vcpu, sp, &invalid_list); |
| 1297 | mmu_pages_clear_parents(&parents); | 1382 | mmu_pages_clear_parents(&parents); |
| 1298 | } | 1383 | } |
| 1384 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
| 1299 | cond_resched_lock(&vcpu->kvm->mmu_lock); | 1385 | cond_resched_lock(&vcpu->kvm->mmu_lock); |
| 1300 | kvm_mmu_pages_init(parent, &parents, &pages); | 1386 | kvm_mmu_pages_init(parent, &parents, &pages); |
| 1301 | } | 1387 | } |
| @@ -1310,11 +1396,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1310 | u64 *parent_pte) | 1396 | u64 *parent_pte) |
| 1311 | { | 1397 | { |
| 1312 | union kvm_mmu_page_role role; | 1398 | union kvm_mmu_page_role role; |
| 1313 | unsigned index; | ||
| 1314 | unsigned quadrant; | 1399 | unsigned quadrant; |
| 1315 | struct hlist_head *bucket; | ||
| 1316 | struct kvm_mmu_page *sp; | 1400 | struct kvm_mmu_page *sp; |
| 1317 | struct hlist_node *node, *tmp; | 1401 | struct hlist_node *node; |
| 1402 | bool need_sync = false; | ||
| 1318 | 1403 | ||
| 1319 | role = vcpu->arch.mmu.base_role; | 1404 | role = vcpu->arch.mmu.base_role; |
| 1320 | role.level = level; | 1405 | role.level = level; |
| @@ -1322,40 +1407,45 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1322 | if (role.direct) | 1407 | if (role.direct) |
| 1323 | role.cr4_pae = 0; | 1408 | role.cr4_pae = 0; |
| 1324 | role.access = access; | 1409 | role.access = access; |
| 1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1410 | if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
| 1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1411 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
| 1327 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 1412 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
| 1328 | role.quadrant = quadrant; | 1413 | role.quadrant = quadrant; |
| 1329 | } | 1414 | } |
| 1330 | index = kvm_page_table_hashfn(gfn); | 1415 | for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { |
| 1331 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1416 | if (!need_sync && sp->unsync) |
| 1332 | hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) | 1417 | need_sync = true; |
| 1333 | if (sp->gfn == gfn) { | ||
| 1334 | if (sp->unsync) | ||
| 1335 | if (kvm_sync_page(vcpu, sp)) | ||
| 1336 | continue; | ||
| 1337 | 1418 | ||
| 1338 | if (sp->role.word != role.word) | 1419 | if (sp->role.word != role.word) |
| 1339 | continue; | 1420 | continue; |
| 1340 | 1421 | ||
| 1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1422 | if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) |
| 1342 | if (sp->unsync_children) { | 1423 | break; |
| 1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1424 | |
| 1344 | kvm_mmu_mark_parents_unsync(sp); | 1425 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
| 1345 | } | 1426 | if (sp->unsync_children) { |
| 1346 | trace_kvm_mmu_get_page(sp, false); | 1427 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); |
| 1347 | return sp; | 1428 | kvm_mmu_mark_parents_unsync(sp); |
| 1348 | } | 1429 | } else if (sp->unsync) |
| 1430 | kvm_mmu_mark_parents_unsync(sp); | ||
| 1431 | |||
| 1432 | trace_kvm_mmu_get_page(sp, false); | ||
| 1433 | return sp; | ||
| 1434 | } | ||
| 1349 | ++vcpu->kvm->stat.mmu_cache_miss; | 1435 | ++vcpu->kvm->stat.mmu_cache_miss; |
| 1350 | sp = kvm_mmu_alloc_page(vcpu, parent_pte); | 1436 | sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); |
| 1351 | if (!sp) | 1437 | if (!sp) |
| 1352 | return sp; | 1438 | return sp; |
| 1353 | sp->gfn = gfn; | 1439 | sp->gfn = gfn; |
| 1354 | sp->role = role; | 1440 | sp->role = role; |
| 1355 | hlist_add_head(&sp->hash_link, bucket); | 1441 | hlist_add_head(&sp->hash_link, |
| 1442 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); | ||
| 1356 | if (!direct) { | 1443 | if (!direct) { |
| 1357 | if (rmap_write_protect(vcpu->kvm, gfn)) | 1444 | if (rmap_write_protect(vcpu->kvm, gfn)) |
| 1358 | kvm_flush_remote_tlbs(vcpu->kvm); | 1445 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 1446 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | ||
| 1447 | kvm_sync_pages(vcpu, gfn); | ||
| 1448 | |||
| 1359 | account_shadowed(vcpu->kvm, gfn); | 1449 | account_shadowed(vcpu->kvm, gfn); |
| 1360 | } | 1450 | } |
| 1361 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1451 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
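Editorial note: kvm_mmu_get_page() now walks the gfn bucket with for_each_gfn_sp(), records in need_sync whether any existing page for the gfn is unsync, reuses a page whose whole role word matches (syncing it transiently first when needed), and only on a miss allocates, hashes and write-protects a fresh page, calling kvm_sync_pages() when a new upper-level page is created above unsynced children. At its core this is a lookup-or-create keyed on (gfn, role.word); a stripped-down user-space sketch, with error handling omitted and names chosen for illustration:

#include <stdlib.h>

struct sp { unsigned long gfn; unsigned role_word; int unsync; struct sp *hash_next; };

/* Reuse an existing page only when gfn AND the full role word match;
 * a different role means the same gfn is shadowed in a different mode. */
static struct sp *get_page(struct sp **bucket, unsigned long gfn,
			   unsigned role_word, int *need_sync)
{
	for (struct sp *s = *bucket; s; s = s->hash_next) {
		if (s->gfn != gfn)
			continue;
		if (s->unsync)
			*need_sync = 1;
		if (s->role_word == role_word)
			return s;                /* cache hit */
	}

	struct sp *s = calloc(1, sizeof(*s));    /* cache miss: allocate and hash */
	s->gfn = gfn;
	s->role_word = role_word;
	s->hash_next = *bucket;
	*bucket = s;
	return s;
}

int main(void)
{
	struct sp *bucket = NULL;
	int need_sync = 0;
	struct sp *a = get_page(&bucket, 0x100, 7, &need_sync);
	struct sp *b = get_page(&bucket, 0x100, 7, &need_sync);

	return a == b ? 0 : 1;                   /* second call hits the cache */
}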
| @@ -1402,6 +1492,47 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | |||
| 1402 | --iterator->level; | 1492 | --iterator->level; |
| 1403 | } | 1493 | } |
| 1404 | 1494 | ||
| 1495 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | ||
| 1496 | { | ||
| 1497 | u64 spte; | ||
| 1498 | |||
| 1499 | spte = __pa(sp->spt) | ||
| 1500 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | ||
| 1501 | | PT_WRITABLE_MASK | PT_USER_MASK; | ||
| 1502 | __set_spte(sptep, spte); | ||
| 1503 | } | ||
| 1504 | |||
| 1505 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | ||
| 1506 | { | ||
| 1507 | if (is_large_pte(*sptep)) { | ||
| 1508 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | ||
| 1509 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
| 1510 | } | ||
| 1511 | } | ||
| 1512 | |||
| 1513 | static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | ||
| 1514 | unsigned direct_access) | ||
| 1515 | { | ||
| 1516 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) { | ||
| 1517 | struct kvm_mmu_page *child; | ||
| 1518 | |||
| 1519 | /* | ||
| 1520 | * For the direct sp, if the guest pte's dirty bit | ||
| 1521 | * changed from clean to dirty, it will corrupt the | ||
| 1522 | * sp's access: allow writable in the read-only sp, | ||
| 1523 | * so we should update the spte at this point to get | ||
| 1524 | * a new sp with the correct access. | ||
| 1525 | */ | ||
| 1526 | child = page_header(*sptep & PT64_BASE_ADDR_MASK); | ||
| 1527 | if (child->role.access == direct_access) | ||
| 1528 | return; | ||
| 1529 | |||
| 1530 | mmu_page_remove_parent_pte(child, sptep); | ||
| 1531 | __set_spte(sptep, shadow_trap_nonpresent_pte); | ||
| 1532 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
| 1533 | } | ||
| 1534 | } | ||
| 1535 | |||
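Editorial note: link_shadow_page() centralizes building the non-leaf spte that points at a child shadow table: the table's physical address OR-ed with the present, accessed, writable and user bits. A small worked example of the composition, assuming the standard x86 bit positions (present = bit 0, writable = bit 1, user = bit 2, accessed = bit 5); the kernel of course takes these masks from its own headers.

#include <stdint.h>
#include <stdio.h>

#define PT_PRESENT_MASK   (1ULL << 0)
#define PT_WRITABLE_MASK  (1ULL << 1)
#define PT_USER_MASK      (1ULL << 2)
#define PT_ACCESSED_MASK  (1ULL << 5)

/* Compose a non-leaf entry pointing at a 4K-aligned child table. */
static uint64_t make_link_spte(uint64_t child_table_pa)
{
	return child_table_pa | PT_PRESENT_MASK | PT_ACCESSED_MASK
			      | PT_WRITABLE_MASK | PT_USER_MASK;
}

int main(void)
{
	uint64_t spte = make_link_spte(0x12345000ULL);

	printf("spte = %#llx\n", (unsigned long long)spte);   /* 0x12345027 */
	return 0;
}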
| 1405 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1536 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
| 1406 | struct kvm_mmu_page *sp) | 1537 | struct kvm_mmu_page *sp) |
| 1407 | { | 1538 | { |
| @@ -1422,7 +1553,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
| 1422 | } else { | 1553 | } else { |
| 1423 | if (is_large_pte(ent)) | 1554 | if (is_large_pte(ent)) |
| 1424 | --kvm->stat.lpages; | 1555 | --kvm->stat.lpages; |
| 1425 | rmap_remove(kvm, &pt[i]); | 1556 | drop_spte(kvm, &pt[i], |
| 1557 | shadow_trap_nonpresent_pte); | ||
| 1426 | } | 1558 | } |
| 1427 | } | 1559 | } |
| 1428 | pt[i] = shadow_trap_nonpresent_pte; | 1560 | pt[i] = shadow_trap_nonpresent_pte; |
| @@ -1464,7 +1596,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
| 1464 | } | 1596 | } |
| 1465 | 1597 | ||
| 1466 | static int mmu_zap_unsync_children(struct kvm *kvm, | 1598 | static int mmu_zap_unsync_children(struct kvm *kvm, |
| 1467 | struct kvm_mmu_page *parent) | 1599 | struct kvm_mmu_page *parent, |
| 1600 | struct list_head *invalid_list) | ||
| 1468 | { | 1601 | { |
| 1469 | int i, zapped = 0; | 1602 | int i, zapped = 0; |
| 1470 | struct mmu_page_path parents; | 1603 | struct mmu_page_path parents; |
| @@ -1478,7 +1611,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
| 1478 | struct kvm_mmu_page *sp; | 1611 | struct kvm_mmu_page *sp; |
| 1479 | 1612 | ||
| 1480 | for_each_sp(pages, sp, parents, i) { | 1613 | for_each_sp(pages, sp, parents, i) { |
| 1481 | kvm_mmu_zap_page(kvm, sp); | 1614 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); |
| 1482 | mmu_pages_clear_parents(&parents); | 1615 | mmu_pages_clear_parents(&parents); |
| 1483 | zapped++; | 1616 | zapped++; |
| 1484 | } | 1617 | } |
| @@ -1488,32 +1621,52 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
| 1488 | return zapped; | 1621 | return zapped; |
| 1489 | } | 1622 | } |
| 1490 | 1623 | ||
| 1491 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1624 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
| 1625 | struct list_head *invalid_list) | ||
| 1492 | { | 1626 | { |
| 1493 | int ret; | 1627 | int ret; |
| 1494 | 1628 | ||
| 1495 | trace_kvm_mmu_zap_page(sp); | 1629 | trace_kvm_mmu_prepare_zap_page(sp); |
| 1496 | ++kvm->stat.mmu_shadow_zapped; | 1630 | ++kvm->stat.mmu_shadow_zapped; |
| 1497 | ret = mmu_zap_unsync_children(kvm, sp); | 1631 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); |
| 1498 | kvm_mmu_page_unlink_children(kvm, sp); | 1632 | kvm_mmu_page_unlink_children(kvm, sp); |
| 1499 | kvm_mmu_unlink_parents(kvm, sp); | 1633 | kvm_mmu_unlink_parents(kvm, sp); |
| 1500 | kvm_flush_remote_tlbs(kvm); | ||
| 1501 | if (!sp->role.invalid && !sp->role.direct) | 1634 | if (!sp->role.invalid && !sp->role.direct) |
| 1502 | unaccount_shadowed(kvm, sp->gfn); | 1635 | unaccount_shadowed(kvm, sp->gfn); |
| 1503 | if (sp->unsync) | 1636 | if (sp->unsync) |
| 1504 | kvm_unlink_unsync_page(kvm, sp); | 1637 | kvm_unlink_unsync_page(kvm, sp); |
| 1505 | if (!sp->root_count) { | 1638 | if (!sp->root_count) { |
| 1506 | hlist_del(&sp->hash_link); | 1639 | /* Count self */ |
| 1507 | kvm_mmu_free_page(kvm, sp); | 1640 | ret++; |
| 1641 | list_move(&sp->link, invalid_list); | ||
| 1508 | } else { | 1642 | } else { |
| 1509 | sp->role.invalid = 1; | ||
| 1510 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 1643 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
| 1511 | kvm_reload_remote_mmus(kvm); | 1644 | kvm_reload_remote_mmus(kvm); |
| 1512 | } | 1645 | } |
| 1646 | |||
| 1647 | sp->role.invalid = 1; | ||
| 1513 | kvm_mmu_reset_last_pte_updated(kvm); | 1648 | kvm_mmu_reset_last_pte_updated(kvm); |
| 1514 | return ret; | 1649 | return ret; |
| 1515 | } | 1650 | } |
| 1516 | 1651 | ||
| 1652 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | ||
| 1653 | struct list_head *invalid_list) | ||
| 1654 | { | ||
| 1655 | struct kvm_mmu_page *sp; | ||
| 1656 | |||
| 1657 | if (list_empty(invalid_list)) | ||
| 1658 | return; | ||
| 1659 | |||
| 1660 | kvm_flush_remote_tlbs(kvm); | ||
| 1661 | |||
| 1662 | do { | ||
| 1663 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | ||
| 1664 | WARN_ON(!sp->role.invalid || sp->root_count); | ||
| 1665 | kvm_mmu_free_page(kvm, sp); | ||
| 1666 | } while (!list_empty(invalid_list)); | ||
| 1667 | |||
| 1668 | } | ||
| 1669 | |||
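Editorial note: zapping is now split in two phases. kvm_mmu_prepare_zap_page() unlinks a shadow page and, once its root_count allows, parks it on a caller-supplied invalid_list; kvm_mmu_commit_zap_page() then performs one remote TLB flush for the whole batch before freeing, so callers pay a single flush per batch instead of one per page. A rough user-space model of the pattern (list handling, locking and error handling all simplified):

#include <stdio.h>
#include <stdlib.h>

struct page {
	struct page *next;
	int id;
	int invalid;
};

static int flushes;

static void flush_remote_tlbs(void) { flushes++; }

/* Phase 1: detach the page and queue it; no flush, no free yet. */
static void prepare_zap(struct page *p, struct page **invalid_list)
{
	p->invalid = 1;
	p->next = *invalid_list;
	*invalid_list = p;
}

/* Phase 2: one flush covers every queued page, then free them all. */
static void commit_zap(struct page **invalid_list)
{
	struct page *p = *invalid_list;

	if (!p)
		return;
	flush_remote_tlbs();
	while (p) {
		struct page *next = p->next;

		free(p);
		p = next;
	}
	*invalid_list = NULL;
}

int main(void)
{
	struct page *invalid_list = NULL;

	for (int i = 0; i < 3; i++) {
		struct page *p = calloc(1, sizeof(*p));  /* allocation check omitted */

		p->id = i;
		prepare_zap(p, &invalid_list);
	}
	commit_zap(&invalid_list);
	printf("flushes=%d\n", flushes);                 /* 1, not 3 */
	return 0;
}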
| 1517 | /* | 1670 | /* |
| 1518 | * Changing the number of mmu pages allocated to the vm | 1671 | * Changing the number of mmu pages allocated to the vm |
| 1519 | * Note: if kvm_nr_mmu_pages is too small, you will get dead lock | 1672 | * Note: if kvm_nr_mmu_pages is too small, you will get dead lock |
| @@ -1521,6 +1674,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
| 1521 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | 1674 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) |
| 1522 | { | 1675 | { |
| 1523 | int used_pages; | 1676 | int used_pages; |
| 1677 | LIST_HEAD(invalid_list); | ||
| 1524 | 1678 | ||
| 1525 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; | 1679 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; |
| 1526 | used_pages = max(0, used_pages); | 1680 | used_pages = max(0, used_pages); |
| @@ -1538,9 +1692,10 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
| 1538 | 1692 | ||
| 1539 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1693 | page = container_of(kvm->arch.active_mmu_pages.prev, |
| 1540 | struct kvm_mmu_page, link); | 1694 | struct kvm_mmu_page, link); |
| 1541 | used_pages -= kvm_mmu_zap_page(kvm, page); | 1695 | used_pages -= kvm_mmu_prepare_zap_page(kvm, page, |
| 1542 | used_pages--; | 1696 | &invalid_list); |
| 1543 | } | 1697 | } |
| 1698 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
| 1544 | kvm_nr_mmu_pages = used_pages; | 1699 | kvm_nr_mmu_pages = used_pages; |
| 1545 | kvm->arch.n_free_mmu_pages = 0; | 1700 | kvm->arch.n_free_mmu_pages = 0; |
| 1546 | } | 1701 | } |
| @@ -1553,47 +1708,36 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
| 1553 | 1708 | ||
| 1554 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 1709 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
| 1555 | { | 1710 | { |
| 1556 | unsigned index; | ||
| 1557 | struct hlist_head *bucket; | ||
| 1558 | struct kvm_mmu_page *sp; | 1711 | struct kvm_mmu_page *sp; |
| 1559 | struct hlist_node *node, *n; | 1712 | struct hlist_node *node; |
| 1713 | LIST_HEAD(invalid_list); | ||
| 1560 | int r; | 1714 | int r; |
| 1561 | 1715 | ||
| 1562 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | 1716 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); |
| 1563 | r = 0; | 1717 | r = 0; |
| 1564 | index = kvm_page_table_hashfn(gfn); | 1718 | |
| 1565 | bucket = &kvm->arch.mmu_page_hash[index]; | 1719 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
| 1566 | restart: | 1720 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
| 1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1721 | sp->role.word); |
| 1568 | if (sp->gfn == gfn && !sp->role.direct) { | 1722 | r = 1; |
| 1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1723 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
| 1570 | sp->role.word); | 1724 | } |
| 1571 | r = 1; | 1725 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 1572 | if (kvm_mmu_zap_page(kvm, sp)) | ||
| 1573 | goto restart; | ||
| 1574 | } | ||
| 1575 | return r; | 1726 | return r; |
| 1576 | } | 1727 | } |
| 1577 | 1728 | ||
| 1578 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | 1729 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) |
| 1579 | { | 1730 | { |
| 1580 | unsigned index; | ||
| 1581 | struct hlist_head *bucket; | ||
| 1582 | struct kvm_mmu_page *sp; | 1731 | struct kvm_mmu_page *sp; |
| 1583 | struct hlist_node *node, *nn; | 1732 | struct hlist_node *node; |
| 1733 | LIST_HEAD(invalid_list); | ||
| 1584 | 1734 | ||
| 1585 | index = kvm_page_table_hashfn(gfn); | 1735 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
| 1586 | bucket = &kvm->arch.mmu_page_hash[index]; | 1736 | pgprintk("%s: zap %lx %x\n", |
| 1587 | restart: | 1737 | __func__, gfn, sp->role.word); |
| 1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1738 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
| 1589 | if (sp->gfn == gfn && !sp->role.direct | ||
| 1590 | && !sp->role.invalid) { | ||
| 1591 | pgprintk("%s: zap %lx %x\n", | ||
| 1592 | __func__, gfn, sp->role.word); | ||
| 1593 | if (kvm_mmu_zap_page(kvm, sp)) | ||
| 1594 | goto restart; | ||
| 1595 | } | ||
| 1596 | } | 1739 | } |
| 1740 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
| 1597 | } | 1741 | } |
| 1598 | 1742 | ||
| 1599 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 1743 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
| @@ -1723,47 +1867,51 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
| 1723 | } | 1867 | } |
| 1724 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); | 1868 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); |
| 1725 | 1869 | ||
| 1726 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1870 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
| 1727 | { | 1871 | { |
| 1728 | unsigned index; | ||
| 1729 | struct hlist_head *bucket; | ||
| 1730 | struct kvm_mmu_page *s; | ||
| 1731 | struct hlist_node *node, *n; | ||
| 1732 | |||
| 1733 | index = kvm_page_table_hashfn(sp->gfn); | ||
| 1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | ||
| 1735 | /* don't unsync if pagetable is shadowed with multiple roles */ | ||
| 1736 | hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { | ||
| 1737 | if (s->gfn != sp->gfn || s->role.direct) | ||
| 1738 | continue; | ||
| 1739 | if (s->role.word != sp->role.word) | ||
| 1740 | return 1; | ||
| 1741 | } | ||
| 1742 | trace_kvm_mmu_unsync_page(sp); | 1872 | trace_kvm_mmu_unsync_page(sp); |
| 1743 | ++vcpu->kvm->stat.mmu_unsync; | 1873 | ++vcpu->kvm->stat.mmu_unsync; |
| 1744 | sp->unsync = 1; | 1874 | sp->unsync = 1; |
| 1745 | 1875 | ||
| 1746 | kvm_mmu_mark_parents_unsync(sp); | 1876 | kvm_mmu_mark_parents_unsync(sp); |
| 1747 | |||
| 1748 | mmu_convert_notrap(sp); | 1877 | mmu_convert_notrap(sp); |
| 1749 | return 0; | 1878 | } |
| 1879 | |||
| 1880 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
| 1881 | { | ||
| 1882 | struct kvm_mmu_page *s; | ||
| 1883 | struct hlist_node *node; | ||
| 1884 | |||
| 1885 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | ||
| 1886 | if (s->unsync) | ||
| 1887 | continue; | ||
| 1888 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | ||
| 1889 | __kvm_unsync_page(vcpu, s); | ||
| 1890 | } | ||
| 1750 | } | 1891 | } |
| 1751 | 1892 | ||
| 1752 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | 1893 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, |
| 1753 | bool can_unsync) | 1894 | bool can_unsync) |
| 1754 | { | 1895 | { |
| 1755 | struct kvm_mmu_page *shadow; | 1896 | struct kvm_mmu_page *s; |
| 1897 | struct hlist_node *node; | ||
| 1898 | bool need_unsync = false; | ||
| 1756 | 1899 | ||
| 1757 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1900 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { |
| 1758 | if (shadow) { | 1901 | if (!can_unsync) |
| 1759 | if (shadow->role.level != PT_PAGE_TABLE_LEVEL) | ||
| 1760 | return 1; | 1902 | return 1; |
| 1761 | if (shadow->unsync) | 1903 | |
| 1762 | return 0; | 1904 | if (s->role.level != PT_PAGE_TABLE_LEVEL) |
| 1763 | if (can_unsync && oos_shadow) | 1905 | return 1; |
| 1764 | return kvm_unsync_page(vcpu, shadow); | 1906 | |
| 1765 | return 1; | 1907 | if (!need_unsync && !s->unsync) { |
| 1908 | if (!oos_shadow) | ||
| 1909 | return 1; | ||
| 1910 | need_unsync = true; | ||
| 1911 | } | ||
| 1766 | } | 1912 | } |
| 1913 | if (need_unsync) | ||
| 1914 | kvm_unsync_pages(vcpu, gfn); | ||
| 1767 | return 0; | 1915 | return 0; |
| 1768 | } | 1916 | } |
| 1769 | 1917 | ||
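Editorial note: the rewritten mmu_need_write_protect() scans every valid indirect shadow page for the gfn in one pass. Any hit forces write protection when unsyncing is not allowed or the page is a non-leaf (above PT_PAGE_TABLE_LEVEL) page table; otherwise the still-synced leaf pages are all switched to unsync via kvm_unsync_pages(). A compact sketch of that decision, with illustrative types and the oos_shadow module parameter ignored:

#include <stdbool.h>
#include <stdio.h>

struct shadow { int level; bool unsync; };

#define LEAF_LEVEL 1

/* true  -> keep the gfn write-protected (trap the write)
 * false -> the matching leaf pages may simply be marked unsync */
static bool need_write_protect(const struct shadow *pages, int n,
			       bool can_unsync, bool *need_unsync)
{
	*need_unsync = false;
	for (int i = 0; i < n; i++) {
		if (!can_unsync)
			return true;
		if (pages[i].level != LEAF_LEVEL)
			return true;          /* never unsync a non-leaf page table */
		if (!pages[i].unsync)
			*need_unsync = true;  /* something still needs unsyncing */
	}
	return false;
}

int main(void)
{
	struct shadow pages[] = { { 1, false }, { 1, true } };
	bool need_unsync;

	printf("write-protect=%d need_unsync=%d\n",
	       need_write_protect(pages, 2, true, &need_unsync), need_unsync);
	return 0;
}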
| @@ -1804,13 +1952,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1804 | spte |= (u64)pfn << PAGE_SHIFT; | 1952 | spte |= (u64)pfn << PAGE_SHIFT; |
| 1805 | 1953 | ||
| 1806 | if ((pte_access & ACC_WRITE_MASK) | 1954 | if ((pte_access & ACC_WRITE_MASK) |
| 1807 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | 1955 | || (!tdp_enabled && write_fault && !is_write_protection(vcpu) |
| 1956 | && !user_fault)) { | ||
| 1808 | 1957 | ||
| 1809 | if (level > PT_PAGE_TABLE_LEVEL && | 1958 | if (level > PT_PAGE_TABLE_LEVEL && |
| 1810 | has_wrprotected_page(vcpu->kvm, gfn, level)) { | 1959 | has_wrprotected_page(vcpu->kvm, gfn, level)) { |
| 1811 | ret = 1; | 1960 | ret = 1; |
| 1812 | spte = shadow_trap_nonpresent_pte; | 1961 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
| 1813 | goto set_pte; | 1962 | goto done; |
| 1814 | } | 1963 | } |
| 1815 | 1964 | ||
| 1816 | spte |= PT_WRITABLE_MASK; | 1965 | spte |= PT_WRITABLE_MASK; |
| @@ -1841,7 +1990,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1841 | mark_page_dirty(vcpu->kvm, gfn); | 1990 | mark_page_dirty(vcpu->kvm, gfn); |
| 1842 | 1991 | ||
| 1843 | set_pte: | 1992 | set_pte: |
| 1844 | __set_spte(sptep, spte); | 1993 | if (is_writable_pte(*sptep) && !is_writable_pte(spte)) |
| 1994 | kvm_set_pfn_dirty(pfn); | ||
| 1995 | update_spte(sptep, spte); | ||
| 1996 | done: | ||
| 1845 | return ret; | 1997 | return ret; |
| 1846 | } | 1998 | } |
| 1847 | 1999 | ||
| @@ -1853,7 +2005,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1853 | bool reset_host_protection) | 2005 | bool reset_host_protection) |
| 1854 | { | 2006 | { |
| 1855 | int was_rmapped = 0; | 2007 | int was_rmapped = 0; |
| 1856 | int was_writable = is_writable_pte(*sptep); | ||
| 1857 | int rmap_count; | 2008 | int rmap_count; |
| 1858 | 2009 | ||
| 1859 | pgprintk("%s: spte %llx access %x write_fault %d" | 2010 | pgprintk("%s: spte %llx access %x write_fault %d" |
| @@ -1878,8 +2029,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1878 | } else if (pfn != spte_to_pfn(*sptep)) { | 2029 | } else if (pfn != spte_to_pfn(*sptep)) { |
| 1879 | pgprintk("hfn old %lx new %lx\n", | 2030 | pgprintk("hfn old %lx new %lx\n", |
| 1880 | spte_to_pfn(*sptep), pfn); | 2031 | spte_to_pfn(*sptep), pfn); |
| 1881 | rmap_remove(vcpu->kvm, sptep); | 2032 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
| 1882 | __set_spte(sptep, shadow_trap_nonpresent_pte); | ||
| 1883 | kvm_flush_remote_tlbs(vcpu->kvm); | 2033 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 1884 | } else | 2034 | } else |
| 1885 | was_rmapped = 1; | 2035 | was_rmapped = 1; |
| @@ -1890,7 +2040,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1890 | reset_host_protection)) { | 2040 | reset_host_protection)) { |
| 1891 | if (write_fault) | 2041 | if (write_fault) |
| 1892 | *ptwrite = 1; | 2042 | *ptwrite = 1; |
| 1893 | kvm_x86_ops->tlb_flush(vcpu); | 2043 | kvm_mmu_flush_tlb(vcpu); |
| 1894 | } | 2044 | } |
| 1895 | 2045 | ||
| 1896 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 2046 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
| @@ -1904,15 +2054,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1904 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 2054 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
| 1905 | if (!was_rmapped) { | 2055 | if (!was_rmapped) { |
| 1906 | rmap_count = rmap_add(vcpu, sptep, gfn); | 2056 | rmap_count = rmap_add(vcpu, sptep, gfn); |
| 1907 | kvm_release_pfn_clean(pfn); | ||
| 1908 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 2057 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
| 1909 | rmap_recycle(vcpu, sptep, gfn); | 2058 | rmap_recycle(vcpu, sptep, gfn); |
| 1910 | } else { | ||
| 1911 | if (was_writable) | ||
| 1912 | kvm_release_pfn_dirty(pfn); | ||
| 1913 | else | ||
| 1914 | kvm_release_pfn_clean(pfn); | ||
| 1915 | } | 2059 | } |
| 2060 | kvm_release_pfn_clean(pfn); | ||
| 1916 | if (speculative) { | 2061 | if (speculative) { |
| 1917 | vcpu->arch.last_pte_updated = sptep; | 2062 | vcpu->arch.last_pte_updated = sptep; |
| 1918 | vcpu->arch.last_pte_gfn = gfn; | 2063 | vcpu->arch.last_pte_gfn = gfn; |
| @@ -1941,7 +2086,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
| 1941 | } | 2086 | } |
| 1942 | 2087 | ||
| 1943 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { | 2088 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { |
| 1944 | pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2089 | u64 base_addr = iterator.addr; |
| 2090 | |||
| 2091 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); | ||
| 2092 | pseudo_gfn = base_addr >> PAGE_SHIFT; | ||
| 1945 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, | 2093 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, |
| 1946 | iterator.level - 1, | 2094 | iterator.level - 1, |
| 1947 | 1, ACC_ALL, iterator.sptep); | 2095 | 1, ACC_ALL, iterator.sptep); |
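Editorial note: the pseudo-gfn of an intermediate directly mapped page is now derived with PT64_LVL_ADDR_MASK(level), which clears the low 12 + 9*(level-1) bits, instead of the fixed 2MB directory mask, so the computation is also correct for 1GB (level 3) and higher entries. A quick check of the arithmetic, assuming 4K pages and 9 bits per level and ignoring the upper physical-address mask the kernel also applies:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define LEVEL_BITS 9

/* Clear everything below the start of this level's region. */
static uint64_t lvl_addr_mask(int level)
{
	return ~((1ULL << (PAGE_SHIFT + (level - 1) * LEVEL_BITS)) - 1);
}

int main(void)
{
	uint64_t addr = 0x7654321000ULL;

	for (int level = 2; level <= 4; level++)
		printf("level %d: base %#llx, pseudo_gfn %#llx\n", level,
		       (unsigned long long)(addr & lvl_addr_mask(level)),
		       (unsigned long long)((addr & lvl_addr_mask(level)) >> PAGE_SHIFT));
	return 0;
}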
| @@ -1960,6 +2108,29 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
| 1960 | return pt_write; | 2108 | return pt_write; |
| 1961 | } | 2109 | } |
| 1962 | 2110 | ||
| 2111 | static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) | ||
| 2112 | { | ||
| 2113 | char buf[1]; | ||
| 2114 | void __user *hva; | ||
| 2115 | int r; | ||
| 2116 | |||
| 2117 | /* Touch the page, so send SIGBUS */ | ||
| 2118 | hva = (void __user *)gfn_to_hva(kvm, gfn); | ||
| 2119 | r = copy_from_user(buf, hva, 1); | ||
| 2120 | } | ||
| 2121 | |||
| 2122 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | ||
| 2123 | { | ||
| 2124 | kvm_release_pfn_clean(pfn); | ||
| 2125 | if (is_hwpoison_pfn(pfn)) { | ||
| 2126 | kvm_send_hwpoison_signal(kvm, gfn); | ||
| 2127 | return 0; | ||
| 2128 | } else if (is_fault_pfn(pfn)) | ||
| 2129 | return -EFAULT; | ||
| 2130 | |||
| 2131 | return 1; | ||
| 2132 | } | ||
| 2133 | |||
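Editorial note: error pfns returned by gfn_to_pfn() are now funneled through kvm_handle_bad_page(). For a hardware-poisoned page, kvm_send_hwpoison_signal() simply touches the gfn's user mapping with copy_from_user() so the host's memory-failure handling delivers SIGBUS to the process; a plain fault pfn becomes -EFAULT; anything else keeps the old "treat as MMIO, return 1" behaviour. A minimal sketch of that return contract, with a stand-in enum instead of the real is_hwpoison_pfn()/is_fault_pfn() predicates:

#include <errno.h>

enum pfn_kind { PFN_RAM, PFN_HWPOISON, PFN_FAULT, PFN_MMIO };  /* stand-in */

/* 0 = fault absorbed (SIGBUS already queued), negative = hard error
 * back to userspace, 1 = let the caller carry on (e.g. MMIO emulation). */
static int handle_bad_page(enum pfn_kind kind)
{
	switch (kind) {
	case PFN_HWPOISON:
		return 0;
	case PFN_FAULT:
		return -EFAULT;
	default:
		return 1;
	}
}

int main(void)
{
	return handle_bad_page(PFN_MMIO) == 1 ? 0 : 1;
}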
| 1963 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 2134 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
| 1964 | { | 2135 | { |
| 1965 | int r; | 2136 | int r; |
| @@ -1983,10 +2154,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1983 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2154 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1984 | 2155 | ||
| 1985 | /* mmio */ | 2156 | /* mmio */ |
| 1986 | if (is_error_pfn(pfn)) { | 2157 | if (is_error_pfn(pfn)) |
| 1987 | kvm_release_pfn_clean(pfn); | 2158 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
| 1988 | return 1; | ||
| 1989 | } | ||
| 1990 | 2159 | ||
| 1991 | spin_lock(&vcpu->kvm->mmu_lock); | 2160 | spin_lock(&vcpu->kvm->mmu_lock); |
| 1992 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2161 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
| @@ -2009,6 +2178,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
| 2009 | { | 2178 | { |
| 2010 | int i; | 2179 | int i; |
| 2011 | struct kvm_mmu_page *sp; | 2180 | struct kvm_mmu_page *sp; |
| 2181 | LIST_HEAD(invalid_list); | ||
| 2012 | 2182 | ||
| 2013 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2183 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
| 2014 | return; | 2184 | return; |
| @@ -2018,8 +2188,10 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
| 2018 | 2188 | ||
| 2019 | sp = page_header(root); | 2189 | sp = page_header(root); |
| 2020 | --sp->root_count; | 2190 | --sp->root_count; |
| 2021 | if (!sp->root_count && sp->role.invalid) | 2191 | if (!sp->root_count && sp->role.invalid) { |
| 2022 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2192 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
| 2193 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
| 2194 | } | ||
| 2023 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2195 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
| 2024 | spin_unlock(&vcpu->kvm->mmu_lock); | 2196 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 2025 | return; | 2197 | return; |
| @@ -2032,10 +2204,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
| 2032 | sp = page_header(root); | 2204 | sp = page_header(root); |
| 2033 | --sp->root_count; | 2205 | --sp->root_count; |
| 2034 | if (!sp->root_count && sp->role.invalid) | 2206 | if (!sp->root_count && sp->role.invalid) |
| 2035 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2207 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
| 2208 | &invalid_list); | ||
| 2036 | } | 2209 | } |
| 2037 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2210 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
| 2038 | } | 2211 | } |
| 2212 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
| 2039 | spin_unlock(&vcpu->kvm->mmu_lock); | 2213 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 2040 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2214 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
| 2041 | } | 2215 | } |
| @@ -2045,7 +2219,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) | |||
| 2045 | int ret = 0; | 2219 | int ret = 0; |
| 2046 | 2220 | ||
| 2047 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { | 2221 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { |
| 2048 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 2222 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
| 2049 | ret = 1; | 2223 | ret = 1; |
| 2050 | } | 2224 | } |
| 2051 | 2225 | ||
| @@ -2073,6 +2247,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 2073 | root_gfn = 0; | 2247 | root_gfn = 0; |
| 2074 | } | 2248 | } |
| 2075 | spin_lock(&vcpu->kvm->mmu_lock); | 2249 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2250 | kvm_mmu_free_some_pages(vcpu); | ||
| 2076 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2251 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
| 2077 | PT64_ROOT_LEVEL, direct, | 2252 | PT64_ROOT_LEVEL, direct, |
| 2078 | ACC_ALL, NULL); | 2253 | ACC_ALL, NULL); |
| @@ -2103,6 +2278,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 2103 | root_gfn = i << 30; | 2278 | root_gfn = i << 30; |
| 2104 | } | 2279 | } |
| 2105 | spin_lock(&vcpu->kvm->mmu_lock); | 2280 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2281 | kvm_mmu_free_some_pages(vcpu); | ||
| 2106 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2282 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
| 2107 | PT32_ROOT_LEVEL, direct, | 2283 | PT32_ROOT_LEVEL, direct, |
| 2108 | ACC_ALL, NULL); | 2284 | ACC_ALL, NULL); |
| @@ -2198,10 +2374,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 2198 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2374 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 2199 | smp_rmb(); | 2375 | smp_rmb(); |
| 2200 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2376 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 2201 | if (is_error_pfn(pfn)) { | 2377 | if (is_error_pfn(pfn)) |
| 2202 | kvm_release_pfn_clean(pfn); | 2378 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
| 2203 | return 1; | ||
| 2204 | } | ||
| 2205 | spin_lock(&vcpu->kvm->mmu_lock); | 2379 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2206 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2380 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
| 2207 | goto out_unlock; | 2381 | goto out_unlock; |
| @@ -2243,7 +2417,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
| 2243 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 2417 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
| 2244 | { | 2418 | { |
| 2245 | ++vcpu->stat.tlb_flush; | 2419 | ++vcpu->stat.tlb_flush; |
| 2246 | kvm_x86_ops->tlb_flush(vcpu); | 2420 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
| 2247 | } | 2421 | } |
| 2248 | 2422 | ||
| 2249 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 2423 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
| @@ -2457,10 +2631,9 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) | |||
| 2457 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) | 2631 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) |
| 2458 | { | 2632 | { |
| 2459 | ASSERT(vcpu); | 2633 | ASSERT(vcpu); |
| 2460 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) { | 2634 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
| 2635 | /* mmu.free() should set root_hpa = INVALID_PAGE */ | ||
| 2461 | vcpu->arch.mmu.free(vcpu); | 2636 | vcpu->arch.mmu.free(vcpu); |
| 2462 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
| 2463 | } | ||
| 2464 | } | 2637 | } |
| 2465 | 2638 | ||
| 2466 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) | 2639 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) |
| @@ -2477,9 +2650,6 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
| 2477 | r = mmu_topup_memory_caches(vcpu); | 2650 | r = mmu_topup_memory_caches(vcpu); |
| 2478 | if (r) | 2651 | if (r) |
| 2479 | goto out; | 2652 | goto out; |
| 2480 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2481 | kvm_mmu_free_some_pages(vcpu); | ||
| 2482 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2483 | r = mmu_alloc_roots(vcpu); | 2653 | r = mmu_alloc_roots(vcpu); |
| 2484 | spin_lock(&vcpu->kvm->mmu_lock); | 2654 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2485 | mmu_sync_roots(vcpu); | 2655 | mmu_sync_roots(vcpu); |
| @@ -2508,7 +2678,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
| 2508 | pte = *spte; | 2678 | pte = *spte; |
| 2509 | if (is_shadow_present_pte(pte)) { | 2679 | if (is_shadow_present_pte(pte)) { |
| 2510 | if (is_last_spte(pte, sp->role.level)) | 2680 | if (is_last_spte(pte, sp->role.level)) |
| 2511 | rmap_remove(vcpu->kvm, spte); | 2681 | drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte); |
| 2512 | else { | 2682 | else { |
| 2513 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 2683 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
| 2514 | mmu_page_remove_parent_pte(child, spte); | 2684 | mmu_page_remove_parent_pte(child, spte); |
| @@ -2529,6 +2699,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
| 2529 | return; | 2699 | return; |
| 2530 | } | 2700 | } |
| 2531 | 2701 | ||
| 2702 | if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) | ||
| 2703 | return; | ||
| 2704 | |||
| 2532 | ++vcpu->kvm->stat.mmu_pte_updated; | 2705 | ++vcpu->kvm->stat.mmu_pte_updated; |
| 2533 | if (!sp->role.cr4_pae) | 2706 | if (!sp->role.cr4_pae) |
| 2534 | paging32_update_pte(vcpu, sp, spte, new); | 2707 | paging32_update_pte(vcpu, sp, spte, new); |
| @@ -2549,11 +2722,15 @@ static bool need_remote_flush(u64 old, u64 new) | |||
| 2549 | return (old & ~new & PT64_PERM_MASK) != 0; | 2722 | return (old & ~new & PT64_PERM_MASK) != 0; |
| 2550 | } | 2723 | } |
| 2551 | 2724 | ||
| 2552 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new) | 2725 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, |
| 2726 | bool remote_flush, bool local_flush) | ||
| 2553 | { | 2727 | { |
| 2554 | if (need_remote_flush(old, new)) | 2728 | if (zap_page) |
| 2729 | return; | ||
| 2730 | |||
| 2731 | if (remote_flush) | ||
| 2555 | kvm_flush_remote_tlbs(vcpu->kvm); | 2732 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 2556 | else | 2733 | else if (local_flush) |
| 2557 | kvm_mmu_flush_tlb(vcpu); | 2734 | kvm_mmu_flush_tlb(vcpu); |
| 2558 | } | 2735 | } |
| 2559 | 2736 | ||
| @@ -2603,10 +2780,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2603 | bool guest_initiated) | 2780 | bool guest_initiated) |
| 2604 | { | 2781 | { |
| 2605 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2782 | gfn_t gfn = gpa >> PAGE_SHIFT; |
| 2783 | union kvm_mmu_page_role mask = { .word = 0 }; | ||
| 2606 | struct kvm_mmu_page *sp; | 2784 | struct kvm_mmu_page *sp; |
| 2607 | struct hlist_node *node, *n; | 2785 | struct hlist_node *node; |
| 2608 | struct hlist_head *bucket; | 2786 | LIST_HEAD(invalid_list); |
| 2609 | unsigned index; | ||
| 2610 | u64 entry, gentry; | 2787 | u64 entry, gentry; |
| 2611 | u64 *spte; | 2788 | u64 *spte; |
| 2612 | unsigned offset = offset_in_page(gpa); | 2789 | unsigned offset = offset_in_page(gpa); |
| @@ -2619,6 +2796,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2619 | int npte; | 2796 | int npte; |
| 2620 | int r; | 2797 | int r; |
| 2621 | int invlpg_counter; | 2798 | int invlpg_counter; |
| 2799 | bool remote_flush, local_flush, zap_page; | ||
| 2800 | |||
| 2801 | zap_page = remote_flush = local_flush = false; | ||
| 2622 | 2802 | ||
| 2623 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2803 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
| 2624 | 2804 | ||
| @@ -2674,13 +2854,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2674 | vcpu->arch.last_pte_updated = NULL; | 2854 | vcpu->arch.last_pte_updated = NULL; |
| 2675 | } | 2855 | } |
| 2676 | } | 2856 | } |
| 2677 | index = kvm_page_table_hashfn(gfn); | ||
| 2678 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | ||
| 2679 | 2857 | ||
| 2680 | restart: | 2858 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
| 2681 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2859 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
| 2682 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | ||
| 2683 | continue; | ||
| 2684 | pte_size = sp->role.cr4_pae ? 8 : 4; | 2860 | pte_size = sp->role.cr4_pae ? 8 : 4; |
| 2685 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2861 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
| 2686 | misaligned |= bytes < 4; | 2862 | misaligned |= bytes < 4; |
| @@ -2697,8 +2873,8 @@ restart: | |||
| 2697 | */ | 2873 | */ |
| 2698 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2874 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
| 2699 | gpa, bytes, sp->role.word); | 2875 | gpa, bytes, sp->role.word); |
| 2700 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2876 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
| 2701 | goto restart; | 2877 | &invalid_list); |
| 2702 | ++vcpu->kvm->stat.mmu_flooded; | 2878 | ++vcpu->kvm->stat.mmu_flooded; |
| 2703 | continue; | 2879 | continue; |
| 2704 | } | 2880 | } |
| @@ -2722,16 +2898,22 @@ restart: | |||
| 2722 | if (quadrant != sp->role.quadrant) | 2898 | if (quadrant != sp->role.quadrant) |
| 2723 | continue; | 2899 | continue; |
| 2724 | } | 2900 | } |
| 2901 | local_flush = true; | ||
| 2725 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2902 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
| 2726 | while (npte--) { | 2903 | while (npte--) { |
| 2727 | entry = *spte; | 2904 | entry = *spte; |
| 2728 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2905 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
| 2729 | if (gentry) | 2906 | if (gentry && |
| 2907 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | ||
| 2908 | & mask.word)) | ||
| 2730 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 2909 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
| 2731 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2910 | if (!remote_flush && need_remote_flush(entry, *spte)) |
| 2911 | remote_flush = true; | ||
| 2732 | ++spte; | 2912 | ++spte; |
| 2733 | } | 2913 | } |
| 2734 | } | 2914 | } |
| 2915 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | ||
| 2916 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
| 2735 | kvm_mmu_audit(vcpu, "post pte write"); | 2917 | kvm_mmu_audit(vcpu, "post pte write"); |
| 2736 | spin_unlock(&vcpu->kvm->mmu_lock); | 2918 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 2737 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | 2919 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { |
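Editorial note: a written gpte is now folded into a shadow page only when that page interprets gptes the same way the current mode does: sp->role.word is XOR-ed against the vcpu's base role and masked so that only the cr0_wp, cr4_pae and nxe bits matter, and a mismatch skips the speculative update. A sketch of that packed-bitfield comparison with a simplified role layout (the real union has many more fields):

#include <stdio.h>

union role {
	struct {
		unsigned level   : 4;
		unsigned cr4_pae : 1;
		unsigned cr0_wp  : 1;
		unsigned nxe     : 1;
	} f;
	unsigned word;
};

int main(void)
{
	union role sp   = { .f = { .level = 2, .cr4_pae = 1, .nxe = 1 } };
	union role base = { .f = { .level = 4, .cr4_pae = 1, .nxe = 1 } };
	union role mask = { .f = { .cr0_wp = 1, .cr4_pae = 1, .nxe = 1 } };

	/* The levels differ, but none of the gpte-interpretation bits do,
	 * so the speculative pte update is allowed. */
	if (!((sp.word ^ base.word) & mask.word))
		printf("roles compatible for pte update\n");
	return 0;
}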
| @@ -2759,15 +2941,21 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | |||
| 2759 | 2941 | ||
| 2760 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 2942 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
| 2761 | { | 2943 | { |
| 2762 | while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES && | 2944 | int free_pages; |
| 2945 | LIST_HEAD(invalid_list); | ||
| 2946 | |||
| 2947 | free_pages = vcpu->kvm->arch.n_free_mmu_pages; | ||
| 2948 | while (free_pages < KVM_REFILL_PAGES && | ||
| 2763 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 2949 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { |
| 2764 | struct kvm_mmu_page *sp; | 2950 | struct kvm_mmu_page *sp; |
| 2765 | 2951 | ||
| 2766 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | 2952 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, |
| 2767 | struct kvm_mmu_page, link); | 2953 | struct kvm_mmu_page, link); |
| 2768 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2954 | free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
| 2955 | &invalid_list); | ||
| 2769 | ++vcpu->kvm->stat.mmu_recycled; | 2956 | ++vcpu->kvm->stat.mmu_recycled; |
| 2770 | } | 2957 | } |
| 2958 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
| 2771 | } | 2959 | } |
| 2772 | 2960 | ||
| 2773 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 2961 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) |
| @@ -2795,11 +2983,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
| 2795 | return 1; | 2983 | return 1; |
| 2796 | case EMULATE_DO_MMIO: | 2984 | case EMULATE_DO_MMIO: |
| 2797 | ++vcpu->stat.mmio_exits; | 2985 | ++vcpu->stat.mmio_exits; |
| 2798 | return 0; | 2986 | /* fall through */ |
| 2799 | case EMULATE_FAIL: | 2987 | case EMULATE_FAIL: |
| 2800 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 2801 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 2802 | vcpu->run->internal.ndata = 0; | ||
| 2803 | return 0; | 2988 | return 0; |
| 2804 | default: | 2989 | default: |
| 2805 | BUG(); | 2990 | BUG(); |
| @@ -2896,7 +3081,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2896 | pt = sp->spt; | 3081 | pt = sp->spt; |
| 2897 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3082 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
| 2898 | /* avoid RMW */ | 3083 | /* avoid RMW */ |
| 2899 | if (pt[i] & PT_WRITABLE_MASK) | 3084 | if (is_writable_pte(pt[i])) |
| 2900 | pt[i] &= ~PT_WRITABLE_MASK; | 3085 | pt[i] &= ~PT_WRITABLE_MASK; |
| 2901 | } | 3086 | } |
| 2902 | kvm_flush_remote_tlbs(kvm); | 3087 | kvm_flush_remote_tlbs(kvm); |
| @@ -2905,25 +3090,26 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2905 | void kvm_mmu_zap_all(struct kvm *kvm) | 3090 | void kvm_mmu_zap_all(struct kvm *kvm) |
| 2906 | { | 3091 | { |
| 2907 | struct kvm_mmu_page *sp, *node; | 3092 | struct kvm_mmu_page *sp, *node; |
| 3093 | LIST_HEAD(invalid_list); | ||
| 2908 | 3094 | ||
| 2909 | spin_lock(&kvm->mmu_lock); | 3095 | spin_lock(&kvm->mmu_lock); |
| 2910 | restart: | 3096 | restart: |
| 2911 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 3097 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
| 2912 | if (kvm_mmu_zap_page(kvm, sp)) | 3098 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) |
| 2913 | goto restart; | 3099 | goto restart; |
| 2914 | 3100 | ||
| 3101 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
| 2915 | spin_unlock(&kvm->mmu_lock); | 3102 | spin_unlock(&kvm->mmu_lock); |
| 2916 | |||
| 2917 | kvm_flush_remote_tlbs(kvm); | ||
| 2918 | } | 3103 | } |
| 2919 | 3104 | ||
| 2920 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) | 3105 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, |
| 3106 | struct list_head *invalid_list) | ||
| 2921 | { | 3107 | { |
| 2922 | struct kvm_mmu_page *page; | 3108 | struct kvm_mmu_page *page; |
| 2923 | 3109 | ||
| 2924 | page = container_of(kvm->arch.active_mmu_pages.prev, | 3110 | page = container_of(kvm->arch.active_mmu_pages.prev, |
| 2925 | struct kvm_mmu_page, link); | 3111 | struct kvm_mmu_page, link); |
| 2926 | return kvm_mmu_zap_page(kvm, page) + 1; | 3112 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
| 2927 | } | 3113 | } |
| 2928 | 3114 | ||
| 2929 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 3115 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
| @@ -2936,6 +3122,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
| 2936 | 3122 | ||
| 2937 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3123 | list_for_each_entry(kvm, &vm_list, vm_list) { |
| 2938 | int npages, idx, freed_pages; | 3124 | int npages, idx, freed_pages; |
| 3125 | LIST_HEAD(invalid_list); | ||
| 2939 | 3126 | ||
| 2940 | idx = srcu_read_lock(&kvm->srcu); | 3127 | idx = srcu_read_lock(&kvm->srcu); |
| 2941 | spin_lock(&kvm->mmu_lock); | 3128 | spin_lock(&kvm->mmu_lock); |
| @@ -2943,12 +3130,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
| 2943 | kvm->arch.n_free_mmu_pages; | 3130 | kvm->arch.n_free_mmu_pages; |
| 2944 | cache_count += npages; | 3131 | cache_count += npages; |
| 2945 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 3132 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
| 2946 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); | 3133 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
| 3134 | &invalid_list); | ||
| 2947 | cache_count -= freed_pages; | 3135 | cache_count -= freed_pages; |
| 2948 | kvm_freed = kvm; | 3136 | kvm_freed = kvm; |
| 2949 | } | 3137 | } |
| 2950 | nr_to_scan--; | 3138 | nr_to_scan--; |
| 2951 | 3139 | ||
| 3140 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
| 2952 | spin_unlock(&kvm->mmu_lock); | 3141 | spin_unlock(&kvm->mmu_lock); |
| 2953 | srcu_read_unlock(&kvm->srcu, idx); | 3142 | srcu_read_unlock(&kvm->srcu, idx); |
| 2954 | } | 3143 | } |
| @@ -3074,7 +3263,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | |||
| 3074 | 3263 | ||
| 3075 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 3264 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
| 3076 | { | 3265 | { |
| 3077 | kvm_set_cr3(vcpu, vcpu->arch.cr3); | 3266 | (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); |
| 3078 | return 1; | 3267 | return 1; |
| 3079 | } | 3268 | } |
| 3080 | 3269 | ||
| @@ -3331,9 +3520,9 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
| 3331 | struct kvm_mmu_page *rev_sp; | 3520 | struct kvm_mmu_page *rev_sp; |
| 3332 | gfn_t gfn; | 3521 | gfn_t gfn; |
| 3333 | 3522 | ||
| 3334 | if (*sptep & PT_WRITABLE_MASK) { | 3523 | if (is_writable_pte(*sptep)) { |
| 3335 | rev_sp = page_header(__pa(sptep)); | 3524 | rev_sp = page_header(__pa(sptep)); |
| 3336 | gfn = rev_sp->gfns[sptep - rev_sp->spt]; | 3525 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); |
| 3337 | 3526 | ||
| 3338 | if (!gfn_to_memslot(kvm, gfn)) { | 3527 | if (!gfn_to_memslot(kvm, gfn)) { |
| 3339 | if (!printk_ratelimit()) | 3528 | if (!printk_ratelimit()) |
| @@ -3347,8 +3536,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
| 3347 | return; | 3536 | return; |
| 3348 | } | 3537 | } |
| 3349 | 3538 | ||
| 3350 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3539 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); |
| 3351 | rev_sp->role.level); | ||
| 3352 | if (!*rmapp) { | 3540 | if (!*rmapp) { |
| 3353 | if (!printk_ratelimit()) | 3541 | if (!printk_ratelimit()) |
| 3354 | return; | 3542 | return; |
| @@ -3381,7 +3569,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
| 3381 | 3569 | ||
| 3382 | if (!(ent & PT_PRESENT_MASK)) | 3570 | if (!(ent & PT_PRESENT_MASK)) |
| 3383 | continue; | 3571 | continue; |
| 3384 | if (!(ent & PT_WRITABLE_MASK)) | 3572 | if (!is_writable_pte(ent)) |
| 3385 | continue; | 3573 | continue; |
| 3386 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); | 3574 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
| 3387 | } | 3575 | } |
| @@ -3409,13 +3597,12 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) | |||
| 3409 | if (sp->unsync) | 3597 | if (sp->unsync) |
| 3410 | continue; | 3598 | continue; |
| 3411 | 3599 | ||
| 3412 | gfn = unalias_gfn(vcpu->kvm, sp->gfn); | 3600 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); |
| 3413 | slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); | ||
| 3414 | rmapp = &slot->rmap[gfn - slot->base_gfn]; | 3601 | rmapp = &slot->rmap[gfn - slot->base_gfn]; |
| 3415 | 3602 | ||
| 3416 | spte = rmap_next(vcpu->kvm, rmapp, NULL); | 3603 | spte = rmap_next(vcpu->kvm, rmapp, NULL); |
| 3417 | while (spte) { | 3604 | while (spte) { |
| 3418 | if (*spte & PT_WRITABLE_MASK) | 3605 | if (is_writable_pte(*spte)) |
| 3419 | printk(KERN_ERR "%s: (%s) shadow page has " | 3606 | printk(KERN_ERR "%s: (%s) shadow page has " |
| 3420 | "writable mappings: gfn %lx role %x\n", | 3607 | "writable mappings: gfn %lx role %x\n", |
| 3421 | __func__, audit_msg, sp->gfn, | 3608 | __func__, audit_msg, sp->gfn, |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 42f07b1bfbc9..3aab0f0930ef 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
| @@ -190,7 +190,7 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, | |||
| 190 | TP_ARGS(sp) | 190 | TP_ARGS(sp) |
| 191 | ); | 191 | ); |
| 192 | 192 | ||
| 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, |
| 194 | TP_PROTO(struct kvm_mmu_page *sp), | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
| 195 | 195 | ||
| 196 | TP_ARGS(sp) | 196 | TP_ARGS(sp) |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2331bdc2b549..51ef9097960d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | * MMU support | 7 | * MMU support |
| 8 | * | 8 | * |
| 9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
| 10 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 10 | * | 11 | * |
| 11 | * Authors: | 12 | * Authors: |
| 12 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
| @@ -118,21 +119,25 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
| 118 | { | 119 | { |
| 119 | pt_element_t pte; | 120 | pt_element_t pte; |
| 120 | gfn_t table_gfn; | 121 | gfn_t table_gfn; |
| 121 | unsigned index, pt_access, pte_access; | 122 | unsigned index, pt_access, uninitialized_var(pte_access); |
| 122 | gpa_t pte_gpa; | 123 | gpa_t pte_gpa; |
| 123 | int rsvd_fault = 0; | 124 | bool eperm, present, rsvd_fault; |
| 124 | 125 | ||
| 125 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, | 126 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, |
| 126 | fetch_fault); | 127 | fetch_fault); |
| 127 | walk: | 128 | walk: |
| 129 | present = true; | ||
| 130 | eperm = rsvd_fault = false; | ||
| 128 | walker->level = vcpu->arch.mmu.root_level; | 131 | walker->level = vcpu->arch.mmu.root_level; |
| 129 | pte = vcpu->arch.cr3; | 132 | pte = vcpu->arch.cr3; |
| 130 | #if PTTYPE == 64 | 133 | #if PTTYPE == 64 |
| 131 | if (!is_long_mode(vcpu)) { | 134 | if (!is_long_mode(vcpu)) { |
| 132 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); | 135 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); |
| 133 | trace_kvm_mmu_paging_element(pte, walker->level); | 136 | trace_kvm_mmu_paging_element(pte, walker->level); |
| 134 | if (!is_present_gpte(pte)) | 137 | if (!is_present_gpte(pte)) { |
| 135 | goto not_present; | 138 | present = false; |
| 139 | goto error; | ||
| 140 | } | ||
| 136 | --walker->level; | 141 | --walker->level; |
| 137 | } | 142 | } |
| 138 | #endif | 143 | #endif |
| @@ -150,37 +155,42 @@ walk: | |||
| 150 | walker->table_gfn[walker->level - 1] = table_gfn; | 155 | walker->table_gfn[walker->level - 1] = table_gfn; |
| 151 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 156 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
| 152 | 157 | ||
| 153 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) | 158 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) { |
| 154 | goto not_present; | 159 | present = false; |
| 160 | break; | ||
| 161 | } | ||
| 155 | 162 | ||
| 156 | trace_kvm_mmu_paging_element(pte, walker->level); | 163 | trace_kvm_mmu_paging_element(pte, walker->level); |
| 157 | 164 | ||
| 158 | if (!is_present_gpte(pte)) | 165 | if (!is_present_gpte(pte)) { |
| 159 | goto not_present; | 166 | present = false; |
| 167 | break; | ||
| 168 | } | ||
| 160 | 169 | ||
| 161 | rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); | 170 | if (is_rsvd_bits_set(vcpu, pte, walker->level)) { |
| 162 | if (rsvd_fault) | 171 | rsvd_fault = true; |
| 163 | goto access_error; | 172 | break; |
| 173 | } | ||
| 164 | 174 | ||
| 165 | if (write_fault && !is_writable_pte(pte)) | 175 | if (write_fault && !is_writable_pte(pte)) |
| 166 | if (user_fault || is_write_protection(vcpu)) | 176 | if (user_fault || is_write_protection(vcpu)) |
| 167 | goto access_error; | 177 | eperm = true; |
| 168 | 178 | ||
| 169 | if (user_fault && !(pte & PT_USER_MASK)) | 179 | if (user_fault && !(pte & PT_USER_MASK)) |
| 170 | goto access_error; | 180 | eperm = true; |
| 171 | 181 | ||
| 172 | #if PTTYPE == 64 | 182 | #if PTTYPE == 64 |
| 173 | if (fetch_fault && (pte & PT64_NX_MASK)) | 183 | if (fetch_fault && (pte & PT64_NX_MASK)) |
| 174 | goto access_error; | 184 | eperm = true; |
| 175 | #endif | 185 | #endif |
| 176 | 186 | ||
| 177 | if (!(pte & PT_ACCESSED_MASK)) { | 187 | if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { |
| 178 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 188 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
| 179 | sizeof(pte)); | 189 | sizeof(pte)); |
| 180 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
| 181 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, | 190 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, |
| 182 | index, pte, pte|PT_ACCESSED_MASK)) | 191 | index, pte, pte|PT_ACCESSED_MASK)) |
| 183 | goto walk; | 192 | goto walk; |
| 193 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
| 184 | pte |= PT_ACCESSED_MASK; | 194 | pte |= PT_ACCESSED_MASK; |
| 185 | } | 195 | } |
| 186 | 196 | ||
| @@ -213,15 +223,18 @@ walk: | |||
| 213 | --walker->level; | 223 | --walker->level; |
| 214 | } | 224 | } |
| 215 | 225 | ||
| 226 | if (!present || eperm || rsvd_fault) | ||
| 227 | goto error; | ||
| 228 | |||
| 216 | if (write_fault && !is_dirty_gpte(pte)) { | 229 | if (write_fault && !is_dirty_gpte(pte)) { |
| 217 | bool ret; | 230 | bool ret; |
| 218 | 231 | ||
| 219 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 232 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
| 220 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
| 221 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, | 233 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, |
| 222 | pte|PT_DIRTY_MASK); | 234 | pte|PT_DIRTY_MASK); |
| 223 | if (ret) | 235 | if (ret) |
| 224 | goto walk; | 236 | goto walk; |
| 237 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
| 225 | pte |= PT_DIRTY_MASK; | 238 | pte |= PT_DIRTY_MASK; |
| 226 | walker->ptes[walker->level - 1] = pte; | 239 | walker->ptes[walker->level - 1] = pte; |
| 227 | } | 240 | } |
| @@ -229,22 +242,18 @@ walk: | |||
| 229 | walker->pt_access = pt_access; | 242 | walker->pt_access = pt_access; |
| 230 | walker->pte_access = pte_access; | 243 | walker->pte_access = pte_access; |
| 231 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 244 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
| 232 | __func__, (u64)pte, pt_access, pte_access); | 245 | __func__, (u64)pte, pte_access, pt_access); |
| 233 | return 1; | 246 | return 1; |
| 234 | 247 | ||
| 235 | not_present: | 248 | error: |
| 236 | walker->error_code = 0; | 249 | walker->error_code = 0; |
| 237 | goto err; | 250 | if (present) |
| 238 | 251 | walker->error_code |= PFERR_PRESENT_MASK; | |
| 239 | access_error: | ||
| 240 | walker->error_code = PFERR_PRESENT_MASK; | ||
| 241 | |||
| 242 | err: | ||
| 243 | if (write_fault) | 252 | if (write_fault) |
| 244 | walker->error_code |= PFERR_WRITE_MASK; | 253 | walker->error_code |= PFERR_WRITE_MASK; |
| 245 | if (user_fault) | 254 | if (user_fault) |
| 246 | walker->error_code |= PFERR_USER_MASK; | 255 | walker->error_code |= PFERR_USER_MASK; |
| 247 | if (fetch_fault) | 256 | if (fetch_fault && is_nx(vcpu)) |
| 248 | walker->error_code |= PFERR_FETCH_MASK; | 257 | walker->error_code |= PFERR_FETCH_MASK; |
| 249 | if (rsvd_fault) | 258 | if (rsvd_fault) |
| 250 | walker->error_code |= PFERR_RSVD_MASK; | 259 | walker->error_code |= PFERR_RSVD_MASK; |
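The reworked walker collects the fault conditions in booleans (present, eperm, rsvd_fault) and assembles the page-fault error code once, at a single error exit, instead of jumping to separate not_present/access_error labels. Below is a minimal freestanding sketch of that error-code assembly using the architectural x86 page-fault error-code bit positions; the helper name and flag parameters are illustrative, not the kernel's.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PFERR_PRESENT_MASK (1U << 0)	/* fault on a present entry */
#define PFERR_WRITE_MASK   (1U << 1)	/* write access */
#define PFERR_USER_MASK    (1U << 2)	/* user-mode access */
#define PFERR_RSVD_MASK    (1U << 3)	/* reserved bit set in a pte */
#define PFERR_FETCH_MASK   (1U << 4)	/* instruction fetch */

/* Illustrative: build an x86-style page-fault error code from flags. */
static uint32_t build_error_code(bool present, bool write_fault,
				 bool user_fault, bool fetch_fault,
				 bool rsvd_fault)
{
	uint32_t ec = 0;

	if (present)
		ec |= PFERR_PRESENT_MASK;
	if (write_fault)
		ec |= PFERR_WRITE_MASK;
	if (user_fault)
		ec |= PFERR_USER_MASK;
	if (fetch_fault)
		ec |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		ec |= PFERR_RSVD_MASK;
	return ec;
}

int main(void)
{
	/* e.g. a user-mode write that hit a present but read-only entry */
	printf("error code: %#x\n",
	       (unsigned)build_error_code(true, true, true, false, false));
	return 0;
}
```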
| @@ -252,7 +261,7 @@ err: | |||
| 252 | return 0; | 261 | return 0; |
| 253 | } | 262 | } |
| 254 | 263 | ||
| 255 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | 264 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
| 256 | u64 *spte, const void *pte) | 265 | u64 *spte, const void *pte) |
| 257 | { | 266 | { |
| 258 | pt_element_t gpte; | 267 | pt_element_t gpte; |
| @@ -263,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 263 | gpte = *(const pt_element_t *)pte; | 272 | gpte = *(const pt_element_t *)pte; |
| 264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 273 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
| 265 | if (!is_present_gpte(gpte)) { | 274 | if (!is_present_gpte(gpte)) { |
| 266 | if (page->unsync) | 275 | if (sp->unsync) |
| 267 | new_spte = shadow_trap_nonpresent_pte; | 276 | new_spte = shadow_trap_nonpresent_pte; |
| 268 | else | 277 | else |
| 269 | new_spte = shadow_notrap_nonpresent_pte; | 278 | new_spte = shadow_notrap_nonpresent_pte; |
| @@ -272,7 +281,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 272 | return; | 281 | return; |
| 273 | } | 282 | } |
| 274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 283 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
| 275 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); | 284 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
| 276 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 285 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
| 277 | return; | 286 | return; |
| 278 | pfn = vcpu->arch.update_pte.pfn; | 287 | pfn = vcpu->arch.update_pte.pfn; |
| @@ -285,11 +294,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 285 | * we call mmu_set_spte() with reset_host_protection = true because that | 294 | * we call mmu_set_spte() with reset_host_protection = true because that |
| 286 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 295 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
| 287 | */ | 296 | */ |
| 288 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 297 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
| 289 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, | 298 | is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, |
| 290 | gpte_to_gfn(gpte), pfn, true, true); | 299 | gpte_to_gfn(gpte), pfn, true, true); |
| 291 | } | 300 | } |
| 292 | 301 | ||
| 302 | static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, | ||
| 303 | struct guest_walker *gw, int level) | ||
| 304 | { | ||
| 305 | int r; | ||
| 306 | pt_element_t curr_pte; | ||
| 307 | |||
| 308 | r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1], | ||
| 309 | &curr_pte, sizeof(curr_pte)); | ||
| 310 | return r || curr_pte != gw->ptes[level - 1]; | ||
| 311 | } | ||
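The new gpte_changed() helper re-reads the guest entry and treats a failed read the same as a modified entry, so callers only test one condition. A freestanding sketch of the same idiom follows; the reader callback and names are stand-ins, not a KVM API.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for a guest-memory reader: 0 on success, non-zero on failure. */
typedef int (*read_fn)(uint64_t addr, uint64_t *val);

/*
 * Report "changed" when the entry can no longer be read or no longer
 * matches the value cached during the earlier walk.
 */
static bool entry_changed(read_fn read, uint64_t addr, uint64_t cached)
{
	uint64_t cur;

	return read(addr, &cur) || cur != cached;
}

static int fake_read(uint64_t addr, uint64_t *val)
{
	(void)addr;
	*val = 0x1234;		/* pretend the entry still holds its old value */
	return 0;
}

int main(void)
{
	printf("changed: %d\n", entry_changed(fake_read, 0, 0x1234));
	return 0;
}
```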
| 312 | |||
| 293 | /* | 313 | /* |
| 294 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 314 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
| 295 | */ | 315 | */ |
| @@ -299,75 +319,86 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 299 | int *ptwrite, pfn_t pfn) | 319 | int *ptwrite, pfn_t pfn) |
| 300 | { | 320 | { |
| 301 | unsigned access = gw->pt_access; | 321 | unsigned access = gw->pt_access; |
| 302 | struct kvm_mmu_page *shadow_page; | 322 | struct kvm_mmu_page *sp = NULL; |
| 303 | u64 spte, *sptep = NULL; | 323 | bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]); |
| 304 | int direct; | 324 | int top_level; |
| 305 | gfn_t table_gfn; | 325 | unsigned direct_access; |
| 306 | int r; | 326 | struct kvm_shadow_walk_iterator it; |
| 307 | int level; | ||
| 308 | pt_element_t curr_pte; | ||
| 309 | struct kvm_shadow_walk_iterator iterator; | ||
| 310 | 327 | ||
| 311 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | 328 | if (!is_present_gpte(gw->ptes[gw->level - 1])) |
| 312 | return NULL; | 329 | return NULL; |
| 313 | 330 | ||
| 314 | for_each_shadow_entry(vcpu, addr, iterator) { | 331 | direct_access = gw->pt_access & gw->pte_access; |
| 315 | level = iterator.level; | 332 | if (!dirty) |
| 316 | sptep = iterator.sptep; | 333 | direct_access &= ~ACC_WRITE_MASK; |
| 317 | if (iterator.level == hlevel) { | ||
| 318 | mmu_set_spte(vcpu, sptep, access, | ||
| 319 | gw->pte_access & access, | ||
| 320 | user_fault, write_fault, | ||
| 321 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | ||
| 322 | ptwrite, level, | ||
| 323 | gw->gfn, pfn, false, true); | ||
| 324 | break; | ||
| 325 | } | ||
| 326 | 334 | ||
| 327 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) | 335 | top_level = vcpu->arch.mmu.root_level; |
| 328 | continue; | 336 | if (top_level == PT32E_ROOT_LEVEL) |
| 337 | top_level = PT32_ROOT_LEVEL; | ||
| 338 | /* | ||
| 339 | * Verify that the top-level gpte is still there. Since the page | ||
| 340 | * is a root page, it is either write protected (and cannot be | ||
| 341 | * changed from now on) or it is invalid (in which case, we don't | ||
| 342 | * really care if it changes underneath us after this point). | ||
| 343 | */ | ||
| 344 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) | ||
| 345 | goto out_gpte_changed; | ||
| 329 | 346 | ||
| 330 | if (is_large_pte(*sptep)) { | 347 | for (shadow_walk_init(&it, vcpu, addr); |
| 331 | rmap_remove(vcpu->kvm, sptep); | 348 | shadow_walk_okay(&it) && it.level > gw->level; |
| 332 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 349 | shadow_walk_next(&it)) { |
| 333 | kvm_flush_remote_tlbs(vcpu->kvm); | 350 | gfn_t table_gfn; |
| 334 | } | ||
| 335 | 351 | ||
| 336 | if (level <= gw->level) { | 352 | drop_large_spte(vcpu, it.sptep); |
| 337 | int delta = level - gw->level + 1; | 353 | |
| 338 | direct = 1; | 354 | sp = NULL; |
| 339 | if (!is_dirty_gpte(gw->ptes[level - delta])) | 355 | if (!is_shadow_present_pte(*it.sptep)) { |
| 340 | access &= ~ACC_WRITE_MASK; | 356 | table_gfn = gw->table_gfn[it.level - 2]; |
| 341 | table_gfn = gpte_to_gfn(gw->ptes[level - delta]); | 357 | sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, |
| 342 | /* advance table_gfn when emulating 1gb pages with 4k */ | 358 | false, access, it.sptep); |
| 343 | if (delta == 0) | ||
| 344 | table_gfn += PT_INDEX(addr, level); | ||
| 345 | access &= gw->pte_access; | ||
| 346 | } else { | ||
| 347 | direct = 0; | ||
| 348 | table_gfn = gw->table_gfn[level - 2]; | ||
| 349 | } | ||
| 350 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | ||
| 351 | direct, access, sptep); | ||
| 352 | if (!direct) { | ||
| 353 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
| 354 | gw->pte_gpa[level - 2], | ||
| 355 | &curr_pte, sizeof(curr_pte)); | ||
| 356 | if (r || curr_pte != gw->ptes[level - 2]) { | ||
| 357 | kvm_mmu_put_page(shadow_page, sptep); | ||
| 358 | kvm_release_pfn_clean(pfn); | ||
| 359 | sptep = NULL; | ||
| 360 | break; | ||
| 361 | } | ||
| 362 | } | 359 | } |
| 363 | 360 | ||
| 364 | spte = __pa(shadow_page->spt) | 361 | /* |
| 365 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 362 | * Verify that the gpte in the page we've just write |
| 366 | | PT_WRITABLE_MASK | PT_USER_MASK; | 363 | * protected is still there. |
| 367 | *sptep = spte; | 364 | */ |
| 365 | if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) | ||
| 366 | goto out_gpte_changed; | ||
| 367 | |||
| 368 | if (sp) | ||
| 369 | link_shadow_page(it.sptep, sp); | ||
| 368 | } | 370 | } |
| 369 | 371 | ||
| 370 | return sptep; | 372 | for (; |
| 373 | shadow_walk_okay(&it) && it.level > hlevel; | ||
| 374 | shadow_walk_next(&it)) { | ||
| 375 | gfn_t direct_gfn; | ||
| 376 | |||
| 377 | validate_direct_spte(vcpu, it.sptep, direct_access); | ||
| 378 | |||
| 379 | drop_large_spte(vcpu, it.sptep); | ||
| 380 | |||
| 381 | if (is_shadow_present_pte(*it.sptep)) | ||
| 382 | continue; | ||
| 383 | |||
| 384 | direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | ||
| 385 | |||
| 386 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | ||
| 387 | true, direct_access, it.sptep); | ||
| 388 | link_shadow_page(it.sptep, sp); | ||
| 389 | } | ||
| 390 | |||
| 391 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, | ||
| 392 | user_fault, write_fault, dirty, ptwrite, it.level, | ||
| 393 | gw->gfn, pfn, false, true); | ||
| 394 | |||
| 395 | return it.sptep; | ||
| 396 | |||
| 397 | out_gpte_changed: | ||
| 398 | if (sp) | ||
| 399 | kvm_mmu_put_page(sp, it.sptep); | ||
| 400 | kvm_release_pfn_clean(pfn); | ||
| 401 | return NULL; | ||
| 371 | } | 402 | } |
| 372 | 403 | ||
| 373 | /* | 404 | /* |
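One detail of the rewritten fetch() above: when the guest PTE's dirty bit is clear, write permission is stripped from the direct mappings so the first guest write still faults and the dirty bit can be set at that point. A small sketch of that access-mask computation; the ACC_* values and function name here are illustrative placeholders rather than the kernel's definitions.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative permission bits for a software page-table walker. */
#define ACC_EXEC_MASK  1u
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

/*
 * Combine the permissions of the table entry and the leaf entry, then
 * drop write access while the guest entry is not yet marked dirty, so
 * the first write takes a fault that lets the dirty bit be recorded.
 */
static unsigned int effective_access(unsigned int pt_access,
				     unsigned int pte_access, bool dirty)
{
	unsigned int access = pt_access & pte_access;

	if (!dirty)
		access &= ~ACC_WRITE_MASK;
	return access;
}

int main(void)
{
	unsigned int acc = effective_access(ACC_WRITE_MASK | ACC_USER_MASK,
					    ACC_WRITE_MASK | ACC_USER_MASK,
					    false);
	printf("writable: %s\n", (acc & ACC_WRITE_MASK) ? "yes" : "no");
	return 0;
}
```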
| @@ -431,11 +462,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 431 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 462 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
| 432 | 463 | ||
| 433 | /* mmio */ | 464 | /* mmio */ |
| 434 | if (is_error_pfn(pfn)) { | 465 | if (is_error_pfn(pfn)) |
| 435 | pgprintk("gfn %lx is mmio\n", walker.gfn); | 466 | return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); |
| 436 | kvm_release_pfn_clean(pfn); | ||
| 437 | return 1; | ||
| 438 | } | ||
| 439 | 467 | ||
| 440 | spin_lock(&vcpu->kvm->mmu_lock); | 468 | spin_lock(&vcpu->kvm->mmu_lock); |
| 441 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 469 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
| @@ -443,6 +471,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 443 | kvm_mmu_free_some_pages(vcpu); | 471 | kvm_mmu_free_some_pages(vcpu); |
| 444 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 472 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
| 445 | level, &write_pt, pfn); | 473 | level, &write_pt, pfn); |
| 474 | (void)sptep; | ||
| 446 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | 475 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, |
| 447 | sptep, *sptep, write_pt); | 476 | sptep, *sptep, write_pt); |
| 448 | 477 | ||
| @@ -464,6 +493,7 @@ out_unlock: | |||
| 464 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 493 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
| 465 | { | 494 | { |
| 466 | struct kvm_shadow_walk_iterator iterator; | 495 | struct kvm_shadow_walk_iterator iterator; |
| 496 | struct kvm_mmu_page *sp; | ||
| 467 | gpa_t pte_gpa = -1; | 497 | gpa_t pte_gpa = -1; |
| 468 | int level; | 498 | int level; |
| 469 | u64 *sptep; | 499 | u64 *sptep; |
| @@ -475,10 +505,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 475 | level = iterator.level; | 505 | level = iterator.level; |
| 476 | sptep = iterator.sptep; | 506 | sptep = iterator.sptep; |
| 477 | 507 | ||
| 508 | sp = page_header(__pa(sptep)); | ||
| 478 | if (is_last_spte(*sptep, level)) { | 509 | if (is_last_spte(*sptep, level)) { |
| 479 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
| 480 | int offset, shift; | 510 | int offset, shift; |
| 481 | 511 | ||
| 512 | if (!sp->unsync) | ||
| 513 | break; | ||
| 514 | |||
| 482 | shift = PAGE_SHIFT - | 515 | shift = PAGE_SHIFT - |
| 483 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | 516 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; |
| 484 | offset = sp->role.quadrant << shift; | 517 | offset = sp->role.quadrant << shift; |
| @@ -487,16 +520,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 487 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | 520 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); |
| 488 | 521 | ||
| 489 | if (is_shadow_present_pte(*sptep)) { | 522 | if (is_shadow_present_pte(*sptep)) { |
| 490 | rmap_remove(vcpu->kvm, sptep); | ||
| 491 | if (is_large_pte(*sptep)) | 523 | if (is_large_pte(*sptep)) |
| 492 | --vcpu->kvm->stat.lpages; | 524 | --vcpu->kvm->stat.lpages; |
| 525 | drop_spte(vcpu->kvm, sptep, | ||
| 526 | shadow_trap_nonpresent_pte); | ||
| 493 | need_flush = 1; | 527 | need_flush = 1; |
| 494 | } | 528 | } else |
| 495 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 529 | __set_spte(sptep, shadow_trap_nonpresent_pte); |
| 496 | break; | 530 | break; |
| 497 | } | 531 | } |
| 498 | 532 | ||
| 499 | if (!is_shadow_present_pte(*sptep)) | 533 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) |
| 500 | break; | 534 | break; |
| 501 | } | 535 | } |
| 502 | 536 | ||
| @@ -570,9 +604,9 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
| 570 | * Using the cached information from sp->gfns is safe because: | 604 | * Using the cached information from sp->gfns is safe because: |
| 571 | * - The spte has a reference to the struct page, so the pfn for a given gfn | 605 | * - The spte has a reference to the struct page, so the pfn for a given gfn |
| 572 | * can't change unless all sptes pointing to it are nuked first. | 606 | * can't change unless all sptes pointing to it are nuked first. |
| 573 | * - Alias changes zap the entire shadow cache. | ||
| 574 | */ | 607 | */ |
| 575 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 608 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
| 609 | bool clear_unsync) | ||
| 576 | { | 610 | { |
| 577 | int i, offset, nr_present; | 611 | int i, offset, nr_present; |
| 578 | bool reset_host_protection; | 612 | bool reset_host_protection; |
| @@ -580,6 +614,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 580 | 614 | ||
| 581 | offset = nr_present = 0; | 615 | offset = nr_present = 0; |
| 582 | 616 | ||
| 617 | /* direct kvm_mmu_page cannot be unsync. */ | ||
| 618 | BUG_ON(sp->role.direct); | ||
| 619 | |||
| 583 | if (PTTYPE == 32) | 620 | if (PTTYPE == 32) |
| 584 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 621 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
| 585 | 622 | ||
| @@ -589,7 +626,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 589 | unsigned pte_access; | 626 | unsigned pte_access; |
| 590 | pt_element_t gpte; | 627 | pt_element_t gpte; |
| 591 | gpa_t pte_gpa; | 628 | gpa_t pte_gpa; |
| 592 | gfn_t gfn = sp->gfns[i]; | 629 | gfn_t gfn; |
| 593 | 630 | ||
| 594 | if (!is_shadow_present_pte(sp->spt[i])) | 631 | if (!is_shadow_present_pte(sp->spt[i])) |
| 595 | continue; | 632 | continue; |
| @@ -600,16 +637,17 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 600 | sizeof(pt_element_t))) | 637 | sizeof(pt_element_t))) |
| 601 | return -EINVAL; | 638 | return -EINVAL; |
| 602 | 639 | ||
| 603 | if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || | 640 | gfn = gpte_to_gfn(gpte); |
| 604 | !(gpte & PT_ACCESSED_MASK)) { | 641 | if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) |
| 642 | || gfn != sp->gfns[i] || !is_present_gpte(gpte) | ||
| 643 | || !(gpte & PT_ACCESSED_MASK)) { | ||
| 605 | u64 nonpresent; | 644 | u64 nonpresent; |
| 606 | 645 | ||
| 607 | rmap_remove(vcpu->kvm, &sp->spt[i]); | 646 | if (is_present_gpte(gpte) || !clear_unsync) |
| 608 | if (is_present_gpte(gpte)) | ||
| 609 | nonpresent = shadow_trap_nonpresent_pte; | 647 | nonpresent = shadow_trap_nonpresent_pte; |
| 610 | else | 648 | else |
| 611 | nonpresent = shadow_notrap_nonpresent_pte; | 649 | nonpresent = shadow_notrap_nonpresent_pte; |
| 612 | __set_spte(&sp->spt[i], nonpresent); | 650 | drop_spte(vcpu->kvm, &sp->spt[i], nonpresent); |
| 613 | continue; | 651 | continue; |
| 614 | } | 652 | } |
| 615 | 653 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce438e0fdd26..56c9b6bd7655 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | * AMD SVM support | 4 | * AMD SVM support |
| 5 | * | 5 | * |
| 6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
| 7 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 7 | * | 8 | * |
| 8 | * Authors: | 9 | * Authors: |
| 9 | * Yaniv Kamay <yaniv@qumranet.com> | 10 | * Yaniv Kamay <yaniv@qumranet.com> |
| @@ -285,11 +286,11 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) | |||
| 285 | 286 | ||
| 286 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 287 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
| 287 | { | 288 | { |
| 289 | vcpu->arch.efer = efer; | ||
| 288 | if (!npt_enabled && !(efer & EFER_LMA)) | 290 | if (!npt_enabled && !(efer & EFER_LMA)) |
| 289 | efer &= ~EFER_LME; | 291 | efer &= ~EFER_LME; |
| 290 | 292 | ||
| 291 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 293 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
| 292 | vcpu->arch.efer = efer; | ||
| 293 | } | 294 | } |
| 294 | 295 | ||
| 295 | static int is_external_interrupt(u32 info) | 296 | static int is_external_interrupt(u32 info) |
| @@ -640,7 +641,7 @@ static __init int svm_hardware_setup(void) | |||
| 640 | 641 | ||
| 641 | if (nested) { | 642 | if (nested) { |
| 642 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); | 643 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); |
| 643 | kvm_enable_efer_bits(EFER_SVME); | 644 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); |
| 644 | } | 645 | } |
| 645 | 646 | ||
| 646 | for_each_possible_cpu(cpu) { | 647 | for_each_possible_cpu(cpu) { |
| @@ -806,7 +807,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 806 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 807 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
| 807 | */ | 808 | */ |
| 808 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 809 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
| 809 | kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); | 810 | (void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); |
| 810 | 811 | ||
| 811 | save->cr4 = X86_CR4_PAE; | 812 | save->cr4 = X86_CR4_PAE; |
| 812 | /* rdx = ?? */ | 813 | /* rdx = ?? */ |
| @@ -903,13 +904,18 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 903 | svm->asid_generation = 0; | 904 | svm->asid_generation = 0; |
| 904 | init_vmcb(svm); | 905 | init_vmcb(svm); |
| 905 | 906 | ||
| 906 | fx_init(&svm->vcpu); | 907 | err = fx_init(&svm->vcpu); |
| 908 | if (err) | ||
| 909 | goto free_page4; | ||
| 910 | |||
| 907 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 911 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
| 908 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 912 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
| 909 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 913 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
| 910 | 914 | ||
| 911 | return &svm->vcpu; | 915 | return &svm->vcpu; |
| 912 | 916 | ||
| 917 | free_page4: | ||
| 918 | __free_page(hsave_page); | ||
| 913 | free_page3: | 919 | free_page3: |
| 914 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); | 920 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); |
| 915 | free_page2: | 921 | free_page2: |
| @@ -1488,7 +1494,7 @@ static void svm_handle_mce(struct vcpu_svm *svm) | |||
| 1488 | */ | 1494 | */ |
| 1489 | pr_err("KVM: Guest triggered AMD Erratum 383\n"); | 1495 | pr_err("KVM: Guest triggered AMD Erratum 383\n"); |
| 1490 | 1496 | ||
| 1491 | set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests); | 1497 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); |
| 1492 | 1498 | ||
| 1493 | return; | 1499 | return; |
| 1494 | } | 1500 | } |
| @@ -1535,7 +1541,7 @@ static int io_interception(struct vcpu_svm *svm) | |||
| 1535 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1541 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
| 1536 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1542 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
| 1537 | if (string || in) | 1543 | if (string || in) |
| 1538 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | 1544 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; |
| 1539 | 1545 | ||
| 1540 | port = io_info >> 16; | 1546 | port = io_info >> 16; |
| 1541 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1547 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
| @@ -1957,7 +1963,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1957 | svm->vmcb->save.cr3 = hsave->save.cr3; | 1963 | svm->vmcb->save.cr3 = hsave->save.cr3; |
| 1958 | svm->vcpu.arch.cr3 = hsave->save.cr3; | 1964 | svm->vcpu.arch.cr3 = hsave->save.cr3; |
| 1959 | } else { | 1965 | } else { |
| 1960 | kvm_set_cr3(&svm->vcpu, hsave->save.cr3); | 1966 | (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); |
| 1961 | } | 1967 | } |
| 1962 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); | 1968 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); |
| 1963 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); | 1969 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); |
| @@ -2080,7 +2086,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 2080 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 2086 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
| 2081 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 2087 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
| 2082 | } else | 2088 | } else |
| 2083 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 2089 | (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
| 2084 | 2090 | ||
| 2085 | /* Guest paging mode is active - reset mmu */ | 2091 | /* Guest paging mode is active - reset mmu */ |
| 2086 | kvm_mmu_reset_context(&svm->vcpu); | 2092 | kvm_mmu_reset_context(&svm->vcpu); |
| @@ -2386,16 +2392,12 @@ static int iret_interception(struct vcpu_svm *svm) | |||
| 2386 | 2392 | ||
| 2387 | static int invlpg_interception(struct vcpu_svm *svm) | 2393 | static int invlpg_interception(struct vcpu_svm *svm) |
| 2388 | { | 2394 | { |
| 2389 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) | 2395 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; |
| 2390 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
| 2391 | return 1; | ||
| 2392 | } | 2396 | } |
| 2393 | 2397 | ||
| 2394 | static int emulate_on_interception(struct vcpu_svm *svm) | 2398 | static int emulate_on_interception(struct vcpu_svm *svm) |
| 2395 | { | 2399 | { |
| 2396 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) | 2400 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; |
| 2397 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
| 2398 | return 1; | ||
| 2399 | } | 2401 | } |
| 2400 | 2402 | ||
| 2401 | static int cr8_write_interception(struct vcpu_svm *svm) | 2403 | static int cr8_write_interception(struct vcpu_svm *svm) |
| @@ -2726,6 +2728,99 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 2726 | [SVM_EXIT_NPF] = pf_interception, | 2728 | [SVM_EXIT_NPF] = pf_interception, |
| 2727 | }; | 2729 | }; |
| 2728 | 2730 | ||
| 2731 | void dump_vmcb(struct kvm_vcpu *vcpu) | ||
| 2732 | { | ||
| 2733 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 2734 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
| 2735 | struct vmcb_save_area *save = &svm->vmcb->save; | ||
| 2736 | |||
| 2737 | pr_err("VMCB Control Area:\n"); | ||
| 2738 | pr_err("cr_read: %04x\n", control->intercept_cr_read); | ||
| 2739 | pr_err("cr_write: %04x\n", control->intercept_cr_write); | ||
| 2740 | pr_err("dr_read: %04x\n", control->intercept_dr_read); | ||
| 2741 | pr_err("dr_write: %04x\n", control->intercept_dr_write); | ||
| 2742 | pr_err("exceptions: %08x\n", control->intercept_exceptions); | ||
| 2743 | pr_err("intercepts: %016llx\n", control->intercept); | ||
| 2744 | pr_err("pause filter count: %d\n", control->pause_filter_count); | ||
| 2745 | pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); | ||
| 2746 | pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); | ||
| 2747 | pr_err("tsc_offset: %016llx\n", control->tsc_offset); | ||
| 2748 | pr_err("asid: %d\n", control->asid); | ||
| 2749 | pr_err("tlb_ctl: %d\n", control->tlb_ctl); | ||
| 2750 | pr_err("int_ctl: %08x\n", control->int_ctl); | ||
| 2751 | pr_err("int_vector: %08x\n", control->int_vector); | ||
| 2752 | pr_err("int_state: %08x\n", control->int_state); | ||
| 2753 | pr_err("exit_code: %08x\n", control->exit_code); | ||
| 2754 | pr_err("exit_info1: %016llx\n", control->exit_info_1); | ||
| 2755 | pr_err("exit_info2: %016llx\n", control->exit_info_2); | ||
| 2756 | pr_err("exit_int_info: %08x\n", control->exit_int_info); | ||
| 2757 | pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); | ||
| 2758 | pr_err("nested_ctl: %lld\n", control->nested_ctl); | ||
| 2759 | pr_err("nested_cr3: %016llx\n", control->nested_cr3); | ||
| 2760 | pr_err("event_inj: %08x\n", control->event_inj); | ||
| 2761 | pr_err("event_inj_err: %08x\n", control->event_inj_err); | ||
| 2762 | pr_err("lbr_ctl: %lld\n", control->lbr_ctl); | ||
| 2763 | pr_err("next_rip: %016llx\n", control->next_rip); | ||
| 2764 | pr_err("VMCB State Save Area:\n"); | ||
| 2765 | pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2766 | save->es.selector, save->es.attrib, | ||
| 2767 | save->es.limit, save->es.base); | ||
| 2768 | pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2769 | save->cs.selector, save->cs.attrib, | ||
| 2770 | save->cs.limit, save->cs.base); | ||
| 2771 | pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2772 | save->ss.selector, save->ss.attrib, | ||
| 2773 | save->ss.limit, save->ss.base); | ||
| 2774 | pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2775 | save->ds.selector, save->ds.attrib, | ||
| 2776 | save->ds.limit, save->ds.base); | ||
| 2777 | pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2778 | save->fs.selector, save->fs.attrib, | ||
| 2779 | save->fs.limit, save->fs.base); | ||
| 2780 | pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2781 | save->gs.selector, save->gs.attrib, | ||
| 2782 | save->gs.limit, save->gs.base); | ||
| 2783 | pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2784 | save->gdtr.selector, save->gdtr.attrib, | ||
| 2785 | save->gdtr.limit, save->gdtr.base); | ||
| 2786 | pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2787 | save->ldtr.selector, save->ldtr.attrib, | ||
| 2788 | save->ldtr.limit, save->ldtr.base); | ||
| 2789 | pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2790 | save->idtr.selector, save->idtr.attrib, | ||
| 2791 | save->idtr.limit, save->idtr.base); | ||
| 2792 | pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
| 2793 | save->tr.selector, save->tr.attrib, | ||
| 2794 | save->tr.limit, save->tr.base); | ||
| 2795 | pr_err("cpl: %d efer: %016llx\n", | ||
| 2796 | save->cpl, save->efer); | ||
| 2797 | pr_err("cr0: %016llx cr2: %016llx\n", | ||
| 2798 | save->cr0, save->cr2); | ||
| 2799 | pr_err("cr3: %016llx cr4: %016llx\n", | ||
| 2800 | save->cr3, save->cr4); | ||
| 2801 | pr_err("dr6: %016llx dr7: %016llx\n", | ||
| 2802 | save->dr6, save->dr7); | ||
| 2803 | pr_err("rip: %016llx rflags: %016llx\n", | ||
| 2804 | save->rip, save->rflags); | ||
| 2805 | pr_err("rsp: %016llx rax: %016llx\n", | ||
| 2806 | save->rsp, save->rax); | ||
| 2807 | pr_err("star: %016llx lstar: %016llx\n", | ||
| 2808 | save->star, save->lstar); | ||
| 2809 | pr_err("cstar: %016llx sfmask: %016llx\n", | ||
| 2810 | save->cstar, save->sfmask); | ||
| 2811 | pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", | ||
| 2812 | save->kernel_gs_base, save->sysenter_cs); | ||
| 2813 | pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", | ||
| 2814 | save->sysenter_esp, save->sysenter_eip); | ||
| 2815 | pr_err("gpat: %016llx dbgctl: %016llx\n", | ||
| 2816 | save->g_pat, save->dbgctl); | ||
| 2817 | pr_err("br_from: %016llx br_to: %016llx\n", | ||
| 2818 | save->br_from, save->br_to); | ||
| 2819 | pr_err("excp_from: %016llx excp_to: %016llx\n", | ||
| 2820 | save->last_excp_from, save->last_excp_to); | ||
| 2821 | |||
| 2822 | } | ||
| 2823 | |||
| 2729 | static int handle_exit(struct kvm_vcpu *vcpu) | 2824 | static int handle_exit(struct kvm_vcpu *vcpu) |
| 2730 | { | 2825 | { |
| 2731 | struct vcpu_svm *svm = to_svm(vcpu); | 2826 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -2770,6 +2865,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 2770 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2865 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| 2771 | kvm_run->fail_entry.hardware_entry_failure_reason | 2866 | kvm_run->fail_entry.hardware_entry_failure_reason |
| 2772 | = svm->vmcb->control.exit_code; | 2867 | = svm->vmcb->control.exit_code; |
| 2868 | pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); | ||
| 2869 | dump_vmcb(vcpu); | ||
| 2773 | return 0; | 2870 | return 0; |
| 2774 | } | 2871 | } |
| 2775 | 2872 | ||
| @@ -2826,9 +2923,6 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
| 2826 | { | 2923 | { |
| 2827 | struct vmcb_control_area *control; | 2924 | struct vmcb_control_area *control; |
| 2828 | 2925 | ||
| 2829 | trace_kvm_inj_virq(irq); | ||
| 2830 | |||
| 2831 | ++svm->vcpu.stat.irq_injections; | ||
| 2832 | control = &svm->vmcb->control; | 2926 | control = &svm->vmcb->control; |
| 2833 | control->int_vector = irq; | 2927 | control->int_vector = irq; |
| 2834 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 2928 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
| @@ -2842,6 +2936,9 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) | |||
| 2842 | 2936 | ||
| 2843 | BUG_ON(!(gif_set(svm))); | 2937 | BUG_ON(!(gif_set(svm))); |
| 2844 | 2938 | ||
| 2939 | trace_kvm_inj_virq(vcpu->arch.interrupt.nr); | ||
| 2940 | ++vcpu->stat.irq_injections; | ||
| 2941 | |||
| 2845 | svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | | 2942 | svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | |
| 2846 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; | 2943 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; |
| 2847 | } | 2944 | } |
| @@ -3327,6 +3424,11 @@ static bool svm_rdtscp_supported(void) | |||
| 3327 | return false; | 3424 | return false; |
| 3328 | } | 3425 | } |
| 3329 | 3426 | ||
| 3427 | static bool svm_has_wbinvd_exit(void) | ||
| 3428 | { | ||
| 3429 | return true; | ||
| 3430 | } | ||
| 3431 | |||
| 3330 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | 3432 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) |
| 3331 | { | 3433 | { |
| 3332 | struct vcpu_svm *svm = to_svm(vcpu); | 3434 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -3411,6 +3513,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 3411 | .rdtscp_supported = svm_rdtscp_supported, | 3513 | .rdtscp_supported = svm_rdtscp_supported, |
| 3412 | 3514 | ||
| 3413 | .set_supported_cpuid = svm_set_supported_cpuid, | 3515 | .set_supported_cpuid = svm_set_supported_cpuid, |
| 3516 | |||
| 3517 | .has_wbinvd_exit = svm_has_wbinvd_exit, | ||
| 3414 | }; | 3518 | }; |
| 3415 | 3519 | ||
| 3416 | static int __init svm_init(void) | 3520 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index 4ddadb1a5ffe..e16a0dbe74d8 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
| @@ -1,3 +1,17 @@ | |||
| 1 | /* | ||
| 2 | * Kernel-based Virtual Machine driver for Linux | ||
| 3 | * | ||
| 4 | * This module enables machines with Intel VT-x extensions to run virtual | ||
| 5 | * machines without emulation or binary translation. | ||
| 6 | * | ||
| 7 | * timer support | ||
| 8 | * | ||
| 9 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 10 | * | ||
| 11 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 12 | * the COPYING file in the top-level directory. | ||
| 13 | */ | ||
| 14 | |||
| 1 | #include <linux/kvm_host.h> | 15 | #include <linux/kvm_host.h> |
| 2 | #include <linux/kvm.h> | 16 | #include <linux/kvm.h> |
| 3 | #include <linux/hrtimer.h> | 17 | #include <linux/hrtimer.h> |
| @@ -18,7 +32,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
| 18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 32 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
| 19 | atomic_inc(&ktimer->pending); | 33 | atomic_inc(&ktimer->pending); |
| 20 | /* FIXME: this code should not know anything about vcpus */ | 34 | /* FIXME: this code should not know anything about vcpus */ |
| 21 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | 35 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); |
| 22 | } | 36 | } |
| 23 | 37 | ||
| 24 | if (waitqueue_active(q)) | 38 | if (waitqueue_active(q)) |
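Several call sites in this series switch from an open-coded set_bit() on vcpu->requests to the kvm_make_request() wrapper, as in the timer hunk above. A toy version of the same "named request bit" idea, using plain (non-atomic) C bit operations and made-up request names purely for illustration:

```c
#include <stdio.h>

enum { REQ_TLB_FLUSH, REQ_PENDING_TIMER, REQ_TRIPLE_FAULT };

struct toy_vcpu {
	unsigned long requests;
};

/* Wrapper that names the operation instead of exposing raw bit math. */
static void make_request(int req, struct toy_vcpu *vcpu)
{
	vcpu->requests |= 1UL << req;	/* the kernel uses atomic set_bit() */
}

static int check_request(int req, struct toy_vcpu *vcpu)
{
	if (vcpu->requests & (1UL << req)) {
		vcpu->requests &= ~(1UL << req);
		return 1;
	}
	return 0;
}

int main(void)
{
	struct toy_vcpu vcpu = { 0 };

	make_request(REQ_PENDING_TIMER, &vcpu);
	printf("pending timer: %d\n", check_request(REQ_PENDING_TIMER, &vcpu));
	return 0;
}
```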
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ee03679efe78..27a0222c2946 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | * machines without emulation or binary translation. | 5 | * machines without emulation or binary translation. |
| 6 | * | 6 | * |
| 7 | * Copyright (C) 2006 Qumranet, Inc. | 7 | * Copyright (C) 2006 Qumranet, Inc. |
| 8 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 8 | * | 9 | * |
| 9 | * Authors: | 10 | * Authors: |
| 10 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
| @@ -36,6 +37,8 @@ | |||
| 36 | #include <asm/vmx.h> | 37 | #include <asm/vmx.h> |
| 37 | #include <asm/virtext.h> | 38 | #include <asm/virtext.h> |
| 38 | #include <asm/mce.h> | 39 | #include <asm/mce.h> |
| 40 | #include <asm/i387.h> | ||
| 41 | #include <asm/xcr.h> | ||
| 39 | 42 | ||
| 40 | #include "trace.h" | 43 | #include "trace.h" |
| 41 | 44 | ||
| @@ -63,6 +66,9 @@ module_param_named(unrestricted_guest, | |||
| 63 | static int __read_mostly emulate_invalid_guest_state = 0; | 66 | static int __read_mostly emulate_invalid_guest_state = 0; |
| 64 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 67 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
| 65 | 68 | ||
| 69 | static int __read_mostly vmm_exclusive = 1; | ||
| 70 | module_param(vmm_exclusive, bool, S_IRUGO); | ||
| 71 | |||
| 66 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 72 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
| 67 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 73 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) |
| 68 | #define KVM_GUEST_CR0_MASK \ | 74 | #define KVM_GUEST_CR0_MASK \ |
| @@ -173,10 +179,13 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
| 173 | 179 | ||
| 174 | static int init_rmode(struct kvm *kvm); | 180 | static int init_rmode(struct kvm *kvm); |
| 175 | static u64 construct_eptp(unsigned long root_hpa); | 181 | static u64 construct_eptp(unsigned long root_hpa); |
| 182 | static void kvm_cpu_vmxon(u64 addr); | ||
| 183 | static void kvm_cpu_vmxoff(void); | ||
| 176 | 184 | ||
| 177 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
| 178 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
| 179 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); | 187 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); |
| 188 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); | ||
| 180 | 189 | ||
| 181 | static unsigned long *vmx_io_bitmap_a; | 190 | static unsigned long *vmx_io_bitmap_a; |
| 182 | static unsigned long *vmx_io_bitmap_b; | 191 | static unsigned long *vmx_io_bitmap_b; |
| @@ -334,6 +343,11 @@ static inline bool cpu_has_vmx_ept_1g_page(void) | |||
| 334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; | 343 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
| 335 | } | 344 | } |
| 336 | 345 | ||
| 346 | static inline bool cpu_has_vmx_ept_4levels(void) | ||
| 347 | { | ||
| 348 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; | ||
| 349 | } | ||
| 350 | |||
| 337 | static inline bool cpu_has_vmx_invept_individual_addr(void) | 351 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
| 338 | { | 352 | { |
| 339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; | 353 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
| @@ -349,6 +363,16 @@ static inline bool cpu_has_vmx_invept_global(void) | |||
| 349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; | 363 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
| 350 | } | 364 | } |
| 351 | 365 | ||
| 366 | static inline bool cpu_has_vmx_invvpid_single(void) | ||
| 367 | { | ||
| 368 | return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT; | ||
| 369 | } | ||
| 370 | |||
| 371 | static inline bool cpu_has_vmx_invvpid_global(void) | ||
| 372 | { | ||
| 373 | return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; | ||
| 374 | } | ||
| 375 | |||
| 352 | static inline bool cpu_has_vmx_ept(void) | 376 | static inline bool cpu_has_vmx_ept(void) |
| 353 | { | 377 | { |
| 354 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 378 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| @@ -389,6 +413,12 @@ static inline bool cpu_has_virtual_nmis(void) | |||
| 389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 413 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
| 390 | } | 414 | } |
| 391 | 415 | ||
| 416 | static inline bool cpu_has_vmx_wbinvd_exit(void) | ||
| 417 | { | ||
| 418 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 419 | SECONDARY_EXEC_WBINVD_EXITING; | ||
| 420 | } | ||
| 421 | |||
| 392 | static inline bool report_flexpriority(void) | 422 | static inline bool report_flexpriority(void) |
| 393 | { | 423 | { |
| 394 | return flexpriority_enabled; | 424 | return flexpriority_enabled; |
| @@ -453,6 +483,19 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
| 453 | vmcs, phys_addr); | 483 | vmcs, phys_addr); |
| 454 | } | 484 | } |
| 455 | 485 | ||
| 486 | static void vmcs_load(struct vmcs *vmcs) | ||
| 487 | { | ||
| 488 | u64 phys_addr = __pa(vmcs); | ||
| 489 | u8 error; | ||
| 490 | |||
| 491 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | ||
| 492 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | ||
| 493 | : "cc", "memory"); | ||
| 494 | if (error) | ||
| 495 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | ||
| 496 | vmcs, phys_addr); | ||
| 497 | } | ||
| 498 | |||
| 456 | static void __vcpu_clear(void *arg) | 499 | static void __vcpu_clear(void *arg) |
| 457 | { | 500 | { |
| 458 | struct vcpu_vmx *vmx = arg; | 501 | struct vcpu_vmx *vmx = arg; |
| @@ -475,12 +518,27 @@ static void vcpu_clear(struct vcpu_vmx *vmx) | |||
| 475 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); | 518 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); |
| 476 | } | 519 | } |
| 477 | 520 | ||
| 478 | static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) | 521 | static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) |
| 479 | { | 522 | { |
| 480 | if (vmx->vpid == 0) | 523 | if (vmx->vpid == 0) |
| 481 | return; | 524 | return; |
| 482 | 525 | ||
| 483 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | 526 | if (cpu_has_vmx_invvpid_single()) |
| 527 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | ||
| 528 | } | ||
| 529 | |||
| 530 | static inline void vpid_sync_vcpu_global(void) | ||
| 531 | { | ||
| 532 | if (cpu_has_vmx_invvpid_global()) | ||
| 533 | __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); | ||
| 534 | } | ||
| 535 | |||
| 536 | static inline void vpid_sync_context(struct vcpu_vmx *vmx) | ||
| 537 | { | ||
| 538 | if (cpu_has_vmx_invvpid_single()) | ||
| 539 | vpid_sync_vcpu_single(vmx); | ||
| 540 | else | ||
| 541 | vpid_sync_vcpu_global(); | ||
| 484 | } | 542 | } |
| 485 | 543 | ||
| 486 | static inline void ept_sync_global(void) | 544 | static inline void ept_sync_global(void) |
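The new vpid_sync_context() wrapper above prefers the cheaper single-context invalidation and falls back to a global flush when the CPU lacks that capability. The same capability-gated dispatch, sketched as ordinary C with stand-in capability flags and flush routines (none of these names are real VMX interfaces):

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a detected CPU capability. */
static bool has_single_context_flush = false;

static void flush_single_context(int id)
{
	printf("flush translations tagged %d only\n", id);
}

static void flush_all_contexts(void)
{
	printf("flush translations for every context\n");
}

/* Prefer the narrower flush, fall back to the global one. */
static void sync_context(int id)
{
	if (has_single_context_flush)
		flush_single_context(id);
	else
		flush_all_contexts();
}

int main(void)
{
	sync_context(1);		/* falls back: global flush */
	has_single_context_flush = true;
	sync_context(1);		/* now uses the single-context flush */
	return 0;
}
```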
| @@ -812,6 +870,9 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
| 812 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 870 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
| 813 | } | 871 | } |
| 814 | #endif | 872 | #endif |
| 873 | if (current_thread_info()->status & TS_USEDFPU) | ||
| 874 | clts(); | ||
| 875 | load_gdt(&__get_cpu_var(host_gdt)); | ||
| 815 | } | 876 | } |
| 816 | 877 | ||
| 817 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 878 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
| @@ -828,35 +889,30 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) | |||
| 828 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 889 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
| 829 | { | 890 | { |
| 830 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 891 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 831 | u64 phys_addr = __pa(vmx->vmcs); | ||
| 832 | u64 tsc_this, delta, new_offset; | 892 | u64 tsc_this, delta, new_offset; |
| 893 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | ||
| 833 | 894 | ||
| 834 | if (vcpu->cpu != cpu) { | 895 | if (!vmm_exclusive) |
| 896 | kvm_cpu_vmxon(phys_addr); | ||
| 897 | else if (vcpu->cpu != cpu) | ||
| 835 | vcpu_clear(vmx); | 898 | vcpu_clear(vmx); |
| 836 | kvm_migrate_timers(vcpu); | ||
| 837 | set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); | ||
| 838 | local_irq_disable(); | ||
| 839 | list_add(&vmx->local_vcpus_link, | ||
| 840 | &per_cpu(vcpus_on_cpu, cpu)); | ||
| 841 | local_irq_enable(); | ||
| 842 | } | ||
| 843 | 899 | ||
| 844 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { | 900 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { |
| 845 | u8 error; | ||
| 846 | |||
| 847 | per_cpu(current_vmcs, cpu) = vmx->vmcs; | 901 | per_cpu(current_vmcs, cpu) = vmx->vmcs; |
| 848 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | 902 | vmcs_load(vmx->vmcs); |
| 849 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | ||
| 850 | : "cc"); | ||
| 851 | if (error) | ||
| 852 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | ||
| 853 | vmx->vmcs, phys_addr); | ||
| 854 | } | 903 | } |
| 855 | 904 | ||
| 856 | if (vcpu->cpu != cpu) { | 905 | if (vcpu->cpu != cpu) { |
| 857 | struct desc_ptr dt; | 906 | struct desc_ptr dt; |
| 858 | unsigned long sysenter_esp; | 907 | unsigned long sysenter_esp; |
| 859 | 908 | ||
| 909 | kvm_migrate_timers(vcpu); | ||
| 910 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
| 911 | local_irq_disable(); | ||
| 912 | list_add(&vmx->local_vcpus_link, | ||
| 913 | &per_cpu(vcpus_on_cpu, cpu)); | ||
| 914 | local_irq_enable(); | ||
| 915 | |||
| 860 | vcpu->cpu = cpu; | 916 | vcpu->cpu = cpu; |
| 861 | /* | 917 | /* |
| 862 | * Linux uses per-cpu TSS and GDT, so set these when switching | 918 | * Linux uses per-cpu TSS and GDT, so set these when switching |
| @@ -884,6 +940,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 884 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 940 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
| 885 | { | 941 | { |
| 886 | __vmx_load_host_state(to_vmx(vcpu)); | 942 | __vmx_load_host_state(to_vmx(vcpu)); |
| 943 | if (!vmm_exclusive) { | ||
| 944 | __vcpu_clear(to_vmx(vcpu)); | ||
| 945 | kvm_cpu_vmxoff(); | ||
| 946 | } | ||
| 887 | } | 947 | } |
| 888 | 948 | ||
| 889 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 949 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
| @@ -1286,6 +1346,13 @@ static __init int vmx_disabled_by_bios(void) | |||
| 1286 | /* locked but not enabled */ | 1346 | /* locked but not enabled */ |
| 1287 | } | 1347 | } |
| 1288 | 1348 | ||
| 1349 | static void kvm_cpu_vmxon(u64 addr) | ||
| 1350 | { | ||
| 1351 | asm volatile (ASM_VMX_VMXON_RAX | ||
| 1352 | : : "a"(&addr), "m"(addr) | ||
| 1353 | : "memory", "cc"); | ||
| 1354 | } | ||
| 1355 | |||
| 1289 | static int hardware_enable(void *garbage) | 1356 | static int hardware_enable(void *garbage) |
| 1290 | { | 1357 | { |
| 1291 | int cpu = raw_smp_processor_id(); | 1358 | int cpu = raw_smp_processor_id(); |
| @@ -1308,11 +1375,13 @@ static int hardware_enable(void *garbage) | |||
| 1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); | 1375 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
| 1309 | } | 1376 | } |
| 1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1377 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
| 1311 | asm volatile (ASM_VMX_VMXON_RAX | ||
| 1312 | : : "a"(&phys_addr), "m"(phys_addr) | ||
| 1313 | : "memory", "cc"); | ||
| 1314 | 1378 | ||
| 1315 | ept_sync_global(); | 1379 | if (vmm_exclusive) { |
| 1380 | kvm_cpu_vmxon(phys_addr); | ||
| 1381 | ept_sync_global(); | ||
| 1382 | } | ||
| 1383 | |||
| 1384 | store_gdt(&__get_cpu_var(host_gdt)); | ||
| 1316 | 1385 | ||
| 1317 | return 0; | 1386 | return 0; |
| 1318 | } | 1387 | } |
| @@ -1334,13 +1403,15 @@ static void vmclear_local_vcpus(void) | |||
| 1334 | static void kvm_cpu_vmxoff(void) | 1403 | static void kvm_cpu_vmxoff(void) |
| 1335 | { | 1404 | { |
| 1336 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | 1405 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
| 1337 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
| 1338 | } | 1406 | } |
| 1339 | 1407 | ||
| 1340 | static void hardware_disable(void *garbage) | 1408 | static void hardware_disable(void *garbage) |
| 1341 | { | 1409 | { |
| 1342 | vmclear_local_vcpus(); | 1410 | if (vmm_exclusive) { |
| 1343 | kvm_cpu_vmxoff(); | 1411 | vmclear_local_vcpus(); |
| 1412 | kvm_cpu_vmxoff(); | ||
| 1413 | } | ||
| 1414 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
| 1344 | } | 1415 | } |
| 1345 | 1416 | ||
| 1346 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 1417 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
| @@ -1539,7 +1610,8 @@ static __init int hardware_setup(void) | |||
| 1539 | if (!cpu_has_vmx_vpid()) | 1610 | if (!cpu_has_vmx_vpid()) |
| 1540 | enable_vpid = 0; | 1611 | enable_vpid = 0; |
| 1541 | 1612 | ||
| 1542 | if (!cpu_has_vmx_ept()) { | 1613 | if (!cpu_has_vmx_ept() || |
| 1614 | !cpu_has_vmx_ept_4levels()) { | ||
| 1543 | enable_ept = 0; | 1615 | enable_ept = 0; |
| 1544 | enable_unrestricted_guest = 0; | 1616 | enable_unrestricted_guest = 0; |
| 1545 | } | 1617 | } |
| @@ -1628,7 +1700,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
| 1628 | gfn_t base_gfn; | 1700 | gfn_t base_gfn; |
| 1629 | 1701 | ||
| 1630 | slots = kvm_memslots(kvm); | 1702 | slots = kvm_memslots(kvm); |
| 1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1703 | base_gfn = slots->memslots[0].base_gfn + |
| 1632 | kvm->memslots->memslots[0].npages - 3; | 1704 | kvm->memslots->memslots[0].npages - 3; |
| 1633 | return base_gfn << PAGE_SHIFT; | 1705 | return base_gfn << PAGE_SHIFT; |
| 1634 | } | 1706 | } |
| @@ -1759,9 +1831,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
| 1759 | 1831 | ||
| 1760 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 1832 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) |
| 1761 | { | 1833 | { |
| 1762 | vpid_sync_vcpu_all(to_vmx(vcpu)); | 1834 | vpid_sync_context(to_vmx(vcpu)); |
| 1763 | if (enable_ept) | 1835 | if (enable_ept) { |
| 1836 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
| 1837 | return; | ||
| 1764 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1838 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
| 1839 | } | ||
| 1765 | } | 1840 | } |
| 1766 | 1841 | ||
| 1767 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1842 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
| @@ -2507,7 +2582,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2507 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2582 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
| 2508 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2583 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
| 2509 | 2584 | ||
| 2510 | vmcs_writel(HOST_CR0, read_cr0()); /* 22.2.3 */ | 2585 | vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ |
| 2511 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ | 2586 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ |
| 2512 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | 2587 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ |
| 2513 | 2588 | ||
| @@ -2599,21 +2674,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2599 | 2674 | ||
| 2600 | static int init_rmode(struct kvm *kvm) | 2675 | static int init_rmode(struct kvm *kvm) |
| 2601 | { | 2676 | { |
| 2677 | int idx, ret = 0; | ||
| 2678 | |||
| 2679 | idx = srcu_read_lock(&kvm->srcu); | ||
| 2602 | if (!init_rmode_tss(kvm)) | 2680 | if (!init_rmode_tss(kvm)) |
| 2603 | return 0; | 2681 | goto exit; |
| 2604 | if (!init_rmode_identity_map(kvm)) | 2682 | if (!init_rmode_identity_map(kvm)) |
| 2605 | return 0; | 2683 | goto exit; |
| 2606 | return 1; | 2684 | |
| 2685 | ret = 1; | ||
| 2686 | exit: | ||
| 2687 | srcu_read_unlock(&kvm->srcu, idx); | ||
| 2688 | return ret; | ||
| 2607 | } | 2689 | } |
| 2608 | 2690 | ||
| 2609 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 2691 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
| 2610 | { | 2692 | { |
| 2611 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2693 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2612 | u64 msr; | 2694 | u64 msr; |
| 2613 | int ret, idx; | 2695 | int ret; |
| 2614 | 2696 | ||
| 2615 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2697 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
| 2616 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
| 2617 | if (!init_rmode(vmx->vcpu.kvm)) { | 2698 | if (!init_rmode(vmx->vcpu.kvm)) { |
| 2618 | ret = -ENOMEM; | 2699 | ret = -ENOMEM; |
| 2619 | goto out; | 2700 | goto out; |
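In the hunk above, init_rmode() now brackets both setup steps with its own srcu_read_lock()/srcu_read_unlock() pair and funnels every outcome through a single exit label, rather than relying on vmx_vcpu_reset() to hold the lock around it. The shape of that pattern, as a freestanding sketch with stand-in lock and setup helpers (not the SRCU API itself):

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the read-side lock and the two setup steps. */
static int  read_lock(void)        { return 42; /* pretend token */ }
static void read_unlock(int token) { (void)token; }
static bool setup_step_a(void)     { return true; }
static bool setup_step_b(void)     { return true; }

/*
 * Take the read-side lock around both steps and leave through one
 * label, so the unlock happens exactly once on every path.
 */
static int init_both(void)
{
	int token, ret = 0;

	token = read_lock();
	if (!setup_step_a())
		goto exit;
	if (!setup_step_b())
		goto exit;

	ret = 1;
exit:
	read_unlock(token);
	return ret;
}

int main(void)
{
	printf("init_both() = %d\n", init_both());
	return 0;
}
```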
| @@ -2630,7 +2711,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2630 | msr |= MSR_IA32_APICBASE_BSP; | 2711 | msr |= MSR_IA32_APICBASE_BSP; |
| 2631 | kvm_set_apic_base(&vmx->vcpu, msr); | 2712 | kvm_set_apic_base(&vmx->vcpu, msr); |
| 2632 | 2713 | ||
| 2633 | fx_init(&vmx->vcpu); | 2714 | ret = fx_init(&vmx->vcpu); |
| 2715 | if (ret != 0) | ||
| 2716 | goto out; | ||
| 2634 | 2717 | ||
| 2635 | seg_setup(VCPU_SREG_CS); | 2718 | seg_setup(VCPU_SREG_CS); |
| 2636 | /* | 2719 | /* |
| @@ -2713,7 +2796,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2713 | vmx_fpu_activate(&vmx->vcpu); | 2796 | vmx_fpu_activate(&vmx->vcpu); |
| 2714 | update_exception_bitmap(&vmx->vcpu); | 2797 | update_exception_bitmap(&vmx->vcpu); |
| 2715 | 2798 | ||
| 2716 | vpid_sync_vcpu_all(vmx); | 2799 | vpid_sync_context(vmx); |
| 2717 | 2800 | ||
| 2718 | ret = 0; | 2801 | ret = 0; |
| 2719 | 2802 | ||
| @@ -2721,7 +2804,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2721 | vmx->emulation_required = 0; | 2804 | vmx->emulation_required = 0; |
| 2722 | 2805 | ||
| 2723 | out: | 2806 | out: |
| 2724 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
| 2725 | return ret; | 2807 | return ret; |
| 2726 | } | 2808 | } |
| 2727 | 2809 | ||
| @@ -2826,9 +2908,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | |||
| 2826 | { | 2908 | { |
| 2827 | if (!cpu_has_virtual_nmis()) | 2909 | if (!cpu_has_virtual_nmis()) |
| 2828 | return to_vmx(vcpu)->soft_vnmi_blocked; | 2910 | return to_vmx(vcpu)->soft_vnmi_blocked; |
| 2829 | else | 2911 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; |
| 2830 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
| 2831 | GUEST_INTR_STATE_NMI); | ||
| 2832 | } | 2912 | } |
| 2833 | 2913 | ||
| 2834 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | 2914 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) |
| @@ -3070,7 +3150,7 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
| 3070 | ++vcpu->stat.io_exits; | 3150 | ++vcpu->stat.io_exits; |
| 3071 | 3151 | ||
| 3072 | if (string || in) | 3152 | if (string || in) |
| 3073 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | 3153 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; |
| 3074 | 3154 | ||
| 3075 | port = exit_qualification >> 16; | 3155 | port = exit_qualification >> 16; |
| 3076 | size = (exit_qualification & 7) + 1; | 3156 | size = (exit_qualification & 7) + 1; |
| @@ -3090,11 +3170,20 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
| 3090 | hypercall[2] = 0xc1; | 3170 | hypercall[2] = 0xc1; |
| 3091 | } | 3171 | } |
| 3092 | 3172 | ||
| 3173 | static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) | ||
| 3174 | { | ||
| 3175 | if (err) | ||
| 3176 | kvm_inject_gp(vcpu, 0); | ||
| 3177 | else | ||
| 3178 | skip_emulated_instruction(vcpu); | ||
| 3179 | } | ||
| 3180 | |||
| 3093 | static int handle_cr(struct kvm_vcpu *vcpu) | 3181 | static int handle_cr(struct kvm_vcpu *vcpu) |
| 3094 | { | 3182 | { |
| 3095 | unsigned long exit_qualification, val; | 3183 | unsigned long exit_qualification, val; |
| 3096 | int cr; | 3184 | int cr; |
| 3097 | int reg; | 3185 | int reg; |
| 3186 | int err; | ||
| 3098 | 3187 | ||
| 3099 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3188 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 3100 | cr = exit_qualification & 15; | 3189 | cr = exit_qualification & 15; |
| @@ -3105,16 +3194,16 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
| 3105 | trace_kvm_cr_write(cr, val); | 3194 | trace_kvm_cr_write(cr, val); |
| 3106 | switch (cr) { | 3195 | switch (cr) { |
| 3107 | case 0: | 3196 | case 0: |
| 3108 | kvm_set_cr0(vcpu, val); | 3197 | err = kvm_set_cr0(vcpu, val); |
| 3109 | skip_emulated_instruction(vcpu); | 3198 | complete_insn_gp(vcpu, err); |
| 3110 | return 1; | 3199 | return 1; |
| 3111 | case 3: | 3200 | case 3: |
| 3112 | kvm_set_cr3(vcpu, val); | 3201 | err = kvm_set_cr3(vcpu, val); |
| 3113 | skip_emulated_instruction(vcpu); | 3202 | complete_insn_gp(vcpu, err); |
| 3114 | return 1; | 3203 | return 1; |
| 3115 | case 4: | 3204 | case 4: |
| 3116 | kvm_set_cr4(vcpu, val); | 3205 | err = kvm_set_cr4(vcpu, val); |
| 3117 | skip_emulated_instruction(vcpu); | 3206 | complete_insn_gp(vcpu, err); |
| 3118 | return 1; | 3207 | return 1; |
| 3119 | case 8: { | 3208 | case 8: { |
| 3120 | u8 cr8_prev = kvm_get_cr8(vcpu); | 3209 | u8 cr8_prev = kvm_get_cr8(vcpu); |
| @@ -3321,30 +3410,25 @@ static int handle_invlpg(struct kvm_vcpu *vcpu) | |||
| 3321 | static int handle_wbinvd(struct kvm_vcpu *vcpu) | 3410 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
| 3322 | { | 3411 | { |
| 3323 | skip_emulated_instruction(vcpu); | 3412 | skip_emulated_instruction(vcpu); |
| 3324 | /* TODO: Add support for VT-d/pass-through device */ | 3413 | kvm_emulate_wbinvd(vcpu); |
| 3325 | return 1; | 3414 | return 1; |
| 3326 | } | 3415 | } |
| 3327 | 3416 | ||
| 3328 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 3417 | static int handle_xsetbv(struct kvm_vcpu *vcpu) |
| 3329 | { | 3418 | { |
| 3330 | unsigned long exit_qualification; | 3419 | u64 new_bv = kvm_read_edx_eax(vcpu); |
| 3331 | enum emulation_result er; | 3420 | u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 3332 | unsigned long offset; | ||
| 3333 | 3421 | ||
| 3334 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3422 | if (kvm_set_xcr(vcpu, index, new_bv) == 0) |
| 3335 | offset = exit_qualification & 0xffful; | 3423 | skip_emulated_instruction(vcpu); |
| 3336 | |||
| 3337 | er = emulate_instruction(vcpu, 0, 0, 0); | ||
| 3338 | |||
| 3339 | if (er != EMULATE_DONE) { | ||
| 3340 | printk(KERN_ERR | ||
| 3341 | "Fail to handle apic access vmexit! Offset is 0x%lx\n", | ||
| 3342 | offset); | ||
| 3343 | return -ENOEXEC; | ||
| 3344 | } | ||
| 3345 | return 1; | 3424 | return 1; |
| 3346 | } | 3425 | } |
| 3347 | 3426 | ||
| 3427 | static int handle_apic_access(struct kvm_vcpu *vcpu) | ||
| 3428 | { | ||
| 3429 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | ||
| 3430 | } | ||
| 3431 | |||
| 3348 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 3432 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
| 3349 | { | 3433 | { |
| 3350 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3434 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
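handle_xsetbv() above decodes the exit the way the instruction is defined: the XCR index is taken from RCX, the 64-bit value from EDX:EAX, and the instruction is only skipped when kvm_set_xcr() accepts the value (on rejection the #GP queued inside kvm_set_xcr() is delivered with RIP unchanged). For orientation, a guest-side sketch of what reaches this handler; the helper below is illustrative, not part of the commit, and assumes CR4.OSXSAVE is already set and CPUID advertises XSAVE/AVX:

	static inline void guest_enable_avx_state(void)
	{
		unsigned int eax = 0x7, edx = 0;	/* XCR0 = x87 | SSE | YMM */
		unsigned int index = 0;			/* XCR_XFEATURE_ENABLED_MASK */

		/* xsetbv encoded as raw bytes for older assemblers */
		asm volatile(".byte 0x0f, 0x01, 0xd1"
			     : : "a" (eax), "d" (edx), "c" (index));
	}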
| @@ -3554,13 +3638,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
| 3554 | goto out; | 3638 | goto out; |
| 3555 | } | 3639 | } |
| 3556 | 3640 | ||
| 3557 | if (err != EMULATE_DONE) { | 3641 | if (err != EMULATE_DONE) |
| 3558 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 3642 | return 0; |
| 3559 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 3560 | vcpu->run->internal.ndata = 0; | ||
| 3561 | ret = 0; | ||
| 3562 | goto out; | ||
| 3563 | } | ||
| 3564 | 3643 | ||
| 3565 | if (signal_pending(current)) | 3644 | if (signal_pending(current)) |
| 3566 | goto out; | 3645 | goto out; |
| @@ -3623,6 +3702,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
| 3623 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 3702 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
| 3624 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 3703 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
| 3625 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3704 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
| 3705 | [EXIT_REASON_XSETBV] = handle_xsetbv, | ||
| 3626 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 3706 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
| 3627 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3707 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
| 3628 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3708 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
| @@ -3656,6 +3736,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
| 3656 | if (enable_ept && is_paging(vcpu)) | 3736 | if (enable_ept && is_paging(vcpu)) |
| 3657 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3737 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
| 3658 | 3738 | ||
| 3739 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | ||
| 3740 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
| 3741 | vcpu->run->fail_entry.hardware_entry_failure_reason | ||
| 3742 | = exit_reason; | ||
| 3743 | return 0; | ||
| 3744 | } | ||
| 3745 | |||
| 3659 | if (unlikely(vmx->fail)) { | 3746 | if (unlikely(vmx->fail)) { |
| 3660 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3747 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| 3661 | vcpu->run->fail_entry.hardware_entry_failure_reason | 3748 | vcpu->run->fail_entry.hardware_entry_failure_reason |
| @@ -3861,11 +3948,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 3861 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 3948 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
| 3862 | vmx_set_interrupt_shadow(vcpu, 0); | 3949 | vmx_set_interrupt_shadow(vcpu, 0); |
| 3863 | 3950 | ||
| 3864 | /* | ||
| 3865 | * Loading guest fpu may have cleared host cr0.ts | ||
| 3866 | */ | ||
| 3867 | vmcs_writel(HOST_CR0, read_cr0()); | ||
| 3868 | |||
| 3869 | asm( | 3951 | asm( |
| 3870 | /* Store host registers */ | 3952 | /* Store host registers */ |
| 3871 | "push %%"R"dx; push %%"R"bp;" | 3953 | "push %%"R"dx; push %%"R"bp;" |
| @@ -4001,6 +4083,19 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 4001 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4083 | kmem_cache_free(kvm_vcpu_cache, vmx); |
| 4002 | } | 4084 | } |
| 4003 | 4085 | ||
| 4086 | static inline void vmcs_init(struct vmcs *vmcs) | ||
| 4087 | { | ||
| 4088 | u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id())); | ||
| 4089 | |||
| 4090 | if (!vmm_exclusive) | ||
| 4091 | kvm_cpu_vmxon(phys_addr); | ||
| 4092 | |||
| 4093 | vmcs_clear(vmcs); | ||
| 4094 | |||
| 4095 | if (!vmm_exclusive) | ||
| 4096 | kvm_cpu_vmxoff(); | ||
| 4097 | } | ||
| 4098 | |||
| 4004 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | 4099 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
| 4005 | { | 4100 | { |
| 4006 | int err; | 4101 | int err; |
| @@ -4026,7 +4121,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 4026 | if (!vmx->vmcs) | 4121 | if (!vmx->vmcs) |
| 4027 | goto free_msrs; | 4122 | goto free_msrs; |
| 4028 | 4123 | ||
| 4029 | vmcs_clear(vmx->vmcs); | 4124 | vmcs_init(vmx->vmcs); |
| 4030 | 4125 | ||
| 4031 | cpu = get_cpu(); | 4126 | cpu = get_cpu(); |
| 4032 | vmx_vcpu_load(&vmx->vcpu, cpu); | 4127 | vmx_vcpu_load(&vmx->vcpu, cpu); |
| @@ -4265,6 +4360,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 4265 | .rdtscp_supported = vmx_rdtscp_supported, | 4360 | .rdtscp_supported = vmx_rdtscp_supported, |
| 4266 | 4361 | ||
| 4267 | .set_supported_cpuid = vmx_set_supported_cpuid, | 4362 | .set_supported_cpuid = vmx_set_supported_cpuid, |
| 4363 | |||
| 4364 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, | ||
| 4268 | }; | 4365 | }; |
| 4269 | 4366 | ||
| 4270 | static int __init vmx_init(void) | 4367 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fa89c39c64f..97aab036dabf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
| 7 | * Copyright (C) 2008 Qumranet, Inc. | 7 | * Copyright (C) 2008 Qumranet, Inc. |
| 8 | * Copyright IBM Corporation, 2008 | 8 | * Copyright IBM Corporation, 2008 |
| 9 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 9 | * | 10 | * |
| 10 | * Authors: | 11 | * Authors: |
| 11 | * Avi Kivity <avi@qumranet.com> | 12 | * Avi Kivity <avi@qumranet.com> |
| @@ -41,17 +42,19 @@ | |||
| 41 | #include <linux/srcu.h> | 42 | #include <linux/srcu.h> |
| 42 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
| 43 | #include <linux/perf_event.h> | 44 | #include <linux/perf_event.h> |
| 45 | #include <linux/uaccess.h> | ||
| 44 | #include <trace/events/kvm.h> | 46 | #include <trace/events/kvm.h> |
| 45 | 47 | ||
| 46 | #define CREATE_TRACE_POINTS | 48 | #define CREATE_TRACE_POINTS |
| 47 | #include "trace.h" | 49 | #include "trace.h" |
| 48 | 50 | ||
| 49 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
| 50 | #include <asm/uaccess.h> | ||
| 51 | #include <asm/msr.h> | 52 | #include <asm/msr.h> |
| 52 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
| 53 | #include <asm/mtrr.h> | 54 | #include <asm/mtrr.h> |
| 54 | #include <asm/mce.h> | 55 | #include <asm/mce.h> |
| 56 | #include <asm/i387.h> | ||
| 57 | #include <asm/xcr.h> | ||
| 55 | 58 | ||
| 56 | #define MAX_IO_MSRS 256 | 59 | #define MAX_IO_MSRS 256 |
| 57 | #define CR0_RESERVED_BITS \ | 60 | #define CR0_RESERVED_BITS \ |
| @@ -62,6 +65,7 @@ | |||
| 62 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 65 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
| 63 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 66 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
| 64 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 67 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
| 68 | | X86_CR4_OSXSAVE \ | ||
| 65 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 69 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
| 66 | 70 | ||
| 67 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 71 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
| @@ -147,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 147 | { NULL } | 151 | { NULL } |
| 148 | }; | 152 | }; |
| 149 | 153 | ||
| 154 | u64 __read_mostly host_xcr0; | ||
| 155 | |||
| 156 | static inline u32 bit(int bitno) | ||
| 157 | { | ||
| 158 | return 1 << (bitno & 31); | ||
| 159 | } | ||
| 160 | |||
| 150 | static void kvm_on_user_return(struct user_return_notifier *urn) | 161 | static void kvm_on_user_return(struct user_return_notifier *urn) |
| 151 | { | 162 | { |
| 152 | unsigned slot; | 163 | unsigned slot; |
| @@ -285,7 +296,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
| 285 | prev_nr = vcpu->arch.exception.nr; | 296 | prev_nr = vcpu->arch.exception.nr; |
| 286 | if (prev_nr == DF_VECTOR) { | 297 | if (prev_nr == DF_VECTOR) { |
| 287 | /* triple fault -> shutdown */ | 298 | /* triple fault -> shutdown */ |
| 288 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 299 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
| 289 | return; | 300 | return; |
| 290 | } | 301 | } |
| 291 | class1 = exception_class(prev_nr); | 302 | class1 = exception_class(prev_nr); |
| @@ -414,121 +425,163 @@ out: | |||
| 414 | return changed; | 425 | return changed; |
| 415 | } | 426 | } |
| 416 | 427 | ||
| 417 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 428 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
| 418 | { | 429 | { |
| 430 | unsigned long old_cr0 = kvm_read_cr0(vcpu); | ||
| 431 | unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | | ||
| 432 | X86_CR0_CD | X86_CR0_NW; | ||
| 433 | |||
| 419 | cr0 |= X86_CR0_ET; | 434 | cr0 |= X86_CR0_ET; |
| 420 | 435 | ||
| 421 | #ifdef CONFIG_X86_64 | 436 | #ifdef CONFIG_X86_64 |
| 422 | if (cr0 & 0xffffffff00000000UL) { | 437 | if (cr0 & 0xffffffff00000000UL) |
| 423 | kvm_inject_gp(vcpu, 0); | 438 | return 1; |
| 424 | return; | ||
| 425 | } | ||
| 426 | #endif | 439 | #endif |
| 427 | 440 | ||
| 428 | cr0 &= ~CR0_RESERVED_BITS; | 441 | cr0 &= ~CR0_RESERVED_BITS; |
| 429 | 442 | ||
| 430 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 443 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) |
| 431 | kvm_inject_gp(vcpu, 0); | 444 | return 1; |
| 432 | return; | ||
| 433 | } | ||
| 434 | 445 | ||
| 435 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { | 446 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) |
| 436 | kvm_inject_gp(vcpu, 0); | 447 | return 1; |
| 437 | return; | ||
| 438 | } | ||
| 439 | 448 | ||
| 440 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 449 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
| 441 | #ifdef CONFIG_X86_64 | 450 | #ifdef CONFIG_X86_64 |
| 442 | if ((vcpu->arch.efer & EFER_LME)) { | 451 | if ((vcpu->arch.efer & EFER_LME)) { |
| 443 | int cs_db, cs_l; | 452 | int cs_db, cs_l; |
| 444 | 453 | ||
| 445 | if (!is_pae(vcpu)) { | 454 | if (!is_pae(vcpu)) |
| 446 | kvm_inject_gp(vcpu, 0); | 455 | return 1; |
| 447 | return; | ||
| 448 | } | ||
| 449 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 456 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| 450 | if (cs_l) { | 457 | if (cs_l) |
| 451 | kvm_inject_gp(vcpu, 0); | 458 | return 1; |
| 452 | return; | ||
| 453 | |||
| 454 | } | ||
| 455 | } else | 459 | } else |
| 456 | #endif | 460 | #endif |
| 457 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 461 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) |
| 458 | kvm_inject_gp(vcpu, 0); | 462 | return 1; |
| 459 | return; | ||
| 460 | } | ||
| 461 | |||
| 462 | } | 463 | } |
| 463 | 464 | ||
| 464 | kvm_x86_ops->set_cr0(vcpu, cr0); | 465 | kvm_x86_ops->set_cr0(vcpu, cr0); |
| 465 | 466 | ||
| 466 | kvm_mmu_reset_context(vcpu); | 467 | if ((cr0 ^ old_cr0) & update_bits) |
| 467 | return; | 468 | kvm_mmu_reset_context(vcpu); |
| 469 | return 0; | ||
| 468 | } | 470 | } |
| 469 | EXPORT_SYMBOL_GPL(kvm_set_cr0); | 471 | EXPORT_SYMBOL_GPL(kvm_set_cr0); |
| 470 | 472 | ||
| 471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 473 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
| 472 | { | 474 | { |
| 473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); | 475 | (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
| 474 | } | 476 | } |
| 475 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 477 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
| 476 | 478 | ||
| 477 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 479 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) |
| 478 | { | 480 | { |
| 479 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | 481 | u64 xcr0; |
| 480 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | ||
| 481 | 482 | ||
| 482 | if (cr4 & CR4_RESERVED_BITS) { | 483 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ |
| 484 | if (index != XCR_XFEATURE_ENABLED_MASK) | ||
| 485 | return 1; | ||
| 486 | xcr0 = xcr; | ||
| 487 | if (kvm_x86_ops->get_cpl(vcpu) != 0) | ||
| 488 | return 1; | ||
| 489 | if (!(xcr0 & XSTATE_FP)) | ||
| 490 | return 1; | ||
| 491 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) | ||
| 492 | return 1; | ||
| 493 | if (xcr0 & ~host_xcr0) | ||
| 494 | return 1; | ||
| 495 | vcpu->arch.xcr0 = xcr0; | ||
| 496 | vcpu->guest_xcr0_loaded = 0; | ||
| 497 | return 0; | ||
| 498 | } | ||
| 499 | |||
| 500 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | ||
| 501 | { | ||
| 502 | if (__kvm_set_xcr(vcpu, index, xcr)) { | ||
| 483 | kvm_inject_gp(vcpu, 0); | 503 | kvm_inject_gp(vcpu, 0); |
| 504 | return 1; | ||
| 505 | } | ||
| 506 | return 0; | ||
| 507 | } | ||
| 508 | EXPORT_SYMBOL_GPL(kvm_set_xcr); | ||
| 509 | |||
| 510 | static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
| 511 | { | ||
| 512 | struct kvm_cpuid_entry2 *best; | ||
| 513 | |||
| 514 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 515 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
| 516 | } | ||
| 517 | |||
| 518 | static void update_cpuid(struct kvm_vcpu *vcpu) | ||
| 519 | { | ||
| 520 | struct kvm_cpuid_entry2 *best; | ||
| 521 | |||
| 522 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 523 | if (!best) | ||
| 484 | return; | 524 | return; |
| 525 | |||
| 526 | /* Update OSXSAVE bit */ | ||
| 527 | if (cpu_has_xsave && best->function == 0x1) { | ||
| 528 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
| 529 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
| 530 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
| 485 | } | 531 | } |
| 532 | } | ||
| 533 | |||
| 534 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | ||
| 535 | { | ||
| 536 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | ||
| 537 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | ||
| 538 | |||
| 539 | if (cr4 & CR4_RESERVED_BITS) | ||
| 540 | return 1; | ||
| 541 | |||
| 542 | if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) | ||
| 543 | return 1; | ||
| 486 | 544 | ||
| 487 | if (is_long_mode(vcpu)) { | 545 | if (is_long_mode(vcpu)) { |
| 488 | if (!(cr4 & X86_CR4_PAE)) { | 546 | if (!(cr4 & X86_CR4_PAE)) |
| 489 | kvm_inject_gp(vcpu, 0); | 547 | return 1; |
| 490 | return; | ||
| 491 | } | ||
| 492 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 548 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
| 493 | && ((cr4 ^ old_cr4) & pdptr_bits) | 549 | && ((cr4 ^ old_cr4) & pdptr_bits) |
| 494 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 550 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) |
| 495 | kvm_inject_gp(vcpu, 0); | 551 | return 1; |
| 496 | return; | 552 | |
| 497 | } | 553 | if (cr4 & X86_CR4_VMXE) |
| 554 | return 1; | ||
| 498 | 555 | ||
| 499 | if (cr4 & X86_CR4_VMXE) { | ||
| 500 | kvm_inject_gp(vcpu, 0); | ||
| 501 | return; | ||
| 502 | } | ||
| 503 | kvm_x86_ops->set_cr4(vcpu, cr4); | 556 | kvm_x86_ops->set_cr4(vcpu, cr4); |
| 504 | vcpu->arch.cr4 = cr4; | 557 | |
| 505 | kvm_mmu_reset_context(vcpu); | 558 | if ((cr4 ^ old_cr4) & pdptr_bits) |
| 559 | kvm_mmu_reset_context(vcpu); | ||
| 560 | |||
| 561 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | ||
| 562 | update_cpuid(vcpu); | ||
| 563 | |||
| 564 | return 0; | ||
| 506 | } | 565 | } |
| 507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 566 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
| 508 | 567 | ||
| 509 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 568 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
| 510 | { | 569 | { |
| 511 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 570 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { |
| 512 | kvm_mmu_sync_roots(vcpu); | 571 | kvm_mmu_sync_roots(vcpu); |
| 513 | kvm_mmu_flush_tlb(vcpu); | 572 | kvm_mmu_flush_tlb(vcpu); |
| 514 | return; | 573 | return 0; |
| 515 | } | 574 | } |
| 516 | 575 | ||
| 517 | if (is_long_mode(vcpu)) { | 576 | if (is_long_mode(vcpu)) { |
| 518 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { | 577 | if (cr3 & CR3_L_MODE_RESERVED_BITS) |
| 519 | kvm_inject_gp(vcpu, 0); | 578 | return 1; |
| 520 | return; | ||
| 521 | } | ||
| 522 | } else { | 579 | } else { |
| 523 | if (is_pae(vcpu)) { | 580 | if (is_pae(vcpu)) { |
| 524 | if (cr3 & CR3_PAE_RESERVED_BITS) { | 581 | if (cr3 & CR3_PAE_RESERVED_BITS) |
| 525 | kvm_inject_gp(vcpu, 0); | 582 | return 1; |
| 526 | return; | 583 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) |
| 527 | } | 584 | return 1; |
| 528 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { | ||
| 529 | kvm_inject_gp(vcpu, 0); | ||
| 530 | return; | ||
| 531 | } | ||
| 532 | } | 585 | } |
| 533 | /* | 586 | /* |
| 534 | * We don't check reserved bits in nonpae mode, because | 587 | * We don't check reserved bits in nonpae mode, because |
| @@ -546,24 +599,28 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 546 | * to debug) behavior on the guest side. | 599 | * to debug) behavior on the guest side. |
| 547 | */ | 600 | */ |
| 548 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 601 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
| 549 | kvm_inject_gp(vcpu, 0); | 602 | return 1; |
| 550 | else { | 603 | vcpu->arch.cr3 = cr3; |
| 551 | vcpu->arch.cr3 = cr3; | 604 | vcpu->arch.mmu.new_cr3(vcpu); |
| 552 | vcpu->arch.mmu.new_cr3(vcpu); | 605 | return 0; |
| 553 | } | ||
| 554 | } | 606 | } |
| 555 | EXPORT_SYMBOL_GPL(kvm_set_cr3); | 607 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
| 556 | 608 | ||
| 557 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 609 | int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
| 558 | { | 610 | { |
| 559 | if (cr8 & CR8_RESERVED_BITS) { | 611 | if (cr8 & CR8_RESERVED_BITS) |
| 560 | kvm_inject_gp(vcpu, 0); | 612 | return 1; |
| 561 | return; | ||
| 562 | } | ||
| 563 | if (irqchip_in_kernel(vcpu->kvm)) | 613 | if (irqchip_in_kernel(vcpu->kvm)) |
| 564 | kvm_lapic_set_tpr(vcpu, cr8); | 614 | kvm_lapic_set_tpr(vcpu, cr8); |
| 565 | else | 615 | else |
| 566 | vcpu->arch.cr8 = cr8; | 616 | vcpu->arch.cr8 = cr8; |
| 617 | return 0; | ||
| 618 | } | ||
| 619 | |||
| 620 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | ||
| 621 | { | ||
| 622 | if (__kvm_set_cr8(vcpu, cr8)) | ||
| 623 | kvm_inject_gp(vcpu, 0); | ||
| 567 | } | 624 | } |
| 568 | EXPORT_SYMBOL_GPL(kvm_set_cr8); | 625 | EXPORT_SYMBOL_GPL(kvm_set_cr8); |
| 569 | 626 | ||
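Worked examples of the __kvm_set_xcr() checks introduced earlier in this file may make the validation order easier to follow; the XCR0 bit meanings (bit 0 = x87, bit 1 = SSE, bit 2 = YMM) come from the architecture, the rules themselves from the hunk:

	/*
	 * Illustrative outcomes, not committed code:
	 *   index != 0                      rejected - only XCR_XFEATURE_ENABLED_MASK is handled
	 *   CPL != 0                        rejected - kvm_set_xcr() then injects #GP(0)
	 *   xcr0 = 0x1  (x87 only)          accepted
	 *   xcr0 = 0x3  (x87|SSE)           accepted
	 *   xcr0 = 0x7  (x87|SSE|YMM)       accepted only if host_xcr0 also has YMM
	 *   xcr0 = 0x6  (SSE|YMM, no x87)   rejected - XSTATE_FP must always be set
	 *   xcr0 = 0x5  (x87|YMM, no SSE)   rejected - YMM state requires SSE state
	 */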
| @@ -576,7 +633,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
| 576 | } | 633 | } |
| 577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 634 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
| 578 | 635 | ||
| 579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | 636 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) |
| 580 | { | 637 | { |
| 581 | switch (dr) { | 638 | switch (dr) { |
| 582 | case 0 ... 3: | 639 | case 0 ... 3: |
| @@ -585,29 +642,21 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
| 585 | vcpu->arch.eff_db[dr] = val; | 642 | vcpu->arch.eff_db[dr] = val; |
| 586 | break; | 643 | break; |
| 587 | case 4: | 644 | case 4: |
| 588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 645 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
| 589 | kvm_queue_exception(vcpu, UD_VECTOR); | 646 | return 1; /* #UD */ |
| 590 | return 1; | ||
| 591 | } | ||
| 592 | /* fall through */ | 647 | /* fall through */ |
| 593 | case 6: | 648 | case 6: |
| 594 | if (val & 0xffffffff00000000ULL) { | 649 | if (val & 0xffffffff00000000ULL) |
| 595 | kvm_inject_gp(vcpu, 0); | 650 | return -1; /* #GP */ |
| 596 | return 1; | ||
| 597 | } | ||
| 598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 651 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
| 599 | break; | 652 | break; |
| 600 | case 5: | 653 | case 5: |
| 601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 654 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
| 602 | kvm_queue_exception(vcpu, UD_VECTOR); | 655 | return 1; /* #UD */ |
| 603 | return 1; | ||
| 604 | } | ||
| 605 | /* fall through */ | 656 | /* fall through */ |
| 606 | default: /* 7 */ | 657 | default: /* 7 */ |
| 607 | if (val & 0xffffffff00000000ULL) { | 658 | if (val & 0xffffffff00000000ULL) |
| 608 | kvm_inject_gp(vcpu, 0); | 659 | return -1; /* #GP */ |
| 609 | return 1; | ||
| 610 | } | ||
| 611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 660 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
| 612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 661 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
| 613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | 662 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); |
| @@ -618,28 +667,37 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
| 618 | 667 | ||
| 619 | return 0; | 668 | return 0; |
| 620 | } | 669 | } |
| 670 | |||
| 671 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
| 672 | { | ||
| 673 | int res; | ||
| 674 | |||
| 675 | res = __kvm_set_dr(vcpu, dr, val); | ||
| 676 | if (res > 0) | ||
| 677 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 678 | else if (res < 0) | ||
| 679 | kvm_inject_gp(vcpu, 0); | ||
| 680 | |||
| 681 | return res; | ||
| 682 | } | ||
| 621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | 683 | EXPORT_SYMBOL_GPL(kvm_set_dr); |
| 622 | 684 | ||
| 623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | 685 | static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) |
| 624 | { | 686 | { |
| 625 | switch (dr) { | 687 | switch (dr) { |
| 626 | case 0 ... 3: | 688 | case 0 ... 3: |
| 627 | *val = vcpu->arch.db[dr]; | 689 | *val = vcpu->arch.db[dr]; |
| 628 | break; | 690 | break; |
| 629 | case 4: | 691 | case 4: |
| 630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 692 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
| 631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 632 | return 1; | 693 | return 1; |
| 633 | } | ||
| 634 | /* fall through */ | 694 | /* fall through */ |
| 635 | case 6: | 695 | case 6: |
| 636 | *val = vcpu->arch.dr6; | 696 | *val = vcpu->arch.dr6; |
| 637 | break; | 697 | break; |
| 638 | case 5: | 698 | case 5: |
| 639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 699 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
| 640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 641 | return 1; | 700 | return 1; |
| 642 | } | ||
| 643 | /* fall through */ | 701 | /* fall through */ |
| 644 | default: /* 7 */ | 702 | default: /* 7 */ |
| 645 | *val = vcpu->arch.dr7; | 703 | *val = vcpu->arch.dr7; |
| @@ -648,12 +706,16 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
| 648 | 706 | ||
| 649 | return 0; | 707 | return 0; |
| 650 | } | 708 | } |
| 651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
| 652 | 709 | ||
| 653 | static inline u32 bit(int bitno) | 710 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) |
| 654 | { | 711 | { |
| 655 | return 1 << (bitno & 31); | 712 | if (_kvm_get_dr(vcpu, dr, val)) { |
| 713 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 714 | return 1; | ||
| 715 | } | ||
| 716 | return 0; | ||
| 656 | } | 717 | } |
| 718 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
| 657 | 719 | ||
| 658 | /* | 720 | /* |
| 659 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 721 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
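The debug-register hunks above separate policy from fault delivery: __kvm_set_dr() returns 0 on success, a positive value where the architecture demands #UD (DR4/DR5 accessed with CR4.DE set) and a negative value where it demands #GP (reserved bits 63:32 set), leaving the exported kvm_set_dr()/kvm_get_dr() wrappers to inject the exception. A minimal sketch of that mapping, mirroring the wrapper shown in the hunk:

	static int example_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
	{
		int res = __kvm_set_dr(vcpu, dr, val);

		if (res > 0)			/* DR4/DR5 with CR4.DE = 1 */
			kvm_queue_exception(vcpu, UD_VECTOR);
		else if (res < 0)		/* reserved high bits set */
			kvm_inject_gp(vcpu, 0);
		return res;
	}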
| @@ -682,10 +744,14 @@ static unsigned num_msrs_to_save; | |||
| 682 | 744 | ||
| 683 | static u32 emulated_msrs[] = { | 745 | static u32 emulated_msrs[] = { |
| 684 | MSR_IA32_MISC_ENABLE, | 746 | MSR_IA32_MISC_ENABLE, |
| 747 | MSR_IA32_MCG_STATUS, | ||
| 748 | MSR_IA32_MCG_CTL, | ||
| 685 | }; | 749 | }; |
| 686 | 750 | ||
| 687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | 751 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
| 688 | { | 752 | { |
| 753 | u64 old_efer = vcpu->arch.efer; | ||
| 754 | |||
| 689 | if (efer & efer_reserved_bits) | 755 | if (efer & efer_reserved_bits) |
| 690 | return 1; | 756 | return 1; |
| 691 | 757 | ||
| @@ -714,11 +780,13 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 714 | 780 | ||
| 715 | kvm_x86_ops->set_efer(vcpu, efer); | 781 | kvm_x86_ops->set_efer(vcpu, efer); |
| 716 | 782 | ||
| 717 | vcpu->arch.efer = efer; | ||
| 718 | |||
| 719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 783 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
| 720 | kvm_mmu_reset_context(vcpu); | 784 | kvm_mmu_reset_context(vcpu); |
| 721 | 785 | ||
| 786 | /* Update reserved bits */ | ||
| 787 | if ((efer ^ old_efer) & EFER_NX) | ||
| 788 | kvm_mmu_reset_context(vcpu); | ||
| 789 | |||
| 722 | return 0; | 790 | return 0; |
| 723 | } | 791 | } |
| 724 | 792 | ||
| @@ -882,7 +950,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v) | |||
| 882 | 950 | ||
| 883 | if (!vcpu->time_page) | 951 | if (!vcpu->time_page) |
| 884 | return 0; | 952 | return 0; |
| 885 | set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); | 953 | kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v); |
| 886 | return 1; | 954 | return 1; |
| 887 | } | 955 | } |
| 888 | 956 | ||
| @@ -1524,16 +1592,12 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
| 1524 | { | 1592 | { |
| 1525 | int i, idx; | 1593 | int i, idx; |
| 1526 | 1594 | ||
| 1527 | vcpu_load(vcpu); | ||
| 1528 | |||
| 1529 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 1595 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 1530 | for (i = 0; i < msrs->nmsrs; ++i) | 1596 | for (i = 0; i < msrs->nmsrs; ++i) |
| 1531 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 1597 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
| 1532 | break; | 1598 | break; |
| 1533 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 1599 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
| 1534 | 1600 | ||
| 1535 | vcpu_put(vcpu); | ||
| 1536 | |||
| 1537 | return i; | 1601 | return i; |
| 1538 | } | 1602 | } |
| 1539 | 1603 | ||
| @@ -1618,6 +1682,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 1618 | case KVM_CAP_PCI_SEGMENT: | 1682 | case KVM_CAP_PCI_SEGMENT: |
| 1619 | case KVM_CAP_DEBUGREGS: | 1683 | case KVM_CAP_DEBUGREGS: |
| 1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1684 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
| 1685 | case KVM_CAP_XSAVE: | ||
| 1621 | r = 1; | 1686 | r = 1; |
| 1622 | break; | 1687 | break; |
| 1623 | case KVM_CAP_COALESCED_MMIO: | 1688 | case KVM_CAP_COALESCED_MMIO: |
| @@ -1641,6 +1706,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 1641 | case KVM_CAP_MCE: | 1706 | case KVM_CAP_MCE: |
| 1642 | r = KVM_MAX_MCE_BANKS; | 1707 | r = KVM_MAX_MCE_BANKS; |
| 1643 | break; | 1708 | break; |
| 1709 | case KVM_CAP_XCRS: | ||
| 1710 | r = cpu_has_xsave; | ||
| 1711 | break; | ||
| 1644 | default: | 1712 | default: |
| 1645 | r = 0; | 1713 | r = 0; |
| 1646 | break; | 1714 | break; |
| @@ -1717,8 +1785,28 @@ out: | |||
| 1717 | return r; | 1785 | return r; |
| 1718 | } | 1786 | } |
| 1719 | 1787 | ||
| 1788 | static void wbinvd_ipi(void *garbage) | ||
| 1789 | { | ||
| 1790 | wbinvd(); | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) | ||
| 1794 | { | ||
| 1795 | return vcpu->kvm->arch.iommu_domain && | ||
| 1796 | !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); | ||
| 1797 | } | ||
| 1798 | |||
| 1720 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1799 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
| 1721 | { | 1800 | { |
| 1801 | /* Address WBINVD may be executed by guest */ | ||
| 1802 | if (need_emulate_wbinvd(vcpu)) { | ||
| 1803 | if (kvm_x86_ops->has_wbinvd_exit()) | ||
| 1804 | cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); | ||
| 1805 | else if (vcpu->cpu != -1 && vcpu->cpu != cpu) | ||
| 1806 | smp_call_function_single(vcpu->cpu, | ||
| 1807 | wbinvd_ipi, NULL, 1); | ||
| 1808 | } | ||
| 1809 | |||
| 1722 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 1810 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
| 1723 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | 1811 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { |
| 1724 | unsigned long khz = cpufreq_quick_get(cpu); | 1812 | unsigned long khz = cpufreq_quick_get(cpu); |
| @@ -1731,8 +1819,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 1731 | 1819 | ||
| 1732 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1820 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
| 1733 | { | 1821 | { |
| 1734 | kvm_put_guest_fpu(vcpu); | ||
| 1735 | kvm_x86_ops->vcpu_put(vcpu); | 1822 | kvm_x86_ops->vcpu_put(vcpu); |
| 1823 | kvm_put_guest_fpu(vcpu); | ||
| 1736 | } | 1824 | } |
| 1737 | 1825 | ||
| 1738 | static int is_efer_nx(void) | 1826 | static int is_efer_nx(void) |
| @@ -1781,7 +1869,6 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
| 1781 | if (copy_from_user(cpuid_entries, entries, | 1869 | if (copy_from_user(cpuid_entries, entries, |
| 1782 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1870 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
| 1783 | goto out_free; | 1871 | goto out_free; |
| 1784 | vcpu_load(vcpu); | ||
| 1785 | for (i = 0; i < cpuid->nent; i++) { | 1872 | for (i = 0; i < cpuid->nent; i++) { |
| 1786 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1873 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
| 1787 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1874 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
| @@ -1799,7 +1886,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
| 1799 | r = 0; | 1886 | r = 0; |
| 1800 | kvm_apic_set_version(vcpu); | 1887 | kvm_apic_set_version(vcpu); |
| 1801 | kvm_x86_ops->cpuid_update(vcpu); | 1888 | kvm_x86_ops->cpuid_update(vcpu); |
| 1802 | vcpu_put(vcpu); | 1889 | update_cpuid(vcpu); |
| 1803 | 1890 | ||
| 1804 | out_free: | 1891 | out_free: |
| 1805 | vfree(cpuid_entries); | 1892 | vfree(cpuid_entries); |
| @@ -1820,11 +1907,10 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1820 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1907 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
| 1821 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1908 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
| 1822 | goto out; | 1909 | goto out; |
| 1823 | vcpu_load(vcpu); | ||
| 1824 | vcpu->arch.cpuid_nent = cpuid->nent; | 1910 | vcpu->arch.cpuid_nent = cpuid->nent; |
| 1825 | kvm_apic_set_version(vcpu); | 1911 | kvm_apic_set_version(vcpu); |
| 1826 | kvm_x86_ops->cpuid_update(vcpu); | 1912 | kvm_x86_ops->cpuid_update(vcpu); |
| 1827 | vcpu_put(vcpu); | 1913 | update_cpuid(vcpu); |
| 1828 | return 0; | 1914 | return 0; |
| 1829 | 1915 | ||
| 1830 | out: | 1916 | out: |
| @@ -1837,7 +1923,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1837 | { | 1923 | { |
| 1838 | int r; | 1924 | int r; |
| 1839 | 1925 | ||
| 1840 | vcpu_load(vcpu); | ||
| 1841 | r = -E2BIG; | 1926 | r = -E2BIG; |
| 1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1927 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
| 1843 | goto out; | 1928 | goto out; |
| @@ -1849,7 +1934,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1849 | 1934 | ||
| 1850 | out: | 1935 | out: |
| 1851 | cpuid->nent = vcpu->arch.cpuid_nent; | 1936 | cpuid->nent = vcpu->arch.cpuid_nent; |
| 1852 | vcpu_put(vcpu); | ||
| 1853 | return r; | 1937 | return r; |
| 1854 | } | 1938 | } |
| 1855 | 1939 | ||
| @@ -1901,13 +1985,13 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1901 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | 1985 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); |
| 1902 | /* cpuid 1.ecx */ | 1986 | /* cpuid 1.ecx */ |
| 1903 | const u32 kvm_supported_word4_x86_features = | 1987 | const u32 kvm_supported_word4_x86_features = |
| 1904 | F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | | 1988 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | |
| 1905 | 0 /* DS-CPL, VMX, SMX, EST */ | | 1989 | 0 /* DS-CPL, VMX, SMX, EST */ | |
| 1906 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | 1990 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | |
| 1907 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | 1991 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | |
| 1908 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 1992 | 0 /* Reserved, DCA */ | F(XMM4_1) | |
| 1909 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 1993 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
| 1910 | 0 /* Reserved, XSAVE, OSXSAVE */; | 1994 | 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); |
| 1911 | /* cpuid 0x80000001.ecx */ | 1995 | /* cpuid 0x80000001.ecx */ |
| 1912 | const u32 kvm_supported_word6_x86_features = | 1996 | const u32 kvm_supported_word6_x86_features = |
| 1913 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | | 1997 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | |
| @@ -1922,7 +2006,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1922 | 2006 | ||
| 1923 | switch (function) { | 2007 | switch (function) { |
| 1924 | case 0: | 2008 | case 0: |
| 1925 | entry->eax = min(entry->eax, (u32)0xb); | 2009 | entry->eax = min(entry->eax, (u32)0xd); |
| 1926 | break; | 2010 | break; |
| 1927 | case 1: | 2011 | case 1: |
| 1928 | entry->edx &= kvm_supported_word0_x86_features; | 2012 | entry->edx &= kvm_supported_word0_x86_features; |
| @@ -1980,6 +2064,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1980 | } | 2064 | } |
| 1981 | break; | 2065 | break; |
| 1982 | } | 2066 | } |
| 2067 | case 0xd: { | ||
| 2068 | int i; | ||
| 2069 | |||
| 2070 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2071 | for (i = 1; *nent < maxnent; ++i) { | ||
| 2072 | if (entry[i - 1].eax == 0 && i != 2) | ||
| 2073 | break; | ||
| 2074 | do_cpuid_1_ent(&entry[i], function, i); | ||
| 2075 | entry[i].flags |= | ||
| 2076 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
| 2077 | ++*nent; | ||
| 2078 | } | ||
| 2079 | break; | ||
| 2080 | } | ||
| 1983 | case KVM_CPUID_SIGNATURE: { | 2081 | case KVM_CPUID_SIGNATURE: { |
| 1984 | char signature[12] = "KVMKVMKVM\0\0"; | 2082 | char signature[12] = "KVMKVMKVM\0\0"; |
| 1985 | u32 *sigptr = (u32 *)signature; | 2083 | u32 *sigptr = (u32 *)signature; |
| @@ -2081,9 +2179,7 @@ out: | |||
| 2081 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2179 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
| 2082 | struct kvm_lapic_state *s) | 2180 | struct kvm_lapic_state *s) |
| 2083 | { | 2181 | { |
| 2084 | vcpu_load(vcpu); | ||
| 2085 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); | 2182 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); |
| 2086 | vcpu_put(vcpu); | ||
| 2087 | 2183 | ||
| 2088 | return 0; | 2184 | return 0; |
| 2089 | } | 2185 | } |
| @@ -2091,11 +2187,9 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | |||
| 2091 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | 2187 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, |
| 2092 | struct kvm_lapic_state *s) | 2188 | struct kvm_lapic_state *s) |
| 2093 | { | 2189 | { |
| 2094 | vcpu_load(vcpu); | ||
| 2095 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); | 2190 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); |
| 2096 | kvm_apic_post_state_restore(vcpu); | 2191 | kvm_apic_post_state_restore(vcpu); |
| 2097 | update_cr8_intercept(vcpu); | 2192 | update_cr8_intercept(vcpu); |
| 2098 | vcpu_put(vcpu); | ||
| 2099 | 2193 | ||
| 2100 | return 0; | 2194 | return 0; |
| 2101 | } | 2195 | } |
| @@ -2107,20 +2201,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
| 2107 | return -EINVAL; | 2201 | return -EINVAL; |
| 2108 | if (irqchip_in_kernel(vcpu->kvm)) | 2202 | if (irqchip_in_kernel(vcpu->kvm)) |
| 2109 | return -ENXIO; | 2203 | return -ENXIO; |
| 2110 | vcpu_load(vcpu); | ||
| 2111 | 2204 | ||
| 2112 | kvm_queue_interrupt(vcpu, irq->irq, false); | 2205 | kvm_queue_interrupt(vcpu, irq->irq, false); |
| 2113 | 2206 | ||
| 2114 | vcpu_put(vcpu); | ||
| 2115 | |||
| 2116 | return 0; | 2207 | return 0; |
| 2117 | } | 2208 | } |
| 2118 | 2209 | ||
| 2119 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) | 2210 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) |
| 2120 | { | 2211 | { |
| 2121 | vcpu_load(vcpu); | ||
| 2122 | kvm_inject_nmi(vcpu); | 2212 | kvm_inject_nmi(vcpu); |
| 2123 | vcpu_put(vcpu); | ||
| 2124 | 2213 | ||
| 2125 | return 0; | 2214 | return 0; |
| 2126 | } | 2215 | } |
| @@ -2140,7 +2229,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
| 2140 | int r; | 2229 | int r; |
| 2141 | unsigned bank_num = mcg_cap & 0xff, bank; | 2230 | unsigned bank_num = mcg_cap & 0xff, bank; |
| 2142 | 2231 | ||
| 2143 | vcpu_load(vcpu); | ||
| 2144 | r = -EINVAL; | 2232 | r = -EINVAL; |
| 2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2233 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
| 2146 | goto out; | 2234 | goto out; |
| @@ -2155,7 +2243,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
| 2155 | for (bank = 0; bank < bank_num; bank++) | 2243 | for (bank = 0; bank < bank_num; bank++) |
| 2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2244 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
| 2157 | out: | 2245 | out: |
| 2158 | vcpu_put(vcpu); | ||
| 2159 | return r; | 2246 | return r; |
| 2160 | } | 2247 | } |
| 2161 | 2248 | ||
| @@ -2188,7 +2275,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
| 2188 | printk(KERN_DEBUG "kvm: set_mce: " | 2275 | printk(KERN_DEBUG "kvm: set_mce: " |
| 2189 | "injects mce exception while " | 2276 | "injects mce exception while " |
| 2190 | "previous one is in progress!\n"); | 2277 | "previous one is in progress!\n"); |
| 2191 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 2278 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
| 2192 | return 0; | 2279 | return 0; |
| 2193 | } | 2280 | } |
| 2194 | if (banks[1] & MCI_STATUS_VAL) | 2281 | if (banks[1] & MCI_STATUS_VAL) |
| @@ -2213,8 +2300,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
| 2213 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | 2300 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, |
| 2214 | struct kvm_vcpu_events *events) | 2301 | struct kvm_vcpu_events *events) |
| 2215 | { | 2302 | { |
| 2216 | vcpu_load(vcpu); | ||
| 2217 | |||
| 2218 | events->exception.injected = | 2303 | events->exception.injected = |
| 2219 | vcpu->arch.exception.pending && | 2304 | vcpu->arch.exception.pending && |
| 2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 2305 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
| @@ -2239,8 +2324,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2324 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
| 2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | 2325 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
| 2241 | | KVM_VCPUEVENT_VALID_SHADOW); | 2326 | | KVM_VCPUEVENT_VALID_SHADOW); |
| 2242 | |||
| 2243 | vcpu_put(vcpu); | ||
| 2244 | } | 2327 | } |
| 2245 | 2328 | ||
| 2246 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | 2329 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, |
| @@ -2251,8 +2334,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | 2334 | | KVM_VCPUEVENT_VALID_SHADOW)) |
| 2252 | return -EINVAL; | 2335 | return -EINVAL; |
| 2253 | 2336 | ||
| 2254 | vcpu_load(vcpu); | ||
| 2255 | |||
| 2256 | vcpu->arch.exception.pending = events->exception.injected; | 2337 | vcpu->arch.exception.pending = events->exception.injected; |
| 2257 | vcpu->arch.exception.nr = events->exception.nr; | 2338 | vcpu->arch.exception.nr = events->exception.nr; |
| 2258 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 2339 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
| @@ -2275,22 +2356,16 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2275 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | 2356 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) |
| 2276 | vcpu->arch.sipi_vector = events->sipi_vector; | 2357 | vcpu->arch.sipi_vector = events->sipi_vector; |
| 2277 | 2358 | ||
| 2278 | vcpu_put(vcpu); | ||
| 2279 | |||
| 2280 | return 0; | 2359 | return 0; |
| 2281 | } | 2360 | } |
| 2282 | 2361 | ||
| 2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | 2362 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, |
| 2284 | struct kvm_debugregs *dbgregs) | 2363 | struct kvm_debugregs *dbgregs) |
| 2285 | { | 2364 | { |
| 2286 | vcpu_load(vcpu); | ||
| 2287 | |||
| 2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | 2365 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); |
| 2289 | dbgregs->dr6 = vcpu->arch.dr6; | 2366 | dbgregs->dr6 = vcpu->arch.dr6; |
| 2290 | dbgregs->dr7 = vcpu->arch.dr7; | 2367 | dbgregs->dr7 = vcpu->arch.dr7; |
| 2291 | dbgregs->flags = 0; | 2368 | dbgregs->flags = 0; |
| 2292 | |||
| 2293 | vcpu_put(vcpu); | ||
| 2294 | } | 2369 | } |
| 2295 | 2370 | ||
| 2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | 2371 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, |
| @@ -2299,40 +2374,113 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
| 2299 | if (dbgregs->flags) | 2374 | if (dbgregs->flags) |
| 2300 | return -EINVAL; | 2375 | return -EINVAL; |
| 2301 | 2376 | ||
| 2302 | vcpu_load(vcpu); | ||
| 2303 | |||
| 2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 2377 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
| 2305 | vcpu->arch.dr6 = dbgregs->dr6; | 2378 | vcpu->arch.dr6 = dbgregs->dr6; |
| 2306 | vcpu->arch.dr7 = dbgregs->dr7; | 2379 | vcpu->arch.dr7 = dbgregs->dr7; |
| 2307 | 2380 | ||
| 2308 | vcpu_put(vcpu); | 2381 | return 0; |
| 2382 | } | ||
| 2383 | |||
| 2384 | static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, | ||
| 2385 | struct kvm_xsave *guest_xsave) | ||
| 2386 | { | ||
| 2387 | if (cpu_has_xsave) | ||
| 2388 | memcpy(guest_xsave->region, | ||
| 2389 | &vcpu->arch.guest_fpu.state->xsave, | ||
| 2390 | sizeof(struct xsave_struct)); | ||
| 2391 | else { | ||
| 2392 | memcpy(guest_xsave->region, | ||
| 2393 | &vcpu->arch.guest_fpu.state->fxsave, | ||
| 2394 | sizeof(struct i387_fxsave_struct)); | ||
| 2395 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = | ||
| 2396 | XSTATE_FPSSE; | ||
| 2397 | } | ||
| 2398 | } | ||
| 2399 | |||
| 2400 | static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, | ||
| 2401 | struct kvm_xsave *guest_xsave) | ||
| 2402 | { | ||
| 2403 | u64 xstate_bv = | ||
| 2404 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; | ||
| 2309 | 2405 | ||
| 2406 | if (cpu_has_xsave) | ||
| 2407 | memcpy(&vcpu->arch.guest_fpu.state->xsave, | ||
| 2408 | guest_xsave->region, sizeof(struct xsave_struct)); | ||
| 2409 | else { | ||
| 2410 | if (xstate_bv & ~XSTATE_FPSSE) | ||
| 2411 | return -EINVAL; | ||
| 2412 | memcpy(&vcpu->arch.guest_fpu.state->fxsave, | ||
| 2413 | guest_xsave->region, sizeof(struct i387_fxsave_struct)); | ||
| 2414 | } | ||
| 2310 | return 0; | 2415 | return 0; |
| 2311 | } | 2416 | } |
| 2312 | 2417 | ||
| 2418 | static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, | ||
| 2419 | struct kvm_xcrs *guest_xcrs) | ||
| 2420 | { | ||
| 2421 | if (!cpu_has_xsave) { | ||
| 2422 | guest_xcrs->nr_xcrs = 0; | ||
| 2423 | return; | ||
| 2424 | } | ||
| 2425 | |||
| 2426 | guest_xcrs->nr_xcrs = 1; | ||
| 2427 | guest_xcrs->flags = 0; | ||
| 2428 | guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; | ||
| 2429 | guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; | ||
| 2430 | } | ||
| 2431 | |||
| 2432 | static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, | ||
| 2433 | struct kvm_xcrs *guest_xcrs) | ||
| 2434 | { | ||
| 2435 | int i, r = 0; | ||
| 2436 | |||
| 2437 | if (!cpu_has_xsave) | ||
| 2438 | return -EINVAL; | ||
| 2439 | |||
| 2440 | if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) | ||
| 2441 | return -EINVAL; | ||
| 2442 | |||
| 2443 | for (i = 0; i < guest_xcrs->nr_xcrs; i++) | ||
| 2444 | /* Only support XCR0 currently */ | ||
| 2445 | if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { | ||
| 2446 | r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, | ||
| 2447 | guest_xcrs->xcrs[0].value); | ||
| 2448 | break; | ||
| 2449 | } | ||
| 2450 | if (r) | ||
| 2451 | r = -EINVAL; | ||
| 2452 | return r; | ||
| 2453 | } | ||
| 2454 | |||
| 2313 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2455 | long kvm_arch_vcpu_ioctl(struct file *filp, |
| 2314 | unsigned int ioctl, unsigned long arg) | 2456 | unsigned int ioctl, unsigned long arg) |
| 2315 | { | 2457 | { |
| 2316 | struct kvm_vcpu *vcpu = filp->private_data; | 2458 | struct kvm_vcpu *vcpu = filp->private_data; |
| 2317 | void __user *argp = (void __user *)arg; | 2459 | void __user *argp = (void __user *)arg; |
| 2318 | int r; | 2460 | int r; |
| 2319 | struct kvm_lapic_state *lapic = NULL; | 2461 | union { |
| 2462 | struct kvm_lapic_state *lapic; | ||
| 2463 | struct kvm_xsave *xsave; | ||
| 2464 | struct kvm_xcrs *xcrs; | ||
| 2465 | void *buffer; | ||
| 2466 | } u; | ||
| 2320 | 2467 | ||
| 2468 | u.buffer = NULL; | ||
| 2321 | switch (ioctl) { | 2469 | switch (ioctl) { |
| 2322 | case KVM_GET_LAPIC: { | 2470 | case KVM_GET_LAPIC: { |
| 2323 | r = -EINVAL; | 2471 | r = -EINVAL; |
| 2324 | if (!vcpu->arch.apic) | 2472 | if (!vcpu->arch.apic) |
| 2325 | goto out; | 2473 | goto out; |
| 2326 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2474 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 2327 | 2475 | ||
| 2328 | r = -ENOMEM; | 2476 | r = -ENOMEM; |
| 2329 | if (!lapic) | 2477 | if (!u.lapic) |
| 2330 | goto out; | 2478 | goto out; |
| 2331 | r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); | 2479 | r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic); |
| 2332 | if (r) | 2480 | if (r) |
| 2333 | goto out; | 2481 | goto out; |
| 2334 | r = -EFAULT; | 2482 | r = -EFAULT; |
| 2335 | if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) | 2483 | if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state))) |
| 2336 | goto out; | 2484 | goto out; |
| 2337 | r = 0; | 2485 | r = 0; |
| 2338 | break; | 2486 | break; |
| @@ -2341,14 +2489,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 2341 | r = -EINVAL; | 2489 | r = -EINVAL; |
| 2342 | if (!vcpu->arch.apic) | 2490 | if (!vcpu->arch.apic) |
| 2343 | goto out; | 2491 | goto out; |
| 2344 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2492 | u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 2345 | r = -ENOMEM; | 2493 | r = -ENOMEM; |
| 2346 | if (!lapic) | 2494 | if (!u.lapic) |
| 2347 | goto out; | 2495 | goto out; |
| 2348 | r = -EFAULT; | 2496 | r = -EFAULT; |
| 2349 | if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) | 2497 | if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state))) |
| 2350 | goto out; | 2498 | goto out; |
| 2351 | r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); | 2499 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); |
| 2352 | if (r) | 2500 | if (r) |
| 2353 | goto out; | 2501 | goto out; |
| 2354 | r = 0; | 2502 | r = 0; |
| @@ -2464,9 +2612,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 2464 | r = -EFAULT; | 2612 | r = -EFAULT; |
| 2465 | if (copy_from_user(&mce, argp, sizeof mce)) | 2613 | if (copy_from_user(&mce, argp, sizeof mce)) |
| 2466 | goto out; | 2614 | goto out; |
| 2467 | vcpu_load(vcpu); | ||
| 2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2615 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
| 2469 | vcpu_put(vcpu); | ||
| 2470 | break; | 2616 | break; |
| 2471 | } | 2617 | } |
| 2472 | case KVM_GET_VCPU_EVENTS: { | 2618 | case KVM_GET_VCPU_EVENTS: { |
| @@ -2513,11 +2659,67 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | 2659 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); |
| 2514 | break; | 2660 | break; |
| 2515 | } | 2661 | } |
| 2662 | case KVM_GET_XSAVE: { | ||
| 2663 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | ||
| 2664 | r = -ENOMEM; | ||
| 2665 | if (!u.xsave) | ||
| 2666 | break; | ||
| 2667 | |||
| 2668 | kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); | ||
| 2669 | |||
| 2670 | r = -EFAULT; | ||
| 2671 | if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) | ||
| 2672 | break; | ||
| 2673 | r = 0; | ||
| 2674 | break; | ||
| 2675 | } | ||
| 2676 | case KVM_SET_XSAVE: { | ||
| 2677 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | ||
| 2678 | r = -ENOMEM; | ||
| 2679 | if (!u.xsave) | ||
| 2680 | break; | ||
| 2681 | |||
| 2682 | r = -EFAULT; | ||
| 2683 | if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) | ||
| 2684 | break; | ||
| 2685 | |||
| 2686 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); | ||
| 2687 | break; | ||
| 2688 | } | ||
| 2689 | case KVM_GET_XCRS: { | ||
| 2690 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | ||
| 2691 | r = -ENOMEM; | ||
| 2692 | if (!u.xcrs) | ||
| 2693 | break; | ||
| 2694 | |||
| 2695 | kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); | ||
| 2696 | |||
| 2697 | r = -EFAULT; | ||
| 2698 | if (copy_to_user(argp, u.xcrs, | ||
| 2699 | sizeof(struct kvm_xcrs))) | ||
| 2700 | break; | ||
| 2701 | r = 0; | ||
| 2702 | break; | ||
| 2703 | } | ||
| 2704 | case KVM_SET_XCRS: { | ||
| 2705 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | ||
| 2706 | r = -ENOMEM; | ||
| 2707 | if (!u.xcrs) | ||
| 2708 | break; | ||
| 2709 | |||
| 2710 | r = -EFAULT; | ||
| 2711 | if (copy_from_user(u.xcrs, argp, | ||
| 2712 | sizeof(struct kvm_xcrs))) | ||
| 2713 | break; | ||
| 2714 | |||
| 2715 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | ||
| 2716 | break; | ||
| 2717 | } | ||
| 2516 | default: | 2718 | default: |
| 2517 | r = -EINVAL; | 2719 | r = -EINVAL; |
| 2518 | } | 2720 | } |
| 2519 | out: | 2721 | out: |
| 2520 | kfree(lapic); | 2722 | kfree(u.buffer); |
| 2521 | return r; | 2723 | return r; |
| 2522 | } | 2724 | } |
| 2523 | 2725 | ||
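The KVM_GET/SET_XSAVE and KVM_GET/SET_XCRS cases added above give userspace a way to save and restore the extended FPU state, for instance during live migration. A hedged userspace sketch, assuming a <linux/kvm.h> that already exports struct kvm_xsave, struct kvm_xcrs and the new ioctl numbers:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Fetch the vcpu's XSAVE area and XCR values through the new ioctls. */
	static int save_extended_state(int vcpu_fd, struct kvm_xsave *xsave,
				       struct kvm_xcrs *xcrs)
	{
		if (ioctl(vcpu_fd, KVM_GET_XSAVE, xsave) < 0)
			return -1;
		if (ioctl(vcpu_fd, KVM_GET_XCRS, xcrs) < 0)
			return -1;
		return 0;
	}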
| @@ -2560,115 +2762,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
| 2560 | return kvm->arch.n_alloc_mmu_pages; | 2762 | return kvm->arch.n_alloc_mmu_pages; |
| 2561 | } | 2763 | } |
| 2562 | 2764 | ||
| 2563 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | ||
| 2564 | { | ||
| 2565 | int i; | ||
| 2566 | struct kvm_mem_alias *alias; | ||
| 2567 | struct kvm_mem_aliases *aliases; | ||
| 2568 | |||
| 2569 | aliases = kvm_aliases(kvm); | ||
| 2570 | |||
| 2571 | for (i = 0; i < aliases->naliases; ++i) { | ||
| 2572 | alias = &aliases->aliases[i]; | ||
| 2573 | if (alias->flags & KVM_ALIAS_INVALID) | ||
| 2574 | continue; | ||
| 2575 | if (gfn >= alias->base_gfn | ||
| 2576 | && gfn < alias->base_gfn + alias->npages) | ||
| 2577 | return alias->target_gfn + gfn - alias->base_gfn; | ||
| 2578 | } | ||
| 2579 | return gfn; | ||
| 2580 | } | ||
| 2581 | |||
| 2582 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
| 2583 | { | ||
| 2584 | int i; | ||
| 2585 | struct kvm_mem_alias *alias; | ||
| 2586 | struct kvm_mem_aliases *aliases; | ||
| 2587 | |||
| 2588 | aliases = kvm_aliases(kvm); | ||
| 2589 | |||
| 2590 | for (i = 0; i < aliases->naliases; ++i) { | ||
| 2591 | alias = &aliases->aliases[i]; | ||
| 2592 | if (gfn >= alias->base_gfn | ||
| 2593 | && gfn < alias->base_gfn + alias->npages) | ||
| 2594 | return alias->target_gfn + gfn - alias->base_gfn; | ||
| 2595 | } | ||
| 2596 | return gfn; | ||
| 2597 | } | ||
| 2598 | |||
| 2599 | /* | ||
| 2600 | * Set a new alias region. Aliases map a portion of physical memory into | ||
| 2601 | * another portion. This is useful for memory windows, for example the PC | ||
| 2602 | * VGA region. | ||
| 2603 | */ | ||
| 2604 | static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | ||
| 2605 | struct kvm_memory_alias *alias) | ||
| 2606 | { | ||
| 2607 | int r, n; | ||
| 2608 | struct kvm_mem_alias *p; | ||
| 2609 | struct kvm_mem_aliases *aliases, *old_aliases; | ||
| 2610 | |||
| 2611 | r = -EINVAL; | ||
| 2612 | /* General sanity checks */ | ||
| 2613 | if (alias->memory_size & (PAGE_SIZE - 1)) | ||
| 2614 | goto out; | ||
| 2615 | if (alias->guest_phys_addr & (PAGE_SIZE - 1)) | ||
| 2616 | goto out; | ||
| 2617 | if (alias->slot >= KVM_ALIAS_SLOTS) | ||
| 2618 | goto out; | ||
| 2619 | if (alias->guest_phys_addr + alias->memory_size | ||
| 2620 | < alias->guest_phys_addr) | ||
| 2621 | goto out; | ||
| 2622 | if (alias->target_phys_addr + alias->memory_size | ||
| 2623 | < alias->target_phys_addr) | ||
| 2624 | goto out; | ||
| 2625 | |||
| 2626 | r = -ENOMEM; | ||
| 2627 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
| 2628 | if (!aliases) | ||
| 2629 | goto out; | ||
| 2630 | |||
| 2631 | mutex_lock(&kvm->slots_lock); | ||
| 2632 | |||
| 2633 | /* invalidate any gfn reference in case of deletion/shrinking */ | ||
| 2634 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
| 2635 | aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; | ||
| 2636 | old_aliases = kvm->arch.aliases; | ||
| 2637 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
| 2638 | synchronize_srcu_expedited(&kvm->srcu); | ||
| 2639 | kvm_mmu_zap_all(kvm); | ||
| 2640 | kfree(old_aliases); | ||
| 2641 | |||
| 2642 | r = -ENOMEM; | ||
| 2643 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
| 2644 | if (!aliases) | ||
| 2645 | goto out_unlock; | ||
| 2646 | |||
| 2647 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
| 2648 | |||
| 2649 | p = &aliases->aliases[alias->slot]; | ||
| 2650 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | ||
| 2651 | p->npages = alias->memory_size >> PAGE_SHIFT; | ||
| 2652 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | ||
| 2653 | p->flags &= ~(KVM_ALIAS_INVALID); | ||
| 2654 | |||
| 2655 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | ||
| 2656 | if (aliases->aliases[n - 1].npages) | ||
| 2657 | break; | ||
| 2658 | aliases->naliases = n; | ||
| 2659 | |||
| 2660 | old_aliases = kvm->arch.aliases; | ||
| 2661 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
| 2662 | synchronize_srcu_expedited(&kvm->srcu); | ||
| 2663 | kfree(old_aliases); | ||
| 2664 | r = 0; | ||
| 2665 | |||
| 2666 | out_unlock: | ||
| 2667 | mutex_unlock(&kvm->slots_lock); | ||
| 2668 | out: | ||
| 2669 | return r; | ||
| 2670 | } | ||
| 2671 | |||
| 2672 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | 2765 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) |
| 2673 | { | 2766 | { |
| 2674 | int r; | 2767 | int r; |
| @@ -2797,7 +2890,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 2797 | struct kvm_memory_slot *memslot; | 2890 | struct kvm_memory_slot *memslot; |
| 2798 | unsigned long n; | 2891 | unsigned long n; |
| 2799 | unsigned long is_dirty = 0; | 2892 | unsigned long is_dirty = 0; |
| 2800 | unsigned long *dirty_bitmap = NULL; | ||
| 2801 | 2893 | ||
| 2802 | mutex_lock(&kvm->slots_lock); | 2894 | mutex_lock(&kvm->slots_lock); |
| 2803 | 2895 | ||
| @@ -2812,27 +2904,30 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 2812 | 2904 | ||
| 2813 | n = kvm_dirty_bitmap_bytes(memslot); | 2905 | n = kvm_dirty_bitmap_bytes(memslot); |
| 2814 | 2906 | ||
| 2815 | r = -ENOMEM; | ||
| 2816 | dirty_bitmap = vmalloc(n); | ||
| 2817 | if (!dirty_bitmap) | ||
| 2818 | goto out; | ||
| 2819 | memset(dirty_bitmap, 0, n); | ||
| 2820 | |||
| 2821 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | 2907 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) |
| 2822 | is_dirty = memslot->dirty_bitmap[i]; | 2908 | is_dirty = memslot->dirty_bitmap[i]; |
| 2823 | 2909 | ||
| 2824 | /* If nothing is dirty, don't bother messing with page tables. */ | 2910 | /* If nothing is dirty, don't bother messing with page tables. */ |
| 2825 | if (is_dirty) { | 2911 | if (is_dirty) { |
| 2826 | struct kvm_memslots *slots, *old_slots; | 2912 | struct kvm_memslots *slots, *old_slots; |
| 2913 | unsigned long *dirty_bitmap; | ||
| 2827 | 2914 | ||
| 2828 | spin_lock(&kvm->mmu_lock); | 2915 | spin_lock(&kvm->mmu_lock); |
| 2829 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 2916 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
| 2830 | spin_unlock(&kvm->mmu_lock); | 2917 | spin_unlock(&kvm->mmu_lock); |
| 2831 | 2918 | ||
| 2832 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 2919 | r = -ENOMEM; |
| 2833 | if (!slots) | 2920 | dirty_bitmap = vmalloc(n); |
| 2834 | goto out_free; | 2921 | if (!dirty_bitmap) |
| 2922 | goto out; | ||
| 2923 | memset(dirty_bitmap, 0, n); | ||
| 2835 | 2924 | ||
| 2925 | r = -ENOMEM; | ||
| 2926 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | ||
| 2927 | if (!slots) { | ||
| 2928 | vfree(dirty_bitmap); | ||
| 2929 | goto out; | ||
| 2930 | } | ||
| 2836 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 2931 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
| 2837 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 2932 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; |
| 2838 | 2933 | ||
| @@ -2841,13 +2936,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 2841 | synchronize_srcu_expedited(&kvm->srcu); | 2936 | synchronize_srcu_expedited(&kvm->srcu); |
| 2842 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | 2937 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; |
| 2843 | kfree(old_slots); | 2938 | kfree(old_slots); |
| 2939 | |||
| 2940 | r = -EFAULT; | ||
| 2941 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { | ||
| 2942 | vfree(dirty_bitmap); | ||
| 2943 | goto out; | ||
| 2944 | } | ||
| 2945 | vfree(dirty_bitmap); | ||
| 2946 | } else { | ||
| 2947 | r = -EFAULT; | ||
| 2948 | if (clear_user(log->dirty_bitmap, n)) | ||
| 2949 | goto out; | ||
| 2844 | } | 2950 | } |
| 2845 | 2951 | ||
| 2846 | r = 0; | 2952 | r = 0; |
| 2847 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | ||
| 2848 | r = -EFAULT; | ||
| 2849 | out_free: | ||
| 2850 | vfree(dirty_bitmap); | ||
| 2851 | out: | 2953 | out: |
| 2852 | mutex_unlock(&kvm->slots_lock); | 2954 | mutex_unlock(&kvm->slots_lock); |
| 2853 | return r; | 2955 | return r; |
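The reworked dirty-log path above only allocates when something is actually dirty: it installs a zeroed bitmap into a fresh copy of the memslots, publishes that copy, and then hands the old (dirty) bitmap to userspace, while the clean case just clear_user()s the destination. A rough userspace model of the swap is below; the slot type is invented, there is no locking, and memcpy stands in for copy_to_user().

	#include <stdlib.h>
	#include <string.h>

	struct slot { unsigned long *dirty_bitmap; size_t bitmap_bytes; };

	/*
	 * Hand the accumulated dirty bits to the caller and leave the slot with
	 * a zeroed bitmap, mirroring the "swap in a fresh bitmap, return the old
	 * one" step of the patched kvm_vm_ioctl_get_dirty_log().
	 */
	static int fetch_and_reset_dirty(struct slot *s, unsigned long *dest)
	{
		unsigned long *fresh, *old;

		fresh = calloc(1, s->bitmap_bytes);
		if (!fresh)
			return -1;

		old = s->dirty_bitmap;
		s->dirty_bitmap = fresh;	/* in KVM: copied memslots + SRCU publish */

		memcpy(dest, old, s->bitmap_bytes);	/* kernel: copy_to_user() */
		free(old);
		return 0;
	}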
| @@ -2867,7 +2969,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2867 | union { | 2969 | union { |
| 2868 | struct kvm_pit_state ps; | 2970 | struct kvm_pit_state ps; |
| 2869 | struct kvm_pit_state2 ps2; | 2971 | struct kvm_pit_state2 ps2; |
| 2870 | struct kvm_memory_alias alias; | ||
| 2871 | struct kvm_pit_config pit_config; | 2972 | struct kvm_pit_config pit_config; |
| 2872 | } u; | 2973 | } u; |
| 2873 | 2974 | ||
| @@ -2888,22 +2989,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2888 | goto out; | 2989 | goto out; |
| 2889 | break; | 2990 | break; |
| 2890 | } | 2991 | } |
| 2891 | case KVM_SET_MEMORY_REGION: { | ||
| 2892 | struct kvm_memory_region kvm_mem; | ||
| 2893 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
| 2894 | |||
| 2895 | r = -EFAULT; | ||
| 2896 | if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) | ||
| 2897 | goto out; | ||
| 2898 | kvm_userspace_mem.slot = kvm_mem.slot; | ||
| 2899 | kvm_userspace_mem.flags = kvm_mem.flags; | ||
| 2900 | kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; | ||
| 2901 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | ||
| 2902 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); | ||
| 2903 | if (r) | ||
| 2904 | goto out; | ||
| 2905 | break; | ||
| 2906 | } | ||
| 2907 | case KVM_SET_NR_MMU_PAGES: | 2992 | case KVM_SET_NR_MMU_PAGES: |
| 2908 | r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); | 2993 | r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); |
| 2909 | if (r) | 2994 | if (r) |
| @@ -2912,14 +2997,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2912 | case KVM_GET_NR_MMU_PAGES: | 2997 | case KVM_GET_NR_MMU_PAGES: |
| 2913 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); | 2998 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); |
| 2914 | break; | 2999 | break; |
| 2915 | case KVM_SET_MEMORY_ALIAS: | ||
| 2916 | r = -EFAULT; | ||
| 2917 | if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) | ||
| 2918 | goto out; | ||
| 2919 | r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); | ||
| 2920 | if (r) | ||
| 2921 | goto out; | ||
| 2922 | break; | ||
| 2923 | case KVM_CREATE_IRQCHIP: { | 3000 | case KVM_CREATE_IRQCHIP: { |
| 2924 | struct kvm_pic *vpic; | 3001 | struct kvm_pic *vpic; |
| 2925 | 3002 | ||
| @@ -3259,7 +3336,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | |||
| 3259 | } | 3336 | } |
| 3260 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); | 3337 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); |
| 3261 | if (ret < 0) { | 3338 | if (ret < 0) { |
| 3262 | r = X86EMUL_UNHANDLEABLE; | 3339 | r = X86EMUL_IO_NEEDED; |
| 3263 | goto out; | 3340 | goto out; |
| 3264 | } | 3341 | } |
| 3265 | 3342 | ||
| @@ -3315,7 +3392,7 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val, | |||
| 3315 | } | 3392 | } |
| 3316 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); | 3393 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); |
| 3317 | if (ret < 0) { | 3394 | if (ret < 0) { |
| 3318 | r = X86EMUL_UNHANDLEABLE; | 3395 | r = X86EMUL_IO_NEEDED; |
| 3319 | goto out; | 3396 | goto out; |
| 3320 | } | 3397 | } |
| 3321 | 3398 | ||
| @@ -3330,10 +3407,10 @@ out: | |||
| 3330 | static int emulator_read_emulated(unsigned long addr, | 3407 | static int emulator_read_emulated(unsigned long addr, |
| 3331 | void *val, | 3408 | void *val, |
| 3332 | unsigned int bytes, | 3409 | unsigned int bytes, |
| 3410 | unsigned int *error_code, | ||
| 3333 | struct kvm_vcpu *vcpu) | 3411 | struct kvm_vcpu *vcpu) |
| 3334 | { | 3412 | { |
| 3335 | gpa_t gpa; | 3413 | gpa_t gpa; |
| 3336 | u32 error_code; | ||
| 3337 | 3414 | ||
| 3338 | if (vcpu->mmio_read_completed) { | 3415 | if (vcpu->mmio_read_completed) { |
| 3339 | memcpy(val, vcpu->mmio_data, bytes); | 3416 | memcpy(val, vcpu->mmio_data, bytes); |
| @@ -3343,12 +3420,10 @@ static int emulator_read_emulated(unsigned long addr, | |||
| 3343 | return X86EMUL_CONTINUE; | 3420 | return X86EMUL_CONTINUE; |
| 3344 | } | 3421 | } |
| 3345 | 3422 | ||
| 3346 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); | 3423 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); |
| 3347 | 3424 | ||
| 3348 | if (gpa == UNMAPPED_GVA) { | 3425 | if (gpa == UNMAPPED_GVA) |
| 3349 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
| 3350 | return X86EMUL_PROPAGATE_FAULT; | 3426 | return X86EMUL_PROPAGATE_FAULT; |
| 3351 | } | ||
| 3352 | 3427 | ||
| 3353 | /* For APIC access vmexit */ | 3428 | /* For APIC access vmexit */ |
| 3354 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3429 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
| @@ -3370,11 +3445,12 @@ mmio: | |||
| 3370 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 3445 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
| 3371 | 3446 | ||
| 3372 | vcpu->mmio_needed = 1; | 3447 | vcpu->mmio_needed = 1; |
| 3373 | vcpu->mmio_phys_addr = gpa; | 3448 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
| 3374 | vcpu->mmio_size = bytes; | 3449 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
| 3375 | vcpu->mmio_is_write = 0; | 3450 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; |
| 3451 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | ||
| 3376 | 3452 | ||
| 3377 | return X86EMUL_UNHANDLEABLE; | 3453 | return X86EMUL_IO_NEEDED; |
| 3378 | } | 3454 | } |
| 3379 | 3455 | ||
| 3380 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 3456 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
| @@ -3392,17 +3468,15 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 3392 | static int emulator_write_emulated_onepage(unsigned long addr, | 3468 | static int emulator_write_emulated_onepage(unsigned long addr, |
| 3393 | const void *val, | 3469 | const void *val, |
| 3394 | unsigned int bytes, | 3470 | unsigned int bytes, |
| 3471 | unsigned int *error_code, | ||
| 3395 | struct kvm_vcpu *vcpu) | 3472 | struct kvm_vcpu *vcpu) |
| 3396 | { | 3473 | { |
| 3397 | gpa_t gpa; | 3474 | gpa_t gpa; |
| 3398 | u32 error_code; | ||
| 3399 | 3475 | ||
| 3400 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); | 3476 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); |
| 3401 | 3477 | ||
| 3402 | if (gpa == UNMAPPED_GVA) { | 3478 | if (gpa == UNMAPPED_GVA) |
| 3403 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
| 3404 | return X86EMUL_PROPAGATE_FAULT; | 3479 | return X86EMUL_PROPAGATE_FAULT; |
| 3405 | } | ||
| 3406 | 3480 | ||
| 3407 | /* For APIC access vmexit */ | 3481 | /* For APIC access vmexit */ |
| 3408 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3482 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
| @@ -3420,10 +3494,11 @@ mmio: | |||
| 3420 | return X86EMUL_CONTINUE; | 3494 | return X86EMUL_CONTINUE; |
| 3421 | 3495 | ||
| 3422 | vcpu->mmio_needed = 1; | 3496 | vcpu->mmio_needed = 1; |
| 3423 | vcpu->mmio_phys_addr = gpa; | 3497 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
| 3424 | vcpu->mmio_size = bytes; | 3498 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
| 3425 | vcpu->mmio_is_write = 1; | 3499 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; |
| 3426 | memcpy(vcpu->mmio_data, val, bytes); | 3500 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; |
| 3501 | memcpy(vcpu->run->mmio.data, val, bytes); | ||
| 3427 | 3502 | ||
| 3428 | return X86EMUL_CONTINUE; | 3503 | return X86EMUL_CONTINUE; |
| 3429 | } | 3504 | } |
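With this change the emulator fills the kvm_run MMIO exit record (phys_addr, len, is_write, data) at the point the access is discovered, instead of copying the fields over later in emulate_instruction(). A toy version of that bookkeeping, with made-up structure and constant names rather than the real kvm_run layout:

	#include <stdint.h>
	#include <string.h>

	struct fake_run {
		uint32_t exit_reason;
		struct {
			uint64_t phys_addr;
			uint8_t  data[8];
			uint32_t len;
			uint8_t  is_write;
		} mmio;
	};

	enum { FAKE_EXIT_MMIO = 6 };

	/* Record a guest write that must be completed by userspace. */
	static void report_mmio_write(struct fake_run *run, uint64_t gpa,
				      const void *val, uint32_t bytes)
	{
		run->exit_reason = FAKE_EXIT_MMIO;
		run->mmio.phys_addr = gpa;
		run->mmio.len = bytes;
		run->mmio.is_write = 1;
		memcpy(run->mmio.data, val, bytes < 8 ? bytes : 8);
	}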
| @@ -3431,6 +3506,7 @@ mmio: | |||
| 3431 | int emulator_write_emulated(unsigned long addr, | 3506 | int emulator_write_emulated(unsigned long addr, |
| 3432 | const void *val, | 3507 | const void *val, |
| 3433 | unsigned int bytes, | 3508 | unsigned int bytes, |
| 3509 | unsigned int *error_code, | ||
| 3434 | struct kvm_vcpu *vcpu) | 3510 | struct kvm_vcpu *vcpu) |
| 3435 | { | 3511 | { |
| 3436 | /* Crossing a page boundary? */ | 3512 | /* Crossing a page boundary? */ |
| @@ -3438,16 +3514,17 @@ int emulator_write_emulated(unsigned long addr, | |||
| 3438 | int rc, now; | 3514 | int rc, now; |
| 3439 | 3515 | ||
| 3440 | now = -addr & ~PAGE_MASK; | 3516 | now = -addr & ~PAGE_MASK; |
| 3441 | rc = emulator_write_emulated_onepage(addr, val, now, vcpu); | 3517 | rc = emulator_write_emulated_onepage(addr, val, now, error_code, |
| 3518 | vcpu); | ||
| 3442 | if (rc != X86EMUL_CONTINUE) | 3519 | if (rc != X86EMUL_CONTINUE) |
| 3443 | return rc; | 3520 | return rc; |
| 3444 | addr += now; | 3521 | addr += now; |
| 3445 | val += now; | 3522 | val += now; |
| 3446 | bytes -= now; | 3523 | bytes -= now; |
| 3447 | } | 3524 | } |
| 3448 | return emulator_write_emulated_onepage(addr, val, bytes, vcpu); | 3525 | return emulator_write_emulated_onepage(addr, val, bytes, error_code, |
| 3526 | vcpu); | ||
| 3449 | } | 3527 | } |
| 3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | ||
| 3451 | 3528 | ||
| 3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | 3529 | #define CMPXCHG_TYPE(t, ptr, old, new) \ |
| 3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | 3530 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) |
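CMPXCHG_TYPE above just casts the pointers to the requested width and reports whether the compare-and-swap hit. The same shape can be written in ordinary C with the GCC/Clang __sync builtin, assuming that builtin is available; this is an illustrative stand-in, not the kernel's cmpxchg():

	#include <stdint.h>

	/* Width-dispatched compare-and-swap: non-zero if *ptr matched *old and
	 * was replaced by *new, mirroring what CMPXCHG_TYPE expands to. */
	#define CMPXCHG_TYPE(t, ptr, old, new) \
		__sync_bool_compare_and_swap((t *)(ptr), *(t *)(old), *(t *)(new))

	static int cmpxchg_bytes(void *ptr, void *old, void *new, unsigned bytes)
	{
		switch (bytes) {
		case 1: return CMPXCHG_TYPE(uint8_t,  ptr, old, new);
		case 2: return CMPXCHG_TYPE(uint16_t, ptr, old, new);
		case 4: return CMPXCHG_TYPE(uint32_t, ptr, old, new);
		case 8: return CMPXCHG_TYPE(uint64_t, ptr, old, new);
		default: return 0;
		}
	}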
| @@ -3463,6 +3540,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
| 3463 | const void *old, | 3540 | const void *old, |
| 3464 | const void *new, | 3541 | const void *new, |
| 3465 | unsigned int bytes, | 3542 | unsigned int bytes, |
| 3543 | unsigned int *error_code, | ||
| 3466 | struct kvm_vcpu *vcpu) | 3544 | struct kvm_vcpu *vcpu) |
| 3467 | { | 3545 | { |
| 3468 | gpa_t gpa; | 3546 | gpa_t gpa; |
| @@ -3484,6 +3562,10 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
| 3484 | goto emul_write; | 3562 | goto emul_write; |
| 3485 | 3563 | ||
| 3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3564 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 3565 | if (is_error_page(page)) { | ||
| 3566 | kvm_release_page_clean(page); | ||
| 3567 | goto emul_write; | ||
| 3568 | } | ||
| 3487 | 3569 | ||
| 3488 | kaddr = kmap_atomic(page, KM_USER0); | 3570 | kaddr = kmap_atomic(page, KM_USER0); |
| 3489 | kaddr += offset_in_page(gpa); | 3571 | kaddr += offset_in_page(gpa); |
| @@ -3516,7 +3598,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
| 3516 | emul_write: | 3598 | emul_write: |
| 3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3599 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
| 3518 | 3600 | ||
| 3519 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3601 | return emulator_write_emulated(addr, new, bytes, error_code, vcpu); |
| 3520 | } | 3602 | } |
| 3521 | 3603 | ||
| 3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 3604 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
| @@ -3604,42 +3686,38 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
| 3604 | return X86EMUL_CONTINUE; | 3686 | return X86EMUL_CONTINUE; |
| 3605 | } | 3687 | } |
| 3606 | 3688 | ||
| 3607 | int emulate_clts(struct kvm_vcpu *vcpu) | 3689 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) |
| 3608 | { | 3690 | { |
| 3609 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | 3691 | if (!need_emulate_wbinvd(vcpu)) |
| 3610 | kvm_x86_ops->fpu_activate(vcpu); | 3692 | return X86EMUL_CONTINUE; |
| 3693 | |||
| 3694 | if (kvm_x86_ops->has_wbinvd_exit()) { | ||
| 3695 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, | ||
| 3696 | wbinvd_ipi, NULL, 1); | ||
| 3697 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); | ||
| 3698 | } | ||
| 3699 | wbinvd(); | ||
| 3611 | return X86EMUL_CONTINUE; | 3700 | return X86EMUL_CONTINUE; |
| 3612 | } | 3701 | } |
| 3702 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | ||
| 3613 | 3703 | ||
| 3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3704 | int emulate_clts(struct kvm_vcpu *vcpu) |
| 3615 | { | 3705 | { |
| 3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); | 3706 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
| 3707 | kvm_x86_ops->fpu_activate(vcpu); | ||
| 3708 | return X86EMUL_CONTINUE; | ||
| 3617 | } | 3709 | } |
| 3618 | 3710 | ||
| 3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3711 | int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) |
| 3620 | { | 3712 | { |
| 3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3713 | return _kvm_get_dr(vcpu, dr, dest); |
| 3622 | |||
| 3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); | ||
| 3624 | } | 3714 | } |
| 3625 | 3715 | ||
| 3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3716 | int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) |
| 3627 | { | 3717 | { |
| 3628 | u8 opcodes[4]; | ||
| 3629 | unsigned long rip = kvm_rip_read(vcpu); | ||
| 3630 | unsigned long rip_linear; | ||
| 3631 | |||
| 3632 | if (!printk_ratelimit()) | ||
| 3633 | return; | ||
| 3634 | 3718 | ||
| 3635 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 3719 | return __kvm_set_dr(vcpu, dr, value); |
| 3636 | |||
| 3637 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); | ||
| 3638 | |||
| 3639 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | ||
| 3640 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | ||
| 3641 | } | 3720 | } |
| 3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | ||
| 3643 | 3721 | ||
| 3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 3722 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
| 3645 | { | 3723 | { |
| @@ -3674,27 +3752,32 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
| 3674 | return value; | 3752 | return value; |
| 3675 | } | 3753 | } |
| 3676 | 3754 | ||
| 3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | 3755 | static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) |
| 3678 | { | 3756 | { |
| 3757 | int res = 0; | ||
| 3758 | |||
| 3679 | switch (cr) { | 3759 | switch (cr) { |
| 3680 | case 0: | 3760 | case 0: |
| 3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | 3761 | res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); |
| 3682 | break; | 3762 | break; |
| 3683 | case 2: | 3763 | case 2: |
| 3684 | vcpu->arch.cr2 = val; | 3764 | vcpu->arch.cr2 = val; |
| 3685 | break; | 3765 | break; |
| 3686 | case 3: | 3766 | case 3: |
| 3687 | kvm_set_cr3(vcpu, val); | 3767 | res = kvm_set_cr3(vcpu, val); |
| 3688 | break; | 3768 | break; |
| 3689 | case 4: | 3769 | case 4: |
| 3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | 3770 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
| 3691 | break; | 3771 | break; |
| 3692 | case 8: | 3772 | case 8: |
| 3693 | kvm_set_cr8(vcpu, val & 0xfUL); | 3773 | res = __kvm_set_cr8(vcpu, val & 0xfUL); |
| 3694 | break; | 3774 | break; |
| 3695 | default: | 3775 | default: |
| 3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 3776 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
| 3777 | res = -1; | ||
| 3697 | } | 3778 | } |
| 3779 | |||
| 3780 | return res; | ||
| 3698 | } | 3781 | } |
| 3699 | 3782 | ||
| 3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | 3783 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) |
| @@ -3707,6 +3790,12 @@ static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | |||
| 3707 | kvm_x86_ops->get_gdt(vcpu, dt); | 3790 | kvm_x86_ops->get_gdt(vcpu, dt); |
| 3708 | } | 3791 | } |
| 3709 | 3792 | ||
| 3793 | static unsigned long emulator_get_cached_segment_base(int seg, | ||
| 3794 | struct kvm_vcpu *vcpu) | ||
| 3795 | { | ||
| 3796 | return get_segment_base(vcpu, seg); | ||
| 3797 | } | ||
| 3798 | |||
| 3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | 3799 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, |
| 3711 | struct kvm_vcpu *vcpu) | 3800 | struct kvm_vcpu *vcpu) |
| 3712 | { | 3801 | { |
| @@ -3779,11 +3868,6 @@ static void emulator_set_segment_selector(u16 sel, int seg, | |||
| 3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | 3868 | kvm_set_segment(vcpu, &kvm_seg, seg); |
| 3780 | } | 3869 | } |
| 3781 | 3870 | ||
| 3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
| 3783 | { | ||
| 3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
| 3785 | } | ||
| 3786 | |||
| 3787 | static struct x86_emulate_ops emulate_ops = { | 3871 | static struct x86_emulate_ops emulate_ops = { |
| 3788 | .read_std = kvm_read_guest_virt_system, | 3872 | .read_std = kvm_read_guest_virt_system, |
| 3789 | .write_std = kvm_write_guest_virt_system, | 3873 | .write_std = kvm_write_guest_virt_system, |
| @@ -3797,11 +3881,15 @@ static struct x86_emulate_ops emulate_ops = { | |||
| 3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | 3881 | .set_cached_descriptor = emulator_set_cached_descriptor, |
| 3798 | .get_segment_selector = emulator_get_segment_selector, | 3882 | .get_segment_selector = emulator_get_segment_selector, |
| 3799 | .set_segment_selector = emulator_set_segment_selector, | 3883 | .set_segment_selector = emulator_set_segment_selector, |
| 3884 | .get_cached_segment_base = emulator_get_cached_segment_base, | ||
| 3800 | .get_gdt = emulator_get_gdt, | 3885 | .get_gdt = emulator_get_gdt, |
| 3801 | .get_cr = emulator_get_cr, | 3886 | .get_cr = emulator_get_cr, |
| 3802 | .set_cr = emulator_set_cr, | 3887 | .set_cr = emulator_set_cr, |
| 3803 | .cpl = emulator_get_cpl, | 3888 | .cpl = emulator_get_cpl, |
| 3804 | .set_rflags = emulator_set_rflags, | 3889 | .get_dr = emulator_get_dr, |
| 3890 | .set_dr = emulator_set_dr, | ||
| 3891 | .set_msr = kvm_set_msr, | ||
| 3892 | .get_msr = kvm_get_msr, | ||
| 3805 | }; | 3893 | }; |
| 3806 | 3894 | ||
| 3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3895 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
| @@ -3812,14 +3900,75 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) | |||
| 3812 | vcpu->arch.regs_dirty = ~0; | 3900 | vcpu->arch.regs_dirty = ~0; |
| 3813 | } | 3901 | } |
| 3814 | 3902 | ||
| 3903 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | ||
| 3904 | { | ||
| 3905 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); | ||
| 3906 | /* | ||
| 3907 | * an sti; sti; sequence only disable interrupts for the first | ||
| 3908 | * instruction. So, if the last instruction, be it emulated or | ||
| 3909 | * not, left the system with the INT_STI flag enabled, it | ||
| 3910 | * means that the last instruction is an sti. We should not | ||
| 3911 | * leave the flag on in this case. The same goes for mov ss | ||
| 3912 | */ | ||
| 3913 | if (!(int_shadow & mask)) | ||
| 3914 | kvm_x86_ops->set_interrupt_shadow(vcpu, mask); | ||
| 3915 | } | ||
| 3916 | |||
| 3917 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | ||
| 3918 | { | ||
| 3919 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | ||
| 3920 | if (ctxt->exception == PF_VECTOR) | ||
| 3921 | kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code); | ||
| 3922 | else if (ctxt->error_code_valid) | ||
| 3923 | kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); | ||
| 3924 | else | ||
| 3925 | kvm_queue_exception(vcpu, ctxt->exception); | ||
| 3926 | } | ||
| 3927 | |||
| 3928 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) | ||
| 3929 | { | ||
| 3930 | ++vcpu->stat.insn_emulation_fail; | ||
| 3931 | trace_kvm_emulate_insn_failed(vcpu); | ||
| 3932 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 3933 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 3934 | vcpu->run->internal.ndata = 0; | ||
| 3935 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 3936 | return EMULATE_FAIL; | ||
| 3937 | } | ||
| 3938 | |||
| 3939 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 3940 | { | ||
| 3941 | gpa_t gpa; | ||
| 3942 | |||
| 3943 | if (tdp_enabled) | ||
| 3944 | return false; | ||
| 3945 | |||
| 3946 | /* | ||
| 3947 | * if emulation was due to access to a shadowed page table | ||
| 3948 | * and it failed, try to unshadow the page and re-enter the | ||
| 3949 | * guest to let the CPU execute the instruction. | ||
| 3950 | */ | ||
| 3951 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) | ||
| 3952 | return true; | ||
| 3953 | |||
| 3954 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); | ||
| 3955 | |||
| 3956 | if (gpa == UNMAPPED_GVA) | ||
| 3957 | return true; /* let cpu generate fault */ | ||
| 3958 | |||
| 3959 | if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT))) | ||
| 3960 | return true; | ||
| 3961 | |||
| 3962 | return false; | ||
| 3963 | } | ||
| 3964 | |||
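The new helpers above split emulation bookkeeping into small pieces; inject_emulated_exception(), for instance, picks one of three injection paths depending on what the emulator recorded: a page fault is delivered with CR2 and an error code, other vectors may or may not carry one. A condensed, self-contained sketch of that three-way dispatch follows; the struct and the returned strings are illustrative only, the real code calls the kvm_inject/queue helpers shown in the diff.

	#include <stdio.h>

	#define PF_VECTOR 14

	struct emul_exception {
		int vector;           /* -1 means "nothing pending"           */
		int error_code_valid; /* does the vector carry an error code? */
		unsigned long cr2;
		unsigned int error_code;
	};

	/* Decide which injection path the exception takes; mirrors the branch
	 * structure of inject_emulated_exception(). */
	static const char *injection_path(const struct emul_exception *e)
	{
		if (e->vector < 0)
			return "nothing to inject";
		if (e->vector == PF_VECTOR)
			return "page fault: inject with cr2 + error code";
		if (e->error_code_valid)
			return "exception with error code";
		return "exception without error code";
	}

	int main(void)
	{
		struct emul_exception e = { .vector = PF_VECTOR, .cr2 = 0x1000, .error_code = 2 };
		puts(injection_path(&e));
		return 0;
	}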
| 3815 | int emulate_instruction(struct kvm_vcpu *vcpu, | 3965 | int emulate_instruction(struct kvm_vcpu *vcpu, |
| 3816 | unsigned long cr2, | 3966 | unsigned long cr2, |
| 3817 | u16 error_code, | 3967 | u16 error_code, |
| 3818 | int emulation_type) | 3968 | int emulation_type) |
| 3819 | { | 3969 | { |
| 3820 | int r, shadow_mask; | 3970 | int r; |
| 3821 | struct decode_cache *c; | 3971 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
| 3822 | struct kvm_run *run = vcpu->run; | ||
| 3823 | 3972 | ||
| 3824 | kvm_clear_exception_queue(vcpu); | 3973 | kvm_clear_exception_queue(vcpu); |
| 3825 | vcpu->arch.mmio_fault_cr2 = cr2; | 3974 | vcpu->arch.mmio_fault_cr2 = cr2; |
| @@ -3831,8 +3980,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3831 | */ | 3980 | */ |
| 3832 | cache_all_regs(vcpu); | 3981 | cache_all_regs(vcpu); |
| 3833 | 3982 | ||
| 3834 | vcpu->mmio_is_write = 0; | ||
| 3835 | |||
| 3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3983 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
| 3837 | int cs_db, cs_l; | 3984 | int cs_db, cs_l; |
| 3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3985 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| @@ -3846,13 +3993,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3846 | ? X86EMUL_MODE_VM86 : cs_l | 3993 | ? X86EMUL_MODE_VM86 : cs_l |
| 3847 | ? X86EMUL_MODE_PROT64 : cs_db | 3994 | ? X86EMUL_MODE_PROT64 : cs_db |
| 3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3995 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
| 3996 | memset(c, 0, sizeof(struct decode_cache)); | ||
| 3997 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
| 3998 | vcpu->arch.emulate_ctxt.interruptibility = 0; | ||
| 3999 | vcpu->arch.emulate_ctxt.exception = -1; | ||
| 3849 | 4000 | ||
| 3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 4001 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
| 3851 | trace_kvm_emulate_insn_start(vcpu); | 4002 | trace_kvm_emulate_insn_start(vcpu); |
| 3852 | 4003 | ||
| 3853 | /* Only allow emulation of specific instructions on #UD | 4004 | /* Only allow emulation of specific instructions on #UD |
| 3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 4005 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
| 3855 | c = &vcpu->arch.emulate_ctxt.decode; | ||
| 3856 | if (emulation_type & EMULTYPE_TRAP_UD) { | 4006 | if (emulation_type & EMULTYPE_TRAP_UD) { |
| 3857 | if (!c->twobyte) | 4007 | if (!c->twobyte) |
| 3858 | return EMULATE_FAIL; | 4008 | return EMULATE_FAIL; |
| @@ -3880,11 +4030,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3880 | 4030 | ||
| 3881 | ++vcpu->stat.insn_emulation; | 4031 | ++vcpu->stat.insn_emulation; |
| 3882 | if (r) { | 4032 | if (r) { |
| 3883 | ++vcpu->stat.insn_emulation_fail; | 4033 | if (reexecute_instruction(vcpu, cr2)) |
| 3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
| 3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | ||
| 3886 | return EMULATE_DONE; | 4034 | return EMULATE_DONE; |
| 3887 | return EMULATE_FAIL; | 4035 | if (emulation_type & EMULTYPE_SKIP) |
| 4036 | return EMULATE_FAIL; | ||
| 4037 | return handle_emulation_failure(vcpu); | ||
| 3888 | } | 4038 | } |
| 3889 | } | 4039 | } |
| 3890 | 4040 | ||
| @@ -3893,48 +4043,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3893 | return EMULATE_DONE; | 4043 | return EMULATE_DONE; |
| 3894 | } | 4044 | } |
| 3895 | 4045 | ||
| 4046 | /* this is needed for the vmware backdoor interface to work since it | ||
| 4047 | changes register values during the IO operation */ | ||
| 4048 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
| 4049 | |||
| 3896 | restart: | 4050 | restart: |
| 3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 4051 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
| 3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | ||
| 3899 | 4052 | ||
| 3900 | if (r == 0) | 4053 | if (r) { /* emulation failed */ |
| 3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 4054 | if (reexecute_instruction(vcpu, cr2)) |
| 4055 | return EMULATE_DONE; | ||
| 3902 | 4056 | ||
| 3903 | if (vcpu->arch.pio.count) { | 4057 | return handle_emulation_failure(vcpu); |
| 3904 | if (!vcpu->arch.pio.in) | ||
| 3905 | vcpu->arch.pio.count = 0; | ||
| 3906 | return EMULATE_DO_MMIO; | ||
| 3907 | } | 4058 | } |
| 3908 | 4059 | ||
| 3909 | if (r || vcpu->mmio_is_write) { | 4060 | toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); |
| 3910 | run->exit_reason = KVM_EXIT_MMIO; | 4061 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
| 3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 4062 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
| 3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 4063 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
| 3913 | run->mmio.len = vcpu->mmio_size; | 4064 | |
| 3914 | run->mmio.is_write = vcpu->mmio_is_write; | 4065 | if (vcpu->arch.emulate_ctxt.exception >= 0) { |
| 4066 | inject_emulated_exception(vcpu); | ||
| 4067 | return EMULATE_DONE; | ||
| 3915 | } | 4068 | } |
| 3916 | 4069 | ||
| 3917 | if (r) { | 4070 | if (vcpu->arch.pio.count) { |
| 3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 4071 | if (!vcpu->arch.pio.in) |
| 3919 | goto done; | 4072 | vcpu->arch.pio.count = 0; |
| 3920 | if (!vcpu->mmio_needed) { | ||
| 3921 | ++vcpu->stat.insn_emulation_fail; | ||
| 3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
| 3923 | kvm_report_emulation_failure(vcpu, "mmio"); | ||
| 3924 | return EMULATE_FAIL; | ||
| 3925 | } | ||
| 3926 | return EMULATE_DO_MMIO; | 4073 | return EMULATE_DO_MMIO; |
| 3927 | } | 4074 | } |
| 3928 | 4075 | ||
| 3929 | if (vcpu->mmio_is_write) { | 4076 | if (vcpu->mmio_needed) { |
| 3930 | vcpu->mmio_needed = 0; | 4077 | if (vcpu->mmio_is_write) |
| 4078 | vcpu->mmio_needed = 0; | ||
| 3931 | return EMULATE_DO_MMIO; | 4079 | return EMULATE_DO_MMIO; |
| 3932 | } | 4080 | } |
| 3933 | 4081 | ||
| 3934 | done: | ||
| 3935 | if (vcpu->arch.exception.pending) | ||
| 3936 | vcpu->arch.emulate_ctxt.restart = false; | ||
| 3937 | |||
| 3938 | if (vcpu->arch.emulate_ctxt.restart) | 4082 | if (vcpu->arch.emulate_ctxt.restart) |
| 3939 | goto restart; | 4083 | goto restart; |
| 3940 | 4084 | ||
| @@ -4108,6 +4252,9 @@ int kvm_arch_init(void *opaque) | |||
| 4108 | 4252 | ||
| 4109 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | 4253 | perf_register_guest_info_callbacks(&kvm_guest_cbs); |
| 4110 | 4254 | ||
| 4255 | if (cpu_has_xsave) | ||
| 4256 | host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
| 4257 | |||
| 4111 | return 0; | 4258 | return 0; |
| 4112 | 4259 | ||
| 4113 | out: | 4260 | out: |
| @@ -4270,7 +4417,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
| 4270 | 4417 | ||
| 4271 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 4418 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
| 4272 | 4419 | ||
| 4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4420 | return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); |
| 4274 | } | 4421 | } |
| 4275 | 4422 | ||
| 4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4423 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
| @@ -4506,59 +4653,78 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
| 4506 | } | 4653 | } |
| 4507 | } | 4654 | } |
| 4508 | 4655 | ||
| 4656 | static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) | ||
| 4657 | { | ||
| 4658 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && | ||
| 4659 | !vcpu->guest_xcr0_loaded) { | ||
| 4660 | /* kvm_set_xcr() also depends on this */ | ||
| 4661 | xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); | ||
| 4662 | vcpu->guest_xcr0_loaded = 1; | ||
| 4663 | } | ||
| 4664 | } | ||
| 4665 | |||
| 4666 | static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | ||
| 4667 | { | ||
| 4668 | if (vcpu->guest_xcr0_loaded) { | ||
| 4669 | if (vcpu->arch.xcr0 != host_xcr0) | ||
| 4670 | xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); | ||
| 4671 | vcpu->guest_xcr0_loaded = 0; | ||
| 4672 | } | ||
| 4673 | } | ||
| 4674 | |||
| 4509 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 4675 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
| 4510 | { | 4676 | { |
| 4511 | int r; | 4677 | int r; |
| 4512 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 4678 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
| 4513 | vcpu->run->request_interrupt_window; | 4679 | vcpu->run->request_interrupt_window; |
| 4514 | 4680 | ||
| 4515 | if (vcpu->requests) | ||
| 4516 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | ||
| 4517 | kvm_mmu_unload(vcpu); | ||
| 4518 | |||
| 4519 | r = kvm_mmu_reload(vcpu); | ||
| 4520 | if (unlikely(r)) | ||
| 4521 | goto out; | ||
| 4522 | |||
| 4523 | if (vcpu->requests) { | 4681 | if (vcpu->requests) { |
| 4524 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) | 4682 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
| 4683 | kvm_mmu_unload(vcpu); | ||
| 4684 | if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) | ||
| 4525 | __kvm_migrate_timers(vcpu); | 4685 | __kvm_migrate_timers(vcpu); |
| 4526 | if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) | 4686 | if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) |
| 4527 | kvm_write_guest_time(vcpu); | 4687 | kvm_write_guest_time(vcpu); |
| 4528 | if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) | 4688 | if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) |
| 4529 | kvm_mmu_sync_roots(vcpu); | 4689 | kvm_mmu_sync_roots(vcpu); |
| 4530 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | 4690 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) |
| 4531 | kvm_x86_ops->tlb_flush(vcpu); | 4691 | kvm_x86_ops->tlb_flush(vcpu); |
| 4532 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 4692 | if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { |
| 4533 | &vcpu->requests)) { | ||
| 4534 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; | 4693 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
| 4535 | r = 0; | 4694 | r = 0; |
| 4536 | goto out; | 4695 | goto out; |
| 4537 | } | 4696 | } |
| 4538 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | 4697 | if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { |
| 4539 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; | 4698 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
| 4540 | r = 0; | 4699 | r = 0; |
| 4541 | goto out; | 4700 | goto out; |
| 4542 | } | 4701 | } |
| 4543 | if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { | 4702 | if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) { |
| 4544 | vcpu->fpu_active = 0; | 4703 | vcpu->fpu_active = 0; |
| 4545 | kvm_x86_ops->fpu_deactivate(vcpu); | 4704 | kvm_x86_ops->fpu_deactivate(vcpu); |
| 4546 | } | 4705 | } |
| 4547 | } | 4706 | } |
| 4548 | 4707 | ||
| 4708 | r = kvm_mmu_reload(vcpu); | ||
| 4709 | if (unlikely(r)) | ||
| 4710 | goto out; | ||
| 4711 | |||
| 4549 | preempt_disable(); | 4712 | preempt_disable(); |
| 4550 | 4713 | ||
| 4551 | kvm_x86_ops->prepare_guest_switch(vcpu); | 4714 | kvm_x86_ops->prepare_guest_switch(vcpu); |
| 4552 | if (vcpu->fpu_active) | 4715 | if (vcpu->fpu_active) |
| 4553 | kvm_load_guest_fpu(vcpu); | 4716 | kvm_load_guest_fpu(vcpu); |
| 4717 | kvm_load_guest_xcr0(vcpu); | ||
| 4554 | 4718 | ||
| 4555 | local_irq_disable(); | 4719 | atomic_set(&vcpu->guest_mode, 1); |
| 4720 | smp_wmb(); | ||
| 4556 | 4721 | ||
| 4557 | clear_bit(KVM_REQ_KICK, &vcpu->requests); | 4722 | local_irq_disable(); |
| 4558 | smp_mb__after_clear_bit(); | ||
| 4559 | 4723 | ||
| 4560 | if (vcpu->requests || need_resched() || signal_pending(current)) { | 4724 | if (!atomic_read(&vcpu->guest_mode) || vcpu->requests |
| 4561 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 4725 | || need_resched() || signal_pending(current)) { |
| 4726 | atomic_set(&vcpu->guest_mode, 0); | ||
| 4727 | smp_wmb(); | ||
| 4562 | local_irq_enable(); | 4728 | local_irq_enable(); |
| 4563 | preempt_enable(); | 4729 | preempt_enable(); |
| 4564 | r = 1; | 4730 | r = 1; |
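The request handling in this hunk now goes through kvm_check_request(), which is a test-and-clear on the vcpu request bitmap, while the entry path replaces the KVM_REQ_KICK bit with an explicit guest_mode atomic plus barriers. The test-and-clear idiom on its own can be written with C11 atomics as below; this is a generic sketch, not the kernel's bit operations, and it ignores the memory-ordering subtleties the real code handles with smp_wmb().

	#include <stdatomic.h>
	#include <stdbool.h>

	#define REQ_TLB_FLUSH   0
	#define REQ_MMU_RELOAD  1

	static atomic_ulong requests;

	static void make_request(int bit)
	{
		atomic_fetch_or(&requests, 1UL << bit);
	}

	/* Returns true exactly once per make_request(): the bit is cleared as it
	 * is consumed, which is what kvm_check_request() does on vcpu->requests. */
	static bool check_request(int bit)
	{
		unsigned long mask = 1UL << bit;

		if (!(atomic_load(&requests) & mask))
			return false;
		return atomic_fetch_and(&requests, ~mask) & mask;
	}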
| @@ -4603,7 +4769,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 4603 | if (hw_breakpoint_active()) | 4769 | if (hw_breakpoint_active()) |
| 4604 | hw_breakpoint_restore(); | 4770 | hw_breakpoint_restore(); |
| 4605 | 4771 | ||
| 4606 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 4772 | atomic_set(&vcpu->guest_mode, 0); |
| 4773 | smp_wmb(); | ||
| 4607 | local_irq_enable(); | 4774 | local_irq_enable(); |
| 4608 | 4775 | ||
| 4609 | ++vcpu->stat.exits; | 4776 | ++vcpu->stat.exits; |
| @@ -4665,7 +4832,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
| 4665 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4832 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
| 4666 | kvm_vcpu_block(vcpu); | 4833 | kvm_vcpu_block(vcpu); |
| 4667 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 4834 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
| 4668 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 4835 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) |
| 4669 | { | 4836 | { |
| 4670 | switch(vcpu->arch.mp_state) { | 4837 | switch(vcpu->arch.mp_state) { |
| 4671 | case KVM_MP_STATE_HALTED: | 4838 | case KVM_MP_STATE_HALTED: |
| @@ -4717,8 +4884,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 4717 | int r; | 4884 | int r; |
| 4718 | sigset_t sigsaved; | 4885 | sigset_t sigsaved; |
| 4719 | 4886 | ||
| 4720 | vcpu_load(vcpu); | ||
| 4721 | |||
| 4722 | if (vcpu->sigset_active) | 4887 | if (vcpu->sigset_active) |
| 4723 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 4888 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
| 4724 | 4889 | ||
| @@ -4743,7 +4908,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4908 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); | 4909 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
| 4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4910 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
| 4746 | if (r == EMULATE_DO_MMIO) { | 4911 | if (r != EMULATE_DONE) { |
| 4747 | r = 0; | 4912 | r = 0; |
| 4748 | goto out; | 4913 | goto out; |
| 4749 | } | 4914 | } |
| @@ -4759,14 +4924,11 @@ out: | |||
| 4759 | if (vcpu->sigset_active) | 4924 | if (vcpu->sigset_active) |
| 4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4925 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 4761 | 4926 | ||
| 4762 | vcpu_put(vcpu); | ||
| 4763 | return r; | 4927 | return r; |
| 4764 | } | 4928 | } |
| 4765 | 4929 | ||
| 4766 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 4930 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
| 4767 | { | 4931 | { |
| 4768 | vcpu_load(vcpu); | ||
| 4769 | |||
| 4770 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4932 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 4771 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | 4933 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
| 4772 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4934 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| @@ -4789,15 +4951,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 4789 | regs->rip = kvm_rip_read(vcpu); | 4951 | regs->rip = kvm_rip_read(vcpu); |
| 4790 | regs->rflags = kvm_get_rflags(vcpu); | 4952 | regs->rflags = kvm_get_rflags(vcpu); |
| 4791 | 4953 | ||
| 4792 | vcpu_put(vcpu); | ||
| 4793 | |||
| 4794 | return 0; | 4954 | return 0; |
| 4795 | } | 4955 | } |
| 4796 | 4956 | ||
| 4797 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 4957 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
| 4798 | { | 4958 | { |
| 4799 | vcpu_load(vcpu); | ||
| 4800 | |||
| 4801 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); | 4959 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
| 4802 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); | 4960 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
| 4803 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); | 4961 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
| @@ -4822,8 +4980,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 4822 | 4980 | ||
| 4823 | vcpu->arch.exception.pending = false; | 4981 | vcpu->arch.exception.pending = false; |
| 4824 | 4982 | ||
| 4825 | vcpu_put(vcpu); | ||
| 4826 | |||
| 4827 | return 0; | 4983 | return 0; |
| 4828 | } | 4984 | } |
| 4829 | 4985 | ||
| @@ -4842,8 +4998,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 4842 | { | 4998 | { |
| 4843 | struct desc_ptr dt; | 4999 | struct desc_ptr dt; |
| 4844 | 5000 | ||
| 4845 | vcpu_load(vcpu); | ||
| 4846 | |||
| 4847 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 5001 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
| 4848 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 5002 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
| 4849 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); | 5003 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); |
| @@ -4875,32 +5029,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 4875 | set_bit(vcpu->arch.interrupt.nr, | 5029 | set_bit(vcpu->arch.interrupt.nr, |
| 4876 | (unsigned long *)sregs->interrupt_bitmap); | 5030 | (unsigned long *)sregs->interrupt_bitmap); |
| 4877 | 5031 | ||
| 4878 | vcpu_put(vcpu); | ||
| 4879 | |||
| 4880 | return 0; | 5032 | return 0; |
| 4881 | } | 5033 | } |
| 4882 | 5034 | ||
| 4883 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 5035 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
| 4884 | struct kvm_mp_state *mp_state) | 5036 | struct kvm_mp_state *mp_state) |
| 4885 | { | 5037 | { |
| 4886 | vcpu_load(vcpu); | ||
| 4887 | mp_state->mp_state = vcpu->arch.mp_state; | 5038 | mp_state->mp_state = vcpu->arch.mp_state; |
| 4888 | vcpu_put(vcpu); | ||
| 4889 | return 0; | 5039 | return 0; |
| 4890 | } | 5040 | } |
| 4891 | 5041 | ||
| 4892 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 5042 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
| 4893 | struct kvm_mp_state *mp_state) | 5043 | struct kvm_mp_state *mp_state) |
| 4894 | { | 5044 | { |
| 4895 | vcpu_load(vcpu); | ||
| 4896 | vcpu->arch.mp_state = mp_state->mp_state; | 5045 | vcpu->arch.mp_state = mp_state->mp_state; |
| 4897 | vcpu_put(vcpu); | ||
| 4898 | return 0; | 5046 | return 0; |
| 4899 | } | 5047 | } |
| 4900 | 5048 | ||
| 4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 5049 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
| 4902 | bool has_error_code, u32 error_code) | 5050 | bool has_error_code, u32 error_code) |
| 4903 | { | 5051 | { |
| 5052 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
| 4904 | int cs_db, cs_l, ret; | 5053 | int cs_db, cs_l, ret; |
| 4905 | cache_all_regs(vcpu); | 5054 | cache_all_regs(vcpu); |
| 4906 | 5055 | ||
| @@ -4915,6 +5064,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
| 4915 | ? X86EMUL_MODE_VM86 : cs_l | 5064 | ? X86EMUL_MODE_VM86 : cs_l |
| 4916 | ? X86EMUL_MODE_PROT64 : cs_db | 5065 | ? X86EMUL_MODE_PROT64 : cs_db |
| 4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 5066 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
| 5067 | memset(c, 0, sizeof(struct decode_cache)); | ||
| 5068 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
| 4918 | 5069 | ||
| 4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, | 5070 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
| 4920 | tss_selector, reason, has_error_code, | 5071 | tss_selector, reason, has_error_code, |
| @@ -4923,6 +5074,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
| 4923 | if (ret) | 5074 | if (ret) |
| 4924 | return EMULATE_FAIL; | 5075 | return EMULATE_FAIL; |
| 4925 | 5076 | ||
| 5077 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
| 5078 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
| 4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 5079 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
| 4927 | return EMULATE_DONE; | 5080 | return EMULATE_DONE; |
| 4928 | } | 5081 | } |
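kvm_task_switch() now seeds the decode cache with a snapshot of the architectural registers, lets the emulator work on that copy, and writes the copy (plus rip and rflags) back only once emulator_task_switch() succeeds. Reduced to its essentials, the snapshot-then-commit step looks like the sketch below; the register layout and the stubbed switch routine are invented for illustration.

	#include <string.h>

	#define NR_REGS 16

	struct regs { unsigned long r[NR_REGS]; };

	/* Stand-in for emulator_task_switch(): mutate the working copy, 0 on success. */
	static int do_task_switch(struct regs *work)
	{
		work->r[4] -= 8;	/* e.g. adjust the stack pointer for the new context */
		return 0;
	}

	static int task_switch(struct regs *arch_regs)
	{
		struct regs copy;

		memcpy(&copy, arch_regs, sizeof(copy));	/* seed the decode cache        */
		if (do_task_switch(&copy))
			return -1;			/* failure: arch state untouched */
		memcpy(arch_regs, &copy, sizeof(copy));	/* commit on success            */
		return 0;
	}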
| @@ -4935,8 +5088,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 4935 | int pending_vec, max_bits; | 5088 | int pending_vec, max_bits; |
| 4936 | struct desc_ptr dt; | 5089 | struct desc_ptr dt; |
| 4937 | 5090 | ||
| 4938 | vcpu_load(vcpu); | ||
| 4939 | |||
| 4940 | dt.size = sregs->idt.limit; | 5091 | dt.size = sregs->idt.limit; |
| 4941 | dt.address = sregs->idt.base; | 5092 | dt.address = sregs->idt.base; |
| 4942 | kvm_x86_ops->set_idt(vcpu, &dt); | 5093 | kvm_x86_ops->set_idt(vcpu, &dt); |
| @@ -4996,8 +5147,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 4996 | !is_protmode(vcpu)) | 5147 | !is_protmode(vcpu)) |
| 4997 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5148 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
| 4998 | 5149 | ||
| 4999 | vcpu_put(vcpu); | ||
| 5000 | |||
| 5001 | return 0; | 5150 | return 0; |
| 5002 | } | 5151 | } |
| 5003 | 5152 | ||
| @@ -5007,12 +5156,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
| 5007 | unsigned long rflags; | 5156 | unsigned long rflags; |
| 5008 | int i, r; | 5157 | int i, r; |
| 5009 | 5158 | ||
| 5010 | vcpu_load(vcpu); | ||
| 5011 | |||
| 5012 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { | 5159 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { |
| 5013 | r = -EBUSY; | 5160 | r = -EBUSY; |
| 5014 | if (vcpu->arch.exception.pending) | 5161 | if (vcpu->arch.exception.pending) |
| 5015 | goto unlock_out; | 5162 | goto out; |
| 5016 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | 5163 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) |
| 5017 | kvm_queue_exception(vcpu, DB_VECTOR); | 5164 | kvm_queue_exception(vcpu, DB_VECTOR); |
| 5018 | else | 5165 | else |
| @@ -5054,34 +5201,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
| 5054 | 5201 | ||
| 5055 | r = 0; | 5202 | r = 0; |
| 5056 | 5203 | ||
| 5057 | unlock_out: | 5204 | out: |
| 5058 | vcpu_put(vcpu); | ||
| 5059 | 5205 | ||
| 5060 | return r; | 5206 | return r; |
| 5061 | } | 5207 | } |
| 5062 | 5208 | ||
| 5063 | /* | 5209 | /* |
| 5064 | * fxsave fpu state. Taken from x86_64/processor.h. To be killed when | ||
| 5065 | * we have asm/x86/processor.h | ||
| 5066 | */ | ||
| 5067 | struct fxsave { | ||
| 5068 | u16 cwd; | ||
| 5069 | u16 swd; | ||
| 5070 | u16 twd; | ||
| 5071 | u16 fop; | ||
| 5072 | u64 rip; | ||
| 5073 | u64 rdp; | ||
| 5074 | u32 mxcsr; | ||
| 5075 | u32 mxcsr_mask; | ||
| 5076 | u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | ||
| 5077 | #ifdef CONFIG_X86_64 | ||
| 5078 | u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | ||
| 5079 | #else | ||
| 5080 | u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ | ||
| 5081 | #endif | ||
| 5082 | }; | ||
| 5083 | |||
| 5084 | /* | ||
| 5085 | * Translate a guest virtual address to a guest physical address. | 5210 | * Translate a guest virtual address to a guest physical address. |
| 5086 | */ | 5211 | */ |
| 5087 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 5212 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
| @@ -5091,7 +5216,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
| 5091 | gpa_t gpa; | 5216 | gpa_t gpa; |
| 5092 | int idx; | 5217 | int idx; |
| 5093 | 5218 | ||
| 5094 | vcpu_load(vcpu); | ||
| 5095 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5219 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 5096 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); | 5220 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); |
| 5097 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 5221 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
| @@ -5099,16 +5223,14 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
| 5099 | tr->valid = gpa != UNMAPPED_GVA; | 5223 | tr->valid = gpa != UNMAPPED_GVA; |
| 5100 | tr->writeable = 1; | 5224 | tr->writeable = 1; |
| 5101 | tr->usermode = 0; | 5225 | tr->usermode = 0; |
| 5102 | vcpu_put(vcpu); | ||
| 5103 | 5226 | ||
| 5104 | return 0; | 5227 | return 0; |
| 5105 | } | 5228 | } |
| 5106 | 5229 | ||
| 5107 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 5230 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
| 5108 | { | 5231 | { |
| 5109 | struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; | 5232 | struct i387_fxsave_struct *fxsave = |
| 5110 | 5233 | &vcpu->arch.guest_fpu.state->fxsave; | |
| 5111 | vcpu_load(vcpu); | ||
| 5112 | 5234 | ||
| 5113 | memcpy(fpu->fpr, fxsave->st_space, 128); | 5235 | memcpy(fpu->fpr, fxsave->st_space, 128); |
| 5114 | fpu->fcw = fxsave->cwd; | 5236 | fpu->fcw = fxsave->cwd; |
| @@ -5119,16 +5241,13 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
| 5119 | fpu->last_dp = fxsave->rdp; | 5241 | fpu->last_dp = fxsave->rdp; |
| 5120 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); | 5242 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); |
| 5121 | 5243 | ||
| 5122 | vcpu_put(vcpu); | ||
| 5123 | |||
| 5124 | return 0; | 5244 | return 0; |
| 5125 | } | 5245 | } |
| 5126 | 5246 | ||
| 5127 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 5247 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
| 5128 | { | 5248 | { |
| 5129 | struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; | 5249 | struct i387_fxsave_struct *fxsave = |
| 5130 | 5250 | &vcpu->arch.guest_fpu.state->fxsave; | |
| 5131 | vcpu_load(vcpu); | ||
| 5132 | 5251 | ||
| 5133 | memcpy(fxsave->st_space, fpu->fpr, 128); | 5252 | memcpy(fxsave->st_space, fpu->fpr, 128); |
| 5134 | fxsave->cwd = fpu->fcw; | 5253 | fxsave->cwd = fpu->fcw; |
| @@ -5139,61 +5258,63 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
| 5139 | fxsave->rdp = fpu->last_dp; | 5258 | fxsave->rdp = fpu->last_dp; |
| 5140 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); | 5259 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); |
| 5141 | 5260 | ||
| 5142 | vcpu_put(vcpu); | ||
| 5143 | |||
| 5144 | return 0; | 5261 | return 0; |
| 5145 | } | 5262 | } |
| 5146 | 5263 | ||
| 5147 | void fx_init(struct kvm_vcpu *vcpu) | 5264 | int fx_init(struct kvm_vcpu *vcpu) |
| 5148 | { | 5265 | { |
| 5149 | unsigned after_mxcsr_mask; | 5266 | int err; |
| 5267 | |||
| 5268 | err = fpu_alloc(&vcpu->arch.guest_fpu); | ||
| 5269 | if (err) | ||
| 5270 | return err; | ||
| 5271 | |||
| 5272 | fpu_finit(&vcpu->arch.guest_fpu); | ||
| 5150 | 5273 | ||
| 5151 | /* | 5274 | /* |
| 5152 | * Touch the fpu the first time in non atomic context as if | 5275 | * Ensure guest xcr0 is valid for loading |
| 5153 | * this is the first fpu instruction the exception handler | ||
| 5154 | * will fire before the instruction returns and it'll have to | ||
| 5155 | * allocate ram with GFP_KERNEL. | ||
| 5156 | */ | 5276 | */ |
| 5157 | if (!used_math()) | 5277 | vcpu->arch.xcr0 = XSTATE_FP; |
| 5158 | kvm_fx_save(&vcpu->arch.host_fx_image); | ||
| 5159 | |||
| 5160 | /* Initialize guest FPU by resetting ours and saving into guest's */ | ||
| 5161 | preempt_disable(); | ||
| 5162 | kvm_fx_save(&vcpu->arch.host_fx_image); | ||
| 5163 | kvm_fx_finit(); | ||
| 5164 | kvm_fx_save(&vcpu->arch.guest_fx_image); | ||
| 5165 | kvm_fx_restore(&vcpu->arch.host_fx_image); | ||
| 5166 | preempt_enable(); | ||
| 5167 | 5278 | ||
| 5168 | vcpu->arch.cr0 |= X86_CR0_ET; | 5279 | vcpu->arch.cr0 |= X86_CR0_ET; |
| 5169 | after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); | 5280 | |
| 5170 | vcpu->arch.guest_fx_image.mxcsr = 0x1f80; | 5281 | return 0; |
| 5171 | memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, | ||
| 5172 | 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); | ||
| 5173 | } | 5282 | } |
| 5174 | EXPORT_SYMBOL_GPL(fx_init); | 5283 | EXPORT_SYMBOL_GPL(fx_init); |
| 5175 | 5284 | ||
| 5285 | static void fx_free(struct kvm_vcpu *vcpu) | ||
| 5286 | { | ||
| 5287 | fpu_free(&vcpu->arch.guest_fpu); | ||
| 5288 | } | ||
| 5289 | |||
| 5176 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 5290 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
| 5177 | { | 5291 | { |
| 5178 | if (vcpu->guest_fpu_loaded) | 5292 | if (vcpu->guest_fpu_loaded) |
| 5179 | return; | 5293 | return; |
| 5180 | 5294 | ||
| 5295 | /* | ||
| 5296 | * Restore all possible states in the guest, | ||
| 5297 | * and assume host would use all available bits. | ||
| 5298 | * Guest xcr0 would be loaded later. | ||
| 5299 | */ | ||
| 5300 | kvm_put_guest_xcr0(vcpu); | ||
| 5181 | vcpu->guest_fpu_loaded = 1; | 5301 | vcpu->guest_fpu_loaded = 1; |
| 5182 | kvm_fx_save(&vcpu->arch.host_fx_image); | 5302 | unlazy_fpu(current); |
| 5183 | kvm_fx_restore(&vcpu->arch.guest_fx_image); | 5303 | fpu_restore_checking(&vcpu->arch.guest_fpu); |
| 5184 | trace_kvm_fpu(1); | 5304 | trace_kvm_fpu(1); |
| 5185 | } | 5305 | } |
| 5186 | 5306 | ||
| 5187 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 5307 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
| 5188 | { | 5308 | { |
| 5309 | kvm_put_guest_xcr0(vcpu); | ||
| 5310 | |||
| 5189 | if (!vcpu->guest_fpu_loaded) | 5311 | if (!vcpu->guest_fpu_loaded) |
| 5190 | return; | 5312 | return; |
| 5191 | 5313 | ||
| 5192 | vcpu->guest_fpu_loaded = 0; | 5314 | vcpu->guest_fpu_loaded = 0; |
| 5193 | kvm_fx_save(&vcpu->arch.guest_fx_image); | 5315 | fpu_save_init(&vcpu->arch.guest_fpu); |
| 5194 | kvm_fx_restore(&vcpu->arch.host_fx_image); | ||
| 5195 | ++vcpu->stat.fpu_reload; | 5316 | ++vcpu->stat.fpu_reload; |
| 5196 | set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); | 5317 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); |
| 5197 | trace_kvm_fpu(0); | 5318 | trace_kvm_fpu(0); |
| 5198 | } | 5319 | } |
| 5199 | 5320 | ||
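The hunks above move KVM off its private fx_image buffers and onto the kernel's generic FPU container: fx_init() now allocates state with fpu_alloc()/fpu_finit(), and the load/put pair switches context with unlazy_fpu() plus fpu_restore_checking()/fpu_save_init(). A minimal sketch of how the two exported helpers bracket guest execution; the caller below is hypothetical, only the two helper names are taken from the hunk above.

	/* Illustrative run-loop fragment (not patch code). */
	static void vcpu_run_fragment(struct kvm_vcpu *vcpu)
	{
		kvm_load_guest_fpu(vcpu);	/* unlazy_fpu(current), then restore the guest state */

		/* ... enter the guest and handle exits ... */

		kvm_put_guest_fpu(vcpu);	/* fpu_save_init() the guest state and queue
						 * KVM_REQ_DEACTIVATE_FPU for the next entry */
	}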
| @@ -5204,6 +5325,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
| 5204 | vcpu->arch.time_page = NULL; | 5325 | vcpu->arch.time_page = NULL; |
| 5205 | } | 5326 | } |
| 5206 | 5327 | ||
| 5328 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | ||
| 5329 | fx_free(vcpu); | ||
| 5207 | kvm_x86_ops->vcpu_free(vcpu); | 5330 | kvm_x86_ops->vcpu_free(vcpu); |
| 5208 | } | 5331 | } |
| 5209 | 5332 | ||
| @@ -5217,9 +5340,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 5217 | { | 5340 | { |
| 5218 | int r; | 5341 | int r; |
| 5219 | 5342 | ||
| 5220 | /* We do fxsave: this must be aligned. */ | ||
| 5221 | BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); | ||
| 5222 | |||
| 5223 | vcpu->arch.mtrr_state.have_fixed = 1; | 5343 | vcpu->arch.mtrr_state.have_fixed = 1; |
| 5224 | vcpu_load(vcpu); | 5344 | vcpu_load(vcpu); |
| 5225 | r = kvm_arch_vcpu_reset(vcpu); | 5345 | r = kvm_arch_vcpu_reset(vcpu); |
| @@ -5241,6 +5361,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
| 5241 | kvm_mmu_unload(vcpu); | 5361 | kvm_mmu_unload(vcpu); |
| 5242 | vcpu_put(vcpu); | 5362 | vcpu_put(vcpu); |
| 5243 | 5363 | ||
| 5364 | fx_free(vcpu); | ||
| 5244 | kvm_x86_ops->vcpu_free(vcpu); | 5365 | kvm_x86_ops->vcpu_free(vcpu); |
| 5245 | } | 5366 | } |
| 5246 | 5367 | ||
| @@ -5334,7 +5455,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 5334 | } | 5455 | } |
| 5335 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 5456 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
| 5336 | 5457 | ||
| 5458 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | ||
| 5459 | goto fail_free_mce_banks; | ||
| 5460 | |||
| 5337 | return 0; | 5461 | return 0; |
| 5462 | fail_free_mce_banks: | ||
| 5463 | kfree(vcpu->arch.mce_banks); | ||
| 5338 | fail_free_lapic: | 5464 | fail_free_lapic: |
| 5339 | kvm_free_lapic(vcpu); | 5465 | kvm_free_lapic(vcpu); |
| 5340 | fail_mmu_destroy: | 5466 | fail_mmu_destroy: |
| @@ -5364,12 +5490,6 @@ struct kvm *kvm_arch_create_vm(void) | |||
| 5364 | if (!kvm) | 5490 | if (!kvm) |
| 5365 | return ERR_PTR(-ENOMEM); | 5491 | return ERR_PTR(-ENOMEM); |
| 5366 | 5492 | ||
| 5367 | kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
| 5368 | if (!kvm->arch.aliases) { | ||
| 5369 | kfree(kvm); | ||
| 5370 | return ERR_PTR(-ENOMEM); | ||
| 5371 | } | ||
| 5372 | |||
| 5373 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 5493 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
| 5374 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 5494 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
| 5375 | 5495 | ||
| @@ -5412,12 +5532,12 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
| 5412 | void kvm_arch_sync_events(struct kvm *kvm) | 5532 | void kvm_arch_sync_events(struct kvm *kvm) |
| 5413 | { | 5533 | { |
| 5414 | kvm_free_all_assigned_devices(kvm); | 5534 | kvm_free_all_assigned_devices(kvm); |
| 5535 | kvm_free_pit(kvm); | ||
| 5415 | } | 5536 | } |
| 5416 | 5537 | ||
| 5417 | void kvm_arch_destroy_vm(struct kvm *kvm) | 5538 | void kvm_arch_destroy_vm(struct kvm *kvm) |
| 5418 | { | 5539 | { |
| 5419 | kvm_iommu_unmap_guest(kvm); | 5540 | kvm_iommu_unmap_guest(kvm); |
| 5420 | kvm_free_pit(kvm); | ||
| 5421 | kfree(kvm->arch.vpic); | 5541 | kfree(kvm->arch.vpic); |
| 5422 | kfree(kvm->arch.vioapic); | 5542 | kfree(kvm->arch.vioapic); |
| 5423 | kvm_free_vcpus(kvm); | 5543 | kvm_free_vcpus(kvm); |
| @@ -5427,7 +5547,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
| 5427 | if (kvm->arch.ept_identity_pagetable) | 5547 | if (kvm->arch.ept_identity_pagetable) |
| 5428 | put_page(kvm->arch.ept_identity_pagetable); | 5548 | put_page(kvm->arch.ept_identity_pagetable); |
| 5429 | cleanup_srcu_struct(&kvm->srcu); | 5549 | cleanup_srcu_struct(&kvm->srcu); |
| 5430 | kfree(kvm->arch.aliases); | ||
| 5431 | kfree(kvm); | 5550 | kfree(kvm); |
| 5432 | } | 5551 | } |
| 5433 | 5552 | ||
| @@ -5438,6 +5557,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
| 5438 | int user_alloc) | 5557 | int user_alloc) |
| 5439 | { | 5558 | { |
| 5440 | int npages = memslot->npages; | 5559 | int npages = memslot->npages; |
| 5560 | int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; | ||
| 5561 | |||
| 5562 | /* Prevent internal slot pages from being moved by fork()/COW. */ | ||
| 5563 | if (memslot->id >= KVM_MEMORY_SLOTS) | ||
| 5564 | map_flags = MAP_SHARED | MAP_ANONYMOUS; | ||
| 5441 | 5565 | ||
| 5442 | /*To keep backward compatibility with older userspace, | 5566 | /*To keep backward compatibility with older userspace, |
| 5443 | *x86 needs to hanlde !user_alloc case. | 5567 | *x86 needs to hanlde !user_alloc case. |
| @@ -5450,7 +5574,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
| 5450 | userspace_addr = do_mmap(NULL, 0, | 5574 | userspace_addr = do_mmap(NULL, 0, |
| 5451 | npages * PAGE_SIZE, | 5575 | npages * PAGE_SIZE, |
| 5452 | PROT_READ | PROT_WRITE, | 5576 | PROT_READ | PROT_WRITE, |
| 5453 | MAP_PRIVATE | MAP_ANONYMOUS, | 5577 | map_flags, |
| 5454 | 0); | 5578 | 0); |
| 5455 | up_write(&current->mm->mmap_sem); | 5579 | up_write(&current->mm->mmap_sem); |
| 5456 | 5580 | ||
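The map_flags change only affects KVM's internal slots (memslot->id >= KVM_MEMORY_SLOTS, the regions the kernel mmaps on the guest's behalf). With MAP_SHARED | MAP_ANONYMOUS a later fork() in userspace keeps sharing the same physical pages instead of marking them copy-on-write, so the pages backing those slots cannot silently move. The userspace program below is only a generic illustration of that mmap semantics difference, not code from the patch.

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		/* Shared anonymous memory stays the same pages across fork(). */
		char *shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
		/* Private anonymous memory is copy-on-write after fork(). */
		char *private = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		strcpy(shared, "before");
		strcpy(private, "before");

		if (fork() == 0) {
			strcpy(shared, "child");	/* visible to the parent */
			strcpy(private, "child");	/* child writes its own COW copy */
			_exit(0);
		}
		wait(NULL);
		printf("shared=%s private=%s\n", shared, private);	/* shared=child private=before */
		return 0;
	}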
| @@ -5523,7 +5647,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
| 5523 | 5647 | ||
| 5524 | me = get_cpu(); | 5648 | me = get_cpu(); |
| 5525 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | 5649 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
| 5526 | if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) | 5650 | if (atomic_xchg(&vcpu->guest_mode, 0)) |
| 5527 | smp_send_reschedule(cpu); | 5651 | smp_send_reschedule(cpu); |
| 5528 | put_cpu(); | 5652 | put_cpu(); |
| 5529 | } | 5653 | } |
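kvm_vcpu_kick() now sends the reschedule IPI only when it atomically observes the target vcpu in guest mode, and the xchg guarantees at most one IPI per guest entry. A hedged sketch of the producer side this relies on; the real code lives in the vcpu entry path, not in this hunk, so the fragment below is illustrative only.

	/* Illustrative only: the vcpu thread flags itself as "in guest mode"
	 * around the hardware entry, so a concurrent kvm_vcpu_kick() that
	 * atomic_xchg()s the flag back to 0 knows an IPI is worth sending. */
	static void guest_entry_fragment(struct kvm_vcpu *vcpu)
	{
		atomic_set(&vcpu->guest_mode, 1);
		smp_wmb();			/* order the flag against request checks */

		/* ... VMLAUNCH/VMRUN, run until the next exit ... */

		atomic_set(&vcpu->guest_mode, 0);
	}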
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f4b54458285b..b7a404722d2b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -65,13 +65,6 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
| 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
| 69 | { | ||
| 70 | return rcu_dereference_check(kvm->arch.aliases, | ||
| 71 | srcu_read_lock_held(&kvm->srcu) | ||
| 72 | || lockdep_is_held(&kvm->slots_lock)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
| 76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
| 77 | 70 | ||
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 23ea02253900..636fc381c897 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -524,6 +524,12 @@ struct kvm_enable_cap { | |||
| 524 | #define KVM_CAP_PPC_OSI 52 | 524 | #define KVM_CAP_PPC_OSI 52 |
| 525 | #define KVM_CAP_PPC_UNSET_IRQ 53 | 525 | #define KVM_CAP_PPC_UNSET_IRQ 53 |
| 526 | #define KVM_CAP_ENABLE_CAP 54 | 526 | #define KVM_CAP_ENABLE_CAP 54 |
| 527 | #ifdef __KVM_HAVE_XSAVE | ||
| 528 | #define KVM_CAP_XSAVE 55 | ||
| 529 | #endif | ||
| 530 | #ifdef __KVM_HAVE_XCRS | ||
| 531 | #define KVM_CAP_XCRS 56 | ||
| 532 | #endif | ||
| 527 | 533 | ||
| 528 | #ifdef KVM_CAP_IRQ_ROUTING | 534 | #ifdef KVM_CAP_IRQ_ROUTING |
| 529 | 535 | ||
| @@ -613,6 +619,7 @@ struct kvm_clock_data { | |||
| 613 | */ | 619 | */ |
| 614 | #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) | 620 | #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) |
| 615 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) | 621 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) |
| 622 | /* KVM_SET_MEMORY_ALIAS is obsolete: */ | ||
| 616 | #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) | 623 | #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) |
| 617 | #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) | 624 | #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) |
| 618 | #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) | 625 | #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) |
| @@ -714,6 +721,12 @@ struct kvm_clock_data { | |||
| 714 | #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) | 721 | #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) |
| 715 | #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) | 722 | #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) |
| 716 | #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) | 723 | #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) |
| 724 | /* Available with KVM_CAP_XSAVE */ | ||
| 725 | #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) | ||
| 726 | #define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave) | ||
| 727 | /* Available with KVM_CAP_XCRS */ | ||
| 728 | #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) | ||
| 729 | #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) | ||
| 717 | 730 | ||
| 718 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 731 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
| 719 | 732 | ||
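The new vcpu ioctls expose the full XSAVE area and the extended control registers to userspace, which is what allows save/restore of AVX-era guest state. A minimal userspace sketch of probing and reading the state, assuming an already-opened /dev/kvm fd and a KVM_CREATE_VCPU fd; the function name is made up for illustration.

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <stdio.h>

	static void dump_xsave(int kvm_fd, int vcpu_fd)
	{
		struct kvm_xsave xsave;	/* 4 KiB mirror of the hardware XSAVE region */

		if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE) <= 0) {
			fprintf(stderr, "KVM_CAP_XSAVE not available\n");
			return;
		}
		if (ioctl(vcpu_fd, KVM_GET_XSAVE, &xsave) == 0)
			printf("first word of xsave region: %#x\n", xsave.region[0]);
	}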
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7cb116afa1cd..c13cc48697aa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -81,13 +81,14 @@ struct kvm_vcpu { | |||
| 81 | int vcpu_id; | 81 | int vcpu_id; |
| 82 | struct mutex mutex; | 82 | struct mutex mutex; |
| 83 | int cpu; | 83 | int cpu; |
| 84 | atomic_t guest_mode; | ||
| 84 | struct kvm_run *run; | 85 | struct kvm_run *run; |
| 85 | unsigned long requests; | 86 | unsigned long requests; |
| 86 | unsigned long guest_debug; | 87 | unsigned long guest_debug; |
| 87 | int srcu_idx; | 88 | int srcu_idx; |
| 88 | 89 | ||
| 89 | int fpu_active; | 90 | int fpu_active; |
| 90 | int guest_fpu_loaded; | 91 | int guest_fpu_loaded, guest_xcr0_loaded; |
| 91 | wait_queue_head_t wq; | 92 | wait_queue_head_t wq; |
| 92 | int sigset_active; | 93 | int sigset_active; |
| 93 | sigset_t sigset; | 94 | sigset_t sigset; |
| @@ -123,6 +124,7 @@ struct kvm_memory_slot { | |||
| 123 | } *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 124 | } *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
| 124 | unsigned long userspace_addr; | 125 | unsigned long userspace_addr; |
| 125 | int user_alloc; | 126 | int user_alloc; |
| 127 | int id; | ||
| 126 | }; | 128 | }; |
| 127 | 129 | ||
| 128 | static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) | 130 | static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) |
| @@ -266,6 +268,8 @@ extern pfn_t bad_pfn; | |||
| 266 | 268 | ||
| 267 | int is_error_page(struct page *page); | 269 | int is_error_page(struct page *page); |
| 268 | int is_error_pfn(pfn_t pfn); | 270 | int is_error_pfn(pfn_t pfn); |
| 271 | int is_hwpoison_pfn(pfn_t pfn); | ||
| 272 | int is_fault_pfn(pfn_t pfn); | ||
| 269 | int kvm_is_error_hva(unsigned long addr); | 273 | int kvm_is_error_hva(unsigned long addr); |
| 270 | int kvm_set_memory_region(struct kvm *kvm, | 274 | int kvm_set_memory_region(struct kvm *kvm, |
| 271 | struct kvm_userspace_memory_region *mem, | 275 | struct kvm_userspace_memory_region *mem, |
| @@ -284,8 +288,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
| 284 | int user_alloc); | 288 | int user_alloc); |
| 285 | void kvm_disable_largepages(void); | 289 | void kvm_disable_largepages(void); |
| 286 | void kvm_arch_flush_shadow(struct kvm *kvm); | 290 | void kvm_arch_flush_shadow(struct kvm *kvm); |
| 287 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); | ||
| 288 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn); | ||
| 289 | 291 | ||
| 290 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | 292 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
| 291 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); | 293 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); |
| @@ -445,7 +447,8 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | |||
| 445 | struct kvm_irq_mask_notifier *kimn); | 447 | struct kvm_irq_mask_notifier *kimn); |
| 446 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | 448 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, |
| 447 | struct kvm_irq_mask_notifier *kimn); | 449 | struct kvm_irq_mask_notifier *kimn); |
| 448 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); | 450 | void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, |
| 451 | bool mask); | ||
| 449 | 452 | ||
| 450 | #ifdef __KVM_HAVE_IOAPIC | 453 | #ifdef __KVM_HAVE_IOAPIC |
| 451 | void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, | 454 | void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, |
| @@ -562,10 +565,6 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se | |||
| 562 | } | 565 | } |
| 563 | #endif | 566 | #endif |
| 564 | 567 | ||
| 565 | #ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION | ||
| 566 | #define unalias_gfn_instantiation unalias_gfn | ||
| 567 | #endif | ||
| 568 | |||
| 569 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 568 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
| 570 | 569 | ||
| 571 | #define KVM_MAX_IRQ_ROUTES 1024 | 570 | #define KVM_MAX_IRQ_ROUTES 1024 |
| @@ -628,5 +627,25 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
| 628 | 627 | ||
| 629 | #endif | 628 | #endif |
| 630 | 629 | ||
| 630 | static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) | ||
| 631 | { | ||
| 632 | set_bit(req, &vcpu->requests); | ||
| 633 | } | ||
| 634 | |||
| 635 | static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu) | ||
| 636 | { | ||
| 637 | return test_and_set_bit(req, &vcpu->requests); | ||
| 638 | } | ||
| 639 | |||
| 640 | static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) | ||
| 641 | { | ||
| 642 | if (test_bit(req, &vcpu->requests)) { | ||
| 643 | clear_bit(req, &vcpu->requests); | ||
| 644 | return true; | ||
| 645 | } else { | ||
| 646 | return false; | ||
| 647 | } | ||
| 648 | } | ||
| 649 | |||
| 631 | #endif | 650 | #endif |
| 632 | 651 | ||
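kvm_make_request()/kvm_check_request() wrap the open-coded set_bit/test-and-clear pattern that callers elsewhere in this diff are converted to. The consumer side lives in the arch run loops and is not part of this hunk; a hedged sketch of the intended usage, with placeholder handling.

	/* Illustrative consumer, not patch code: drain request bits before
	 * re-entering the guest. */
	static void service_requests_fragment(struct kvm_vcpu *vcpu)
	{
		if (!vcpu->requests)
			return;

		if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
			/* the vcpu became runnable while halted */
		}
		if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
			/* give the FPU back to the host until the guest touches it again */
		}
	}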
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index fb46efbeabec..7ac0d4eee430 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h | |||
| @@ -32,11 +32,11 @@ | |||
| 32 | 32 | ||
| 33 | typedef unsigned long gva_t; | 33 | typedef unsigned long gva_t; |
| 34 | typedef u64 gpa_t; | 34 | typedef u64 gpa_t; |
| 35 | typedef unsigned long gfn_t; | 35 | typedef u64 gfn_t; |
| 36 | 36 | ||
| 37 | typedef unsigned long hva_t; | 37 | typedef unsigned long hva_t; |
| 38 | typedef u64 hpa_t; | 38 | typedef u64 hpa_t; |
| 39 | typedef unsigned long hfn_t; | 39 | typedef u64 hfn_t; |
| 40 | 40 | ||
| 41 | typedef hfn_t pfn_t; | 41 | typedef hfn_t pfn_t; |
| 42 | 42 | ||
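Widening gfn_t and hfn_t to u64 means frame numbers no longer get truncated on 32-bit hosts, where unsigned long is only 32 bits, and derived physical addresses are computed in 64-bit arithmetic from the start. Purely as an illustration (the helper name below is made up; it is not the kernel's own gfn-to-gpa helper):

	/* Illustration, not patch code: with a 32-bit gfn_t the shift had to
	 * be widened by hand, e.g. (gpa_t)gfn << PAGE_SHIFT; with gfn_t == u64
	 * the widening is implicit. */
	static inline gpa_t example_gfn_to_gpa(gfn_t gfn)
	{
		return gfn << PAGE_SHIFT;
	}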
diff --git a/include/linux/mm.h b/include/linux/mm.h index a2b48041b910..7a9ab7db1975 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -1465,6 +1465,14 @@ extern int sysctl_memory_failure_recovery; | |||
| 1465 | extern void shake_page(struct page *p, int access); | 1465 | extern void shake_page(struct page *p, int access); |
| 1466 | extern atomic_long_t mce_bad_pages; | 1466 | extern atomic_long_t mce_bad_pages; |
| 1467 | extern int soft_offline_page(struct page *page, int flags); | 1467 | extern int soft_offline_page(struct page *page, int flags); |
| 1468 | #ifdef CONFIG_MEMORY_FAILURE | ||
| 1469 | int is_hwpoison_address(unsigned long addr); | ||
| 1470 | #else | ||
| 1471 | static inline int is_hwpoison_address(unsigned long addr) | ||
| 1472 | { | ||
| 1473 | return 0; | ||
| 1474 | } | ||
| 1475 | #endif | ||
| 1468 | 1476 | ||
| 1469 | extern void dump_page(struct page *page); | 1477 | extern void dump_page(struct page *page); |
| 1470 | 1478 | ||
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 620b0b461593..6b44e52cacaa 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include <linux/page-isolation.h> | 45 | #include <linux/page-isolation.h> |
| 46 | #include <linux/suspend.h> | 46 | #include <linux/suspend.h> |
| 47 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
| 48 | #include <linux/swapops.h> | ||
| 48 | #include "internal.h" | 49 | #include "internal.h" |
| 49 | 50 | ||
| 50 | int sysctl_memory_failure_early_kill __read_mostly = 0; | 51 | int sysctl_memory_failure_early_kill __read_mostly = 0; |
| @@ -1296,3 +1297,35 @@ done: | |||
| 1296 | /* keep elevated page count for bad page */ | 1297 | /* keep elevated page count for bad page */ |
| 1297 | return ret; | 1298 | return ret; |
| 1298 | } | 1299 | } |
| 1300 | |||
| 1301 | /* | ||
| 1302 | * The caller must hold current->mm->mmap_sem in read mode. | ||
| 1303 | */ | ||
| 1304 | int is_hwpoison_address(unsigned long addr) | ||
| 1305 | { | ||
| 1306 | pgd_t *pgdp; | ||
| 1307 | pud_t pud, *pudp; | ||
| 1308 | pmd_t pmd, *pmdp; | ||
| 1309 | pte_t pte, *ptep; | ||
| 1310 | swp_entry_t entry; | ||
| 1311 | |||
| 1312 | pgdp = pgd_offset(current->mm, addr); | ||
| 1313 | if (!pgd_present(*pgdp)) | ||
| 1314 | return 0; | ||
| 1315 | pudp = pud_offset(pgdp, addr); | ||
| 1316 | pud = *pudp; | ||
| 1317 | if (!pud_present(pud) || pud_large(pud)) | ||
| 1318 | return 0; | ||
| 1319 | pmdp = pmd_offset(pudp, addr); | ||
| 1320 | pmd = *pmdp; | ||
| 1321 | if (!pmd_present(pmd) || pmd_large(pmd)) | ||
| 1322 | return 0; | ||
| 1323 | ptep = pte_offset_map(pmdp, addr); | ||
| 1324 | pte = *ptep; | ||
| 1325 | pte_unmap(ptep); | ||
| 1326 | if (!is_swap_pte(pte)) | ||
| 1327 | return 0; | ||
| 1328 | entry = pte_to_swp_entry(pte); | ||
| 1329 | return is_hwpoison_entry(entry); | ||
| 1330 | } | ||
| 1331 | EXPORT_SYMBOL_GPL(is_hwpoison_address); | ||
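Since the walk reads live page tables without taking page-table locks, the comment requires the caller to hold mmap_sem for reading, which is exactly how the KVM caller later in this series uses it. A minimal caller-side sketch; the wrapper function is hypothetical.

	/* Illustrative caller: the mmap_sem read lock keeps the page tables
	 * stable while is_hwpoison_address() walks them. */
	static int addr_is_poisoned(unsigned long addr)
	{
		int poisoned;

		down_read(&current->mm->mmap_sem);
		poisoned = is_hwpoison_address(addr);
		up_read(&current->mm->mmap_sem);

		return poisoned;
	}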
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 4d10b1e047f4..7c98928b09d9 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Kernel-based Virtual Machine - device assignment support | 2 | * Kernel-based Virtual Machine - device assignment support |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 2006-9 Red Hat, Inc | 4 | * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. |
| 5 | * | 5 | * |
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. See | 6 | * This work is licensed under the terms of the GNU GPL, version 2. See |
| 7 | * the COPYING file in the top-level directory. | 7 | * the COPYING file in the top-level directory. |
| @@ -58,12 +58,10 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | |||
| 58 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | 58 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) |
| 59 | { | 59 | { |
| 60 | struct kvm_assigned_dev_kernel *assigned_dev; | 60 | struct kvm_assigned_dev_kernel *assigned_dev; |
| 61 | struct kvm *kvm; | ||
| 62 | int i; | 61 | int i; |
| 63 | 62 | ||
| 64 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | 63 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, |
| 65 | interrupt_work); | 64 | interrupt_work); |
| 66 | kvm = assigned_dev->kvm; | ||
| 67 | 65 | ||
| 68 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | 66 | spin_lock_irq(&assigned_dev->assigned_dev_lock); |
| 69 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | 67 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
| @@ -448,9 +446,6 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | |||
| 448 | struct kvm_assigned_dev_kernel *match; | 446 | struct kvm_assigned_dev_kernel *match; |
| 449 | unsigned long host_irq_type, guest_irq_type; | 447 | unsigned long host_irq_type, guest_irq_type; |
| 450 | 448 | ||
| 451 | if (!capable(CAP_SYS_RAWIO)) | ||
| 452 | return -EPERM; | ||
| 453 | |||
| 454 | if (!irqchip_in_kernel(kvm)) | 449 | if (!irqchip_in_kernel(kvm)) |
| 455 | return r; | 450 | return r; |
| 456 | 451 | ||
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 53850177163f..fc8487564d1f 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | * KVM coalesced MMIO | 2 | * KVM coalesced MMIO |
| 3 | * | 3 | * |
| 4 | * Copyright (c) 2008 Bull S.A.S. | 4 | * Copyright (c) 2008 Bull S.A.S. |
| 5 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
| 5 | * | 6 | * |
| 6 | * Author: Laurent Vivier <Laurent.Vivier@bull.net> | 7 | * Author: Laurent Vivier <Laurent.Vivier@bull.net> |
| 7 | * | 8 | * |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b81f0ebbaaad..66cf65b510b1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | * kvm eventfd support - use eventfd objects to signal various KVM events | 2 | * kvm eventfd support - use eventfd objects to signal various KVM events |
| 3 | * | 3 | * |
| 4 | * Copyright 2009 Novell. All Rights Reserved. | 4 | * Copyright 2009 Novell. All Rights Reserved. |
| 5 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 5 | * | 6 | * |
| 6 | * Author: | 7 | * Author: |
| 7 | * Gregory Haskins <ghaskins@novell.com> | 8 | * Gregory Haskins <ghaskins@novell.com> |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 3500dee9cf2b..0b9df8303dcf 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2001 MandrakeSoft S.A. | 2 | * Copyright (C) 2001 MandrakeSoft S.A. |
| 3 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 3 | * | 4 | * |
| 4 | * MandrakeSoft S.A. | 5 | * MandrakeSoft S.A. |
| 5 | * 43, rue d'Aboukir | 6 | * 43, rue d'Aboukir |
| @@ -151,7 +152,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
| 151 | update_handled_vectors(ioapic); | 152 | update_handled_vectors(ioapic); |
| 152 | mask_after = e->fields.mask; | 153 | mask_after = e->fields.mask; |
| 153 | if (mask_before != mask_after) | 154 | if (mask_before != mask_after) |
| 154 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); | 155 | kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); |
| 155 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG | 156 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG |
| 156 | && ioapic->irr & (1 << index)) | 157 | && ioapic->irr & (1 << index)) |
| 157 | ioapic_service(ioapic, index); | 158 | ioapic_service(ioapic, index); |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 96048ee9e39e..62a9caf0563c 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
| @@ -16,6 +16,8 @@ | |||
| 16 | * | 16 | * |
| 17 | * Copyright (C) 2006-2008 Intel Corporation | 17 | * Copyright (C) 2006-2008 Intel Corporation |
| 18 | * Copyright IBM Corporation, 2008 | 18 | * Copyright IBM Corporation, 2008 |
| 19 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 20 | * | ||
| 19 | * Author: Allen M. Kay <allen.m.kay@intel.com> | 21 | * Author: Allen M. Kay <allen.m.kay@intel.com> |
| 20 | * Author: Weidong Han <weidong.han@intel.com> | 22 | * Author: Weidong Han <weidong.han@intel.com> |
| 21 | * Author: Ben-Ami Yassour <benami@il.ibm.com> | 23 | * Author: Ben-Ami Yassour <benami@il.ibm.com> |
| @@ -106,7 +108,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | |||
| 106 | get_order(page_size), flags); | 108 | get_order(page_size), flags); |
| 107 | if (r) { | 109 | if (r) { |
| 108 | printk(KERN_ERR "kvm_iommu_map_address:" | 110 | printk(KERN_ERR "kvm_iommu_map_address:" |
| 109 | "iommu failed to map pfn=%lx\n", pfn); | 111 | "iommu failed to map pfn=%llx\n", pfn); |
| 110 | goto unmap_pages; | 112 | goto unmap_pages; |
| 111 | } | 113 | } |
| 112 | 114 | ||
| @@ -124,9 +126,10 @@ unmap_pages: | |||
| 124 | 126 | ||
| 125 | static int kvm_iommu_map_memslots(struct kvm *kvm) | 127 | static int kvm_iommu_map_memslots(struct kvm *kvm) |
| 126 | { | 128 | { |
| 127 | int i, r = 0; | 129 | int i, idx, r = 0; |
| 128 | struct kvm_memslots *slots; | 130 | struct kvm_memslots *slots; |
| 129 | 131 | ||
| 132 | idx = srcu_read_lock(&kvm->srcu); | ||
| 130 | slots = kvm_memslots(kvm); | 133 | slots = kvm_memslots(kvm); |
| 131 | 134 | ||
| 132 | for (i = 0; i < slots->nmemslots; i++) { | 135 | for (i = 0; i < slots->nmemslots; i++) { |
| @@ -134,6 +137,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) | |||
| 134 | if (r) | 137 | if (r) |
| 135 | break; | 138 | break; |
| 136 | } | 139 | } |
| 140 | srcu_read_unlock(&kvm->srcu, idx); | ||
| 137 | 141 | ||
| 138 | return r; | 142 | return r; |
| 139 | } | 143 | } |
| @@ -283,15 +287,17 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
| 283 | 287 | ||
| 284 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) | 288 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) |
| 285 | { | 289 | { |
| 286 | int i; | 290 | int i, idx; |
| 287 | struct kvm_memslots *slots; | 291 | struct kvm_memslots *slots; |
| 288 | 292 | ||
| 293 | idx = srcu_read_lock(&kvm->srcu); | ||
| 289 | slots = kvm_memslots(kvm); | 294 | slots = kvm_memslots(kvm); |
| 290 | 295 | ||
| 291 | for (i = 0; i < slots->nmemslots; i++) { | 296 | for (i = 0; i < slots->nmemslots; i++) { |
| 292 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, | 297 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, |
| 293 | slots->memslots[i].npages); | 298 | slots->memslots[i].npages); |
| 294 | } | 299 | } |
| 300 | srcu_read_unlock(&kvm->srcu, idx); | ||
| 295 | 301 | ||
| 296 | return 0; | 302 | return 0; |
| 297 | } | 303 | } |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a0e88809e45e..369e38010ad5 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | * Authors: | 17 | * Authors: |
| 18 | * Yaozu (Eddie) Dong <Eddie.dong@intel.com> | 18 | * Yaozu (Eddie) Dong <Eddie.dong@intel.com> |
| 19 | * | 19 | * |
| 20 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
| 20 | */ | 21 | */ |
| 21 | 22 | ||
| 22 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
| @@ -99,7 +100,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
| 99 | if (r < 0) | 100 | if (r < 0) |
| 100 | r = 0; | 101 | r = 0; |
| 101 | r += kvm_apic_set_irq(vcpu, irq); | 102 | r += kvm_apic_set_irq(vcpu, irq); |
| 102 | } else { | 103 | } else if (kvm_lapic_enabled(vcpu)) { |
| 103 | if (!lowest) | 104 | if (!lowest) |
| 104 | lowest = vcpu; | 105 | lowest = vcpu; |
| 105 | else if (kvm_apic_compare_prio(vcpu, lowest) < 0) | 106 | else if (kvm_apic_compare_prio(vcpu, lowest) < 0) |
| @@ -278,15 +279,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |||
| 278 | synchronize_rcu(); | 279 | synchronize_rcu(); |
| 279 | } | 280 | } |
| 280 | 281 | ||
| 281 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | 282 | void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, |
| 283 | bool mask) | ||
| 282 | { | 284 | { |
| 283 | struct kvm_irq_mask_notifier *kimn; | 285 | struct kvm_irq_mask_notifier *kimn; |
| 284 | struct hlist_node *n; | 286 | struct hlist_node *n; |
| 287 | int gsi; | ||
| 285 | 288 | ||
| 286 | rcu_read_lock(); | 289 | rcu_read_lock(); |
| 287 | hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) | 290 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; |
| 288 | if (kimn->irq == irq) | 291 | if (gsi != -1) |
| 289 | kimn->func(kimn, mask); | 292 | hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) |
| 293 | if (kimn->irq == gsi) | ||
| 294 | kimn->func(kimn, mask); | ||
| 290 | rcu_read_unlock(); | 295 | rcu_read_unlock(); |
| 291 | } | 296 | } |
| 292 | 297 | ||
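With the new signature the notifier list is still keyed by GSI; the (irqchip, pin) pair is translated through the routing table before the notifiers are matched. A hedged sketch of the registration side, using the API declared in kvm_host.h above; the consumer shown is hypothetical (in-tree, the i8254 PIT registers such a notifier for its timer line).

	/* Illustrative consumer: get told when a guest GSI is masked/unmasked. */
	static void sample_mask_notify(struct kvm_irq_mask_notifier *kimn, bool masked)
	{
		/* e.g. stop reinjecting timer ticks while the line is masked */
	}

	static struct kvm_irq_mask_notifier sample_kimn = {
		.func = sample_mask_notify,
	};

	static void hook_gsi(struct kvm *kvm, int gsi)
	{
		kvm_register_irq_mask_notifier(kvm, gsi, &sample_kimn);
	}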
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f032806a212f..b78b794c1039 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | * machines without emulation or binary translation. | 5 | * machines without emulation or binary translation. |
| 6 | * | 6 | * |
| 7 | * Copyright (C) 2006 Qumranet, Inc. | 7 | * Copyright (C) 2006 Qumranet, Inc. |
| 8 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | ||
| 8 | * | 9 | * |
| 9 | * Authors: | 10 | * Authors: |
| 10 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
| @@ -92,6 +93,12 @@ static bool kvm_rebooting; | |||
| 92 | 93 | ||
| 93 | static bool largepages_enabled = true; | 94 | static bool largepages_enabled = true; |
| 94 | 95 | ||
| 96 | static struct page *hwpoison_page; | ||
| 97 | static pfn_t hwpoison_pfn; | ||
| 98 | |||
| 99 | static struct page *fault_page; | ||
| 100 | static pfn_t fault_pfn; | ||
| 101 | |||
| 95 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 102 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
| 96 | { | 103 | { |
| 97 | if (pfn_valid(pfn)) { | 104 | if (pfn_valid(pfn)) { |
| @@ -141,7 +148,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
| 141 | raw_spin_lock(&kvm->requests_lock); | 148 | raw_spin_lock(&kvm->requests_lock); |
| 142 | me = smp_processor_id(); | 149 | me = smp_processor_id(); |
| 143 | kvm_for_each_vcpu(i, vcpu, kvm) { | 150 | kvm_for_each_vcpu(i, vcpu, kvm) { |
| 144 | if (test_and_set_bit(req, &vcpu->requests)) | 151 | if (kvm_make_check_request(req, vcpu)) |
| 145 | continue; | 152 | continue; |
| 146 | cpu = vcpu->cpu; | 153 | cpu = vcpu->cpu; |
| 147 | if (cpus != NULL && cpu != -1 && cpu != me) | 154 | if (cpus != NULL && cpu != -1 && cpu != me) |
| @@ -566,6 +573,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 566 | 573 | ||
| 567 | new = old = *memslot; | 574 | new = old = *memslot; |
| 568 | 575 | ||
| 576 | new.id = mem->slot; | ||
| 569 | new.base_gfn = base_gfn; | 577 | new.base_gfn = base_gfn; |
| 570 | new.npages = npages; | 578 | new.npages = npages; |
| 571 | new.flags = mem->flags; | 579 | new.flags = mem->flags; |
| @@ -596,7 +604,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 596 | /* Allocate if a slot is being created */ | 604 | /* Allocate if a slot is being created */ |
| 597 | #ifndef CONFIG_S390 | 605 | #ifndef CONFIG_S390 |
| 598 | if (npages && !new.rmap) { | 606 | if (npages && !new.rmap) { |
| 599 | new.rmap = vmalloc(npages * sizeof(struct page *)); | 607 | new.rmap = vmalloc(npages * sizeof(*new.rmap)); |
| 600 | 608 | ||
| 601 | if (!new.rmap) | 609 | if (!new.rmap) |
| 602 | goto out_free; | 610 | goto out_free; |
| @@ -621,9 +629,9 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 621 | if (new.lpage_info[i]) | 629 | if (new.lpage_info[i]) |
| 622 | continue; | 630 | continue; |
| 623 | 631 | ||
| 624 | lpages = 1 + (base_gfn + npages - 1) / | 632 | lpages = 1 + ((base_gfn + npages - 1) |
| 625 | KVM_PAGES_PER_HPAGE(level); | 633 | >> KVM_HPAGE_GFN_SHIFT(level)); |
| 626 | lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); | 634 | lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); |
| 627 | 635 | ||
| 628 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); | 636 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); |
| 629 | 637 | ||
| @@ -633,9 +641,9 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 633 | memset(new.lpage_info[i], 0, | 641 | memset(new.lpage_info[i], 0, |
| 634 | lpages * sizeof(*new.lpage_info[i])); | 642 | lpages * sizeof(*new.lpage_info[i])); |
| 635 | 643 | ||
| 636 | if (base_gfn % KVM_PAGES_PER_HPAGE(level)) | 644 | if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) |
| 637 | new.lpage_info[i][0].write_count = 1; | 645 | new.lpage_info[i][0].write_count = 1; |
| 638 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) | 646 | if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) |
| 639 | new.lpage_info[i][lpages - 1].write_count = 1; | 647 | new.lpage_info[i][lpages - 1].write_count = 1; |
| 640 | ugfn = new.userspace_addr >> PAGE_SHIFT; | 648 | ugfn = new.userspace_addr >> PAGE_SHIFT; |
| 641 | /* | 649 | /* |
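With gfn_t now u64, the '/' and '%' in the old slot setup would become 64-bit division helpers on 32-bit hosts, so the code switches to shifting and masking by KVM_HPAGE_GFN_SHIFT(level); the counts come out the same. A small worked example, with values chosen only for illustration (2 MiB large pages, so the shift is 9 and there are 512 small pages per chunk):

	/*
	 * Illustration only: base_gfn = 0x123, npages = 0x1000.
	 *
	 *   old: lpages = 1 + (0x123 + 0x1000 - 1) / 512 - 0x123 / 512
	 *               = 1 + 8 - 0 = 9
	 *   new: lpages = 1 + ((0x123 + 0x1000 - 1) >> 9) - (0x123 >> 9)
	 *               = 1 + 8 - 0 = 9
	 *
	 * Likewise (base_gfn % 512) != 0 and (base_gfn & 511) != 0 agree on
	 * whether the first large-page chunk is only partially covered.
	 */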
| @@ -810,16 +818,28 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages); | |||
| 810 | 818 | ||
| 811 | int is_error_page(struct page *page) | 819 | int is_error_page(struct page *page) |
| 812 | { | 820 | { |
| 813 | return page == bad_page; | 821 | return page == bad_page || page == hwpoison_page || page == fault_page; |
| 814 | } | 822 | } |
| 815 | EXPORT_SYMBOL_GPL(is_error_page); | 823 | EXPORT_SYMBOL_GPL(is_error_page); |
| 816 | 824 | ||
| 817 | int is_error_pfn(pfn_t pfn) | 825 | int is_error_pfn(pfn_t pfn) |
| 818 | { | 826 | { |
| 819 | return pfn == bad_pfn; | 827 | return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn; |
| 820 | } | 828 | } |
| 821 | EXPORT_SYMBOL_GPL(is_error_pfn); | 829 | EXPORT_SYMBOL_GPL(is_error_pfn); |
| 822 | 830 | ||
| 831 | int is_hwpoison_pfn(pfn_t pfn) | ||
| 832 | { | ||
| 833 | return pfn == hwpoison_pfn; | ||
| 834 | } | ||
| 835 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); | ||
| 836 | |||
| 837 | int is_fault_pfn(pfn_t pfn) | ||
| 838 | { | ||
| 839 | return pfn == fault_pfn; | ||
| 840 | } | ||
| 841 | EXPORT_SYMBOL_GPL(is_fault_pfn); | ||
| 842 | |||
| 823 | static inline unsigned long bad_hva(void) | 843 | static inline unsigned long bad_hva(void) |
| 824 | { | 844 | { |
| 825 | return PAGE_OFFSET; | 845 | return PAGE_OFFSET; |
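is_error_pfn() now folds in the two new special pages, while is_hwpoison_pfn() and is_fault_pfn() let callers tell the cases apart. A hedged sketch of a fault-path caller; the branch bodies are placeholders, not the real MMU code.

	/* Illustrative only: classify what gfn_to_pfn() handed back. */
	static void classify_pfn_fragment(struct kvm *kvm, gfn_t gfn)
	{
		pfn_t pfn = gfn_to_pfn(kvm, gfn);

		if (!is_error_pfn(pfn))
			return;		/* ordinary RAM mapping */
		if (is_hwpoison_pfn(pfn)) {
			/* the backing host page is poisoned: report a memory
			 * error for this gfn instead of mapping it */
		} else if (is_fault_pfn(pfn)) {
			/* the host virtual address could not be resolved */
		} else {
			/* bad_pfn: the gfn is not covered by any memory slot */
		}
	}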
| @@ -831,7 +851,7 @@ int kvm_is_error_hva(unsigned long addr) | |||
| 831 | } | 851 | } |
| 832 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); | 852 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); |
| 833 | 853 | ||
| 834 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) | 854 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) |
| 835 | { | 855 | { |
| 836 | int i; | 856 | int i; |
| 837 | struct kvm_memslots *slots = kvm_memslots(kvm); | 857 | struct kvm_memslots *slots = kvm_memslots(kvm); |
| @@ -845,20 +865,13 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) | |||
| 845 | } | 865 | } |
| 846 | return NULL; | 866 | return NULL; |
| 847 | } | 867 | } |
| 848 | EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); | 868 | EXPORT_SYMBOL_GPL(gfn_to_memslot); |
| 849 | |||
| 850 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | ||
| 851 | { | ||
| 852 | gfn = unalias_gfn(kvm, gfn); | ||
| 853 | return gfn_to_memslot_unaliased(kvm, gfn); | ||
| 854 | } | ||
| 855 | 869 | ||
| 856 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 870 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
| 857 | { | 871 | { |
| 858 | int i; | 872 | int i; |
| 859 | struct kvm_memslots *slots = kvm_memslots(kvm); | 873 | struct kvm_memslots *slots = kvm_memslots(kvm); |
| 860 | 874 | ||
| 861 | gfn = unalias_gfn_instantiation(kvm, gfn); | ||
| 862 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 875 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
| 863 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 876 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
| 864 | 877 | ||
| @@ -903,7 +916,6 @@ int memslot_id(struct kvm *kvm, gfn_t gfn) | |||
| 903 | struct kvm_memslots *slots = kvm_memslots(kvm); | 916 | struct kvm_memslots *slots = kvm_memslots(kvm); |
| 904 | struct kvm_memory_slot *memslot = NULL; | 917 | struct kvm_memory_slot *memslot = NULL; |
| 905 | 918 | ||
| 906 | gfn = unalias_gfn(kvm, gfn); | ||
| 907 | for (i = 0; i < slots->nmemslots; ++i) { | 919 | for (i = 0; i < slots->nmemslots; ++i) { |
| 908 | memslot = &slots->memslots[i]; | 920 | memslot = &slots->memslots[i]; |
| 909 | 921 | ||
| @@ -924,8 +936,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
| 924 | { | 936 | { |
| 925 | struct kvm_memory_slot *slot; | 937 | struct kvm_memory_slot *slot; |
| 926 | 938 | ||
| 927 | gfn = unalias_gfn_instantiation(kvm, gfn); | 939 | slot = gfn_to_memslot(kvm, gfn); |
| 928 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 929 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 940 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
| 930 | return bad_hva(); | 941 | return bad_hva(); |
| 931 | return gfn_to_hva_memslot(slot, gfn); | 942 | return gfn_to_hva_memslot(slot, gfn); |
| @@ -946,13 +957,19 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) | |||
| 946 | struct vm_area_struct *vma; | 957 | struct vm_area_struct *vma; |
| 947 | 958 | ||
| 948 | down_read(&current->mm->mmap_sem); | 959 | down_read(&current->mm->mmap_sem); |
| 960 | if (is_hwpoison_address(addr)) { | ||
| 961 | up_read(&current->mm->mmap_sem); | ||
| 962 | get_page(hwpoison_page); | ||
| 963 | return page_to_pfn(hwpoison_page); | ||
| 964 | } | ||
| 965 | |||
| 949 | vma = find_vma(current->mm, addr); | 966 | vma = find_vma(current->mm, addr); |
| 950 | 967 | ||
| 951 | if (vma == NULL || addr < vma->vm_start || | 968 | if (vma == NULL || addr < vma->vm_start || |
| 952 | !(vma->vm_flags & VM_PFNMAP)) { | 969 | !(vma->vm_flags & VM_PFNMAP)) { |
| 953 | up_read(&current->mm->mmap_sem); | 970 | up_read(&current->mm->mmap_sem); |
| 954 | get_page(bad_page); | 971 | get_page(fault_page); |
| 955 | return page_to_pfn(bad_page); | 972 | return page_to_pfn(fault_page); |
| 956 | } | 973 | } |
| 957 | 974 | ||
| 958 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 975 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
| @@ -1187,8 +1204,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
| 1187 | { | 1204 | { |
| 1188 | struct kvm_memory_slot *memslot; | 1205 | struct kvm_memory_slot *memslot; |
| 1189 | 1206 | ||
| 1190 | gfn = unalias_gfn(kvm, gfn); | 1207 | memslot = gfn_to_memslot(kvm, gfn); |
| 1191 | memslot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 1192 | if (memslot && memslot->dirty_bitmap) { | 1208 | if (memslot && memslot->dirty_bitmap) { |
| 1193 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1209 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
| 1194 | 1210 | ||
| @@ -1207,7 +1223,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
| 1207 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1223 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
| 1208 | 1224 | ||
| 1209 | if (kvm_arch_vcpu_runnable(vcpu)) { | 1225 | if (kvm_arch_vcpu_runnable(vcpu)) { |
| 1210 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); | 1226 | kvm_make_request(KVM_REQ_UNHALT, vcpu); |
| 1211 | break; | 1227 | break; |
| 1212 | } | 1228 | } |
| 1213 | if (kvm_cpu_has_pending_timer(vcpu)) | 1229 | if (kvm_cpu_has_pending_timer(vcpu)) |
| @@ -1378,6 +1394,18 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
| 1378 | 1394 | ||
| 1379 | if (vcpu->kvm->mm != current->mm) | 1395 | if (vcpu->kvm->mm != current->mm) |
| 1380 | return -EIO; | 1396 | return -EIO; |
| 1397 | |||
| 1398 | #if defined(CONFIG_S390) || defined(CONFIG_PPC) | ||
| 1399 | /* | ||
| 1400 | * Special cases: vcpu ioctls that are asynchronous to vcpu execution, | ||
| 1401 | * so vcpu_load() would break it. | ||
| 1402 | */ | ||
| 1403 | if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) | ||
| 1404 | return kvm_arch_vcpu_ioctl(filp, ioctl, arg); | ||
| 1405 | #endif | ||
| 1406 | |||
| 1407 | |||
| 1408 | vcpu_load(vcpu); | ||
| 1381 | switch (ioctl) { | 1409 | switch (ioctl) { |
| 1382 | case KVM_RUN: | 1410 | case KVM_RUN: |
| 1383 | r = -EINVAL; | 1411 | r = -EINVAL; |
| @@ -1520,7 +1548,7 @@ out_free2: | |||
| 1520 | goto out; | 1548 | goto out; |
| 1521 | p = &sigset; | 1549 | p = &sigset; |
| 1522 | } | 1550 | } |
| 1523 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); | 1551 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, p); |
| 1524 | break; | 1552 | break; |
| 1525 | } | 1553 | } |
| 1526 | case KVM_GET_FPU: { | 1554 | case KVM_GET_FPU: { |
| @@ -1555,6 +1583,7 @@ out_free2: | |||
| 1555 | r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); | 1583 | r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); |
| 1556 | } | 1584 | } |
| 1557 | out: | 1585 | out: |
| 1586 | vcpu_put(vcpu); | ||
| 1558 | kfree(fpu); | 1587 | kfree(fpu); |
| 1559 | kfree(kvm_sregs); | 1588 | kfree(kvm_sregs); |
| 1560 | return r; | 1589 | return r; |
| @@ -2197,6 +2226,24 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
| 2197 | 2226 | ||
| 2198 | bad_pfn = page_to_pfn(bad_page); | 2227 | bad_pfn = page_to_pfn(bad_page); |
| 2199 | 2228 | ||
| 2229 | hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 2230 | |||
| 2231 | if (hwpoison_page == NULL) { | ||
| 2232 | r = -ENOMEM; | ||
| 2233 | goto out_free_0; | ||
| 2234 | } | ||
| 2235 | |||
| 2236 | hwpoison_pfn = page_to_pfn(hwpoison_page); | ||
| 2237 | |||
| 2238 | fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 2239 | |||
| 2240 | if (fault_page == NULL) { | ||
| 2241 | r = -ENOMEM; | ||
| 2242 | goto out_free_0; | ||
| 2243 | } | ||
| 2244 | |||
| 2245 | fault_pfn = page_to_pfn(fault_page); | ||
| 2246 | |||
| 2200 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { | 2247 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { |
| 2201 | r = -ENOMEM; | 2248 | r = -ENOMEM; |
| 2202 | goto out_free_0; | 2249 | goto out_free_0; |
| @@ -2269,6 +2316,10 @@ out_free_1: | |||
| 2269 | out_free_0a: | 2316 | out_free_0a: |
| 2270 | free_cpumask_var(cpus_hardware_enabled); | 2317 | free_cpumask_var(cpus_hardware_enabled); |
| 2271 | out_free_0: | 2318 | out_free_0: |
| 2319 | if (fault_page) | ||
| 2320 | __free_page(fault_page); | ||
| 2321 | if (hwpoison_page) | ||
| 2322 | __free_page(hwpoison_page); | ||
| 2272 | __free_page(bad_page); | 2323 | __free_page(bad_page); |
| 2273 | out: | 2324 | out: |
| 2274 | kvm_arch_exit(); | 2325 | kvm_arch_exit(); |
| @@ -2290,6 +2341,7 @@ void kvm_exit(void) | |||
| 2290 | kvm_arch_hardware_unsetup(); | 2341 | kvm_arch_hardware_unsetup(); |
| 2291 | kvm_arch_exit(); | 2342 | kvm_arch_exit(); |
| 2292 | free_cpumask_var(cpus_hardware_enabled); | 2343 | free_cpumask_var(cpus_hardware_enabled); |
| 2344 | __free_page(hwpoison_page); | ||
| 2293 | __free_page(bad_page); | 2345 | __free_page(bad_page); |
| 2294 | } | 2346 | } |
| 2295 | EXPORT_SYMBOL_GPL(kvm_exit); | 2347 | EXPORT_SYMBOL_GPL(kvm_exit); |
