author     Avi Kivity <avi@redhat.com>    2012-09-20 08:04:41 -0400
committer  Avi Kivity <avi@redhat.com>    2012-09-20 08:04:41 -0400
commit     1d86b5cc4c6d9a1be1458be3701ac9c915a9706f
tree       313fee0269d75f0dd5146762dd32c78b8ced4f75
parent     ecba9a52acdf20530d561b7634b80c35c308943a
parent     c5421519f30bd5ed77857a78de6dc8414385e602
Merge branch 'queue' into next
* queue:
KVM: MMU: Eliminate pointless temporary 'ac'
KVM: MMU: Avoid access/dirty update loop if all is well
KVM: MMU: Eliminate eperm temporary
KVM: MMU: Optimize is_last_gpte()
KVM: MMU: Simplify walk_addr_generic() loop
KVM: MMU: Optimize pte permission checks
KVM: MMU: Update accessed and dirty bits after guest pagetable walk
KVM: MMU: Move gpte_access() out of paging_tmpl.h
KVM: MMU: Optimize gpte_access() slightly
KVM: MMU: Push clean gpte write protection out of gpte_access()
KVM: clarify kvmclock documentation
KVM: make processes waiting on vcpu mutex killable
KVM: SVM: Make use of asm.h
KVM: VMX: Make use of asm.h
KVM: VMX: Make lto-friendly
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--  Documentation/virtual/kvm/msr.txt    32
-rw-r--r--  arch/x86/include/asm/kvm_host.h      14
-rw-r--r--  arch/x86/kvm/mmu.c                   91
-rw-r--r--  arch/x86/kvm/mmu.h                   25
-rw-r--r--  arch/x86/kvm/paging_tmpl.h          191
-rw-r--r--  arch/x86/kvm/svm.c                   46
-rw-r--r--  arch/x86/kvm/vmx.c                   86
-rw-r--r--  arch/x86/kvm/x86.c                   23
-rw-r--r--  include/linux/kvm_host.h              2
-rw-r--r--  virt/kvm/kvm_main.c                  10
10 files changed, 300 insertions(+), 220 deletions(-)
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 730471048583..6d470ae7b073 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -34,9 +34,12 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00 | |||
34 | time information and check that they are both equal and even. | 34 | time information and check that they are both equal and even. |
35 | An odd version indicates an in-progress update. | 35 | An odd version indicates an in-progress update. |
36 | 36 | ||
37 | sec: number of seconds for wallclock. | 37 | sec: number of seconds for wallclock at time of boot. |
38 | 38 | ||
39 | nsec: number of nanoseconds for wallclock. | 39 | nsec: number of nanoseconds for wallclock at time of boot. |
40 | |||
41 | In order to get the current wallclock time, the system_time from | ||
42 | MSR_KVM_SYSTEM_TIME_NEW needs to be added. | ||
40 | 43 | ||
41 | Note that although MSRs are per-CPU entities, the effect of this | 44 | Note that although MSRs are per-CPU entities, the effect of this |
42 | particular MSR is global. | 45 | particular MSR is global. |
@@ -82,20 +85,25 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01 | |||
82 | time at the time this structure was last updated. Unit is | 85 | time at the time this structure was last updated. Unit is |
83 | nanoseconds. | 86 | nanoseconds. |
84 | 87 | ||
85 | tsc_to_system_mul: a function of the tsc frequency. One has | 88 | tsc_to_system_mul: multiplier to be used when converting |
86 | to multiply any tsc-related quantity by this value to get | 89 | tsc-related quantity to nanoseconds |
87 | a value in nanoseconds, besides dividing by 2^tsc_shift | ||
88 | 90 | ||
89 | tsc_shift: cycle to nanosecond divider, as a power of two, to | 91 | tsc_shift: shift to be used when converting tsc-related |
90 | allow for shift rights. One has to shift right any tsc-related | 92 | quantity to nanoseconds. This shift will ensure that |
91 | quantity by this value to get a value in nanoseconds, besides | 93 | multiplication with tsc_to_system_mul does not overflow. |
92 | multiplying by tsc_to_system_mul. | 94 | A positive value denotes a left shift, a negative value |
95 | a right shift. | ||
93 | 96 | ||
94 | With this information, guests can derive per-CPU time by | 97 | The conversion from tsc to nanoseconds involves an additional |
95 | doing: | 98 | right shift by 32 bits. With this information, guests can |
99 | derive per-CPU time by doing: | ||
96 | 100 | ||
97 | time = (current_tsc - tsc_timestamp) | 101 | time = (current_tsc - tsc_timestamp) |
98 | time = (time * tsc_to_system_mul) >> tsc_shift | 102 | if (tsc_shift >= 0) |
103 | time <<= tsc_shift; | ||
104 | else | ||
105 | time >>= -tsc_shift; | ||
106 | time = (time * tsc_to_system_mul) >> 32 | ||
99 | time = time + system_time | 107 | time = time + system_time |
100 | 108 | ||
101 | flags: bits in this field indicate extended capabilities | 109 | flags: bits in this field indicate extended capabilities |
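The updated walk-through above is ordinary integer arithmetic and maps directly onto C. Below is a minimal user-space sketch of the conversion, reusing the field names from the document (tsc_timestamp, system_time, tsc_to_system_mul, tsc_shift); the struct name, the 2 GHz sample numbers and the use of unsigned __int128 are illustrative assumptions, not part of the ABI:

    #include <stdint.h>
    #include <stdio.h>

    struct pvclock_sample {                 /* hypothetical holder for the documented fields */
        uint64_t tsc_timestamp;             /* TSC value at the last update */
        uint64_t system_time;               /* nanoseconds at the last update */
        uint32_t tsc_to_system_mul;
        int8_t   tsc_shift;                 /* >= 0: shift left, < 0: shift right */
    };

    static uint64_t pvclock_to_ns(const struct pvclock_sample *s, uint64_t current_tsc)
    {
        uint64_t time = current_tsc - s->tsc_timestamp;

        if (s->tsc_shift >= 0)
            time <<= s->tsc_shift;
        else
            time >>= -s->tsc_shift;

        /* 64x32 multiply, keeping the upper 64 bits of the 96-bit result */
        time = (uint64_t)(((unsigned __int128)time * s->tsc_to_system_mul) >> 32);

        return time + s->system_time;
    }

    int main(void)
    {
        /* made-up sample: a 2 GHz TSC gives mul = 2^31 (0.5 ns per cycle), shift = 0 */
        struct pvclock_sample s = { 1000000, 0, 0x80000000u, 0 };

        printf("%llu ns\n", (unsigned long long)pvclock_to_ns(&s, 3000000));  /* 1000000 ns */
        return 0;
    }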
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64adb6117e19..43aeb9422839 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -287,10 +287,24 @@ struct kvm_mmu { | |||
287 | union kvm_mmu_page_role base_role; | 287 | union kvm_mmu_page_role base_role; |
288 | bool direct_map; | 288 | bool direct_map; |
289 | 289 | ||
290 | /* | ||
291 | * Bitmap; bit set = permission fault | ||
292 | * Byte index: page fault error code [4:1] | ||
293 | * Bit index: pte permissions in ACC_* format | ||
294 | */ | ||
295 | u8 permissions[16]; | ||
296 | |||
290 | u64 *pae_root; | 297 | u64 *pae_root; |
291 | u64 *lm_root; | 298 | u64 *lm_root; |
292 | u64 rsvd_bits_mask[2][4]; | 299 | u64 rsvd_bits_mask[2][4]; |
293 | 300 | ||
301 | /* | ||
302 | * Bitmap: bit set = last pte in walk | ||
303 | * index[0:1]: level (zero-based) | ||
304 | * index[2]: pte.ps | ||
305 | */ | ||
306 | u8 last_pte_bitmap; | ||
307 | |||
294 | bool nx; | 308 | bool nx; |
295 | 309 | ||
296 | u64 pdptrs[4]; /* pae */ | 310 | u64 pdptrs[4]; /* pae */ |
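The last_pte_bitmap layout described in the comment is easiest to see with a worked example. The sketch below models it for a 64-bit guest only (root level 4, large pages possible at levels 2 and 3); it mirrors the update_last_pte_bitmap()/is_last_gpte() code in the mmu.c hunk further down, with the vcpu and root-level handling stripped out as a simplifying assumption:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PT_PAGE_SIZE_SHIFT 7                    /* x86 PDE/PDPTE bit 7: PS */
    #define PT_PAGE_SIZE_MASK  (1ULL << PT_PAGE_SIZE_SHIFT)

    static uint8_t build_last_pte_bitmap(unsigned root_level)
    {
        /* level 1 always terminates; bit 7 there is PAT, not PS, so set both
         * the ps=0 and ps=1 entries for the zero-based level 0 */
        uint8_t map = 1 | (1 << 4);

        for (unsigned level = 2; level <= root_level; ++level)
            if (level <= 3)                          /* 2M and 1G leaves only */
                map |= 1 << (4 | (level - 1));
        return map;
    }

    static bool is_last(uint8_t map, unsigned level, uint64_t gpte)
    {
        unsigned index = (level - 1) |
                         ((gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2));
        return map & (1 << index);
    }

    int main(void)
    {
        uint8_t map = build_last_pte_bitmap(4);      /* 64-bit paging */

        printf("level 2, PS=1: %d\n", is_last(map, 2, PT_PAGE_SIZE_MASK));  /* 1 */
        printf("level 2, PS=0: %d\n", is_last(map, 2, 0));                  /* 0 */
        printf("level 1, PS=0: %d\n", is_last(map, 1, 0));                  /* 1 */
        return 0;
    }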
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aa0b469ee07d..d289fee1ffb8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) | |||
3408 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; | 3408 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; |
3409 | } | 3409 | } |
3410 | 3410 | ||
3411 | static inline void protect_clean_gpte(unsigned *access, unsigned gpte) | ||
3412 | { | ||
3413 | unsigned mask; | ||
3414 | |||
3415 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
3416 | |||
3417 | mask = (unsigned)~ACC_WRITE_MASK; | ||
3418 | /* Allow write access to dirty gptes */ | ||
3419 | mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; | ||
3420 | *access &= mask; | ||
3421 | } | ||
3422 | |||
3411 | static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | 3423 | static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, |
3412 | int *nr_present) | 3424 | int *nr_present) |
3413 | { | 3425 | { |
@@ -3425,6 +3437,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | |||
3425 | return false; | 3437 | return false; |
3426 | } | 3438 | } |
3427 | 3439 | ||
3440 | static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) | ||
3441 | { | ||
3442 | unsigned access; | ||
3443 | |||
3444 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
3445 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
3446 | |||
3447 | return access; | ||
3448 | } | ||
3449 | |||
3450 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) | ||
3451 | { | ||
3452 | unsigned index; | ||
3453 | |||
3454 | index = level - 1; | ||
3455 | index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); | ||
3456 | return mmu->last_pte_bitmap & (1 << index); | ||
3457 | } | ||
3458 | |||
3428 | #define PTTYPE 64 | 3459 | #define PTTYPE 64 |
3429 | #include "paging_tmpl.h" | 3460 | #include "paging_tmpl.h" |
3430 | #undef PTTYPE | 3461 | #undef PTTYPE |
@@ -3494,6 +3525,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3494 | } | 3525 | } |
3495 | } | 3526 | } |
3496 | 3527 | ||
3528 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | ||
3529 | { | ||
3530 | unsigned bit, byte, pfec; | ||
3531 | u8 map; | ||
3532 | bool fault, x, w, u, wf, uf, ff, smep; | ||
3533 | |||
3534 | smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); | ||
3535 | for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { | ||
3536 | pfec = byte << 1; | ||
3537 | map = 0; | ||
3538 | wf = pfec & PFERR_WRITE_MASK; | ||
3539 | uf = pfec & PFERR_USER_MASK; | ||
3540 | ff = pfec & PFERR_FETCH_MASK; | ||
3541 | for (bit = 0; bit < 8; ++bit) { | ||
3542 | x = bit & ACC_EXEC_MASK; | ||
3543 | w = bit & ACC_WRITE_MASK; | ||
3544 | u = bit & ACC_USER_MASK; | ||
3545 | |||
3546 | /* Not really needed: !nx will cause pte.nx to fault */ | ||
3547 | x |= !mmu->nx; | ||
3548 | /* Allow supervisor writes if !cr0.wp */ | ||
3549 | w |= !is_write_protection(vcpu) && !uf; | ||
3550 | /* Disallow supervisor fetches of user code if cr4.smep */ | ||
3551 | x &= !(smep && u && !uf); | ||
3552 | |||
3553 | fault = (ff && !x) || (uf && !u) || (wf && !w); | ||
3554 | map |= fault << bit; | ||
3555 | } | ||
3556 | mmu->permissions[byte] = map; | ||
3557 | } | ||
3558 | } | ||
3559 | |||
3560 | static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | ||
3561 | { | ||
3562 | u8 map; | ||
3563 | unsigned level, root_level = mmu->root_level; | ||
3564 | const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ | ||
3565 | |||
3566 | if (root_level == PT32E_ROOT_LEVEL) | ||
3567 | --root_level; | ||
3568 | /* PT_PAGE_TABLE_LEVEL always terminates */ | ||
3569 | map = 1 | (1 << ps_set_index); | ||
3570 | for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) { | ||
3571 | if (level <= PT_PDPE_LEVEL | ||
3572 | && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu))) | ||
3573 | map |= 1 << (ps_set_index | (level - 1)); | ||
3574 | } | ||
3575 | mmu->last_pte_bitmap = map; | ||
3576 | } | ||
3577 | |||
3497 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, | 3578 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, |
3498 | struct kvm_mmu *context, | 3579 | struct kvm_mmu *context, |
3499 | int level) | 3580 | int level) |
@@ -3502,6 +3583,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3502 | context->root_level = level; | 3583 | context->root_level = level; |
3503 | 3584 | ||
3504 | reset_rsvds_bits_mask(vcpu, context); | 3585 | reset_rsvds_bits_mask(vcpu, context); |
3586 | update_permission_bitmask(vcpu, context); | ||
3587 | update_last_pte_bitmap(vcpu, context); | ||
3505 | 3588 | ||
3506 | ASSERT(is_pae(vcpu)); | 3589 | ASSERT(is_pae(vcpu)); |
3507 | context->new_cr3 = paging_new_cr3; | 3590 | context->new_cr3 = paging_new_cr3; |
@@ -3530,6 +3613,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3530 | context->root_level = PT32_ROOT_LEVEL; | 3613 | context->root_level = PT32_ROOT_LEVEL; |
3531 | 3614 | ||
3532 | reset_rsvds_bits_mask(vcpu, context); | 3615 | reset_rsvds_bits_mask(vcpu, context); |
3616 | update_permission_bitmask(vcpu, context); | ||
3617 | update_last_pte_bitmap(vcpu, context); | ||
3533 | 3618 | ||
3534 | context->new_cr3 = paging_new_cr3; | 3619 | context->new_cr3 = paging_new_cr3; |
3535 | context->page_fault = paging32_page_fault; | 3620 | context->page_fault = paging32_page_fault; |
@@ -3590,6 +3675,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3590 | context->gva_to_gpa = paging32_gva_to_gpa; | 3675 | context->gva_to_gpa = paging32_gva_to_gpa; |
3591 | } | 3676 | } |
3592 | 3677 | ||
3678 | update_permission_bitmask(vcpu, context); | ||
3679 | update_last_pte_bitmap(vcpu, context); | ||
3680 | |||
3593 | return 0; | 3681 | return 0; |
3594 | } | 3682 | } |
3595 | 3683 | ||
@@ -3665,6 +3753,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3665 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | 3753 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; |
3666 | } | 3754 | } |
3667 | 3755 | ||
3756 | update_permission_bitmask(vcpu, g_context); | ||
3757 | update_last_pte_bitmap(vcpu, g_context); | ||
3758 | |||
3668 | return 0; | 3759 | return 0; |
3669 | } | 3760 | } |
3670 | 3761 | ||
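To see what the new permissions[] table holds, here is a stand-alone sketch of the same precomputation that update_permission_bitmask() performs above, with the vcpu state reduced to three booleans. The PFERR_* and ACC_* values match the x86 definitions; everything else (function names, the sample query) is illustrative:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PFERR_WRITE 0x2
    #define PFERR_USER  0x4
    #define PFERR_FETCH 0x10
    #define ACC_EXEC    0x1
    #define ACC_WRITE   0x2
    #define ACC_USER    0x4

    /* one byte per error code (pfec bits [4:1]); bit n set means a pte whose
     * ACC_* permissions equal n would take a permission fault */
    static void build_permissions(uint8_t permissions[16], bool cr0_wp, bool nx, bool smep)
    {
        for (unsigned byte = 0; byte < 16; ++byte) {
            unsigned pfec = byte << 1;
            bool wf = pfec & PFERR_WRITE;
            bool uf = pfec & PFERR_USER;
            bool ff = pfec & PFERR_FETCH;
            uint8_t map = 0;

            for (unsigned bit = 0; bit < 8; ++bit) {
                bool x = bit & ACC_EXEC;
                bool w = bit & ACC_WRITE;
                bool u = bit & ACC_USER;

                x |= !nx;                   /* without NX everything is executable */
                w |= !cr0_wp && !uf;        /* supervisor writes ignore read-only if !cr0.wp */
                x &= !(smep && u && !uf);   /* SMEP: no supervisor fetch from user pages */

                bool fault = (ff && !x) || (uf && !u) || (wf && !w);
                map |= fault << bit;
            }
            permissions[byte] = map;
        }
    }

    int main(void)
    {
        uint8_t perm[16];

        build_permissions(perm, true /* cr0.wp */, true /* nx */, false /* smep */);

        /* user write (pfec = user|write) to a supervisor read-only pte must fault */
        unsigned pfec = PFERR_WRITE | PFERR_USER, pte_access = ACC_EXEC;
        printf("fault = %d\n", (perm[pfec >> 1] >> pte_access) & 1);   /* 1 */
        return 0;
    }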
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e374db9af021..69871080e866 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -18,8 +18,10 @@ | |||
18 | #define PT_PCD_MASK (1ULL << 4) | 18 | #define PT_PCD_MASK (1ULL << 4) |
19 | #define PT_ACCESSED_SHIFT 5 | 19 | #define PT_ACCESSED_SHIFT 5 |
20 | #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) | 20 | #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) |
21 | #define PT_DIRTY_MASK (1ULL << 6) | 21 | #define PT_DIRTY_SHIFT 6 |
22 | #define PT_PAGE_SIZE_MASK (1ULL << 7) | 22 | #define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT) |
23 | #define PT_PAGE_SIZE_SHIFT 7 | ||
24 | #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT) | ||
23 | #define PT_PAT_MASK (1ULL << 7) | 25 | #define PT_PAT_MASK (1ULL << 7) |
24 | #define PT_GLOBAL_MASK (1ULL << 8) | 26 | #define PT_GLOBAL_MASK (1ULL << 8) |
25 | #define PT64_NX_SHIFT 63 | 27 | #define PT64_NX_SHIFT 63 |
@@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu) | |||
88 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | 90 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
89 | } | 91 | } |
90 | 92 | ||
91 | static inline bool check_write_user_access(struct kvm_vcpu *vcpu, | 93 | /* |
92 | bool write_fault, bool user_fault, | 94 | * Will a fault with a given page-fault error code (pfec) cause a permission |
93 | unsigned long pte) | 95 | * fault with the given access (in ACC_* format)? |
96 | */ | ||
97 | static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access, | ||
98 | unsigned pfec) | ||
94 | { | 99 | { |
95 | if (unlikely(write_fault && !is_writable_pte(pte) | 100 | return (mmu->permissions[pfec >> 1] >> pte_access) & 1; |
96 | && (user_fault || is_write_protection(vcpu)))) | ||
97 | return false; | ||
98 | |||
99 | if (unlikely(user_fault && !(pte & PT_USER_MASK))) | ||
100 | return false; | ||
101 | |||
102 | return true; | ||
103 | } | 101 | } |
102 | |||
104 | #endif | 103 | #endif |
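One reason PT_DIRTY_SHIFT is now a named constant is the shift trick used by protect_clean_gpte() in the mmu.c hunk above: the gpte's dirty bit (bit 6) is shifted down onto the writable bit (bit 1), so a clean gpte quietly loses write permission and the first guest write faults, giving KVM a chance to set the dirty bit. A self-contained sketch of just that trick (the PT_* values are the real x86 bit positions, the rest is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define PT_WRITABLE_SHIFT 1                       /* x86 PTE bit 1: R/W   */
    #define PT_WRITABLE_MASK  (1ULL << PT_WRITABLE_SHIFT)
    #define PT_DIRTY_SHIFT    6                       /* x86 PTE bit 6: dirty */
    #define ACC_WRITE_MASK    PT_WRITABLE_MASK

    static void protect_clean(unsigned *access, uint64_t gpte)
    {
        unsigned mask = (unsigned)~ACC_WRITE_MASK;

        /* move the dirty bit into the writable position and allow writes
         * only if it was set */
        mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
        *access &= mask;
    }

    int main(void)
    {
        unsigned acc = 0x7;                           /* exec | write | user */

        protect_clean(&acc, 1ULL << PT_DIRTY_SHIFT);  /* dirty gpte */
        printf("dirty gpte: %#x\n", acc);             /* keeps write: 0x7 */

        acc = 0x7;
        protect_clean(&acc, 0);                       /* clean gpte */
        printf("clean gpte: %#x\n", acc);             /* loses write: 0x5 */
        return 0;
    }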
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bf8c42bf50fe..714e2c01a6fe 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -63,10 +63,12 @@ | |||
63 | */ | 63 | */ |
64 | struct guest_walker { | 64 | struct guest_walker { |
65 | int level; | 65 | int level; |
66 | unsigned max_level; | ||
66 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; | 67 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; |
67 | pt_element_t ptes[PT_MAX_FULL_LEVELS]; | 68 | pt_element_t ptes[PT_MAX_FULL_LEVELS]; |
68 | pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; | 69 | pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; |
69 | gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; | 70 | gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; |
71 | pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; | ||
70 | unsigned pt_access; | 72 | unsigned pt_access; |
71 | unsigned pte_access; | 73 | unsigned pte_access; |
72 | gfn_t gfn; | 74 | gfn_t gfn; |
@@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
101 | return (ret != orig_pte); | 103 | return (ret != orig_pte); |
102 | } | 104 | } |
103 | 105 | ||
104 | static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte, | 106 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, |
105 | bool last) | 107 | struct kvm_mmu *mmu, |
108 | struct guest_walker *walker, | ||
109 | int write_fault) | ||
106 | { | 110 | { |
107 | unsigned access; | 111 | unsigned level, index; |
108 | 112 | pt_element_t pte, orig_pte; | |
109 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | 113 | pt_element_t __user *ptep_user; |
110 | if (last && !is_dirty_gpte(gpte)) | 114 | gfn_t table_gfn; |
111 | access &= ~ACC_WRITE_MASK; | 115 | int ret; |
112 | 116 | ||
113 | #if PTTYPE == 64 | 117 | for (level = walker->max_level; level >= walker->level; --level) { |
114 | if (vcpu->arch.mmu.nx) | 118 | pte = orig_pte = walker->ptes[level - 1]; |
115 | access &= ~(gpte >> PT64_NX_SHIFT); | 119 | table_gfn = walker->table_gfn[level - 1]; |
116 | #endif | 120 | ptep_user = walker->ptep_user[level - 1]; |
117 | return access; | 121 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); |
118 | } | 122 | if (!(pte & PT_ACCESSED_MASK)) { |
119 | 123 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); | |
120 | static bool FNAME(is_last_gpte)(struct guest_walker *walker, | 124 | pte |= PT_ACCESSED_MASK; |
121 | struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 125 | } |
122 | pt_element_t gpte) | 126 | if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { |
123 | { | 127 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
124 | if (walker->level == PT_PAGE_TABLE_LEVEL) | 128 | pte |= PT_DIRTY_MASK; |
125 | return true; | 129 | } |
126 | 130 | if (pte == orig_pte) | |
127 | if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) && | 131 | continue; |
128 | (PTTYPE == 64 || is_pse(vcpu))) | ||
129 | return true; | ||
130 | 132 | ||
131 | if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) && | 133 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); |
132 | (mmu->root_level == PT64_ROOT_LEVEL)) | 134 | if (ret) |
133 | return true; | 135 | return ret; |
134 | 136 | ||
135 | return false; | 137 | mark_page_dirty(vcpu->kvm, table_gfn); |
138 | walker->ptes[level] = pte; | ||
139 | } | ||
140 | return 0; | ||
136 | } | 141 | } |
137 | 142 | ||
138 | /* | 143 | /* |
@@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
142 | struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 147 | struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
143 | gva_t addr, u32 access) | 148 | gva_t addr, u32 access) |
144 | { | 149 | { |
150 | int ret; | ||
145 | pt_element_t pte; | 151 | pt_element_t pte; |
146 | pt_element_t __user *uninitialized_var(ptep_user); | 152 | pt_element_t __user *uninitialized_var(ptep_user); |
147 | gfn_t table_gfn; | 153 | gfn_t table_gfn; |
148 | unsigned index, pt_access, uninitialized_var(pte_access); | 154 | unsigned index, pt_access, pte_access, accessed_dirty, shift; |
149 | gpa_t pte_gpa; | 155 | gpa_t pte_gpa; |
150 | bool eperm, last_gpte; | ||
151 | int offset; | 156 | int offset; |
152 | const int write_fault = access & PFERR_WRITE_MASK; | 157 | const int write_fault = access & PFERR_WRITE_MASK; |
153 | const int user_fault = access & PFERR_USER_MASK; | 158 | const int user_fault = access & PFERR_USER_MASK; |
154 | const int fetch_fault = access & PFERR_FETCH_MASK; | 159 | const int fetch_fault = access & PFERR_FETCH_MASK; |
155 | u16 errcode = 0; | 160 | u16 errcode = 0; |
161 | gpa_t real_gpa; | ||
162 | gfn_t gfn; | ||
156 | 163 | ||
157 | trace_kvm_mmu_pagetable_walk(addr, access); | 164 | trace_kvm_mmu_pagetable_walk(addr, access); |
158 | retry_walk: | 165 | retry_walk: |
159 | eperm = false; | ||
160 | walker->level = mmu->root_level; | 166 | walker->level = mmu->root_level; |
161 | pte = mmu->get_cr3(vcpu); | 167 | pte = mmu->get_cr3(vcpu); |
162 | 168 | ||
@@ -169,15 +175,21 @@ retry_walk: | |||
169 | --walker->level; | 175 | --walker->level; |
170 | } | 176 | } |
171 | #endif | 177 | #endif |
178 | walker->max_level = walker->level; | ||
172 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 179 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
173 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); | 180 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); |
174 | 181 | ||
175 | pt_access = ACC_ALL; | 182 | accessed_dirty = PT_ACCESSED_MASK; |
183 | pt_access = pte_access = ACC_ALL; | ||
184 | ++walker->level; | ||
176 | 185 | ||
177 | for (;;) { | 186 | do { |
178 | gfn_t real_gfn; | 187 | gfn_t real_gfn; |
179 | unsigned long host_addr; | 188 | unsigned long host_addr; |
180 | 189 | ||
190 | pt_access &= pte_access; | ||
191 | --walker->level; | ||
192 | |||
181 | index = PT_INDEX(addr, walker->level); | 193 | index = PT_INDEX(addr, walker->level); |
182 | 194 | ||
183 | table_gfn = gpte_to_gfn(pte); | 195 | table_gfn = gpte_to_gfn(pte); |
@@ -199,6 +211,7 @@ retry_walk: | |||
199 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); | 211 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); |
200 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) | 212 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) |
201 | goto error; | 213 | goto error; |
214 | walker->ptep_user[walker->level - 1] = ptep_user; | ||
202 | 215 | ||
203 | trace_kvm_mmu_paging_element(pte, walker->level); | 216 | trace_kvm_mmu_paging_element(pte, walker->level); |
204 | 217 | ||
@@ -211,92 +224,48 @@ retry_walk: | |||
211 | goto error; | 224 | goto error; |
212 | } | 225 | } |
213 | 226 | ||
214 | if (!check_write_user_access(vcpu, write_fault, user_fault, | 227 | accessed_dirty &= pte; |
215 | pte)) | 228 | pte_access = pt_access & gpte_access(vcpu, pte); |
216 | eperm = true; | ||
217 | |||
218 | #if PTTYPE == 64 | ||
219 | if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) | ||
220 | eperm = true; | ||
221 | #endif | ||
222 | |||
223 | last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); | ||
224 | if (last_gpte) { | ||
225 | pte_access = pt_access & | ||
226 | FNAME(gpte_access)(vcpu, pte, true); | ||
227 | /* check if the kernel is fetching from user page */ | ||
228 | if (unlikely(pte_access & PT_USER_MASK) && | ||
229 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
230 | if (fetch_fault && !user_fault) | ||
231 | eperm = true; | ||
232 | } | ||
233 | |||
234 | if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { | ||
235 | int ret; | ||
236 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | ||
237 | sizeof(pte)); | ||
238 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, | ||
239 | pte, pte|PT_ACCESSED_MASK); | ||
240 | if (unlikely(ret < 0)) | ||
241 | goto error; | ||
242 | else if (ret) | ||
243 | goto retry_walk; | ||
244 | |||
245 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
246 | pte |= PT_ACCESSED_MASK; | ||
247 | } | ||
248 | 229 | ||
249 | walker->ptes[walker->level - 1] = pte; | 230 | walker->ptes[walker->level - 1] = pte; |
231 | } while (!is_last_gpte(mmu, walker->level, pte)); | ||
250 | 232 | ||
251 | if (last_gpte) { | 233 | if (unlikely(permission_fault(mmu, pte_access, access))) { |
252 | int lvl = walker->level; | 234 | errcode |= PFERR_PRESENT_MASK; |
253 | gpa_t real_gpa; | 235 | goto error; |
254 | gfn_t gfn; | 236 | } |
255 | u32 ac; | ||
256 | |||
257 | gfn = gpte_to_gfn_lvl(pte, lvl); | ||
258 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; | ||
259 | |||
260 | if (PTTYPE == 32 && | ||
261 | walker->level == PT_DIRECTORY_LEVEL && | ||
262 | is_cpuid_PSE36()) | ||
263 | gfn += pse36_gfn_delta(pte); | ||
264 | |||
265 | ac = write_fault | fetch_fault | user_fault; | ||
266 | 237 | ||
267 | real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), | 238 | gfn = gpte_to_gfn_lvl(pte, walker->level); |
268 | ac); | 239 | gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; |
269 | if (real_gpa == UNMAPPED_GVA) | ||
270 | return 0; | ||
271 | 240 | ||
272 | walker->gfn = real_gpa >> PAGE_SHIFT; | 241 | if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) |
242 | gfn += pse36_gfn_delta(pte); | ||
273 | 243 | ||
274 | break; | 244 | real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); |
275 | } | 245 | if (real_gpa == UNMAPPED_GVA) |
246 | return 0; | ||
276 | 247 | ||
277 | pt_access &= FNAME(gpte_access)(vcpu, pte, false); | 248 | walker->gfn = real_gpa >> PAGE_SHIFT; |
278 | --walker->level; | ||
279 | } | ||
280 | 249 | ||
281 | if (unlikely(eperm)) { | 250 | if (!write_fault) |
282 | errcode |= PFERR_PRESENT_MASK; | 251 | protect_clean_gpte(&pte_access, pte); |
283 | goto error; | ||
284 | } | ||
285 | 252 | ||
286 | if (write_fault && unlikely(!is_dirty_gpte(pte))) { | 253 | /* |
287 | int ret; | 254 | * On a write fault, fold the dirty bit into accessed_dirty by shifting it one |
255 | * place right. | ||
256 | * | ||
257 | * On a read fault, do nothing. | ||
258 | */ | ||
259 | shift = write_fault >> ilog2(PFERR_WRITE_MASK); | ||
260 | shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; | ||
261 | accessed_dirty &= pte >> shift; | ||
288 | 262 | ||
289 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 263 | if (unlikely(!accessed_dirty)) { |
290 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, | 264 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); |
291 | pte, pte|PT_DIRTY_MASK); | ||
292 | if (unlikely(ret < 0)) | 265 | if (unlikely(ret < 0)) |
293 | goto error; | 266 | goto error; |
294 | else if (ret) | 267 | else if (ret) |
295 | goto retry_walk; | 268 | goto retry_walk; |
296 | |||
297 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
298 | pte |= PT_DIRTY_MASK; | ||
299 | walker->ptes[walker->level - 1] = pte; | ||
300 | } | 269 | } |
301 | 270 | ||
302 | walker->pt_access = pt_access; | 271 | walker->pt_access = pt_access; |
@@ -368,7 +337,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
368 | return; | 337 | return; |
369 | 338 | ||
370 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 339 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
371 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); | 340 | pte_access = sp->role.access & gpte_access(vcpu, gpte); |
341 | protect_clean_gpte(&pte_access, gpte); | ||
372 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); | 342 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); |
373 | if (mmu_invalid_pfn(pfn)) | 343 | if (mmu_invalid_pfn(pfn)) |
374 | return; | 344 | return; |
@@ -441,8 +411,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
441 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) | 411 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
442 | continue; | 412 | continue; |
443 | 413 | ||
444 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, | 414 | pte_access = sp->role.access & gpte_access(vcpu, gpte); |
445 | true); | 415 | protect_clean_gpte(&pte_access, gpte); |
446 | gfn = gpte_to_gfn(gpte); | 416 | gfn = gpte_to_gfn(gpte); |
447 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, | 417 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, |
448 | pte_access & ACC_WRITE_MASK); | 418 | pte_access & ACC_WRITE_MASK); |
@@ -794,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
794 | 764 | ||
795 | gfn = gpte_to_gfn(gpte); | 765 | gfn = gpte_to_gfn(gpte); |
796 | pte_access = sp->role.access; | 766 | pte_access = sp->role.access; |
797 | pte_access &= FNAME(gpte_access)(vcpu, gpte, true); | 767 | pte_access &= gpte_access(vcpu, gpte); |
768 | protect_clean_gpte(&pte_access, gpte); | ||
798 | 769 | ||
799 | if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) | 770 | if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) |
800 | continue; | 771 | continue; |
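The subtlest part of the new walk is the branch-free accessed/dirty check at the end of walk_addr_generic(): accessed_dirty has been ANDed with every pte during the walk, so it still contains PT_ACCESSED_MASK only if every level was already accessed, and on a write fault the last pte is shifted right by one place so that its dirty bit lands on the accessed position and is folded into the same test. Only when the combined value is zero does the slow update_accessed_dirty_bits() path run. A small stand-alone model of that arithmetic (the helper name and the 0/1 write_fault argument are simplifications):

    #include <stdint.h>
    #include <stdio.h>

    #define PT_ACCESSED_SHIFT 5
    #define PT_ACCESSED_MASK  (1ULL << PT_ACCESSED_SHIFT)
    #define PT_DIRTY_SHIFT    6
    #define PFERR_WRITE_MASK  2

    /* accessed_dirty: PT_ACCESSED_MASK ANDed with every pte seen in the walk */
    static int needs_ad_update(uint64_t accessed_dirty, uint64_t last_pte, int write_fault)
    {
        /* ilog2(PFERR_WRITE_MASK) == 1, so this is 0 for reads, 1 for writes */
        unsigned shift = (write_fault ? PFERR_WRITE_MASK : 0) >> 1;

        shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
        accessed_dirty &= last_pte >> shift;
        return accessed_dirty == 0;
    }

    int main(void)
    {
        uint64_t ad = PT_ACCESSED_MASK;              /* every level accessed */
        uint64_t accessed = PT_ACCESSED_MASK;
        uint64_t accessed_dirty = accessed | (1ULL << PT_DIRTY_SHIFT);

        printf("read,  A=1      -> %d\n", needs_ad_update(ad, accessed, 0));        /* 0 */
        printf("write, A=1, D=0 -> %d\n", needs_ad_update(ad, accessed, 1));        /* 1 */
        printf("write, A=1, D=1 -> %d\n", needs_ad_update(ad, accessed_dirty, 1));  /* 0 */
        return 0;
    }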
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 611c72875fb9..818fceb3091e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3782,12 +3782,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) | |||
3782 | svm_complete_interrupts(svm); | 3782 | svm_complete_interrupts(svm); |
3783 | } | 3783 | } |
3784 | 3784 | ||
3785 | #ifdef CONFIG_X86_64 | ||
3786 | #define R "r" | ||
3787 | #else | ||
3788 | #define R "e" | ||
3789 | #endif | ||
3790 | |||
3791 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) | 3785 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
3792 | { | 3786 | { |
3793 | struct vcpu_svm *svm = to_svm(vcpu); | 3787 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -3814,13 +3808,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3814 | local_irq_enable(); | 3808 | local_irq_enable(); |
3815 | 3809 | ||
3816 | asm volatile ( | 3810 | asm volatile ( |
3817 | "push %%"R"bp; \n\t" | 3811 | "push %%" _ASM_BP "; \n\t" |
3818 | "mov %c[rbx](%[svm]), %%"R"bx \n\t" | 3812 | "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" |
3819 | "mov %c[rcx](%[svm]), %%"R"cx \n\t" | 3813 | "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" |
3820 | "mov %c[rdx](%[svm]), %%"R"dx \n\t" | 3814 | "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" |
3821 | "mov %c[rsi](%[svm]), %%"R"si \n\t" | 3815 | "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" |
3822 | "mov %c[rdi](%[svm]), %%"R"di \n\t" | 3816 | "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" |
3823 | "mov %c[rbp](%[svm]), %%"R"bp \n\t" | 3817 | "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" |
3824 | #ifdef CONFIG_X86_64 | 3818 | #ifdef CONFIG_X86_64 |
3825 | "mov %c[r8](%[svm]), %%r8 \n\t" | 3819 | "mov %c[r8](%[svm]), %%r8 \n\t" |
3826 | "mov %c[r9](%[svm]), %%r9 \n\t" | 3820 | "mov %c[r9](%[svm]), %%r9 \n\t" |
@@ -3833,20 +3827,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3833 | #endif | 3827 | #endif |
3834 | 3828 | ||
3835 | /* Enter guest mode */ | 3829 | /* Enter guest mode */ |
3836 | "push %%"R"ax \n\t" | 3830 | "push %%" _ASM_AX " \n\t" |
3837 | "mov %c[vmcb](%[svm]), %%"R"ax \n\t" | 3831 | "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" |
3838 | __ex(SVM_VMLOAD) "\n\t" | 3832 | __ex(SVM_VMLOAD) "\n\t" |
3839 | __ex(SVM_VMRUN) "\n\t" | 3833 | __ex(SVM_VMRUN) "\n\t" |
3840 | __ex(SVM_VMSAVE) "\n\t" | 3834 | __ex(SVM_VMSAVE) "\n\t" |
3841 | "pop %%"R"ax \n\t" | 3835 | "pop %%" _ASM_AX " \n\t" |
3842 | 3836 | ||
3843 | /* Save guest registers, load host registers */ | 3837 | /* Save guest registers, load host registers */ |
3844 | "mov %%"R"bx, %c[rbx](%[svm]) \n\t" | 3838 | "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" |
3845 | "mov %%"R"cx, %c[rcx](%[svm]) \n\t" | 3839 | "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" |
3846 | "mov %%"R"dx, %c[rdx](%[svm]) \n\t" | 3840 | "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" |
3847 | "mov %%"R"si, %c[rsi](%[svm]) \n\t" | 3841 | "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" |
3848 | "mov %%"R"di, %c[rdi](%[svm]) \n\t" | 3842 | "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" |
3849 | "mov %%"R"bp, %c[rbp](%[svm]) \n\t" | 3843 | "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" |
3850 | #ifdef CONFIG_X86_64 | 3844 | #ifdef CONFIG_X86_64 |
3851 | "mov %%r8, %c[r8](%[svm]) \n\t" | 3845 | "mov %%r8, %c[r8](%[svm]) \n\t" |
3852 | "mov %%r9, %c[r9](%[svm]) \n\t" | 3846 | "mov %%r9, %c[r9](%[svm]) \n\t" |
@@ -3857,7 +3851,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3857 | "mov %%r14, %c[r14](%[svm]) \n\t" | 3851 | "mov %%r14, %c[r14](%[svm]) \n\t" |
3858 | "mov %%r15, %c[r15](%[svm]) \n\t" | 3852 | "mov %%r15, %c[r15](%[svm]) \n\t" |
3859 | #endif | 3853 | #endif |
3860 | "pop %%"R"bp" | 3854 | "pop %%" _ASM_BP |
3861 | : | 3855 | : |
3862 | : [svm]"a"(svm), | 3856 | : [svm]"a"(svm), |
3863 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), | 3857 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), |
@@ -3878,9 +3872,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3878 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) | 3872 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) |
3879 | #endif | 3873 | #endif |
3880 | : "cc", "memory" | 3874 | : "cc", "memory" |
3881 | , R"bx", R"cx", R"dx", R"si", R"di" | ||
3882 | #ifdef CONFIG_X86_64 | 3875 | #ifdef CONFIG_X86_64 |
3876 | , "rbx", "rcx", "rdx", "rsi", "rdi" | ||
3883 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" | 3877 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" |
3878 | #else | ||
3879 | , "ebx", "ecx", "edx", "esi", "edi" | ||
3884 | #endif | 3880 | #endif |
3885 | ); | 3881 | ); |
3886 | 3882 | ||
@@ -3940,8 +3936,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3940 | mark_all_clean(svm->vmcb); | 3936 | mark_all_clean(svm->vmcb); |
3941 | } | 3937 | } |
3942 | 3938 | ||
3943 | #undef R | ||
3944 | |||
3945 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | 3939 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) |
3946 | { | 3940 | { |
3947 | struct vcpu_svm *svm = to_svm(vcpu); | 3941 | struct vcpu_svm *svm = to_svm(vcpu); |
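Both the SVM and VMX conversions drop the local R/Q macros in favour of the _ASM_* helpers from the x86 asm.h, which expand to the 32- or 64-bit register name for the current build and get pasted into the asm template by ordinary C string concatenation. A deliberately simplified illustration of the idea (the real macros are more elaborate; only _ASM_SP is sketched here, and only for x86):

    #include <stdio.h>

    #ifdef __x86_64__
    # define _ASM_SP "rsp"                 /* simplified stand-in for the asm.h macro */
    #else
    # define _ASM_SP "esp"
    #endif

    static unsigned long current_sp(void)
    {
        unsigned long sp;

        /* the literals concatenate to "mov %rsp, %0" or "mov %esp, %0" */
        asm volatile ("mov %%" _ASM_SP ", %0" : "=r" (sp));
        return sp;
    }

    int main(void)
    {
        printf("stack pointer: %#lx\n", current_sp());
        return 0;
    }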
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d62b4139a292..30bcb953afee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO); | |||
127 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 127 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
128 | module_param(ple_window, int, S_IRUGO); | 128 | module_param(ple_window, int, S_IRUGO); |
129 | 129 | ||
130 | extern const ulong vmx_return; | ||
131 | |||
130 | #define NR_AUTOLOAD_MSRS 8 | 132 | #define NR_AUTOLOAD_MSRS 8 |
131 | #define VMCS02_POOL_SIZE 1 | 133 | #define VMCS02_POOL_SIZE 1 |
132 | 134 | ||
@@ -3724,8 +3726,7 @@ static void vmx_set_constant_host_state(void) | |||
3724 | native_store_idt(&dt); | 3726 | native_store_idt(&dt); |
3725 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | 3727 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
3726 | 3728 | ||
3727 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl)); | 3729 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ |
3728 | vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */ | ||
3729 | 3730 | ||
3730 | rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); | 3731 | rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); |
3731 | vmcs_write32(HOST_IA32_SYSENTER_CS, low32); | 3732 | vmcs_write32(HOST_IA32_SYSENTER_CS, low32); |
@@ -6183,14 +6184,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) | |||
6183 | msrs[i].host); | 6184 | msrs[i].host); |
6184 | } | 6185 | } |
6185 | 6186 | ||
6186 | #ifdef CONFIG_X86_64 | ||
6187 | #define R "r" | ||
6188 | #define Q "q" | ||
6189 | #else | ||
6190 | #define R "e" | ||
6191 | #define Q "l" | ||
6192 | #endif | ||
6193 | |||
6194 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | 6187 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
6195 | { | 6188 | { |
6196 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6189 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -6239,30 +6232,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6239 | vmx->__launched = vmx->loaded_vmcs->launched; | 6232 | vmx->__launched = vmx->loaded_vmcs->launched; |
6240 | asm( | 6233 | asm( |
6241 | /* Store host registers */ | 6234 | /* Store host registers */ |
6242 | "push %%"R"dx; push %%"R"bp;" | 6235 | "push %%" _ASM_DX "; push %%" _ASM_BP ";" |
6243 | "push %%"R"cx \n\t" /* placeholder for guest rcx */ | 6236 | "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ |
6244 | "push %%"R"cx \n\t" | 6237 | "push %%" _ASM_CX " \n\t" |
6245 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" | 6238 | "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" |
6246 | "je 1f \n\t" | 6239 | "je 1f \n\t" |
6247 | "mov %%"R"sp, %c[host_rsp](%0) \n\t" | 6240 | "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" |
6248 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" | 6241 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
6249 | "1: \n\t" | 6242 | "1: \n\t" |
6250 | /* Reload cr2 if changed */ | 6243 | /* Reload cr2 if changed */ |
6251 | "mov %c[cr2](%0), %%"R"ax \n\t" | 6244 | "mov %c[cr2](%0), %%" _ASM_AX " \n\t" |
6252 | "mov %%cr2, %%"R"dx \n\t" | 6245 | "mov %%cr2, %%" _ASM_DX " \n\t" |
6253 | "cmp %%"R"ax, %%"R"dx \n\t" | 6246 | "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" |
6254 | "je 2f \n\t" | 6247 | "je 2f \n\t" |
6255 | "mov %%"R"ax, %%cr2 \n\t" | 6248 | "mov %%" _ASM_AX", %%cr2 \n\t" |
6256 | "2: \n\t" | 6249 | "2: \n\t" |
6257 | /* Check if vmlaunch of vmresume is needed */ | 6250 | /* Check if vmlaunch of vmresume is needed */ |
6258 | "cmpl $0, %c[launched](%0) \n\t" | 6251 | "cmpl $0, %c[launched](%0) \n\t" |
6259 | /* Load guest registers. Don't clobber flags. */ | 6252 | /* Load guest registers. Don't clobber flags. */ |
6260 | "mov %c[rax](%0), %%"R"ax \n\t" | 6253 | "mov %c[rax](%0), %%" _ASM_AX " \n\t" |
6261 | "mov %c[rbx](%0), %%"R"bx \n\t" | 6254 | "mov %c[rbx](%0), %%" _ASM_BX " \n\t" |
6262 | "mov %c[rdx](%0), %%"R"dx \n\t" | 6255 | "mov %c[rdx](%0), %%" _ASM_DX " \n\t" |
6263 | "mov %c[rsi](%0), %%"R"si \n\t" | 6256 | "mov %c[rsi](%0), %%" _ASM_SI " \n\t" |
6264 | "mov %c[rdi](%0), %%"R"di \n\t" | 6257 | "mov %c[rdi](%0), %%" _ASM_DI " \n\t" |
6265 | "mov %c[rbp](%0), %%"R"bp \n\t" | 6258 | "mov %c[rbp](%0), %%" _ASM_BP " \n\t" |
6266 | #ifdef CONFIG_X86_64 | 6259 | #ifdef CONFIG_X86_64 |
6267 | "mov %c[r8](%0), %%r8 \n\t" | 6260 | "mov %c[r8](%0), %%r8 \n\t" |
6268 | "mov %c[r9](%0), %%r9 \n\t" | 6261 | "mov %c[r9](%0), %%r9 \n\t" |
@@ -6273,24 +6266,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6273 | "mov %c[r14](%0), %%r14 \n\t" | 6266 | "mov %c[r14](%0), %%r14 \n\t" |
6274 | "mov %c[r15](%0), %%r15 \n\t" | 6267 | "mov %c[r15](%0), %%r15 \n\t" |
6275 | #endif | 6268 | #endif |
6276 | "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ | 6269 | "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ |
6277 | 6270 | ||
6278 | /* Enter guest mode */ | 6271 | /* Enter guest mode */ |
6279 | "jne .Llaunched \n\t" | 6272 | "jne 1f \n\t" |
6280 | __ex(ASM_VMX_VMLAUNCH) "\n\t" | 6273 | __ex(ASM_VMX_VMLAUNCH) "\n\t" |
6281 | "jmp .Lkvm_vmx_return \n\t" | 6274 | "jmp 2f \n\t" |
6282 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 6275 | "1: " __ex(ASM_VMX_VMRESUME) "\n\t" |
6283 | ".Lkvm_vmx_return: " | 6276 | "2: " |
6284 | /* Save guest registers, load host registers, keep flags */ | 6277 | /* Save guest registers, load host registers, keep flags */ |
6285 | "mov %0, %c[wordsize](%%"R"sp) \n\t" | 6278 | "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" |
6286 | "pop %0 \n\t" | 6279 | "pop %0 \n\t" |
6287 | "mov %%"R"ax, %c[rax](%0) \n\t" | 6280 | "mov %%" _ASM_AX ", %c[rax](%0) \n\t" |
6288 | "mov %%"R"bx, %c[rbx](%0) \n\t" | 6281 | "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" |
6289 | "pop"Q" %c[rcx](%0) \n\t" | 6282 | __ASM_SIZE(pop) " %c[rcx](%0) \n\t" |
6290 | "mov %%"R"dx, %c[rdx](%0) \n\t" | 6283 | "mov %%" _ASM_DX ", %c[rdx](%0) \n\t" |
6291 | "mov %%"R"si, %c[rsi](%0) \n\t" | 6284 | "mov %%" _ASM_SI ", %c[rsi](%0) \n\t" |
6292 | "mov %%"R"di, %c[rdi](%0) \n\t" | 6285 | "mov %%" _ASM_DI ", %c[rdi](%0) \n\t" |
6293 | "mov %%"R"bp, %c[rbp](%0) \n\t" | 6286 | "mov %%" _ASM_BP ", %c[rbp](%0) \n\t" |
6294 | #ifdef CONFIG_X86_64 | 6287 | #ifdef CONFIG_X86_64 |
6295 | "mov %%r8, %c[r8](%0) \n\t" | 6288 | "mov %%r8, %c[r8](%0) \n\t" |
6296 | "mov %%r9, %c[r9](%0) \n\t" | 6289 | "mov %%r9, %c[r9](%0) \n\t" |
@@ -6301,11 +6294,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6301 | "mov %%r14, %c[r14](%0) \n\t" | 6294 | "mov %%r14, %c[r14](%0) \n\t" |
6302 | "mov %%r15, %c[r15](%0) \n\t" | 6295 | "mov %%r15, %c[r15](%0) \n\t" |
6303 | #endif | 6296 | #endif |
6304 | "mov %%cr2, %%"R"ax \n\t" | 6297 | "mov %%cr2, %%" _ASM_AX " \n\t" |
6305 | "mov %%"R"ax, %c[cr2](%0) \n\t" | 6298 | "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" |
6306 | 6299 | ||
6307 | "pop %%"R"bp; pop %%"R"dx \n\t" | 6300 | "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" |
6308 | "setbe %c[fail](%0) \n\t" | 6301 | "setbe %c[fail](%0) \n\t" |
6302 | ".pushsection .rodata \n\t" | ||
6303 | ".global vmx_return \n\t" | ||
6304 | "vmx_return: " _ASM_PTR " 2b \n\t" | ||
6305 | ".popsection" | ||
6309 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 6306 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
6310 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), | 6307 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), |
6311 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 6308 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), |
@@ -6330,9 +6327,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6330 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), | 6327 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), |
6331 | [wordsize]"i"(sizeof(ulong)) | 6328 | [wordsize]"i"(sizeof(ulong)) |
6332 | : "cc", "memory" | 6329 | : "cc", "memory" |
6333 | , R"ax", R"bx", R"di", R"si" | ||
6334 | #ifdef CONFIG_X86_64 | 6330 | #ifdef CONFIG_X86_64 |
6331 | , "rax", "rbx", "rdi", "rsi" | ||
6335 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | 6332 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" |
6333 | #else | ||
6334 | , "eax", "ebx", "edi", "esi" | ||
6336 | #endif | 6335 | #endif |
6337 | ); | 6336 | ); |
6338 | 6337 | ||
@@ -6384,9 +6383,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6384 | vmx_complete_interrupts(vmx); | 6383 | vmx_complete_interrupts(vmx); |
6385 | } | 6384 | } |
6386 | 6385 | ||
6387 | #undef R | ||
6388 | #undef Q | ||
6389 | |||
6390 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | 6386 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) |
6391 | { | 6387 | { |
6392 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6388 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
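The lto-friendly change replaces the named .Lkvm_vmx_return label, which vmx_set_constant_host_state() used to look up with a separate asm("mov $.Lkvm_vmx_return, %0"), with a global vmx_return pointer emitted into .rodata from inside the vcpu-run asm block and declared extern on the C side. A minimal user-space rendering of the same trick, with an invented symbol name (x86-64 only; the kernel uses _ASM_PTR so the same source works with .long or .quad):

    #include <stdio.h>

    extern const unsigned long code_mark;       /* defined by the asm below */

    static void emit_mark(void)
    {
        asm volatile ("1:\n\t"
                      ".pushsection .rodata\n\t"
                      ".globl code_mark\n\t"
                      "code_mark: .quad 1b\n\t" /* store the address of label 1 */
                      ".popsection");
    }

    int main(void)
    {
        emit_mark();                            /* make sure the asm block is emitted */
        printf("label lives at %#lx\n", code_mark);
        return 0;
    }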
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c4d451ed1573..497226e49d4b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3672,20 +3672,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | |||
3672 | gpa_t *gpa, struct x86_exception *exception, | 3672 | gpa_t *gpa, struct x86_exception *exception, |
3673 | bool write) | 3673 | bool write) |
3674 | { | 3674 | { |
3675 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3675 | u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0) |
3676 | | (write ? PFERR_WRITE_MASK : 0); | ||
3676 | 3677 | ||
3677 | if (vcpu_match_mmio_gva(vcpu, gva) && | 3678 | if (vcpu_match_mmio_gva(vcpu, gva) |
3678 | check_write_user_access(vcpu, write, access, | 3679 | && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) { |
3679 | vcpu->arch.access)) { | ||
3680 | *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | | 3680 | *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | |
3681 | (gva & (PAGE_SIZE - 1)); | 3681 | (gva & (PAGE_SIZE - 1)); |
3682 | trace_vcpu_match_mmio(gva, *gpa, write, false); | 3682 | trace_vcpu_match_mmio(gva, *gpa, write, false); |
3683 | return 1; | 3683 | return 1; |
3684 | } | 3684 | } |
3685 | 3685 | ||
3686 | if (write) | ||
3687 | access |= PFERR_WRITE_MASK; | ||
3688 | |||
3689 | *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); | 3686 | *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3690 | 3687 | ||
3691 | if (*gpa == UNMAPPED_GVA) | 3688 | if (*gpa == UNMAPPED_GVA) |
@@ -6016,7 +6013,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6016 | int r; | 6013 | int r; |
6017 | 6014 | ||
6018 | vcpu->arch.mtrr_state.have_fixed = 1; | 6015 | vcpu->arch.mtrr_state.have_fixed = 1; |
6019 | vcpu_load(vcpu); | 6016 | r = vcpu_load(vcpu); |
6017 | if (r) | ||
6018 | return r; | ||
6020 | r = kvm_arch_vcpu_reset(vcpu); | 6019 | r = kvm_arch_vcpu_reset(vcpu); |
6021 | if (r == 0) | 6020 | if (r == 0) |
6022 | r = kvm_mmu_setup(vcpu); | 6021 | r = kvm_mmu_setup(vcpu); |
@@ -6027,9 +6026,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6027 | 6026 | ||
6028 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 6027 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
6029 | { | 6028 | { |
6029 | int r; | ||
6030 | vcpu->arch.apf.msr_val = 0; | 6030 | vcpu->arch.apf.msr_val = 0; |
6031 | 6031 | ||
6032 | vcpu_load(vcpu); | 6032 | r = vcpu_load(vcpu); |
6033 | BUG_ON(r); | ||
6033 | kvm_mmu_unload(vcpu); | 6034 | kvm_mmu_unload(vcpu); |
6034 | vcpu_put(vcpu); | 6035 | vcpu_put(vcpu); |
6035 | 6036 | ||
@@ -6275,7 +6276,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
6275 | 6276 | ||
6276 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 6277 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
6277 | { | 6278 | { |
6278 | vcpu_load(vcpu); | 6279 | int r; |
6280 | r = vcpu_load(vcpu); | ||
6281 | BUG_ON(r); | ||
6279 | kvm_mmu_unload(vcpu); | 6282 | kvm_mmu_unload(vcpu); |
6280 | vcpu_put(vcpu); | 6283 | vcpu_put(vcpu); |
6281 | } | 6284 | } |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 40791930bc15..80bfc880921e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -408,7 +408,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) | |||
408 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); | 408 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); |
409 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); | 409 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); |
410 | 410 | ||
411 | void vcpu_load(struct kvm_vcpu *vcpu); | 411 | int __must_check vcpu_load(struct kvm_vcpu *vcpu); |
412 | void vcpu_put(struct kvm_vcpu *vcpu); | 412 | void vcpu_put(struct kvm_vcpu *vcpu); |
413 | 413 | ||
414 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | 414 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
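Because vcpu_load() can now fail, its prototype is tagged __must_check so that any caller ignoring the return value gets a compile-time warning. In the kernel __must_check expands to GCC's warn_unused_result attribute; a tiny stand-alone demonstration of the effect:

    #include <errno.h>
    #include <stdio.h>

    #define __must_check __attribute__((warn_unused_result))

    static __must_check int try_lock(void)
    {
        return -EINTR;                  /* pretend the sleeping lock was interrupted */
    }

    int main(void)
    {
        /* try_lock();                     <- this line alone would trigger a warning */
        if (try_lock())
            fprintf(stderr, "lock not taken, bailing out\n");
        return 0;
    }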
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fe02d900810..cc3f6dc506e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -131,11 +131,12 @@ bool kvm_is_mmio_pfn(pfn_t pfn) | |||
131 | /* | 131 | /* |
132 | * Switches to specified vcpu, until a matching vcpu_put() | 132 | * Switches to specified vcpu, until a matching vcpu_put() |
133 | */ | 133 | */ |
134 | void vcpu_load(struct kvm_vcpu *vcpu) | 134 | int vcpu_load(struct kvm_vcpu *vcpu) |
135 | { | 135 | { |
136 | int cpu; | 136 | int cpu; |
137 | 137 | ||
138 | mutex_lock(&vcpu->mutex); | 138 | if (mutex_lock_killable(&vcpu->mutex)) |
139 | return -EINTR; | ||
139 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { | 140 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { |
140 | /* The thread running this VCPU changed. */ | 141 | /* The thread running this VCPU changed. */ |
141 | struct pid *oldpid = vcpu->pid; | 142 | struct pid *oldpid = vcpu->pid; |
@@ -148,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) | |||
148 | preempt_notifier_register(&vcpu->preempt_notifier); | 149 | preempt_notifier_register(&vcpu->preempt_notifier); |
149 | kvm_arch_vcpu_load(vcpu, cpu); | 150 | kvm_arch_vcpu_load(vcpu, cpu); |
150 | put_cpu(); | 151 | put_cpu(); |
152 | return 0; | ||
151 | } | 153 | } |
152 | 154 | ||
153 | void vcpu_put(struct kvm_vcpu *vcpu) | 155 | void vcpu_put(struct kvm_vcpu *vcpu) |
@@ -1891,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
1891 | #endif | 1893 | #endif |
1892 | 1894 | ||
1893 | 1895 | ||
1894 | vcpu_load(vcpu); | 1896 | r = vcpu_load(vcpu); |
1897 | if (r) | ||
1898 | return r; | ||
1895 | switch (ioctl) { | 1899 | switch (ioctl) { |
1896 | case KVM_RUN: | 1900 | case KVM_RUN: |
1897 | r = -EINVAL; | 1901 | r = -EINVAL; |