author    Avi Kivity <avi@redhat.com>    2012-09-20 08:04:41 -0400
committer Avi Kivity <avi@redhat.com>    2012-09-20 08:04:41 -0400
commit    1d86b5cc4c6d9a1be1458be3701ac9c915a9706f (patch)
tree      313fee0269d75f0dd5146762dd32c78b8ced4f75
parent    ecba9a52acdf20530d561b7634b80c35c308943a (diff)
parent    c5421519f30bd5ed77857a78de6dc8414385e602 (diff)
Merge branch 'queue' into next
* queue:
  KVM: MMU: Eliminate pointless temporary 'ac'
  KVM: MMU: Avoid access/dirty update loop if all is well
  KVM: MMU: Eliminate eperm temporary
  KVM: MMU: Optimize is_last_gpte()
  KVM: MMU: Simplify walk_addr_generic() loop
  KVM: MMU: Optimize pte permission checks
  KVM: MMU: Update accessed and dirty bits after guest pagetable walk
  KVM: MMU: Move gpte_access() out of paging_tmpl.h
  KVM: MMU: Optimize gpte_access() slightly
  KVM: MMU: Push clean gpte write protection out of gpte_access()
  KVM: clarify kvmclock documentation
  KVM: make processes waiting on vcpu mutex killable
  KVM: SVM: Make use of asm.h
  KVM: VMX: Make use of asm.h
  KVM: VMX: Make lto-friendly

Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--  Documentation/virtual/kvm/msr.txt |  32
-rw-r--r--  arch/x86/include/asm/kvm_host.h   |  14
-rw-r--r--  arch/x86/kvm/mmu.c                |  91
-rw-r--r--  arch/x86/kvm/mmu.h                |  25
-rw-r--r--  arch/x86/kvm/paging_tmpl.h        | 191
-rw-r--r--  arch/x86/kvm/svm.c                |  46
-rw-r--r--  arch/x86/kvm/vmx.c                |  86
-rw-r--r--  arch/x86/kvm/x86.c                |  23
-rw-r--r--  include/linux/kvm_host.h          |   2
-rw-r--r--  virt/kvm/kvm_main.c               |  10
10 files changed, 300 insertions(+), 220 deletions(-)
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 730471048583..6d470ae7b073 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -34,9 +34,12 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
34 time information and check that they are both equal and even. 34 time information and check that they are both equal and even.
35 An odd version indicates an in-progress update. 35 An odd version indicates an in-progress update.
36 36
37 sec: number of seconds for wallclock. 37 sec: number of seconds for wallclock at time of boot.
38 38
39 nsec: number of nanoseconds for wallclock. 39 nsec: number of nanoseconds for wallclock at time of boot.
40
41 In order to get the current wallclock time, the system_time from
42 MSR_KVM_SYSTEM_TIME_NEW needs to be added.
40 43
41 Note that although MSRs are per-CPU entities, the effect of this 44 Note that although MSRs are per-CPU entities, the effect of this
42 particular MSR is global. 45 particular MSR is global.
@@ -82,20 +85,25 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
82 time at the time this structure was last updated. Unit is 85 time at the time this structure was last updated. Unit is
83 nanoseconds. 86 nanoseconds.
84 87
85 tsc_to_system_mul: a function of the tsc frequency. One has 88 tsc_to_system_mul: multiplier to be used when converting
86 to multiply any tsc-related quantity by this value to get 89 tsc-related quantity to nanoseconds
87 a value in nanoseconds, besides dividing by 2^tsc_shift
88 90
89 tsc_shift: cycle to nanosecond divider, as a power of two, to 91 tsc_shift: shift to be used when converting tsc-related
90 allow for shift rights. One has to shift right any tsc-related 92 quantity to nanoseconds. This shift will ensure that
91 quantity by this value to get a value in nanoseconds, besides 93 multiplication with tsc_to_system_mul does not overflow.
92 multiplying by tsc_to_system_mul. 94 A positive value denotes a left shift, a negative value
95 a right shift.
93 96
94 With this information, guests can derive per-CPU time by 97 The conversion from tsc to nanoseconds involves an additional
95 doing: 98 right shift by 32 bits. With this information, guests can
99 derive per-CPU time by doing:
96 100
97 time = (current_tsc - tsc_timestamp) 101 time = (current_tsc - tsc_timestamp)
98 time = (time * tsc_to_system_mul) >> tsc_shift 102 if (tsc_shift >= 0)
103 time <<= tsc_shift;
104 else
105 time >>= -tsc_shift;
106 time = (time * tsc_to_system_mul) >> 32
99 time = time + system_time 107 time = time + system_time
100 108
101 flags: bits in this field indicate extended capabilities 109 flags: bits in this field indicate extended capabilities
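
The reworked kvmclock text above fully specifies the guest-side conversion, so a small standalone sketch may help. The struct below only mirrors the fields named in the documentation (the real layout lives in the kvmclock ABI headers), and the 128-bit intermediate is a GCC/Clang convenience where the kernel would use a widening-multiply helper; this is an illustration, not the kernel's implementation.

#include <stdint.h>

/* Illustrative only: field names follow the MSR documentation above. */
struct kvmclock_time_info {
    uint64_t tsc_timestamp;      /* host TSC value at the last update */
    uint64_t system_time;        /* nanoseconds since boot at the last update */
    uint32_t tsc_to_system_mul;  /* 32.32 fixed-point multiplier */
    int8_t   tsc_shift;          /* pre-scale; >= 0 means shift left */
};

static uint64_t kvmclock_system_ns(const struct kvmclock_time_info *ti,
                                   uint64_t current_tsc)
{
    uint64_t time = current_tsc - ti->tsc_timestamp;

    /* Pre-scale so the 32-bit multiply below cannot overflow. */
    if (ti->tsc_shift >= 0)
        time <<= ti->tsc_shift;
    else
        time >>= -ti->tsc_shift;

    /* Multiply by the 32.32 fixed-point factor, then drop the fraction. */
    time = (uint64_t)(((unsigned __int128)time * ti->tsc_to_system_mul) >> 32);

    return time + ti->system_time;
}

Current wallclock time is then the boot-time sec/nsec pair read via MSR_KVM_WALL_CLOCK_NEW plus the per-CPU system time computed this way.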
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64adb6117e19..43aeb9422839 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -287,10 +287,24 @@ struct kvm_mmu {
287 union kvm_mmu_page_role base_role; 287 union kvm_mmu_page_role base_role;
288 bool direct_map; 288 bool direct_map;
289 289
290 /*
291 * Bitmap; bit set = permission fault
292 * Byte index: page fault error code [4:1]
293 * Bit index: pte permissions in ACC_* format
294 */
295 u8 permissions[16];
296
290 u64 *pae_root; 297 u64 *pae_root;
291 u64 *lm_root; 298 u64 *lm_root;
292 u64 rsvd_bits_mask[2][4]; 299 u64 rsvd_bits_mask[2][4];
293 300
301 /*
302 * Bitmap: bit set = last pte in walk
303 * index[0:1]: level (zero-based)
304 * index[2]: pte.ps
305 */
306 u8 last_pte_bitmap;
307
294 bool nx; 308 bool nx;
295 309
296 u64 pdptrs[4]; /* pae */ 310 u64 pdptrs[4]; /* pae */
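
The two comments added to struct kvm_mmu compress the indexing scheme quite a bit. Below is a minimal sketch of the permissions[] lookup they describe, with constants mirroring the kernel's PFERR_*/ACC_* values; the helper name is made up, and the real consumer is permission_fault() in mmu.h, further down in this diff.

#include <stdbool.h>
#include <stdint.h>

#define PFERR_WRITE_MASK (1u << 1)   /* page-fault error code bits */
#define PFERR_USER_MASK  (1u << 2)
#define PFERR_RSVD_MASK  (1u << 3)
#define PFERR_FETCH_MASK (1u << 4)

#define ACC_EXEC_MASK  1u            /* pte permissions in ACC_* format */
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

/* byte index = error code bits [4:1], bit index = the pte's ACC_* bits;
 * a set bit means "this access to this pte is a permission fault". */
static bool lookup_permission_fault(const uint8_t permissions[16],
                                    unsigned pte_access, unsigned pfec)
{
    return (permissions[pfec >> 1] >> pte_access) & 1;
}

For example, a user-mode write (pfec = PFERR_USER_MASK | PFERR_WRITE_MASK, byte 3) to a read-only user page (pte_access = ACC_USER_MASK | ACC_EXEC_MASK, bit 5) faults unconditionally, while the same write from the kernel (byte 1) faults only when the bitmap was built with CR0.WP set. The last_pte_bitmap field is queried the same way, with a 3-bit index built from the walk level and the PS bit.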
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aa0b469ee07d..d289fee1ffb8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
3408 return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; 3408 return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
3409} 3409}
3410 3410
3411static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
3412{
3413 unsigned mask;
3414
3415 BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
3416
3417 mask = (unsigned)~ACC_WRITE_MASK;
3418 /* Allow write access to dirty gptes */
3419 mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
3420 *access &= mask;
3421}
3422
3411static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, 3423static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
3412 int *nr_present) 3424 int *nr_present)
3413{ 3425{
@@ -3425,6 +3437,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
3425 return false; 3437 return false;
3426} 3438}
3427 3439
3440static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
3441{
3442 unsigned access;
3443
3444 access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
3445 access &= ~(gpte >> PT64_NX_SHIFT);
3446
3447 return access;
3448}
3449
3450static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
3451{
3452 unsigned index;
3453
3454 index = level - 1;
3455 index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
3456 return mmu->last_pte_bitmap & (1 << index);
3457}
3458
3428#define PTTYPE 64 3459#define PTTYPE 64
3429#include "paging_tmpl.h" 3460#include "paging_tmpl.h"
3430#undef PTTYPE 3461#undef PTTYPE
@@ -3494,6 +3525,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
3494 } 3525 }
3495} 3526}
3496 3527
3528static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
3529{
3530 unsigned bit, byte, pfec;
3531 u8 map;
3532 bool fault, x, w, u, wf, uf, ff, smep;
3533
3534 smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
3535 for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
3536 pfec = byte << 1;
3537 map = 0;
3538 wf = pfec & PFERR_WRITE_MASK;
3539 uf = pfec & PFERR_USER_MASK;
3540 ff = pfec & PFERR_FETCH_MASK;
3541 for (bit = 0; bit < 8; ++bit) {
3542 x = bit & ACC_EXEC_MASK;
3543 w = bit & ACC_WRITE_MASK;
3544 u = bit & ACC_USER_MASK;
3545
3546 /* Not really needed: !nx will cause pte.nx to fault */
3547 x |= !mmu->nx;
3548 /* Allow supervisor writes if !cr0.wp */
3549 w |= !is_write_protection(vcpu) && !uf;
3550 /* Disallow supervisor fetches of user code if cr4.smep */
3551 x &= !(smep && u && !uf);
3552
3553 fault = (ff && !x) || (uf && !u) || (wf && !w);
3554 map |= fault << bit;
3555 }
3556 mmu->permissions[byte] = map;
3557 }
3558}
3559
3560static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
3561{
3562 u8 map;
3563 unsigned level, root_level = mmu->root_level;
3564 const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */
3565
3566 if (root_level == PT32E_ROOT_LEVEL)
3567 --root_level;
3568 /* PT_PAGE_TABLE_LEVEL always terminates */
3569 map = 1 | (1 << ps_set_index);
3570 for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
3571 if (level <= PT_PDPE_LEVEL
3572 && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
3573 map |= 1 << (ps_set_index | (level - 1));
3574 }
3575 mmu->last_pte_bitmap = map;
3576}
3577
3497static int paging64_init_context_common(struct kvm_vcpu *vcpu, 3578static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3498 struct kvm_mmu *context, 3579 struct kvm_mmu *context,
3499 int level) 3580 int level)
@@ -3502,6 +3583,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3502 context->root_level = level; 3583 context->root_level = level;
3503 3584
3504 reset_rsvds_bits_mask(vcpu, context); 3585 reset_rsvds_bits_mask(vcpu, context);
3586 update_permission_bitmask(vcpu, context);
3587 update_last_pte_bitmap(vcpu, context);
3505 3588
3506 ASSERT(is_pae(vcpu)); 3589 ASSERT(is_pae(vcpu));
3507 context->new_cr3 = paging_new_cr3; 3590 context->new_cr3 = paging_new_cr3;
@@ -3530,6 +3613,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
3530 context->root_level = PT32_ROOT_LEVEL; 3613 context->root_level = PT32_ROOT_LEVEL;
3531 3614
3532 reset_rsvds_bits_mask(vcpu, context); 3615 reset_rsvds_bits_mask(vcpu, context);
3616 update_permission_bitmask(vcpu, context);
3617 update_last_pte_bitmap(vcpu, context);
3533 3618
3534 context->new_cr3 = paging_new_cr3; 3619 context->new_cr3 = paging_new_cr3;
3535 context->page_fault = paging32_page_fault; 3620 context->page_fault = paging32_page_fault;
@@ -3590,6 +3675,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3590 context->gva_to_gpa = paging32_gva_to_gpa; 3675 context->gva_to_gpa = paging32_gva_to_gpa;
3591 } 3676 }
3592 3677
3678 update_permission_bitmask(vcpu, context);
3679 update_last_pte_bitmap(vcpu, context);
3680
3593 return 0; 3681 return 0;
3594} 3682}
3595 3683
@@ -3665,6 +3753,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
3665 g_context->gva_to_gpa = paging32_gva_to_gpa_nested; 3753 g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
3666 } 3754 }
3667 3755
3756 update_permission_bitmask(vcpu, g_context);
3757 update_last_pte_bitmap(vcpu, g_context);
3758
3668 return 0; 3759 return 0;
3669} 3760}
3670 3761
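
gpte_access() and protect_clean_gpte() above rely on the ACC_* bits lining up with the hardware pte bits, so both reduce to shifts: bit 63 (NX) shifted down lands exactly on ACC_EXEC_MASK, and the dirty bit shifted right by PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT lands on ACC_WRITE_MASK. A small self-checking sketch of that arithmetic, with constants copied from mmu.h/mmu.c and the unused vcpu parameter dropped:

#include <assert.h>
#include <stdint.h>

#define PT_WRITABLE_SHIFT 1
#define PT_WRITABLE_MASK  (1ull << PT_WRITABLE_SHIFT)
#define PT_USER_MASK      (1ull << 2)
#define PT_DIRTY_SHIFT    6
#define PT_DIRTY_MASK     (1ull << PT_DIRTY_SHIFT)
#define PT64_NX_SHIFT     63
#define ACC_EXEC_MASK     1u
#define ACC_WRITE_MASK    ((unsigned)PT_WRITABLE_MASK)
#define ACC_USER_MASK     ((unsigned)PT_USER_MASK)

/* W/U come straight from the pte, X is the inverse of NX: shifting bit 63
 * down puts it on ACC_EXEC_MASK (bit 0). */
static unsigned gpte_access(uint64_t gpte)
{
    unsigned access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
    access &= ~(unsigned)(gpte >> PT64_NX_SHIFT);
    return access;
}

/* Drop write permission unless the gpte is already dirty, by shifting the
 * dirty bit (6) down onto the writable bit (1). */
static void protect_clean_gpte(unsigned *access, uint64_t gpte)
{
    unsigned mask = ~ACC_WRITE_MASK;
    mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
    *access &= mask;
}

int main(void)
{
    uint64_t clean = PT_WRITABLE_MASK | PT_USER_MASK;   /* writable, user, not dirty */
    uint64_t dirty = clean | PT_DIRTY_MASK;
    uint64_t nx    = clean | (1ull << PT64_NX_SHIFT);
    unsigned a = gpte_access(clean), b = gpte_access(dirty);

    protect_clean_gpte(&a, clean);
    assert(a == (ACC_USER_MASK | ACC_EXEC_MASK));        /* write stripped until dirty */
    protect_clean_gpte(&b, dirty);
    assert(b == (ACC_USER_MASK | ACC_WRITE_MASK | ACC_EXEC_MASK));
    assert(!(gpte_access(nx) & ACC_EXEC_MASK));          /* NX clears exec */
    return 0;
}

Write-protecting clean gptes this way means the first guest write still faults, which is what lets KVM set the dirty bit lazily.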
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e374db9af021..69871080e866 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -18,8 +18,10 @@
18#define PT_PCD_MASK (1ULL << 4) 18#define PT_PCD_MASK (1ULL << 4)
19#define PT_ACCESSED_SHIFT 5 19#define PT_ACCESSED_SHIFT 5
20#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) 20#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
21#define PT_DIRTY_MASK (1ULL << 6) 21#define PT_DIRTY_SHIFT 6
22#define PT_PAGE_SIZE_MASK (1ULL << 7) 22#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
23#define PT_PAGE_SIZE_SHIFT 7
24#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
23#define PT_PAT_MASK (1ULL << 7) 25#define PT_PAT_MASK (1ULL << 7)
24#define PT_GLOBAL_MASK (1ULL << 8) 26#define PT_GLOBAL_MASK (1ULL << 8)
25#define PT64_NX_SHIFT 63 27#define PT64_NX_SHIFT 63
@@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
88 return kvm_read_cr0_bits(vcpu, X86_CR0_WP); 90 return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
89} 91}
90 92
91static inline bool check_write_user_access(struct kvm_vcpu *vcpu, 93/*
92 bool write_fault, bool user_fault, 94 * Will a fault with a given page-fault error code (pfec) cause a permission
93 unsigned long pte) 95 * fault with the given access (in ACC_* format)?
96 */
97static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
98 unsigned pfec)
94{ 99{
95 if (unlikely(write_fault && !is_writable_pte(pte) 100 return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
96 && (user_fault || is_write_protection(vcpu))))
97 return false;
98
99 if (unlikely(user_fault && !(pte & PT_USER_MASK)))
100 return false;
101
102 return true;
103} 101}
102
104#endif 103#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bf8c42bf50fe..714e2c01a6fe 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -63,10 +63,12 @@
63 */ 63 */
64struct guest_walker { 64struct guest_walker {
65 int level; 65 int level;
66 unsigned max_level;
66 gfn_t table_gfn[PT_MAX_FULL_LEVELS]; 67 gfn_t table_gfn[PT_MAX_FULL_LEVELS];
67 pt_element_t ptes[PT_MAX_FULL_LEVELS]; 68 pt_element_t ptes[PT_MAX_FULL_LEVELS];
68 pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; 69 pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
69 gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; 70 gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
71 pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
70 unsigned pt_access; 72 unsigned pt_access;
71 unsigned pte_access; 73 unsigned pte_access;
72 gfn_t gfn; 74 gfn_t gfn;
@@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
101 return (ret != orig_pte); 103 return (ret != orig_pte);
102} 104}
103 105
104static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte, 106static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
105 bool last) 107 struct kvm_mmu *mmu,
108 struct guest_walker *walker,
109 int write_fault)
106{ 110{
107 unsigned access; 111 unsigned level, index;
108 112 pt_element_t pte, orig_pte;
109 access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; 113 pt_element_t __user *ptep_user;
110 if (last && !is_dirty_gpte(gpte)) 114 gfn_t table_gfn;
111 access &= ~ACC_WRITE_MASK; 115 int ret;
112 116
113#if PTTYPE == 64 117 for (level = walker->max_level; level >= walker->level; --level) {
114 if (vcpu->arch.mmu.nx) 118 pte = orig_pte = walker->ptes[level - 1];
115 access &= ~(gpte >> PT64_NX_SHIFT); 119 table_gfn = walker->table_gfn[level - 1];
116#endif 120 ptep_user = walker->ptep_user[level - 1];
117 return access; 121 index = offset_in_page(ptep_user) / sizeof(pt_element_t);
118} 122 if (!(pte & PT_ACCESSED_MASK)) {
119 123 trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
120static bool FNAME(is_last_gpte)(struct guest_walker *walker, 124 pte |= PT_ACCESSED_MASK;
121 struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, 125 }
122 pt_element_t gpte) 126 if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
123{ 127 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
124 if (walker->level == PT_PAGE_TABLE_LEVEL) 128 pte |= PT_DIRTY_MASK;
125 return true; 129 }
126 130 if (pte == orig_pte)
127 if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) && 131 continue;
128 (PTTYPE == 64 || is_pse(vcpu)))
129 return true;
130 132
131 if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) && 133 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
132 (mmu->root_level == PT64_ROOT_LEVEL)) 134 if (ret)
133 return true; 135 return ret;
134 136
135 return false; 137 mark_page_dirty(vcpu->kvm, table_gfn);
138 walker->ptes[level] = pte;
139 }
140 return 0;
136} 141}
137 142
138/* 143/*
@@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
142 struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, 147 struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
143 gva_t addr, u32 access) 148 gva_t addr, u32 access)
144{ 149{
150 int ret;
145 pt_element_t pte; 151 pt_element_t pte;
146 pt_element_t __user *uninitialized_var(ptep_user); 152 pt_element_t __user *uninitialized_var(ptep_user);
147 gfn_t table_gfn; 153 gfn_t table_gfn;
148 unsigned index, pt_access, uninitialized_var(pte_access); 154 unsigned index, pt_access, pte_access, accessed_dirty, shift;
149 gpa_t pte_gpa; 155 gpa_t pte_gpa;
150 bool eperm, last_gpte;
151 int offset; 156 int offset;
152 const int write_fault = access & PFERR_WRITE_MASK; 157 const int write_fault = access & PFERR_WRITE_MASK;
153 const int user_fault = access & PFERR_USER_MASK; 158 const int user_fault = access & PFERR_USER_MASK;
154 const int fetch_fault = access & PFERR_FETCH_MASK; 159 const int fetch_fault = access & PFERR_FETCH_MASK;
155 u16 errcode = 0; 160 u16 errcode = 0;
161 gpa_t real_gpa;
162 gfn_t gfn;
156 163
157 trace_kvm_mmu_pagetable_walk(addr, access); 164 trace_kvm_mmu_pagetable_walk(addr, access);
158retry_walk: 165retry_walk:
159 eperm = false;
160 walker->level = mmu->root_level; 166 walker->level = mmu->root_level;
161 pte = mmu->get_cr3(vcpu); 167 pte = mmu->get_cr3(vcpu);
162 168
@@ -169,15 +175,21 @@ retry_walk:
169 --walker->level; 175 --walker->level;
170 } 176 }
171#endif 177#endif
178 walker->max_level = walker->level;
172 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || 179 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
173 (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); 180 (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
174 181
175 pt_access = ACC_ALL; 182 accessed_dirty = PT_ACCESSED_MASK;
183 pt_access = pte_access = ACC_ALL;
184 ++walker->level;
176 185
177 for (;;) { 186 do {
178 gfn_t real_gfn; 187 gfn_t real_gfn;
179 unsigned long host_addr; 188 unsigned long host_addr;
180 189
190 pt_access &= pte_access;
191 --walker->level;
192
181 index = PT_INDEX(addr, walker->level); 193 index = PT_INDEX(addr, walker->level);
182 194
183 table_gfn = gpte_to_gfn(pte); 195 table_gfn = gpte_to_gfn(pte);
@@ -199,6 +211,7 @@ retry_walk:
199 ptep_user = (pt_element_t __user *)((void *)host_addr + offset); 211 ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
200 if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) 212 if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
201 goto error; 213 goto error;
214 walker->ptep_user[walker->level - 1] = ptep_user;
202 215
203 trace_kvm_mmu_paging_element(pte, walker->level); 216 trace_kvm_mmu_paging_element(pte, walker->level);
204 217
@@ -211,92 +224,48 @@ retry_walk:
211 goto error; 224 goto error;
212 } 225 }
213 226
214 if (!check_write_user_access(vcpu, write_fault, user_fault, 227 accessed_dirty &= pte;
215 pte)) 228 pte_access = pt_access & gpte_access(vcpu, pte);
216 eperm = true;
217
218#if PTTYPE == 64
219 if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
220 eperm = true;
221#endif
222
223 last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
224 if (last_gpte) {
225 pte_access = pt_access &
226 FNAME(gpte_access)(vcpu, pte, true);
227 /* check if the kernel is fetching from user page */
228 if (unlikely(pte_access & PT_USER_MASK) &&
229 kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
230 if (fetch_fault && !user_fault)
231 eperm = true;
232 }
233
234 if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
235 int ret;
236 trace_kvm_mmu_set_accessed_bit(table_gfn, index,
237 sizeof(pte));
238 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
239 pte, pte|PT_ACCESSED_MASK);
240 if (unlikely(ret < 0))
241 goto error;
242 else if (ret)
243 goto retry_walk;
244
245 mark_page_dirty(vcpu->kvm, table_gfn);
246 pte |= PT_ACCESSED_MASK;
247 }
248 229
249 walker->ptes[walker->level - 1] = pte; 230 walker->ptes[walker->level - 1] = pte;
231 } while (!is_last_gpte(mmu, walker->level, pte));
250 232
251 if (last_gpte) { 233 if (unlikely(permission_fault(mmu, pte_access, access))) {
252 int lvl = walker->level; 234 errcode |= PFERR_PRESENT_MASK;
253 gpa_t real_gpa; 235 goto error;
254 gfn_t gfn; 236 }
255 u32 ac;
256
257 gfn = gpte_to_gfn_lvl(pte, lvl);
258 gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
259
260 if (PTTYPE == 32 &&
261 walker->level == PT_DIRECTORY_LEVEL &&
262 is_cpuid_PSE36())
263 gfn += pse36_gfn_delta(pte);
264
265 ac = write_fault | fetch_fault | user_fault;
266 237
267 real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), 238 gfn = gpte_to_gfn_lvl(pte, walker->level);
268 ac); 239 gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
269 if (real_gpa == UNMAPPED_GVA)
270 return 0;
271 240
272 walker->gfn = real_gpa >> PAGE_SHIFT; 241 if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
242 gfn += pse36_gfn_delta(pte);
273 243
274 break; 244 real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
275 } 245 if (real_gpa == UNMAPPED_GVA)
246 return 0;
276 247
277 pt_access &= FNAME(gpte_access)(vcpu, pte, false); 248 walker->gfn = real_gpa >> PAGE_SHIFT;
278 --walker->level;
279 }
280 249
281 if (unlikely(eperm)) { 250 if (!write_fault)
282 errcode |= PFERR_PRESENT_MASK; 251 protect_clean_gpte(&pte_access, pte);
283 goto error;
284 }
285 252
286 if (write_fault && unlikely(!is_dirty_gpte(pte))) { 253 /*
287 int ret; 254 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
255 * place right.
256 *
257 * On a read fault, do nothing.
258 */
259 shift = write_fault >> ilog2(PFERR_WRITE_MASK);
260 shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
261 accessed_dirty &= pte >> shift;
288 262
289 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); 263 if (unlikely(!accessed_dirty)) {
290 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, 264 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
291 pte, pte|PT_DIRTY_MASK);
292 if (unlikely(ret < 0)) 265 if (unlikely(ret < 0))
293 goto error; 266 goto error;
294 else if (ret) 267 else if (ret)
295 goto retry_walk; 268 goto retry_walk;
296
297 mark_page_dirty(vcpu->kvm, table_gfn);
298 pte |= PT_DIRTY_MASK;
299 walker->ptes[walker->level - 1] = pte;
300 } 269 }
301 270
302 walker->pt_access = pt_access; 271 walker->pt_access = pt_access;
@@ -368,7 +337,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
368 return; 337 return;
369 338
370 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); 339 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
371 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); 340 pte_access = sp->role.access & gpte_access(vcpu, gpte);
341 protect_clean_gpte(&pte_access, gpte);
372 pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); 342 pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
373 if (mmu_invalid_pfn(pfn)) 343 if (mmu_invalid_pfn(pfn))
374 return; 344 return;
@@ -441,8 +411,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
441 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) 411 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
442 continue; 412 continue;
443 413
444 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, 414 pte_access = sp->role.access & gpte_access(vcpu, gpte);
445 true); 415 protect_clean_gpte(&pte_access, gpte);
446 gfn = gpte_to_gfn(gpte); 416 gfn = gpte_to_gfn(gpte);
447 pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, 417 pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
448 pte_access & ACC_WRITE_MASK); 418 pte_access & ACC_WRITE_MASK);
@@ -794,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
794 764
795 gfn = gpte_to_gfn(gpte); 765 gfn = gpte_to_gfn(gpte);
796 pte_access = sp->role.access; 766 pte_access = sp->role.access;
797 pte_access &= FNAME(gpte_access)(vcpu, gpte, true); 767 pte_access &= gpte_access(vcpu, gpte);
768 protect_clean_gpte(&pte_access, gpte);
798 769
799 if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) 770 if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
800 continue; 771 continue;
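
The accessed_dirty handling above is the "Avoid access/dirty update loop if all is well" optimization: the walker ANDs every pte into accessed_dirty and only falls back to update_accessed_dirty_bits() when a bit still needs setting. The write-fault case is folded in without a branch by shifting the leaf pte's dirty bit onto the accessed-bit position. A standalone sketch of just that check, with an illustrative function name:

#include <stdbool.h>
#include <stdint.h>

#define PT_ACCESSED_SHIFT 5
#define PT_ACCESSED_MASK  (1ull << PT_ACCESSED_SHIFT)
#define PT_DIRTY_SHIFT    6
#define PFERR_WRITE_MASK  (1u << 1)

/* 'accessed_dirty' started as PT_ACCESSED_MASK and was ANDed with every pte
 * in the walk, so only bit 5 can still be set. */
static bool ad_bits_need_update(uint64_t accessed_dirty, uint64_t leaf_pte,
                                unsigned pf_error_code)
{
    unsigned write_fault = pf_error_code & PFERR_WRITE_MASK;            /* 0 or 2 */
    /* 0 on a read fault; PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT (= 1) on a write fault */
    unsigned shift = (write_fault >> 1) * (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);

    /* On a write fault this moves the leaf's dirty bit onto the accessed-bit
     * position, so the result stays nonzero only if every pte was accessed
     * and the leaf is already dirty. */
    accessed_dirty &= leaf_pte >> shift;
    return accessed_dirty == 0;
}

When this returns true, the walker takes the slow path and retries if a racing update is detected, exactly as the hunk above does.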
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 611c72875fb9..818fceb3091e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3782,12 +3782,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3782 svm_complete_interrupts(svm); 3782 svm_complete_interrupts(svm);
3783} 3783}
3784 3784
3785#ifdef CONFIG_X86_64
3786#define R "r"
3787#else
3788#define R "e"
3789#endif
3790
3791static void svm_vcpu_run(struct kvm_vcpu *vcpu) 3785static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3792{ 3786{
3793 struct vcpu_svm *svm = to_svm(vcpu); 3787 struct vcpu_svm *svm = to_svm(vcpu);
@@ -3814,13 +3808,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3814 local_irq_enable(); 3808 local_irq_enable();
3815 3809
3816 asm volatile ( 3810 asm volatile (
3817 "push %%"R"bp; \n\t" 3811 "push %%" _ASM_BP "; \n\t"
3818 "mov %c[rbx](%[svm]), %%"R"bx \n\t" 3812 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
3819 "mov %c[rcx](%[svm]), %%"R"cx \n\t" 3813 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
3820 "mov %c[rdx](%[svm]), %%"R"dx \n\t" 3814 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
3821 "mov %c[rsi](%[svm]), %%"R"si \n\t" 3815 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
3822 "mov %c[rdi](%[svm]), %%"R"di \n\t" 3816 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
3823 "mov %c[rbp](%[svm]), %%"R"bp \n\t" 3817 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
3824#ifdef CONFIG_X86_64 3818#ifdef CONFIG_X86_64
3825 "mov %c[r8](%[svm]), %%r8 \n\t" 3819 "mov %c[r8](%[svm]), %%r8 \n\t"
3826 "mov %c[r9](%[svm]), %%r9 \n\t" 3820 "mov %c[r9](%[svm]), %%r9 \n\t"
@@ -3833,20 +3827,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3833#endif 3827#endif
3834 3828
3835 /* Enter guest mode */ 3829 /* Enter guest mode */
3836 "push %%"R"ax \n\t" 3830 "push %%" _ASM_AX " \n\t"
3837 "mov %c[vmcb](%[svm]), %%"R"ax \n\t" 3831 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
3838 __ex(SVM_VMLOAD) "\n\t" 3832 __ex(SVM_VMLOAD) "\n\t"
3839 __ex(SVM_VMRUN) "\n\t" 3833 __ex(SVM_VMRUN) "\n\t"
3840 __ex(SVM_VMSAVE) "\n\t" 3834 __ex(SVM_VMSAVE) "\n\t"
3841 "pop %%"R"ax \n\t" 3835 "pop %%" _ASM_AX " \n\t"
3842 3836
3843 /* Save guest registers, load host registers */ 3837 /* Save guest registers, load host registers */
3844 "mov %%"R"bx, %c[rbx](%[svm]) \n\t" 3838 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
3845 "mov %%"R"cx, %c[rcx](%[svm]) \n\t" 3839 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
3846 "mov %%"R"dx, %c[rdx](%[svm]) \n\t" 3840 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
3847 "mov %%"R"si, %c[rsi](%[svm]) \n\t" 3841 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
3848 "mov %%"R"di, %c[rdi](%[svm]) \n\t" 3842 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
3849 "mov %%"R"bp, %c[rbp](%[svm]) \n\t" 3843 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
3850#ifdef CONFIG_X86_64 3844#ifdef CONFIG_X86_64
3851 "mov %%r8, %c[r8](%[svm]) \n\t" 3845 "mov %%r8, %c[r8](%[svm]) \n\t"
3852 "mov %%r9, %c[r9](%[svm]) \n\t" 3846 "mov %%r9, %c[r9](%[svm]) \n\t"
@@ -3857,7 +3851,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3857 "mov %%r14, %c[r14](%[svm]) \n\t" 3851 "mov %%r14, %c[r14](%[svm]) \n\t"
3858 "mov %%r15, %c[r15](%[svm]) \n\t" 3852 "mov %%r15, %c[r15](%[svm]) \n\t"
3859#endif 3853#endif
3860 "pop %%"R"bp" 3854 "pop %%" _ASM_BP
3861 : 3855 :
3862 : [svm]"a"(svm), 3856 : [svm]"a"(svm),
3863 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), 3857 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
@@ -3878,9 +3872,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3878 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) 3872 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
3879#endif 3873#endif
3880 : "cc", "memory" 3874 : "cc", "memory"
3881 , R"bx", R"cx", R"dx", R"si", R"di"
3882#ifdef CONFIG_X86_64 3875#ifdef CONFIG_X86_64
3876 , "rbx", "rcx", "rdx", "rsi", "rdi"
3883 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" 3877 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
3878#else
3879 , "ebx", "ecx", "edx", "esi", "edi"
3884#endif 3880#endif
3885 ); 3881 );
3886 3882
@@ -3940,8 +3936,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3940 mark_all_clean(svm->vmcb); 3936 mark_all_clean(svm->vmcb);
3941} 3937}
3942 3938
3943#undef R
3944
3945static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) 3939static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3946{ 3940{
3947 struct vcpu_svm *svm = to_svm(vcpu); 3941 struct vcpu_svm *svm = to_svm(vcpu);
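
The _ASM_* names come from arch/x86/include/asm/asm.h and expand to the register spelling appropriate for the build, which is what lets this file drop its local R macro and name the clobbers per architecture. Very roughly, and only as a simplified sketch of the idea rather than the header's actual definitions (which go through __ASM_SEL/__ASM_FORM):

#ifdef CONFIG_X86_64
# define _ASM_AX  "rax"
# define _ASM_BP  "rbp"
# define _ASM_SP  "rsp"
# define _ASM_PTR ".quad"
# define __ASM_SIZE(inst) #inst "q"
#else
# define _ASM_AX  "eax"
# define _ASM_BP  "ebp"
# define _ASM_SP  "esp"
# define _ASM_PTR ".long"
# define __ASM_SIZE(inst) #inst "l"
#endif

/* "push %%" _ASM_BP therefore assembles as "push %%rbp" on 64-bit and
 * "push %%ebp" on 32-bit, so one asm template serves both builds. */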
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d62b4139a292..30bcb953afee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO);
127static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; 127static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
128module_param(ple_window, int, S_IRUGO); 128module_param(ple_window, int, S_IRUGO);
129 129
130extern const ulong vmx_return;
131
130#define NR_AUTOLOAD_MSRS 8 132#define NR_AUTOLOAD_MSRS 8
131#define VMCS02_POOL_SIZE 1 133#define VMCS02_POOL_SIZE 1
132 134
@@ -3724,8 +3726,7 @@ static void vmx_set_constant_host_state(void)
3724 native_store_idt(&dt); 3726 native_store_idt(&dt);
3725 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ 3727 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
3726 3728
3727 asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl)); 3729 vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
3728 vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */
3729 3730
3730 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); 3731 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
3731 vmcs_write32(HOST_IA32_SYSENTER_CS, low32); 3732 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -6183,14 +6184,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
6183 msrs[i].host); 6184 msrs[i].host);
6184} 6185}
6185 6186
6186#ifdef CONFIG_X86_64
6187#define R "r"
6188#define Q "q"
6189#else
6190#define R "e"
6191#define Q "l"
6192#endif
6193
6194static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) 6187static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6195{ 6188{
6196 struct vcpu_vmx *vmx = to_vmx(vcpu); 6189 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6239,30 +6232,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6239 vmx->__launched = vmx->loaded_vmcs->launched; 6232 vmx->__launched = vmx->loaded_vmcs->launched;
6240 asm( 6233 asm(
6241 /* Store host registers */ 6234 /* Store host registers */
6242 "push %%"R"dx; push %%"R"bp;" 6235 "push %%" _ASM_DX "; push %%" _ASM_BP ";"
6243 "push %%"R"cx \n\t" /* placeholder for guest rcx */ 6236 "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
6244 "push %%"R"cx \n\t" 6237 "push %%" _ASM_CX " \n\t"
6245 "cmp %%"R"sp, %c[host_rsp](%0) \n\t" 6238 "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
6246 "je 1f \n\t" 6239 "je 1f \n\t"
6247 "mov %%"R"sp, %c[host_rsp](%0) \n\t" 6240 "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
6248 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" 6241 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
6249 "1: \n\t" 6242 "1: \n\t"
6250 /* Reload cr2 if changed */ 6243 /* Reload cr2 if changed */
6251 "mov %c[cr2](%0), %%"R"ax \n\t" 6244 "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
6252 "mov %%cr2, %%"R"dx \n\t" 6245 "mov %%cr2, %%" _ASM_DX " \n\t"
6253 "cmp %%"R"ax, %%"R"dx \n\t" 6246 "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
6254 "je 2f \n\t" 6247 "je 2f \n\t"
6255 "mov %%"R"ax, %%cr2 \n\t" 6248 "mov %%" _ASM_AX", %%cr2 \n\t"
6256 "2: \n\t" 6249 "2: \n\t"
6257 /* Check if vmlaunch of vmresume is needed */ 6250 /* Check if vmlaunch of vmresume is needed */
6258 "cmpl $0, %c[launched](%0) \n\t" 6251 "cmpl $0, %c[launched](%0) \n\t"
6259 /* Load guest registers. Don't clobber flags. */ 6252 /* Load guest registers. Don't clobber flags. */
6260 "mov %c[rax](%0), %%"R"ax \n\t" 6253 "mov %c[rax](%0), %%" _ASM_AX " \n\t"
6261 "mov %c[rbx](%0), %%"R"bx \n\t" 6254 "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
6262 "mov %c[rdx](%0), %%"R"dx \n\t" 6255 "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
6263 "mov %c[rsi](%0), %%"R"si \n\t" 6256 "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
6264 "mov %c[rdi](%0), %%"R"di \n\t" 6257 "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
6265 "mov %c[rbp](%0), %%"R"bp \n\t" 6258 "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
6266#ifdef CONFIG_X86_64 6259#ifdef CONFIG_X86_64
6267 "mov %c[r8](%0), %%r8 \n\t" 6260 "mov %c[r8](%0), %%r8 \n\t"
6268 "mov %c[r9](%0), %%r9 \n\t" 6261 "mov %c[r9](%0), %%r9 \n\t"
@@ -6273,24 +6266,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6273 "mov %c[r14](%0), %%r14 \n\t" 6266 "mov %c[r14](%0), %%r14 \n\t"
6274 "mov %c[r15](%0), %%r15 \n\t" 6267 "mov %c[r15](%0), %%r15 \n\t"
6275#endif 6268#endif
6276 "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ 6269 "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
6277 6270
6278 /* Enter guest mode */ 6271 /* Enter guest mode */
6279 "jne .Llaunched \n\t" 6272 "jne 1f \n\t"
6280 __ex(ASM_VMX_VMLAUNCH) "\n\t" 6273 __ex(ASM_VMX_VMLAUNCH) "\n\t"
6281 "jmp .Lkvm_vmx_return \n\t" 6274 "jmp 2f \n\t"
6282 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" 6275 "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
6283 ".Lkvm_vmx_return: " 6276 "2: "
6284 /* Save guest registers, load host registers, keep flags */ 6277 /* Save guest registers, load host registers, keep flags */
6285 "mov %0, %c[wordsize](%%"R"sp) \n\t" 6278 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
6286 "pop %0 \n\t" 6279 "pop %0 \n\t"
6287 "mov %%"R"ax, %c[rax](%0) \n\t" 6280 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
6288 "mov %%"R"bx, %c[rbx](%0) \n\t" 6281 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
6289 "pop"Q" %c[rcx](%0) \n\t" 6282 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
6290 "mov %%"R"dx, %c[rdx](%0) \n\t" 6283 "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
6291 "mov %%"R"si, %c[rsi](%0) \n\t" 6284 "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
6292 "mov %%"R"di, %c[rdi](%0) \n\t" 6285 "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
6293 "mov %%"R"bp, %c[rbp](%0) \n\t" 6286 "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
6294#ifdef CONFIG_X86_64 6287#ifdef CONFIG_X86_64
6295 "mov %%r8, %c[r8](%0) \n\t" 6288 "mov %%r8, %c[r8](%0) \n\t"
6296 "mov %%r9, %c[r9](%0) \n\t" 6289 "mov %%r9, %c[r9](%0) \n\t"
@@ -6301,11 +6294,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6301 "mov %%r14, %c[r14](%0) \n\t" 6294 "mov %%r14, %c[r14](%0) \n\t"
6302 "mov %%r15, %c[r15](%0) \n\t" 6295 "mov %%r15, %c[r15](%0) \n\t"
6303#endif 6296#endif
6304 "mov %%cr2, %%"R"ax \n\t" 6297 "mov %%cr2, %%" _ASM_AX " \n\t"
6305 "mov %%"R"ax, %c[cr2](%0) \n\t" 6298 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
6306 6299
6307 "pop %%"R"bp; pop %%"R"dx \n\t" 6300 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
6308 "setbe %c[fail](%0) \n\t" 6301 "setbe %c[fail](%0) \n\t"
6302 ".pushsection .rodata \n\t"
6303 ".global vmx_return \n\t"
6304 "vmx_return: " _ASM_PTR " 2b \n\t"
6305 ".popsection"
6309 : : "c"(vmx), "d"((unsigned long)HOST_RSP), 6306 : : "c"(vmx), "d"((unsigned long)HOST_RSP),
6310 [launched]"i"(offsetof(struct vcpu_vmx, __launched)), 6307 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
6311 [fail]"i"(offsetof(struct vcpu_vmx, fail)), 6308 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
@@ -6330,9 +6327,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6330 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), 6327 [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
6331 [wordsize]"i"(sizeof(ulong)) 6328 [wordsize]"i"(sizeof(ulong))
6332 : "cc", "memory" 6329 : "cc", "memory"
6333 , R"ax", R"bx", R"di", R"si"
6334#ifdef CONFIG_X86_64 6330#ifdef CONFIG_X86_64
6331 , "rax", "rbx", "rdi", "rsi"
6335 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" 6332 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
6333#else
6334 , "eax", "ebx", "edi", "esi"
6336#endif 6335#endif
6337 ); 6336 );
6338 6337
@@ -6384,9 +6383,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6384 vmx_complete_interrupts(vmx); 6383 vmx_complete_interrupts(vmx);
6385} 6384}
6386 6385
6387#undef R
6388#undef Q
6389
6390static void vmx_free_vcpu(struct kvm_vcpu *vcpu) 6386static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
6391{ 6387{
6392 struct vcpu_vmx *vmx = to_vmx(vcpu); 6388 struct vcpu_vmx *vmx = to_vmx(vcpu);
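
The vmx.c hunks replace the named .Lkvm_vmx_return label (which vmx_set_constant_host_state used to extract with a mov from a separate asm statement) with numeric local labels plus a pointer emitted into .rodata under the global symbol vmx_return; C code then reads the address through the extern declaration added near the top of the file. A minimal standalone sketch of the pattern, with illustrative names, assuming a 64-bit build for .quad where the kernel uses _ASM_PTR:

/* The asm publishes the address of local label "1:" as a const pointer.
 * The containing function must be emitted exactly once. */
extern const unsigned long resume_rip;

static unsigned long capture_resume_rip(void)
{
    asm volatile("1:\n\t"                      /* the address being published */
                 ".pushsection .rodata\n\t"
                 ".global resume_rip\n\t"
                 "resume_rip: .quad 1b\n\t"
                 ".popsection");
    return resume_rip;
}

Reading the address back through an ordinary data symbol, instead of referencing an assembler label by name from a second function, is what the "KVM: VMX: Make lto-friendly" commit in the shortlog is about.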
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c4d451ed1573..497226e49d4b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3672,20 +3672,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
3672 gpa_t *gpa, struct x86_exception *exception, 3672 gpa_t *gpa, struct x86_exception *exception,
3673 bool write) 3673 bool write)
3674{ 3674{
3675 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3675 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
3676 | (write ? PFERR_WRITE_MASK : 0);
3676 3677
3677 if (vcpu_match_mmio_gva(vcpu, gva) && 3678 if (vcpu_match_mmio_gva(vcpu, gva)
3678 check_write_user_access(vcpu, write, access, 3679 && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
3679 vcpu->arch.access)) {
3680 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | 3680 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
3681 (gva & (PAGE_SIZE - 1)); 3681 (gva & (PAGE_SIZE - 1));
3682 trace_vcpu_match_mmio(gva, *gpa, write, false); 3682 trace_vcpu_match_mmio(gva, *gpa, write, false);
3683 return 1; 3683 return 1;
3684 } 3684 }
3685 3685
3686 if (write)
3687 access |= PFERR_WRITE_MASK;
3688
3689 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); 3686 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3690 3687
3691 if (*gpa == UNMAPPED_GVA) 3688 if (*gpa == UNMAPPED_GVA)
@@ -6016,7 +6013,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6016 int r; 6013 int r;
6017 6014
6018 vcpu->arch.mtrr_state.have_fixed = 1; 6015 vcpu->arch.mtrr_state.have_fixed = 1;
6019 vcpu_load(vcpu); 6016 r = vcpu_load(vcpu);
6017 if (r)
6018 return r;
6020 r = kvm_arch_vcpu_reset(vcpu); 6019 r = kvm_arch_vcpu_reset(vcpu);
6021 if (r == 0) 6020 if (r == 0)
6022 r = kvm_mmu_setup(vcpu); 6021 r = kvm_mmu_setup(vcpu);
@@ -6027,9 +6026,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6027 6026
6028void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 6027void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6029{ 6028{
6029 int r;
6030 vcpu->arch.apf.msr_val = 0; 6030 vcpu->arch.apf.msr_val = 0;
6031 6031
6032 vcpu_load(vcpu); 6032 r = vcpu_load(vcpu);
6033 BUG_ON(r);
6033 kvm_mmu_unload(vcpu); 6034 kvm_mmu_unload(vcpu);
6034 vcpu_put(vcpu); 6035 vcpu_put(vcpu);
6035 6036
@@ -6275,7 +6276,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6275 6276
6276static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) 6277static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
6277{ 6278{
6278 vcpu_load(vcpu); 6279 int r;
6280 r = vcpu_load(vcpu);
6281 BUG_ON(r);
6279 kvm_mmu_unload(vcpu); 6282 kvm_mmu_unload(vcpu);
6280 vcpu_put(vcpu); 6283 vcpu_put(vcpu);
6281} 6284}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 40791930bc15..80bfc880921e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -408,7 +408,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
408int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); 408int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
409void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); 409void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
410 410
411void vcpu_load(struct kvm_vcpu *vcpu); 411int __must_check vcpu_load(struct kvm_vcpu *vcpu);
412void vcpu_put(struct kvm_vcpu *vcpu); 412void vcpu_put(struct kvm_vcpu *vcpu);
413 413
414int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, 414int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
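
With vcpu_load() now returning int (it takes the vcpu mutex with mutex_lock_killable(), as the kvm_main.c hunk below shows), the __must_check annotation forces every caller to handle -EINTR, which is what the x86.c and kvm_vcpu_ioctl() changes do. An illustrative caller, with a made-up helper name:

/* kernel context: linux/kvm_host.h provides vcpu_load()/vcpu_put() */
static int do_something_with_vcpu(struct kvm_vcpu *vcpu)
{
    int r;

    r = vcpu_load(vcpu);        /* -EINTR if the task is being killed */
    if (r)
        return r;

    /* ... work that requires the vcpu mutex ... */

    vcpu_put(vcpu);
    return 0;
}

Teardown paths that cannot meaningfully fail, such as kvm_arch_vcpu_destroy() above, instead assert the result with BUG_ON(r).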
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fe02d900810..cc3f6dc506e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -131,11 +131,12 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
131/* 131/*
132 * Switches to specified vcpu, until a matching vcpu_put() 132 * Switches to specified vcpu, until a matching vcpu_put()
133 */ 133 */
134void vcpu_load(struct kvm_vcpu *vcpu) 134int vcpu_load(struct kvm_vcpu *vcpu)
135{ 135{
136 int cpu; 136 int cpu;
137 137
138 mutex_lock(&vcpu->mutex); 138 if (mutex_lock_killable(&vcpu->mutex))
139 return -EINTR;
139 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { 140 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
140 /* The thread running this VCPU changed. */ 141 /* The thread running this VCPU changed. */
141 struct pid *oldpid = vcpu->pid; 142 struct pid *oldpid = vcpu->pid;
@@ -148,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
148 preempt_notifier_register(&vcpu->preempt_notifier); 149 preempt_notifier_register(&vcpu->preempt_notifier);
149 kvm_arch_vcpu_load(vcpu, cpu); 150 kvm_arch_vcpu_load(vcpu, cpu);
150 put_cpu(); 151 put_cpu();
152 return 0;
151} 153}
152 154
153void vcpu_put(struct kvm_vcpu *vcpu) 155void vcpu_put(struct kvm_vcpu *vcpu)
@@ -1891,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
1891#endif 1893#endif
1892 1894
1893 1895
1894 vcpu_load(vcpu); 1896 r = vcpu_load(vcpu);
1897 if (r)
1898 return r;
1895 switch (ioctl) { 1899 switch (ioctl) {
1896 case KVM_RUN: 1900 case KVM_RUN:
1897 r = -EINVAL; 1901 r = -EINVAL;