Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
 arch/x86/kvm/paging_tmpl.h | 199 ++++++++++++------------------
 1 file changed, 83 insertions(+), 116 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bb7cf01cae76..714e2c01a6fe 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -63,10 +63,12 @@
  */
 struct guest_walker {
 	int level;
+	unsigned max_level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t ptes[PT_MAX_FULL_LEVELS];
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
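The hunk above adds the two fields that turn the walker into a full record of the walk: max_level remembers the level the walk started from, and ptep_user[] keeps the userspace address through which each gpte was read, so a later pass can revisit exactly the entries the walk touched without re-walking. A minimal userspace sketch of that record-and-replay idea; every name and type here is a local stand-in, not kernel API:

#include <stdio.h>

#define MAX_LEVELS 4

/* Stand-in for the walker: one saved pointer and value per level. */
struct walk_record {
	int level;				/* leaf level the walk ended on */
	int max_level;				/* root level the walk started on */
	unsigned long *ptep[MAX_LEVELS];	/* where each entry was read from */
	unsigned long ptes[MAX_LEVELS];		/* the values that were read */
};

/* Revisit exactly the entries the walk touched, root towards leaf. */
static void replay(const struct walk_record *w)
{
	for (int level = w->max_level; level >= w->level; --level)
		printf("level %d: pte %#lx read via %p\n", level,
		       w->ptes[level - 1], (void *)w->ptep[level - 1]);
}

int main(void)
{
	unsigned long fake_tables[MAX_LEVELS] = { 0x1007, 0x2007, 0x3007, 0x4067 };
	struct walk_record w = { .level = 1, .max_level = MAX_LEVELS };

	for (int l = MAX_LEVELS; l >= 1; --l) {	/* pretend walk */
		w.ptep[l - 1] = &fake_tables[MAX_LEVELS - l];
		w.ptes[l - 1] = fake_tables[MAX_LEVELS - l];
	}
	replay(&w);
	return 0;
}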
@@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-				   bool last)
-{
-	unsigned access;
-
-	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-	if (last && !is_dirty_gpte(gpte))
-		access &= ~ACC_WRITE_MASK;
-
-#if PTTYPE == 64
-	if (vcpu->arch.mmu.nx)
-		access &= ~(gpte >> PT64_NX_SHIFT);
-#endif
-	return access;
-}
-
-static bool FNAME(is_last_gpte)(struct guest_walker *walker,
-				struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				pt_element_t gpte)
-{
-	if (walker->level == PT_PAGE_TABLE_LEVEL)
-		return true;
-
-	if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) &&
-	    (PTTYPE == 64 || is_pse(vcpu)))
-		return true;
-
-	if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) &&
-	    (mmu->root_level == PT64_ROOT_LEVEL))
-		return true;
-
-	return false;
-}
+static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
+					     struct kvm_mmu *mmu,
+					     struct guest_walker *walker,
+					     int write_fault)
+{
+	unsigned level, index;
+	pt_element_t pte, orig_pte;
+	pt_element_t __user *ptep_user;
+	gfn_t table_gfn;
+	int ret;
+
+	for (level = walker->max_level; level >= walker->level; --level) {
+		pte = orig_pte = walker->ptes[level - 1];
+		table_gfn = walker->table_gfn[level - 1];
+		ptep_user = walker->ptep_user[level - 1];
+		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
+		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_ACCESSED_MASK;
+		}
+		if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
+			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_DIRTY_MASK;
+		}
+		if (pte == orig_pte)
+			continue;
+
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
+		if (ret)
+			return ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		walker->ptes[level] = pte;
+	}
+	return 0;
+}
 
 /*
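The new FNAME(update_accessed_dirty_bits) above publishes accessed/dirty bits with compare-and-swap semantics: build the updated entry from a snapshot, then install it only if the guest has not changed the entry in the meantime; any change means the whole walk must be retried. A userspace sketch of that pattern, simplified to a single entry and using C11 atomics in place of the kernel's cmpxchg_gpte() on guest memory (mask values mirror x86 bit positions; the function name is local to this demo):

#include <stdatomic.h>
#include <stdio.h>

#define PT_ACCESSED_MASK (1UL << 5)
#define PT_DIRTY_MASK    (1UL << 6)

/* Returns 0 on success, 1 if the entry changed under us (caller retries). */
static int update_ad_bits(_Atomic unsigned long *ptep, int write_fault)
{
	unsigned long orig = atomic_load(ptep);
	unsigned long pte = orig | PT_ACCESSED_MASK;

	if (write_fault)
		pte |= PT_DIRTY_MASK;
	if (pte == orig)
		return 0;			/* nothing to publish */
	return !atomic_compare_exchange_strong(ptep, &orig, pte);
}

int main(void)
{
	_Atomic unsigned long pte = 0;

	printf("ret=%d pte=%#lx\n", update_ad_bits(&pte, 1), atomic_load(&pte));
	return 0;
}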
@@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				    gva_t addr, u32 access)
 {
+	int ret;
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
+	unsigned index, pt_access, pte_access, accessed_dirty, shift;
 	gpa_t pte_gpa;
-	bool eperm, last_gpte;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
 	u16 errcode = 0;
+	gpa_t real_gpa;
+	gfn_t gfn;
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
-	eperm = false;
 	walker->level = mmu->root_level;
 	pte = mmu->get_cr3(vcpu);
 
@@ -169,15 +175,21 @@ retry_walk:
 		--walker->level;
 	}
 #endif
+	walker->max_level = walker->level;
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
-	pt_access = ACC_ALL;
+	accessed_dirty = PT_ACCESSED_MASK;
+	pt_access = pte_access = ACC_ALL;
+	++walker->level;
 
-	for (;;) {
+	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
+		pt_access &= pte_access;
+		--walker->level;
+
 		index = PT_INDEX(addr, walker->level);
 
 		table_gfn = gpte_to_gfn(pte);
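The hunk above replaces the open-coded for (;;) with a do/while: the level is bumped once before the loop so the decrement can sit at the top of the body, and pt_access accumulates the AND of each level's permissions on the way down. A runnable toy of that accumulation, with all names local to the demo (ACC bits as in the kernel: exec=1, write=2, user=4):

#include <stdio.h>

#define LEVELS	4
#define ACC_ALL	7u

int main(void)
{
	/* Per-level gpte permissions; one intermediate level lacks write. */
	unsigned gpte_access[LEVELS] = { 7, 7, 5, 7 };
	unsigned pt_access = ACC_ALL, pte_access = ACC_ALL;
	int level = LEVELS;

	do {
		pt_access &= pte_access;
		--level;
		pte_access = pt_access & gpte_access[level];
	} while (level > 0);	/* stand-in for is_last_gpte() */

	printf("effective access = %#x\n", pte_access);	/* 0x5: no write */
	return 0;
}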
@@ -199,6 +211,7 @@ retry_walk:
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
 			goto error;
+		walker->ptep_user[walker->level - 1] = ptep_user;
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
@@ -211,92 +224,48 @@ retry_walk:
 			goto error;
 		}
 
-		if (!check_write_user_access(vcpu, write_fault, user_fault,
-					     pte))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
-
-		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-		if (last_gpte) {
-			pte_access = pt_access &
-				     FNAME(gpte_access)(vcpu, pte, true);
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-		}
-
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0))
-				goto error;
-			else if (ret)
-				goto retry_walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
+		accessed_dirty &= pte;
+		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		walker->ptes[walker->level - 1] = pte;
-
-		if (last_gpte) {
-			int lvl = walker->level;
-			gpa_t real_gpa;
-			gfn_t gfn;
-			u32 ac;
-
-			gfn = gpte_to_gfn_lvl(pte, lvl);
-			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
-
-			if (PTTYPE == 32 &&
-			    walker->level == PT_DIRECTORY_LEVEL &&
-			    is_cpuid_PSE36())
-				gfn += pse36_gfn_delta(pte);
-
-			ac = write_fault | fetch_fault | user_fault;
-
-			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
-						      ac);
-			if (real_gpa == UNMAPPED_GVA)
-				return 0;
-
-			walker->gfn = real_gpa >> PAGE_SHIFT;
-
-			break;
-		}
-
-		pt_access &= FNAME(gpte_access)(vcpu, pte, false);
-		--walker->level;
-	}
+	} while (!is_last_gpte(mmu, walker->level, pte));
 
-	if (unlikely(eperm)) {
+	if (unlikely(permission_fault(mmu, pte_access, access))) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
 	}
 
-	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
-		int ret;
+	gfn = gpte_to_gfn_lvl(pte, walker->level);
+	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
+
+	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
+		gfn += pse36_gfn_delta(pte);
+
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
+	if (real_gpa == UNMAPPED_GVA)
+		return 0;
+
+	walker->gfn = real_gpa >> PAGE_SHIFT;
+
+	if (!write_fault)
+		protect_clean_gpte(&pte_access, pte);
+
+	/*
+	 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
+	 * place right.
+	 *
+	 * On a read fault, do nothing.
+	 */
+	shift = write_fault >> ilog2(PFERR_WRITE_MASK);
+	shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
+	accessed_dirty &= pte >> shift;
 
-		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-					  pte, pte|PT_DIRTY_MASK);
+	if (unlikely(!accessed_dirty)) {
+		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
 		if (unlikely(ret < 0))
 			goto error;
 		else if (ret)
 			goto retry_walk;
-
-		mark_page_dirty(vcpu->kvm, table_gfn);
-		pte |= PT_DIRTY_MASK;
-		walker->ptes[walker->level - 1] = pte;
 	}
 
 	walker->pt_access = pt_access;
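The accessed_dirty fold in the hunk above is branchless: PFERR_WRITE_MASK is bit 1, so write_fault >> ilog2(PFERR_WRITE_MASK) is 1 exactly on write faults, and PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT is 1 on x86, so pte >> shift lands the dirty bit on the accessed position before the AND; a clean pte then zeroes accessed_dirty and sends the walk to the update_accessed_dirty_bits() slow path. A worked userspace demo (bit positions mirror x86; the names are stand-ins, not kernel exports):

#include <stdio.h>

#define PT_ACCESSED_SHIFT 5
#define PT_DIRTY_SHIFT    6
#define PT_ACCESSED_MASK  (1u << PT_ACCESSED_SHIFT)
#define PFERR_WRITE_MASK  (1u << 1)

static unsigned fold(unsigned accessed_dirty, unsigned pte, unsigned write_fault)
{
	/* 1 on a write fault, 0 otherwise; ilog2(PFERR_WRITE_MASK) == 1. */
	unsigned shift = (write_fault >> 1) * (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);

	/* On a write fault the dirty bit is tested in the accessed slot. */
	return accessed_dirty & (pte >> shift);
}

int main(void)
{
	unsigned pte = PT_ACCESSED_MASK;	/* accessed set, dirty clear */

	/* Read fault: the accessed bit alone keeps the fast path. */
	printf("read : %#x\n", fold(PT_ACCESSED_MASK, pte, 0));
	/* Write fault: dirty is clear, result 0 forces the slow path. */
	printf("write: %#x\n", fold(PT_ACCESSED_MASK, pte, PFERR_WRITE_MASK));
	return 0;
}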
@@ -368,12 +337,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;
 
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
+	pte_access = sp->role.access & gpte_access(vcpu, gpte);
+	protect_clean_gpte(&pte_access, gpte);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
-	if (mmu_invalid_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+	if (mmu_invalid_pfn(pfn))
 		return;
-	}
 
 	/*
 	 * we call mmu_set_spte() with host_writable = true because that
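This and the following hunks convert every FNAME(gpte_access)(vcpu, gpte, true) call site into the pair gpte_access() + protect_clean_gpte(): computing the access bits no longer peeks at the dirty bit, and write permission through a still-clean gpte is stripped by the separate helper (which the walker skips on write faults, since the dirty bit is about to be set anyway). A sketch of that split with local stand-in masks, not the mmu.h exports:

#include <stdio.h>

#define PT_WRITABLE_MASK (1u << 1)
#define PT_USER_MASK     (1u << 2)
#define PT_DIRTY_MASK    (1u << 6)
#define ACC_EXEC_MASK    1u
#define ACC_WRITE_MASK   PT_WRITABLE_MASK

/* Access is now a pure function of the pte's permission bits. */
static unsigned gpte_access_demo(unsigned gpte)
{
	return (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
}

/* A clean gpte must take a write fault first, so deny writes through it. */
static void protect_clean_gpte_demo(unsigned *access, unsigned gpte)
{
	if (!(gpte & PT_DIRTY_MASK))
		*access &= ~ACC_WRITE_MASK;
}

int main(void)
{
	unsigned gpte = PT_WRITABLE_MASK | PT_USER_MASK;	/* writable, clean */
	unsigned access = gpte_access_demo(gpte);

	protect_clean_gpte_demo(&access, gpte);
	printf("access = %#x (write %s)\n", access,
	       (access & ACC_WRITE_MASK) ? "allowed" : "stripped");
	return 0;
}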
@@ -443,15 +411,13 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;
 
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
-								  true);
+		pte_access = sp->role.access & gpte_access(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 					      pte_access & ACC_WRITE_MASK);
-		if (mmu_invalid_pfn(pfn)) {
-			kvm_release_pfn_clean(pfn);
+		if (mmu_invalid_pfn(pfn))
 			break;
-		}
 
 		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
 			     NULL, PT_PAGE_TABLE_LEVEL, gfn,
@@ -798,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte, true);
+		pte_access &= gpte_access(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 
 		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
 			continue;
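Worth noting about the walker hunk above: the single permission_fault(mmu, pte_access, access) test can replace the old per-iteration eperm logic because the fault decision depends only on the fault type and the accumulated pte_access, a space small enough to precompute. A toy reconstruction of that table-lookup idea; the table layout is an assumption for illustration, not the kernel's exact encoding:

#include <stdio.h>

#define ACC_EXEC    1u
#define ACC_WRITE   2u
#define ACC_USER    4u
#define PFERR_WRITE 2u
#define PFERR_USER  4u
#define PFERR_FETCH 16u

static unsigned char permissions[16];		/* indexed by pfec >> 1 */

static void build_permission_table(void)
{
	for (unsigned pfec = 0; pfec < 32; pfec += 2) {
		unsigned char map = 0;

		for (unsigned access = 0; access < 8; ++access) {
			int fault = 0;

			if ((pfec & PFERR_WRITE) && !(access & ACC_WRITE))
				fault = 1;
			if ((pfec & PFERR_USER) && !(access & ACC_USER))
				fault = 1;
			if ((pfec & PFERR_FETCH) && !(access & ACC_EXEC))
				fault = 1;
			map |= fault << access;
		}
		permissions[pfec >> 1] = map;
	}
}

static int permission_fault_demo(unsigned pte_access, unsigned pfec)
{
	return (permissions[pfec >> 1] >> pte_access) & 1;	/* one lookup */
}

int main(void)
{
	build_permission_table();
	/* User write through an exec-only mapping: fault. */
	printf("%d\n", permission_fault_demo(ACC_EXEC, PFERR_WRITE | PFERR_USER));
	/* Supervisor read of a fully accessible mapping: no fault. */
	printf("%d\n", permission_fault_demo(ACC_EXEC | ACC_WRITE | ACC_USER, 0));
	return 0;
}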