aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/kvm/paging_tmpl.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/kvm/paging_tmpl.h')
-rw-r--r--drivers/kvm/paging_tmpl.h273
1 files changed, 142 insertions, 131 deletions
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 73ffbffb1097..a7c5cb0319ea 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -31,7 +31,6 @@
31 #define PT_INDEX(addr, level) PT64_INDEX(addr, level) 31 #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
32 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 32 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
33 #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) 33 #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
34 #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK
35 #ifdef CONFIG_X86_64 34 #ifdef CONFIG_X86_64
36 #define PT_MAX_FULL_LEVELS 4 35 #define PT_MAX_FULL_LEVELS 4
37 #else 36 #else
@@ -46,7 +45,6 @@
46 #define PT_INDEX(addr, level) PT32_INDEX(addr, level) 45 #define PT_INDEX(addr, level) PT32_INDEX(addr, level)
47 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 46 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
48 #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) 47 #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
49 #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK
50 #define PT_MAX_FULL_LEVELS 2 48 #define PT_MAX_FULL_LEVELS 2
51#else 49#else
52 #error Invalid PTTYPE value 50 #error Invalid PTTYPE value
@@ -192,40 +190,143 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
192 mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]); 190 mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
193} 191}
194 192
195static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, 193static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
196 u64 *shadow_pte, u64 access_bits, gfn_t gfn) 194 u64 *shadow_pte,
195 gpa_t gaddr,
196 pt_element_t *gpte,
197 u64 access_bits,
198 int user_fault,
199 int write_fault,
200 int *ptwrite,
201 struct guest_walker *walker,
202 gfn_t gfn)
197{ 203{
198 ASSERT(*shadow_pte == 0); 204 hpa_t paddr;
199 access_bits &= guest_pte; 205 int dirty = *gpte & PT_DIRTY_MASK;
200 *shadow_pte = (guest_pte & PT_PTE_COPY_MASK); 206 u64 spte = *shadow_pte;
201 set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, 207 int was_rmapped = is_rmap_pte(spte);
202 guest_pte & PT_DIRTY_MASK, access_bits, gfn); 208
209 pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
210 " user_fault %d gfn %lx\n",
211 __FUNCTION__, spte, (u64)*gpte, access_bits,
212 write_fault, user_fault, gfn);
213
214 if (write_fault && !dirty) {
215 *gpte |= PT_DIRTY_MASK;
216 dirty = 1;
217 FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
218 }
219
220 spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
221 spte |= *gpte & PT64_NX_MASK;
222 if (!dirty)
223 access_bits &= ~PT_WRITABLE_MASK;
224
225 paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
226
227 spte |= PT_PRESENT_MASK;
228 if (access_bits & PT_USER_MASK)
229 spte |= PT_USER_MASK;
230
231 if (is_error_hpa(paddr)) {
232 spte |= gaddr;
233 spte |= PT_SHADOW_IO_MARK;
234 spte &= ~PT_PRESENT_MASK;
235 set_shadow_pte(shadow_pte, spte);
236 return;
237 }
238
239 spte |= paddr;
240
241 if ((access_bits & PT_WRITABLE_MASK)
242 || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
243 struct kvm_mmu_page *shadow;
244
245 spte |= PT_WRITABLE_MASK;
246 if (user_fault) {
247 mmu_unshadow(vcpu, gfn);
248 goto unshadowed;
249 }
250
251 shadow = kvm_mmu_lookup_page(vcpu, gfn);
252 if (shadow) {
253 pgprintk("%s: found shadow page for %lx, marking ro\n",
254 __FUNCTION__, gfn);
255 access_bits &= ~PT_WRITABLE_MASK;
256 if (is_writeble_pte(spte)) {
257 spte &= ~PT_WRITABLE_MASK;
258 kvm_arch_ops->tlb_flush(vcpu);
259 }
260 if (write_fault)
261 *ptwrite = 1;
262 }
263 }
264
265unshadowed:
266
267 if (access_bits & PT_WRITABLE_MASK)
268 mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
269
270 set_shadow_pte(shadow_pte, spte);
271 page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
272 if (!was_rmapped)
273 rmap_add(vcpu, shadow_pte);
203} 274}
204 275
205static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, 276static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte,
206 u64 *shadow_pte, u64 access_bits, gfn_t gfn) 277 u64 *shadow_pte, u64 access_bits,
278 int user_fault, int write_fault, int *ptwrite,
279 struct guest_walker *walker, gfn_t gfn)
280{
281 access_bits &= *gpte;
282 FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK,
283 gpte, access_bits, user_fault, write_fault,
284 ptwrite, walker, gfn);
285}
286
287static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
288 u64 *spte, const void *pte, int bytes)
289{
290 pt_element_t gpte;
291
292 if (bytes < sizeof(pt_element_t))
293 return;
294 gpte = *(const pt_element_t *)pte;
295 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
296 return;
297 pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
298 FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
299 0, NULL, NULL,
300 (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
301}
302
303static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde,
304 u64 *shadow_pte, u64 access_bits,
305 int user_fault, int write_fault, int *ptwrite,
306 struct guest_walker *walker, gfn_t gfn)
207{ 307{
208 gpa_t gaddr; 308 gpa_t gaddr;
209 309
210 ASSERT(*shadow_pte == 0); 310 access_bits &= *gpde;
211 access_bits &= guest_pde;
212 gaddr = (gpa_t)gfn << PAGE_SHIFT; 311 gaddr = (gpa_t)gfn << PAGE_SHIFT;
213 if (PTTYPE == 32 && is_cpuid_PSE36()) 312 if (PTTYPE == 32 && is_cpuid_PSE36())
214 gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << 313 gaddr |= (*gpde & PT32_DIR_PSE36_MASK) <<
215 (32 - PT32_DIR_PSE36_SHIFT); 314 (32 - PT32_DIR_PSE36_SHIFT);
216 *shadow_pte = guest_pde & PT_PTE_COPY_MASK; 315 FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
217 set_pte_common(vcpu, shadow_pte, gaddr, 316 gpde, access_bits, user_fault, write_fault,
218 guest_pde & PT_DIRTY_MASK, access_bits, gfn); 317 ptwrite, walker, gfn);
219} 318}
220 319
221/* 320/*
222 * Fetch a shadow pte for a specific level in the paging hierarchy. 321 * Fetch a shadow pte for a specific level in the paging hierarchy.
223 */ 322 */
224static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 323static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
225 struct guest_walker *walker) 324 struct guest_walker *walker,
325 int user_fault, int write_fault, int *ptwrite)
226{ 326{
227 hpa_t shadow_addr; 327 hpa_t shadow_addr;
228 int level; 328 int level;
329 u64 *shadow_ent;
229 u64 *prev_shadow_ent = NULL; 330 u64 *prev_shadow_ent = NULL;
230 pt_element_t *guest_ent = walker->ptep; 331 pt_element_t *guest_ent = walker->ptep;
231 332
@@ -242,37 +343,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
242 343
243 for (; ; level--) { 344 for (; ; level--) {
244 u32 index = SHADOW_PT_INDEX(addr, level); 345 u32 index = SHADOW_PT_INDEX(addr, level);
245 u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index;
246 struct kvm_mmu_page *shadow_page; 346 struct kvm_mmu_page *shadow_page;
247 u64 shadow_pte; 347 u64 shadow_pte;
248 int metaphysical; 348 int metaphysical;
249 gfn_t table_gfn; 349 gfn_t table_gfn;
250 unsigned hugepage_access = 0; 350 unsigned hugepage_access = 0;
251 351
352 shadow_ent = ((u64 *)__va(shadow_addr)) + index;
252 if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { 353 if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
253 if (level == PT_PAGE_TABLE_LEVEL) 354 if (level == PT_PAGE_TABLE_LEVEL)
254 return shadow_ent; 355 break;
255 shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; 356 shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
256 prev_shadow_ent = shadow_ent; 357 prev_shadow_ent = shadow_ent;
257 continue; 358 continue;
258 } 359 }
259 360
260 if (level == PT_PAGE_TABLE_LEVEL) { 361 if (level == PT_PAGE_TABLE_LEVEL)
261 362 break;
262 if (walker->level == PT_DIRECTORY_LEVEL) {
263 if (prev_shadow_ent)
264 *prev_shadow_ent |= PT_SHADOW_PS_MARK;
265 FNAME(set_pde)(vcpu, *guest_ent, shadow_ent,
266 walker->inherited_ar,
267 walker->gfn);
268 } else {
269 ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
270 FNAME(set_pte)(vcpu, *guest_ent, shadow_ent,
271 walker->inherited_ar,
272 walker->gfn);
273 }
274 return shadow_ent;
275 }
276 363
277 if (level - 1 == PT_PAGE_TABLE_LEVEL 364 if (level - 1 == PT_PAGE_TABLE_LEVEL
278 && walker->level == PT_DIRECTORY_LEVEL) { 365 && walker->level == PT_DIRECTORY_LEVEL) {
@@ -289,90 +376,24 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
289 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, 376 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
290 metaphysical, hugepage_access, 377 metaphysical, hugepage_access,
291 shadow_ent); 378 shadow_ent);
292 shadow_addr = shadow_page->page_hpa; 379 shadow_addr = __pa(shadow_page->spt);
293 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK 380 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
294 | PT_WRITABLE_MASK | PT_USER_MASK; 381 | PT_WRITABLE_MASK | PT_USER_MASK;
295 *shadow_ent = shadow_pte; 382 *shadow_ent = shadow_pte;
296 prev_shadow_ent = shadow_ent; 383 prev_shadow_ent = shadow_ent;
297 } 384 }
298}
299 385
300/* 386 if (walker->level == PT_DIRECTORY_LEVEL) {
301 * The guest faulted for write. We need to 387 FNAME(set_pde)(vcpu, guest_ent, shadow_ent,
302 * 388 walker->inherited_ar, user_fault, write_fault,
303 * - check write permissions 389 ptwrite, walker, walker->gfn);
304 * - update the guest pte dirty bit 390 } else {
305 * - update our own dirty page tracking structures 391 ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
306 */ 392 FNAME(set_pte)(vcpu, guest_ent, shadow_ent,
307static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, 393 walker->inherited_ar, user_fault, write_fault,
308 u64 *shadow_ent, 394 ptwrite, walker, walker->gfn);
309 struct guest_walker *walker,
310 gva_t addr,
311 int user,
312 int *write_pt)
313{
314 pt_element_t *guest_ent;
315 int writable_shadow;
316 gfn_t gfn;
317 struct kvm_mmu_page *page;
318
319 if (is_writeble_pte(*shadow_ent))
320 return !user || (*shadow_ent & PT_USER_MASK);
321
322 writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK;
323 if (user) {
324 /*
325 * User mode access. Fail if it's a kernel page or a read-only
326 * page.
327 */
328 if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow)
329 return 0;
330 ASSERT(*shadow_ent & PT_USER_MASK);
331 } else
332 /*
333 * Kernel mode access. Fail if it's a read-only page and
334 * supervisor write protection is enabled.
335 */
336 if (!writable_shadow) {
337 if (is_write_protection(vcpu))
338 return 0;
339 *shadow_ent &= ~PT_USER_MASK;
340 }
341
342 guest_ent = walker->ptep;
343
344 if (!is_present_pte(*guest_ent)) {
345 *shadow_ent = 0;
346 return 0;
347 } 395 }
348 396 return shadow_ent;
349 gfn = walker->gfn;
350
351 if (user) {
352 /*
353 * Usermode page faults won't be for page table updates.
354 */
355 while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
356 pgprintk("%s: zap %lx %x\n",
357 __FUNCTION__, gfn, page->role.word);
358 kvm_mmu_zap_page(vcpu, page);
359 }
360 } else if (kvm_mmu_lookup_page(vcpu, gfn)) {
361 pgprintk("%s: found shadow page for %lx, marking ro\n",
362 __FUNCTION__, gfn);
363 mark_page_dirty(vcpu->kvm, gfn);
364 FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
365 *guest_ent |= PT_DIRTY_MASK;
366 *write_pt = 1;
367 return 0;
368 }
369 mark_page_dirty(vcpu->kvm, gfn);
370 *shadow_ent |= PT_WRITABLE_MASK;
371 FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
372 *guest_ent |= PT_DIRTY_MASK;
373 rmap_add(vcpu, shadow_ent);
374
375 return 1;
376} 397}
377 398
378/* 399/*
@@ -397,7 +418,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
397 int fetch_fault = error_code & PFERR_FETCH_MASK; 418 int fetch_fault = error_code & PFERR_FETCH_MASK;
398 struct guest_walker walker; 419 struct guest_walker walker;
399 u64 *shadow_pte; 420 u64 *shadow_pte;
400 int fixed;
401 int write_pt = 0; 421 int write_pt = 0;
402 int r; 422 int r;
403 423
@@ -421,27 +441,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
421 pgprintk("%s: guest page fault\n", __FUNCTION__); 441 pgprintk("%s: guest page fault\n", __FUNCTION__);
422 inject_page_fault(vcpu, addr, walker.error_code); 442 inject_page_fault(vcpu, addr, walker.error_code);
423 FNAME(release_walker)(&walker); 443 FNAME(release_walker)(&walker);
444 vcpu->last_pt_write_count = 0; /* reset fork detector */
424 return 0; 445 return 0;
425 } 446 }
426 447
427 shadow_pte = FNAME(fetch)(vcpu, addr, &walker); 448 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
428 pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, 449 &write_pt);
429 shadow_pte, *shadow_pte); 450 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
430 451 shadow_pte, *shadow_pte, write_pt);
431 /*
432 * Update the shadow pte.
433 */
434 if (write_fault)
435 fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr,
436 user_fault, &write_pt);
437 else
438 fixed = fix_read_pf(shadow_pte);
439
440 pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__,
441 shadow_pte, *shadow_pte);
442 452
443 FNAME(release_walker)(&walker); 453 FNAME(release_walker)(&walker);
444 454
455 if (!write_pt)
456 vcpu->last_pt_write_count = 0; /* reset fork detector */
457
445 /* 458 /*
446 * mmio: emulate if accessible, otherwise its a guest fault. 459 * mmio: emulate if accessible, otherwise its a guest fault.
447 */ 460 */
@@ -478,7 +491,5 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
478#undef PT_INDEX 491#undef PT_INDEX
479#undef SHADOW_PT_INDEX 492#undef SHADOW_PT_INDEX
480#undef PT_LEVEL_MASK 493#undef PT_LEVEL_MASK
481#undef PT_PTE_COPY_MASK
482#undef PT_NON_PTE_COPY_MASK
483#undef PT_DIR_BASE_ADDR_MASK 494#undef PT_DIR_BASE_ADDR_MASK
484#undef PT_MAX_FULL_LEVELS 495#undef PT_MAX_FULL_LEVELS