author     Paolo Bonzini <pbonzini@redhat.com>     2017-05-11 07:23:29 -0400
committer  Radim Krčmář <rkrcmar@redhat.com>       2017-05-15 12:22:40 -0400
commit     0780516a18f87e881e42ed815f189279b0a1743c
tree       229ce5f517835cdc192664ea8a14cb4b6a779336
parent     fce6ac4c0508b985d497e3d9c8eff28ec8a43182
KVM: nVMX: fix EPT permissions as reported in exit qualification
This fixes the new ept_access_test_read_only and ept_access_test_read_write
testcases from vmx.flat.
The problem is that gpte_access moves bits around to switch from EPT
bit order (XWR) to ACC_*_MASK bit order (RWX), but the exit qualification
reports the guest's permissions in XWR order, so building it from the
converted values sets the wrong bits. To fix this, make pt_access and
pte_access operate on raw PTE values (only with NX flipped to mean
"can execute") and call gpte_access at the end of the walk. This
lets us use pte_access to compute the exit qualification with XWR
bit order.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
 arch/x86/kvm/paging_tmpl.h | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)
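The bit-order mismatch is easiest to see in isolation. The following stand-alone sketch is illustrative only, not kernel code: the EPT_* and ACC_* values mirror the kernel's VMX_EPT_*_MASK and ACC_*_MASK definitions, while toy_gpte_access() is a hypothetical, simplified stand-in for FNAME(gpte_access) in the PTTYPE_EPT case. It shows why accumulating permissions after the XWR-to-RWX conversion produces wrong exit-qualification bits, whereas accumulating raw PTE bits, as the patch does, keeps the XWR layout until the end of the walk.

/*
 * Stand-alone illustration of the bit-order problem (not kernel code).
 * Mask values mirror the kernel's definitions; toy_gpte_access() is a
 * simplified stand-in for FNAME(gpte_access) with PTTYPE == PTTYPE_EPT.
 */
#include <stdint.h>
#include <stdio.h>

/* EPT PTE permission bits: R=0, W=1, X=2 ("XWR" order, 1 = allowed). */
#define EPT_READABLE   0x1
#define EPT_WRITABLE   0x2
#define EPT_EXECUTABLE 0x4

/* KVM's internal ACC_*_MASK flags: X=0, W=1, U/R=2 ("RWX" order). */
#define ACC_EXEC_MASK  0x1
#define ACC_WRITE_MASK 0x2
#define ACC_USER_MASK  0x4

/* Simplified gpte_access() for EPT: permutes XWR bits into ACC_* order. */
static unsigned toy_gpte_access(uint64_t gpte)
{
        return ((gpte & EPT_READABLE)   ? ACC_USER_MASK  : 0) |
               ((gpte & EPT_WRITABLE)   ? ACC_WRITE_MASK : 0) |
               ((gpte & EPT_EXECUTABLE) ? ACC_EXEC_MASK  : 0);
}

int main(void)
{
        uint64_t parent = EPT_READABLE | EPT_WRITABLE;  /* RW- */
        uint64_t leaf   = EPT_READABLE;                 /* R-- */

        /* Old scheme: convert each level first, then accumulate. */
        unsigned old = toy_gpte_access(parent) & toy_gpte_access(leaf);

        /* New scheme: accumulate raw PTE bits, convert only at the end. */
        uint64_t raw = parent & leaf;

        /* Exit qualification bits 5:3 must be in XWR order. */
        printf("old (ACC order) -> exit qual bits: %#lx (wrong)\n",
               (unsigned long)(old & 0x7) << 3);
        printf("raw (XWR order) -> exit qual bits: %#llx (correct)\n",
               (unsigned long long)(raw & 0x7) << 3);
        printf("ACC_* flags for the walker: %#x\n", toy_gpte_access(raw));
        return 0;
}

For the ordinary 64-bit page-table format the same accumulation works because walk_nx_mask flips bit 63 (NX, where set means "no execute") before the AND, so every bit that survives consistently means "allowed"; for EPT, walk_nx_mask stays 0 and the raw R/W/X bits already have that meaning.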
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56241746abbd..b0454c7e4cff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+	u64 pt_access, pte_access;
+	unsigned index, accessed_dirty, pte_pkey;
 	unsigned nested_access;
 	gpa_t pte_gpa;
 	bool have_ad;
 	int offset;
+	u64 walk_nx_mask = 0;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -302,6 +304,7 @@ retry_walk:
 	have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
+	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
 	if (walker->level == PT32E_ROOT_LEVEL) {
 		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
@@ -313,8 +316,6 @@ retry_walk:
 	walker->max_level = walker->level;
 	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
-	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
-
 	/*
 	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
 	 * by the MOV to CR instruction are treated as reads and do not cause the
@@ -322,14 +323,14 @@ retry_walk:
 	 */
 	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
-	pt_access = pte_access = ACC_ALL;
+	pte_access = ~0;
 	++walker->level;
 
 	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
-		pt_access &= pte_access;
+		pt_access = pte_access;
 		--walker->level;
 
 		index = PT_INDEX(addr, walker->level);
@@ -371,6 +372,12 @@ retry_walk:
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
+		/*
+		 * Inverting the NX bit lets us AND it like other
+		 * permission bits.
+		 */
+		pte_access = pt_access & (pte ^ walk_nx_mask);
+
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
@@ -379,14 +386,16 @@ retry_walk:
 			goto error;
 		}
 
-		accessed_dirty &= pte;
-		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
-	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+	/* Convert to ACC_*_MASK flags for struct guest_walker. */
+	walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+	walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
 	if (unlikely(errcode))
 		goto error;
 
@@ -403,7 +412,7 @@ retry_walk:
 	walker->gfn = real_gpa >> PAGE_SHIFT;
 
 	if (!write_fault)
-		FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
+		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
 	else
 		/*
 		 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -421,10 +430,8 @@ retry_walk:
 		goto retry_walk;
 	}
 
-	walker->pt_access = pt_access;
-	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, pte_access, pt_access);
+		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
 	return 1;
 
 error:
@@ -452,7 +459,7 @@ error:
 	 */
 	if (!(errcode & PFERR_RSVD_MASK)) {
 		vcpu->arch.exit_qualification &= 0x187;
-		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
 	}
 #endif
 	walker->fault.address = addr;
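The error path is where the raw XWR ordering pays off. As a rough sketch of that last hunk's arithmetic (assuming the architectural EPT-violation exit-qualification layout: bits 2:0 describe the attempted access, bits 5:3 report whether the guest's EPT entries allowed read, write, and execute; the 0x187 mask keeps the attempted-access bits and the guest-linear-address information while clearing the permission bits so they can be refilled), the fixup amounts to the following; the function name is invented for illustration:

/* Illustrative only: refill the permission bits (5:3) that nested EPT
 * reports to the L1 hypervisor, from the permissions accumulated during
 * the guest walk (pte_access, raw XWR order: bit 0=R, 1=W, 2=X). */
static unsigned long fixup_exit_qualification(unsigned long exit_qual,
                                              unsigned long pte_access)
{
        exit_qual &= 0x187;                   /* keep attempted-access and GLA bits */
        exit_qual |= (pte_access & 0x7) << 3; /* guest permissions, XWR order */
        return exit_qual;
}

So a write to a page that the guest's EPT tables map read-only now yields 0x2 (attempted write) plus 0x8 (readable only) in the low bits, whereas the old expression ((pt_access & pte) & 0x7) mixed an ACC_*-ordered value with raw PTE bits and could report the wrong permissions.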