| author | Paolo Bonzini <pbonzini@redhat.com> | 2017-05-11 07:23:29 -0400 |
|---|---|---|
| committer | Radim Krčmář <rkrcmar@redhat.com> | 2017-05-15 12:22:40 -0400 |
| commit | 0780516a18f87e881e42ed815f189279b0a1743c | |
| tree | 229ce5f517835cdc192664ea8a14cb4b6a779336 | |
| parent | fce6ac4c0508b985d497e3d9c8eff28ec8a43182 | |
KVM: nVMX: fix EPT permissions as reported in exit qualification
This fixes the new ept_access_test_read_only and ept_access_test_read_write
testcases from vmx.flat.
The problem is that gpte_access moves bits around to switch from EPT
bit order (XWR) to ACC_*_MASK bit order (RWX). This results in an
incorrect exit qualification. To fix this, make pt_access and
pte_access operate on raw PTE values (only with NX flipped to mean
"can execute") and call gpte_access at the end of the walk. This
lets us use pte_access to compute the exit qualification with XWR
bit order.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@tencent.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
| -rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 35 |
1 file changed, 21 insertions, 14 deletions
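To make the bit-order problem concrete, here is a minimal user-space sketch, not kernel code, of what goes wrong when ACC_*_MASK-ordered permissions are copied into the exit qualification, which expects raw EPT XWR order in bits 5:3. The mask values and the `ept_gpte_access()` helper are simplified stand-ins modeled on the kernel's VMX_EPT_*_MASK / ACC_*_MASK definitions and the EPT flavour of `gpte_access()`; treat them as assumptions for illustration only.

```c
#include <stdio.h>

/* Simplified stand-ins for the kernel's mask definitions (assumed values). */
#define VMX_EPT_READABLE_MASK   0x1u   /* EPT bit 0: read    */
#define VMX_EPT_WRITABLE_MASK   0x2u   /* EPT bit 1: write   */
#define VMX_EPT_EXECUTABLE_MASK 0x4u   /* EPT bit 2: execute */

#define ACC_EXEC_MASK  1u              /* ACC_* bit 0: execute   */
#define ACC_WRITE_MASK 2u              /* ACC_* bit 1: write     */
#define ACC_USER_MASK  4u              /* ACC_* bit 2: user/read */

/* Rough model of the EPT flavour of gpte_access(): XWR -> ACC_* order. */
static unsigned int ept_gpte_access(unsigned int gpte)
{
	return ((gpte & VMX_EPT_WRITABLE_MASK)   ? ACC_WRITE_MASK : 0) |
	       ((gpte & VMX_EPT_READABLE_MASK)   ? ACC_USER_MASK  : 0) |
	       ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK  : 0);
}

static void show(const char *what, unsigned int qual)
{
	/* Exit qualification bits 3..5 report readable/writable/executable. */
	printf("%s: R=%u W=%u X=%u\n", what,
	       !!(qual & (1u << 3)), !!(qual & (1u << 4)), !!(qual & (1u << 5)));
}

int main(void)
{
	/* Guest EPT PTE that allows read and write but not execute. */
	unsigned int pte = VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK;

	/* Pre-fix: ACC_*-ordered bits leak into the exit qualification. */
	show("ACC_* order (wrong)", (ept_gpte_access(pte) & 0x7) << 3);

	/* Post-fix: the raw XWR bits are used, as the patch now does. */
	show("raw XWR order      ", (pte & 0x7) << 3);
	return 0;
}
```

With a read+write EPT PTE, the pre-fix path reports the entry as writable and executable; using the raw PTE bits, as the patch does, reports readable and writable.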
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56241746abbd..b0454c7e4cff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,11 +283,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+	u64 pt_access, pte_access;
+	unsigned index, accessed_dirty, pte_pkey;
 	unsigned nested_access;
 	gpa_t pte_gpa;
 	bool have_ad;
 	int offset;
+	u64 walk_nx_mask = 0;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
@@ -302,6 +304,7 @@ retry_walk:
 	have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
 
 #if PTTYPE == 64
+	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
 	if (walker->level == PT32E_ROOT_LEVEL) {
 		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
@@ -313,8 +316,6 @@ retry_walk:
 	walker->max_level = walker->level;
 	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
-	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
-
 	/*
 	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
 	 * by the MOV to CR instruction are treated as reads and do not cause the
@@ -322,14 +323,14 @@ retry_walk:
 	 */
 	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
-	pt_access = pte_access = ACC_ALL;
+	pte_access = ~0;
 	++walker->level;
 
 	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
-		pt_access &= pte_access;
+		pt_access = pte_access;
 		--walker->level;
 
 		index = PT_INDEX(addr, walker->level);
@@ -371,6 +372,12 @@ retry_walk:
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
+		/*
+		 * Inverting the NX it lets us AND it like other
+		 * permission bits.
+		 */
+		pte_access = pt_access & (pte ^ walk_nx_mask);
+
 		if (unlikely(!FNAME(is_present_gpte)(pte)))
 			goto error;
 
@@ -379,14 +386,16 @@ retry_walk:
 			goto error;
 		}
 
-		accessed_dirty &= pte;
-		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
-	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
+
+	/* Convert to ACC_*_MASK flags for struct guest_walker. */
+	walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
+	walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
 	if (unlikely(errcode))
 		goto error;
 
@@ -403,7 +412,7 @@ retry_walk:
 	walker->gfn = real_gpa >> PAGE_SHIFT;
 
 	if (!write_fault)
-		FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
+		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
 	else
 		/*
 		 * On a write fault, fold the dirty bit into accessed_dirty.
@@ -421,10 +430,8 @@ retry_walk:
 		goto retry_walk;
 	}
 
-	walker->pt_access = pt_access;
-	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, pte_access, pt_access);
+		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
 	return 1;
 
 error:
@@ -452,7 +459,7 @@ error:
 	 */
 	if (!(errcode & PFERR_RSVD_MASK)) {
 		vcpu->arch.exit_qualification &= 0x187;
-		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
 	}
 #endif
 	walker->fault.address = addr;
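The "Inverting the NX" comment in the new walker loop is also easy to check in isolation. The sketch below is a hypothetical stand-alone model, not the kernel code: NX is a deny bit, so it would normally combine across levels with OR, while the other permission bits combine with AND; XOR-ing each raw PTE with walk_nx_mask turns NX into a "may execute" bit so a single AND per level accumulates everything, and a final XOR restores NX before the result is handed to gpte_access(). The NX bit position and the two-level example are assumptions made for illustration.

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed value: bit 63 is the NX bit in 64-bit page-table entries. */
#define WALK_NX_MASK (UINT64_C(1) << 63)

int main(void)
{
	/* Hypothetical two-level walk: the upper entry denies execution. */
	uint64_t entries[] = {
		UINT64_C(0x7) | WALK_NX_MASK,	/* P+W+U, NX set   */
		UINT64_C(0x7),			/* P+W+U, NX clear */
	};

	/* Accumulate raw PTE bits; NX is flipped so it ANDs like the rest. */
	uint64_t pte_access = ~UINT64_C(0);
	for (unsigned int i = 0; i < 2; i++)
		pte_access &= entries[i] ^ WALK_NX_MASK;

	/* Flip NX back: any level with NX set forces NX in the result. */
	uint64_t effective = pte_access ^ WALK_NX_MASK;
	printf("effective NX = %d (expected 1: one level denied execution)\n",
	       !!(effective & WALK_NX_MASK));
	return 0;
}
```

This mirrors the shape of the final conversion in the patch, where pt_access ^ walk_nx_mask and pte_access ^ walk_nx_mask are passed to gpte_access() only once, after the loop, while the raw XWR values remain available for the exit qualification.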
