aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Marcelo Tosatti <mtosatti@redhat.com> 2010-10-22 12:18:18 -0400
committer Avi Kivity <avi@redhat.com> 2011-01-12 04:28:40 -0500
commit 612819c3c6e67bac8fceaa7cc402f13b1b63f7e4 (patch)
tree 3739b8420660fc4de8d37d26004d9992e92acbe3
parent 7905d9a5ad7a83f1c1c00559839857ab90afbdfc (diff)
KVM: propagate fault r/w information to gup(), allow read-only memory
As suggested by Andrea, pass the r/w error code to gup(), upgrading a read fault to writable if the host pte allows it.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r-- arch/x86/kvm/mmu.c 27
-rw-r--r-- arch/x86/kvm/paging_tmpl.h 13
-rw-r--r-- include/linux/kvm_host.h 5
-rw-r--r-- virt/kvm/kvm_main.c 51
4 files changed, 71 insertions, 25 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 99433943170c..53509f5973db 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2216,7 +2216,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
2216} 2216}
2217 2217
2218static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, 2218static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2219 int level, gfn_t gfn, pfn_t pfn) 2219 int map_writable, int level, gfn_t gfn, pfn_t pfn)
2220{ 2220{
2221 struct kvm_shadow_walk_iterator iterator; 2221 struct kvm_shadow_walk_iterator iterator;
2222 struct kvm_mmu_page *sp; 2222 struct kvm_mmu_page *sp;
@@ -2225,9 +2225,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2225 2225
2226 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2226 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2227 if (iterator.level == level) { 2227 if (iterator.level == level) {
2228 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 2228 unsigned pte_access = ACC_ALL;
2229
2230 if (!map_writable)
2231 pte_access &= ~ACC_WRITE_MASK;
2232 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
2229 0, write, 1, &pt_write, 2233 0, write, 1, &pt_write,
2230 level, gfn, pfn, false, true); 2234 level, gfn, pfn, false, map_writable);
2231 direct_pte_prefetch(vcpu, iterator.sptep); 2235 direct_pte_prefetch(vcpu, iterator.sptep);
2232 ++vcpu->stat.pf_fixed; 2236 ++vcpu->stat.pf_fixed;
2233 break; 2237 break;
@@ -2288,6 +2292,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
2288 int level; 2292 int level;
2289 pfn_t pfn; 2293 pfn_t pfn;
2290 unsigned long mmu_seq; 2294 unsigned long mmu_seq;
2295 bool map_writable;
2291 2296
2292 level = mapping_level(vcpu, gfn); 2297 level = mapping_level(vcpu, gfn);
2293 2298
@@ -2302,7 +2307,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
2302 2307
2303 mmu_seq = vcpu->kvm->mmu_notifier_seq; 2308 mmu_seq = vcpu->kvm->mmu_notifier_seq;
2304 smp_rmb(); 2309 smp_rmb();
2305 pfn = gfn_to_pfn(vcpu->kvm, gfn); 2310 pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, &map_writable);
2306 2311
2307 /* mmio */ 2312 /* mmio */
2308 if (is_error_pfn(pfn)) 2313 if (is_error_pfn(pfn))
@@ -2312,7 +2317,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
2312 if (mmu_notifier_retry(vcpu, mmu_seq)) 2317 if (mmu_notifier_retry(vcpu, mmu_seq))
2313 goto out_unlock; 2318 goto out_unlock;
2314 kvm_mmu_free_some_pages(vcpu); 2319 kvm_mmu_free_some_pages(vcpu);
2315 r = __direct_map(vcpu, v, write, level, gfn, pfn); 2320 r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
2316 spin_unlock(&vcpu->kvm->mmu_lock); 2321 spin_unlock(&vcpu->kvm->mmu_lock);
2317 2322
2318 2323
@@ -2611,11 +2616,11 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu)
2611} 2616}
2612 2617
2613static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn, 2618static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
2614 gva_t gva, pfn_t *pfn) 2619 gva_t gva, pfn_t *pfn, bool write, bool *writable)
2615{ 2620{
2616 bool async; 2621 bool async;
2617 2622
2618 *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async); 2623 *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
2619 2624
2620 if (!async) 2625 if (!async)
2621 return false; /* *pfn has correct page already */ 2626 return false; /* *pfn has correct page already */
@@ -2632,7 +2637,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
2632 return true; 2637 return true;
2633 } 2638 }
2634 2639
2635 *pfn = gfn_to_pfn(vcpu->kvm, gfn); 2640 *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
2636 2641
2637 return false; 2642 return false;
2638} 2643}
@@ -2645,6 +2650,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
2645 int level; 2650 int level;
2646 gfn_t gfn = gpa >> PAGE_SHIFT; 2651 gfn_t gfn = gpa >> PAGE_SHIFT;
2647 unsigned long mmu_seq; 2652 unsigned long mmu_seq;
2653 int write = error_code & PFERR_WRITE_MASK;
2654 bool map_writable;
2648 2655
2649 ASSERT(vcpu); 2656 ASSERT(vcpu);
2650 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); 2657 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2660,7 +2667,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
2660 mmu_seq = vcpu->kvm->mmu_notifier_seq; 2667 mmu_seq = vcpu->kvm->mmu_notifier_seq;
2661 smp_rmb(); 2668 smp_rmb();
2662 2669
2663 if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn)) 2670 if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn, write, &map_writable))
2664 return 0; 2671 return 0;
2665 2672
2666 /* mmio */ 2673 /* mmio */
@@ -2670,7 +2677,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
2670 if (mmu_notifier_retry(vcpu, mmu_seq)) 2677 if (mmu_notifier_retry(vcpu, mmu_seq))
2671 goto out_unlock; 2678 goto out_unlock;
2672 kvm_mmu_free_some_pages(vcpu); 2679 kvm_mmu_free_some_pages(vcpu);
2673 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, 2680 r = __direct_map(vcpu, gpa, write, map_writable,
2674 level, gfn, pfn); 2681 level, gfn, pfn);
2675 spin_unlock(&vcpu->kvm->mmu_lock); 2682 spin_unlock(&vcpu->kvm->mmu_lock);
2676 2683
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d6b281e989b1..ba00eefa7bcd 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -427,7 +427,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
427static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 427static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
428 struct guest_walker *gw, 428 struct guest_walker *gw,
429 int user_fault, int write_fault, int hlevel, 429 int user_fault, int write_fault, int hlevel,
430 int *ptwrite, pfn_t pfn) 430 int *ptwrite, pfn_t pfn, bool map_writable)
431{ 431{
432 unsigned access = gw->pt_access; 432 unsigned access = gw->pt_access;
433 struct kvm_mmu_page *sp = NULL; 433 struct kvm_mmu_page *sp = NULL;
@@ -501,7 +501,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
501 501
502 mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, 502 mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
503 user_fault, write_fault, dirty, ptwrite, it.level, 503 user_fault, write_fault, dirty, ptwrite, it.level,
504 gw->gfn, pfn, false, true); 504 gw->gfn, pfn, false, map_writable);
505 FNAME(pte_prefetch)(vcpu, gw, it.sptep); 505 FNAME(pte_prefetch)(vcpu, gw, it.sptep);
506 506
507 return it.sptep; 507 return it.sptep;
@@ -539,6 +539,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
539 pfn_t pfn; 539 pfn_t pfn;
540 int level = PT_PAGE_TABLE_LEVEL; 540 int level = PT_PAGE_TABLE_LEVEL;
541 unsigned long mmu_seq; 541 unsigned long mmu_seq;
542 bool map_writable;
542 543
543 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 544 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
544 545
@@ -569,13 +570,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
569 mmu_seq = vcpu->kvm->mmu_notifier_seq; 570 mmu_seq = vcpu->kvm->mmu_notifier_seq;
570 smp_rmb(); 571 smp_rmb();
571 572
572 if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn)) 573 if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn, write_fault,
574 &map_writable))
573 return 0; 575 return 0;
574 576
575 /* mmio */ 577 /* mmio */
576 if (is_error_pfn(pfn)) 578 if (is_error_pfn(pfn))
577 return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); 579 return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
578 580
581 if (!map_writable)
582 walker.pte_access &= ~ACC_WRITE_MASK;
583
579 spin_lock(&vcpu->kvm->mmu_lock); 584 spin_lock(&vcpu->kvm->mmu_lock);
580 if (mmu_notifier_retry(vcpu, mmu_seq)) 585 if (mmu_notifier_retry(vcpu, mmu_seq))
581 goto out_unlock; 586 goto out_unlock;
@@ -583,7 +588,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
583 trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); 588 trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
584 kvm_mmu_free_some_pages(vcpu); 589 kvm_mmu_free_some_pages(vcpu);
585 sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 590 sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
586 level, &write_pt, pfn); 591 level, &write_pt, pfn, map_writable);
587 (void)sptep; 592 (void)sptep;
588 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, 593 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
589 sptep, *sptep, write_pt); 594 sptep, *sptep, write_pt);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ee4314e15ead..462b982fedfb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -334,8 +334,11 @@ void kvm_set_page_accessed(struct page *page);
334 334
335pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); 335pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
336pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); 336pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
337pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async); 337pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
338 bool write_fault, bool *writable);
338pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); 339pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
340pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
341 bool *writable);
339pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 342pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
340 struct kvm_memory_slot *slot, gfn_t gfn); 343 struct kvm_memory_slot *slot, gfn_t gfn);
341int memslot_id(struct kvm *kvm, gfn_t gfn); 344int memslot_id(struct kvm *kvm, gfn_t gfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 475a100f3a22..2803b4db2a38 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -959,7 +959,7 @@ static pfn_t get_fault_pfn(void)
959} 959}
960 960
961static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, 961static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
962 bool *async) 962 bool *async, bool write_fault, bool *writable)
963{ 963{
964 struct page *page[1]; 964 struct page *page[1];
965 int npages = 0; 965 int npages = 0;
@@ -968,12 +968,34 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
968 /* we can do it either atomically or asynchronously, not both */ 968 /* we can do it either atomically or asynchronously, not both */
969 BUG_ON(atomic && async); 969 BUG_ON(atomic && async);
970 970
971 BUG_ON(!write_fault && !writable);
972
973 if (writable)
974 *writable = true;
975
971 if (atomic || async) 976 if (atomic || async)
972 npages = __get_user_pages_fast(addr, 1, 1, page); 977 npages = __get_user_pages_fast(addr, 1, 1, page);
973 978
974 if (unlikely(npages != 1) && !atomic) { 979 if (unlikely(npages != 1) && !atomic) {
975 might_sleep(); 980 might_sleep();
976 npages = get_user_pages_fast(addr, 1, 1, page); 981
982 if (writable)
983 *writable = write_fault;
984
985 npages = get_user_pages_fast(addr, 1, write_fault, page);
986
987 /* map read fault as writable if possible */
988 if (unlikely(!write_fault) && npages == 1) {
989 struct page *wpage[1];
990
991 npages = __get_user_pages_fast(addr, 1, 1, wpage);
992 if (npages == 1) {
993 *writable = true;
994 put_page(page[0]);
995 page[0] = wpage[0];
996 }
997 npages = 1;
998 }
977 } 999 }
978 1000
979 if (unlikely(npages != 1)) { 1001 if (unlikely(npages != 1)) {
@@ -1011,11 +1033,12 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
1011 1033
1012pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) 1034pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
1013{ 1035{
1014 return hva_to_pfn(kvm, addr, true, NULL); 1036 return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
1015} 1037}
1016EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); 1038EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
1017 1039
1018static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async) 1040static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
1041 bool write_fault, bool *writable)
1019{ 1042{
1020 unsigned long addr; 1043 unsigned long addr;
1021 1044
@@ -1028,32 +1051,40 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
1028 return page_to_pfn(bad_page); 1051 return page_to_pfn(bad_page);
1029 } 1052 }
1030 1053
1031 return hva_to_pfn(kvm, addr, atomic, async); 1054 return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
1032} 1055}
1033 1056
1034pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) 1057pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
1035{ 1058{
1036 return __gfn_to_pfn(kvm, gfn, true, NULL); 1059 return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
1037} 1060}
1038EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); 1061EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
1039 1062
1040pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async) 1063pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
1064 bool write_fault, bool *writable)
1041{ 1065{
1042 return __gfn_to_pfn(kvm, gfn, false, async); 1066 return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
1043} 1067}
1044EXPORT_SYMBOL_GPL(gfn_to_pfn_async); 1068EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
1045 1069
1046pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) 1070pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
1047{ 1071{
1048 return __gfn_to_pfn(kvm, gfn, false, NULL); 1072 return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
1049} 1073}
1050EXPORT_SYMBOL_GPL(gfn_to_pfn); 1074EXPORT_SYMBOL_GPL(gfn_to_pfn);
1051 1075
1076pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
1077 bool *writable)
1078{
1079 return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
1080}
1081EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
1082
1052pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 1083pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
1053 struct kvm_memory_slot *slot, gfn_t gfn) 1084 struct kvm_memory_slot *slot, gfn_t gfn)
1054{ 1085{
1055 unsigned long addr = gfn_to_hva_memslot(slot, gfn); 1086 unsigned long addr = gfn_to_hva_memslot(slot, gfn);
1056 return hva_to_pfn(kvm, addr, false, NULL); 1087 return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
1057} 1088}
1058 1089
1059int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, 1090int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,