Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--  arch/x86/kvm/mmu.c | 54
1 file changed, 37 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7837ab001d80..eee455a8a612 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -182,7 +182,7 @@ struct kvm_shadow_walk_iterator {
 
 static const union kvm_mmu_page_role mmu_base_role_mask = {
         .cr0_wp = 1,
-        .cr4_pae = 1,
+        .gpte_is_8_bytes = 1,
         .nxe = 1,
         .smep_andnot_wp = 1,
         .smap_andnot_wp = 1,
@@ -2205,6 +2205,7 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
                                     struct list_head *invalid_list);
 
+
 #define for_each_valid_sp(_kvm, _sp, _gfn)                              \
         hlist_for_each_entry(_sp,                                       \
           &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
@@ -2215,12 +2216,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
         for_each_valid_sp(_kvm, _sp, _gfn)                              \
                 if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
 
+static inline bool is_ept_sp(struct kvm_mmu_page *sp)
+{
+        return sp->role.cr0_wp && sp->role.smap_andnot_wp;
+}
+
 /* @sp->gfn should be write-protected at the call site */
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                             struct list_head *invalid_list)
 {
-        if (sp->role.cr4_pae != !!is_pae(vcpu)
-            || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
+        if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) ||
+            vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
                 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
                 return false;
         }
@@ -2423,7 +2429,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
         role.level = level;
         role.direct = direct;
         if (role.direct)
-                role.cr4_pae = 0;
+                role.gpte_is_8_bytes = true;
         role.access = access;
         if (!vcpu->arch.mmu->direct_map
             && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
@@ -4794,7 +4800,6 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 
         role.base.access = ACC_ALL;
         role.base.nxe = !!is_nx(vcpu);
-        role.base.cr4_pae = !!is_pae(vcpu);
         role.base.cr0_wp = is_write_protection(vcpu);
         role.base.smm = is_smm(vcpu);
         role.base.guest_mode = is_guest_mode(vcpu);
@@ -4815,6 +4820,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
         role.base.ad_disabled = (shadow_accessed_mask == 0);
         role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
         role.base.direct = true;
+        role.base.gpte_is_8_bytes = true;
 
         return role;
 }
@@ -4879,6 +4885,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
         role.base.smap_andnot_wp = role.ext.cr4_smap &&
                                    !is_write_protection(vcpu);
         role.base.direct = !is_paging(vcpu);
+        role.base.gpte_is_8_bytes = !!is_pae(vcpu);
 
         if (!is_long_mode(vcpu))
                 role.base.level = PT32E_ROOT_LEVEL;
@@ -4918,18 +4925,26 @@ static union kvm_mmu_role
 kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
                                    bool execonly)
 {
-        union kvm_mmu_role role;
+        union kvm_mmu_role role = {0};
 
-        /* Base role is inherited from root_mmu */
-        role.base.word = vcpu->arch.root_mmu.mmu_role.base.word;
-        role.ext = kvm_calc_mmu_role_ext(vcpu);
+        /* SMM flag is inherited from root_mmu */
+        role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
 
         role.base.level = PT64_ROOT_4LEVEL;
+        role.base.gpte_is_8_bytes = true;
         role.base.direct = false;
         role.base.ad_disabled = !accessed_dirty;
         role.base.guest_mode = true;
         role.base.access = ACC_ALL;
 
+        /*
+         * WP=1 and NOT_WP=1 is an impossible combination, use WP and the
+         * SMAP variation to denote shadow EPT entries.
+         */
+        role.base.cr0_wp = true;
+        role.base.smap_andnot_wp = true;
+
+        role.ext = kvm_calc_mmu_role_ext(vcpu);
         role.ext.execonly = execonly;
 
         return role;
@@ -5179,7 +5194,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
                  gpa, bytes, sp->role.word);
 
         offset = offset_in_page(gpa);
-        pte_size = sp->role.cr4_pae ? 8 : 4;
+        pte_size = sp->role.gpte_is_8_bytes ? 8 : 4;
 
         /*
          * Sometimes, the OS only writes the last one bytes to update status
@@ -5203,7 +5218,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
         page_offset = offset_in_page(gpa);
         level = sp->role.level;
         *nspte = 1;
-        if (!sp->role.cr4_pae) {
+        if (!sp->role.gpte_is_8_bytes) {
                 page_offset <<= 1;      /* 32->64 */
                 /*
                  * A 32-bit pde maps 4MB while the shadow pdes map
@@ -5393,10 +5408,12 @@ emulate:
          * This can happen if a guest gets a page-fault on data access but the HW
          * table walker is not able to read the instruction page (e.g instruction
          * page is not present in memory). In those cases we simply restart the
-         * guest.
+         * guest, with the exception of AMD Erratum 1096 which is unrecoverable.
          */
-        if (unlikely(insn && !insn_len))
-                return 1;
+        if (unlikely(insn && !insn_len)) {
+                if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu))
+                        return 1;
+        }
 
         er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
@@ -5509,7 +5526,9 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
                 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
                         if (flush && lock_flush_tlb) {
-                                kvm_flush_remote_tlbs(kvm);
+                                kvm_flush_remote_tlbs_with_address(kvm,
+                                                start_gfn,
+                                                iterator.gfn - start_gfn + 1);
                                 flush = false;
                         }
                         cond_resched_lock(&kvm->mmu_lock);
@@ -5517,7 +5536,8 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
         }
 
         if (flush && lock_flush_tlb) {
-                kvm_flush_remote_tlbs(kvm);
+                kvm_flush_remote_tlbs_with_address(kvm, start_gfn,
+                                end_gfn - start_gfn + 1);
                 flush = false;
         }
 
@@ -6011,7 +6031,7 @@ out:
 /*
  * Calculate mmu pages needed for kvm.
  */
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
 {
         unsigned int nr_mmu_pages;
         unsigned int nr_pages = 0;
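
For readers skimming the diff, the following is a small standalone C sketch of the role encoding the patch relies on. It is not kernel code: the struct, the sketch_* helper names and main() are illustrative stand-ins, while the bit names mirror the diff above. It shows that gpte_is_8_bytes replaces cr4_pae as the guest-PTE-size flag (as used in detect_write_misaligned() and get_written_sptes()), and that the normally impossible cr0_wp + smap_andnot_wp combination is reused to mark shadow EPT pages, as in is_ept_sp().

/* Illustrative sketch only -- simplified stand-in for the kernel's
 * union kvm_mmu_page_role; field names mirror the diff above. */
#include <stdbool.h>
#include <stdio.h>

struct page_role_sketch {
        bool cr0_wp;
        bool smap_andnot_wp;
        bool gpte_is_8_bytes;   /* replaces the old cr4_pae flag */
};

/* Shadow EPT pages carry the otherwise impossible WP + NOT_WP combination. */
static bool sketch_is_ept_sp(const struct page_role_sketch *r)
{
        return r->cr0_wp && r->smap_andnot_wp;
}

/* Guest PTE size is read directly from gpte_is_8_bytes. */
static int sketch_gpte_size(const struct page_role_sketch *r)
{
        return r->gpte_is_8_bytes ? 8 : 4;
}

int main(void)
{
        struct page_role_sketch ept = {
                .cr0_wp = true, .smap_andnot_wp = true, .gpte_is_8_bytes = true,
        };
        struct page_role_sketch legacy32 = { 0 };       /* 32-bit non-PAE guest */

        printf("ept: is_ept_sp=%d gpte=%d bytes\n",
               sketch_is_ept_sp(&ept), sketch_gpte_size(&ept));
        printf("legacy32: is_ept_sp=%d gpte=%d bytes\n",
               sketch_is_ept_sp(&legacy32), sketch_gpte_size(&legacy32));
        return 0;
}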