summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
authorSuzuki K Poulose <suzuki.poulose@arm.com>2019-03-20 10:57:19 -0400
committerMarc Zyngier <marc.zyngier@arm.com>2019-03-20 13:29:55 -0400
commit3c3736cd32bf5197aed1410ae826d2d254a5b277 (patch)
tree721732d76a77b19dc5e3bd104931caad39dd3b4d /virt
parenta80868f398554842b14d07060012c06efb57c456 (diff)
KVM: arm/arm64: Fix handling of stage2 huge mappings
We rely on the mmu_notifier call backs to handle the split/merge of huge pages and thus we are guaranteed that, while creating a block mapping, either the entire block is unmapped at stage2 or it is missing permission. However, we miss a case where the block mapping is split for dirty logging case and then could later be made a block mapping, if we cancel the dirty logging. This not only creates inconsistent TLB entries for the pages in the block, but also leaks the table pages for PMD level. Handle this corner case for the huge mappings at stage2 by unmapping the non-huge mapping for the block. This could potentially release the upper level table. So we need to restart the table walk once we unmap the range. Fixes: ad361f093c1e31d ("KVM: ARM: Support hugetlbfs backed huge pages") Reported-by: Zheng Xiang <zhengxiang9@huawei.com> Cc: Zheng Xiang <zhengxiang9@huawei.com> Cc: Zenghui Yu <yuzenghui@huawei.com> Cc: Christoffer Dall <christoffer.dall@arm.com> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/mmu.c59
1 file changed, 43 insertions, 16 deletions
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index bcdf978c0d1d..f9da2fad9bd6 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1067,25 +1067,43 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
1067{ 1067{
1068 pmd_t *pmd, old_pmd; 1068 pmd_t *pmd, old_pmd;
1069 1069
1070retry:
1070 pmd = stage2_get_pmd(kvm, cache, addr); 1071 pmd = stage2_get_pmd(kvm, cache, addr);
1071 VM_BUG_ON(!pmd); 1072 VM_BUG_ON(!pmd);
1072 1073
1073 old_pmd = *pmd; 1074 old_pmd = *pmd;
1075 /*
1076 * Multiple vcpus faulting on the same PMD entry, can
1077 * lead to them sequentially updating the PMD with the
1078 * same value. Following the break-before-make
1079 * (pmd_clear() followed by tlb_flush()) process can
1080 * hinder forward progress due to refaults generated
1081 * on missing translations.
1082 *
1083 * Skip updating the page table if the entry is
1084 * unchanged.
1085 */
1086 if (pmd_val(old_pmd) == pmd_val(*new_pmd))
1087 return 0;
1088
1074 if (pmd_present(old_pmd)) { 1089 if (pmd_present(old_pmd)) {
1075 /* 1090 /*
1076 * Multiple vcpus faulting on the same PMD entry, can 1091 * If we already have PTE level mapping for this block,
1077 * lead to them sequentially updating the PMD with the 1092 * we must unmap it to avoid inconsistent TLB state and
1078 * same value. Following the break-before-make 1093 * leaking the table page. We could end up in this situation
1079 * (pmd_clear() followed by tlb_flush()) process can 1094 * if the memory slot was marked for dirty logging and was
1080 * hinder forward progress due to refaults generated 1095 * reverted, leaving PTE level mappings for the pages accessed
1081 * on missing translations. 1096 * during the period. So, unmap the PTE level mapping for this
1097 * block and retry, as we could have released the upper level
1098 * table in the process.
1082 * 1099 *
1083 * Skip updating the page table if the entry is 1100 * Normal THP split/merge follows mmu_notifier callbacks and do
1084 * unchanged. 1101 * get handled accordingly.
1085 */ 1102 */
1086 if (pmd_val(old_pmd) == pmd_val(*new_pmd)) 1103 if (!pmd_thp_or_huge(old_pmd)) {
1087 return 0; 1104 unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
1088 1105 goto retry;
1106 }
1089 /* 1107 /*
1090 * Mapping in huge pages should only happen through a 1108 * Mapping in huge pages should only happen through a
1091 * fault. If a page is merged into a transparent huge 1109 * fault. If a page is merged into a transparent huge
@@ -1097,8 +1115,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
1097 * should become splitting first, unmapped, merged, 1115 * should become splitting first, unmapped, merged,
1098 * and mapped back in on-demand. 1116 * and mapped back in on-demand.
1099 */ 1117 */
1100 VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); 1118 WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
1101
1102 pmd_clear(pmd); 1119 pmd_clear(pmd);
1103 kvm_tlb_flush_vmid_ipa(kvm, addr); 1120 kvm_tlb_flush_vmid_ipa(kvm, addr);
1104 } else { 1121 } else {
@@ -1114,6 +1131,7 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac
1114{ 1131{
1115 pud_t *pudp, old_pud; 1132 pud_t *pudp, old_pud;
1116 1133
1134retry:
1117 pudp = stage2_get_pud(kvm, cache, addr); 1135 pudp = stage2_get_pud(kvm, cache, addr);
1118 VM_BUG_ON(!pudp); 1136 VM_BUG_ON(!pudp);
1119 1137
@@ -1121,14 +1139,23 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac
1121 1139
1122 /* 1140 /*
1123 * A large number of vcpus faulting on the same stage 2 entry, 1141 * A large number of vcpus faulting on the same stage 2 entry,
1124 * can lead to a refault due to the 1142 * can lead to a refault due to the stage2_pud_clear()/tlb_flush().
1125 * stage2_pud_clear()/tlb_flush(). Skip updating the page 1143 * Skip updating the page tables if there is no change.
1126 * tables if there is no change.
1127 */ 1144 */
1128 if (pud_val(old_pud) == pud_val(*new_pudp)) 1145 if (pud_val(old_pud) == pud_val(*new_pudp))
1129 return 0; 1146 return 0;
1130 1147
1131 if (stage2_pud_present(kvm, old_pud)) { 1148 if (stage2_pud_present(kvm, old_pud)) {
1149 /*
1150 * If we already have table level mapping for this block, unmap
1151 * the range for this block and retry.
1152 */
1153 if (!stage2_pud_huge(kvm, old_pud)) {
1154 unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
1155 goto retry;
1156 }
1157
1158 WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
1132 stage2_pud_clear(kvm, pudp); 1159 stage2_pud_clear(kvm, pudp);
1133 kvm_tlb_flush_vmid_ipa(kvm, addr); 1160 kvm_tlb_flush_vmid_ipa(kvm, addr);
1134 } else { 1161 } else {