diff options
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 47 |
1 files changed, 37 insertions, 10 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 96c90447d4bf..80561074078d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -1060,6 +1060,11 @@ void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) | |||
1060 | kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | 1060 | kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); |
1061 | } | 1061 | } |
1062 | 1062 | ||
1063 | static int vcpus_running(struct kvm *kvm) | ||
1064 | { | ||
1065 | return atomic_read(&kvm->arch.vcpus_running) != 0; | ||
1066 | } | ||
1067 | |||
1063 | /* | 1068 | /* |
1064 | * Returns the number of system pages that are dirty. | 1069 | * Returns the number of system pages that are dirty. |
1065 | * This can be more than 1 if we find a huge-page HPTE. | 1070 | * This can be more than 1 if we find a huge-page HPTE. |
@@ -1069,6 +1074,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) | |||
1069 | struct revmap_entry *rev = kvm->arch.revmap; | 1074 | struct revmap_entry *rev = kvm->arch.revmap; |
1070 | unsigned long head, i, j; | 1075 | unsigned long head, i, j; |
1071 | unsigned long n; | 1076 | unsigned long n; |
1077 | unsigned long v, r; | ||
1072 | unsigned long *hptep; | 1078 | unsigned long *hptep; |
1073 | int npages_dirty = 0; | 1079 | int npages_dirty = 0; |
1074 | 1080 | ||
@@ -1088,7 +1094,22 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) | |||
1088 | hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); | 1094 | hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); |
1089 | j = rev[i].forw; | 1095 | j = rev[i].forw; |
1090 | 1096 | ||
1091 | if (!(hptep[1] & HPTE_R_C)) | 1097 | /* |
1098 | * Checking the C (changed) bit here is racy since there | ||
1099 | * is no guarantee about when the hardware writes it back. | ||
1100 | * If the HPTE is not writable then it is stable since the | ||
1101 | * page can't be written to, and we would have done a tlbie | ||
1102 | * (which forces the hardware to complete any writeback) | ||
1103 | * when making the HPTE read-only. | ||
1104 | * If vcpus are running then this call is racy anyway | ||
1105 | * since the page could get dirtied subsequently, so we | ||
1106 | * expect there to be a further call which would pick up | ||
1107 | * any delayed C bit writeback. | ||
1108 | * Otherwise we need to do the tlbie even if C==0 in | ||
1109 | * order to pick up any delayed writeback of C. | ||
1110 | */ | ||
1111 | if (!(hptep[1] & HPTE_R_C) && | ||
1112 | (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) | ||
1092 | continue; | 1113 | continue; |
1093 | 1114 | ||
1094 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { | 1115 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { |
@@ -1100,23 +1121,29 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) | |||
1100 | } | 1121 | } |
1101 | 1122 | ||
1102 | /* Now check and modify the HPTE */ | 1123 | /* Now check and modify the HPTE */ |
1103 | if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { | 1124 | if (!(hptep[0] & HPTE_V_VALID)) |
1104 | /* need to make it temporarily absent to clear C */ | 1125 | continue; |
1105 | hptep[0] |= HPTE_V_ABSENT; | 1126 | |
1106 | kvmppc_invalidate_hpte(kvm, hptep, i); | 1127 | /* need to make it temporarily absent so C is stable */ |
1107 | hptep[1] &= ~HPTE_R_C; | 1128 | hptep[0] |= HPTE_V_ABSENT; |
1108 | eieio(); | 1129 | kvmppc_invalidate_hpte(kvm, hptep, i); |
1109 | hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; | 1130 | v = hptep[0]; |
1131 | r = hptep[1]; | ||
1132 | if (r & HPTE_R_C) { | ||
1133 | hptep[1] = r & ~HPTE_R_C; | ||
1110 | if (!(rev[i].guest_rpte & HPTE_R_C)) { | 1134 | if (!(rev[i].guest_rpte & HPTE_R_C)) { |
1111 | rev[i].guest_rpte |= HPTE_R_C; | 1135 | rev[i].guest_rpte |= HPTE_R_C; |
1112 | note_hpte_modification(kvm, &rev[i]); | 1136 | note_hpte_modification(kvm, &rev[i]); |
1113 | } | 1137 | } |
1114 | n = hpte_page_size(hptep[0], hptep[1]); | 1138 | n = hpte_page_size(v, r); |
1115 | n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1139 | n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1116 | if (n > npages_dirty) | 1140 | if (n > npages_dirty) |
1117 | npages_dirty = n; | 1141 | npages_dirty = n; |
1142 | eieio(); | ||
1118 | } | 1143 | } |
1119 | hptep[0] &= ~HPTE_V_HVLOCK; | 1144 | v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); |
1145 | v |= HPTE_V_VALID; | ||
1146 | hptep[0] = v; | ||
1120 | } while ((i = j) != head); | 1147 | } while ((i = j) != head); |
1121 | 1148 | ||
1122 | unlock_rmap(rmapp); | 1149 | unlock_rmap(rmapp); |