about summary refs log tree commit diff stats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorAvi Kivity <avi@redhat.com>2012-09-16 07:18:51 -0400
committerAvi Kivity <avi@redhat.com>2012-09-20 06:00:08 -0400
commit8cbc70696f149e44753b0fe60162b4ff96c2dd2b (patch)
tree79729287462257080b071f584bdba7a7ef9a25ea /arch/x86/kvm
parent3d34adec7081621ff51c195be045b87d75c0c49d (diff)
KVM: MMU: Update accessed and dirty bits after guest pagetable walk
While unspecified, the behaviour of Intel processors is to first perform the page table walk, then, if the walk was successful, to atomically update the accessed and dirty bits of walked paging elements.

While we are not required to follow this exactly, doing so will allow us to perform the access permissions check after the walk is complete, rather than after each walk step.

(the tricky case is SMEP: a zero in any pte's U bit makes the referenced page a supervisor page, so we can't fault on a one bit during the walk itself).

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/paging_tmpl.h76
1 file changed, 47 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1cbf576852ca..35a05dd2f69c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -63,10 +63,12 @@
63 */ 63 */
64struct guest_walker { 64struct guest_walker {
65 int level; 65 int level;
66 unsigned max_level;
66 gfn_t table_gfn[PT_MAX_FULL_LEVELS]; 67 gfn_t table_gfn[PT_MAX_FULL_LEVELS];
67 pt_element_t ptes[PT_MAX_FULL_LEVELS]; 68 pt_element_t ptes[PT_MAX_FULL_LEVELS];
68 pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; 69 pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
69 gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; 70 gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
71 pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
70 unsigned pt_access; 72 unsigned pt_access;
71 unsigned pte_access; 73 unsigned pte_access;
72 gfn_t gfn; 74 gfn_t gfn;
@@ -119,6 +121,43 @@ static bool FNAME(is_last_gpte)(struct guest_walker *walker,
119 return false; 121 return false;
120} 122}
121 123
124static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
125 struct kvm_mmu *mmu,
126 struct guest_walker *walker,
127 int write_fault)
128{
129 unsigned level, index;
130 pt_element_t pte, orig_pte;
131 pt_element_t __user *ptep_user;
132 gfn_t table_gfn;
133 int ret;
134
135 for (level = walker->max_level; level >= walker->level; --level) {
136 pte = orig_pte = walker->ptes[level - 1];
137 table_gfn = walker->table_gfn[level - 1];
138 ptep_user = walker->ptep_user[level - 1];
139 index = offset_in_page(ptep_user) / sizeof(pt_element_t);
140 if (!(pte & PT_ACCESSED_MASK)) {
141 trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
142 pte |= PT_ACCESSED_MASK;
143 }
144 if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
145 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
146 pte |= PT_DIRTY_MASK;
147 }
148 if (pte == orig_pte)
149 continue;
150
151 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
152 if (ret)
153 return ret;
154
155 mark_page_dirty(vcpu->kvm, table_gfn);
156 walker->ptes[level] = pte;
157 }
158 return 0;
159}
160
122/* 161/*
123 * Fetch a guest pte for a guest virtual address 162 * Fetch a guest pte for a guest virtual address
124 */ 163 */
@@ -126,6 +165,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
126 struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, 165 struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
127 gva_t addr, u32 access) 166 gva_t addr, u32 access)
128{ 167{
168 int ret;
129 pt_element_t pte; 169 pt_element_t pte;
130 pt_element_t __user *uninitialized_var(ptep_user); 170 pt_element_t __user *uninitialized_var(ptep_user);
131 gfn_t table_gfn; 171 gfn_t table_gfn;
@@ -153,6 +193,7 @@ retry_walk:
153 --walker->level; 193 --walker->level;
154 } 194 }
155#endif 195#endif
196 walker->max_level = walker->level;
156 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || 197 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
157 (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); 198 (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
158 199
@@ -183,6 +224,7 @@ retry_walk:
183 ptep_user = (pt_element_t __user *)((void *)host_addr + offset); 224 ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
184 if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) 225 if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
185 goto error; 226 goto error;
227 walker->ptep_user[walker->level - 1] = ptep_user;
186 228
187 trace_kvm_mmu_paging_element(pte, walker->level); 229 trace_kvm_mmu_paging_element(pte, walker->level);
188 230
@@ -214,21 +256,6 @@ retry_walk:
214 eperm = true; 256 eperm = true;
215 } 257 }
216 258
217 if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
218 int ret;
219 trace_kvm_mmu_set_accessed_bit(table_gfn, index,
220 sizeof(pte));
221 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
222 pte, pte|PT_ACCESSED_MASK);
223 if (unlikely(ret < 0))
224 goto error;
225 else if (ret)
226 goto retry_walk;
227
228 mark_page_dirty(vcpu->kvm, table_gfn);
229 pte |= PT_ACCESSED_MASK;
230 }
231
232 walker->ptes[walker->level - 1] = pte; 259 walker->ptes[walker->level - 1] = pte;
233 260
234 if (last_gpte) { 261 if (last_gpte) {
@@ -268,21 +295,12 @@ retry_walk:
268 295
269 if (!write_fault) 296 if (!write_fault)
270 protect_clean_gpte(&pte_access, pte); 297 protect_clean_gpte(&pte_access, pte);
271 else if (unlikely(!is_dirty_gpte(pte))) {
272 int ret;
273 298
274 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); 299 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
275 ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, 300 if (unlikely(ret < 0))
276 pte, pte|PT_DIRTY_MASK); 301 goto error;
277 if (unlikely(ret < 0)) 302 else if (ret)
278 goto error; 303 goto retry_walk;
279 else if (ret)
280 goto retry_walk;
281
282 mark_page_dirty(vcpu->kvm, table_gfn);
283 pte |= PT_DIRTY_MASK;
284 walker->ptes[walker->level - 1] = pte;
285 }
286 304
287 walker->pt_access = pt_access; 305 walker->pt_access = pt_access;
288 walker->pte_access = pte_access; 306 walker->pte_access = pte_access;