diff options
Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 252 |
1 files changed, 145 insertions, 107 deletions
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2331bdc2b549..51ef9097960d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -7,6 +7,7 @@ | |||
7 | * MMU support | 7 | * MMU support |
8 | * | 8 | * |
9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
10 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | |
10 | * | 11 | * |
11 | * Authors: | 12 | * Authors: |
12 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -118,21 +119,25 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
118 | { | 119 | { |
119 | pt_element_t pte; | 120 | pt_element_t pte; |
120 | gfn_t table_gfn; | 121 | gfn_t table_gfn; |
121 | unsigned index, pt_access, pte_access; | 122 | unsigned index, pt_access, uninitialized_var(pte_access); |
122 | gpa_t pte_gpa; | 123 | gpa_t pte_gpa; |
123 | int rsvd_fault = 0; | 124 | bool eperm, present, rsvd_fault; |
124 | 125 | ||
125 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, | 126 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, |
126 | fetch_fault); | 127 | fetch_fault); |
127 | walk: | 128 | walk: |
129 | present = true; | ||
130 | eperm = rsvd_fault = false; | ||
128 | walker->level = vcpu->arch.mmu.root_level; | 131 | walker->level = vcpu->arch.mmu.root_level; |
129 | pte = vcpu->arch.cr3; | 132 | pte = vcpu->arch.cr3; |
130 | #if PTTYPE == 64 | 133 | #if PTTYPE == 64 |
131 | if (!is_long_mode(vcpu)) { | 134 | if (!is_long_mode(vcpu)) { |
132 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); | 135 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); |
133 | trace_kvm_mmu_paging_element(pte, walker->level); | 136 | trace_kvm_mmu_paging_element(pte, walker->level); |
134 | if (!is_present_gpte(pte)) | 137 | if (!is_present_gpte(pte)) { |
135 | goto not_present; | 138 | present = false; |
139 | goto error; | ||
140 | } | ||
136 | --walker->level; | 141 | --walker->level; |
137 | } | 142 | } |
138 | #endif | 143 | #endif |
@@ -150,37 +155,42 @@ walk: | |||
150 | walker->table_gfn[walker->level - 1] = table_gfn; | 155 | walker->table_gfn[walker->level - 1] = table_gfn; |
151 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 156 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
152 | 157 | ||
153 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) | 158 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) { |
154 | goto not_present; | 159 | present = false; |
160 | break; | ||
161 | } | ||
155 | 162 | ||
156 | trace_kvm_mmu_paging_element(pte, walker->level); | 163 | trace_kvm_mmu_paging_element(pte, walker->level); |
157 | 164 | ||
158 | if (!is_present_gpte(pte)) | 165 | if (!is_present_gpte(pte)) { |
159 | goto not_present; | 166 | present = false; |
167 | break; | ||
168 | } | ||
160 | 169 | ||
161 | rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); | 170 | if (is_rsvd_bits_set(vcpu, pte, walker->level)) { |
162 | if (rsvd_fault) | 171 | rsvd_fault = true; |
163 | goto access_error; | 172 | break; |
173 | } | ||
164 | 174 | ||
165 | if (write_fault && !is_writable_pte(pte)) | 175 | if (write_fault && !is_writable_pte(pte)) |
166 | if (user_fault || is_write_protection(vcpu)) | 176 | if (user_fault || is_write_protection(vcpu)) |
167 | goto access_error; | 177 | eperm = true; |
168 | 178 | ||
169 | if (user_fault && !(pte & PT_USER_MASK)) | 179 | if (user_fault && !(pte & PT_USER_MASK)) |
170 | goto access_error; | 180 | eperm = true; |
171 | 181 | ||
172 | #if PTTYPE == 64 | 182 | #if PTTYPE == 64 |
173 | if (fetch_fault && (pte & PT64_NX_MASK)) | 183 | if (fetch_fault && (pte & PT64_NX_MASK)) |
174 | goto access_error; | 184 | eperm = true; |
175 | #endif | 185 | #endif |
176 | 186 | ||
177 | if (!(pte & PT_ACCESSED_MASK)) { | 187 | if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { |
178 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 188 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
179 | sizeof(pte)); | 189 | sizeof(pte)); |
180 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
181 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, | 190 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, |
182 | index, pte, pte|PT_ACCESSED_MASK)) | 191 | index, pte, pte|PT_ACCESSED_MASK)) |
183 | goto walk; | 192 | goto walk; |
193 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
184 | pte |= PT_ACCESSED_MASK; | 194 | pte |= PT_ACCESSED_MASK; |
185 | } | 195 | } |
186 | 196 | ||
@@ -213,15 +223,18 @@ walk: | |||
213 | --walker->level; | 223 | --walker->level; |
214 | } | 224 | } |
215 | 225 | ||
226 | if (!present || eperm || rsvd_fault) | ||
227 | goto error; | ||
228 | |||
216 | if (write_fault && !is_dirty_gpte(pte)) { | 229 | if (write_fault && !is_dirty_gpte(pte)) { |
217 | bool ret; | 230 | bool ret; |
218 | 231 | ||
219 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 232 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
220 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
221 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, | 233 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, |
222 | pte|PT_DIRTY_MASK); | 234 | pte|PT_DIRTY_MASK); |
223 | if (ret) | 235 | if (ret) |
224 | goto walk; | 236 | goto walk; |
237 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
225 | pte |= PT_DIRTY_MASK; | 238 | pte |= PT_DIRTY_MASK; |
226 | walker->ptes[walker->level - 1] = pte; | 239 | walker->ptes[walker->level - 1] = pte; |
227 | } | 240 | } |
@@ -229,22 +242,18 @@ walk: | |||
229 | walker->pt_access = pt_access; | 242 | walker->pt_access = pt_access; |
230 | walker->pte_access = pte_access; | 243 | walker->pte_access = pte_access; |
231 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 244 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
232 | __func__, (u64)pte, pt_access, pte_access); | 245 | __func__, (u64)pte, pte_access, pt_access); |
233 | return 1; | 246 | return 1; |
234 | 247 | ||
235 | not_present: | 248 | error: |
236 | walker->error_code = 0; | 249 | walker->error_code = 0; |
237 | goto err; | 250 | if (present) |
238 | 251 | walker->error_code |= PFERR_PRESENT_MASK; | |
239 | access_error: | ||
240 | walker->error_code = PFERR_PRESENT_MASK; | ||
241 | |||
242 | err: | ||
243 | if (write_fault) | 252 | if (write_fault) |
244 | walker->error_code |= PFERR_WRITE_MASK; | 253 | walker->error_code |= PFERR_WRITE_MASK; |
245 | if (user_fault) | 254 | if (user_fault) |
246 | walker->error_code |= PFERR_USER_MASK; | 255 | walker->error_code |= PFERR_USER_MASK; |
247 | if (fetch_fault) | 256 | if (fetch_fault && is_nx(vcpu)) |
248 | walker->error_code |= PFERR_FETCH_MASK; | 257 | walker->error_code |= PFERR_FETCH_MASK; |
249 | if (rsvd_fault) | 258 | if (rsvd_fault) |
250 | walker->error_code |= PFERR_RSVD_MASK; | 259 | walker->error_code |= PFERR_RSVD_MASK; |
@@ -252,7 +261,7 @@ err: | |||
252 | return 0; | 261 | return 0; |
253 | } | 262 | } |
254 | 263 | ||
255 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | 264 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
256 | u64 *spte, const void *pte) | 265 | u64 *spte, const void *pte) |
257 | { | 266 | { |
258 | pt_element_t gpte; | 267 | pt_element_t gpte; |
@@ -263,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
263 | gpte = *(const pt_element_t *)pte; | 272 | gpte = *(const pt_element_t *)pte; |
264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 273 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
265 | if (!is_present_gpte(gpte)) { | 274 | if (!is_present_gpte(gpte)) { |
266 | if (page->unsync) | 275 | if (sp->unsync) |
267 | new_spte = shadow_trap_nonpresent_pte; | 276 | new_spte = shadow_trap_nonpresent_pte; |
268 | else | 277 | else |
269 | new_spte = shadow_notrap_nonpresent_pte; | 278 | new_spte = shadow_notrap_nonpresent_pte; |
@@ -272,7 +281,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
272 | return; | 281 | return; |
273 | } | 282 | } |
274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 283 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
275 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); | 284 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
276 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 285 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
277 | return; | 286 | return; |
278 | pfn = vcpu->arch.update_pte.pfn; | 287 | pfn = vcpu->arch.update_pte.pfn; |
@@ -285,11 +294,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
285 | * we call mmu_set_spte() with reset_host_protection = true because that | 294 | * we call mmu_set_spte() with reset_host_protection = true because that |
286 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 295 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
287 | */ | 296 | */ |
288 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 297 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
289 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, | 298 | is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, |
290 | gpte_to_gfn(gpte), pfn, true, true); | 299 | gpte_to_gfn(gpte), pfn, true, true); |
291 | } | 300 | } |
292 | 301 | ||
302 | static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, | ||
303 | struct guest_walker *gw, int level) | ||
304 | { | ||
305 | int r; | ||
306 | pt_element_t curr_pte; | ||
307 | |||
308 | r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1], | ||
309 | &curr_pte, sizeof(curr_pte)); | ||
310 | return r || curr_pte != gw->ptes[level - 1]; | ||
311 | } | ||
312 | |||
293 | /* | 313 | /* |
294 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 314 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
295 | */ | 315 | */ |
@@ -299,75 +319,86 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
299 | int *ptwrite, pfn_t pfn) | 319 | int *ptwrite, pfn_t pfn) |
300 | { | 320 | { |
301 | unsigned access = gw->pt_access; | 321 | unsigned access = gw->pt_access; |
302 | struct kvm_mmu_page *shadow_page; | 322 | struct kvm_mmu_page *sp = NULL; |
303 | u64 spte, *sptep = NULL; | 323 | bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]); |
304 | int direct; | 324 | int top_level; |
305 | gfn_t table_gfn; | 325 | unsigned direct_access; |
306 | int r; | 326 | struct kvm_shadow_walk_iterator it; |
307 | int level; | ||
308 | pt_element_t curr_pte; | ||
309 | struct kvm_shadow_walk_iterator iterator; | ||
310 | 327 | ||
311 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | 328 | if (!is_present_gpte(gw->ptes[gw->level - 1])) |
312 | return NULL; | 329 | return NULL; |
313 | 330 | ||
314 | for_each_shadow_entry(vcpu, addr, iterator) { | 331 | direct_access = gw->pt_access & gw->pte_access; |
315 | level = iterator.level; | 332 | if (!dirty) |
316 | sptep = iterator.sptep; | 333 | direct_access &= ~ACC_WRITE_MASK; |
317 | if (iterator.level == hlevel) { | ||
318 | mmu_set_spte(vcpu, sptep, access, | ||
319 | gw->pte_access & access, | ||
320 | user_fault, write_fault, | ||
321 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | ||
322 | ptwrite, level, | ||
323 | gw->gfn, pfn, false, true); | ||
324 | break; | ||
325 | } | ||
326 | 334 | ||
327 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) | 335 | top_level = vcpu->arch.mmu.root_level; |
328 | continue; | 336 | if (top_level == PT32E_ROOT_LEVEL) |
337 | top_level = PT32_ROOT_LEVEL; | ||
338 | /* | ||
339 | * Verify that the top-level gpte is still there. Since the page | ||
340 | * is a root page, it is either write protected (and cannot be | ||
341 | * changed from now on) or it is invalid (in which case, we don't | ||
342 | * really care if it changes underneath us after this point). | ||
343 | */ | ||
344 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) | ||
345 | goto out_gpte_changed; | ||
329 | 346 | ||
330 | if (is_large_pte(*sptep)) { | 347 | for (shadow_walk_init(&it, vcpu, addr); |
331 | rmap_remove(vcpu->kvm, sptep); | 348 | shadow_walk_okay(&it) && it.level > gw->level; |
332 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 349 | shadow_walk_next(&it)) { |
333 | kvm_flush_remote_tlbs(vcpu->kvm); | 350 | gfn_t table_gfn; |
334 | } | ||
335 | 351 | ||
336 | if (level <= gw->level) { | 352 | drop_large_spte(vcpu, it.sptep); |
337 | int delta = level - gw->level + 1; | 353 | |
338 | direct = 1; | 354 | sp = NULL; |
339 | if (!is_dirty_gpte(gw->ptes[level - delta])) | 355 | if (!is_shadow_present_pte(*it.sptep)) { |
340 | access &= ~ACC_WRITE_MASK; | 356 | table_gfn = gw->table_gfn[it.level - 2]; |
341 | table_gfn = gpte_to_gfn(gw->ptes[level - delta]); | 357 | sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, |
342 | /* advance table_gfn when emulating 1gb pages with 4k */ | 358 | false, access, it.sptep); |
343 | if (delta == 0) | ||
344 | table_gfn += PT_INDEX(addr, level); | ||
345 | access &= gw->pte_access; | ||
346 | } else { | ||
347 | direct = 0; | ||
348 | table_gfn = gw->table_gfn[level - 2]; | ||
349 | } | ||
350 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | ||
351 | direct, access, sptep); | ||
352 | if (!direct) { | ||
353 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
354 | gw->pte_gpa[level - 2], | ||
355 | &curr_pte, sizeof(curr_pte)); | ||
356 | if (r || curr_pte != gw->ptes[level - 2]) { | ||
357 | kvm_mmu_put_page(shadow_page, sptep); | ||
358 | kvm_release_pfn_clean(pfn); | ||
359 | sptep = NULL; | ||
360 | break; | ||
361 | } | ||
362 | } | 359 | } |
363 | 360 | ||
364 | spte = __pa(shadow_page->spt) | 361 | /* |
365 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 362 | * Verify that the gpte in the page we've just write |
366 | | PT_WRITABLE_MASK | PT_USER_MASK; | 363 | * protected is still there. |
367 | *sptep = spte; | 364 | */ |
365 | if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) | ||
366 | goto out_gpte_changed; | ||
367 | |||
368 | if (sp) | ||
369 | link_shadow_page(it.sptep, sp); | ||
368 | } | 370 | } |
369 | 371 | ||
370 | return sptep; | 372 | for (; |
373 | shadow_walk_okay(&it) && it.level > hlevel; | ||
374 | shadow_walk_next(&it)) { | ||
375 | gfn_t direct_gfn; | ||
376 | |||
377 | validate_direct_spte(vcpu, it.sptep, direct_access); | ||
378 | |||
379 | drop_large_spte(vcpu, it.sptep); | ||
380 | |||
381 | if (is_shadow_present_pte(*it.sptep)) | ||
382 | continue; | ||
383 | |||
384 | direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | ||
385 | |||
386 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | ||
387 | true, direct_access, it.sptep); | ||
388 | link_shadow_page(it.sptep, sp); | ||
389 | } | ||
390 | |||
391 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, | ||
392 | user_fault, write_fault, dirty, ptwrite, it.level, | ||
393 | gw->gfn, pfn, false, true); | ||
394 | |||
395 | return it.sptep; | ||
396 | |||
397 | out_gpte_changed: | ||
398 | if (sp) | ||
399 | kvm_mmu_put_page(sp, it.sptep); | ||
400 | kvm_release_pfn_clean(pfn); | ||
401 | return NULL; | ||
371 | } | 402 | } |
372 | 403 | ||
373 | /* | 404 | /* |
@@ -431,11 +462,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
431 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 462 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
432 | 463 | ||
433 | /* mmio */ | 464 | /* mmio */ |
434 | if (is_error_pfn(pfn)) { | 465 | if (is_error_pfn(pfn)) |
435 | pgprintk("gfn %lx is mmio\n", walker.gfn); | 466 | return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); |
436 | kvm_release_pfn_clean(pfn); | ||
437 | return 1; | ||
438 | } | ||
439 | 467 | ||
440 | spin_lock(&vcpu->kvm->mmu_lock); | 468 | spin_lock(&vcpu->kvm->mmu_lock); |
441 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 469 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
@@ -443,6 +471,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
443 | kvm_mmu_free_some_pages(vcpu); | 471 | kvm_mmu_free_some_pages(vcpu); |
444 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 472 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
445 | level, &write_pt, pfn); | 473 | level, &write_pt, pfn); |
474 | (void)sptep; | ||
446 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | 475 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, |
447 | sptep, *sptep, write_pt); | 476 | sptep, *sptep, write_pt); |
448 | 477 | ||
@@ -464,6 +493,7 @@ out_unlock: | |||
464 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 493 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
465 | { | 494 | { |
466 | struct kvm_shadow_walk_iterator iterator; | 495 | struct kvm_shadow_walk_iterator iterator; |
496 | struct kvm_mmu_page *sp; | ||
467 | gpa_t pte_gpa = -1; | 497 | gpa_t pte_gpa = -1; |
468 | int level; | 498 | int level; |
469 | u64 *sptep; | 499 | u64 *sptep; |
@@ -475,10 +505,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
475 | level = iterator.level; | 505 | level = iterator.level; |
476 | sptep = iterator.sptep; | 506 | sptep = iterator.sptep; |
477 | 507 | ||
508 | sp = page_header(__pa(sptep)); | ||
478 | if (is_last_spte(*sptep, level)) { | 509 | if (is_last_spte(*sptep, level)) { |
479 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
480 | int offset, shift; | 510 | int offset, shift; |
481 | 511 | ||
512 | if (!sp->unsync) | ||
513 | break; | ||
514 | |||
482 | shift = PAGE_SHIFT - | 515 | shift = PAGE_SHIFT - |
483 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | 516 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; |
484 | offset = sp->role.quadrant << shift; | 517 | offset = sp->role.quadrant << shift; |
@@ -487,16 +520,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
487 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | 520 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); |
488 | 521 | ||
489 | if (is_shadow_present_pte(*sptep)) { | 522 | if (is_shadow_present_pte(*sptep)) { |
490 | rmap_remove(vcpu->kvm, sptep); | ||
491 | if (is_large_pte(*sptep)) | 523 | if (is_large_pte(*sptep)) |
492 | --vcpu->kvm->stat.lpages; | 524 | --vcpu->kvm->stat.lpages; |
525 | drop_spte(vcpu->kvm, sptep, | ||
526 | shadow_trap_nonpresent_pte); | ||
493 | need_flush = 1; | 527 | need_flush = 1; |
494 | } | 528 | } else |
495 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 529 | __set_spte(sptep, shadow_trap_nonpresent_pte); |
496 | break; | 530 | break; |
497 | } | 531 | } |
498 | 532 | ||
499 | if (!is_shadow_present_pte(*sptep)) | 533 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) |
500 | break; | 534 | break; |
501 | } | 535 | } |
502 | 536 | ||
@@ -570,9 +604,9 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
570 | * Using the cached information from sp->gfns is safe because: | 604 | * Using the cached information from sp->gfns is safe because: |
571 | * - The spte has a reference to the struct page, so the pfn for a given gfn | 605 | * - The spte has a reference to the struct page, so the pfn for a given gfn |
572 | * can't change unless all sptes pointing to it are nuked first. | 606 | * can't change unless all sptes pointing to it are nuked first. |
573 | * - Alias changes zap the entire shadow cache. | ||
574 | */ | 607 | */ |
575 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 608 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
609 | bool clear_unsync) | ||
576 | { | 610 | { |
577 | int i, offset, nr_present; | 611 | int i, offset, nr_present; |
578 | bool reset_host_protection; | 612 | bool reset_host_protection; |
@@ -580,6 +614,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
580 | 614 | ||
581 | offset = nr_present = 0; | 615 | offset = nr_present = 0; |
582 | 616 | ||
617 | /* direct kvm_mmu_page can not be unsync. */ | ||
618 | BUG_ON(sp->role.direct); | ||
619 | |||
583 | if (PTTYPE == 32) | 620 | if (PTTYPE == 32) |
584 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 621 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
585 | 622 | ||
@@ -589,7 +626,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
589 | unsigned pte_access; | 626 | unsigned pte_access; |
590 | pt_element_t gpte; | 627 | pt_element_t gpte; |
591 | gpa_t pte_gpa; | 628 | gpa_t pte_gpa; |
592 | gfn_t gfn = sp->gfns[i]; | 629 | gfn_t gfn; |
593 | 630 | ||
594 | if (!is_shadow_present_pte(sp->spt[i])) | 631 | if (!is_shadow_present_pte(sp->spt[i])) |
595 | continue; | 632 | continue; |
@@ -600,16 +637,17 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
600 | sizeof(pt_element_t))) | 637 | sizeof(pt_element_t))) |
601 | return -EINVAL; | 638 | return -EINVAL; |
602 | 639 | ||
603 | if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || | 640 | gfn = gpte_to_gfn(gpte); |
604 | !(gpte & PT_ACCESSED_MASK)) { | 641 | if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) |
642 | || gfn != sp->gfns[i] || !is_present_gpte(gpte) | ||
643 | || !(gpte & PT_ACCESSED_MASK)) { | ||
605 | u64 nonpresent; | 644 | u64 nonpresent; |
606 | 645 | ||
607 | rmap_remove(vcpu->kvm, &sp->spt[i]); | 646 | if (is_present_gpte(gpte) || !clear_unsync) |
608 | if (is_present_gpte(gpte)) | ||
609 | nonpresent = shadow_trap_nonpresent_pte; | 647 | nonpresent = shadow_trap_nonpresent_pte; |
610 | else | 648 | else |
611 | nonpresent = shadow_notrap_nonpresent_pte; | 649 | nonpresent = shadow_notrap_nonpresent_pte; |
612 | __set_spte(&sp->spt[i], nonpresent); | 650 | drop_spte(vcpu->kvm, &sp->spt[i], nonpresent); |
613 | continue; | 651 | continue; |
614 | } | 652 | } |
615 | 653 | ||