diff options
Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 86 |
1 files changed, 49 insertions, 37 deletions
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ecc0856268c4..156fe10288ae 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
130 | unsigned index, pt_access, pte_access; | 130 | unsigned index, pt_access, pte_access; |
131 | gpa_t pte_gpa; | 131 | gpa_t pte_gpa; |
132 | 132 | ||
133 | pgprintk("%s: addr %lx\n", __FUNCTION__, addr); | 133 | pgprintk("%s: addr %lx\n", __func__, addr); |
134 | walk: | 134 | walk: |
135 | walker->level = vcpu->arch.mmu.root_level; | 135 | walker->level = vcpu->arch.mmu.root_level; |
136 | pte = vcpu->arch.cr3; | 136 | pte = vcpu->arch.cr3; |
@@ -155,7 +155,7 @@ walk: | |||
155 | pte_gpa += index * sizeof(pt_element_t); | 155 | pte_gpa += index * sizeof(pt_element_t); |
156 | walker->table_gfn[walker->level - 1] = table_gfn; | 156 | walker->table_gfn[walker->level - 1] = table_gfn; |
157 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 157 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
158 | pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, | 158 | pgprintk("%s: table_gfn[%d] %lx\n", __func__, |
159 | walker->level - 1, table_gfn); | 159 | walker->level - 1, table_gfn); |
160 | 160 | ||
161 | kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); | 161 | kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); |
@@ -222,7 +222,7 @@ walk: | |||
222 | walker->pt_access = pt_access; | 222 | walker->pt_access = pt_access; |
223 | walker->pte_access = pte_access; | 223 | walker->pte_access = pte_access; |
224 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 224 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
225 | __FUNCTION__, (u64)pte, pt_access, pte_access); | 225 | __func__, (u64)pte, pt_access, pte_access); |
226 | return 1; | 226 | return 1; |
227 | 227 | ||
228 | not_present: | 228 | not_present: |
@@ -243,31 +243,30 @@ err: | |||
243 | } | 243 | } |
244 | 244 | ||
245 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | 245 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, |
246 | u64 *spte, const void *pte, int bytes, | 246 | u64 *spte, const void *pte) |
247 | int offset_in_pte) | ||
248 | { | 247 | { |
249 | pt_element_t gpte; | 248 | pt_element_t gpte; |
250 | unsigned pte_access; | 249 | unsigned pte_access; |
251 | struct page *npage; | 250 | pfn_t pfn; |
251 | int largepage = vcpu->arch.update_pte.largepage; | ||
252 | 252 | ||
253 | gpte = *(const pt_element_t *)pte; | 253 | gpte = *(const pt_element_t *)pte; |
254 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 254 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
255 | if (!offset_in_pte && !is_present_pte(gpte)) | 255 | if (!is_present_pte(gpte)) |
256 | set_shadow_pte(spte, shadow_notrap_nonpresent_pte); | 256 | set_shadow_pte(spte, shadow_notrap_nonpresent_pte); |
257 | return; | 257 | return; |
258 | } | 258 | } |
259 | if (bytes < sizeof(pt_element_t)) | 259 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
260 | return; | ||
261 | pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); | ||
262 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); | 260 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); |
263 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 261 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
264 | return; | 262 | return; |
265 | npage = vcpu->arch.update_pte.page; | 263 | pfn = vcpu->arch.update_pte.pfn; |
266 | if (!npage) | 264 | if (is_error_pfn(pfn)) |
267 | return; | 265 | return; |
268 | get_page(npage); | 266 | kvm_get_pfn(pfn); |
269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 267 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
270 | gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage); | 268 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), |
269 | pfn, true); | ||
271 | } | 270 | } |
272 | 271 | ||
273 | /* | 272 | /* |
@@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
275 | */ | 274 | */ |
276 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 275 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
277 | struct guest_walker *walker, | 276 | struct guest_walker *walker, |
278 | int user_fault, int write_fault, int *ptwrite, | 277 | int user_fault, int write_fault, int largepage, |
279 | struct page *page) | 278 | int *ptwrite, pfn_t pfn) |
280 | { | 279 | { |
281 | hpa_t shadow_addr; | 280 | hpa_t shadow_addr; |
282 | int level; | 281 | int level; |
@@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
304 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | 303 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; |
305 | if (level == PT_PAGE_TABLE_LEVEL) | 304 | if (level == PT_PAGE_TABLE_LEVEL) |
306 | break; | 305 | break; |
307 | if (is_shadow_present_pte(*shadow_ent)) { | 306 | |
307 | if (largepage && level == PT_DIRECTORY_LEVEL) | ||
308 | break; | ||
309 | |||
310 | if (is_shadow_present_pte(*shadow_ent) | ||
311 | && !is_large_pte(*shadow_ent)) { | ||
308 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; | 312 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; |
309 | continue; | 313 | continue; |
310 | } | 314 | } |
311 | 315 | ||
316 | if (is_large_pte(*shadow_ent)) | ||
317 | rmap_remove(vcpu->kvm, shadow_ent); | ||
318 | |||
312 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 319 | if (level - 1 == PT_PAGE_TABLE_LEVEL |
313 | && walker->level == PT_DIRECTORY_LEVEL) { | 320 | && walker->level == PT_DIRECTORY_LEVEL) { |
314 | metaphysical = 1; | 321 | metaphysical = 1; |
@@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
329 | walker->pte_gpa[level - 2], | 336 | walker->pte_gpa[level - 2], |
330 | &curr_pte, sizeof(curr_pte)); | 337 | &curr_pte, sizeof(curr_pte)); |
331 | if (r || curr_pte != walker->ptes[level - 2]) { | 338 | if (r || curr_pte != walker->ptes[level - 2]) { |
332 | kvm_release_page_clean(page); | 339 | kvm_release_pfn_clean(pfn); |
333 | return NULL; | 340 | return NULL; |
334 | } | 341 | } |
335 | } | 342 | } |
@@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
342 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, | 349 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, |
343 | user_fault, write_fault, | 350 | user_fault, write_fault, |
344 | walker->ptes[walker->level-1] & PT_DIRTY_MASK, | 351 | walker->ptes[walker->level-1] & PT_DIRTY_MASK, |
345 | ptwrite, walker->gfn, page); | 352 | ptwrite, largepage, walker->gfn, pfn, false); |
346 | 353 | ||
347 | return shadow_ent; | 354 | return shadow_ent; |
348 | } | 355 | } |
@@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
371 | u64 *shadow_pte; | 378 | u64 *shadow_pte; |
372 | int write_pt = 0; | 379 | int write_pt = 0; |
373 | int r; | 380 | int r; |
374 | struct page *page; | 381 | pfn_t pfn; |
382 | int largepage = 0; | ||
375 | 383 | ||
376 | pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); | 384 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
377 | kvm_mmu_audit(vcpu, "pre page fault"); | 385 | kvm_mmu_audit(vcpu, "pre page fault"); |
378 | 386 | ||
379 | r = mmu_topup_memory_caches(vcpu); | 387 | r = mmu_topup_memory_caches(vcpu); |
380 | if (r) | 388 | if (r) |
381 | return r; | 389 | return r; |
382 | 390 | ||
383 | down_read(&vcpu->kvm->slots_lock); | ||
384 | /* | 391 | /* |
385 | * Look up the shadow pte for the faulting address. | 392 | * Look up the shadow pte for the faulting address. |
386 | */ | 393 | */ |
@@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
391 | * The page is not mapped by the guest. Let the guest handle it. | 398 | * The page is not mapped by the guest. Let the guest handle it. |
392 | */ | 399 | */ |
393 | if (!r) { | 400 | if (!r) { |
394 | pgprintk("%s: guest page fault\n", __FUNCTION__); | 401 | pgprintk("%s: guest page fault\n", __func__); |
395 | inject_page_fault(vcpu, addr, walker.error_code); | 402 | inject_page_fault(vcpu, addr, walker.error_code); |
396 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 403 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ |
397 | up_read(&vcpu->kvm->slots_lock); | ||
398 | return 0; | 404 | return 0; |
399 | } | 405 | } |
400 | 406 | ||
401 | down_read(¤t->mm->mmap_sem); | 407 | down_read(¤t->mm->mmap_sem); |
402 | page = gfn_to_page(vcpu->kvm, walker.gfn); | 408 | if (walker.level == PT_DIRECTORY_LEVEL) { |
409 | gfn_t large_gfn; | ||
410 | large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); | ||
411 | if (is_largepage_backed(vcpu, large_gfn)) { | ||
412 | walker.gfn = large_gfn; | ||
413 | largepage = 1; | ||
414 | } | ||
415 | } | ||
416 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | ||
403 | up_read(¤t->mm->mmap_sem); | 417 | up_read(¤t->mm->mmap_sem); |
404 | 418 | ||
419 | /* mmio */ | ||
420 | if (is_error_pfn(pfn)) { | ||
421 | pgprintk("gfn %x is mmio\n", walker.gfn); | ||
422 | kvm_release_pfn_clean(pfn); | ||
423 | return 1; | ||
424 | } | ||
425 | |||
405 | spin_lock(&vcpu->kvm->mmu_lock); | 426 | spin_lock(&vcpu->kvm->mmu_lock); |
406 | kvm_mmu_free_some_pages(vcpu); | 427 | kvm_mmu_free_some_pages(vcpu); |
407 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 428 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
408 | &write_pt, page); | 429 | largepage, &write_pt, pfn); |
409 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, | 430 | |
431 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | ||
410 | shadow_pte, *shadow_pte, write_pt); | 432 | shadow_pte, *shadow_pte, write_pt); |
411 | 433 | ||
412 | if (!write_pt) | 434 | if (!write_pt) |
413 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 435 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ |
414 | 436 | ||
415 | /* | ||
416 | * mmio: emulate if accessible, otherwise its a guest fault. | ||
417 | */ | ||
418 | if (shadow_pte && is_io_pte(*shadow_pte)) { | ||
419 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
420 | up_read(&vcpu->kvm->slots_lock); | ||
421 | return 1; | ||
422 | } | ||
423 | |||
424 | ++vcpu->stat.pf_fixed; | 437 | ++vcpu->stat.pf_fixed; |
425 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); | 438 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); |
426 | spin_unlock(&vcpu->kvm->mmu_lock); | 439 | spin_unlock(&vcpu->kvm->mmu_lock); |
427 | up_read(&vcpu->kvm->slots_lock); | ||
428 | 440 | ||
429 | return write_pt; | 441 | return write_pt; |
430 | } | 442 | } |