diff options
Diffstat (limited to 'drivers/kvm/paging_tmpl.h')
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 273 |
1 files changed, 142 insertions, 131 deletions
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 73ffbffb1097..a7c5cb0319ea 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h | |||
@@ -31,7 +31,6 @@ | |||
31 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 31 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
32 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 32 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
33 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) | 33 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) |
34 | #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK | ||
35 | #ifdef CONFIG_X86_64 | 34 | #ifdef CONFIG_X86_64 |
36 | #define PT_MAX_FULL_LEVELS 4 | 35 | #define PT_MAX_FULL_LEVELS 4 |
37 | #else | 36 | #else |
@@ -46,7 +45,6 @@ | |||
46 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 45 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
47 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 46 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
48 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) | 47 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) |
49 | #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK | ||
50 | #define PT_MAX_FULL_LEVELS 2 | 48 | #define PT_MAX_FULL_LEVELS 2 |
51 | #else | 49 | #else |
52 | #error Invalid PTTYPE value | 50 | #error Invalid PTTYPE value |
@@ -192,40 +190,143 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, | |||
192 | mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]); | 190 | mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]); |
193 | } | 191 | } |
194 | 192 | ||
195 | static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, | 193 | static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, |
196 | u64 *shadow_pte, u64 access_bits, gfn_t gfn) | 194 | u64 *shadow_pte, |
195 | gpa_t gaddr, | ||
196 | pt_element_t *gpte, | ||
197 | u64 access_bits, | ||
198 | int user_fault, | ||
199 | int write_fault, | ||
200 | int *ptwrite, | ||
201 | struct guest_walker *walker, | ||
202 | gfn_t gfn) | ||
197 | { | 203 | { |
198 | ASSERT(*shadow_pte == 0); | 204 | hpa_t paddr; |
199 | access_bits &= guest_pte; | 205 | int dirty = *gpte & PT_DIRTY_MASK; |
200 | *shadow_pte = (guest_pte & PT_PTE_COPY_MASK); | 206 | u64 spte = *shadow_pte; |
201 | set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, | 207 | int was_rmapped = is_rmap_pte(spte); |
202 | guest_pte & PT_DIRTY_MASK, access_bits, gfn); | 208 | |
209 | pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" | ||
210 | " user_fault %d gfn %lx\n", | ||
211 | __FUNCTION__, spte, (u64)*gpte, access_bits, | ||
212 | write_fault, user_fault, gfn); | ||
213 | |||
214 | if (write_fault && !dirty) { | ||
215 | *gpte |= PT_DIRTY_MASK; | ||
216 | dirty = 1; | ||
217 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||
218 | } | ||
219 | |||
220 | spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; | ||
221 | spte |= *gpte & PT64_NX_MASK; | ||
222 | if (!dirty) | ||
223 | access_bits &= ~PT_WRITABLE_MASK; | ||
224 | |||
225 | paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); | ||
226 | |||
227 | spte |= PT_PRESENT_MASK; | ||
228 | if (access_bits & PT_USER_MASK) | ||
229 | spte |= PT_USER_MASK; | ||
230 | |||
231 | if (is_error_hpa(paddr)) { | ||
232 | spte |= gaddr; | ||
233 | spte |= PT_SHADOW_IO_MARK; | ||
234 | spte &= ~PT_PRESENT_MASK; | ||
235 | set_shadow_pte(shadow_pte, spte); | ||
236 | return; | ||
237 | } | ||
238 | |||
239 | spte |= paddr; | ||
240 | |||
241 | if ((access_bits & PT_WRITABLE_MASK) | ||
242 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | ||
243 | struct kvm_mmu_page *shadow; | ||
244 | |||
245 | spte |= PT_WRITABLE_MASK; | ||
246 | if (user_fault) { | ||
247 | mmu_unshadow(vcpu, gfn); | ||
248 | goto unshadowed; | ||
249 | } | ||
250 | |||
251 | shadow = kvm_mmu_lookup_page(vcpu, gfn); | ||
252 | if (shadow) { | ||
253 | pgprintk("%s: found shadow page for %lx, marking ro\n", | ||
254 | __FUNCTION__, gfn); | ||
255 | access_bits &= ~PT_WRITABLE_MASK; | ||
256 | if (is_writeble_pte(spte)) { | ||
257 | spte &= ~PT_WRITABLE_MASK; | ||
258 | kvm_arch_ops->tlb_flush(vcpu); | ||
259 | } | ||
260 | if (write_fault) | ||
261 | *ptwrite = 1; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | unshadowed: | ||
266 | |||
267 | if (access_bits & PT_WRITABLE_MASK) | ||
268 | mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); | ||
269 | |||
270 | set_shadow_pte(shadow_pte, spte); | ||
271 | page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); | ||
272 | if (!was_rmapped) | ||
273 | rmap_add(vcpu, shadow_pte); | ||
203 | } | 274 | } |
204 | 275 | ||
205 | static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, | 276 | static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, |
206 | u64 *shadow_pte, u64 access_bits, gfn_t gfn) | 277 | u64 *shadow_pte, u64 access_bits, |
278 | int user_fault, int write_fault, int *ptwrite, | ||
279 | struct guest_walker *walker, gfn_t gfn) | ||
280 | { | ||
281 | access_bits &= *gpte; | ||
282 | FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, | ||
283 | gpte, access_bits, user_fault, write_fault, | ||
284 | ptwrite, walker, gfn); | ||
285 | } | ||
286 | |||
287 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | ||
288 | u64 *spte, const void *pte, int bytes) | ||
289 | { | ||
290 | pt_element_t gpte; | ||
291 | |||
292 | if (bytes < sizeof(pt_element_t)) | ||
293 | return; | ||
294 | gpte = *(const pt_element_t *)pte; | ||
295 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) | ||
296 | return; | ||
297 | pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); | ||
298 | FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, | ||
299 | 0, NULL, NULL, | ||
300 | (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); | ||
301 | } | ||
302 | |||
303 | static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, | ||
304 | u64 *shadow_pte, u64 access_bits, | ||
305 | int user_fault, int write_fault, int *ptwrite, | ||
306 | struct guest_walker *walker, gfn_t gfn) | ||
207 | { | 307 | { |
208 | gpa_t gaddr; | 308 | gpa_t gaddr; |
209 | 309 | ||
210 | ASSERT(*shadow_pte == 0); | 310 | access_bits &= *gpde; |
211 | access_bits &= guest_pde; | ||
212 | gaddr = (gpa_t)gfn << PAGE_SHIFT; | 311 | gaddr = (gpa_t)gfn << PAGE_SHIFT; |
213 | if (PTTYPE == 32 && is_cpuid_PSE36()) | 312 | if (PTTYPE == 32 && is_cpuid_PSE36()) |
214 | gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << | 313 | gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << |
215 | (32 - PT32_DIR_PSE36_SHIFT); | 314 | (32 - PT32_DIR_PSE36_SHIFT); |
216 | *shadow_pte = guest_pde & PT_PTE_COPY_MASK; | 315 | FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, |
217 | set_pte_common(vcpu, shadow_pte, gaddr, | 316 | gpde, access_bits, user_fault, write_fault, |
218 | guest_pde & PT_DIRTY_MASK, access_bits, gfn); | 317 | ptwrite, walker, gfn); |
219 | } | 318 | } |
220 | 319 | ||
221 | /* | 320 | /* |
222 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 321 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
223 | */ | 322 | */ |
224 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 323 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
225 | struct guest_walker *walker) | 324 | struct guest_walker *walker, |
325 | int user_fault, int write_fault, int *ptwrite) | ||
226 | { | 326 | { |
227 | hpa_t shadow_addr; | 327 | hpa_t shadow_addr; |
228 | int level; | 328 | int level; |
329 | u64 *shadow_ent; | ||
229 | u64 *prev_shadow_ent = NULL; | 330 | u64 *prev_shadow_ent = NULL; |
230 | pt_element_t *guest_ent = walker->ptep; | 331 | pt_element_t *guest_ent = walker->ptep; |
231 | 332 | ||
@@ -242,37 +343,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
242 | 343 | ||
243 | for (; ; level--) { | 344 | for (; ; level--) { |
244 | u32 index = SHADOW_PT_INDEX(addr, level); | 345 | u32 index = SHADOW_PT_INDEX(addr, level); |
245 | u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index; | ||
246 | struct kvm_mmu_page *shadow_page; | 346 | struct kvm_mmu_page *shadow_page; |
247 | u64 shadow_pte; | 347 | u64 shadow_pte; |
248 | int metaphysical; | 348 | int metaphysical; |
249 | gfn_t table_gfn; | 349 | gfn_t table_gfn; |
250 | unsigned hugepage_access = 0; | 350 | unsigned hugepage_access = 0; |
251 | 351 | ||
352 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | ||
252 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { | 353 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { |
253 | if (level == PT_PAGE_TABLE_LEVEL) | 354 | if (level == PT_PAGE_TABLE_LEVEL) |
254 | return shadow_ent; | 355 | break; |
255 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; | 356 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; |
256 | prev_shadow_ent = shadow_ent; | 357 | prev_shadow_ent = shadow_ent; |
257 | continue; | 358 | continue; |
258 | } | 359 | } |
259 | 360 | ||
260 | if (level == PT_PAGE_TABLE_LEVEL) { | 361 | if (level == PT_PAGE_TABLE_LEVEL) |
261 | 362 | break; | |
262 | if (walker->level == PT_DIRECTORY_LEVEL) { | ||
263 | if (prev_shadow_ent) | ||
264 | *prev_shadow_ent |= PT_SHADOW_PS_MARK; | ||
265 | FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, | ||
266 | walker->inherited_ar, | ||
267 | walker->gfn); | ||
268 | } else { | ||
269 | ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); | ||
270 | FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, | ||
271 | walker->inherited_ar, | ||
272 | walker->gfn); | ||
273 | } | ||
274 | return shadow_ent; | ||
275 | } | ||
276 | 363 | ||
277 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 364 | if (level - 1 == PT_PAGE_TABLE_LEVEL |
278 | && walker->level == PT_DIRECTORY_LEVEL) { | 365 | && walker->level == PT_DIRECTORY_LEVEL) { |
@@ -289,90 +376,24 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
289 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | 376 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, |
290 | metaphysical, hugepage_access, | 377 | metaphysical, hugepage_access, |
291 | shadow_ent); | 378 | shadow_ent); |
292 | shadow_addr = shadow_page->page_hpa; | 379 | shadow_addr = __pa(shadow_page->spt); |
293 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | 380 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK |
294 | | PT_WRITABLE_MASK | PT_USER_MASK; | 381 | | PT_WRITABLE_MASK | PT_USER_MASK; |
295 | *shadow_ent = shadow_pte; | 382 | *shadow_ent = shadow_pte; |
296 | prev_shadow_ent = shadow_ent; | 383 | prev_shadow_ent = shadow_ent; |
297 | } | 384 | } |
298 | } | ||
299 | 385 | ||
300 | /* | 386 | if (walker->level == PT_DIRECTORY_LEVEL) { |
301 | * The guest faulted for write. We need to | 387 | FNAME(set_pde)(vcpu, guest_ent, shadow_ent, |
302 | * | 388 | walker->inherited_ar, user_fault, write_fault, |
303 | * - check write permissions | 389 | ptwrite, walker, walker->gfn); |
304 | * - update the guest pte dirty bit | 390 | } else { |
305 | * - update our own dirty page tracking structures | 391 | ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); |
306 | */ | 392 | FNAME(set_pte)(vcpu, guest_ent, shadow_ent, |
307 | static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, | 393 | walker->inherited_ar, user_fault, write_fault, |
308 | u64 *shadow_ent, | 394 | ptwrite, walker, walker->gfn); |
309 | struct guest_walker *walker, | ||
310 | gva_t addr, | ||
311 | int user, | ||
312 | int *write_pt) | ||
313 | { | ||
314 | pt_element_t *guest_ent; | ||
315 | int writable_shadow; | ||
316 | gfn_t gfn; | ||
317 | struct kvm_mmu_page *page; | ||
318 | |||
319 | if (is_writeble_pte(*shadow_ent)) | ||
320 | return !user || (*shadow_ent & PT_USER_MASK); | ||
321 | |||
322 | writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK; | ||
323 | if (user) { | ||
324 | /* | ||
325 | * User mode access. Fail if it's a kernel page or a read-only | ||
326 | * page. | ||
327 | */ | ||
328 | if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow) | ||
329 | return 0; | ||
330 | ASSERT(*shadow_ent & PT_USER_MASK); | ||
331 | } else | ||
332 | /* | ||
333 | * Kernel mode access. Fail if it's a read-only page and | ||
334 | * supervisor write protection is enabled. | ||
335 | */ | ||
336 | if (!writable_shadow) { | ||
337 | if (is_write_protection(vcpu)) | ||
338 | return 0; | ||
339 | *shadow_ent &= ~PT_USER_MASK; | ||
340 | } | ||
341 | |||
342 | guest_ent = walker->ptep; | ||
343 | |||
344 | if (!is_present_pte(*guest_ent)) { | ||
345 | *shadow_ent = 0; | ||
346 | return 0; | ||
347 | } | 395 | } |
348 | 396 | return shadow_ent; | |
349 | gfn = walker->gfn; | ||
350 | |||
351 | if (user) { | ||
352 | /* | ||
353 | * Usermode page faults won't be for page table updates. | ||
354 | */ | ||
355 | while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { | ||
356 | pgprintk("%s: zap %lx %x\n", | ||
357 | __FUNCTION__, gfn, page->role.word); | ||
358 | kvm_mmu_zap_page(vcpu, page); | ||
359 | } | ||
360 | } else if (kvm_mmu_lookup_page(vcpu, gfn)) { | ||
361 | pgprintk("%s: found shadow page for %lx, marking ro\n", | ||
362 | __FUNCTION__, gfn); | ||
363 | mark_page_dirty(vcpu->kvm, gfn); | ||
364 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||
365 | *guest_ent |= PT_DIRTY_MASK; | ||
366 | *write_pt = 1; | ||
367 | return 0; | ||
368 | } | ||
369 | mark_page_dirty(vcpu->kvm, gfn); | ||
370 | *shadow_ent |= PT_WRITABLE_MASK; | ||
371 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||
372 | *guest_ent |= PT_DIRTY_MASK; | ||
373 | rmap_add(vcpu, shadow_ent); | ||
374 | |||
375 | return 1; | ||
376 | } | 397 | } |
377 | 398 | ||
378 | /* | 399 | /* |
@@ -397,7 +418,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
397 | int fetch_fault = error_code & PFERR_FETCH_MASK; | 418 | int fetch_fault = error_code & PFERR_FETCH_MASK; |
398 | struct guest_walker walker; | 419 | struct guest_walker walker; |
399 | u64 *shadow_pte; | 420 | u64 *shadow_pte; |
400 | int fixed; | ||
401 | int write_pt = 0; | 421 | int write_pt = 0; |
402 | int r; | 422 | int r; |
403 | 423 | ||
@@ -421,27 +441,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
421 | pgprintk("%s: guest page fault\n", __FUNCTION__); | 441 | pgprintk("%s: guest page fault\n", __FUNCTION__); |
422 | inject_page_fault(vcpu, addr, walker.error_code); | 442 | inject_page_fault(vcpu, addr, walker.error_code); |
423 | FNAME(release_walker)(&walker); | 443 | FNAME(release_walker)(&walker); |
444 | vcpu->last_pt_write_count = 0; /* reset fork detector */ | ||
424 | return 0; | 445 | return 0; |
425 | } | 446 | } |
426 | 447 | ||
427 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker); | 448 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
428 | pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, | 449 | &write_pt); |
429 | shadow_pte, *shadow_pte); | 450 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, |
430 | 451 | shadow_pte, *shadow_pte, write_pt); | |
431 | /* | ||
432 | * Update the shadow pte. | ||
433 | */ | ||
434 | if (write_fault) | ||
435 | fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, | ||
436 | user_fault, &write_pt); | ||
437 | else | ||
438 | fixed = fix_read_pf(shadow_pte); | ||
439 | |||
440 | pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__, | ||
441 | shadow_pte, *shadow_pte); | ||
442 | 452 | ||
443 | FNAME(release_walker)(&walker); | 453 | FNAME(release_walker)(&walker); |
444 | 454 | ||
455 | if (!write_pt) | ||
456 | vcpu->last_pt_write_count = 0; /* reset fork detector */ | ||
457 | |||
445 | /* | 458 | /* |
446 | * mmio: emulate if accessible, otherwise its a guest fault. | 459 | * mmio: emulate if accessible, otherwise its a guest fault. |
447 | */ | 460 | */ |
@@ -478,7 +491,5 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | |||
478 | #undef PT_INDEX | 491 | #undef PT_INDEX |
479 | #undef SHADOW_PT_INDEX | 492 | #undef SHADOW_PT_INDEX |
480 | #undef PT_LEVEL_MASK | 493 | #undef PT_LEVEL_MASK |
481 | #undef PT_PTE_COPY_MASK | ||
482 | #undef PT_NON_PTE_COPY_MASK | ||
483 | #undef PT_DIR_BASE_ADDR_MASK | 494 | #undef PT_DIR_BASE_ADDR_MASK |
484 | #undef PT_MAX_FULL_LEVELS | 495 | #undef PT_MAX_FULL_LEVELS |