Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r--  drivers/kvm/mmu.c | 1114
1 file changed, 943 insertions(+), 171 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 790423c5f23d..c6f972914f08 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -26,7 +26,31 @@
 #include "vmx.h"
 #include "kvm.h"
 
+#undef MMU_DEBUG
+
+#undef AUDIT
+
+#ifdef AUDIT
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
+#else
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
+#endif
+
+#ifdef MMU_DEBUG
+
+#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
+#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
+
+#else
+
 #define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
+
+#endif
+
+#if defined(MMU_DEBUG) || defined(AUDIT)
+static int dbg = 1;
+#endif
 
 #define ASSERT(x) \
 	if (!(x)) { \
@@ -34,8 +58,10 @@
 		       __FILE__, __LINE__, #x); \
 	}
 
-#define PT64_ENT_PER_PAGE 512
-#define PT32_ENT_PER_PAGE 1024
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
 
 #define PT_WRITABLE_SHIFT 1
 
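The PT64_PT_BITS/PT32_PT_BITS constants above just re-derive the old magic numbers: a 4 KiB page holds 512 eight-byte 64-bit entries (1 << 9) or 1024 four-byte 32-bit entries (1 << 10). A standalone sanity check of that arithmetic (illustrative userspace C, not part of the patch):

#include <assert.h>

#define PAGE_SIZE 4096	/* x86 base page size */

int main(void)
{
	/* 64-bit/PAE entries are 8 bytes: 4096 / 8 = 512 = 1 << 9 */
	assert(PAGE_SIZE / 8 == (1 << 9));
	/* legacy 32-bit entries are 4 bytes: 4096 / 4 = 1024 = 1 << 10 */
	assert(PAGE_SIZE / 4 == (1 << 10));
	return 0;
}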
@@ -125,6 +151,13 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
+#define RMAP_EXT 4
+
+struct kvm_rmap_desc {
+	u64 *shadow_ptes[RMAP_EXT];
+	struct kvm_rmap_desc *more;
+};
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->cr0 & CR0_WP_MASK;
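struct kvm_rmap_desc above batches up to RMAP_EXT shadow-pte pointers per node, chaining overflow through ->more; entries are packed from slot 0, so the first NULL slot terminates a node. A minimal sketch of walking such a chain (the counting helper is hypothetical; it mirrors the loop count_rmaps uses later in this patch):

/* Hypothetical helper: count the sptes recorded in one rmap chain. */
static int rmap_chain_count(struct kvm_rmap_desc *desc)
{
	int n = 0;
	int i;

	while (desc) {
		/* slots fill from index 0; a NULL slot ends this node */
		for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
			++n;
		desc = desc->more;
	}
	return n;
}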
@@ -150,32 +183,272 @@ static int is_io_pte(unsigned long pte)
 	return pte & PT_SHADOW_IO_MARK;
 }
 
+static int is_rmap_pte(u64 pte)
+{
+	return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
+		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+}
+
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
+				  size_t objsize, int min)
+{
+	void *obj;
+
+	if (cache->nobjs >= min)
+		return 0;
+	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
+		obj = kzalloc(objsize, GFP_NOWAIT);
+		if (!obj)
+			return -ENOMEM;
+		cache->objects[cache->nobjs++] = obj;
+	}
+	return 0;
+}
+
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+	while (mc->nobjs)
+		kfree(mc->objects[--mc->nobjs]);
+}
+
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+{
+	int r;
+
+	r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
+				   sizeof(struct kvm_pte_chain), 4);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
+				   sizeof(struct kvm_rmap_desc), 1);
+out:
+	return r;
+}
+
+static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
+	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
+}
+
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
+				    size_t size)
+{
+	void *p;
+
+	BUG_ON(!mc->nobjs);
+	p = mc->objects[--mc->nobjs];
+	memset(p, 0, size);
+	return p;
+}
+
+static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj)
+{
+	if (mc->nobjs < KVM_NR_MEM_OBJS)
+		mc->objects[mc->nobjs++] = obj;
+	else
+		kfree(obj);
+}
+
+static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
+{
+	return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
+				      sizeof(struct kvm_pte_chain));
+}
+
+static void mmu_free_pte_chain(struct kvm_vcpu *vcpu,
+			       struct kvm_pte_chain *pc)
+{
+	mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc);
+}
+
+static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
+{
+	return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
+				      sizeof(struct kvm_rmap_desc));
+}
+
+static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu,
+			       struct kvm_rmap_desc *rd)
+{
+	mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd);
+}
+
+/*
+ * Reverse mapping data structures:
+ *
+ * If page->private bit zero is zero, then page->private points to the
+ * shadow page table entry that points to page_address(page).
+ *
+ * If page->private bit zero is one, (then page->private & ~1) points
+ * to a struct kvm_rmap_desc containing more mappings.
+ */
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
+{
+	struct page *page;
+	struct kvm_rmap_desc *desc;
+	int i;
+
+	if (!is_rmap_pte(*spte))
+		return;
+	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+	if (!page->private) {
+		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
+		page->private = (unsigned long)spte;
+	} else if (!(page->private & 1)) {
+		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
+		desc = mmu_alloc_rmap_desc(vcpu);
+		desc->shadow_ptes[0] = (u64 *)page->private;
+		desc->shadow_ptes[1] = spte;
+		page->private = (unsigned long)desc | 1;
+	} else {
+		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
+		desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+			desc = desc->more;
+		if (desc->shadow_ptes[RMAP_EXT-1]) {
+			desc->more = mmu_alloc_rmap_desc(vcpu);
+			desc = desc->more;
+		}
+		for (i = 0; desc->shadow_ptes[i]; ++i)
+			;
+		desc->shadow_ptes[i] = spte;
+	}
+}
+
+static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
+				   struct page *page,
+				   struct kvm_rmap_desc *desc,
+				   int i,
+				   struct kvm_rmap_desc *prev_desc)
+{
+	int j;
+
+	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
+		;
+	desc->shadow_ptes[i] = desc->shadow_ptes[j];
+	desc->shadow_ptes[j] = 0;
+	if (j != 0)
+		return;
+	if (!prev_desc && !desc->more)
+		page->private = (unsigned long)desc->shadow_ptes[0];
+	else
+		if (prev_desc)
+			prev_desc->more = desc->more;
+		else
+			page->private = (unsigned long)desc->more | 1;
+	mmu_free_rmap_desc(vcpu, desc);
+}
+
+static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
+{
+	struct page *page;
+	struct kvm_rmap_desc *desc;
+	struct kvm_rmap_desc *prev_desc;
+	int i;
+
+	if (!is_rmap_pte(*spte))
+		return;
+	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+	if (!page->private) {
+		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+		BUG();
+	} else if (!(page->private & 1)) {
+		rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
+		if ((u64 *)page->private != spte) {
+			printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
+			       spte, *spte);
+			BUG();
+		}
+		page->private = 0;
+	} else {
+		rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
+		desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+		prev_desc = NULL;
+		while (desc) {
+			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
+				if (desc->shadow_ptes[i] == spte) {
+					rmap_desc_remove_entry(vcpu, page,
+							       desc, i,
+							       prev_desc);
+					return;
+				}
+			prev_desc = desc;
+			desc = desc->more;
+		}
+		BUG();
+	}
+}
+
+static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct page *page;
+	struct kvm_memory_slot *slot;
+	struct kvm_rmap_desc *desc;
+	u64 *spte;
+
+	slot = gfn_to_memslot(kvm, gfn);
+	BUG_ON(!slot);
+	page = gfn_to_page(slot, gfn);
+
+	while (page->private) {
+		if (!(page->private & 1))
+			spte = (u64 *)page->private;
+		else {
+			desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+			spte = desc->shadow_ptes[0];
+		}
+		BUG_ON(!spte);
+		BUG_ON((*spte & PT64_BASE_ADDR_MASK) !=
+		       page_to_pfn(page) << PAGE_SHIFT);
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		BUG_ON(!(*spte & PT_WRITABLE_MASK));
+		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
+		rmap_remove(vcpu, spte);
+		kvm_arch_ops->tlb_flush(vcpu);
+		*spte &= ~(u64)PT_WRITABLE_MASK;
+	}
+}
+
+static int is_empty_shadow_page(hpa_t page_hpa)
+{
+	u64 *pos;
+	u64 *end;
+
+	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
+	     pos != end; pos++)
+		if (*pos != 0) {
+			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
+			       pos, *pos);
+			return 0;
+		}
+	return 1;
+}
+
 static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
 {
 	struct kvm_mmu_page *page_head = page_header(page_hpa);
 
+	ASSERT(is_empty_shadow_page(page_hpa));
 	list_del(&page_head->link);
 	page_head->page_hpa = page_hpa;
 	list_add(&page_head->link, &vcpu->free_pages);
+	++vcpu->kvm->n_free_mmu_pages;
 }
 
-static int is_empty_shadow_page(hpa_t page_hpa)
+static unsigned kvm_page_table_hashfn(gfn_t gfn)
 {
-	u32 *pos;
-	u32 *end;
-	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
-	     pos != end; pos++)
-		if (*pos != 0)
-			return 0;
-	return 1;
+	return gfn;
 }
 
-static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
+static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
+					       u64 *parent_pte)
 {
 	struct kvm_mmu_page *page;
 
 	if (list_empty(&vcpu->free_pages))
-		return INVALID_PAGE;
+		return NULL;
 
 	page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
 	list_del(&page->link);
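The comment above rmap_add is the key to the reverse map: page->private is a tagged word, where 0 means no writable mappings, bit 0 clear means the word is itself a single spte pointer, and bit 0 set means it points (minus the tag) to a kvm_rmap_desc chain. A sketch of the decode that rmap_remove and rmap_write_protect both perform inline (the helper name is invented for illustration):

/* Hypothetical decoder for the page->private rmap encoding. */
static u64 *rmap_first_spte(unsigned long private)
{
	struct kvm_rmap_desc *desc;

	if (!private)		/* no rmap entries */
		return NULL;
	if (!(private & 1))	/* bit 0 clear: a lone spte pointer */
		return (u64 *)private;
	/* bit 0 set: strip the tag to reach the descriptor chain */
	desc = (struct kvm_rmap_desc *)(private & ~1ul);
	return desc->shadow_ptes[0];
}

The GFP_NOWAIT allocations feeding this code are presumably why the mmu_topup_memory_cache() machinery exists at all: rmap_add() runs in the page-fault path, so descriptors and pte chains are stocked up front (see mmu_topup_memory_caches() called at the start of the fault handlers below) and only handed out here.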
@@ -183,8 +456,239 @@ static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
 	ASSERT(is_empty_shadow_page(page->page_hpa));
 	page->slot_bitmap = 0;
 	page->global = 1;
+	page->multimapped = 0;
 	page->parent_pte = parent_pte;
-	return page->page_hpa;
+	--vcpu->kvm->n_free_mmu_pages;
+	return page;
+}
+
+static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
+				    struct kvm_mmu_page *page, u64 *parent_pte)
+{
+	struct kvm_pte_chain *pte_chain;
+	struct hlist_node *node;
+	int i;
+
+	if (!parent_pte)
+		return;
+	if (!page->multimapped) {
+		u64 *old = page->parent_pte;
+
+		if (!old) {
+			page->parent_pte = parent_pte;
+			return;
+		}
+		page->multimapped = 1;
+		pte_chain = mmu_alloc_pte_chain(vcpu);
+		INIT_HLIST_HEAD(&page->parent_ptes);
+		hlist_add_head(&pte_chain->link, &page->parent_ptes);
+		pte_chain->parent_ptes[0] = old;
+	}
+	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
+		if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
+			continue;
+		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
+			if (!pte_chain->parent_ptes[i]) {
+				pte_chain->parent_ptes[i] = parent_pte;
+				return;
+			}
+	}
+	pte_chain = mmu_alloc_pte_chain(vcpu);
+	BUG_ON(!pte_chain);
+	hlist_add_head(&pte_chain->link, &page->parent_ptes);
+	pte_chain->parent_ptes[0] = parent_pte;
+}
+
+static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu,
+				       struct kvm_mmu_page *page,
+				       u64 *parent_pte)
+{
+	struct kvm_pte_chain *pte_chain;
+	struct hlist_node *node;
+	int i;
+
+	if (!page->multimapped) {
+		BUG_ON(page->parent_pte != parent_pte);
+		page->parent_pte = NULL;
+		return;
+	}
+	hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
+		for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
+			if (!pte_chain->parent_ptes[i])
+				break;
+			if (pte_chain->parent_ptes[i] != parent_pte)
+				continue;
+			while (i + 1 < NR_PTE_CHAIN_ENTRIES
+			       && pte_chain->parent_ptes[i + 1]) {
+				pte_chain->parent_ptes[i]
+					= pte_chain->parent_ptes[i + 1];
+				++i;
+			}
+			pte_chain->parent_ptes[i] = NULL;
+			if (i == 0) {
+				hlist_del(&pte_chain->link);
+				mmu_free_pte_chain(vcpu, pte_chain);
+				if (hlist_empty(&page->parent_ptes)) {
+					page->multimapped = 0;
+					page->parent_pte = NULL;
+				}
+			}
+			return;
+		}
+	BUG();
+}
+
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
+						gfn_t gfn)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *page;
+	struct hlist_node *node;
+
+	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &vcpu->kvm->mmu_page_hash[index];
+	hlist_for_each_entry(page, node, bucket, hash_link)
+		if (page->gfn == gfn && !page->role.metaphysical) {
+			pgprintk("%s: found role %x\n",
+				 __FUNCTION__, page->role.word);
+			return page;
+		}
+	return NULL;
+}
+
+static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
+					     gfn_t gfn,
+					     gva_t gaddr,
+					     unsigned level,
+					     int metaphysical,
+					     u64 *parent_pte)
+{
+	union kvm_mmu_page_role role;
+	unsigned index;
+	unsigned quadrant;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *page;
+	struct hlist_node *node;
+
+	role.word = 0;
+	role.glevels = vcpu->mmu.root_level;
+	role.level = level;
+	role.metaphysical = metaphysical;
+	if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
+		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
+		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
+		role.quadrant = quadrant;
+	}
+	pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
+		 gfn, role.word);
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &vcpu->kvm->mmu_page_hash[index];
+	hlist_for_each_entry(page, node, bucket, hash_link)
+		if (page->gfn == gfn && page->role.word == role.word) {
+			mmu_page_add_parent_pte(vcpu, page, parent_pte);
+			pgprintk("%s: found\n", __FUNCTION__);
+			return page;
+		}
+	page = kvm_mmu_alloc_page(vcpu, parent_pte);
+	if (!page)
+		return page;
+	pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
+	page->gfn = gfn;
+	page->role = role;
+	hlist_add_head(&page->hash_link, bucket);
+	if (!metaphysical)
+		rmap_write_protect(vcpu, gfn);
+	return page;
+}
+
+static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
+					 struct kvm_mmu_page *page)
+{
+	unsigned i;
+	u64 *pt;
+	u64 ent;
+
+	pt = __va(page->page_hpa);
+
+	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+			if (pt[i] & PT_PRESENT_MASK)
+				rmap_remove(vcpu, &pt[i]);
+			pt[i] = 0;
+		}
+		kvm_arch_ops->tlb_flush(vcpu);
+		return;
+	}
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		ent = pt[i];
+
+		pt[i] = 0;
+		if (!(ent & PT_PRESENT_MASK))
+			continue;
+		ent &= PT64_BASE_ADDR_MASK;
+		mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
+	}
+}
+
+static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
+			     struct kvm_mmu_page *page,
+			     u64 *parent_pte)
+{
+	mmu_page_remove_parent_pte(vcpu, page, parent_pte);
+}
+
+static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
+			     struct kvm_mmu_page *page)
+{
+	u64 *parent_pte;
+
+	while (page->multimapped || page->parent_pte) {
+		if (!page->multimapped)
+			parent_pte = page->parent_pte;
+		else {
+			struct kvm_pte_chain *chain;
+
+			chain = container_of(page->parent_ptes.first,
+					     struct kvm_pte_chain, link);
+			parent_pte = chain->parent_ptes[0];
+		}
+		BUG_ON(!parent_pte);
+		kvm_mmu_put_page(vcpu, page, parent_pte);
+		*parent_pte = 0;
+	}
+	kvm_mmu_page_unlink_children(vcpu, page);
+	if (!page->root_count) {
+		hlist_del(&page->hash_link);
+		kvm_mmu_free_page(vcpu, page->page_hpa);
+	} else {
+		list_del(&page->link);
+		list_add(&page->link, &vcpu->kvm->active_mmu_pages);
+	}
+}
+
+static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *page;
+	struct hlist_node *node, *n;
+	int r;
+
+	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+	r = 0;
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &vcpu->kvm->mmu_page_hash[index];
+	hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
+		if (page->gfn == gfn && !page->role.metaphysical) {
+			pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
+				 page->role.word);
+			kvm_mmu_zap_page(vcpu, page);
+			r = 1;
+		}
+	return r;
 }
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
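kvm_mmu_get_page keys shadow pages by (gfn, role), so one guest frame can legitimately have several shadow pages, one per role. The quadrant field handles 32-bit guests on the 64-bit shadow format: a guest 32-bit table holds 1024 entries but a shadow table only 512, so each guest table needs multiple shadow pages and the quadrant records which slice a given shadow covers. A worked re-derivation of that computation (standalone and illustrative; the constants are redefined locally so the snippet builds on its own):

#define PAGE_SHIFT 12		/* assumption: 4 KiB pages */
#define PT64_PT_BITS 9
#define PT32_PT_BITS 10

static unsigned quadrant_for(unsigned long gaddr, unsigned level)
{
	/* which 512-entry slice of the guest's wider table is this? */
	unsigned q = gaddr >> (PAGE_SHIFT + PT64_PT_BITS * level);

	/* one selector bit per level of 32-vs-64 fanout difference */
	return q & ((1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1);
}
/* level 1: gva 0x00000000 -> quadrant 0; gva 0x00200000 -> quadrant 1 */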
@@ -225,35 +729,6 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
 	return gpa_to_hpa(vcpu, gpa);
 }
 
-
-static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
-			       int level)
-{
-	ASSERT(vcpu);
-	ASSERT(VALID_PAGE(page_hpa));
-	ASSERT(level <= PT64_ROOT_LEVEL && level > 0);
-
-	if (level == 1)
-		memset(__va(page_hpa), 0, PAGE_SIZE);
-	else {
-		u64 *pos;
-		u64 *end;
-
-		for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
-		     pos != end; pos++) {
-			u64 current_ent = *pos;
-
-			*pos = 0;
-			if (is_present_pte(current_ent))
-				release_pt_page_64(vcpu,
-						   current_ent &
-						   PT64_BASE_ADDR_MASK,
-						   level - 1);
-		}
-	}
-	kvm_mmu_free_page(vcpu, page_hpa);
-}
-
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
@@ -266,52 +741,109 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 	for (; ; level--) {
 		u32 index = PT64_INDEX(v, level);
 		u64 *table;
+		u64 pte;
 
 		ASSERT(VALID_PAGE(table_addr));
 		table = __va(table_addr);
 
 		if (level == 1) {
+			pte = table[index];
+			if (is_present_pte(pte) && is_writeble_pte(pte))
+				return 0;
 			mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
 			page_header_update_slot(vcpu->kvm, table, v);
 			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
 								PT_USER_MASK;
+			rmap_add(vcpu, &table[index]);
 			return 0;
 		}
 
 		if (table[index] == 0) {
-			hpa_t new_table = kvm_mmu_alloc_page(vcpu,
-							     &table[index]);
+			struct kvm_mmu_page *new_table;
+			gfn_t pseudo_gfn;
 
-			if (!VALID_PAGE(new_table)) {
+			pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
+				>> PAGE_SHIFT;
+			new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
+						     v, level - 1,
+						     1, &table[index]);
+			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
 				return -ENOMEM;
 			}
 
-			if (level == PT32E_ROOT_LEVEL)
-				table[index] = new_table | PT_PRESENT_MASK;
-			else
-				table[index] = new_table | PT_PRESENT_MASK |
-					PT_WRITABLE_MASK | PT_USER_MASK;
+			table[index] = new_table->page_hpa | PT_PRESENT_MASK
+				| PT_WRITABLE_MASK | PT_USER_MASK;
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
 }
 
-static void nonpaging_flush(struct kvm_vcpu *vcpu)
+static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
-	hpa_t root = vcpu->mmu.root_hpa;
+	int i;
+	struct kvm_mmu_page *page;
 
-	++kvm_stat.tlb_flush;
-	pgprintk("nonpaging_flush\n");
-	ASSERT(VALID_PAGE(root));
-	release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
-	root = kvm_mmu_alloc_page(vcpu, NULL);
-	ASSERT(VALID_PAGE(root));
-	vcpu->mmu.root_hpa = root;
-	if (is_paging(vcpu))
-		root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
-	kvm_arch_ops->set_cr3(vcpu, root);
-	kvm_arch_ops->tlb_flush(vcpu);
+#ifdef CONFIG_X86_64
+	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+		hpa_t root = vcpu->mmu.root_hpa;
+
+		ASSERT(VALID_PAGE(root));
+		page = page_header(root);
+		--page->root_count;
+		vcpu->mmu.root_hpa = INVALID_PAGE;
+		return;
+	}
+#endif
+	for (i = 0; i < 4; ++i) {
+		hpa_t root = vcpu->mmu.pae_root[i];
+
+		ASSERT(VALID_PAGE(root));
+		root &= PT64_BASE_ADDR_MASK;
+		page = page_header(root);
+		--page->root_count;
+		vcpu->mmu.pae_root[i] = INVALID_PAGE;
+	}
+	vcpu->mmu.root_hpa = INVALID_PAGE;
+}
+
+static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+{
+	int i;
+	gfn_t root_gfn;
+	struct kvm_mmu_page *page;
+
+	root_gfn = vcpu->cr3 >> PAGE_SHIFT;
+
+#ifdef CONFIG_X86_64
+	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+		hpa_t root = vcpu->mmu.root_hpa;
+
+		ASSERT(!VALID_PAGE(root));
+		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
+					PT64_ROOT_LEVEL, 0, NULL);
+		root = page->page_hpa;
+		++page->root_count;
+		vcpu->mmu.root_hpa = root;
+		return;
+	}
+#endif
+	for (i = 0; i < 4; ++i) {
+		hpa_t root = vcpu->mmu.pae_root[i];
+
+		ASSERT(!VALID_PAGE(root));
+		if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
+			root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
+		else if (vcpu->mmu.root_level == 0)
+			root_gfn = 0;
+		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+					PT32_ROOT_LEVEL, !is_paging(vcpu),
+					NULL);
+		root = page->page_hpa;
+		++page->root_count;
+		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
+	}
+	vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
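mmu_alloc_roots treats PAE specially: instead of a single hashed root page there is a private four-entry pae_root table, each slot shadowing one PDPTE and hence one 1 GiB region of guest address space, which is why the lookup address passed to kvm_mmu_get_page is i << 30. A toy illustration of that stride (standalone C, not patch code):

#include <stdio.h>

int main(void)
{
	int i;

	/* each pae_root slot stands in for one PDPTE: 1 GiB = 1 << 30 */
	for (i = 0; i < 4; ++i)
		printf("pae_root[%d]: gva 0x%09llx - 0x%09llx\n", i,
		       (unsigned long long)i << 30,
		       ((unsigned long long)(i + 1) << 30) - 1);
	return 0;
}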
@@ -322,43 +854,29 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 				u32 error_code)
 {
-	int ret;
 	gpa_t addr = gva;
+	hpa_t paddr;
+	int r;
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
 
-	for (;;) {
-		hpa_t paddr;
-
-		paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
 
-		if (is_error_hpa(paddr))
-			return 1;
+	paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
 
-		ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
-		if (ret) {
-			nonpaging_flush(vcpu);
-			continue;
-		}
-		break;
-	}
-	return ret;
-}
+	if (is_error_hpa(paddr))
+		return 1;
 
-static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
-{
+	return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
 }
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
-	hpa_t root;
-
-	ASSERT(vcpu);
-	root = vcpu->mmu.root_hpa;
-	if (VALID_PAGE(root))
-		release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
-	vcpu->mmu.root_hpa = INVALID_PAGE;
+	mmu_free_roots(vcpu);
 }
 
 static int nonpaging_init_context(struct kvm_vcpu *vcpu)
@@ -367,40 +885,31 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 
 	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = nonpaging_page_fault;
-	context->inval_page = nonpaging_inval_page;
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
 	context->free = nonpaging_free;
-	context->root_level = PT32E_ROOT_LEVEL;
+	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+	mmu_alloc_roots(vcpu);
 	ASSERT(VALID_PAGE(context->root_hpa));
 	kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
 	return 0;
 }
 
-
 static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
-	struct kvm_mmu_page *page, *npage;
-
-	list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
-				 link) {
-		if (page->global)
-			continue;
-
-		if (!page->parent_pte)
-			continue;
-
-		*page->parent_pte = 0;
-		release_pt_page_64(vcpu, page->page_hpa, 1);
-	}
 	++kvm_stat.tlb_flush;
 	kvm_arch_ops->tlb_flush(vcpu);
 }
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
+	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+	mmu_free_roots(vcpu);
+	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+		kvm_mmu_free_some_pages(vcpu);
+	mmu_alloc_roots(vcpu);
 	kvm_mmu_flush_tlb(vcpu);
+	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 }
 
 static void mark_pagetable_nonglobal(void *shadow_pte)
@@ -412,7 +921,8 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
 			   u64 *shadow_pte,
 			   gpa_t gaddr,
 			   int dirty,
-			   u64 access_bits)
+			   u64 access_bits,
+			   gfn_t gfn)
 {
 	hpa_t paddr;
 
@@ -420,13 +930,10 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
 	if (!dirty)
 		access_bits &= ~PT_WRITABLE_MASK;
 
-	if (access_bits & PT_WRITABLE_MASK)
-		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
+	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
 
 	*shadow_pte |= access_bits;
 
-	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
-
 	if (!(*shadow_pte & PT_GLOBAL_MASK))
 		mark_pagetable_nonglobal(shadow_pte);
 
@@ -434,10 +941,31 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
 		*shadow_pte |= gaddr;
 		*shadow_pte |= PT_SHADOW_IO_MARK;
 		*shadow_pte &= ~PT_PRESENT_MASK;
-	} else {
-		*shadow_pte |= paddr;
-		page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+		return;
+	}
+
+	*shadow_pte |= paddr;
+
+	if (access_bits & PT_WRITABLE_MASK) {
+		struct kvm_mmu_page *shadow;
+
+		shadow = kvm_mmu_lookup_page(vcpu, gfn);
+		if (shadow) {
+			pgprintk("%s: found shadow page for %lx, marking ro\n",
+				 __FUNCTION__, gfn);
+			access_bits &= ~PT_WRITABLE_MASK;
+			if (is_writeble_pte(*shadow_pte)) {
+				*shadow_pte &= ~PT_WRITABLE_MASK;
+				kvm_arch_ops->tlb_flush(vcpu);
+			}
+		}
 	}
+
+	if (access_bits & PT_WRITABLE_MASK)
+		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
+
+	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+	rmap_add(vcpu, shadow_pte);
 }
 
 static void inject_page_fault(struct kvm_vcpu *vcpu,
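The rewritten tail of set_pte_common enforces the central invariant of shadow-page caching: a guest frame that is itself cached as a shadow page table must never be mapped writable, so every guest PTE update traps and kvm_mmu_pre_write (added below) can fix up the shadow. A condensed restatement of just that decision (the wrapper is illustrative; the names it uses come from the patch):

/* Illustrative condensation of set_pte_common's write-protect rule. */
static u64 shadow_protect_bits(struct kvm_vcpu *vcpu, u64 access_bits,
			       gfn_t gfn)
{
	/* writable mapping requested for a frame cached as a page table? */
	if ((access_bits & PT_WRITABLE_MASK) &&
	    kvm_mmu_lookup_page(vcpu, gfn))
		access_bits &= ~PT_WRITABLE_MASK;	/* force writes to fault */
	return access_bits;
}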
@@ -474,41 +1002,6 @@ static int may_access(u64 pte, int write, int user)
 	return 1;
 }
 
-/*
- * Remove a shadow pte.
- */
-static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
-{
-	hpa_t page_addr = vcpu->mmu.root_hpa;
-	int level = vcpu->mmu.shadow_root_level;
-
-	++kvm_stat.invlpg;
-
-	for (; ; level--) {
-		u32 index = PT64_INDEX(addr, level);
-		u64 *table = __va(page_addr);
-
-		if (level == PT_PAGE_TABLE_LEVEL ) {
-			table[index] = 0;
-			return;
-		}
-
-		if (!is_present_pte(table[index]))
-			return;
-
-		page_addr = table[index] & PT64_BASE_ADDR_MASK;
-
-		if (level == PT_DIRECTORY_LEVEL &&
-		    (table[index] & PT_SHADOW_PS_MARK)) {
-			table[index] = 0;
-			release_pt_page_64(vcpu, page_addr, PT_PAGE_TABLE_LEVEL);
-
-			kvm_arch_ops->tlb_flush(vcpu);
-			return;
-		}
-	}
-}
-
 static void paging_free(struct kvm_vcpu *vcpu)
 {
 	nonpaging_free(vcpu);
@@ -522,37 +1015,40 @@ static void paging_free(struct kvm_vcpu *vcpu)
 #include "paging_tmpl.h"
 #undef PTTYPE
 
-static int paging64_init_context(struct kvm_vcpu *vcpu)
+static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 {
 	struct kvm_mmu *context = &vcpu->mmu;
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging64_page_fault;
-	context->inval_page = paging_inval_page;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->free = paging_free;
-	context->root_level = PT64_ROOT_LEVEL;
-	context->shadow_root_level = PT64_ROOT_LEVEL;
-	context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+	context->root_level = level;
+	context->shadow_root_level = level;
+	mmu_alloc_roots(vcpu);
 	ASSERT(VALID_PAGE(context->root_hpa));
 	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
 			      (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
 	return 0;
 }
 
+static int paging64_init_context(struct kvm_vcpu *vcpu)
+{
+	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
+}
+
 static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->mmu;
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
-	context->inval_page = paging_inval_page;
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->free = paging_free;
 	context->root_level = PT32_ROOT_LEVEL;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+	mmu_alloc_roots(vcpu);
 	ASSERT(VALID_PAGE(context->root_hpa));
 	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
 			      (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
| 561 | 1057 | ||
| 562 | static int paging32E_init_context(struct kvm_vcpu *vcpu) | 1058 | static int paging32E_init_context(struct kvm_vcpu *vcpu) |
| 563 | { | 1059 | { |
| 564 | int ret; | 1060 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); |
| 565 | |||
| 566 | if ((ret = paging64_init_context(vcpu))) | ||
| 567 | return ret; | ||
| 568 | |||
| 569 | vcpu->mmu.root_level = PT32E_ROOT_LEVEL; | ||
| 570 | vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL; | ||
| 571 | return 0; | ||
| 572 | } | 1061 | } |
| 573 | 1062 | ||
| 574 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | 1063 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) |
@@ -597,41 +1086,161 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
+	int r;
+
 	destroy_kvm_mmu(vcpu);
-	return init_kvm_mmu(vcpu);
+	r = init_kvm_mmu(vcpu);
+	if (r < 0)
+		goto out;
+	r = mmu_topup_memory_caches(vcpu);
+out:
+	return r;
 }
 
-static void free_mmu_pages(struct kvm_vcpu *vcpu)
+void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 {
-	while (!list_empty(&vcpu->free_pages)) {
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	struct kvm_mmu_page *page;
+	struct kvm_mmu_page *child;
+	struct hlist_node *node, *n;
+	struct hlist_head *bucket;
+	unsigned index;
+	u64 *spte;
+	u64 pte;
+	unsigned offset = offset_in_page(gpa);
+	unsigned pte_size;
+	unsigned page_offset;
+	unsigned misaligned;
+	int level;
+	int flooded = 0;
+
+	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+	if (gfn == vcpu->last_pt_write_gfn) {
+		++vcpu->last_pt_write_count;
+		if (vcpu->last_pt_write_count >= 3)
+			flooded = 1;
+	} else {
+		vcpu->last_pt_write_gfn = gfn;
+		vcpu->last_pt_write_count = 1;
+	}
+	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	bucket = &vcpu->kvm->mmu_page_hash[index];
+	hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
+		if (page->gfn != gfn || page->role.metaphysical)
+			continue;
+		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
+		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+		if (misaligned || flooded) {
+			/*
+			 * Misaligned accesses are too much trouble to fix
+			 * up; also, they usually indicate a page is not used
+			 * as a page table.
+			 *
+			 * If we're seeing too many writes to a page,
+			 * it may no longer be a page table, or we may be
+			 * forking, in which case it is better to unmap the
+			 * page.
+			 */
+			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+				 gpa, bytes, page->role.word);
+			kvm_mmu_zap_page(vcpu, page);
+			continue;
+		}
+		page_offset = offset;
+		level = page->role.level;
+		if (page->role.glevels == PT32_ROOT_LEVEL) {
+			page_offset <<= 1;	/* 32->64 */
+			page_offset &= ~PAGE_MASK;
+		}
+		spte = __va(page->page_hpa);
+		spte += page_offset / sizeof(*spte);
+		pte = *spte;
+		if (is_present_pte(pte)) {
+			if (level == PT_PAGE_TABLE_LEVEL)
+				rmap_remove(vcpu, spte);
+			else {
+				child = page_header(pte & PT64_BASE_ADDR_MASK);
+				mmu_page_remove_parent_pte(vcpu, child, spte);
+			}
+		}
+		*spte = 0;
+	}
+}
+
+void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
+{
+}
+
+int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
+
+	return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
+}
+
+void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+	while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
 		struct kvm_mmu_page *page;
 
+		page = container_of(vcpu->kvm->active_mmu_pages.prev,
+				    struct kvm_mmu_page, link);
+		kvm_mmu_zap_page(vcpu, page);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages);
+
+static void free_mmu_pages(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_page *page;
+
+	while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
+		page = container_of(vcpu->kvm->active_mmu_pages.next,
+				    struct kvm_mmu_page, link);
+		kvm_mmu_zap_page(vcpu, page);
+	}
+	while (!list_empty(&vcpu->free_pages)) {
 		page = list_entry(vcpu->free_pages.next,
 				  struct kvm_mmu_page, link);
 		list_del(&page->link);
 		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
 		page->page_hpa = INVALID_PAGE;
 	}
+	free_page((unsigned long)vcpu->mmu.pae_root);
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 {
+	struct page *page;
 	int i;
 
 	ASSERT(vcpu);
 
 	for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
-		struct page *page;
 		struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];
 
 		INIT_LIST_HEAD(&page_header->link);
-		if ((page = alloc_page(GFP_KVM_MMU)) == NULL)
+		if ((page = alloc_page(GFP_KERNEL)) == NULL)
 			goto error_1;
 		page->private = (unsigned long)page_header;
 		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
 		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
 		list_add(&page_header->link, &vcpu->free_pages);
+		++vcpu->kvm->n_free_mmu_pages;
 	}
+
+	/*
+	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
+	 * Therefore we need to allocate shadow page tables in the first
+	 * 4GB of memory, which happens to fit the DMA32 zone.
+	 */
+	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+	if (!page)
+		goto error_1;
+	vcpu->mmu.pae_root = page_address(page);
+	for (i = 0; i < 4; ++i)
+		vcpu->mmu.pae_root[i] = INVALID_PAGE;
+
 	return 0;
 
 error_1:
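kvm_mmu_pre_write is what keeps a cached shadow page coherent when the emulator intercepts a guest write to its own page table: it either patches the single affected spte or zaps the whole shadow page. The two zap triggers are worth restating compactly (sketch only; the constants mirror the code above, the helper itself is invented):

/* Illustrative restatement of kvm_mmu_pre_write's zap conditions. */
static int should_zap(unsigned offset, int bytes, unsigned pte_size,
		      int write_count)
{
	/* the write does not fall within one naturally aligned pte */
	unsigned misaligned =
		(offset ^ (offset + bytes - 1)) & ~(pte_size - 1);

	/* three or more back-to-back writes to one gfn count as flooding */
	return misaligned || write_count >= 3;
}

The __GFP_DMA32 allocation at the end of alloc_mmu_pages follows from the comment in the hunk: a 32-bit guest cr3 can only name a root below 4 GB, so the pae_root page must come from the first 4 GB of host memory.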
@@ -663,10 +1272,12 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 
 	destroy_kvm_mmu(vcpu);
 	free_mmu_pages(vcpu);
+	mmu_free_memory_caches(vcpu);
 }
 
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
 {
+	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_page *page;
 
 	list_for_each_entry(page, &kvm->active_mmu_pages, link) {
@@ -679,8 +1290,169 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 		pt = __va(page->page_hpa);
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 			/* avoid RMW */
-			if (pt[i] & PT_WRITABLE_MASK)
+			if (pt[i] & PT_WRITABLE_MASK) {
+				rmap_remove(vcpu, &pt[i]);
 				pt[i] &= ~PT_WRITABLE_MASK;
+			}
+	}
+}
+
+#ifdef AUDIT
+
+static const char *audit_msg;
+
+static gva_t canonicalize(gva_t gva)
+{
+#ifdef CONFIG_X86_64
+	gva = (long long)(gva << 16) >> 16;
+#endif
+	return gva;
+}
 
+static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
+				gva_t va, int level)
+{
+	u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
+	int i;
+	gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
+		u64 ent = pt[i];
+
+		if (!ent & PT_PRESENT_MASK)
+			continue;
+
+		va = canonicalize(va);
+		if (level > 1)
+			audit_mappings_page(vcpu, ent, va, level - 1);
+		else {
+			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
+			hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+
+			if ((ent & PT_PRESENT_MASK)
+			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
+				printk(KERN_ERR "audit error: (%s) levels %d"
+				       " gva %lx gpa %llx hpa %llx ent %llx\n",
+				       audit_msg, vcpu->mmu.root_level,
+				       va, gpa, hpa, ent);
+		}
 	}
 }
+
+static void audit_mappings(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	if (vcpu->mmu.root_level == 4)
+		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
+	else
+		for (i = 0; i < 4; ++i)
+			if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
+				audit_mappings_page(vcpu,
+						    vcpu->mmu.pae_root[i],
+						    i << 30,
+						    2);
+}
+
+static int count_rmaps(struct kvm_vcpu *vcpu)
+{
+	int nmaps = 0;
+	int i, j, k;
+
+	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
+		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+		struct kvm_rmap_desc *d;
+
+		for (j = 0; j < m->npages; ++j) {
+			struct page *page = m->phys_mem[j];
+
+			if (!page->private)
+				continue;
+			if (!(page->private & 1)) {
+				++nmaps;
+				continue;
+			}
+			d = (struct kvm_rmap_desc *)(page->private & ~1ul);
+			while (d) {
+				for (k = 0; k < RMAP_EXT; ++k)
+					if (d->shadow_ptes[k])
+						++nmaps;
+					else
+						break;
+				d = d->more;
+			}
+		}
+	}
+	return nmaps;
+}
+
+static int count_writable_mappings(struct kvm_vcpu *vcpu)
+{
+	int nmaps = 0;
+	struct kvm_mmu_page *page;
+	int i;
+
+	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
+		u64 *pt = __va(page->page_hpa);
+
+		if (page->role.level != PT_PAGE_TABLE_LEVEL)
+			continue;
+
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+			u64 ent = pt[i];
+
+			if (!(ent & PT_PRESENT_MASK))
+				continue;
+			if (!(ent & PT_WRITABLE_MASK))
+				continue;
+			++nmaps;
+		}
+	}
+	return nmaps;
+}
+
+static void audit_rmap(struct kvm_vcpu *vcpu)
+{
+	int n_rmap = count_rmaps(vcpu);
+	int n_actual = count_writable_mappings(vcpu);
+
+	if (n_rmap != n_actual)
+		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
+		       __FUNCTION__, audit_msg, n_rmap, n_actual);
+}
+
+static void audit_write_protection(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_page *page;
+
+	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
+		hfn_t hfn;
+		struct page *pg;
+
+		if (page->role.metaphysical)
+			continue;
+
+		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
+			>> PAGE_SHIFT;
+		pg = pfn_to_page(hfn);
+		if (pg->private)
+			printk(KERN_ERR "%s: (%s) shadow page has writable"
+			       " mappings: gfn %lx role %x\n",
+			       __FUNCTION__, audit_msg, page->gfn,
+			       page->role.word);
+	}
+}
+
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
+{
+	int olddbg = dbg;
+
+	dbg = 0;
+	audit_msg = msg;
+	audit_rmap(vcpu);
+	audit_write_protection(vcpu);
+	audit_mappings(vcpu);
+	dbg = olddbg;
+}
+
+#endif
