author     Andre Przywara <andre.przywara@amd.com>   2010-12-21 05:12:04 -0500
committer  Avi Kivity <avi@redhat.com>               2011-01-12 04:31:03 -0500
commit     7ff76d58a9dc03a38b86d283abcaae2ac3c74fe3 (patch)
tree       f45fdc961458c760f4d8a64f4f5e9269ce82bc38 /arch/x86/kvm
parent     ddce97aac5405e0b2b8b2191cb65b5a48eb14145 (diff)
KVM: SVM: enhance MOV CR intercept handler
Newer SVM implementations provide the GPR number in the VMCB, so the emulation path is no longer necessary for handling CR register access intercepts. Implement the handling in svm.c and use it when the information is provided.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/svm.c | 90
1 file changed, 79 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2830a73ea736..5abaa5b2f624 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2660,12 +2660,80 @@ static int emulate_on_interception(struct vcpu_svm *svm)
         return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
+#define CR_VALID (1ULL << 63)
+
+static int cr_interception(struct vcpu_svm *svm)
+{
+        int reg, cr;
+        unsigned long val;
+        int err;
+
+        if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
+                return emulate_on_interception(svm);
+
+        if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
+                return emulate_on_interception(svm);
+
+        reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
+        cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+
+        err = 0;
+        if (cr >= 16) { /* mov to cr */
+                cr -= 16;
+                val = kvm_register_read(&svm->vcpu, reg);
+                switch (cr) {
+                case 0:
+                        err = kvm_set_cr0(&svm->vcpu, val);
+                        break;
+                case 3:
+                        err = kvm_set_cr3(&svm->vcpu, val);
+                        break;
+                case 4:
+                        err = kvm_set_cr4(&svm->vcpu, val);
+                        break;
+                case 8:
+                        err = kvm_set_cr8(&svm->vcpu, val);
+                        break;
+                default:
+                        WARN(1, "unhandled write to CR%d", cr);
+                        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+                        return 1;
+                }
+        } else { /* mov from cr */
+                switch (cr) {
+                case 0:
+                        val = kvm_read_cr0(&svm->vcpu);
+                        break;
+                case 2:
+                        val = svm->vcpu.arch.cr2;
+                        break;
+                case 3:
+                        val = svm->vcpu.arch.cr3;
+                        break;
+                case 4:
+                        val = kvm_read_cr4(&svm->vcpu);
+                        break;
+                case 8:
+                        val = kvm_get_cr8(&svm->vcpu);
+                        break;
+                default:
+                        WARN(1, "unhandled read from CR%d", cr);
+                        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+                        return 1;
+                }
+                kvm_register_write(&svm->vcpu, reg, val);
+        }
+        kvm_complete_insn_gp(&svm->vcpu, err);
+
+        return 1;
+}
+
 static int cr0_write_interception(struct vcpu_svm *svm)
 {
         struct kvm_vcpu *vcpu = &svm->vcpu;
         int r;
 
-        r = emulate_instruction(&svm->vcpu, 0);
+        r = cr_interception(svm);
 
         if (svm->nested.vmexit_rip) {
                 kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
@@ -2674,7 +2742,7 @@ static int cr0_write_interception(struct vcpu_svm *svm)
                 svm->nested.vmexit_rip = 0;
         }
 
-        return r == EMULATE_DONE;
+        return r;
 }
 
 static int cr8_write_interception(struct vcpu_svm *svm)
@@ -2684,13 +2752,13 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 
         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
         /* instruction emulation calls kvm_set_cr8() */
-        r = emulate_instruction(&svm->vcpu, 0);
+        r = cr_interception(svm);
         if (irqchip_in_kernel(svm->vcpu.kvm)) {
                 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
-                return r == EMULATE_DONE;
+                return r;
         }
         if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
-                return r == EMULATE_DONE;
+                return r;
         kvm_run->exit_reason = KVM_EXIT_SET_TPR;
         return 0;
 }
@@ -2933,14 +3001,14 @@ static int pause_interception(struct vcpu_svm *svm)
 }
 
 static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
-        [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
-        [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
-        [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
-        [SVM_EXIT_READ_CR8]                     = emulate_on_interception,
+        [SVM_EXIT_READ_CR0]                     = cr_interception,
+        [SVM_EXIT_READ_CR3]                     = cr_interception,
+        [SVM_EXIT_READ_CR4]                     = cr_interception,
+        [SVM_EXIT_READ_CR8]                     = cr_interception,
         [SVM_EXIT_CR0_SEL_WRITE]                = emulate_on_interception,
         [SVM_EXIT_WRITE_CR0]                    = cr0_write_interception,
-        [SVM_EXIT_WRITE_CR3]                    = emulate_on_interception,
-        [SVM_EXIT_WRITE_CR4]                    = emulate_on_interception,
+        [SVM_EXIT_WRITE_CR3]                    = cr_interception,
+        [SVM_EXIT_WRITE_CR4]                    = cr_interception,
         [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
         [SVM_EXIT_READ_DR0]                     = emulate_on_interception,
         [SVM_EXIT_READ_DR1]                     = emulate_on_interception,
/*
 * High memory handling common code and variables.
 *
 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
 *
 *
 * Redesigned the x86 32-bit VM architecture to deal with
 * 64-bit physical space. With current x86 CPUs this
 * means up to 64 Gigabytes physical RAM.
 *
 * Rewrote high memory support to move the page cache into
 * high memory. Implemented permanent (schedulable) kmaps
 * based on Linus' idea.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <asm/tlbflush.h>

/*
 * pkmap_count[] is not a pure "count".
 *  0 means that it is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means that there are no users, but it has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means that there are (n-1) current users of it.
 */
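/*
 * Illustrative walk-through of the states above (not from the original
 * source, derived from the code below): for a previously unused slot,
 * kmap_high() leaves the count at 2, since map_new_virtual() sets it to 1
 * ("mapped since the last TLB flush") and the caller's reference bumps it
 * to 2.  A later kunmap_high() drops it back to 1, and
 * flush_all_zero_pkmaps() may then reset it to 0 once the TLB entry has
 * been flushed.
 */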
#ifdef CONFIG_HIGHMEM

unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);

unsigned int nr_free_highpages (void)
{
	pg_data_t *pgdat;
	unsigned int pages = 0;

	for_each_online_pgdat(pgdat) {
		pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
			NR_FREE_PAGES);
		if (zone_movable_is_highmem())
			pages += zone_page_state(
					&pgdat->node_zones[ZONE_MOVABLE],
					NR_FREE_PAGES);
	}

	return pages;
}

static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);

pte_t * pkmap_page_table;

static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);

/*
 * Most architectures have no use for kmap_high_get(), so let's abstract
 * the disabling of IRQ out of the locking in that case to save on a
 * potential useless overhead.
 */
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
#define lock_kmap()             spin_lock_irq(&kmap_lock)
#define unlock_kmap()           spin_unlock_irq(&kmap_lock)
#define lock_kmap_any(flags)    spin_lock_irqsave(&kmap_lock, flags)
#define unlock_kmap_any(flags)  spin_unlock_irqrestore(&kmap_lock, flags)
#else
#define lock_kmap()             spin_lock(&kmap_lock)
#define unlock_kmap()           spin_unlock(&kmap_lock)
#define lock_kmap_any(flags)    \
		do { spin_lock(&kmap_lock); (void)(flags); } while (0)
#define unlock_kmap_any(flags)  \
		do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
#endif

static void flush_all_zero_pkmaps(void)
{
	int i;
	int need_flush = 0;

	flush_cache_kmaps();

	for (i = 0; i < LAST_PKMAP; i++) {
		struct page *page;

		/*
		 * zero means we don't have anything to do,
		 * >1 means that it is still in use. Only
		 * a count of 1 means that it is free but
		 * needs to be unmapped
		 */
		if (pkmap_count[i] != 1)
			continue;
		pkmap_count[i] = 0;

		/* sanity check */
		BUG_ON(pte_none(pkmap_page_table[i]));

		/*
		 * Don't need an atomic fetch-and-clear op here;
		 * no-one has the page mapped, and cannot get at
		 * its virtual address (and hence PTE) without first
		 * getting the kmap_lock (which is held here).
		 * So no dangers, even with speculative execution.
		 */
		page = pte_page(pkmap_page_table[i]);
		pte_clear(&init_mm, (unsigned long)page_address(page),
			  &pkmap_page_table[i]);

		set_page_address(page, NULL);
		need_flush = 1;
	}
	if (need_flush)
		flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
}

/**
 * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings
 */
void kmap_flush_unused(void)
{
	lock_kmap();
	flush_all_zero_pkmaps();
	unlock_kmap();
}

static inline unsigned long map_new_virtual(struct page *page)
{
	unsigned long vaddr;
	int count;

start:
	count = LAST_PKMAP;
	/* Find an empty entry */
	for (;;) {
		last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
		if (!last_pkmap_nr) {
			flush_all_zero_pkmaps();
			count = LAST_PKMAP;
		}
		if (!pkmap_count[last_pkmap_nr])
			break;	/* Found a usable entry */
		if (--count)
			continue;

		/*
		 * Sleep for somebody else to unmap their entries
		 */
		{
			DECLARE_WAITQUEUE(wait, current);

			__set_current_state(TASK_UNINTERRUPTIBLE);
			add_wait_queue(&pkmap_map_wait, &wait);
			unlock_kmap();
			schedule();
			remove_wait_queue(&pkmap_map_wait, &wait);
			lock_kmap();

			/* Somebody else might have mapped it while we slept */
			if (page_address(page))
				return (unsigned long)page_address(page);

			/* Re-start */
			goto start;
		}
	}
	vaddr = PKMAP_ADDR(last_pkmap_nr);
	set_pte_at(&init_mm, vaddr,
		   &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));

	pkmap_count[last_pkmap_nr] = 1;
	set_page_address(page, (void *)vaddr);

	return vaddr;
}

/**
 * kmap_high - map a highmem page into memory
 * @page: &struct page to map
 *
 * Returns the page's virtual memory address.
 *
 * We cannot call this from interrupts, as it may block.
 */
void *kmap_high(struct page *page)
{
	unsigned long vaddr;

	/*
	 * For highmem pages, we can't trust "virtual" until
	 * after we have the lock.
	 */
	lock_kmap();
	vaddr = (unsigned long)page_address(page);
	if (!vaddr)
		vaddr = map_new_virtual(page);
	pkmap_count[PKMAP_NR(vaddr)]++;
	BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
	unlock_kmap();
	return (void*) vaddr;
}

EXPORT_SYMBOL(kmap_high);
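/*
 * Illustrative usage sketch (not part of the original file): callers
 * normally reach kmap_high()/kunmap_high() through kmap()/kunmap(), which
 * also handle the non-highmem fast path, e.g.:
 *
 *	void *addr = kmap(page);	(may sleep, so process context only)
 *	memcpy(addr, buf, len);
 *	kunmap(page);
 */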

#ifdef ARCH_NEEDS_KMAP_HIGH_GET
/**
 * kmap_high_get - pin a highmem page into memory
 * @page: &struct page to pin
 *
 * Returns the page's current virtual memory address, or NULL if no mapping
 * exists.  If and only if a non null address is returned then a
 * matching call to kunmap_high() is necessary.
 *
 * This can be called from any context.
 */
void *kmap_high_get(struct page *page)
{
	unsigned long vaddr, flags;

	lock_kmap_any(flags);
	vaddr = (unsigned long)page_address(page);
	if (vaddr) {
		BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1);
		pkmap_count[PKMAP_NR(vaddr)]++;
	}
	unlock_kmap_any(flags);
	return (void*) vaddr;
}
#endif
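/*
 * Illustrative sketch (not part of the original file): a caller that only
 * wants to reuse an already existing mapping, without creating a new one,
 * can do:
 *
 *	void *addr = kmap_high_get(page);
 *	if (addr) {
 *		... access the page through addr ...
 *		kunmap_high(page);	(required whenever a non-NULL
 *					 address was returned)
 *	}
 */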

/**
 * kunmap_high - unmap a highmem page
 * @page: &struct page to unmap
 *
 * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called
 * only from user context.
 */
void kunmap_high(struct page *page)
{
	unsigned long vaddr;
	unsigned long nr;
	unsigned long flags;
	int need_wakeup;

	lock_kmap_any(flags);
	vaddr = (unsigned long)page_address(page);
	BUG_ON(!vaddr);
	nr = PKMAP_NR(vaddr);

	/*
	 * A count must never go down to zero
	 * without a TLB flush!
	 */
	need_wakeup = 0;
	switch (--pkmap_count[nr]) {
	case 0:
		BUG();
	case 1:
		/*
		 * Avoid an unnecessary wake_up() function call.
		 * The common case is pkmap_count[] == 1, but
		 * no waiters.
		 * The tasks queued in the wait-queue are guarded
		 * by both the lock in the wait-queue-head and by
		 * the kmap_lock.  As the kmap_lock is held here,
		 * no need for the wait-queue-head's lock.  Simply
		 * test if the queue is empty.
		 */
		need_wakeup = waitqueue_active(&pkmap_map_wait);
	}
	unlock_kmap_any(flags);

	/* do wake-up, if needed, race-free outside of the spin lock */
	if (need_wakeup)
		wake_up(&pkmap_map_wait);
}

EXPORT_SYMBOL(kunmap_high);
#endif

#if defined(HASHED_PAGE_VIRTUAL)

#define PA_HASH_ORDER	7

/*
 * Describes one page->virtual association
 */
struct page_address_map {
	struct page *page;
	void *virtual;
	struct list_head list;
};

/*
 * page_address_map freelist, allocated from page_address_maps.
 */
static struct list_head page_address_pool;	/* freelist */
static spinlock_t pool_lock;			/* protects page_address_pool */

/*
 * Hash table bucket
 */
static struct page_address_slot {
	struct list_head lh;			/* List of page_address_maps */
	spinlock_t lock;			/* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];

static struct page_address_slot *page_slot(struct page *page)
{
	return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
}

/**
 * page_address - get the mapped virtual address of a page
 * @page: &struct page to get the virtual address of
 *
 * Returns the page's virtual address.
 */
void *page_address(struct page *page)
{
	unsigned long flags;
	void *ret;
	struct page_address_slot *pas;

	if (!PageHighMem(page))
		return lowmem_page_address(page);

	pas = page_slot(page);
	ret = NULL;
	spin_lock_irqsave(&pas->lock, flags);
	if (!list_empty(&pas->lh)) {
		struct page_address_map *pam;

		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				ret = pam->virtual;
				goto done;
			}
		}
	}
done:
	spin_unlock_irqrestore(&pas->lock, flags);
	return ret;
}

EXPORT_SYMBOL(page_address);

/**
 * set_page_address - set a page's virtual address
 * @page: &struct page to set
 * @virtual: virtual address to use
 */
void set_page_address(struct page *page, void *virtual)
{
	unsigned long flags;
	struct page_address_slot *pas;
	struct page_address_map *pam;

	BUG_ON(!PageHighMem(page));

	pas = page_slot(page);
	if (virtual) {		/* Add */
		BUG_ON(list_empty(&page_address_pool));

		spin_lock_irqsave(&pool_lock, flags);
		pam = list_entry(page_address_pool.next,
				struct page_address_map, list);
		list_del(&pam->list);
		spin_unlock_irqrestore(&pool_lock, flags);

		pam->page = page;
		pam->virtual = virtual;

		spin_lock_irqsave(&pas->lock, flags);
		list_add_tail(&pam->list, &pas->lh);
		spin_unlock_irqrestore(&pas->lock, flags);
	} else {		/* Remove */
		spin_lock_irqsave(&pas->lock, flags);
		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				list_del(&pam->list);
				spin_unlock_irqrestore(&pas->lock, flags);
				spin_lock_irqsave(&pool_lock, flags);
				list_add_tail(&pam->list, &page_address_pool);
				spin_unlock_irqrestore(&pool_lock, flags);
				goto done;
			}
		}
		spin_unlock_irqrestore(&pas->lock, flags);
	}
done:
	return;
}

static struct page_address_map page_address_maps[LAST_PKMAP];

void __init page_address_init(void)
{
	int i;

	INIT_LIST_HEAD(&page_address_pool);
	for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
		list_add(&page_address_maps[i].list, &page_address_pool);
	for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
		INIT_LIST_HEAD(&page_address_htable[i].lh);
		spin_lock_init(&page_address_htable[i].lock);
	}
	spin_lock_init(&pool_lock);
}

#endif	/* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */

#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)

void debug_kmap_atomic(enum km_type type)
{
	static int warn_count = 10;

	if (unlikely(warn_count < 0))
		return;

	if (unlikely(in_interrupt())) {
		if (in_nmi()) {
			if (type != KM_NMI && type != KM_NMI_PTE) {
				WARN_ON(1);
				warn_count--;
			}
		} else if (in_irq()) {
			if (type != KM_IRQ0 && type != KM_IRQ1 &&
			    type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
			    type != KM_BOUNCE_READ && type != KM_IRQ_PTE) {
				WARN_ON(1);
				warn_count--;
			}
		} else if (!irqs_disabled()) {	/* softirq */
			if (type != KM_IRQ0 && type != KM_IRQ1 &&
			    type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
			    type != KM_SKB_SUNRPC_DATA &&
			    type != KM_SKB_DATA_SOFTIRQ &&
			    type != KM_BOUNCE_READ) {
				WARN_ON(1);
				warn_count--;
			}
		}
	}

	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
			type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ ||
			type == KM_IRQ_PTE || type == KM_NMI ||
			type == KM_NMI_PTE ) {
		if (!irqs_disabled()) {
			WARN_ON(1);
			warn_count--;
		}
	} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
		if (irq_count() == 0 && !irqs_disabled()) {
			WARN_ON(1);
			warn_count--;
		}
	}
}

#endif