aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kvm/vmx.c146
1 files changed, 23 insertions, 123 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c829d89e2e63..ad6a883b7a32 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -185,7 +185,6 @@ module_param(ple_window_max, int, S_IRUGO);
185extern const ulong vmx_return; 185extern const ulong vmx_return;
186 186
187#define NR_AUTOLOAD_MSRS 8 187#define NR_AUTOLOAD_MSRS 8
188#define VMCS02_POOL_SIZE 1
189 188
190struct vmcs { 189struct vmcs {
191 u32 revision_id; 190 u32 revision_id;
@@ -226,7 +225,7 @@ struct shared_msr_entry {
226 * stored in guest memory specified by VMPTRLD, but is opaque to the guest, 225 * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
227 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. 226 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
228 * More than one of these structures may exist, if L1 runs multiple L2 guests. 227 * More than one of these structures may exist, if L1 runs multiple L2 guests.
229 * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the 228 * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
230 * underlying hardware which will be used to run L2. 229 * underlying hardware which will be used to run L2.
231 * This structure is packed to ensure that its layout is identical across 230 * This structure is packed to ensure that its layout is identical across
232 * machines (necessary for live migration). 231 * machines (necessary for live migration).
@@ -409,13 +408,6 @@ struct __packed vmcs12 {
409 */ 408 */
410#define VMCS12_SIZE 0x1000 409#define VMCS12_SIZE 0x1000
411 410
412/* Used to remember the last vmcs02 used for some recently used vmcs12s */
413struct vmcs02_list {
414 struct list_head list;
415 gpa_t vmptr;
416 struct loaded_vmcs vmcs02;
417};
418
419/* 411/*
420 * The nested_vmx structure is part of vcpu_vmx, and holds information we need 412 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
421 * for correct emulation of VMX (i.e., nested VMX) on this vcpu. 413 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -440,15 +432,15 @@ struct nested_vmx {
440 */ 432 */
441 bool sync_shadow_vmcs; 433 bool sync_shadow_vmcs;
442 434
443 /* vmcs02_list cache of VMCSs recently used to run L2 guests */
444 struct list_head vmcs02_pool;
445 int vmcs02_num;
446 bool change_vmcs01_virtual_x2apic_mode; 435 bool change_vmcs01_virtual_x2apic_mode;
447 /* L2 must run next, and mustn't decide to exit to L1. */ 436 /* L2 must run next, and mustn't decide to exit to L1. */
448 bool nested_run_pending; 437 bool nested_run_pending;
438
439 struct loaded_vmcs vmcs02;
440
449 /* 441 /*
450 * Guest pages referred to in vmcs02 with host-physical pointers, so 442 * Guest pages referred to in the vmcs02 with host-physical
451 * we must keep them pinned while L2 runs. 443 * pointers, so we must keep them pinned while L2 runs.
452 */ 444 */
453 struct page *apic_access_page; 445 struct page *apic_access_page;
454 struct page *virtual_apic_page; 446 struct page *virtual_apic_page;
@@ -6974,94 +6966,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
6974} 6966}
6975 6967
6976/* 6968/*
6977 * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
6978 * We could reuse a single VMCS for all the L2 guests, but we also want the
6979 * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
6980 * allows keeping them loaded on the processor, and in the future will allow
6981 * optimizations where prepare_vmcs02 doesn't need to set all the fields on
6982 * every entry if they never change.
6983 * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
6984 * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
6985 *
6986 * The following functions allocate and free a vmcs02 in this pool.
6987 */
6988
6989/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
6990static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
6991{
6992 struct vmcs02_list *item;
6993 list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
6994 if (item->vmptr == vmx->nested.current_vmptr) {
6995 list_move(&item->list, &vmx->nested.vmcs02_pool);
6996 return &item->vmcs02;
6997 }
6998
6999 if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
7000 /* Recycle the least recently used VMCS. */
7001 item = list_last_entry(&vmx->nested.vmcs02_pool,
7002 struct vmcs02_list, list);
7003 item->vmptr = vmx->nested.current_vmptr;
7004 list_move(&item->list, &vmx->nested.vmcs02_pool);
7005 return &item->vmcs02;
7006 }
7007
7008 /* Create a new VMCS */
7009 item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
7010 if (!item)
7011 return NULL;
7012 item->vmcs02.vmcs = alloc_vmcs();
7013 item->vmcs02.shadow_vmcs = NULL;
7014 if (!item->vmcs02.vmcs) {
7015 kfree(item);
7016 return NULL;
7017 }
7018 loaded_vmcs_init(&item->vmcs02);
7019 item->vmptr = vmx->nested.current_vmptr;
7020 list_add(&(item->list), &(vmx->nested.vmcs02_pool));
7021 vmx->nested.vmcs02_num++;
7022 return &item->vmcs02;
7023}
7024
7025/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
7026static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
7027{
7028 struct vmcs02_list *item;
7029 list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
7030 if (item->vmptr == vmptr) {
7031 free_loaded_vmcs(&item->vmcs02);
7032 list_del(&item->list);
7033 kfree(item);
7034 vmx->nested.vmcs02_num--;
7035 return;
7036 }
7037}
7038
7039/*
7040 * Free all VMCSs saved for this vcpu, except the one pointed by
7041 * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
7042 * must be &vmx->vmcs01.
7043 */
7044static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
7045{
7046 struct vmcs02_list *item, *n;
7047
7048 WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
7049 list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
7050 /*
7051 * Something will leak if the above WARN triggers. Better than
7052 * a use-after-free.
7053 */
7054 if (vmx->loaded_vmcs == &item->vmcs02)
7055 continue;
7056
7057 free_loaded_vmcs(&item->vmcs02);
7058 list_del(&item->list);
7059 kfree(item);
7060 vmx->nested.vmcs02_num--;
7061 }
7062}
7063
7064/*
7065 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), 6969 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
7066 * set the success or error code of an emulated VMX instruction, as specified 6970 * set the success or error code of an emulated VMX instruction, as specified
7067 * by Vol 2B, VMX Instruction Reference, "Conventions". 6971 * by Vol 2B, VMX Instruction Reference, "Conventions".
@@ -7242,6 +7146,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
7242 struct vcpu_vmx *vmx = to_vmx(vcpu); 7146 struct vcpu_vmx *vmx = to_vmx(vcpu);
7243 struct vmcs *shadow_vmcs; 7147 struct vmcs *shadow_vmcs;
7244 7148
7149 vmx->nested.vmcs02.vmcs = alloc_vmcs();
7150 vmx->nested.vmcs02.shadow_vmcs = NULL;
7151 if (!vmx->nested.vmcs02.vmcs)
7152 goto out_vmcs02;
7153 loaded_vmcs_init(&vmx->nested.vmcs02);
7154
7245 if (cpu_has_vmx_msr_bitmap()) { 7155 if (cpu_has_vmx_msr_bitmap()) {
7246 vmx->nested.msr_bitmap = 7156 vmx->nested.msr_bitmap =
7247 (unsigned long *)__get_free_page(GFP_KERNEL); 7157 (unsigned long *)__get_free_page(GFP_KERNEL);
@@ -7264,9 +7174,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
7264 vmx->vmcs01.shadow_vmcs = shadow_vmcs; 7174 vmx->vmcs01.shadow_vmcs = shadow_vmcs;
7265 } 7175 }
7266 7176
7267 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
7268 vmx->nested.vmcs02_num = 0;
7269
7270 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, 7177 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
7271 HRTIMER_MODE_REL_PINNED); 7178 HRTIMER_MODE_REL_PINNED);
7272 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; 7179 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7281,6 +7188,9 @@ out_cached_vmcs12:
7281 free_page((unsigned long)vmx->nested.msr_bitmap); 7188 free_page((unsigned long)vmx->nested.msr_bitmap);
7282 7189
7283out_msr_bitmap: 7190out_msr_bitmap:
7191 free_loaded_vmcs(&vmx->nested.vmcs02);
7192
7193out_vmcs02:
7284 return -ENOMEM; 7194 return -ENOMEM;
7285} 7195}
7286 7196
@@ -7434,7 +7344,7 @@ static void free_nested(struct vcpu_vmx *vmx)
7434 vmx->vmcs01.shadow_vmcs = NULL; 7344 vmx->vmcs01.shadow_vmcs = NULL;
7435 } 7345 }
7436 kfree(vmx->nested.cached_vmcs12); 7346 kfree(vmx->nested.cached_vmcs12);
7437 /* Unpin physical memory we referred to in current vmcs02 */ 7347 /* Unpin physical memory we referred to in the vmcs02 */
7438 if (vmx->nested.apic_access_page) { 7348 if (vmx->nested.apic_access_page) {
7439 kvm_release_page_dirty(vmx->nested.apic_access_page); 7349 kvm_release_page_dirty(vmx->nested.apic_access_page);
7440 vmx->nested.apic_access_page = NULL; 7350 vmx->nested.apic_access_page = NULL;
@@ -7450,7 +7360,7 @@ static void free_nested(struct vcpu_vmx *vmx)
7450 vmx->nested.pi_desc = NULL; 7360 vmx->nested.pi_desc = NULL;
7451 } 7361 }
7452 7362
7453 nested_free_all_saved_vmcss(vmx); 7363 free_loaded_vmcs(&vmx->nested.vmcs02);
7454} 7364}
7455 7365
7456/* Emulate the VMXOFF instruction */ 7366/* Emulate the VMXOFF instruction */
@@ -7493,8 +7403,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
7493 vmptr + offsetof(struct vmcs12, launch_state), 7403 vmptr + offsetof(struct vmcs12, launch_state),
7494 &zero, sizeof(zero)); 7404 &zero, sizeof(zero));
7495 7405
7496 nested_free_vmcs02(vmx, vmptr);
7497
7498 nested_vmx_succeed(vcpu); 7406 nested_vmx_succeed(vcpu);
7499 return kvm_skip_emulated_instruction(vcpu); 7407 return kvm_skip_emulated_instruction(vcpu);
7500} 7408}
@@ -8406,10 +8314,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
8406 8314
8407 /* 8315 /*
8408 * The host physical addresses of some pages of guest memory 8316 * The host physical addresses of some pages of guest memory
8409 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU 8317 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
8410 * may write to these pages via their host physical address while 8318 * Page). The CPU may write to these pages via their host
8411 * L2 is running, bypassing any address-translation-based dirty 8319 * physical address while L2 is running, bypassing any
8412 * tracking (e.g. EPT write protection). 8320 * address-translation-based dirty tracking (e.g. EPT write
8321 * protection).
8413 * 8322 *
8414 * Mark them dirty on every exit from L2 to prevent them from 8323 * Mark them dirty on every exit from L2 to prevent them from
8415 * getting out of sync with dirty tracking. 8324 * getting out of sync with dirty tracking.
@@ -10903,20 +10812,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
10903{ 10812{
10904 struct vcpu_vmx *vmx = to_vmx(vcpu); 10813 struct vcpu_vmx *vmx = to_vmx(vcpu);
10905 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 10814 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10906 struct loaded_vmcs *vmcs02;
10907 u32 msr_entry_idx; 10815 u32 msr_entry_idx;
10908 u32 exit_qual; 10816 u32 exit_qual;
10909 10817
10910 vmcs02 = nested_get_current_vmcs02(vmx);
10911 if (!vmcs02)
10912 return -ENOMEM;
10913
10914 enter_guest_mode(vcpu); 10818 enter_guest_mode(vcpu);
10915 10819
10916 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) 10820 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
10917 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 10821 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
10918 10822
10919 vmx_switch_vmcs(vcpu, vmcs02); 10823 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
10920 vmx_segment_cache_clear(vmx); 10824 vmx_segment_cache_clear(vmx);
10921 10825
10922 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { 10826 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
@@ -11534,10 +11438,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
11534 vm_exit_controls_reset_shadow(vmx); 11438 vm_exit_controls_reset_shadow(vmx);
11535 vmx_segment_cache_clear(vmx); 11439 vmx_segment_cache_clear(vmx);
11536 11440
11537 /* if no vmcs02 cache requested, remove the one we used */
11538 if (VMCS02_POOL_SIZE == 0)
11539 nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
11540
11541 /* Update any VMCS fields that might have changed while L2 ran */ 11441 /* Update any VMCS fields that might have changed while L2 ran */
11542 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); 11442 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
11543 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); 11443 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);