diff options
-rw-r--r-- | arch/x86/kvm/vmx.c | 146 |
1 files changed, 23 insertions, 123 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c829d89e2e63..ad6a883b7a32 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -185,7 +185,6 @@ module_param(ple_window_max, int, S_IRUGO); | |||
185 | extern const ulong vmx_return; | 185 | extern const ulong vmx_return; |
186 | 186 | ||
187 | #define NR_AUTOLOAD_MSRS 8 | 187 | #define NR_AUTOLOAD_MSRS 8 |
188 | #define VMCS02_POOL_SIZE 1 | ||
189 | 188 | ||
190 | struct vmcs { | 189 | struct vmcs { |
191 | u32 revision_id; | 190 | u32 revision_id; |
@@ -226,7 +225,7 @@ struct shared_msr_entry { | |||
226 | * stored in guest memory specified by VMPTRLD, but is opaque to the guest, | 225 | * stored in guest memory specified by VMPTRLD, but is opaque to the guest, |
227 | * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. | 226 | * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. |
228 | * More than one of these structures may exist, if L1 runs multiple L2 guests. | 227 | * More than one of these structures may exist, if L1 runs multiple L2 guests. |
229 | * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the | 228 | * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the |
230 | * underlying hardware which will be used to run L2. | 229 | * underlying hardware which will be used to run L2. |
231 | * This structure is packed to ensure that its layout is identical across | 230 | * This structure is packed to ensure that its layout is identical across |
232 | * machines (necessary for live migration). | 231 | * machines (necessary for live migration). |
@@ -409,13 +408,6 @@ struct __packed vmcs12 { | |||
409 | */ | 408 | */ |
410 | #define VMCS12_SIZE 0x1000 | 409 | #define VMCS12_SIZE 0x1000 |
411 | 410 | ||
412 | /* Used to remember the last vmcs02 used for some recently used vmcs12s */ | ||
413 | struct vmcs02_list { | ||
414 | struct list_head list; | ||
415 | gpa_t vmptr; | ||
416 | struct loaded_vmcs vmcs02; | ||
417 | }; | ||
418 | |||
419 | /* | 411 | /* |
420 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need | 412 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need |
421 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. | 413 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. |
@@ -440,15 +432,15 @@ struct nested_vmx { | |||
440 | */ | 432 | */ |
441 | bool sync_shadow_vmcs; | 433 | bool sync_shadow_vmcs; |
442 | 434 | ||
443 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ | ||
444 | struct list_head vmcs02_pool; | ||
445 | int vmcs02_num; | ||
446 | bool change_vmcs01_virtual_x2apic_mode; | 435 | bool change_vmcs01_virtual_x2apic_mode; |
447 | /* L2 must run next, and mustn't decide to exit to L1. */ | 436 | /* L2 must run next, and mustn't decide to exit to L1. */ |
448 | bool nested_run_pending; | 437 | bool nested_run_pending; |
438 | |||
439 | struct loaded_vmcs vmcs02; | ||
440 | |||
449 | /* | 441 | /* |
450 | * Guest pages referred to in vmcs02 with host-physical pointers, so | 442 | * Guest pages referred to in the vmcs02 with host-physical |
451 | * we must keep them pinned while L2 runs. | 443 | * pointers, so we must keep them pinned while L2 runs. |
452 | */ | 444 | */ |
453 | struct page *apic_access_page; | 445 | struct page *apic_access_page; |
454 | struct page *virtual_apic_page; | 446 | struct page *virtual_apic_page; |
@@ -6974,94 +6966,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) | |||
6974 | } | 6966 | } |
6975 | 6967 | ||
6976 | /* | 6968 | /* |
6977 | * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. | ||
6978 | * We could reuse a single VMCS for all the L2 guests, but we also want the | ||
6979 | * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this | ||
6980 | * allows keeping them loaded on the processor, and in the future will allow | ||
6981 | * optimizations where prepare_vmcs02 doesn't need to set all the fields on | ||
6982 | * every entry if they never change. | ||
6983 | * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE | ||
6984 | * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. | ||
6985 | * | ||
6986 | * The following functions allocate and free a vmcs02 in this pool. | ||
6987 | */ | ||
6988 | |||
6989 | /* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ | ||
6990 | static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | ||
6991 | { | ||
6992 | struct vmcs02_list *item; | ||
6993 | list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
6994 | if (item->vmptr == vmx->nested.current_vmptr) { | ||
6995 | list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
6996 | return &item->vmcs02; | ||
6997 | } | ||
6998 | |||
6999 | if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { | ||
7000 | /* Recycle the least recently used VMCS. */ | ||
7001 | item = list_last_entry(&vmx->nested.vmcs02_pool, | ||
7002 | struct vmcs02_list, list); | ||
7003 | item->vmptr = vmx->nested.current_vmptr; | ||
7004 | list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
7005 | return &item->vmcs02; | ||
7006 | } | ||
7007 | |||
7008 | /* Create a new VMCS */ | ||
7009 | item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); | ||
7010 | if (!item) | ||
7011 | return NULL; | ||
7012 | item->vmcs02.vmcs = alloc_vmcs(); | ||
7013 | item->vmcs02.shadow_vmcs = NULL; | ||
7014 | if (!item->vmcs02.vmcs) { | ||
7015 | kfree(item); | ||
7016 | return NULL; | ||
7017 | } | ||
7018 | loaded_vmcs_init(&item->vmcs02); | ||
7019 | item->vmptr = vmx->nested.current_vmptr; | ||
7020 | list_add(&(item->list), &(vmx->nested.vmcs02_pool)); | ||
7021 | vmx->nested.vmcs02_num++; | ||
7022 | return &item->vmcs02; | ||
7023 | } | ||
7024 | |||
7025 | /* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ | ||
7026 | static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) | ||
7027 | { | ||
7028 | struct vmcs02_list *item; | ||
7029 | list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
7030 | if (item->vmptr == vmptr) { | ||
7031 | free_loaded_vmcs(&item->vmcs02); | ||
7032 | list_del(&item->list); | ||
7033 | kfree(item); | ||
7034 | vmx->nested.vmcs02_num--; | ||
7035 | return; | ||
7036 | } | ||
7037 | } | ||
7038 | |||
7039 | /* | ||
7040 | * Free all VMCSs saved for this vcpu, except the one pointed by | ||
7041 | * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs | ||
7042 | * must be &vmx->vmcs01. | ||
7043 | */ | ||
7044 | static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | ||
7045 | { | ||
7046 | struct vmcs02_list *item, *n; | ||
7047 | |||
7048 | WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); | ||
7049 | list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { | ||
7050 | /* | ||
7051 | * Something will leak if the above WARN triggers. Better than | ||
7052 | * a use-after-free. | ||
7053 | */ | ||
7054 | if (vmx->loaded_vmcs == &item->vmcs02) | ||
7055 | continue; | ||
7056 | |||
7057 | free_loaded_vmcs(&item->vmcs02); | ||
7058 | list_del(&item->list); | ||
7059 | kfree(item); | ||
7060 | vmx->nested.vmcs02_num--; | ||
7061 | } | ||
7062 | } | ||
7063 | |||
7064 | /* | ||
7065 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | 6969 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), |
7066 | * set the success or error code of an emulated VMX instruction, as specified | 6970 | * set the success or error code of an emulated VMX instruction, as specified |
7067 | * by Vol 2B, VMX Instruction Reference, "Conventions". | 6971 | * by Vol 2B, VMX Instruction Reference, "Conventions". |
@@ -7242,6 +7146,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) | |||
7242 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7146 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7243 | struct vmcs *shadow_vmcs; | 7147 | struct vmcs *shadow_vmcs; |
7244 | 7148 | ||
7149 | vmx->nested.vmcs02.vmcs = alloc_vmcs(); | ||
7150 | vmx->nested.vmcs02.shadow_vmcs = NULL; | ||
7151 | if (!vmx->nested.vmcs02.vmcs) | ||
7152 | goto out_vmcs02; | ||
7153 | loaded_vmcs_init(&vmx->nested.vmcs02); | ||
7154 | |||
7245 | if (cpu_has_vmx_msr_bitmap()) { | 7155 | if (cpu_has_vmx_msr_bitmap()) { |
7246 | vmx->nested.msr_bitmap = | 7156 | vmx->nested.msr_bitmap = |
7247 | (unsigned long *)__get_free_page(GFP_KERNEL); | 7157 | (unsigned long *)__get_free_page(GFP_KERNEL); |
@@ -7264,9 +7174,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) | |||
7264 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; | 7174 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; |
7265 | } | 7175 | } |
7266 | 7176 | ||
7267 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
7268 | vmx->nested.vmcs02_num = 0; | ||
7269 | |||
7270 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | 7177 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, |
7271 | HRTIMER_MODE_REL_PINNED); | 7178 | HRTIMER_MODE_REL_PINNED); |
7272 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | 7179 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; |
@@ -7281,6 +7188,9 @@ out_cached_vmcs12: | |||
7281 | free_page((unsigned long)vmx->nested.msr_bitmap); | 7188 | free_page((unsigned long)vmx->nested.msr_bitmap); |
7282 | 7189 | ||
7283 | out_msr_bitmap: | 7190 | out_msr_bitmap: |
7191 | free_loaded_vmcs(&vmx->nested.vmcs02); | ||
7192 | |||
7193 | out_vmcs02: | ||
7284 | return -ENOMEM; | 7194 | return -ENOMEM; |
7285 | } | 7195 | } |
7286 | 7196 | ||
@@ -7434,7 +7344,7 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
7434 | vmx->vmcs01.shadow_vmcs = NULL; | 7344 | vmx->vmcs01.shadow_vmcs = NULL; |
7435 | } | 7345 | } |
7436 | kfree(vmx->nested.cached_vmcs12); | 7346 | kfree(vmx->nested.cached_vmcs12); |
7437 | /* Unpin physical memory we referred to in current vmcs02 */ | 7347 | /* Unpin physical memory we referred to in the vmcs02 */ |
7438 | if (vmx->nested.apic_access_page) { | 7348 | if (vmx->nested.apic_access_page) { |
7439 | kvm_release_page_dirty(vmx->nested.apic_access_page); | 7349 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
7440 | vmx->nested.apic_access_page = NULL; | 7350 | vmx->nested.apic_access_page = NULL; |
@@ -7450,7 +7360,7 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
7450 | vmx->nested.pi_desc = NULL; | 7360 | vmx->nested.pi_desc = NULL; |
7451 | } | 7361 | } |
7452 | 7362 | ||
7453 | nested_free_all_saved_vmcss(vmx); | 7363 | free_loaded_vmcs(&vmx->nested.vmcs02); |
7454 | } | 7364 | } |
7455 | 7365 | ||
7456 | /* Emulate the VMXOFF instruction */ | 7366 | /* Emulate the VMXOFF instruction */ |
@@ -7493,8 +7403,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
7493 | vmptr + offsetof(struct vmcs12, launch_state), | 7403 | vmptr + offsetof(struct vmcs12, launch_state), |
7494 | &zero, sizeof(zero)); | 7404 | &zero, sizeof(zero)); |
7495 | 7405 | ||
7496 | nested_free_vmcs02(vmx, vmptr); | ||
7497 | |||
7498 | nested_vmx_succeed(vcpu); | 7406 | nested_vmx_succeed(vcpu); |
7499 | return kvm_skip_emulated_instruction(vcpu); | 7407 | return kvm_skip_emulated_instruction(vcpu); |
7500 | } | 7408 | } |
@@ -8406,10 +8314,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) | |||
8406 | 8314 | ||
8407 | /* | 8315 | /* |
8408 | * The host physical addresses of some pages of guest memory | 8316 | * The host physical addresses of some pages of guest memory |
8409 | * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU | 8317 | * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC |
8410 | * may write to these pages via their host physical address while | 8318 | * Page). The CPU may write to these pages via their host |
8411 | * L2 is running, bypassing any address-translation-based dirty | 8319 | * physical address while L2 is running, bypassing any |
8412 | * tracking (e.g. EPT write protection). | 8320 | * address-translation-based dirty tracking (e.g. EPT write |
8321 | * protection). | ||
8413 | * | 8322 | * |
8414 | * Mark them dirty on every exit from L2 to prevent them from | 8323 | * Mark them dirty on every exit from L2 to prevent them from |
8415 | * getting out of sync with dirty tracking. | 8324 | * getting out of sync with dirty tracking. |
@@ -10903,20 +10812,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
10903 | { | 10812 | { |
10904 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 10813 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
10905 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 10814 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
10906 | struct loaded_vmcs *vmcs02; | ||
10907 | u32 msr_entry_idx; | 10815 | u32 msr_entry_idx; |
10908 | u32 exit_qual; | 10816 | u32 exit_qual; |
10909 | 10817 | ||
10910 | vmcs02 = nested_get_current_vmcs02(vmx); | ||
10911 | if (!vmcs02) | ||
10912 | return -ENOMEM; | ||
10913 | |||
10914 | enter_guest_mode(vcpu); | 10818 | enter_guest_mode(vcpu); |
10915 | 10819 | ||
10916 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) | 10820 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) |
10917 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | 10821 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
10918 | 10822 | ||
10919 | vmx_switch_vmcs(vcpu, vmcs02); | 10823 | vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); |
10920 | vmx_segment_cache_clear(vmx); | 10824 | vmx_segment_cache_clear(vmx); |
10921 | 10825 | ||
10922 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { | 10826 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { |
@@ -11534,10 +11438,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
11534 | vm_exit_controls_reset_shadow(vmx); | 11438 | vm_exit_controls_reset_shadow(vmx); |
11535 | vmx_segment_cache_clear(vmx); | 11439 | vmx_segment_cache_clear(vmx); |
11536 | 11440 | ||
11537 | /* if no vmcs02 cache requested, remove the one we used */ | ||
11538 | if (VMCS02_POOL_SIZE == 0) | ||
11539 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); | ||
11540 | |||
11541 | /* Update any VMCS fields that might have changed while L2 ran */ | 11441 | /* Update any VMCS fields that might have changed while L2 ran */ |
11542 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); | 11442 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); |
11543 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); | 11443 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); |