author    Nadav Har'El <nyh@math.technion.ac.il>    2011-05-24 08:26:10 -0400
committer Avi Kivity <avi@redhat.com>               2011-07-12 04:45:08 -0400
commit    d462b8192368f10e979250377930f9695a4039d0 (patch)
tree      4ea7e4c8cf963742cfab9a0400f1b4d671684b67
parent    24c82e576b7860a4f02a21103e9df39e11e97006 (diff)
KVM: VMX: Keep list of loaded VMCSs, instead of vcpus
In VMX, before we bring down a CPU we must VMCLEAR all VMCSs loaded on it
because (at least in theory) the processor might not have written all of its
content back to memory. Since a patch from June 26, 2008, this is done using a
per-cpu "vcpus_on_cpu" linked list of vcpus loaded on each CPU.

The problem is that with nested VMX, we no longer have the concept of a vcpu
being loaded on a cpu: a vcpu has multiple VMCSs (one for L1, a pool for L2s),
and each of those may have been last loaded on a different cpu.

So instead of linking the vcpus, we link the VMCSs, using a new structure
loaded_vmcs. This structure contains the VMCS, and the information pertaining
to its loading on a specific cpu (namely, the cpu number, and whether it was
already launched on this cpu once). In nested VMX we will also use the same
structure to hold L2 VMCSs, and vmx->loaded_vmcs is a pointer to the currently
active VMCS.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--  arch/x86/kvm/vmx.c | 150
1 file changed, 86 insertions(+), 64 deletions(-)
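The bookkeeping this patch introduces can be modeled outside the kernel in a few
lines: each VMCS remembers the CPU it was last loaded on, each CPU keeps a
linked list of the VMCSs loaded on it, and taking a CPU down walks that list
and VMCLEARs every entry. The sketch below is a minimal userspace illustration
of that idea, not the kernel code: MAX_CPUS, clear_vmcs() and the hand-rolled
doubly linked list are invented stand-ins for the kernel's per-cpu variables,
vmcs_clear() and <linux/list.h>; only the shape of the mechanism mirrors the
patch.

/*
 * Minimal userspace model of the loaded_vmcs bookkeeping added by this patch.
 * Names mirror the patch; the list and helpers are illustrative only.
 */
#include <stdio.h>

#define MAX_CPUS 4

struct vmcs { int id; };                 /* stands in for the real VMCS page */

struct loaded_vmcs {
        struct vmcs *vmcs;
        int cpu;                         /* CPU it is loaded on, or -1 */
        int launched;                    /* already VMLAUNCHed on that CPU? */
        struct loaded_vmcs *next, *prev; /* link in the per-CPU list */
};

/* One list head per CPU, like per_cpu(loaded_vmcss_on_cpu, cpu) */
static struct loaded_vmcs per_cpu_head[MAX_CPUS];

static void init_cpu_lists(void)
{
        for (int cpu = 0; cpu < MAX_CPUS; cpu++)
                per_cpu_head[cpu].next = per_cpu_head[cpu].prev = &per_cpu_head[cpu];
}

static void clear_vmcs(struct vmcs *vmcs)        /* models VMCLEAR */
{
        printf("VMCLEAR vmcs %d\n", vmcs->id);
}

/* Like loaded_vmcs_init(): VMCLEAR it and mark it as loaded nowhere. */
static void loaded_vmcs_init(struct loaded_vmcs *lv)
{
        clear_vmcs(lv->vmcs);
        lv->cpu = -1;
        lv->launched = 0;
}

/* Like the vmx_vcpu_load() path: remember the CPU and join its list. */
static void loaded_vmcs_load(struct loaded_vmcs *lv, int cpu)
{
        lv->cpu = cpu;
        lv->next = per_cpu_head[cpu].next;
        lv->prev = &per_cpu_head[cpu];
        per_cpu_head[cpu].next->prev = lv;
        per_cpu_head[cpu].next = lv;
}

/* Like vmclear_local_loaded_vmcss(): a CPU going down clears everything on it. */
static void cpu_going_down(int cpu)
{
        struct loaded_vmcs *lv, *n;

        for (lv = per_cpu_head[cpu].next; lv != &per_cpu_head[cpu]; lv = n) {
                n = lv->next;
                lv->prev->next = lv->next;   /* list_del() */
                lv->next->prev = lv->prev;
                loaded_vmcs_init(lv);
        }
}

int main(void)
{
        struct vmcs a = { 1 }, b = { 2 };
        struct loaded_vmcs la = { &a, -1, 0 }, lb = { &b, -1, 0 };

        init_cpu_lists();
        loaded_vmcs_load(&la, 0);   /* e.g. L1's vmcs01 on CPU 0 */
        loaded_vmcs_load(&lb, 0);   /* e.g. a nested L2 VMCS on the same CPU */
        cpu_going_down(0);          /* both get VMCLEARed and unlinked */
        return 0;
}

Built with any C99 compiler, loading two VMCSs on CPU 0 and then offlining it
prints two VMCLEAR lines: the point of the patch is that both an L1's vmcs01
and any nested L2 VMCSs on the same CPU are found through one per-CPU list,
instead of through the vcpus that own them.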
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3365e5dd3360..1444e4149c4d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -118,6 +118,18 @@ struct vmcs {
         char data[0];
 };
 
+/*
+ * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
+ * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
+ * loaded on this CPU (so we can clear them if the CPU goes down).
+ */
+struct loaded_vmcs {
+        struct vmcs *vmcs;
+        int cpu;
+        int launched;
+        struct list_head loaded_vmcss_on_cpu_link;
+};
+
 struct shared_msr_entry {
         unsigned index;
         u64 data;
@@ -126,9 +138,7 @@ struct shared_msr_entry {
 
 struct vcpu_vmx {
         struct kvm_vcpu vcpu;
-        struct list_head local_vcpus_link;
         unsigned long host_rsp;
-        int launched;
         u8 fail;
         u8 cpl;
         bool nmi_known_unmasked;
@@ -142,7 +152,14 @@ struct vcpu_vmx {
         u64 msr_host_kernel_gs_base;
         u64 msr_guest_kernel_gs_base;
 #endif
-        struct vmcs *vmcs;
+        /*
+         * loaded_vmcs points to the VMCS currently used in this vcpu. For a
+         * non-nested (L1) guest, it always points to vmcs01. For a nested
+         * guest (L2), it points to a different VMCS.
+         */
+        struct loaded_vmcs vmcs01;
+        struct loaded_vmcs *loaded_vmcs;
+        bool __launched; /* temporary, used in vmx_vcpu_run */
         struct msr_autoload {
                 unsigned nr;
                 struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
@@ -202,7 +219,11 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
-static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
+/*
+ * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
+ * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
+ */
+static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 static unsigned long *vmx_io_bitmap_a;
@@ -503,6 +524,13 @@ static void vmcs_clear(struct vmcs *vmcs)
                        vmcs, phys_addr);
 }
 
+static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
+{
+        vmcs_clear(loaded_vmcs->vmcs);
+        loaded_vmcs->cpu = -1;
+        loaded_vmcs->launched = 0;
+}
+
 static void vmcs_load(struct vmcs *vmcs)
 {
         u64 phys_addr = __pa(vmcs);
@@ -516,25 +544,24 @@ static void vmcs_load(struct vmcs *vmcs)
                        vmcs, phys_addr);
 }
 
-static void __vcpu_clear(void *arg)
+static void __loaded_vmcs_clear(void *arg)
 {
-        struct vcpu_vmx *vmx = arg;
+        struct loaded_vmcs *loaded_vmcs = arg;
         int cpu = raw_smp_processor_id();
 
-        if (vmx->vcpu.cpu == cpu)
-                vmcs_clear(vmx->vmcs);
-        if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
+        if (loaded_vmcs->cpu != cpu)
+                return; /* vcpu migration can race with cpu offline */
+        if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
                 per_cpu(current_vmcs, cpu) = NULL;
-        list_del(&vmx->local_vcpus_link);
-        vmx->vcpu.cpu = -1;
-        vmx->launched = 0;
+        list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
+        loaded_vmcs_init(loaded_vmcs);
 }
 
-static void vcpu_clear(struct vcpu_vmx *vmx)
+static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
 {
-        if (vmx->vcpu.cpu == -1)
-                return;
-        smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
+        if (loaded_vmcs->cpu != -1)
+                smp_call_function_single(
+                        loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1);
 }
 
 static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
@@ -973,22 +1000,22 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
         if (!vmm_exclusive)
                 kvm_cpu_vmxon(phys_addr);
-        else if (vcpu->cpu != cpu)
-                vcpu_clear(vmx);
+        else if (vmx->loaded_vmcs->cpu != cpu)
+                loaded_vmcs_clear(vmx->loaded_vmcs);
 
-        if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
-                per_cpu(current_vmcs, cpu) = vmx->vmcs;
-                vmcs_load(vmx->vmcs);
+        if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+                per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+                vmcs_load(vmx->loaded_vmcs->vmcs);
         }
 
-        if (vcpu->cpu != cpu) {
+        if (vmx->loaded_vmcs->cpu != cpu) {
                 struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
                 unsigned long sysenter_esp;
 
                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                 local_irq_disable();
-                list_add(&vmx->local_vcpus_link,
-                         &per_cpu(vcpus_on_cpu, cpu));
+                list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
+                         &per_cpu(loaded_vmcss_on_cpu, cpu));
                 local_irq_enable();
 
                 /*
@@ -1000,6 +1027,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
                 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
                 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+                vmx->loaded_vmcs->cpu = cpu;
         }
 }
 
@@ -1007,7 +1035,8 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
         __vmx_load_host_state(to_vmx(vcpu));
         if (!vmm_exclusive) {
-                __vcpu_clear(to_vmx(vcpu));
+                __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
+                vcpu->cpu = -1;
                 kvm_cpu_vmxoff();
         }
 }
@@ -1471,7 +1500,7 @@ static int hardware_enable(void *garbage)
         if (read_cr4() & X86_CR4_VMXE)
                 return -EBUSY;
 
-        INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
+        INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
         rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 
         test_bits = FEATURE_CONTROL_LOCKED;
@@ -1495,14 +1524,14 @@ static int hardware_enable(void *garbage)
         return 0;
 }
 
-static void vmclear_local_vcpus(void)
+static void vmclear_local_loaded_vmcss(void)
 {
         int cpu = raw_smp_processor_id();
-        struct vcpu_vmx *vmx, *n;
+        struct loaded_vmcs *v, *n;
 
-        list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu),
-                                 local_vcpus_link)
-                __vcpu_clear(vmx);
+        list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
+                                 loaded_vmcss_on_cpu_link)
+                __loaded_vmcs_clear(v);
 }
 
 
@@ -1517,7 +1546,7 @@ static void kvm_cpu_vmxoff(void)
 static void hardware_disable(void *garbage)
 {
         if (vmm_exclusive) {
-                vmclear_local_vcpus();
+                vmclear_local_loaded_vmcss();
                 kvm_cpu_vmxoff();
         }
         write_cr4(read_cr4() & ~X86_CR4_VMXE);
@@ -1698,6 +1727,18 @@ static void free_vmcs(struct vmcs *vmcs)
         free_pages((unsigned long)vmcs, vmcs_config.order);
 }
 
+/*
+ * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
+ */
+static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+{
+        if (!loaded_vmcs->vmcs)
+                return;
+        loaded_vmcs_clear(loaded_vmcs);
+        free_vmcs(loaded_vmcs->vmcs);
+        loaded_vmcs->vmcs = NULL;
+}
+
 static void free_kvm_area(void)
 {
         int cpu;
@@ -4169,6 +4210,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                 vmx_set_interrupt_shadow(vcpu, 0);
 
+        vmx->__launched = vmx->loaded_vmcs->launched;
         asm(
                 /* Store host registers */
                 "push %%"R"dx; push %%"R"bp;"
@@ -4239,7 +4281,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
4239 "pop %%"R"bp; pop %%"R"dx \n\t" 4281 "pop %%"R"bp; pop %%"R"dx \n\t"
4240 "setbe %c[fail](%0) \n\t" 4282 "setbe %c[fail](%0) \n\t"
4241 : : "c"(vmx), "d"((unsigned long)HOST_RSP), 4283 : : "c"(vmx), "d"((unsigned long)HOST_RSP),
4242 [launched]"i"(offsetof(struct vcpu_vmx, launched)), 4284 [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
4243 [fail]"i"(offsetof(struct vcpu_vmx, fail)), 4285 [fail]"i"(offsetof(struct vcpu_vmx, fail)),
4244 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), 4286 [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
4245 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), 4287 [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
@@ -4279,7 +4321,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
         asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
-        vmx->launched = 1;
+        vmx->loaded_vmcs->launched = 1;
 
         vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
 
@@ -4291,41 +4333,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #undef R
 #undef Q
 
-static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
-{
-        struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-        if (vmx->vmcs) {
-                vcpu_clear(vmx);
-                free_vmcs(vmx->vmcs);
-                vmx->vmcs = NULL;
-        }
-}
-
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
 
         free_vpid(vmx);
-        vmx_free_vmcs(vcpu);
+        free_loaded_vmcs(vmx->loaded_vmcs);
         kfree(vmx->guest_msrs);
         kvm_vcpu_uninit(vcpu);
         kmem_cache_free(kvm_vcpu_cache, vmx);
 }
 
-static inline void vmcs_init(struct vmcs *vmcs)
-{
-        u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id()));
-
-        if (!vmm_exclusive)
-                kvm_cpu_vmxon(phys_addr);
-
-        vmcs_clear(vmcs);
-
-        if (!vmm_exclusive)
-                kvm_cpu_vmxoff();
-}
-
 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
         int err;
@@ -4347,11 +4365,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
                 goto uninit_vcpu;
         }
 
-        vmx->vmcs = alloc_vmcs();
-        if (!vmx->vmcs)
+        vmx->loaded_vmcs = &vmx->vmcs01;
+        vmx->loaded_vmcs->vmcs = alloc_vmcs();
+        if (!vmx->loaded_vmcs->vmcs)
                 goto free_msrs;
-
-        vmcs_init(vmx->vmcs);
+        if (!vmm_exclusive)
+                kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
+        loaded_vmcs_init(vmx->loaded_vmcs);
+        if (!vmm_exclusive)
+                kvm_cpu_vmxoff();
 
         cpu = get_cpu();
         vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -4380,7 +4402,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
         return &vmx->vcpu;
 
 free_vmcs:
-        free_vmcs(vmx->vmcs);
+        free_vmcs(vmx->loaded_vmcs->vmcs);
 free_msrs:
         kfree(vmx->guest_msrs);
 uninit_vcpu: