diff options
-rw-r--r-- | drivers/kvm/kvm_main.c | 37 | ||||
-rw-r--r-- | drivers/kvm/lapic.c | 38 | ||||
-rw-r--r-- | drivers/kvm/mmu.c | 3 | ||||
-rw-r--r-- | drivers/kvm/vmx.c | 16 | ||||
-rw-r--r-- | drivers/kvm/x86_emulate.c | 77 | ||||
-rw-r--r-- | kernel/sched.c | 1 |
6 files changed, 103 insertions, 69 deletions
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index af2d288c881d..07ae280e8fe5 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -198,21 +198,15 @@ static void vcpu_put(struct kvm_vcpu *vcpu) | |||
198 | 198 | ||
199 | static void ack_flush(void *_completed) | 199 | static void ack_flush(void *_completed) |
200 | { | 200 | { |
201 | atomic_t *completed = _completed; | ||
202 | |||
203 | atomic_inc(completed); | ||
204 | } | 201 | } |
205 | 202 | ||
206 | void kvm_flush_remote_tlbs(struct kvm *kvm) | 203 | void kvm_flush_remote_tlbs(struct kvm *kvm) |
207 | { | 204 | { |
208 | int i, cpu, needed; | 205 | int i, cpu; |
209 | cpumask_t cpus; | 206 | cpumask_t cpus; |
210 | struct kvm_vcpu *vcpu; | 207 | struct kvm_vcpu *vcpu; |
211 | atomic_t completed; | ||
212 | 208 | ||
213 | atomic_set(&completed, 0); | ||
214 | cpus_clear(cpus); | 209 | cpus_clear(cpus); |
215 | needed = 0; | ||
216 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | 210 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
217 | vcpu = kvm->vcpus[i]; | 211 | vcpu = kvm->vcpus[i]; |
218 | if (!vcpu) | 212 | if (!vcpu) |
@@ -221,23 +215,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
221 | continue; | 215 | continue; |
222 | cpu = vcpu->cpu; | 216 | cpu = vcpu->cpu; |
223 | if (cpu != -1 && cpu != raw_smp_processor_id()) | 217 | if (cpu != -1 && cpu != raw_smp_processor_id()) |
224 | if (!cpu_isset(cpu, cpus)) { | 218 | cpu_set(cpu, cpus); |
225 | cpu_set(cpu, cpus); | ||
226 | ++needed; | ||
227 | } | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * We really want smp_call_function_mask() here. But that's not | ||
232 | * available, so ipi all cpus in parallel and wait for them | ||
233 | * to complete. | ||
234 | */ | ||
235 | for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus)) | ||
236 | smp_call_function_single(cpu, ack_flush, &completed, 1, 0); | ||
237 | while (atomic_read(&completed) != needed) { | ||
238 | cpu_relax(); | ||
239 | barrier(); | ||
240 | } | 219 | } |
220 | smp_call_function_mask(cpus, ack_flush, NULL, 1); | ||
241 | } | 221 | } |
242 | 222 | ||
243 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 223 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
@@ -2054,12 +2034,21 @@ again: | |||
2054 | 2034 | ||
2055 | kvm_x86_ops->run(vcpu, kvm_run); | 2035 | kvm_x86_ops->run(vcpu, kvm_run); |
2056 | 2036 | ||
2057 | kvm_guest_exit(); | ||
2058 | vcpu->guest_mode = 0; | 2037 | vcpu->guest_mode = 0; |
2059 | local_irq_enable(); | 2038 | local_irq_enable(); |
2060 | 2039 | ||
2061 | ++vcpu->stat.exits; | 2040 | ++vcpu->stat.exits; |
2062 | 2041 | ||
2042 | /* | ||
2043 | * We must have an instruction between local_irq_enable() and | ||
2044 | * kvm_guest_exit(), so the timer interrupt isn't delayed by | ||
2045 | * the interrupt shadow. The stat.exits increment will do nicely. | ||
2046 | * But we need to prevent reordering, hence this barrier(): | ||
2047 | */ | ||
2048 | barrier(); | ||
2049 | |||
2050 | kvm_guest_exit(); | ||
2051 | |||
2063 | preempt_enable(); | 2052 | preempt_enable(); |
2064 | 2053 | ||
2065 | /* | 2054 | /* |
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index a190587cf6a5..238fcad3cece 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -494,12 +494,19 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
494 | 494 | ||
495 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 495 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
496 | { | 496 | { |
497 | u32 counter_passed; | 497 | u64 counter_passed; |
498 | ktime_t passed, now = apic->timer.dev.base->get_time(); | 498 | ktime_t passed, now; |
499 | u32 tmcct = apic_get_reg(apic, APIC_TMICT); | 499 | u32 tmcct; |
500 | 500 | ||
501 | ASSERT(apic != NULL); | 501 | ASSERT(apic != NULL); |
502 | 502 | ||
503 | now = apic->timer.dev.base->get_time(); | ||
504 | tmcct = apic_get_reg(apic, APIC_TMICT); | ||
505 | |||
506 | /* if initial count is 0, current count should also be 0 */ | ||
507 | if (tmcct == 0) | ||
508 | return 0; | ||
509 | |||
503 | if (unlikely(ktime_to_ns(now) <= | 510 | if (unlikely(ktime_to_ns(now) <= |
504 | ktime_to_ns(apic->timer.last_update))) { | 511 | ktime_to_ns(apic->timer.last_update))) { |
505 | /* Wrap around */ | 512 | /* Wrap around */ |
@@ -514,15 +521,24 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) | |||
514 | 521 | ||
515 | counter_passed = div64_64(ktime_to_ns(passed), | 522 | counter_passed = div64_64(ktime_to_ns(passed), |
516 | (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); | 523 | (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); |
517 | tmcct -= counter_passed; | ||
518 | 524 | ||
519 | if (tmcct <= 0) { | 525 | if (counter_passed > tmcct) { |
520 | if (unlikely(!apic_lvtt_period(apic))) | 526 | if (unlikely(!apic_lvtt_period(apic))) { |
527 | /* one-shot timers stick at 0 until reset */ | ||
521 | tmcct = 0; | 528 | tmcct = 0; |
522 | else | 529 | } else { |
523 | do { | 530 | /* |
524 | tmcct += apic_get_reg(apic, APIC_TMICT); | 531 | * periodic timers reset to APIC_TMICT when they |
525 | } while (tmcct <= 0); | 532 | * hit 0. The while loop simulates this happening N |
533 | * times. (counter_passed %= tmcct) would also work, | ||
534 | * but might be slower or not work on 32-bit?? | ||
535 | */ | ||
536 | while (counter_passed > tmcct) | ||
537 | counter_passed -= tmcct; | ||
538 | tmcct -= counter_passed; | ||
539 | } | ||
540 | } else { | ||
541 | tmcct -= counter_passed; | ||
526 | } | 542 | } |
527 | 543 | ||
528 | return tmcct; | 544 | return tmcct; |
@@ -853,7 +869,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
853 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); | 869 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); |
854 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 870 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
855 | } | 871 | } |
856 | apic->timer.divide_count = 0; | 872 | update_divide_count(apic); |
857 | atomic_set(&apic->timer.pending, 0); | 873 | atomic_set(&apic->timer.pending, 0); |
858 | if (vcpu->vcpu_id == 0) | 874 | if (vcpu->vcpu_id == 0) |
859 | vcpu->apic_base |= MSR_IA32_APICBASE_BSP; | 875 | vcpu->apic_base |= MSR_IA32_APICBASE_BSP; |
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 6d84d30f5ed0..feb5ac986c5d 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1049,6 +1049,7 @@ int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) | |||
1049 | destroy_kvm_mmu(vcpu); | 1049 | destroy_kvm_mmu(vcpu); |
1050 | return init_kvm_mmu(vcpu); | 1050 | return init_kvm_mmu(vcpu); |
1051 | } | 1051 | } |
1052 | EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); | ||
1052 | 1053 | ||
1053 | int kvm_mmu_load(struct kvm_vcpu *vcpu) | 1054 | int kvm_mmu_load(struct kvm_vcpu *vcpu) |
1054 | { | 1055 | { |
@@ -1088,7 +1089,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
1088 | mmu_page_remove_parent_pte(child, spte); | 1089 | mmu_page_remove_parent_pte(child, spte); |
1089 | } | 1090 | } |
1090 | } | 1091 | } |
1091 | *spte = 0; | 1092 | set_shadow_pte(spte, 0); |
1092 | kvm_flush_remote_tlbs(vcpu->kvm); | 1093 | kvm_flush_remote_tlbs(vcpu->kvm); |
1093 | } | 1094 | } |
1094 | 1095 | ||
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 4f115a8e45ef..bb56ae3f89b6 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -523,6 +523,8 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | |||
523 | 523 | ||
524 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 524 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
525 | { | 525 | { |
526 | if (vcpu->rmode.active) | ||
527 | rflags |= IOPL_MASK | X86_EFLAGS_VM; | ||
526 | vmcs_writel(GUEST_RFLAGS, rflags); | 528 | vmcs_writel(GUEST_RFLAGS, rflags); |
527 | } | 529 | } |
528 | 530 | ||
@@ -1128,6 +1130,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1128 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); | 1130 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); |
1129 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); | 1131 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); |
1130 | 1132 | ||
1133 | kvm_mmu_reset_context(vcpu); | ||
1131 | init_rmode_tss(vcpu->kvm); | 1134 | init_rmode_tss(vcpu->kvm); |
1132 | } | 1135 | } |
1133 | 1136 | ||
@@ -1760,10 +1763,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1760 | set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); | 1763 | set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); |
1761 | } | 1764 | } |
1762 | 1765 | ||
1763 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ | 1766 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ |
1764 | asm ("int $2"); | 1767 | return 1; /* already handled by vmx_vcpu_run() */ |
1765 | return 1; | ||
1766 | } | ||
1767 | 1768 | ||
1768 | if (is_no_device(intr_info)) { | 1769 | if (is_no_device(intr_info)) { |
1769 | vmx_fpu_activate(vcpu); | 1770 | vmx_fpu_activate(vcpu); |
@@ -2196,6 +2197,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
2196 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2197 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2197 | { | 2198 | { |
2198 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2199 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2200 | u32 intr_info; | ||
2199 | 2201 | ||
2200 | /* | 2202 | /* |
2201 | * Loading guest fpu may have cleared host cr0.ts | 2203 | * Loading guest fpu may have cleared host cr0.ts |
@@ -2322,6 +2324,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2322 | 2324 | ||
2323 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 2325 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
2324 | vmx->launched = 1; | 2326 | vmx->launched = 1; |
2327 | |||
2328 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
2329 | |||
2330 | /* We need to handle NMIs before interrupts are enabled */ | ||
2331 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ | ||
2332 | asm("int $2"); | ||
2325 | } | 2333 | } |
2326 | 2334 | ||
2327 | static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, | 2335 | static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, |
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 9737c3b2f48c..a6ace302e0cd 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -212,7 +212,8 @@ static u16 twobyte_table[256] = { | |||
212 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, | 212 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, |
213 | DstReg | SrcMem16 | ModRM | Mov, | 213 | DstReg | SrcMem16 | ModRM | Mov, |
214 | /* 0xC0 - 0xCF */ | 214 | /* 0xC0 - 0xCF */ |
215 | 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 0, | 215 | 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM, |
216 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
216 | /* 0xD0 - 0xDF */ | 217 | /* 0xD0 - 0xDF */ |
217 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 218 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
218 | /* 0xE0 - 0xEF */ | 219 | /* 0xE0 - 0xEF */ |
@@ -596,11 +597,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
596 | case 0xf0: /* LOCK */ | 597 | case 0xf0: /* LOCK */ |
597 | lock_prefix = 1; | 598 | lock_prefix = 1; |
598 | break; | 599 | break; |
600 | case 0xf2: /* REPNE/REPNZ */ | ||
599 | case 0xf3: /* REP/REPE/REPZ */ | 601 | case 0xf3: /* REP/REPE/REPZ */ |
600 | rep_prefix = 1; | 602 | rep_prefix = 1; |
601 | break; | 603 | break; |
602 | case 0xf2: /* REPNE/REPNZ */ | ||
603 | break; | ||
604 | default: | 604 | default: |
605 | goto done_prefixes; | 605 | goto done_prefixes; |
606 | } | 606 | } |
@@ -825,6 +825,14 @@ done_prefixes: | |||
825 | if (twobyte && b == 0x01 && modrm_reg == 7) | 825 | if (twobyte && b == 0x01 && modrm_reg == 7) |
826 | break; | 826 | break; |
827 | srcmem_common: | 827 | srcmem_common: |
828 | /* | ||
829 | * For instructions with a ModR/M byte, switch to register | ||
830 | * access if Mod = 3. | ||
831 | */ | ||
832 | if ((d & ModRM) && modrm_mod == 3) { | ||
833 | src.type = OP_REG; | ||
834 | break; | ||
835 | } | ||
828 | src.type = OP_MEM; | 836 | src.type = OP_MEM; |
829 | src.ptr = (unsigned long *)cr2; | 837 | src.ptr = (unsigned long *)cr2; |
830 | src.val = 0; | 838 | src.val = 0; |
@@ -893,6 +901,14 @@ done_prefixes: | |||
893 | dst.ptr = (unsigned long *)cr2; | 901 | dst.ptr = (unsigned long *)cr2; |
894 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; | 902 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; |
895 | dst.val = 0; | 903 | dst.val = 0; |
904 | /* | ||
905 | * For instructions with a ModR/M byte, switch to register | ||
906 | * access if Mod = 3. | ||
907 | */ | ||
908 | if ((d & ModRM) && modrm_mod == 3) { | ||
909 | dst.type = OP_REG; | ||
910 | break; | ||
911 | } | ||
896 | if (d & BitOp) { | 912 | if (d & BitOp) { |
897 | unsigned long mask = ~(dst.bytes * 8 - 1); | 913 | unsigned long mask = ~(dst.bytes * 8 - 1); |
898 | 914 | ||
@@ -1083,31 +1099,6 @@ push: | |||
1083 | case 0xd2 ... 0xd3: /* Grp2 */ | 1099 | case 0xd2 ... 0xd3: /* Grp2 */ |
1084 | src.val = _regs[VCPU_REGS_RCX]; | 1100 | src.val = _regs[VCPU_REGS_RCX]; |
1085 | goto grp2; | 1101 | goto grp2; |
1086 | case 0xe8: /* call (near) */ { | ||
1087 | long int rel; | ||
1088 | switch (op_bytes) { | ||
1089 | case 2: | ||
1090 | rel = insn_fetch(s16, 2, _eip); | ||
1091 | break; | ||
1092 | case 4: | ||
1093 | rel = insn_fetch(s32, 4, _eip); | ||
1094 | break; | ||
1095 | case 8: | ||
1096 | rel = insn_fetch(s64, 8, _eip); | ||
1097 | break; | ||
1098 | default: | ||
1099 | DPRINTF("Call: Invalid op_bytes\n"); | ||
1100 | goto cannot_emulate; | ||
1101 | } | ||
1102 | src.val = (unsigned long) _eip; | ||
1103 | JMP_REL(rel); | ||
1104 | goto push; | ||
1105 | } | ||
1106 | case 0xe9: /* jmp rel */ | ||
1107 | case 0xeb: /* jmp rel short */ | ||
1108 | JMP_REL(src.val); | ||
1109 | no_wb = 1; /* Disable writeback. */ | ||
1110 | break; | ||
1111 | case 0xf6 ... 0xf7: /* Grp3 */ | 1102 | case 0xf6 ... 0xf7: /* Grp3 */ |
1112 | switch (modrm_reg) { | 1103 | switch (modrm_reg) { |
1113 | case 0 ... 1: /* test */ | 1104 | case 0 ... 1: /* test */ |
@@ -1350,6 +1341,32 @@ special_insn: | |||
1350 | case 0xae ... 0xaf: /* scas */ | 1341 | case 0xae ... 0xaf: /* scas */ |
1351 | DPRINTF("Urk! I don't handle SCAS.\n"); | 1342 | DPRINTF("Urk! I don't handle SCAS.\n"); |
1352 | goto cannot_emulate; | 1343 | goto cannot_emulate; |
1344 | case 0xe8: /* call (near) */ { | ||
1345 | long int rel; | ||
1346 | switch (op_bytes) { | ||
1347 | case 2: | ||
1348 | rel = insn_fetch(s16, 2, _eip); | ||
1349 | break; | ||
1350 | case 4: | ||
1351 | rel = insn_fetch(s32, 4, _eip); | ||
1352 | break; | ||
1353 | case 8: | ||
1354 | rel = insn_fetch(s64, 8, _eip); | ||
1355 | break; | ||
1356 | default: | ||
1357 | DPRINTF("Call: Invalid op_bytes\n"); | ||
1358 | goto cannot_emulate; | ||
1359 | } | ||
1360 | src.val = (unsigned long) _eip; | ||
1361 | JMP_REL(rel); | ||
1362 | goto push; | ||
1363 | } | ||
1364 | case 0xe9: /* jmp rel */ | ||
1365 | case 0xeb: /* jmp rel short */ | ||
1366 | JMP_REL(src.val); | ||
1367 | no_wb = 1; /* Disable writeback. */ | ||
1368 | break; | ||
1369 | |||
1353 | 1370 | ||
1354 | } | 1371 | } |
1355 | goto writeback; | 1372 | goto writeback; |
@@ -1501,6 +1518,10 @@ twobyte_insn: | |||
1501 | dst.bytes = op_bytes; | 1518 | dst.bytes = op_bytes; |
1502 | dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; | 1519 | dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; |
1503 | break; | 1520 | break; |
1521 | case 0xc3: /* movnti */ | ||
1522 | dst.bytes = op_bytes; | ||
1523 | dst.val = (op_bytes == 4) ? (u32) src.val : (u64) src.val; | ||
1524 | break; | ||
1504 | } | 1525 | } |
1505 | goto writeback; | 1526 | goto writeback; |
1506 | 1527 | ||
diff --git a/kernel/sched.c b/kernel/sched.c
index 7581e331b139..2810e562a991 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3375,7 +3375,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
3375 | 3375 | ||
3376 | if (p->flags & PF_VCPU) { | 3376 | if (p->flags & PF_VCPU) { |
3377 | account_guest_time(p, cputime); | 3377 | account_guest_time(p, cputime); |
3378 | p->flags &= ~PF_VCPU; | ||
3379 | return; | 3378 | return; |
3380 | } | 3379 | } |
3381 | 3380 | ||