author     Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-22 22:24:17 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-22 22:24:17 -0400
commit     0fd56c70334d7899edaee742ae49d9b893951376 (patch)
tree       76b82ef119aa4a5ef8bbacb8256aeaba4277f7e1
parent     56d61a0e26c5a61c66d1ac259a59960295939da9 (diff)
parent     49d3bd7e2b990e717aa66e229410b8f5096c4956 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm:
  KVM: Use new smp_call_function_mask() in kvm_flush_remote_tlbs()
  sched: don't clear PF_VCPU in scheduler
  KVM: Improve local apic timer wraparound handling
  KVM: Fix local apic timer divide by zero
  KVM: Move kvm_guest_exit() after local_irq_enable()
  KVM: x86 emulator: fix access registers for instructions with ModR/M byte and Mod = 3
  KVM: VMX: Force vm86 mode if setting flags during real mode
  KVM: x86 emulator: implement 'movnti mem, reg'
  KVM: VMX: Reset mmu context when entering real mode
  KVM: VMX: Handle NMIs before enabling interrupts and preemption
  KVM: MMU: Set shadow pte atomically in mmu_pte_write_zap_pte()
  KVM: x86 emulator: fix repne/repnz decoding
  KVM: x86 emulator: fix merge screwup due to emulator split
-rw-r--r--  drivers/kvm/kvm_main.c    | 37
-rw-r--r--  drivers/kvm/lapic.c       | 38
-rw-r--r--  drivers/kvm/mmu.c         |  3
-rw-r--r--  drivers/kvm/vmx.c         | 16
-rw-r--r--  drivers/kvm/x86_emulate.c | 77
-rw-r--r--  kernel/sched.c            |  1
6 files changed, 103 insertions(+), 69 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index af2d288c881d..07ae280e8fe5 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -198,21 +198,15 @@ static void vcpu_put(struct kvm_vcpu *vcpu)
 
 static void ack_flush(void *_completed)
 {
-	atomic_t *completed = _completed;
-
-	atomic_inc(completed);
 }
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	int i, cpu, needed;
+	int i, cpu;
 	cpumask_t cpus;
 	struct kvm_vcpu *vcpu;
-	atomic_t completed;
 
-	atomic_set(&completed, 0);
 	cpus_clear(cpus);
-	needed = 0;
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
 		if (!vcpu)
@@ -221,23 +215,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 			continue;
 		cpu = vcpu->cpu;
 		if (cpu != -1 && cpu != raw_smp_processor_id())
-			if (!cpu_isset(cpu, cpus)) {
-				cpu_set(cpu, cpus);
-				++needed;
-			}
+			cpu_set(cpu, cpus);
 	}
-
-	/*
-	 * We really want smp_call_function_mask() here. But that's not
-	 * available, so ipi all cpus in parallel and wait for them
-	 * to complete.
-	 */
-	for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus))
-		smp_call_function_single(cpu, ack_flush, &completed, 1, 0);
-	while (atomic_read(&completed) != needed) {
-		cpu_relax();
-		barrier();
-	}
+	smp_call_function_mask(cpus, ack_flush, NULL, 1);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
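
The hunk above collapses the open-coded IPI-and-count loop into a single smp_call_function_mask() call, which sends an IPI to every CPU in the mask and, with wait=1, returns only after each target has run the callback. A minimal sketch of the resulting call pattern, using the signature the diff itself relies on; this is kernel-context code shown for shape (the header placement of smp_call_function_mask in this era is an assumption), not a standalone buildable unit:

	#include <linux/smp.h>
	#include <linux/cpumask.h>

	static void ack_flush(void *info)
	{
		/* nothing to do: the IPI itself kicks the remote CPU out of
		 * guest mode, which is all the TLB flush needs */
	}

	static void flush_cpus(cpumask_t cpus)
	{
		/* wait=1: block until every CPU in 'cpus' has run ack_flush() */
		smp_call_function_mask(cpus, ack_flush, NULL, 1);
	}
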
@@ -2054,12 +2034,21 @@ again:
 
 	kvm_x86_ops->run(vcpu, kvm_run);
 
-	kvm_guest_exit();
 	vcpu->guest_mode = 0;
 	local_irq_enable();
 
 	++vcpu->stat.exits;
 
+	/*
+	 * We must have an instruction between local_irq_enable() and
+	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
+	 * the interrupt shadow. The stat.exits increment will do nicely.
+	 * But we need to prevent reordering, hence this barrier():
+	 */
+	barrier();
+
+	kvm_guest_exit();
+
 	preempt_enable();
 
 	/*
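
The second kvm_main.c hunk reorders the exit path so that at least one real instruction (the stat.exits increment) sits between local_irq_enable() and kvm_guest_exit(): the sti-style interrupt shadow delays delivery by one instruction, and the increment guarantees a pending timer tick lands while the task is still marked as running a guest. The compiler barrier is what stops the increment from being sunk past the call. A standalone sketch of the same pattern, with barrier() expanded to its usual GCC definition and the other names hypothetical:

	/* barrier() as the kernel defines it: a compiler-only fence */
	#define barrier() __asm__ __volatile__("" : : : "memory")

	static unsigned long exit_count;

	static void guest_exit_stub(void)	/* stand-in for kvm_guest_exit() */
	{
	}

	static void vcpu_exit_path(void)
	{
		exit_count++;	/* one real instruction after IRQs come back on */
		barrier();	/* forbid the compiler from reordering it below */
		guest_exit_stub();
	}
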
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index a190587cf6a5..238fcad3cece 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -494,12 +494,19 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-	u32 counter_passed;
-	ktime_t passed, now = apic->timer.dev.base->get_time();
-	u32 tmcct = apic_get_reg(apic, APIC_TMICT);
+	u64 counter_passed;
+	ktime_t passed, now;
+	u32 tmcct;
 
 	ASSERT(apic != NULL);
 
+	now = apic->timer.dev.base->get_time();
+	tmcct = apic_get_reg(apic, APIC_TMICT);
+
+	/* if initial count is 0, current count should also be 0 */
+	if (tmcct == 0)
+		return 0;
+
 	if (unlikely(ktime_to_ns(now) <=
 		ktime_to_ns(apic->timer.last_update))) {
 		/* Wrap around */
@@ -514,15 +521,24 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 
 	counter_passed = div64_64(ktime_to_ns(passed),
 				(APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-	tmcct -= counter_passed;
 
-	if (tmcct <= 0) {
-		if (unlikely(!apic_lvtt_period(apic)))
+	if (counter_passed > tmcct) {
+		if (unlikely(!apic_lvtt_period(apic))) {
+			/* one-shot timers stick at 0 until reset */
 			tmcct = 0;
-		else
-			do {
-				tmcct += apic_get_reg(apic, APIC_TMICT);
-			} while (tmcct <= 0);
+		} else {
+			/*
+			 * periodic timers reset to APIC_TMICT when they
+			 * hit 0. The while loop simulates this happening N
+			 * times. (counter_passed %= tmcct) would also work,
+			 * but might be slower or not work on 32-bit??
+			 */
+			while (counter_passed > tmcct)
+				counter_passed -= tmcct;
+			tmcct -= counter_passed;
+		}
+	} else {
+		tmcct -= counter_passed;
 	}
 
 	return tmcct;
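
With counter_passed widened to u64 and tmcct unsigned, the old `tmcct <= 0` test could only ever catch an exact zero, so an elapsed count larger than TMICT used to leave a huge wrapped value in the register. The rewrite compares the two directly and folds elapsed ticks back into the period. A standalone, runnable model of the fixed arithmetic (the function name and test harness are illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	/* mirror of the patched apic_get_tmcct() arithmetic */
	static uint32_t current_count(uint32_t tmict, uint64_t counter_passed,
				      int periodic)
	{
		if (tmict == 0)
			return 0;		/* initial count 0 never ticks */
		if (counter_passed > tmict && !periodic)
			return 0;		/* one-shot sticks at 0 until reset */
		while (counter_passed > tmict)	/* simulate N periodic reloads */
			counter_passed -= tmict;
		return tmict - (uint32_t)counter_passed;
	}

	int main(void)
	{
		/* TMICT=100, 250 ticks elapsed: periodic reads 50, one-shot 0 */
		printf("%u %u\n", current_count(100, 250, 1),
		       current_count(100, 250, 0));
		return 0;
	}
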
@@ -853,7 +869,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
-	apic->timer.divide_count = 0;
+	update_divide_count(apic);
 	atomic_set(&apic->timer.pending, 0);
 	if (vcpu->vcpu_id == 0)
 		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
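
This reset hunk is the divide-by-zero fix: kvm_lapic_reset() used to zero divide_count, which apic_get_tmcct() then divides by. Routing the reset through update_divide_count() re-derives the divisor from APIC_TDCR, and the architectural encoding can never produce zero. A model of that decode (the helper body is an assumption based on the APIC spec, not quoted from this diff):

	#include <stdint.h>

	/*
	 * APIC timer divide configuration: TDCR bits 0, 1 and 3 select the
	 * divisor.  Values 0b000..0b110 map to 2, 4, ..., 128 and 0b111
	 * means divide by 1 -- always a power of two, never 0.
	 */
	static uint32_t apic_timer_divide_count(uint32_t tdcr)
	{
		uint32_t v = (tdcr & 0x3) | ((tdcr & 0x8) >> 1);

		return 1U << ((v + 1) & 7);
	}
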
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 6d84d30f5ed0..feb5ac986c5d 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1049,6 +1049,7 @@ int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 	destroy_kvm_mmu(vcpu);
 	return init_kvm_mmu(vcpu);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
 int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
@@ -1088,7 +1089,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 			mmu_page_remove_parent_pte(child, spte);
 		}
 	}
-	*spte = 0;
+	set_shadow_pte(spte, 0);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
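
set_shadow_pte() replaces a plain 64-bit store that, on 32-bit PAE hosts, compiles to two 32-bit writes, so the hardware page walker could observe a half-written PTE. A userspace analogue of the idea, using a GCC/C11-style atomic store as a stand-in for the kernel helper (whose exact body is not shown in this diff):

	#include <stdint.h>

	/*
	 * Clear a shadow PTE with a single indivisible 64-bit store, so a
	 * concurrent hardware walk never sees one half zeroed and the
	 * other half still pointing at a page.
	 */
	static void set_shadow_pte_model(volatile uint64_t *sptep, uint64_t spte)
	{
		__atomic_store_n(sptep, spte, __ATOMIC_RELAXED);
	}
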
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 4f115a8e45ef..bb56ae3f89b6 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -523,6 +523,8 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+	if (vcpu->rmode.active)
+		rflags |= IOPL_MASK | X86_EFLAGS_VM;
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
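
KVM emulates guest real mode by running the vcpu in vm86 mode, so while rmode.active is set every RFLAGS value written to the VMCS must keep the VM bit and IOPL=3 forced on, or the vcpu would silently drop out of vm86. The bit arithmetic as a standalone sketch, with the architectural constants spelled out:

	#define X86_EFLAGS_VM	(1UL << 17)	/* virtual-8086 mode */
	#define IOPL_MASK	(3UL << 12)	/* I/O privilege level = 3 */

	static unsigned long rmode_fixup_rflags(unsigned long rflags,
						int rmode_active)
	{
		if (rmode_active)
			rflags |= IOPL_MASK | X86_EFLAGS_VM;
		return rflags;
	}
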
@@ -1128,6 +1130,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
 	fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
 
+	kvm_mmu_reset_context(vcpu);
 	init_rmode_tss(vcpu->kvm);
 }
 
@@ -1760,10 +1763,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
 	}
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
-		asm ("int $2");
-		return 1;
-	}
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_no_device(intr_info)) {
 		vmx_fpu_activate(vcpu);
@@ -2196,6 +2197,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 intr_info;
 
 	/*
 	 * Loading guest fpu may have cleared host cr0.ts
@@ -2322,6 +2324,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
+
+	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	/* We need to handle NMIs before interrupts are enabled */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+		asm("int $2");
 }
 
 static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
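
The last two vmx.c hunks move NMI handling out of handle_exception(), which runs with interrupts and preemption enabled, into vmx_vcpu_run() itself: the interruption-info field is read immediately after VM exit, and a software `int $2` re-raises the NMI on the host while IRQs are still off. A sketch of the type test involved; the mask value is taken from the diff, and the 0x700 definition is an assumption based on the era's vmx.h (interruption type lives in bits 10:8, type 2 = NMI):

	#include <stdint.h>

	#define INTR_INFO_INTR_TYPE_MASK	0x700	/* bits 10:8 */
	#define INTR_TYPE_NMI			0x200	/* type 2, in place */

	static int exit_was_nmi(uint32_t intr_info)
	{
		return (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI;
	}
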
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 9737c3b2f48c..a6ace302e0cd 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -212,7 +212,8 @@ static u16 twobyte_table[256] = {
 	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
 	    DstReg | SrcMem16 | ModRM | Mov,
 	/* 0xC0 - 0xCF */
-	0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
+	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xD0 - 0xDF */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xE0 - 0xEF */
@@ -596,11 +597,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	case 0xf0:	/* LOCK */
 		lock_prefix = 1;
 		break;
+	case 0xf2:	/* REPNE/REPNZ */
 	case 0xf3:	/* REP/REPE/REPZ */
 		rep_prefix = 1;
 		break;
-	case 0xf2:	/* REPNE/REPNZ */
-		break;
 	default:
 		goto done_prefixes;
 	}
@@ -825,6 +825,14 @@ done_prefixes:
 		if (twobyte && b == 0x01 && modrm_reg == 7)
 			break;
 	      srcmem_common:
+		/*
+		 * For instructions with a ModR/M byte, switch to register
+		 * access if Mod = 3.
+		 */
+		if ((d & ModRM) && modrm_mod == 3) {
+			src.type = OP_REG;
+			break;
+		}
 		src.type = OP_MEM;
 		src.ptr = (unsigned long *)cr2;
 		src.val = 0;
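
This hunk and the matching destination-side hunk below fix decoding for instructions whose ModR/M byte carries Mod = 3: that encoding selects a register operand, so treating it as a memory access at cr2 was wrong. The field layout the check relies on, as a tiny standalone decoder:

	#include <stdint.h>

	/*
	 * ModR/M byte: mod in bits 7-6, reg in bits 5-3, r/m in bits 2-0.
	 * mod == 3 means the r/m field names a register, not memory.
	 */
	static int modrm_is_register_form(uint8_t modrm)
	{
		return (modrm >> 6) == 3;
	}
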
@@ -893,6 +901,14 @@ done_prefixes:
 		dst.ptr = (unsigned long *)cr2;
 		dst.bytes = (d & ByteOp) ? 1 : op_bytes;
 		dst.val = 0;
+		/*
+		 * For instructions with a ModR/M byte, switch to register
+		 * access if Mod = 3.
+		 */
+		if ((d & ModRM) && modrm_mod == 3) {
+			dst.type = OP_REG;
+			break;
+		}
 		if (d & BitOp) {
 			unsigned long mask = ~(dst.bytes * 8 - 1);
 
@@ -1083,31 +1099,6 @@ push:
 	case 0xd2 ... 0xd3:	/* Grp2 */
 		src.val = _regs[VCPU_REGS_RCX];
 		goto grp2;
-	case 0xe8: /* call (near) */ {
-		long int rel;
-		switch (op_bytes) {
-		case 2:
-			rel = insn_fetch(s16, 2, _eip);
-			break;
-		case 4:
-			rel = insn_fetch(s32, 4, _eip);
-			break;
-		case 8:
-			rel = insn_fetch(s64, 8, _eip);
-			break;
-		default:
-			DPRINTF("Call: Invalid op_bytes\n");
-			goto cannot_emulate;
-		}
-		src.val = (unsigned long) _eip;
-		JMP_REL(rel);
-		goto push;
-	}
-	case 0xe9: /* jmp rel */
-	case 0xeb: /* jmp rel short */
-		JMP_REL(src.val);
-		no_wb = 1; /* Disable writeback. */
-		break;
 	case 0xf6 ... 0xf7:	/* Grp3 */
 		switch (modrm_reg) {
 		case 0 ... 1:	/* test */
@@ -1350,6 +1341,32 @@ special_insn:
 	case 0xae ... 0xaf:	/* scas */
 		DPRINTF("Urk! I don't handle SCAS.\n");
 		goto cannot_emulate;
+	case 0xe8: /* call (near) */ {
+		long int rel;
+		switch (op_bytes) {
+		case 2:
+			rel = insn_fetch(s16, 2, _eip);
+			break;
+		case 4:
+			rel = insn_fetch(s32, 4, _eip);
+			break;
+		case 8:
+			rel = insn_fetch(s64, 8, _eip);
+			break;
+		default:
+			DPRINTF("Call: Invalid op_bytes\n");
+			goto cannot_emulate;
+		}
+		src.val = (unsigned long) _eip;
+		JMP_REL(rel);
+		goto push;
+	}
+	case 0xe9: /* jmp rel */
+	case 0xeb: /* jmp rel short */
+		JMP_REL(src.val);
+		no_wb = 1; /* Disable writeback. */
+		break;
+
 
 	}
 	goto writeback;
@@ -1501,6 +1518,10 @@ twobyte_insn:
 		dst.bytes = op_bytes;
 		dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
 		break;
+	case 0xc3: /* movnti */
+		dst.bytes = op_bytes;
+		dst.val = (op_bytes == 4) ? (u32) src.val : (u64) src.val;
+		break;
 	}
 	goto writeback;
 
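
The new two-byte 0xC3 case implements `movnti mem, reg` (opcode 0F C3, SSE2). Architecturally movnti is a plain store whose only twist is a non-temporal cache hint, which an emulator can safely ignore. A standalone model of the operand-size handling in the added case (function name illustrative):

	#include <stdint.h>

	/*
	 * movnti stores 32 or 64 bits depending on operand size; the
	 * non-temporal hint has no effect on the stored value.
	 */
	static uint64_t movnti_value(uint64_t src, unsigned op_bytes)
	{
		return (op_bytes == 4) ? (uint32_t)src : src;
	}
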
diff --git a/kernel/sched.c b/kernel/sched.c
index 7581e331b139..2810e562a991 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3375,7 +3375,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 
 	if (p->flags & PF_VCPU) {
 		account_guest_time(p, cputime);
-		p->flags &= ~PF_VCPU;
 		return;
 	}
 
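
The sched.c change makes the scheduler stop consuming PF_VCPU: ownership of the flag moves entirely to KVM, which presumably sets it in kvm_guest_enter() and clears it in kvm_guest_exit() (the helper bodies below are an assumption, not shown in this diff):

	#include <linux/sched.h>

	/* assumed definitions: KVM alone toggles PF_VCPU around guest runs */
	static inline void kvm_guest_enter(void)
	{
		current->flags |= PF_VCPU;	/* ticks now bill as guest time */
	}

	static inline void kvm_guest_exit(void)
	{
		current->flags &= ~PF_VCPU;	/* back to normal system time */
	}
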