author    Linus Torvalds <torvalds@linux-foundation.org>  2008-05-04 20:07:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-05-04 20:07:28 -0400
commit    8dcf5782848600ecfd0df3a45c521b5ad0fcb42e (patch)
tree      72821491c1ca5b0660599b4c06a27770d6800cd3
parent    e73b65f1db7e3baa3db43951476b7d2d2381ba35 (diff)
parent    b8ba5f10c5956d2b297766fda8f4f5ab8ad1e2cc (diff)
Merge branch 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates-2.6.26' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm:
  x86: KVM geust: make setup_secondary_clock definition dependent on local apic
  KVM: MMU: Allow more than PAGES_PER_HPAGE write protections per large page
  KVM: avoid fx_init() schedule in atomic
  KVM: Avoid spurious execeptions after setting registers
  KVM: PIT: support mode 4
  KVM: x86 emulator: disable writeback on lmsw
  KVM: ppc: deliver INTERRUPT_FP_UNAVAIL to the guest
  KVM: ppc: Handle guest idle by emulating MSR[WE] writes
  KVM: x86: task switch: fix wrong bit setting for the busy flag
  KVM: VMX: Enable EPT feature for KVM
  KVM: VMX: Prepare an identity page table for EPT in real mode
  KVM: Export necessary function for EPT
  KVM: MMU: Remove #ifdef CONFIG_X86_64 to support 4 level EPT
  KVM: MMU: Add EPT support
  KVM: Add kvm_x86_ops get_tdp_level()
  KVM: MMU: Move some definitions to a header file
  KVM: VMX: EPT Feature Detection
-rw-r--r--  arch/powerpc/kvm/booke_guest.c      6
-rw-r--r--  arch/powerpc/kvm/powerpc.c         20
-rw-r--r--  arch/x86/kernel/kvmclock.c          4
-rw-r--r--  arch/x86/kvm/i8254.c                2
-rw-r--r--  arch/x86/kvm/mmu.c                 89
-rw-r--r--  arch/x86/kvm/mmu.h                 37
-rw-r--r--  arch/x86/kvm/svm.c                 10
-rw-r--r--  arch/x86/kvm/vmx.c                375
-rw-r--r--  arch/x86/kvm/vmx.h                 38
-rw-r--r--  arch/x86/kvm/x86.c                 22
-rw-r--r--  arch/x86/kvm/x86_emulate.c          1
-rw-r--r--  include/asm-powerpc/kvm_host.h      1
-rw-r--r--  include/asm-powerpc/kvm_ppc.h       5
-rw-r--r--  include/asm-x86/kvm_host.h         10
-rw-r--r--  virt/kvm/kvm_main.c                 1
15 files changed, 542 insertions, 79 deletions
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
index 6d9884a6884a..712d89a28c46 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
49 { "inst_emu", VCPU_STAT(emulated_inst_exits) }, 49 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
50 { "dec", VCPU_STAT(dec_exits) }, 50 { "dec", VCPU_STAT(dec_exits) },
51 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 51 { "ext_intr", VCPU_STAT(ext_intr_exits) },
52 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
52 { NULL } 53 { NULL }
53}; 54};
54 55
@@ -338,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
338 } 339 }
339 break; 340 break;
340 341
342 case BOOKE_INTERRUPT_FP_UNAVAIL:
343 kvmppc_queue_exception(vcpu, exit_nr);
344 r = RESUME_GUEST;
345 break;
346
341 case BOOKE_INTERRUPT_DATA_STORAGE: 347 case BOOKE_INTERRUPT_DATA_STORAGE:
342 vcpu->arch.dear = vcpu->arch.fault_dear; 348 vcpu->arch.dear = vcpu->arch.fault_dear;
343 vcpu->arch.esr = vcpu->arch.fault_esr; 349 vcpu->arch.esr = vcpu->arch.fault_esr;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index bad40bd2d3ac..777e0f34e0ea 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
36 36
37int kvm_cpu_has_interrupt(struct kvm_vcpu *v) 37int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
38{ 38{
39 /* XXX implement me */ 39 return !!(v->arch.pending_exceptions);
40 return 0;
41} 40}
42 41
43int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 42int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
44{ 43{
45 return 1; 44 return !(v->arch.msr & MSR_WE);
46} 45}
47 46
48 47
@@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data)
214 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 213 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
215 214
216 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); 215 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
216
217 if (waitqueue_active(&vcpu->wq)) {
218 wake_up_interruptible(&vcpu->wq);
219 vcpu->stat.halt_wakeup++;
220 }
217} 221}
218 222
219int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 223int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
339 int r; 343 int r;
340 sigset_t sigsaved; 344 sigset_t sigsaved;
341 345
346 vcpu_load(vcpu);
347
342 if (vcpu->sigset_active) 348 if (vcpu->sigset_active)
343 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 349 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
344 350
@@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
363 if (vcpu->sigset_active) 369 if (vcpu->sigset_active)
364 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 370 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
365 371
372 vcpu_put(vcpu);
373
366 return r; 374 return r;
367} 375}
368 376
369int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 377int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
370{ 378{
371 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); 379 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
380
381 if (waitqueue_active(&vcpu->wq)) {
382 wake_up_interruptible(&vcpu->wq);
383 vcpu->stat.halt_wakeup++;
384 }
385
372 return 0; 386 return 0;
373} 387}
374 388
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee04043aeb..4bc1be5d5472 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -133,6 +133,7 @@ static int kvm_register_clock(void)
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134} 134}
135 135
136#ifdef CONFIG_X86_LOCAL_APIC
136static void kvm_setup_secondary_clock(void) 137static void kvm_setup_secondary_clock(void)
137{ 138{
138 /* 139 /*
@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
143 /* ok, done with our trickery, call native */ 144 /* ok, done with our trickery, call native */
144 setup_secondary_APIC_clock(); 145 setup_secondary_APIC_clock();
145} 146}
147#endif
146 148
147/* 149/*
148 * After the clock is registered, the host will keep writing to the 150 * After the clock is registered, the host will keep writing to the
@@ -177,7 +179,9 @@ void __init kvmclock_init(void)
177 pv_time_ops.get_wallclock = kvm_get_wallclock; 179 pv_time_ops.get_wallclock = kvm_get_wallclock;
178 pv_time_ops.set_wallclock = kvm_set_wallclock; 180 pv_time_ops.set_wallclock = kvm_set_wallclock;
179 pv_time_ops.sched_clock = kvm_clock_read; 181 pv_time_ops.sched_clock = kvm_clock_read;
182#ifdef CONFIG_X86_LOCAL_APIC
180 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; 183 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
184#endif
181 machine_ops.shutdown = kvm_shutdown; 185 machine_ops.shutdown = kvm_shutdown;
182#ifdef CONFIG_KEXEC 186#ifdef CONFIG_KEXEC
183 machine_ops.crash_shutdown = kvm_crash_shutdown; 187 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4c943eabacc3..3324d90038e4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
288 * mode 1 is one shot, mode 2 is period, otherwise del timer */ 288 * mode 1 is one shot, mode 2 is period, otherwise del timer */
289 switch (ps->channels[0].mode) { 289 switch (ps->channels[0].mode) {
290 case 1: 290 case 1:
291 /* FIXME: enhance mode 4 precision */
292 case 4:
291 create_pit_timer(&ps->pit_timer, val, 0); 293 create_pit_timer(&ps->pit_timer, val, 0);
292 break; 294 break;
293 case 2: 295 case 2:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ad6f5481671..36c5406b1813 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -79,36 +79,6 @@ static int dbg = 1;
79 } 79 }
80#endif 80#endif
81 81
82#define PT64_PT_BITS 9
83#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
84#define PT32_PT_BITS 10
85#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
86
87#define PT_WRITABLE_SHIFT 1
88
89#define PT_PRESENT_MASK (1ULL << 0)
90#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
91#define PT_USER_MASK (1ULL << 2)
92#define PT_PWT_MASK (1ULL << 3)
93#define PT_PCD_MASK (1ULL << 4)
94#define PT_ACCESSED_MASK (1ULL << 5)
95#define PT_DIRTY_MASK (1ULL << 6)
96#define PT_PAGE_SIZE_MASK (1ULL << 7)
97#define PT_PAT_MASK (1ULL << 7)
98#define PT_GLOBAL_MASK (1ULL << 8)
99#define PT64_NX_SHIFT 63
100#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
101
102#define PT_PAT_SHIFT 7
103#define PT_DIR_PAT_SHIFT 12
104#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
105
106#define PT32_DIR_PSE36_SIZE 4
107#define PT32_DIR_PSE36_SHIFT 13
108#define PT32_DIR_PSE36_MASK \
109 (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
110
111
112#define PT_FIRST_AVAIL_BITS_SHIFT 9 82#define PT_FIRST_AVAIL_BITS_SHIFT 9
113#define PT64_SECOND_AVAIL_BITS_SHIFT 52 83#define PT64_SECOND_AVAIL_BITS_SHIFT 52
114 84
@@ -154,10 +124,6 @@ static int dbg = 1;
154#define PFERR_USER_MASK (1U << 2) 124#define PFERR_USER_MASK (1U << 2)
155#define PFERR_FETCH_MASK (1U << 4) 125#define PFERR_FETCH_MASK (1U << 4)
156 126
157#define PT64_ROOT_LEVEL 4
158#define PT32_ROOT_LEVEL 2
159#define PT32E_ROOT_LEVEL 3
160
161#define PT_DIRECTORY_LEVEL 2 127#define PT_DIRECTORY_LEVEL 2
162#define PT_PAGE_TABLE_LEVEL 1 128#define PT_PAGE_TABLE_LEVEL 1
163 129
@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
186 152
187static u64 __read_mostly shadow_trap_nonpresent_pte; 153static u64 __read_mostly shadow_trap_nonpresent_pte;
188static u64 __read_mostly shadow_notrap_nonpresent_pte; 154static u64 __read_mostly shadow_notrap_nonpresent_pte;
155static u64 __read_mostly shadow_base_present_pte;
156static u64 __read_mostly shadow_nx_mask;
157static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
158static u64 __read_mostly shadow_user_mask;
159static u64 __read_mostly shadow_accessed_mask;
160static u64 __read_mostly shadow_dirty_mask;
189 161
190void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) 162void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
191{ 163{
@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
194} 166}
195EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); 167EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
196 168
169void kvm_mmu_set_base_ptes(u64 base_pte)
170{
171 shadow_base_present_pte = base_pte;
172}
173EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
174
175void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
176 u64 dirty_mask, u64 nx_mask, u64 x_mask)
177{
178 shadow_user_mask = user_mask;
179 shadow_accessed_mask = accessed_mask;
180 shadow_dirty_mask = dirty_mask;
181 shadow_nx_mask = nx_mask;
182 shadow_x_mask = x_mask;
183}
184EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
185
197static int is_write_protection(struct kvm_vcpu *vcpu) 186static int is_write_protection(struct kvm_vcpu *vcpu)
198{ 187{
199 return vcpu->arch.cr0 & X86_CR0_WP; 188 return vcpu->arch.cr0 & X86_CR0_WP;
@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
232 221
233static int is_dirty_pte(unsigned long pte) 222static int is_dirty_pte(unsigned long pte)
234{ 223{
235 return pte & PT_DIRTY_MASK; 224 return pte & shadow_dirty_mask;
236} 225}
237 226
238static int is_rmap_pte(u64 pte) 227static int is_rmap_pte(u64 pte)
@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
387 376
388 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 377 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
389 *write_count += 1; 378 *write_count += 1;
390 WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
391} 379}
392 380
393static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) 381static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
547 return; 535 return;
548 sp = page_header(__pa(spte)); 536 sp = page_header(__pa(spte));
549 pfn = spte_to_pfn(*spte); 537 pfn = spte_to_pfn(*spte);
550 if (*spte & PT_ACCESSED_MASK) 538 if (*spte & shadow_accessed_mask)
551 kvm_set_pfn_accessed(pfn); 539 kvm_set_pfn_accessed(pfn);
552 if (is_writeble_pte(*spte)) 540 if (is_writeble_pte(*spte))
553 kvm_release_pfn_dirty(pfn); 541 kvm_release_pfn_dirty(pfn);
@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1073 * whether the guest actually used the pte (in order to detect 1061 * whether the guest actually used the pte (in order to detect
1074 * demand paging). 1062 * demand paging).
1075 */ 1063 */
1076 spte = PT_PRESENT_MASK | PT_DIRTY_MASK; 1064 spte = shadow_base_present_pte | shadow_dirty_mask;
1077 if (!speculative) 1065 if (!speculative)
1078 pte_access |= PT_ACCESSED_MASK; 1066 pte_access |= PT_ACCESSED_MASK;
1079 if (!dirty) 1067 if (!dirty)
1080 pte_access &= ~ACC_WRITE_MASK; 1068 pte_access &= ~ACC_WRITE_MASK;
1081 if (!(pte_access & ACC_EXEC_MASK)) 1069 if (pte_access & ACC_EXEC_MASK)
1082 spte |= PT64_NX_MASK; 1070 spte |= shadow_x_mask;
1083 1071 else
1084 spte |= PT_PRESENT_MASK; 1072 spte |= shadow_nx_mask;
1085 if (pte_access & ACC_USER_MASK) 1073 if (pte_access & ACC_USER_MASK)
1086 spte |= PT_USER_MASK; 1074 spte |= shadow_user_mask;
1087 if (largepage) 1075 if (largepage)
1088 spte |= PT_PAGE_SIZE_MASK; 1076 spte |= PT_PAGE_SIZE_MASK;
1089 1077
@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1188 return -ENOMEM; 1176 return -ENOMEM;
1189 } 1177 }
1190 1178
1191 table[index] = __pa(new_table->spt) | PT_PRESENT_MASK 1179 table[index] = __pa(new_table->spt)
1192 | PT_WRITABLE_MASK | PT_USER_MASK; 1180 | PT_PRESENT_MASK | PT_WRITABLE_MASK
1181 | shadow_user_mask | shadow_x_mask;
1193 } 1182 }
1194 table_addr = table[index] & PT64_BASE_ADDR_MASK; 1183 table_addr = table[index] & PT64_BASE_ADDR_MASK;
1195 } 1184 }
@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1244 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 1233 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
1245 return; 1234 return;
1246 spin_lock(&vcpu->kvm->mmu_lock); 1235 spin_lock(&vcpu->kvm->mmu_lock);
1247#ifdef CONFIG_X86_64
1248 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { 1236 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1249 hpa_t root = vcpu->arch.mmu.root_hpa; 1237 hpa_t root = vcpu->arch.mmu.root_hpa;
1250 1238
@@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1256 spin_unlock(&vcpu->kvm->mmu_lock); 1244 spin_unlock(&vcpu->kvm->mmu_lock);
1257 return; 1245 return;
1258 } 1246 }
1259#endif
1260 for (i = 0; i < 4; ++i) { 1247 for (i = 0; i < 4; ++i) {
1261 hpa_t root = vcpu->arch.mmu.pae_root[i]; 1248 hpa_t root = vcpu->arch.mmu.pae_root[i];
1262 1249
@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1282 1269
1283 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; 1270 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1284 1271
1285#ifdef CONFIG_X86_64
1286 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { 1272 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1287 hpa_t root = vcpu->arch.mmu.root_hpa; 1273 hpa_t root = vcpu->arch.mmu.root_hpa;
1288 1274
@@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1297 vcpu->arch.mmu.root_hpa = root; 1283 vcpu->arch.mmu.root_hpa = root;
1298 return; 1284 return;
1299 } 1285 }
1300#endif
1301 metaphysical = !is_paging(vcpu); 1286 metaphysical = !is_paging(vcpu);
1302 if (tdp_enabled) 1287 if (tdp_enabled)
1303 metaphysical = 1; 1288 metaphysical = 1;
@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1377 spin_lock(&vcpu->kvm->mmu_lock); 1362 spin_lock(&vcpu->kvm->mmu_lock);
1378 kvm_mmu_free_some_pages(vcpu); 1363 kvm_mmu_free_some_pages(vcpu);
1379 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, 1364 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
1380 largepage, gfn, pfn, TDP_ROOT_LEVEL); 1365 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
1381 spin_unlock(&vcpu->kvm->mmu_lock); 1366 spin_unlock(&vcpu->kvm->mmu_lock);
1382 1367
1383 return r; 1368 return r;
@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
1484 context->page_fault = tdp_page_fault; 1469 context->page_fault = tdp_page_fault;
1485 context->free = nonpaging_free; 1470 context->free = nonpaging_free;
1486 context->prefetch_page = nonpaging_prefetch_page; 1471 context->prefetch_page = nonpaging_prefetch_page;
1487 context->shadow_root_level = TDP_ROOT_LEVEL; 1472 context->shadow_root_level = kvm_x86_ops->get_tdp_level();
1488 context->root_hpa = INVALID_PAGE; 1473 context->root_hpa = INVALID_PAGE;
1489 1474
1490 if (!is_paging(vcpu)) { 1475 if (!is_paging(vcpu)) {
@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
1633{ 1618{
1634 u64 *spte = vcpu->arch.last_pte_updated; 1619 u64 *spte = vcpu->arch.last_pte_updated;
1635 1620
1636 return !!(spte && (*spte & PT_ACCESSED_MASK)); 1621 return !!(spte && (*spte & shadow_accessed_mask));
1637} 1622}
1638 1623
1639static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 1624static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e64e9f56a65e..1730757bbc7a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,11 +3,38 @@
3 3
4#include <linux/kvm_host.h> 4#include <linux/kvm_host.h>
5 5
6#ifdef CONFIG_X86_64 6#define PT64_PT_BITS 9
7#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL 7#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
8#else 8#define PT32_PT_BITS 10
9#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL 9#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
10#endif 10
11#define PT_WRITABLE_SHIFT 1
12
13#define PT_PRESENT_MASK (1ULL << 0)
14#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
15#define PT_USER_MASK (1ULL << 2)
16#define PT_PWT_MASK (1ULL << 3)
17#define PT_PCD_MASK (1ULL << 4)
18#define PT_ACCESSED_MASK (1ULL << 5)
19#define PT_DIRTY_MASK (1ULL << 6)
20#define PT_PAGE_SIZE_MASK (1ULL << 7)
21#define PT_PAT_MASK (1ULL << 7)
22#define PT_GLOBAL_MASK (1ULL << 8)
23#define PT64_NX_SHIFT 63
24#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
25
26#define PT_PAT_SHIFT 7
27#define PT_DIR_PAT_SHIFT 12
28#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
29
30#define PT32_DIR_PSE36_SIZE 4
31#define PT32_DIR_PSE36_SHIFT 13
32#define PT32_DIR_PSE36_MASK \
33 (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
34
35#define PT64_ROOT_LEVEL 4
36#define PT32_ROOT_LEVEL 2
37#define PT32E_ROOT_LEVEL 3
11 38
12static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 39static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
13{ 40{
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 89e0be2c10d0..ab22615eee89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
1863 return false; 1863 return false;
1864} 1864}
1865 1865
1866static int get_npt_level(void)
1867{
1868#ifdef CONFIG_X86_64
1869 return PT64_ROOT_LEVEL;
1870#else
1871 return PT32E_ROOT_LEVEL;
1872#endif
1873}
1874
1866static struct kvm_x86_ops svm_x86_ops = { 1875static struct kvm_x86_ops svm_x86_ops = {
1867 .cpu_has_kvm_support = has_svm, 1876 .cpu_has_kvm_support = has_svm,
1868 .disabled_by_bios = is_disabled, 1877 .disabled_by_bios = is_disabled,
@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
1920 .inject_pending_vectors = do_interrupt_requests, 1929 .inject_pending_vectors = do_interrupt_requests,
1921 1930
1922 .set_tss_addr = svm_set_tss_addr, 1931 .set_tss_addr = svm_set_tss_addr,
1932 .get_tdp_level = get_npt_level,
1923}; 1933};
1924 1934
1925static int __init svm_init(void) 1935static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8e5d6645b90d..bfe4db11989c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0);
42static int flexpriority_enabled = 1; 42static int flexpriority_enabled = 1;
43module_param(flexpriority_enabled, bool, 0); 43module_param(flexpriority_enabled, bool, 0);
44 44
45static int enable_ept = 1;
46module_param(enable_ept, bool, 0);
47
45struct vmcs { 48struct vmcs {
46 u32 revision_id; 49 u32 revision_id;
47 u32 abort; 50 u32 abort;
@@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
84 return container_of(vcpu, struct vcpu_vmx, vcpu); 87 return container_of(vcpu, struct vcpu_vmx, vcpu);
85} 88}
86 89
87static int init_rmode_tss(struct kvm *kvm); 90static int init_rmode(struct kvm *kvm);
88 91
89static DEFINE_PER_CPU(struct vmcs *, vmxarea); 92static DEFINE_PER_CPU(struct vmcs *, vmxarea);
90static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 93static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -107,6 +110,11 @@ static struct vmcs_config {
107 u32 vmentry_ctrl; 110 u32 vmentry_ctrl;
108} vmcs_config; 111} vmcs_config;
109 112
113struct vmx_capability {
114 u32 ept;
115 u32 vpid;
116} vmx_capability;
117
110#define VMX_SEGMENT_FIELD(seg) \ 118#define VMX_SEGMENT_FIELD(seg) \
111 [VCPU_SREG_##seg] = { \ 119 [VCPU_SREG_##seg] = { \
112 .selector = GUEST_##seg##_SELECTOR, \ 120 .selector = GUEST_##seg##_SELECTOR, \
@@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
214 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); 222 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
215} 223}
216 224
225static inline int cpu_has_vmx_invept_individual_addr(void)
226{
227 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
228}
229
230static inline int cpu_has_vmx_invept_context(void)
231{
232 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
233}
234
235static inline int cpu_has_vmx_invept_global(void)
236{
237 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
238}
239
240static inline int cpu_has_vmx_ept(void)
241{
242 return (vmcs_config.cpu_based_2nd_exec_ctrl &
243 SECONDARY_EXEC_ENABLE_EPT);
244}
245
246static inline int vm_need_ept(void)
247{
248 return (cpu_has_vmx_ept() && enable_ept);
249}
250
217static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 251static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
218{ 252{
219 return ((cpu_has_vmx_virtualize_apic_accesses()) && 253 return ((cpu_has_vmx_virtualize_apic_accesses()) &&
@@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
250 : : "a"(&operand), "c"(ext) : "cc", "memory"); 284 : : "a"(&operand), "c"(ext) : "cc", "memory");
251} 285}
252 286
287static inline void __invept(int ext, u64 eptp, gpa_t gpa)
288{
289 struct {
290 u64 eptp, gpa;
291 } operand = {eptp, gpa};
292
293 asm volatile (ASM_VMX_INVEPT
294 /* CF==1 or ZF==1 --> rc = -1 */
295 "; ja 1f ; ud2 ; 1:\n"
296 : : "a" (&operand), "c" (ext) : "cc", "memory");
297}
298
253static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) 299static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
254{ 300{
255 int i; 301 int i;
@@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
301 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); 347 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
302} 348}
303 349
350static inline void ept_sync_global(void)
351{
352 if (cpu_has_vmx_invept_global())
353 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
354}
355
356static inline void ept_sync_context(u64 eptp)
357{
358 if (vm_need_ept()) {
359 if (cpu_has_vmx_invept_context())
360 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
361 else
362 ept_sync_global();
363 }
364}
365
366static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
367{
368 if (vm_need_ept()) {
369 if (cpu_has_vmx_invept_individual_addr())
370 __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
371 eptp, gpa);
372 else
373 ept_sync_context(eptp);
374 }
375}
376
304static unsigned long vmcs_readl(unsigned long field) 377static unsigned long vmcs_readl(unsigned long field)
305{ 378{
306 unsigned long value; 379 unsigned long value;
@@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
388 eb |= 1u << 1; 461 eb |= 1u << 1;
389 if (vcpu->arch.rmode.active) 462 if (vcpu->arch.rmode.active)
390 eb = ~0; 463 eb = ~0;
464 if (vm_need_ept())
465 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
391 vmcs_write32(EXCEPTION_BITMAP, eb); 466 vmcs_write32(EXCEPTION_BITMAP, eb);
392} 467}
393 468
@@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
985static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) 1060static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
986{ 1061{
987 u32 vmx_msr_low, vmx_msr_high; 1062 u32 vmx_msr_low, vmx_msr_high;
988 u32 min, opt; 1063 u32 min, opt, min2, opt2;
989 u32 _pin_based_exec_control = 0; 1064 u32 _pin_based_exec_control = 0;
990 u32 _cpu_based_exec_control = 0; 1065 u32 _cpu_based_exec_control = 0;
991 u32 _cpu_based_2nd_exec_control = 0; 1066 u32 _cpu_based_2nd_exec_control = 0;
@@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1003 CPU_BASED_CR8_LOAD_EXITING | 1078 CPU_BASED_CR8_LOAD_EXITING |
1004 CPU_BASED_CR8_STORE_EXITING | 1079 CPU_BASED_CR8_STORE_EXITING |
1005#endif 1080#endif
1081 CPU_BASED_CR3_LOAD_EXITING |
1082 CPU_BASED_CR3_STORE_EXITING |
1006 CPU_BASED_USE_IO_BITMAPS | 1083 CPU_BASED_USE_IO_BITMAPS |
1007 CPU_BASED_MOV_DR_EXITING | 1084 CPU_BASED_MOV_DR_EXITING |
1008 CPU_BASED_USE_TSC_OFFSETING; 1085 CPU_BASED_USE_TSC_OFFSETING;
@@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1018 ~CPU_BASED_CR8_STORE_EXITING; 1095 ~CPU_BASED_CR8_STORE_EXITING;
1019#endif 1096#endif
1020 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 1097 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
1021 min = 0; 1098 min2 = 0;
1022 opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 1099 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
1023 SECONDARY_EXEC_WBINVD_EXITING | 1100 SECONDARY_EXEC_WBINVD_EXITING |
1024 SECONDARY_EXEC_ENABLE_VPID; 1101 SECONDARY_EXEC_ENABLE_VPID |
1025 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, 1102 SECONDARY_EXEC_ENABLE_EPT;
1103 if (adjust_vmx_controls(min2, opt2,
1104 MSR_IA32_VMX_PROCBASED_CTLS2,
1026 &_cpu_based_2nd_exec_control) < 0) 1105 &_cpu_based_2nd_exec_control) < 0)
1027 return -EIO; 1106 return -EIO;
1028 } 1107 }
@@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1031 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 1110 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
1032 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 1111 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
1033#endif 1112#endif
1113 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
1114 /* CR3 accesses don't need to cause VM Exits when EPT enabled */
1115 min &= ~(CPU_BASED_CR3_LOAD_EXITING |
1116 CPU_BASED_CR3_STORE_EXITING);
1117 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
1118 &_cpu_based_exec_control) < 0)
1119 return -EIO;
1120 rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
1121 vmx_capability.ept, vmx_capability.vpid);
1122 }
1034 1123
1035 min = 0; 1124 min = 0;
1036#ifdef CONFIG_X86_64 1125#ifdef CONFIG_X86_64
@@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1256 fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); 1345 fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
1257 1346
1258 kvm_mmu_reset_context(vcpu); 1347 kvm_mmu_reset_context(vcpu);
1259 init_rmode_tss(vcpu->kvm); 1348 init_rmode(vcpu->kvm);
1260} 1349}
1261 1350
1262#ifdef CONFIG_X86_64 1351#ifdef CONFIG_X86_64
@@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1304 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 1393 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
1305} 1394}
1306 1395
1396static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
1397{
1398 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
1399 if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
1400 printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
1401 return;
1402 }
1403 vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
1404 vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
1405 vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
1406 vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
1407 }
1408}
1409
1410static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
1411
1412static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1413 unsigned long cr0,
1414 struct kvm_vcpu *vcpu)
1415{
1416 if (!(cr0 & X86_CR0_PG)) {
1417 /* From paging/starting to nonpaging */
1418 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1419 vmcs_config.cpu_based_exec_ctrl |
1420 (CPU_BASED_CR3_LOAD_EXITING |
1421 CPU_BASED_CR3_STORE_EXITING));
1422 vcpu->arch.cr0 = cr0;
1423 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1424 *hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
1425 *hw_cr0 &= ~X86_CR0_WP;
1426 } else if (!is_paging(vcpu)) {
1427 /* From nonpaging to paging */
1428 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1429 vmcs_config.cpu_based_exec_ctrl &
1430 ~(CPU_BASED_CR3_LOAD_EXITING |
1431 CPU_BASED_CR3_STORE_EXITING));
1432 vcpu->arch.cr0 = cr0;
1433 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1434 if (!(vcpu->arch.cr0 & X86_CR0_WP))
1435 *hw_cr0 &= ~X86_CR0_WP;
1436 }
1437}
1438
1439static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1440 struct kvm_vcpu *vcpu)
1441{
1442 if (!is_paging(vcpu)) {
1443 *hw_cr4 &= ~X86_CR4_PAE;
1444 *hw_cr4 |= X86_CR4_PSE;
1445 } else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
1446 *hw_cr4 &= ~X86_CR4_PAE;
1447}
1448
1307static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1449static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1308{ 1450{
1451 unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
1452 KVM_VM_CR0_ALWAYS_ON;
1453
1309 vmx_fpu_deactivate(vcpu); 1454 vmx_fpu_deactivate(vcpu);
1310 1455
1311 if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) 1456 if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
@@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1323 } 1468 }
1324#endif 1469#endif
1325 1470
1471 if (vm_need_ept())
1472 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
1473
1326 vmcs_writel(CR0_READ_SHADOW, cr0); 1474 vmcs_writel(CR0_READ_SHADOW, cr0);
1327 vmcs_writel(GUEST_CR0, 1475 vmcs_writel(GUEST_CR0, hw_cr0);
1328 (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
1329 vcpu->arch.cr0 = cr0; 1476 vcpu->arch.cr0 = cr0;
1330 1477
1331 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) 1478 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
1332 vmx_fpu_activate(vcpu); 1479 vmx_fpu_activate(vcpu);
1333} 1480}
1334 1481
1482static u64 construct_eptp(unsigned long root_hpa)
1483{
1484 u64 eptp;
1485
1486 /* TODO write the value reading from MSR */
1487 eptp = VMX_EPT_DEFAULT_MT |
1488 VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
1489 eptp |= (root_hpa & PAGE_MASK);
1490
1491 return eptp;
1492}
1493
1335static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 1494static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1336{ 1495{
1496 unsigned long guest_cr3;
1497 u64 eptp;
1498
1499 guest_cr3 = cr3;
1500 if (vm_need_ept()) {
1501 eptp = construct_eptp(cr3);
1502 vmcs_write64(EPT_POINTER, eptp);
1503 ept_sync_context(eptp);
1504 ept_load_pdptrs(vcpu);
1505 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
1506 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
1507 }
1508
1337 vmx_flush_tlb(vcpu); 1509 vmx_flush_tlb(vcpu);
1338 vmcs_writel(GUEST_CR3, cr3); 1510 vmcs_writel(GUEST_CR3, guest_cr3);
1339 if (vcpu->arch.cr0 & X86_CR0_PE) 1511 if (vcpu->arch.cr0 & X86_CR0_PE)
1340 vmx_fpu_deactivate(vcpu); 1512 vmx_fpu_deactivate(vcpu);
1341} 1513}
1342 1514
1343static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1515static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1344{ 1516{
1345 vmcs_writel(CR4_READ_SHADOW, cr4); 1517 unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
1346 vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? 1518 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1347 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); 1519
1348 vcpu->arch.cr4 = cr4; 1520 vcpu->arch.cr4 = cr4;
1521 if (vm_need_ept())
1522 ept_update_paging_mode_cr4(&hw_cr4, vcpu);
1523
1524 vmcs_writel(CR4_READ_SHADOW, cr4);
1525 vmcs_writel(GUEST_CR4, hw_cr4);
1349} 1526}
1350 1527
1351static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 1528static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1530,6 +1707,41 @@ out:
1530 return ret; 1707 return ret;
1531} 1708}
1532 1709
1710static int init_rmode_identity_map(struct kvm *kvm)
1711{
1712 int i, r, ret;
1713 pfn_t identity_map_pfn;
1714 u32 tmp;
1715
1716 if (!vm_need_ept())
1717 return 1;
1718 if (unlikely(!kvm->arch.ept_identity_pagetable)) {
1719 printk(KERN_ERR "EPT: identity-mapping pagetable "
1720 "haven't been allocated!\n");
1721 return 0;
1722 }
1723 if (likely(kvm->arch.ept_identity_pagetable_done))
1724 return 1;
1725 ret = 0;
1726 identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT;
1727 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
1728 if (r < 0)
1729 goto out;
1730 /* Set up identity-mapping pagetable for EPT in real mode */
1731 for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
1732 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
1733 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
1734 r = kvm_write_guest_page(kvm, identity_map_pfn,
1735 &tmp, i * sizeof(tmp), sizeof(tmp));
1736 if (r < 0)
1737 goto out;
1738 }
1739 kvm->arch.ept_identity_pagetable_done = true;
1740 ret = 1;
1741out:
1742 return ret;
1743}
1744
1533static void seg_setup(int seg) 1745static void seg_setup(int seg)
1534{ 1746{
1535 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1747 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1564,6 +1776,31 @@ out:
1564 return r; 1776 return r;
1565} 1777}
1566 1778
1779static int alloc_identity_pagetable(struct kvm *kvm)
1780{
1781 struct kvm_userspace_memory_region kvm_userspace_mem;
1782 int r = 0;
1783
1784 down_write(&kvm->slots_lock);
1785 if (kvm->arch.ept_identity_pagetable)
1786 goto out;
1787 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
1788 kvm_userspace_mem.flags = 0;
1789 kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
1790 kvm_userspace_mem.memory_size = PAGE_SIZE;
1791 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
1792 if (r)
1793 goto out;
1794
1795 down_read(&current->mm->mmap_sem);
1796 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
1797 VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
1798 up_read(&current->mm->mmap_sem);
1799out:
1800 up_write(&kvm->slots_lock);
1801 return r;
1802}
1803
1567static void allocate_vpid(struct vcpu_vmx *vmx) 1804static void allocate_vpid(struct vcpu_vmx *vmx)
1568{ 1805{
1569 int vpid; 1806 int vpid;
@@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1638 CPU_BASED_CR8_LOAD_EXITING; 1875 CPU_BASED_CR8_LOAD_EXITING;
1639#endif 1876#endif
1640 } 1877 }
1878 if (!vm_need_ept())
1879 exec_control |= CPU_BASED_CR3_STORE_EXITING |
1880 CPU_BASED_CR3_LOAD_EXITING;
1641 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); 1881 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
1642 1882
1643 if (cpu_has_secondary_exec_ctrls()) { 1883 if (cpu_has_secondary_exec_ctrls()) {
@@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1647 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 1887 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1648 if (vmx->vpid == 0) 1888 if (vmx->vpid == 0)
1649 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; 1889 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
1890 if (!vm_need_ept())
1891 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
1650 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 1892 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
1651 } 1893 }
1652 1894
@@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1722 return 0; 1964 return 0;
1723} 1965}
1724 1966
1967static int init_rmode(struct kvm *kvm)
1968{
1969 if (!init_rmode_tss(kvm))
1970 return 0;
1971 if (!init_rmode_identity_map(kvm))
1972 return 0;
1973 return 1;
1974}
1975
1725static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) 1976static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1726{ 1977{
1727 struct vcpu_vmx *vmx = to_vmx(vcpu); 1978 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1729 int ret; 1980 int ret;
1730 1981
1731 down_read(&vcpu->kvm->slots_lock); 1982 down_read(&vcpu->kvm->slots_lock);
1732 if (!init_rmode_tss(vmx->vcpu.kvm)) { 1983 if (!init_rmode(vmx->vcpu.kvm)) {
1733 ret = -ENOMEM; 1984 ret = -ENOMEM;
1734 goto out; 1985 goto out;
1735 } 1986 }
@@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1994 if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 2245 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
1995 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 2246 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
1996 if (is_page_fault(intr_info)) { 2247 if (is_page_fault(intr_info)) {
2248 /* EPT won't cause page fault directly */
2249 if (vm_need_ept())
2250 BUG();
1997 cr2 = vmcs_readl(EXIT_QUALIFICATION); 2251 cr2 = vmcs_readl(EXIT_QUALIFICATION);
1998 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, 2252 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
1999 (u32)((u64)cr2 >> 32), handler); 2253 (u32)((u64)cr2 >> 32), handler);
@@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2323 return kvm_task_switch(vcpu, tss_selector, reason); 2577 return kvm_task_switch(vcpu, tss_selector, reason);
2324} 2578}
2325 2579
2580static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2581{
2582 u64 exit_qualification;
2583 enum emulation_result er;
2584 gpa_t gpa;
2585 unsigned long hva;
2586 int gla_validity;
2587 int r;
2588
2589 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2590
2591 if (exit_qualification & (1 << 6)) {
2592 printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
2593 return -ENOTSUPP;
2594 }
2595
2596 gla_validity = (exit_qualification >> 7) & 0x3;
2597 if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
2598 printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
2599 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
2600 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
2601 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
2602 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
2603 (long unsigned int)exit_qualification);
2604 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
2605 kvm_run->hw.hardware_exit_reason = 0;
2606 return -ENOTSUPP;
2607 }
2608
2609 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
2610 hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
2611 if (!kvm_is_error_hva(hva)) {
2612 r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
2613 if (r < 0) {
2614 printk(KERN_ERR "EPT: Not enough memory!\n");
2615 return -ENOMEM;
2616 }
2617 return 1;
2618 } else {
2619 /* must be MMIO */
2620 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2621
2622 if (er == EMULATE_FAIL) {
2623 printk(KERN_ERR
2624 "EPT: Fail to handle EPT violation vmexit!er is %d\n",
2625 er);
2626 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
2627 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
2628 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
2629 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
2630 (long unsigned int)exit_qualification);
2631 return -ENOTSUPP;
2632 } else if (er == EMULATE_DO_MMIO)
2633 return 0;
2634 }
2635 return 1;
2636}
2637
2326/* 2638/*
2327 * The exit handlers return 1 if the exit was handled fully and guest execution 2639 * The exit handlers return 1 if the exit was handled fully and guest execution
2328 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 2640 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2346 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 2658 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
2347 [EXIT_REASON_WBINVD] = handle_wbinvd, 2659 [EXIT_REASON_WBINVD] = handle_wbinvd,
2348 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 2660 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
2661 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
2349}; 2662};
2350 2663
2351static const int kvm_vmx_max_exit_handlers = 2664static const int kvm_vmx_max_exit_handlers =
@@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2364 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), 2677 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
2365 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); 2678 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
2366 2679
2680 /* Access CR3 don't cause VMExit in paging mode, so we need
2681 * to sync with guest real CR3. */
2682 if (vm_need_ept() && is_paging(vcpu)) {
2683 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2684 ept_load_pdptrs(vcpu);
2685 }
2686
2367 if (unlikely(vmx->fail)) { 2687 if (unlikely(vmx->fail)) {
2368 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2688 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2369 kvm_run->fail_entry.hardware_entry_failure_reason 2689 kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2372 } 2692 }
2373 2693
2374 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 2694 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
2375 exit_reason != EXIT_REASON_EXCEPTION_NMI) 2695 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
2696 exit_reason != EXIT_REASON_EPT_VIOLATION))
2376 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 2697 printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
2377 "exit reason is 0x%x\n", __func__, exit_reason); 2698 "exit reason is 0x%x\n", __func__, exit_reason);
2378 if (exit_reason < kvm_vmx_max_exit_handlers 2699 if (exit_reason < kvm_vmx_max_exit_handlers
@@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2674 return ERR_PTR(-ENOMEM); 2995 return ERR_PTR(-ENOMEM);
2675 2996
2676 allocate_vpid(vmx); 2997 allocate_vpid(vmx);
2998 if (id == 0 && vm_need_ept()) {
2999 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3000 VMX_EPT_WRITABLE_MASK |
3001 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3002 kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
3003 VMX_EPT_FAKE_DIRTY_MASK, 0ull,
3004 VMX_EPT_EXECUTABLE_MASK);
3005 kvm_enable_tdp();
3006 }
2677 3007
2678 err = kvm_vcpu_init(&vmx->vcpu, kvm, id); 3008 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
2679 if (err) 3009 if (err)
@@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2706 if (alloc_apic_access_page(kvm) != 0) 3036 if (alloc_apic_access_page(kvm) != 0)
2707 goto free_vmcs; 3037 goto free_vmcs;
2708 3038
3039 if (vm_need_ept())
3040 if (alloc_identity_pagetable(kvm) != 0)
3041 goto free_vmcs;
3042
2709 return &vmx->vcpu; 3043 return &vmx->vcpu;
2710 3044
2711free_vmcs: 3045free_vmcs:
@@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn)
2735 } 3069 }
2736} 3070}
2737 3071
3072static int get_ept_level(void)
3073{
3074 return VMX_EPT_DEFAULT_GAW + 1;
3075}
3076
2738static struct kvm_x86_ops vmx_x86_ops = { 3077static struct kvm_x86_ops vmx_x86_ops = {
2739 .cpu_has_kvm_support = cpu_has_kvm_support, 3078 .cpu_has_kvm_support = cpu_has_kvm_support,
2740 .disabled_by_bios = vmx_disabled_by_bios, 3079 .disabled_by_bios = vmx_disabled_by_bios,
@@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
2791 .inject_pending_vectors = do_interrupt_requests, 3130 .inject_pending_vectors = do_interrupt_requests,
2792 3131
2793 .set_tss_addr = vmx_set_tss_addr, 3132 .set_tss_addr = vmx_set_tss_addr,
3133 .get_tdp_level = get_ept_level,
2794}; 3134};
2795 3135
2796static int __init vmx_init(void) 3136static int __init vmx_init(void)
@@ -2843,9 +3183,14 @@ static int __init vmx_init(void)
2843 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); 3183 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
2844 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); 3184 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
2845 3185
3186 if (cpu_has_vmx_ept())
3187 bypass_guest_pf = 0;
3188
2846 if (bypass_guest_pf) 3189 if (bypass_guest_pf)
2847 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 3190 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
2848 3191
3192 ept_sync_global();
3193
2849 return 0; 3194 return 0;
2850 3195
2851out2: 3196out2:
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 5dff4606b988..79d94c610dfe 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -35,6 +35,8 @@
35#define CPU_BASED_MWAIT_EXITING 0x00000400 35#define CPU_BASED_MWAIT_EXITING 0x00000400
36#define CPU_BASED_RDPMC_EXITING 0x00000800 36#define CPU_BASED_RDPMC_EXITING 0x00000800
37#define CPU_BASED_RDTSC_EXITING 0x00001000 37#define CPU_BASED_RDTSC_EXITING 0x00001000
38#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
39#define CPU_BASED_CR3_STORE_EXITING 0x00010000
38#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 40#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
39#define CPU_BASED_CR8_STORE_EXITING 0x00100000 41#define CPU_BASED_CR8_STORE_EXITING 0x00100000
40#define CPU_BASED_TPR_SHADOW 0x00200000 42#define CPU_BASED_TPR_SHADOW 0x00200000
@@ -49,6 +51,7 @@
49 * Definitions of Secondary Processor-Based VM-Execution Controls. 51 * Definitions of Secondary Processor-Based VM-Execution Controls.
50 */ 52 */
51#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 53#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
54#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
52#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 55#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
53#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 56#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
54 57
@@ -100,10 +103,22 @@ enum vmcs_field {
100 VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, 103 VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
101 APIC_ACCESS_ADDR = 0x00002014, 104 APIC_ACCESS_ADDR = 0x00002014,
102 APIC_ACCESS_ADDR_HIGH = 0x00002015, 105 APIC_ACCESS_ADDR_HIGH = 0x00002015,
106 EPT_POINTER = 0x0000201a,
107 EPT_POINTER_HIGH = 0x0000201b,
108 GUEST_PHYSICAL_ADDRESS = 0x00002400,
109 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
103 VMCS_LINK_POINTER = 0x00002800, 110 VMCS_LINK_POINTER = 0x00002800,
104 VMCS_LINK_POINTER_HIGH = 0x00002801, 111 VMCS_LINK_POINTER_HIGH = 0x00002801,
105 GUEST_IA32_DEBUGCTL = 0x00002802, 112 GUEST_IA32_DEBUGCTL = 0x00002802,
106 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 113 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
114 GUEST_PDPTR0 = 0x0000280a,
115 GUEST_PDPTR0_HIGH = 0x0000280b,
116 GUEST_PDPTR1 = 0x0000280c,
117 GUEST_PDPTR1_HIGH = 0x0000280d,
118 GUEST_PDPTR2 = 0x0000280e,
119 GUEST_PDPTR2_HIGH = 0x0000280f,
120 GUEST_PDPTR3 = 0x00002810,
121 GUEST_PDPTR3_HIGH = 0x00002811,
107 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 122 PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
108 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 123 CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
109 EXCEPTION_BITMAP = 0x00004004, 124 EXCEPTION_BITMAP = 0x00004004,
@@ -226,6 +241,8 @@ enum vmcs_field {
226#define EXIT_REASON_MWAIT_INSTRUCTION 36 241#define EXIT_REASON_MWAIT_INSTRUCTION 36
227#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 242#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
228#define EXIT_REASON_APIC_ACCESS 44 243#define EXIT_REASON_APIC_ACCESS 44
244#define EXIT_REASON_EPT_VIOLATION 48
245#define EXIT_REASON_EPT_MISCONFIG 49
229#define EXIT_REASON_WBINVD 54 246#define EXIT_REASON_WBINVD 54
230 247
231/* 248/*
@@ -316,15 +333,36 @@ enum vmcs_field {
316#define MSR_IA32_VMX_CR4_FIXED1 0x489 333#define MSR_IA32_VMX_CR4_FIXED1 0x489
317#define MSR_IA32_VMX_VMCS_ENUM 0x48a 334#define MSR_IA32_VMX_VMCS_ENUM 0x48a
318#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b 335#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
336#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c
319 337
320#define MSR_IA32_FEATURE_CONTROL 0x3a 338#define MSR_IA32_FEATURE_CONTROL 0x3a
321#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 339#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
322#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 340#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
323 341
324#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 342#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
343#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
325 344
326#define VMX_NR_VPIDS (1 << 16) 345#define VMX_NR_VPIDS (1 << 16)
327#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 346#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
328#define VMX_VPID_EXTENT_ALL_CONTEXT 2 347#define VMX_VPID_EXTENT_ALL_CONTEXT 2
329 348
349#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
350#define VMX_EPT_EXTENT_CONTEXT 1
351#define VMX_EPT_EXTENT_GLOBAL 2
352#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
353#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
354#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
355#define VMX_EPT_DEFAULT_GAW 3
356#define VMX_EPT_MAX_GAW 0x4
357#define VMX_EPT_MT_EPTE_SHIFT 3
358#define VMX_EPT_GAW_EPTP_SHIFT 3
359#define VMX_EPT_DEFAULT_MT 0x6ull
360#define VMX_EPT_READABLE_MASK 0x1ull
361#define VMX_EPT_WRITABLE_MASK 0x2ull
362#define VMX_EPT_EXECUTABLE_MASK 0x4ull
363#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
364#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
365
366#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
367
330#endif 368#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0ce556372a4d..21338bdb28ff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque)
2417 2417
2418 kvm_x86_ops = ops; 2418 kvm_x86_ops = ops;
2419 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2419 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
2420 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2421 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2422 PT_DIRTY_MASK, PT64_NX_MASK, 0);
2420 return 0; 2423 return 0;
2421 2424
2422out: 2425out:
@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3019 3022
3020 kvm_x86_ops->decache_regs(vcpu); 3023 kvm_x86_ops->decache_regs(vcpu);
3021 3024
3025 vcpu->arch.exception.pending = false;
3026
3022 vcpu_put(vcpu); 3027 vcpu_put(vcpu);
3023 3028
3024 return 0; 3029 return 0;
@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3481 } 3486 }
3482 3487
3483 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 3488 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3484 cseg_desc.type &= ~(1 << 8); //clear the B flag 3489 cseg_desc.type &= ~(1 << 1); //clear the B flag
3485 save_guest_segment_descriptor(vcpu, tr_seg.selector, 3490 save_guest_segment_descriptor(vcpu, tr_seg.selector,
3486 &cseg_desc); 3491 &cseg_desc);
3487 } 3492 }
@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3507 } 3512 }
3508 3513
3509 if (reason != TASK_SWITCH_IRET) { 3514 if (reason != TASK_SWITCH_IRET) {
3510 nseg_desc.type |= (1 << 8); 3515 nseg_desc.type |= (1 << 1);
3511 save_guest_segment_descriptor(vcpu, tss_selector, 3516 save_guest_segment_descriptor(vcpu, tss_selector,
3512 &nseg_desc); 3517 &nseg_desc);
3513 } 3518 }
@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu)
3698{ 3703{
3699 unsigned after_mxcsr_mask; 3704 unsigned after_mxcsr_mask;
3700 3705
3706 /*
3707 * Touch the fpu the first time in non atomic context as if
3708 * this is the first fpu instruction the exception handler
3709 * will fire before the instruction returns and it'll have to
3710 * allocate ram with GFP_KERNEL.
3711 */
3712 if (!used_math())
3713 fx_save(&vcpu->arch.host_fx_image);
3714
3701 /* Initialize guest FPU by resetting ours and saving into guest's */ 3715 /* Initialize guest FPU by resetting ours and saving into guest's */
3702 preempt_disable(); 3716 preempt_disable();
3703 fx_save(&vcpu->arch.host_fx_image); 3717 fx_save(&vcpu->arch.host_fx_image);
3704 fpu_init(); 3718 fx_finit();
3705 fx_save(&vcpu->arch.guest_fx_image); 3719 fx_save(&vcpu->arch.guest_fx_image);
3706 fx_restore(&vcpu->arch.host_fx_image); 3720 fx_restore(&vcpu->arch.host_fx_image);
3707 preempt_enable(); 3721 preempt_enable();
@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
3906 kvm_free_physmem(kvm); 3920 kvm_free_physmem(kvm);
3907 if (kvm->arch.apic_access_page) 3921 if (kvm->arch.apic_access_page)
3908 put_page(kvm->arch.apic_access_page); 3922 put_page(kvm->arch.apic_access_page);
3923 if (kvm->arch.ept_identity_pagetable)
3924 put_page(kvm->arch.ept_identity_pagetable);
3909 kfree(kvm); 3925 kfree(kvm);
3910} 3926}
3911 3927
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 2ca08386f993..f2a696d6a243 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1761,6 +1761,7 @@ twobyte_insn:
1761 case 6: /* lmsw */ 1761 case 6: /* lmsw */
1762 realmode_lmsw(ctxt->vcpu, (u16)c->src.val, 1762 realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
1763 &ctxt->eflags); 1763 &ctxt->eflags);
1764 c->dst.type = OP_NONE;
1764 break; 1765 break;
1765 case 7: /* invlpg*/ 1766 case 7: /* invlpg*/
1766 emulate_invlpg(ctxt->vcpu, memop); 1767 emulate_invlpg(ctxt->vcpu, memop);
diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h
index 04ffbb8e0a35..81a69d711017 100644
--- a/include/asm-powerpc/kvm_host.h
+++ b/include/asm-powerpc/kvm_host.h
@@ -59,6 +59,7 @@ struct kvm_vcpu_stat {
59 u32 emulated_inst_exits; 59 u32 emulated_inst_exits;
60 u32 dec_exits; 60 u32 dec_exits;
61 u32 ext_intr_exits; 61 u32 ext_intr_exits;
62 u32 halt_wakeup;
62}; 63};
63 64
64struct tlbe { 65struct tlbe {
diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h
index 7ac820308a7e..b35a7e3ef978 100644
--- a/include/asm-powerpc/kvm_ppc.h
+++ b/include/asm-powerpc/kvm_ppc.h
@@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
77 clear_bit(priority, &vcpu->arch.pending_exceptions); 77 clear_bit(priority, &vcpu->arch.pending_exceptions);
78} 78}
79 79
80/* Helper function for "full" MSR writes. No need to call this if only EE is
81 * changing. */
80static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) 82static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
81{ 83{
82 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) 84 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
83 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); 85 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
84 86
85 vcpu->arch.msr = new_msr; 87 vcpu->arch.msr = new_msr;
88
89 if (vcpu->arch.msr & MSR_WE)
90 kvm_vcpu_block(vcpu);
86} 91}
87 92
88#endif /* __POWERPC_KVM_PPC_H__ */ 93#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 9d963cd6533c..1d8cd01fa514 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -314,6 +314,9 @@ struct kvm_arch{
314 struct page *apic_access_page; 314 struct page *apic_access_page;
315 315
316 gpa_t wall_clock; 316 gpa_t wall_clock;
317
318 struct page *ept_identity_pagetable;
319 bool ept_identity_pagetable_done;
317}; 320};
318 321
319struct kvm_vm_stat { 322struct kvm_vm_stat {
@@ -422,6 +425,7 @@ struct kvm_x86_ops {
422 struct kvm_run *run); 425 struct kvm_run *run);
423 426
424 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 427 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
428 int (*get_tdp_level)(void);
425}; 429};
426 430
427extern struct kvm_x86_ops *kvm_x86_ops; 431extern struct kvm_x86_ops *kvm_x86_ops;
@@ -433,6 +437,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
433int kvm_mmu_create(struct kvm_vcpu *vcpu); 437int kvm_mmu_create(struct kvm_vcpu *vcpu);
434int kvm_mmu_setup(struct kvm_vcpu *vcpu); 438int kvm_mmu_setup(struct kvm_vcpu *vcpu);
435void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); 439void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
440void kvm_mmu_set_base_ptes(u64 base_pte);
441void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
442 u64 dirty_mask, u64 nx_mask, u64 x_mask);
436 443
437int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 444int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
438void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 445void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -620,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image)
620 asm("fxrstor (%0)":: "r" (image)); 627 asm("fxrstor (%0)":: "r" (image));
621} 628}
622 629
623static inline void fpu_init(void) 630static inline void fx_finit(void)
624{ 631{
625 asm("finit"); 632 asm("finit");
626} 633}
@@ -644,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
644#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" 651#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
645#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" 652#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
646#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" 653#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
654#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
647#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" 655#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
648 656
649#define MSR_IA32_TIME_STAMP_COUNTER 0x010 657#define MSR_IA32_TIME_STAMP_COUNTER 0x010
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e89338e2b043..f7ba099049ea 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -522,6 +522,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
522 return bad_hva(); 522 return bad_hva();
523 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 523 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
524} 524}
525EXPORT_SYMBOL_GPL(gfn_to_hva);
525 526
526/* 527/*
527 * Requires current->mm->mmap_sem to be held 528 * Requires current->mm->mmap_sem to be held