diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/kvm/booke_guest.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/kvmclock.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/i8254.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 89 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.h | 37 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 10 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 375 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.h | 38 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 22 | ||||
-rw-r--r-- | arch/x86/kvm/x86_emulate.c | 1 |
11 files changed, 526 insertions, 78 deletions
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c index 6d9884a6884a..712d89a28c46 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke_guest.c | |||
@@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
49 | { "inst_emu", VCPU_STAT(emulated_inst_exits) }, | 49 | { "inst_emu", VCPU_STAT(emulated_inst_exits) }, |
50 | { "dec", VCPU_STAT(dec_exits) }, | 50 | { "dec", VCPU_STAT(dec_exits) }, |
51 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, | 51 | { "ext_intr", VCPU_STAT(ext_intr_exits) }, |
52 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | ||
52 | { NULL } | 53 | { NULL } |
53 | }; | 54 | }; |
54 | 55 | ||
@@ -338,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
338 | } | 339 | } |
339 | break; | 340 | break; |
340 | 341 | ||
342 | case BOOKE_INTERRUPT_FP_UNAVAIL: | ||
343 | kvmppc_queue_exception(vcpu, exit_nr); | ||
344 | r = RESUME_GUEST; | ||
345 | break; | ||
346 | |||
341 | case BOOKE_INTERRUPT_DATA_STORAGE: | 347 | case BOOKE_INTERRUPT_DATA_STORAGE: |
342 | vcpu->arch.dear = vcpu->arch.fault_dear; | 348 | vcpu->arch.dear = vcpu->arch.fault_dear; |
343 | vcpu->arch.esr = vcpu->arch.fault_esr; | 349 | vcpu->arch.esr = vcpu->arch.fault_esr; |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index bad40bd2d3ac..777e0f34e0ea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
36 | 36 | ||
37 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | 37 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) |
38 | { | 38 | { |
39 | /* XXX implement me */ | 39 | return !!(v->arch.pending_exceptions); |
40 | return 0; | ||
41 | } | 40 | } |
42 | 41 | ||
43 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 42 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
44 | { | 43 | { |
45 | return 1; | 44 | return !(v->arch.msr & MSR_WE); |
46 | } | 45 | } |
47 | 46 | ||
48 | 47 | ||
@@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data) | |||
214 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | 213 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; |
215 | 214 | ||
216 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); | 215 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); |
216 | |||
217 | if (waitqueue_active(&vcpu->wq)) { | ||
218 | wake_up_interruptible(&vcpu->wq); | ||
219 | vcpu->stat.halt_wakeup++; | ||
220 | } | ||
217 | } | 221 | } |
218 | 222 | ||
219 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 223 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
@@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
339 | int r; | 343 | int r; |
340 | sigset_t sigsaved; | 344 | sigset_t sigsaved; |
341 | 345 | ||
346 | vcpu_load(vcpu); | ||
347 | |||
342 | if (vcpu->sigset_active) | 348 | if (vcpu->sigset_active) |
343 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 349 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
344 | 350 | ||
@@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
363 | if (vcpu->sigset_active) | 369 | if (vcpu->sigset_active) |
364 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 370 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
365 | 371 | ||
372 | vcpu_put(vcpu); | ||
373 | |||
366 | return r; | 374 | return r; |
367 | } | 375 | } |
368 | 376 | ||
369 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 377 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
370 | { | 378 | { |
371 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); | 379 | kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); |
380 | |||
381 | if (waitqueue_active(&vcpu->wq)) { | ||
382 | wake_up_interruptible(&vcpu->wq); | ||
383 | vcpu->stat.halt_wakeup++; | ||
384 | } | ||
385 | |||
372 | return 0; | 386 | return 0; |
373 | } | 387 | } |
374 | 388 | ||
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index ddee04043aeb..4bc1be5d5472 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -133,6 +133,7 @@ static int kvm_register_clock(void) | |||
133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); |
134 | } | 134 | } |
135 | 135 | ||
136 | #ifdef CONFIG_X86_LOCAL_APIC | ||
136 | static void kvm_setup_secondary_clock(void) | 137 | static void kvm_setup_secondary_clock(void) |
137 | { | 138 | { |
138 | /* | 139 | /* |
@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void) | |||
143 | /* ok, done with our trickery, call native */ | 144 | /* ok, done with our trickery, call native */ |
144 | setup_secondary_APIC_clock(); | 145 | setup_secondary_APIC_clock(); |
145 | } | 146 | } |
147 | #endif | ||
146 | 148 | ||
147 | /* | 149 | /* |
148 | * After the clock is registered, the host will keep writing to the | 150 | * After the clock is registered, the host will keep writing to the |
@@ -177,7 +179,9 @@ void __init kvmclock_init(void) | |||
177 | pv_time_ops.get_wallclock = kvm_get_wallclock; | 179 | pv_time_ops.get_wallclock = kvm_get_wallclock; |
178 | pv_time_ops.set_wallclock = kvm_set_wallclock; | 180 | pv_time_ops.set_wallclock = kvm_set_wallclock; |
179 | pv_time_ops.sched_clock = kvm_clock_read; | 181 | pv_time_ops.sched_clock = kvm_clock_read; |
182 | #ifdef CONFIG_X86_LOCAL_APIC | ||
180 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | 183 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; |
184 | #endif | ||
181 | machine_ops.shutdown = kvm_shutdown; | 185 | machine_ops.shutdown = kvm_shutdown; |
182 | #ifdef CONFIG_KEXEC | 186 | #ifdef CONFIG_KEXEC |
183 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 187 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4c943eabacc3..3324d90038e4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
288 | * mode 1 is one shot, mode 2 is period, otherwise del timer */ | 288 | * mode 1 is one shot, mode 2 is period, otherwise del timer */ |
289 | switch (ps->channels[0].mode) { | 289 | switch (ps->channels[0].mode) { |
290 | case 1: | 290 | case 1: |
291 | /* FIXME: enhance mode 4 precision */ | ||
292 | case 4: | ||
291 | create_pit_timer(&ps->pit_timer, val, 0); | 293 | create_pit_timer(&ps->pit_timer, val, 0); |
292 | break; | 294 | break; |
293 | case 2: | 295 | case 2: |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2ad6f5481671..36c5406b1813 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -79,36 +79,6 @@ static int dbg = 1; | |||
79 | } | 79 | } |
80 | #endif | 80 | #endif |
81 | 81 | ||
82 | #define PT64_PT_BITS 9 | ||
83 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) | ||
84 | #define PT32_PT_BITS 10 | ||
85 | #define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS) | ||
86 | |||
87 | #define PT_WRITABLE_SHIFT 1 | ||
88 | |||
89 | #define PT_PRESENT_MASK (1ULL << 0) | ||
90 | #define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) | ||
91 | #define PT_USER_MASK (1ULL << 2) | ||
92 | #define PT_PWT_MASK (1ULL << 3) | ||
93 | #define PT_PCD_MASK (1ULL << 4) | ||
94 | #define PT_ACCESSED_MASK (1ULL << 5) | ||
95 | #define PT_DIRTY_MASK (1ULL << 6) | ||
96 | #define PT_PAGE_SIZE_MASK (1ULL << 7) | ||
97 | #define PT_PAT_MASK (1ULL << 7) | ||
98 | #define PT_GLOBAL_MASK (1ULL << 8) | ||
99 | #define PT64_NX_SHIFT 63 | ||
100 | #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT) | ||
101 | |||
102 | #define PT_PAT_SHIFT 7 | ||
103 | #define PT_DIR_PAT_SHIFT 12 | ||
104 | #define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT) | ||
105 | |||
106 | #define PT32_DIR_PSE36_SIZE 4 | ||
107 | #define PT32_DIR_PSE36_SHIFT 13 | ||
108 | #define PT32_DIR_PSE36_MASK \ | ||
109 | (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) | ||
110 | |||
111 | |||
112 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 82 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 |
113 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 83 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
114 | 84 | ||
@@ -154,10 +124,6 @@ static int dbg = 1; | |||
154 | #define PFERR_USER_MASK (1U << 2) | 124 | #define PFERR_USER_MASK (1U << 2) |
155 | #define PFERR_FETCH_MASK (1U << 4) | 125 | #define PFERR_FETCH_MASK (1U << 4) |
156 | 126 | ||
157 | #define PT64_ROOT_LEVEL 4 | ||
158 | #define PT32_ROOT_LEVEL 2 | ||
159 | #define PT32E_ROOT_LEVEL 3 | ||
160 | |||
161 | #define PT_DIRECTORY_LEVEL 2 | 127 | #define PT_DIRECTORY_LEVEL 2 |
162 | #define PT_PAGE_TABLE_LEVEL 1 | 128 | #define PT_PAGE_TABLE_LEVEL 1 |
163 | 129 | ||
@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache; | |||
186 | 152 | ||
187 | static u64 __read_mostly shadow_trap_nonpresent_pte; | 153 | static u64 __read_mostly shadow_trap_nonpresent_pte; |
188 | static u64 __read_mostly shadow_notrap_nonpresent_pte; | 154 | static u64 __read_mostly shadow_notrap_nonpresent_pte; |
155 | static u64 __read_mostly shadow_base_present_pte; | ||
156 | static u64 __read_mostly shadow_nx_mask; | ||
157 | static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ | ||
158 | static u64 __read_mostly shadow_user_mask; | ||
159 | static u64 __read_mostly shadow_accessed_mask; | ||
160 | static u64 __read_mostly shadow_dirty_mask; | ||
189 | 161 | ||
190 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | 162 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) |
191 | { | 163 | { |
@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | |||
194 | } | 166 | } |
195 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); | 167 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); |
196 | 168 | ||
169 | void kvm_mmu_set_base_ptes(u64 base_pte) | ||
170 | { | ||
171 | shadow_base_present_pte = base_pte; | ||
172 | } | ||
173 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); | ||
174 | |||
175 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | ||
176 | u64 dirty_mask, u64 nx_mask, u64 x_mask) | ||
177 | { | ||
178 | shadow_user_mask = user_mask; | ||
179 | shadow_accessed_mask = accessed_mask; | ||
180 | shadow_dirty_mask = dirty_mask; | ||
181 | shadow_nx_mask = nx_mask; | ||
182 | shadow_x_mask = x_mask; | ||
183 | } | ||
184 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | ||
185 | |||
197 | static int is_write_protection(struct kvm_vcpu *vcpu) | 186 | static int is_write_protection(struct kvm_vcpu *vcpu) |
198 | { | 187 | { |
199 | return vcpu->arch.cr0 & X86_CR0_WP; | 188 | return vcpu->arch.cr0 & X86_CR0_WP; |
@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte) | |||
232 | 221 | ||
233 | static int is_dirty_pte(unsigned long pte) | 222 | static int is_dirty_pte(unsigned long pte) |
234 | { | 223 | { |
235 | return pte & PT_DIRTY_MASK; | 224 | return pte & shadow_dirty_mask; |
236 | } | 225 | } |
237 | 226 | ||
238 | static int is_rmap_pte(u64 pte) | 227 | static int is_rmap_pte(u64 pte) |
@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) | |||
387 | 376 | ||
388 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); | 377 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); |
389 | *write_count += 1; | 378 | *write_count += 1; |
390 | WARN_ON(*write_count > KVM_PAGES_PER_HPAGE); | ||
391 | } | 379 | } |
392 | 380 | ||
393 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | 381 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) |
@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
547 | return; | 535 | return; |
548 | sp = page_header(__pa(spte)); | 536 | sp = page_header(__pa(spte)); |
549 | pfn = spte_to_pfn(*spte); | 537 | pfn = spte_to_pfn(*spte); |
550 | if (*spte & PT_ACCESSED_MASK) | 538 | if (*spte & shadow_accessed_mask) |
551 | kvm_set_pfn_accessed(pfn); | 539 | kvm_set_pfn_accessed(pfn); |
552 | if (is_writeble_pte(*spte)) | 540 | if (is_writeble_pte(*spte)) |
553 | kvm_release_pfn_dirty(pfn); | 541 | kvm_release_pfn_dirty(pfn); |
@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1073 | * whether the guest actually used the pte (in order to detect | 1061 | * whether the guest actually used the pte (in order to detect |
1074 | * demand paging). | 1062 | * demand paging). |
1075 | */ | 1063 | */ |
1076 | spte = PT_PRESENT_MASK | PT_DIRTY_MASK; | 1064 | spte = shadow_base_present_pte | shadow_dirty_mask; |
1077 | if (!speculative) | 1065 | if (!speculative) |
1078 | pte_access |= PT_ACCESSED_MASK; | 1066 | pte_access |= PT_ACCESSED_MASK; |
1079 | if (!dirty) | 1067 | if (!dirty) |
1080 | pte_access &= ~ACC_WRITE_MASK; | 1068 | pte_access &= ~ACC_WRITE_MASK; |
1081 | if (!(pte_access & ACC_EXEC_MASK)) | 1069 | if (pte_access & ACC_EXEC_MASK) |
1082 | spte |= PT64_NX_MASK; | 1070 | spte |= shadow_x_mask; |
1083 | 1071 | else | |
1084 | spte |= PT_PRESENT_MASK; | 1072 | spte |= shadow_nx_mask; |
1085 | if (pte_access & ACC_USER_MASK) | 1073 | if (pte_access & ACC_USER_MASK) |
1086 | spte |= PT_USER_MASK; | 1074 | spte |= shadow_user_mask; |
1087 | if (largepage) | 1075 | if (largepage) |
1088 | spte |= PT_PAGE_SIZE_MASK; | 1076 | spte |= PT_PAGE_SIZE_MASK; |
1089 | 1077 | ||
@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1188 | return -ENOMEM; | 1176 | return -ENOMEM; |
1189 | } | 1177 | } |
1190 | 1178 | ||
1191 | table[index] = __pa(new_table->spt) | PT_PRESENT_MASK | 1179 | table[index] = __pa(new_table->spt) |
1192 | | PT_WRITABLE_MASK | PT_USER_MASK; | 1180 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
1181 | | shadow_user_mask | shadow_x_mask; | ||
1193 | } | 1182 | } |
1194 | table_addr = table[index] & PT64_BASE_ADDR_MASK; | 1183 | table_addr = table[index] & PT64_BASE_ADDR_MASK; |
1195 | } | 1184 | } |
@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1244 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 1233 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
1245 | return; | 1234 | return; |
1246 | spin_lock(&vcpu->kvm->mmu_lock); | 1235 | spin_lock(&vcpu->kvm->mmu_lock); |
1247 | #ifdef CONFIG_X86_64 | ||
1248 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 1236 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
1249 | hpa_t root = vcpu->arch.mmu.root_hpa; | 1237 | hpa_t root = vcpu->arch.mmu.root_hpa; |
1250 | 1238 | ||
@@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1256 | spin_unlock(&vcpu->kvm->mmu_lock); | 1244 | spin_unlock(&vcpu->kvm->mmu_lock); |
1257 | return; | 1245 | return; |
1258 | } | 1246 | } |
1259 | #endif | ||
1260 | for (i = 0; i < 4; ++i) { | 1247 | for (i = 0; i < 4; ++i) { |
1261 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 1248 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
1262 | 1249 | ||
@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1282 | 1269 | ||
1283 | root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; | 1270 | root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; |
1284 | 1271 | ||
1285 | #ifdef CONFIG_X86_64 | ||
1286 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 1272 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
1287 | hpa_t root = vcpu->arch.mmu.root_hpa; | 1273 | hpa_t root = vcpu->arch.mmu.root_hpa; |
1288 | 1274 | ||
@@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1297 | vcpu->arch.mmu.root_hpa = root; | 1283 | vcpu->arch.mmu.root_hpa = root; |
1298 | return; | 1284 | return; |
1299 | } | 1285 | } |
1300 | #endif | ||
1301 | metaphysical = !is_paging(vcpu); | 1286 | metaphysical = !is_paging(vcpu); |
1302 | if (tdp_enabled) | 1287 | if (tdp_enabled) |
1303 | metaphysical = 1; | 1288 | metaphysical = 1; |
@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1377 | spin_lock(&vcpu->kvm->mmu_lock); | 1362 | spin_lock(&vcpu->kvm->mmu_lock); |
1378 | kvm_mmu_free_some_pages(vcpu); | 1363 | kvm_mmu_free_some_pages(vcpu); |
1379 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 1364 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
1380 | largepage, gfn, pfn, TDP_ROOT_LEVEL); | 1365 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); |
1381 | spin_unlock(&vcpu->kvm->mmu_lock); | 1366 | spin_unlock(&vcpu->kvm->mmu_lock); |
1382 | 1367 | ||
1383 | return r; | 1368 | return r; |
@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
1484 | context->page_fault = tdp_page_fault; | 1469 | context->page_fault = tdp_page_fault; |
1485 | context->free = nonpaging_free; | 1470 | context->free = nonpaging_free; |
1486 | context->prefetch_page = nonpaging_prefetch_page; | 1471 | context->prefetch_page = nonpaging_prefetch_page; |
1487 | context->shadow_root_level = TDP_ROOT_LEVEL; | 1472 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
1488 | context->root_hpa = INVALID_PAGE; | 1473 | context->root_hpa = INVALID_PAGE; |
1489 | 1474 | ||
1490 | if (!is_paging(vcpu)) { | 1475 | if (!is_paging(vcpu)) { |
@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
1633 | { | 1618 | { |
1634 | u64 *spte = vcpu->arch.last_pte_updated; | 1619 | u64 *spte = vcpu->arch.last_pte_updated; |
1635 | 1620 | ||
1636 | return !!(spte && (*spte & PT_ACCESSED_MASK)); | 1621 | return !!(spte && (*spte & shadow_accessed_mask)); |
1637 | } | 1622 | } |
1638 | 1623 | ||
1639 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 1624 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e64e9f56a65e..1730757bbc7a 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -3,11 +3,38 @@ | |||
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | 5 | ||
6 | #ifdef CONFIG_X86_64 | 6 | #define PT64_PT_BITS 9 |
7 | #define TDP_ROOT_LEVEL PT64_ROOT_LEVEL | 7 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) |
8 | #else | 8 | #define PT32_PT_BITS 10 |
9 | #define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL | 9 | #define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS) |
10 | #endif | 10 | |
11 | #define PT_WRITABLE_SHIFT 1 | ||
12 | |||
13 | #define PT_PRESENT_MASK (1ULL << 0) | ||
14 | #define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) | ||
15 | #define PT_USER_MASK (1ULL << 2) | ||
16 | #define PT_PWT_MASK (1ULL << 3) | ||
17 | #define PT_PCD_MASK (1ULL << 4) | ||
18 | #define PT_ACCESSED_MASK (1ULL << 5) | ||
19 | #define PT_DIRTY_MASK (1ULL << 6) | ||
20 | #define PT_PAGE_SIZE_MASK (1ULL << 7) | ||
21 | #define PT_PAT_MASK (1ULL << 7) | ||
22 | #define PT_GLOBAL_MASK (1ULL << 8) | ||
23 | #define PT64_NX_SHIFT 63 | ||
24 | #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT) | ||
25 | |||
26 | #define PT_PAT_SHIFT 7 | ||
27 | #define PT_DIR_PAT_SHIFT 12 | ||
28 | #define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT) | ||
29 | |||
30 | #define PT32_DIR_PSE36_SIZE 4 | ||
31 | #define PT32_DIR_PSE36_SHIFT 13 | ||
32 | #define PT32_DIR_PSE36_MASK \ | ||
33 | (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) | ||
34 | |||
35 | #define PT64_ROOT_LEVEL 4 | ||
36 | #define PT32_ROOT_LEVEL 2 | ||
37 | #define PT32E_ROOT_LEVEL 3 | ||
11 | 38 | ||
12 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 39 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
13 | { | 40 | { |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 89e0be2c10d0..ab22615eee89 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void) | |||
1863 | return false; | 1863 | return false; |
1864 | } | 1864 | } |
1865 | 1865 | ||
1866 | static int get_npt_level(void) | ||
1867 | { | ||
1868 | #ifdef CONFIG_X86_64 | ||
1869 | return PT64_ROOT_LEVEL; | ||
1870 | #else | ||
1871 | return PT32E_ROOT_LEVEL; | ||
1872 | #endif | ||
1873 | } | ||
1874 | |||
1866 | static struct kvm_x86_ops svm_x86_ops = { | 1875 | static struct kvm_x86_ops svm_x86_ops = { |
1867 | .cpu_has_kvm_support = has_svm, | 1876 | .cpu_has_kvm_support = has_svm, |
1868 | .disabled_by_bios = is_disabled, | 1877 | .disabled_by_bios = is_disabled, |
@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
1920 | .inject_pending_vectors = do_interrupt_requests, | 1929 | .inject_pending_vectors = do_interrupt_requests, |
1921 | 1930 | ||
1922 | .set_tss_addr = svm_set_tss_addr, | 1931 | .set_tss_addr = svm_set_tss_addr, |
1932 | .get_tdp_level = get_npt_level, | ||
1923 | }; | 1933 | }; |
1924 | 1934 | ||
1925 | static int __init svm_init(void) | 1935 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8e5d6645b90d..bfe4db11989c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0); | |||
42 | static int flexpriority_enabled = 1; | 42 | static int flexpriority_enabled = 1; |
43 | module_param(flexpriority_enabled, bool, 0); | 43 | module_param(flexpriority_enabled, bool, 0); |
44 | 44 | ||
45 | static int enable_ept = 1; | ||
46 | module_param(enable_ept, bool, 0); | ||
47 | |||
45 | struct vmcs { | 48 | struct vmcs { |
46 | u32 revision_id; | 49 | u32 revision_id; |
47 | u32 abort; | 50 | u32 abort; |
@@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
84 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 87 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
85 | } | 88 | } |
86 | 89 | ||
87 | static int init_rmode_tss(struct kvm *kvm); | 90 | static int init_rmode(struct kvm *kvm); |
88 | 91 | ||
89 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 92 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
90 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 93 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -107,6 +110,11 @@ static struct vmcs_config { | |||
107 | u32 vmentry_ctrl; | 110 | u32 vmentry_ctrl; |
108 | } vmcs_config; | 111 | } vmcs_config; |
109 | 112 | ||
113 | struct vmx_capability { | ||
114 | u32 ept; | ||
115 | u32 vpid; | ||
116 | } vmx_capability; | ||
117 | |||
110 | #define VMX_SEGMENT_FIELD(seg) \ | 118 | #define VMX_SEGMENT_FIELD(seg) \ |
111 | [VCPU_SREG_##seg] = { \ | 119 | [VCPU_SREG_##seg] = { \ |
112 | .selector = GUEST_##seg##_SELECTOR, \ | 120 | .selector = GUEST_##seg##_SELECTOR, \ |
@@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | |||
214 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 222 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
215 | } | 223 | } |
216 | 224 | ||
225 | static inline int cpu_has_vmx_invept_individual_addr(void) | ||
226 | { | ||
227 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT)); | ||
228 | } | ||
229 | |||
230 | static inline int cpu_has_vmx_invept_context(void) | ||
231 | { | ||
232 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT)); | ||
233 | } | ||
234 | |||
235 | static inline int cpu_has_vmx_invept_global(void) | ||
236 | { | ||
237 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT)); | ||
238 | } | ||
239 | |||
240 | static inline int cpu_has_vmx_ept(void) | ||
241 | { | ||
242 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | ||
243 | SECONDARY_EXEC_ENABLE_EPT); | ||
244 | } | ||
245 | |||
246 | static inline int vm_need_ept(void) | ||
247 | { | ||
248 | return (cpu_has_vmx_ept() && enable_ept); | ||
249 | } | ||
250 | |||
217 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 251 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
218 | { | 252 | { |
219 | return ((cpu_has_vmx_virtualize_apic_accesses()) && | 253 | return ((cpu_has_vmx_virtualize_apic_accesses()) && |
@@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) | |||
250 | : : "a"(&operand), "c"(ext) : "cc", "memory"); | 284 | : : "a"(&operand), "c"(ext) : "cc", "memory"); |
251 | } | 285 | } |
252 | 286 | ||
287 | static inline void __invept(int ext, u64 eptp, gpa_t gpa) | ||
288 | { | ||
289 | struct { | ||
290 | u64 eptp, gpa; | ||
291 | } operand = {eptp, gpa}; | ||
292 | |||
293 | asm volatile (ASM_VMX_INVEPT | ||
294 | /* CF==1 or ZF==1 --> rc = -1 */ | ||
295 | "; ja 1f ; ud2 ; 1:\n" | ||
296 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | ||
297 | } | ||
298 | |||
253 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 299 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
254 | { | 300 | { |
255 | int i; | 301 | int i; |
@@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) | |||
301 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | 347 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); |
302 | } | 348 | } |
303 | 349 | ||
350 | static inline void ept_sync_global(void) | ||
351 | { | ||
352 | if (cpu_has_vmx_invept_global()) | ||
353 | __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); | ||
354 | } | ||
355 | |||
356 | static inline void ept_sync_context(u64 eptp) | ||
357 | { | ||
358 | if (vm_need_ept()) { | ||
359 | if (cpu_has_vmx_invept_context()) | ||
360 | __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); | ||
361 | else | ||
362 | ept_sync_global(); | ||
363 | } | ||
364 | } | ||
365 | |||
366 | static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) | ||
367 | { | ||
368 | if (vm_need_ept()) { | ||
369 | if (cpu_has_vmx_invept_individual_addr()) | ||
370 | __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, | ||
371 | eptp, gpa); | ||
372 | else | ||
373 | ept_sync_context(eptp); | ||
374 | } | ||
375 | } | ||
376 | |||
304 | static unsigned long vmcs_readl(unsigned long field) | 377 | static unsigned long vmcs_readl(unsigned long field) |
305 | { | 378 | { |
306 | unsigned long value; | 379 | unsigned long value; |
@@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
388 | eb |= 1u << 1; | 461 | eb |= 1u << 1; |
389 | if (vcpu->arch.rmode.active) | 462 | if (vcpu->arch.rmode.active) |
390 | eb = ~0; | 463 | eb = ~0; |
464 | if (vm_need_ept()) | ||
465 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | ||
391 | vmcs_write32(EXCEPTION_BITMAP, eb); | 466 | vmcs_write32(EXCEPTION_BITMAP, eb); |
392 | } | 467 | } |
393 | 468 | ||
@@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | |||
985 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | 1060 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) |
986 | { | 1061 | { |
987 | u32 vmx_msr_low, vmx_msr_high; | 1062 | u32 vmx_msr_low, vmx_msr_high; |
988 | u32 min, opt; | 1063 | u32 min, opt, min2, opt2; |
989 | u32 _pin_based_exec_control = 0; | 1064 | u32 _pin_based_exec_control = 0; |
990 | u32 _cpu_based_exec_control = 0; | 1065 | u32 _cpu_based_exec_control = 0; |
991 | u32 _cpu_based_2nd_exec_control = 0; | 1066 | u32 _cpu_based_2nd_exec_control = 0; |
@@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1003 | CPU_BASED_CR8_LOAD_EXITING | | 1078 | CPU_BASED_CR8_LOAD_EXITING | |
1004 | CPU_BASED_CR8_STORE_EXITING | | 1079 | CPU_BASED_CR8_STORE_EXITING | |
1005 | #endif | 1080 | #endif |
1081 | CPU_BASED_CR3_LOAD_EXITING | | ||
1082 | CPU_BASED_CR3_STORE_EXITING | | ||
1006 | CPU_BASED_USE_IO_BITMAPS | | 1083 | CPU_BASED_USE_IO_BITMAPS | |
1007 | CPU_BASED_MOV_DR_EXITING | | 1084 | CPU_BASED_MOV_DR_EXITING | |
1008 | CPU_BASED_USE_TSC_OFFSETING; | 1085 | CPU_BASED_USE_TSC_OFFSETING; |
@@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1018 | ~CPU_BASED_CR8_STORE_EXITING; | 1095 | ~CPU_BASED_CR8_STORE_EXITING; |
1019 | #endif | 1096 | #endif |
1020 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { | 1097 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { |
1021 | min = 0; | 1098 | min2 = 0; |
1022 | opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 1099 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
1023 | SECONDARY_EXEC_WBINVD_EXITING | | 1100 | SECONDARY_EXEC_WBINVD_EXITING | |
1024 | SECONDARY_EXEC_ENABLE_VPID; | 1101 | SECONDARY_EXEC_ENABLE_VPID | |
1025 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, | 1102 | SECONDARY_EXEC_ENABLE_EPT; |
1103 | if (adjust_vmx_controls(min2, opt2, | ||
1104 | MSR_IA32_VMX_PROCBASED_CTLS2, | ||
1026 | &_cpu_based_2nd_exec_control) < 0) | 1105 | &_cpu_based_2nd_exec_control) < 0) |
1027 | return -EIO; | 1106 | return -EIO; |
1028 | } | 1107 | } |
@@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1031 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | 1110 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) |
1032 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; | 1111 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; |
1033 | #endif | 1112 | #endif |
1113 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | ||
1114 | /* CR3 accesses don't need to cause VM Exits when EPT enabled */ | ||
1115 | min &= ~(CPU_BASED_CR3_LOAD_EXITING | | ||
1116 | CPU_BASED_CR3_STORE_EXITING); | ||
1117 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, | ||
1118 | &_cpu_based_exec_control) < 0) | ||
1119 | return -EIO; | ||
1120 | rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, | ||
1121 | vmx_capability.ept, vmx_capability.vpid); | ||
1122 | } | ||
1034 | 1123 | ||
1035 | min = 0; | 1124 | min = 0; |
1036 | #ifdef CONFIG_X86_64 | 1125 | #ifdef CONFIG_X86_64 |
@@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1256 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); | 1345 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); |
1257 | 1346 | ||
1258 | kvm_mmu_reset_context(vcpu); | 1347 | kvm_mmu_reset_context(vcpu); |
1259 | init_rmode_tss(vcpu->kvm); | 1348 | init_rmode(vcpu->kvm); |
1260 | } | 1349 | } |
1261 | 1350 | ||
1262 | #ifdef CONFIG_X86_64 | 1351 | #ifdef CONFIG_X86_64 |
@@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
1304 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; | 1393 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; |
1305 | } | 1394 | } |
1306 | 1395 | ||
1396 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | ||
1397 | { | ||
1398 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | ||
1399 | if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { | ||
1400 | printk(KERN_ERR "EPT: Fail to load pdptrs!\n"); | ||
1401 | return; | ||
1402 | } | ||
1403 | vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]); | ||
1404 | vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]); | ||
1405 | vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]); | ||
1406 | vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]); | ||
1407 | } | ||
1408 | } | ||
1409 | |||
1410 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | ||
1411 | |||
1412 | static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | ||
1413 | unsigned long cr0, | ||
1414 | struct kvm_vcpu *vcpu) | ||
1415 | { | ||
1416 | if (!(cr0 & X86_CR0_PG)) { | ||
1417 | /* From paging/starting to nonpaging */ | ||
1418 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | ||
1419 | vmcs_config.cpu_based_exec_ctrl | | ||
1420 | (CPU_BASED_CR3_LOAD_EXITING | | ||
1421 | CPU_BASED_CR3_STORE_EXITING)); | ||
1422 | vcpu->arch.cr0 = cr0; | ||
1423 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | ||
1424 | *hw_cr0 |= X86_CR0_PE | X86_CR0_PG; | ||
1425 | *hw_cr0 &= ~X86_CR0_WP; | ||
1426 | } else if (!is_paging(vcpu)) { | ||
1427 | /* From nonpaging to paging */ | ||
1428 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | ||
1429 | vmcs_config.cpu_based_exec_ctrl & | ||
1430 | ~(CPU_BASED_CR3_LOAD_EXITING | | ||
1431 | CPU_BASED_CR3_STORE_EXITING)); | ||
1432 | vcpu->arch.cr0 = cr0; | ||
1433 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | ||
1434 | if (!(vcpu->arch.cr0 & X86_CR0_WP)) | ||
1435 | *hw_cr0 &= ~X86_CR0_WP; | ||
1436 | } | ||
1437 | } | ||
1438 | |||
1439 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | ||
1440 | struct kvm_vcpu *vcpu) | ||
1441 | { | ||
1442 | if (!is_paging(vcpu)) { | ||
1443 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1444 | *hw_cr4 |= X86_CR4_PSE; | ||
1445 | } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) | ||
1446 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1447 | } | ||
1448 | |||
1307 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1449 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
1308 | { | 1450 | { |
1451 | unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | | ||
1452 | KVM_VM_CR0_ALWAYS_ON; | ||
1453 | |||
1309 | vmx_fpu_deactivate(vcpu); | 1454 | vmx_fpu_deactivate(vcpu); |
1310 | 1455 | ||
1311 | if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) | 1456 | if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) |
@@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1323 | } | 1468 | } |
1324 | #endif | 1469 | #endif |
1325 | 1470 | ||
1471 | if (vm_need_ept()) | ||
1472 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | ||
1473 | |||
1326 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1474 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1327 | vmcs_writel(GUEST_CR0, | 1475 | vmcs_writel(GUEST_CR0, hw_cr0); |
1328 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); | ||
1329 | vcpu->arch.cr0 = cr0; | 1476 | vcpu->arch.cr0 = cr0; |
1330 | 1477 | ||
1331 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) | 1478 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) |
1332 | vmx_fpu_activate(vcpu); | 1479 | vmx_fpu_activate(vcpu); |
1333 | } | 1480 | } |
1334 | 1481 | ||
1482 | static u64 construct_eptp(unsigned long root_hpa) | ||
1483 | { | ||
1484 | u64 eptp; | ||
1485 | |||
1486 | /* TODO write the value reading from MSR */ | ||
1487 | eptp = VMX_EPT_DEFAULT_MT | | ||
1488 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; | ||
1489 | eptp |= (root_hpa & PAGE_MASK); | ||
1490 | |||
1491 | return eptp; | ||
1492 | } | ||
1493 | |||
1335 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 1494 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
1336 | { | 1495 | { |
1496 | unsigned long guest_cr3; | ||
1497 | u64 eptp; | ||
1498 | |||
1499 | guest_cr3 = cr3; | ||
1500 | if (vm_need_ept()) { | ||
1501 | eptp = construct_eptp(cr3); | ||
1502 | vmcs_write64(EPT_POINTER, eptp); | ||
1503 | ept_sync_context(eptp); | ||
1504 | ept_load_pdptrs(vcpu); | ||
1505 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | ||
1506 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | ||
1507 | } | ||
1508 | |||
1337 | vmx_flush_tlb(vcpu); | 1509 | vmx_flush_tlb(vcpu); |
1338 | vmcs_writel(GUEST_CR3, cr3); | 1510 | vmcs_writel(GUEST_CR3, guest_cr3); |
1339 | if (vcpu->arch.cr0 & X86_CR0_PE) | 1511 | if (vcpu->arch.cr0 & X86_CR0_PE) |
1340 | vmx_fpu_deactivate(vcpu); | 1512 | vmx_fpu_deactivate(vcpu); |
1341 | } | 1513 | } |
1342 | 1514 | ||
1343 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1515 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
1344 | { | 1516 | { |
1345 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1517 | unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? |
1346 | vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? | 1518 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1347 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); | 1519 | |
1348 | vcpu->arch.cr4 = cr4; | 1520 | vcpu->arch.cr4 = cr4; |
1521 | if (vm_need_ept()) | ||
1522 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | ||
1523 | |||
1524 | vmcs_writel(CR4_READ_SHADOW, cr4); | ||
1525 | vmcs_writel(GUEST_CR4, hw_cr4); | ||
1349 | } | 1526 | } |
1350 | 1527 | ||
1351 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1528 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
@@ -1530,6 +1707,41 @@ out: | |||
1530 | return ret; | 1707 | return ret; |
1531 | } | 1708 | } |
1532 | 1709 | ||
1710 | static int init_rmode_identity_map(struct kvm *kvm) | ||
1711 | { | ||
1712 | int i, r, ret; | ||
1713 | pfn_t identity_map_pfn; | ||
1714 | u32 tmp; | ||
1715 | |||
1716 | if (!vm_need_ept()) | ||
1717 | return 1; | ||
1718 | if (unlikely(!kvm->arch.ept_identity_pagetable)) { | ||
1719 | printk(KERN_ERR "EPT: identity-mapping pagetable " | ||
1720 | "haven't been allocated!\n"); | ||
1721 | return 0; | ||
1722 | } | ||
1723 | if (likely(kvm->arch.ept_identity_pagetable_done)) | ||
1724 | return 1; | ||
1725 | ret = 0; | ||
1726 | identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT; | ||
1727 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); | ||
1728 | if (r < 0) | ||
1729 | goto out; | ||
1730 | /* Set up identity-mapping pagetable for EPT in real mode */ | ||
1731 | for (i = 0; i < PT32_ENT_PER_PAGE; i++) { | ||
1732 | tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | | ||
1733 | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); | ||
1734 | r = kvm_write_guest_page(kvm, identity_map_pfn, | ||
1735 | &tmp, i * sizeof(tmp), sizeof(tmp)); | ||
1736 | if (r < 0) | ||
1737 | goto out; | ||
1738 | } | ||
1739 | kvm->arch.ept_identity_pagetable_done = true; | ||
1740 | ret = 1; | ||
1741 | out: | ||
1742 | return ret; | ||
1743 | } | ||
1744 | |||
1533 | static void seg_setup(int seg) | 1745 | static void seg_setup(int seg) |
1534 | { | 1746 | { |
1535 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 1747 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
@@ -1564,6 +1776,31 @@ out: | |||
1564 | return r; | 1776 | return r; |
1565 | } | 1777 | } |
1566 | 1778 | ||
1779 | static int alloc_identity_pagetable(struct kvm *kvm) | ||
1780 | { | ||
1781 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
1782 | int r = 0; | ||
1783 | |||
1784 | down_write(&kvm->slots_lock); | ||
1785 | if (kvm->arch.ept_identity_pagetable) | ||
1786 | goto out; | ||
1787 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | ||
1788 | kvm_userspace_mem.flags = 0; | ||
1789 | kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; | ||
1790 | kvm_userspace_mem.memory_size = PAGE_SIZE; | ||
1791 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | ||
1792 | if (r) | ||
1793 | goto out; | ||
1794 | |||
1795 | down_read(¤t->mm->mmap_sem); | ||
1796 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | ||
1797 | VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); | ||
1798 | up_read(¤t->mm->mmap_sem); | ||
1799 | out: | ||
1800 | up_write(&kvm->slots_lock); | ||
1801 | return r; | ||
1802 | } | ||
1803 | |||
1567 | static void allocate_vpid(struct vcpu_vmx *vmx) | 1804 | static void allocate_vpid(struct vcpu_vmx *vmx) |
1568 | { | 1805 | { |
1569 | int vpid; | 1806 | int vpid; |
@@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1638 | CPU_BASED_CR8_LOAD_EXITING; | 1875 | CPU_BASED_CR8_LOAD_EXITING; |
1639 | #endif | 1876 | #endif |
1640 | } | 1877 | } |
1878 | if (!vm_need_ept()) | ||
1879 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | ||
1880 | CPU_BASED_CR3_LOAD_EXITING; | ||
1641 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); | 1881 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); |
1642 | 1882 | ||
1643 | if (cpu_has_secondary_exec_ctrls()) { | 1883 | if (cpu_has_secondary_exec_ctrls()) { |
@@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1647 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 1887 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
1648 | if (vmx->vpid == 0) | 1888 | if (vmx->vpid == 0) |
1649 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 1889 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
1890 | if (!vm_need_ept()) | ||
1891 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | ||
1650 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 1892 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
1651 | } | 1893 | } |
1652 | 1894 | ||
@@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1722 | return 0; | 1964 | return 0; |
1723 | } | 1965 | } |
1724 | 1966 | ||
1967 | static int init_rmode(struct kvm *kvm) | ||
1968 | { | ||
1969 | if (!init_rmode_tss(kvm)) | ||
1970 | return 0; | ||
1971 | if (!init_rmode_identity_map(kvm)) | ||
1972 | return 0; | ||
1973 | return 1; | ||
1974 | } | ||
1975 | |||
1725 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 1976 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
1726 | { | 1977 | { |
1727 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1978 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
1729 | int ret; | 1980 | int ret; |
1730 | 1981 | ||
1731 | down_read(&vcpu->kvm->slots_lock); | 1982 | down_read(&vcpu->kvm->slots_lock); |
1732 | if (!init_rmode_tss(vmx->vcpu.kvm)) { | 1983 | if (!init_rmode(vmx->vcpu.kvm)) { |
1733 | ret = -ENOMEM; | 1984 | ret = -ENOMEM; |
1734 | goto out; | 1985 | goto out; |
1735 | } | 1986 | } |
@@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1994 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 2245 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
1995 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 2246 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
1996 | if (is_page_fault(intr_info)) { | 2247 | if (is_page_fault(intr_info)) { |
2248 | /* EPT won't cause page fault directly */ | ||
2249 | if (vm_need_ept()) | ||
2250 | BUG(); | ||
1997 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2251 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
1998 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2252 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
1999 | (u32)((u64)cr2 >> 32), handler); | 2253 | (u32)((u64)cr2 >> 32), handler); |
@@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2323 | return kvm_task_switch(vcpu, tss_selector, reason); | 2577 | return kvm_task_switch(vcpu, tss_selector, reason); |
2324 | } | 2578 | } |
2325 | 2579 | ||
2580 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2581 | { | ||
2582 | u64 exit_qualification; | ||
2583 | enum emulation_result er; | ||
2584 | gpa_t gpa; | ||
2585 | unsigned long hva; | ||
2586 | int gla_validity; | ||
2587 | int r; | ||
2588 | |||
2589 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | ||
2590 | |||
2591 | if (exit_qualification & (1 << 6)) { | ||
2592 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); | ||
2593 | return -ENOTSUPP; | ||
2594 | } | ||
2595 | |||
2596 | gla_validity = (exit_qualification >> 7) & 0x3; | ||
2597 | if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) { | ||
2598 | printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); | ||
2599 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", | ||
2600 | (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), | ||
2601 | (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); | ||
2602 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | ||
2603 | (long unsigned int)exit_qualification); | ||
2604 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | ||
2605 | kvm_run->hw.hardware_exit_reason = 0; | ||
2606 | return -ENOTSUPP; | ||
2607 | } | ||
2608 | |||
2609 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | ||
2610 | hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
2611 | if (!kvm_is_error_hva(hva)) { | ||
2612 | r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); | ||
2613 | if (r < 0) { | ||
2614 | printk(KERN_ERR "EPT: Not enough memory!\n"); | ||
2615 | return -ENOMEM; | ||
2616 | } | ||
2617 | return 1; | ||
2618 | } else { | ||
2619 | /* must be MMIO */ | ||
2620 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | ||
2621 | |||
2622 | if (er == EMULATE_FAIL) { | ||
2623 | printk(KERN_ERR | ||
2624 | "EPT: Fail to handle EPT violation vmexit!er is %d\n", | ||
2625 | er); | ||
2626 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", | ||
2627 | (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), | ||
2628 | (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); | ||
2629 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | ||
2630 | (long unsigned int)exit_qualification); | ||
2631 | return -ENOTSUPP; | ||
2632 | } else if (er == EMULATE_DO_MMIO) | ||
2633 | return 0; | ||
2634 | } | ||
2635 | return 1; | ||
2636 | } | ||
2637 | |||
2326 | /* | 2638 | /* |
2327 | * The exit handlers return 1 if the exit was handled fully and guest execution | 2639 | * The exit handlers return 1 if the exit was handled fully and guest execution |
2328 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 2640 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
2346 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 2658 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
2347 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 2659 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
2348 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 2660 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
2661 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | ||
2349 | }; | 2662 | }; |
2350 | 2663 | ||
2351 | static const int kvm_vmx_max_exit_handlers = | 2664 | static const int kvm_vmx_max_exit_handlers = |
@@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2364 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), | 2677 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), |
2365 | (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); | 2678 | (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); |
2366 | 2679 | ||
2680 | /* Access CR3 don't cause VMExit in paging mode, so we need | ||
2681 | * to sync with guest real CR3. */ | ||
2682 | if (vm_need_ept() && is_paging(vcpu)) { | ||
2683 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
2684 | ept_load_pdptrs(vcpu); | ||
2685 | } | ||
2686 | |||
2367 | if (unlikely(vmx->fail)) { | 2687 | if (unlikely(vmx->fail)) { |
2368 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2688 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2369 | kvm_run->fail_entry.hardware_entry_failure_reason | 2689 | kvm_run->fail_entry.hardware_entry_failure_reason |
@@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2372 | } | 2692 | } |
2373 | 2693 | ||
2374 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && | 2694 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && |
2375 | exit_reason != EXIT_REASON_EXCEPTION_NMI) | 2695 | (exit_reason != EXIT_REASON_EXCEPTION_NMI && |
2696 | exit_reason != EXIT_REASON_EPT_VIOLATION)) | ||
2376 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " | 2697 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " |
2377 | "exit reason is 0x%x\n", __func__, exit_reason); | 2698 | "exit reason is 0x%x\n", __func__, exit_reason); |
2378 | if (exit_reason < kvm_vmx_max_exit_handlers | 2699 | if (exit_reason < kvm_vmx_max_exit_handlers |
@@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2674 | return ERR_PTR(-ENOMEM); | 2995 | return ERR_PTR(-ENOMEM); |
2675 | 2996 | ||
2676 | allocate_vpid(vmx); | 2997 | allocate_vpid(vmx); |
2998 | if (id == 0 && vm_need_ept()) { | ||
2999 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3000 | VMX_EPT_WRITABLE_MASK | | ||
3001 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3002 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
3003 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
3004 | VMX_EPT_EXECUTABLE_MASK); | ||
3005 | kvm_enable_tdp(); | ||
3006 | } | ||
2677 | 3007 | ||
2678 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 3008 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
2679 | if (err) | 3009 | if (err) |
@@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2706 | if (alloc_apic_access_page(kvm) != 0) | 3036 | if (alloc_apic_access_page(kvm) != 0) |
2707 | goto free_vmcs; | 3037 | goto free_vmcs; |
2708 | 3038 | ||
3039 | if (vm_need_ept()) | ||
3040 | if (alloc_identity_pagetable(kvm) != 0) | ||
3041 | goto free_vmcs; | ||
3042 | |||
2709 | return &vmx->vcpu; | 3043 | return &vmx->vcpu; |
2710 | 3044 | ||
2711 | free_vmcs: | 3045 | free_vmcs: |
@@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn) | |||
2735 | } | 3069 | } |
2736 | } | 3070 | } |
2737 | 3071 | ||
3072 | static int get_ept_level(void) | ||
3073 | { | ||
3074 | return VMX_EPT_DEFAULT_GAW + 1; | ||
3075 | } | ||
3076 | |||
2738 | static struct kvm_x86_ops vmx_x86_ops = { | 3077 | static struct kvm_x86_ops vmx_x86_ops = { |
2739 | .cpu_has_kvm_support = cpu_has_kvm_support, | 3078 | .cpu_has_kvm_support = cpu_has_kvm_support, |
2740 | .disabled_by_bios = vmx_disabled_by_bios, | 3079 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
2791 | .inject_pending_vectors = do_interrupt_requests, | 3130 | .inject_pending_vectors = do_interrupt_requests, |
2792 | 3131 | ||
2793 | .set_tss_addr = vmx_set_tss_addr, | 3132 | .set_tss_addr = vmx_set_tss_addr, |
3133 | .get_tdp_level = get_ept_level, | ||
2794 | }; | 3134 | }; |
2795 | 3135 | ||
2796 | static int __init vmx_init(void) | 3136 | static int __init vmx_init(void) |
@@ -2843,9 +3183,14 @@ static int __init vmx_init(void) | |||
2843 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3183 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); |
2844 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3184 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); |
2845 | 3185 | ||
3186 | if (cpu_has_vmx_ept()) | ||
3187 | bypass_guest_pf = 0; | ||
3188 | |||
2846 | if (bypass_guest_pf) | 3189 | if (bypass_guest_pf) |
2847 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 3190 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
2848 | 3191 | ||
3192 | ept_sync_global(); | ||
3193 | |||
2849 | return 0; | 3194 | return 0; |
2850 | 3195 | ||
2851 | out2: | 3196 | out2: |
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 5dff4606b988..79d94c610dfe 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h | |||
@@ -35,6 +35,8 @@ | |||
35 | #define CPU_BASED_MWAIT_EXITING 0x00000400 | 35 | #define CPU_BASED_MWAIT_EXITING 0x00000400 |
36 | #define CPU_BASED_RDPMC_EXITING 0x00000800 | 36 | #define CPU_BASED_RDPMC_EXITING 0x00000800 |
37 | #define CPU_BASED_RDTSC_EXITING 0x00001000 | 37 | #define CPU_BASED_RDTSC_EXITING 0x00001000 |
38 | #define CPU_BASED_CR3_LOAD_EXITING 0x00008000 | ||
39 | #define CPU_BASED_CR3_STORE_EXITING 0x00010000 | ||
38 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 | 40 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 |
39 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 | 41 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 |
40 | #define CPU_BASED_TPR_SHADOW 0x00200000 | 42 | #define CPU_BASED_TPR_SHADOW 0x00200000 |
@@ -49,6 +51,7 @@ | |||
49 | * Definitions of Secondary Processor-Based VM-Execution Controls. | 51 | * Definitions of Secondary Processor-Based VM-Execution Controls. |
50 | */ | 52 | */ |
51 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | 53 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 |
54 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 | ||
52 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 55 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
53 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 56 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
54 | 57 | ||
@@ -100,10 +103,22 @@ enum vmcs_field { | |||
100 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, | 103 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, |
101 | APIC_ACCESS_ADDR = 0x00002014, | 104 | APIC_ACCESS_ADDR = 0x00002014, |
102 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 105 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
106 | EPT_POINTER = 0x0000201a, | ||
107 | EPT_POINTER_HIGH = 0x0000201b, | ||
108 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | ||
109 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | ||
103 | VMCS_LINK_POINTER = 0x00002800, | 110 | VMCS_LINK_POINTER = 0x00002800, |
104 | VMCS_LINK_POINTER_HIGH = 0x00002801, | 111 | VMCS_LINK_POINTER_HIGH = 0x00002801, |
105 | GUEST_IA32_DEBUGCTL = 0x00002802, | 112 | GUEST_IA32_DEBUGCTL = 0x00002802, |
106 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 113 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
114 | GUEST_PDPTR0 = 0x0000280a, | ||
115 | GUEST_PDPTR0_HIGH = 0x0000280b, | ||
116 | GUEST_PDPTR1 = 0x0000280c, | ||
117 | GUEST_PDPTR1_HIGH = 0x0000280d, | ||
118 | GUEST_PDPTR2 = 0x0000280e, | ||
119 | GUEST_PDPTR2_HIGH = 0x0000280f, | ||
120 | GUEST_PDPTR3 = 0x00002810, | ||
121 | GUEST_PDPTR3_HIGH = 0x00002811, | ||
107 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 122 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
108 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 123 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
109 | EXCEPTION_BITMAP = 0x00004004, | 124 | EXCEPTION_BITMAP = 0x00004004, |
@@ -226,6 +241,8 @@ enum vmcs_field { | |||
226 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 241 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
227 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 242 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
228 | #define EXIT_REASON_APIC_ACCESS 44 | 243 | #define EXIT_REASON_APIC_ACCESS 44 |
244 | #define EXIT_REASON_EPT_VIOLATION 48 | ||
245 | #define EXIT_REASON_EPT_MISCONFIG 49 | ||
229 | #define EXIT_REASON_WBINVD 54 | 246 | #define EXIT_REASON_WBINVD 54 |
230 | 247 | ||
231 | /* | 248 | /* |
@@ -316,15 +333,36 @@ enum vmcs_field { | |||
316 | #define MSR_IA32_VMX_CR4_FIXED1 0x489 | 333 | #define MSR_IA32_VMX_CR4_FIXED1 0x489 |
317 | #define MSR_IA32_VMX_VMCS_ENUM 0x48a | 334 | #define MSR_IA32_VMX_VMCS_ENUM 0x48a |
318 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b | 335 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b |
336 | #define MSR_IA32_VMX_EPT_VPID_CAP 0x48c | ||
319 | 337 | ||
320 | #define MSR_IA32_FEATURE_CONTROL 0x3a | 338 | #define MSR_IA32_FEATURE_CONTROL 0x3a |
321 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 | 339 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 |
322 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 | 340 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 |
323 | 341 | ||
324 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 342 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 |
343 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 | ||
325 | 344 | ||
326 | #define VMX_NR_VPIDS (1 << 16) | 345 | #define VMX_NR_VPIDS (1 << 16) |
327 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 | 346 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 |
328 | #define VMX_VPID_EXTENT_ALL_CONTEXT 2 | 347 | #define VMX_VPID_EXTENT_ALL_CONTEXT 2 |
329 | 348 | ||
349 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 | ||
350 | #define VMX_EPT_EXTENT_CONTEXT 1 | ||
351 | #define VMX_EPT_EXTENT_GLOBAL 2 | ||
352 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) | ||
353 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | ||
354 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | ||
355 | #define VMX_EPT_DEFAULT_GAW 3 | ||
356 | #define VMX_EPT_MAX_GAW 0x4 | ||
357 | #define VMX_EPT_MT_EPTE_SHIFT 3 | ||
358 | #define VMX_EPT_GAW_EPTP_SHIFT 3 | ||
359 | #define VMX_EPT_DEFAULT_MT 0x6ull | ||
360 | #define VMX_EPT_READABLE_MASK 0x1ull | ||
361 | #define VMX_EPT_WRITABLE_MASK 0x2ull | ||
362 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | ||
363 | #define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) | ||
364 | #define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) | ||
365 | |||
366 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | ||
367 | |||
330 | #endif | 368 | #endif |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ce556372a4d..21338bdb28ff 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque) | |||
2417 | 2417 | ||
2418 | kvm_x86_ops = ops; | 2418 | kvm_x86_ops = ops; |
2419 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 2419 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
2420 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | ||
2421 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | ||
2422 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | ||
2420 | return 0; | 2423 | return 0; |
2421 | 2424 | ||
2422 | out: | 2425 | out: |
@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3019 | 3022 | ||
3020 | kvm_x86_ops->decache_regs(vcpu); | 3023 | kvm_x86_ops->decache_regs(vcpu); |
3021 | 3024 | ||
3025 | vcpu->arch.exception.pending = false; | ||
3026 | |||
3022 | vcpu_put(vcpu); | 3027 | vcpu_put(vcpu); |
3023 | 3028 | ||
3024 | return 0; | 3029 | return 0; |
@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3481 | } | 3486 | } |
3482 | 3487 | ||
3483 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 3488 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
3484 | cseg_desc.type &= ~(1 << 8); //clear the B flag | 3489 | cseg_desc.type &= ~(1 << 1); //clear the B flag |
3485 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | 3490 | save_guest_segment_descriptor(vcpu, tr_seg.selector, |
3486 | &cseg_desc); | 3491 | &cseg_desc); |
3487 | } | 3492 | } |
@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3507 | } | 3512 | } |
3508 | 3513 | ||
3509 | if (reason != TASK_SWITCH_IRET) { | 3514 | if (reason != TASK_SWITCH_IRET) { |
3510 | nseg_desc.type |= (1 << 8); | 3515 | nseg_desc.type |= (1 << 1); |
3511 | save_guest_segment_descriptor(vcpu, tss_selector, | 3516 | save_guest_segment_descriptor(vcpu, tss_selector, |
3512 | &nseg_desc); | 3517 | &nseg_desc); |
3513 | } | 3518 | } |
@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu) | |||
3698 | { | 3703 | { |
3699 | unsigned after_mxcsr_mask; | 3704 | unsigned after_mxcsr_mask; |
3700 | 3705 | ||
3706 | /* | ||
3707 | * Touch the fpu the first time in non atomic context as if | ||
3708 | * this is the first fpu instruction the exception handler | ||
3709 | * will fire before the instruction returns and it'll have to | ||
3710 | * allocate ram with GFP_KERNEL. | ||
3711 | */ | ||
3712 | if (!used_math()) | ||
3713 | fx_save(&vcpu->arch.host_fx_image); | ||
3714 | |||
3701 | /* Initialize guest FPU by resetting ours and saving into guest's */ | 3715 | /* Initialize guest FPU by resetting ours and saving into guest's */ |
3702 | preempt_disable(); | 3716 | preempt_disable(); |
3703 | fx_save(&vcpu->arch.host_fx_image); | 3717 | fx_save(&vcpu->arch.host_fx_image); |
3704 | fpu_init(); | 3718 | fx_finit(); |
3705 | fx_save(&vcpu->arch.guest_fx_image); | 3719 | fx_save(&vcpu->arch.guest_fx_image); |
3706 | fx_restore(&vcpu->arch.host_fx_image); | 3720 | fx_restore(&vcpu->arch.host_fx_image); |
3707 | preempt_enable(); | 3721 | preempt_enable(); |
@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
3906 | kvm_free_physmem(kvm); | 3920 | kvm_free_physmem(kvm); |
3907 | if (kvm->arch.apic_access_page) | 3921 | if (kvm->arch.apic_access_page) |
3908 | put_page(kvm->arch.apic_access_page); | 3922 | put_page(kvm->arch.apic_access_page); |
3923 | if (kvm->arch.ept_identity_pagetable) | ||
3924 | put_page(kvm->arch.ept_identity_pagetable); | ||
3909 | kfree(kvm); | 3925 | kfree(kvm); |
3910 | } | 3926 | } |
3911 | 3927 | ||
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 2ca08386f993..f2a696d6a243 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -1761,6 +1761,7 @@ twobyte_insn: | |||
1761 | case 6: /* lmsw */ | 1761 | case 6: /* lmsw */ |
1762 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 1762 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, |
1763 | &ctxt->eflags); | 1763 | &ctxt->eflags); |
1764 | c->dst.type = OP_NONE; | ||
1764 | break; | 1765 | break; |
1765 | case 7: /* invlpg*/ | 1766 | case 7: /* invlpg*/ |
1766 | emulate_invlpg(ctxt->vcpu, memop); | 1767 | emulate_invlpg(ctxt->vcpu, memop); |