Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 670
1 file changed, 661 insertions, 9 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fafd720ce10a..2214214c786b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -14,6 +14,9 @@ | |||
14 | * the COPYING file in the top-level directory. | 14 | * the COPYING file in the top-level directory. |
15 | * | 15 | * |
16 | */ | 16 | */ |
17 | |||
18 | #define pr_fmt(fmt) "SVM: " fmt | ||
19 | |||
17 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
18 | 21 | ||
19 | #include "irq.h" | 22 | #include "irq.h" |
@@ -32,6 +35,7 @@ | |||
32 | #include <linux/trace_events.h> | 35 | #include <linux/trace_events.h> |
33 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
34 | 37 | ||
38 | #include <asm/apic.h> | ||
35 | #include <asm/perf_event.h> | 39 | #include <asm/perf_event.h> |
36 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
37 | #include <asm/desc.h> | 41 | #include <asm/desc.h> |
@@ -68,6 +72,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); | |||
68 | #define SVM_FEATURE_DECODE_ASSIST (1 << 7) | 72 | #define SVM_FEATURE_DECODE_ASSIST (1 << 7) |
69 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 73 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) |
70 | 74 | ||
75 | #define SVM_AVIC_DOORBELL 0xc001011b | ||
76 | |||
71 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 77 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
72 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 78 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
73 | #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ | 79 | #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ |
@@ -78,6 +84,18 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); | |||
78 | #define TSC_RATIO_MIN 0x0000000000000001ULL | 84 | #define TSC_RATIO_MIN 0x0000000000000001ULL |
79 | #define TSC_RATIO_MAX 0x000000ffffffffffULL | 85 | #define TSC_RATIO_MAX 0x000000ffffffffffULL |
80 | 86 | ||
87 | #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) | ||
88 | |||
89 | /* | ||
90 | * 0xff is broadcast, so the max index allowed for physical APIC ID | ||
91 | * table is 0xfe. APIC IDs above 0xff are reserved. | ||
92 | */ | ||
93 | #define AVIC_MAX_PHYSICAL_ID_COUNT 255 | ||
94 | |||
95 | #define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 | ||
96 | #define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 | ||
97 | #define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF | ||
98 | |||
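A quick note on the new constants (illustrative, not part of the patch): AVIC table and backing pages are 4KB-aligned host physical addresses of at most 52 bits, which is exactly what AVIC_HPA_MASK enforces:

	/* ~((0xFFFULL << 52) | 0xFFF) keeps bits 12..51, clearing the page
	 * offset (bits 0..11) and anything above the 52-bit physical limit. */
	u64 hpa = page_to_phys(page) & AVIC_HPA_MASK;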
81 | static bool erratum_383_found __read_mostly; | 99 | static bool erratum_383_found __read_mostly; |
82 | 100 | ||
83 | static const u32 host_save_user_msrs[] = { | 101 | static const u32 host_save_user_msrs[] = { |
@@ -162,8 +180,21 @@ struct vcpu_svm { | |||
162 | 180 | ||
163 | /* cached guest cpuid flags for faster access */ | 181 | /* cached guest cpuid flags for faster access */ |
164 | bool nrips_enabled : 1; | 182 | bool nrips_enabled : 1; |
183 | |||
184 | u32 ldr_reg; | ||
185 | struct page *avic_backing_page; | ||
186 | u64 *avic_physical_id_cache; | ||
187 | bool avic_is_running; | ||
165 | }; | 188 | }; |
166 | 189 | ||
190 | #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) | ||
191 | #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) | ||
192 | |||
193 | #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) | ||
194 | #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) | ||
195 | #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) | ||
196 | #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) | ||
197 | |||
167 | static DEFINE_PER_CPU(u64, current_tsc_ratio); | 198 | static DEFINE_PER_CPU(u64, current_tsc_ratio); |
168 | #define TSC_RATIO_DEFAULT 0x0100000000ULL | 199 | #define TSC_RATIO_DEFAULT 0x0100000000ULL |
169 | 200 | ||
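For reference, the 64-bit physical APIC ID table entry layout implied by these masks (a sketch, not code from the patch):

	/*
	 * bits  0..7  : host physical APIC ID
	 * bits 12..51 : vAPIC backing page address (4KB aligned)
	 * bit  62     : is-running
	 * bit  63     : valid
	 */
	u64 entry = (host_apic_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK) |
		    (backing_page_pa & AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
		    AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK |
		    AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;

The 32-bit logical APIC ID table entry is analogous: guest physical APIC ID in bits 0..7 and a valid bit at bit 31.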
@@ -205,6 +236,10 @@ module_param(npt, int, S_IRUGO); | |||
205 | static int nested = true; | 236 | static int nested = true; |
206 | module_param(nested, int, S_IRUGO); | 237 | module_param(nested, int, S_IRUGO); |
207 | 238 | ||
239 | /* enable / disable AVIC */ | ||
240 | static int avic; | ||
241 | module_param(avic, int, S_IRUGO); | ||
242 | |||
208 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 243 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
209 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 244 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
210 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 245 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
@@ -228,12 +263,18 @@ enum { | |||
228 | VMCB_SEG, /* CS, DS, SS, ES, CPL */ | 263 | VMCB_SEG, /* CS, DS, SS, ES, CPL */ |
229 | VMCB_CR2, /* CR2 only */ | 264 | VMCB_CR2, /* CR2 only */ |
230 | VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ | 265 | VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ |
266 | VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE, | ||
267 | * AVIC PHYSICAL_TABLE pointer, | ||
268 | * AVIC LOGICAL_TABLE pointer | ||
269 | */ | ||
231 | VMCB_DIRTY_MAX, | 270 | VMCB_DIRTY_MAX, |
232 | }; | 271 | }; |
233 | 272 | ||
234 | /* TPR and CR2 are always written before VMRUN */ | 273 | /* TPR and CR2 are always written before VMRUN */ |
235 | #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) | 274 | #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) |
236 | 275 | ||
276 | #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL | ||
277 | |||
237 | static inline void mark_all_dirty(struct vmcb *vmcb) | 278 | static inline void mark_all_dirty(struct vmcb *vmcb) |
238 | { | 279 | { |
239 | vmcb->control.clean = 0; | 280 | vmcb->control.clean = 0; |
@@ -255,6 +296,23 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | |||
255 | return container_of(vcpu, struct vcpu_svm, vcpu); | 296 | return container_of(vcpu, struct vcpu_svm, vcpu); |
256 | } | 297 | } |
257 | 298 | ||
299 | static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data) | ||
300 | { | ||
301 | svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK; | ||
302 | mark_dirty(svm->vmcb, VMCB_AVIC); | ||
303 | } | ||
304 | |||
305 | static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu) | ||
306 | { | ||
307 | struct vcpu_svm *svm = to_svm(vcpu); | ||
308 | u64 *entry = svm->avic_physical_id_cache; | ||
309 | |||
310 | if (!entry) | ||
311 | return false; | ||
312 | |||
313 | return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); | ||
314 | } | ||
315 | |||
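This helper is what later lets interrupt delivery avoid a VM exit; as used further down in this patch (svm_deliver_avic_intr), the decision is roughly:

	if (avic_vcpu_is_running(vcpu))
		wrmsrl(SVM_AVIC_DOORBELL,
		       __default_cpu_present_to_apicid(vcpu->cpu));
	else
		kvm_vcpu_wake_up(vcpu);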
258 | static void recalc_intercepts(struct vcpu_svm *svm) | 316 | static void recalc_intercepts(struct vcpu_svm *svm) |
259 | { | 317 | { |
260 | struct vmcb_control_area *c, *h; | 318 | struct vmcb_control_area *c, *h; |
@@ -923,6 +981,12 @@ static __init int svm_hardware_setup(void) | |||
923 | } else | 981 | } else |
924 | kvm_disable_tdp(); | 982 | kvm_disable_tdp(); |
925 | 983 | ||
984 | if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))) | ||
985 | avic = false; | ||
986 | |||
987 | if (avic) | ||
988 | pr_info("AVIC enabled\n"); | ||
989 | |||
926 | return 0; | 990 | return 0; |
927 | 991 | ||
928 | err: | 992 | err: |
@@ -1000,6 +1064,22 @@ static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) | |||
1000 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 1064 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
1001 | } | 1065 | } |
1002 | 1066 | ||
1067 | static void avic_init_vmcb(struct vcpu_svm *svm) | ||
1068 | { | ||
1069 | struct vmcb *vmcb = svm->vmcb; | ||
1070 | struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; | ||
1071 | phys_addr_t bpa = page_to_phys(svm->avic_backing_page); | ||
1072 | phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page); | ||
1073 | phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page); | ||
1074 | |||
1075 | vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; | ||
1076 | vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; | ||
1077 | vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK; | ||
1078 | vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT; | ||
1079 | vmcb->control.int_ctl |= AVIC_ENABLE_MASK; | ||
1080 | svm->vcpu.arch.apicv_active = true; | ||
1081 | } | ||
1082 | |||
1003 | static void init_vmcb(struct vcpu_svm *svm) | 1083 | static void init_vmcb(struct vcpu_svm *svm) |
1004 | { | 1084 | { |
1005 | struct vmcb_control_area *control = &svm->vmcb->control; | 1085 | struct vmcb_control_area *control = &svm->vmcb->control; |
@@ -1014,7 +1094,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1014 | set_cr_intercept(svm, INTERCEPT_CR0_WRITE); | 1094 | set_cr_intercept(svm, INTERCEPT_CR0_WRITE); |
1015 | set_cr_intercept(svm, INTERCEPT_CR3_WRITE); | 1095 | set_cr_intercept(svm, INTERCEPT_CR3_WRITE); |
1016 | set_cr_intercept(svm, INTERCEPT_CR4_WRITE); | 1096 | set_cr_intercept(svm, INTERCEPT_CR4_WRITE); |
1017 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); | 1097 | if (!kvm_vcpu_apicv_active(&svm->vcpu)) |
1098 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); | ||
1018 | 1099 | ||
1019 | set_dr_intercepts(svm); | 1100 | set_dr_intercepts(svm); |
1020 | 1101 | ||
@@ -1110,9 +1191,197 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1110 | set_intercept(svm, INTERCEPT_PAUSE); | 1191 | set_intercept(svm, INTERCEPT_PAUSE); |
1111 | } | 1192 | } |
1112 | 1193 | ||
1194 | if (avic) | ||
1195 | avic_init_vmcb(svm); | ||
1196 | |||
1113 | mark_all_dirty(svm->vmcb); | 1197 | mark_all_dirty(svm->vmcb); |
1114 | 1198 | ||
1115 | enable_gif(svm); | 1199 | enable_gif(svm); |
1200 | |||
1201 | } | ||
1202 | |||
1203 | static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index) | ||
1204 | { | ||
1205 | u64 *avic_physical_id_table; | ||
1206 | struct kvm_arch *vm_data = &vcpu->kvm->arch; | ||
1207 | |||
1208 | if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) | ||
1209 | return NULL; | ||
1210 | |||
1211 | avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page); | ||
1212 | |||
1213 | return &avic_physical_id_table[index]; | ||
1214 | } | ||
1215 | |||
1216 | /** | ||
1217 | * Note: | ||
1218 | * AVIC hardware walks the nested page table to check permissions, | ||
1219 | * but does not use the SPA address specified in the leaf page | ||
1220 | * table entry since it uses address in the AVIC_BACKING_PAGE pointer | ||
1221 | * field of the VMCB. Therefore, we set up the | ||
1222 | * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here. | ||
1223 | */ | ||
1224 | static int avic_init_access_page(struct kvm_vcpu *vcpu) | ||
1225 | { | ||
1226 | struct kvm *kvm = vcpu->kvm; | ||
1227 | int ret; | ||
1228 | |||
1229 | if (kvm->arch.apic_access_page_done) | ||
1230 | return 0; | ||
1231 | |||
1232 | ret = x86_set_memory_region(kvm, | ||
1233 | APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, | ||
1234 | APIC_DEFAULT_PHYS_BASE, | ||
1235 | PAGE_SIZE); | ||
1236 | if (ret) | ||
1237 | return ret; | ||
1238 | |||
1239 | kvm->arch.apic_access_page_done = true; | ||
1240 | return 0; | ||
1241 | } | ||
1242 | |||
1243 | static int avic_init_backing_page(struct kvm_vcpu *vcpu) | ||
1244 | { | ||
1245 | int ret; | ||
1246 | u64 *entry, new_entry; | ||
1247 | int id = vcpu->vcpu_id; | ||
1248 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1249 | |||
1250 | ret = avic_init_access_page(vcpu); | ||
1251 | if (ret) | ||
1252 | return ret; | ||
1253 | |||
1254 | if (id >= AVIC_MAX_PHYSICAL_ID_COUNT) | ||
1255 | return -EINVAL; | ||
1256 | |||
1257 | if (!svm->vcpu.arch.apic->regs) | ||
1258 | return -EINVAL; | ||
1259 | |||
1260 | svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs); | ||
1261 | |||
1262 | /* Setting AVIC backing page address in the phy APIC ID table */ | ||
1263 | entry = avic_get_physical_id_entry(vcpu, id); | ||
1264 | if (!entry) | ||
1265 | return -EINVAL; | ||
1266 | |||
1267 | new_entry = READ_ONCE(*entry); | ||
1268 | new_entry = (page_to_phys(svm->avic_backing_page) & | ||
1269 | AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | | ||
1270 | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK; | ||
1271 | WRITE_ONCE(*entry, new_entry); | ||
1272 | |||
1273 | svm->avic_physical_id_cache = entry; | ||
1274 | |||
1275 | return 0; | ||
1276 | } | ||
1277 | |||
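A worked example of the entry written above, with a made-up backing-page address: if the vAPIC register page sits at host physical address 0x12345000, the slot for this vCPU becomes

	0x12345000 | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK	/* = 0x8000000012345000 */

i.e. the backing page address in bits 12..51 plus the valid bit; the is-running bit is only set later by avic_vcpu_load()/avic_set_running().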
1278 | static void avic_vm_destroy(struct kvm *kvm) | ||
1279 | { | ||
1280 | struct kvm_arch *vm_data = &kvm->arch; | ||
1281 | |||
1282 | if (vm_data->avic_logical_id_table_page) | ||
1283 | __free_page(vm_data->avic_logical_id_table_page); | ||
1284 | if (vm_data->avic_physical_id_table_page) | ||
1285 | __free_page(vm_data->avic_physical_id_table_page); | ||
1286 | } | ||
1287 | |||
1288 | static int avic_vm_init(struct kvm *kvm) | ||
1289 | { | ||
1290 | int err = -ENOMEM; | ||
1291 | struct kvm_arch *vm_data = &kvm->arch; | ||
1292 | struct page *p_page; | ||
1293 | struct page *l_page; | ||
1294 | |||
1295 | if (!avic) | ||
1296 | return 0; | ||
1297 | |||
1298 | /* Allocating physical APIC ID table (4KB) */ | ||
1299 | p_page = alloc_page(GFP_KERNEL); | ||
1300 | if (!p_page) | ||
1301 | goto free_avic; | ||
1302 | |||
1303 | vm_data->avic_physical_id_table_page = p_page; | ||
1304 | clear_page(page_address(p_page)); | ||
1305 | |||
1306 | /* Allocating logical APIC ID table (4KB) */ | ||
1307 | l_page = alloc_page(GFP_KERNEL); | ||
1308 | if (!l_page) | ||
1309 | goto free_avic; | ||
1310 | |||
1311 | vm_data->avic_logical_id_table_page = l_page; | ||
1312 | clear_page(page_address(l_page)); | ||
1313 | |||
1314 | return 0; | ||
1315 | |||
1316 | free_avic: | ||
1317 | avic_vm_destroy(kvm); | ||
1318 | return err; | ||
1319 | } | ||
1320 | |||
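Sizing note (derived from the definitions above, not spelled out in the patch): one 4KB page is enough for each table, since the physical APIC ID table holds at most 255 eight-byte entries (2040 bytes) and the logical APIC ID table uses 32-bit entries.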
1321 | /** | ||
1322 | * This function is called during VCPU halt/unhalt. | ||
1323 | */ | ||
1324 | static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) | ||
1325 | { | ||
1326 | u64 entry; | ||
1327 | int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu); | ||
1328 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1329 | |||
1330 | if (!kvm_vcpu_apicv_active(vcpu)) | ||
1331 | return; | ||
1332 | |||
1333 | svm->avic_is_running = is_run; | ||
1334 | |||
1335 | /* ID = 0xff (broadcast), ID > 0xff (reserved) */ | ||
1336 | if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT)) | ||
1337 | return; | ||
1338 | |||
1339 | entry = READ_ONCE(*(svm->avic_physical_id_cache)); | ||
1340 | WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)); | ||
1341 | |||
1342 | entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; | ||
1343 | if (is_run) | ||
1344 | entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; | ||
1345 | WRITE_ONCE(*(svm->avic_physical_id_cache), entry); | ||
1346 | } | ||
1347 | |||
1348 | static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
1349 | { | ||
1350 | u64 entry; | ||
1351 | /* ID = 0xff (broadcast), ID > 0xff (reserved) */ | ||
1352 | int h_physical_id = __default_cpu_present_to_apicid(cpu); | ||
1353 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1354 | |||
1355 | if (!kvm_vcpu_apicv_active(vcpu)) | ||
1356 | return; | ||
1357 | |||
1358 | if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT)) | ||
1359 | return; | ||
1360 | |||
1361 | entry = READ_ONCE(*(svm->avic_physical_id_cache)); | ||
1362 | WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); | ||
1363 | |||
1364 | entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; | ||
1365 | entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK); | ||
1366 | |||
1367 | entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; | ||
1368 | if (svm->avic_is_running) | ||
1369 | entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; | ||
1370 | |||
1371 | WRITE_ONCE(*(svm->avic_physical_id_cache), entry); | ||
1372 | } | ||
1373 | |||
1374 | static void avic_vcpu_put(struct kvm_vcpu *vcpu) | ||
1375 | { | ||
1376 | u64 entry; | ||
1377 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1378 | |||
1379 | if (!kvm_vcpu_apicv_active(vcpu)) | ||
1380 | return; | ||
1381 | |||
1382 | entry = READ_ONCE(*(svm->avic_physical_id_cache)); | ||
1383 | entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; | ||
1384 | WRITE_ONCE(*(svm->avic_physical_id_cache), entry); | ||
1116 | } | 1385 | } |
1117 | 1386 | ||
1118 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | 1387 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
@@ -1131,6 +1400,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
1131 | 1400 | ||
1132 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1401 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); |
1133 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1402 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
1403 | |||
1404 | if (kvm_vcpu_apicv_active(vcpu) && !init_event) | ||
1405 | avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); | ||
1134 | } | 1406 | } |
1135 | 1407 | ||
1136 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | 1408 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) |
@@ -1169,6 +1441,17 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1169 | if (!hsave_page) | 1441 | if (!hsave_page) |
1170 | goto free_page3; | 1442 | goto free_page3; |
1171 | 1443 | ||
1444 | if (avic) { | ||
1445 | err = avic_init_backing_page(&svm->vcpu); | ||
1446 | if (err) | ||
1447 | goto free_page4; | ||
1448 | } | ||
1449 | |||
1450 | /* We initialize this flag to true to make sure that the is_running | ||
1451 | * bit will be set the first time the vcpu is loaded. | ||
1452 | */ | ||
1453 | svm->avic_is_running = true; | ||
1454 | |||
1172 | svm->nested.hsave = page_address(hsave_page); | 1455 | svm->nested.hsave = page_address(hsave_page); |
1173 | 1456 | ||
1174 | svm->msrpm = page_address(msrpm_pages); | 1457 | svm->msrpm = page_address(msrpm_pages); |
@@ -1187,6 +1470,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1187 | 1470 | ||
1188 | return &svm->vcpu; | 1471 | return &svm->vcpu; |
1189 | 1472 | ||
1473 | free_page4: | ||
1474 | __free_page(hsave_page); | ||
1190 | free_page3: | 1475 | free_page3: |
1191 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); | 1476 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); |
1192 | free_page2: | 1477 | free_page2: |
@@ -1243,6 +1528,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1243 | /* This assumes that the kernel never uses MSR_TSC_AUX */ | 1528 | /* This assumes that the kernel never uses MSR_TSC_AUX */ |
1244 | if (static_cpu_has(X86_FEATURE_RDTSCP)) | 1529 | if (static_cpu_has(X86_FEATURE_RDTSCP)) |
1245 | wrmsrl(MSR_TSC_AUX, svm->tsc_aux); | 1530 | wrmsrl(MSR_TSC_AUX, svm->tsc_aux); |
1531 | |||
1532 | avic_vcpu_load(vcpu, cpu); | ||
1246 | } | 1533 | } |
1247 | 1534 | ||
1248 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) | 1535 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) |
@@ -1250,6 +1537,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1250 | struct vcpu_svm *svm = to_svm(vcpu); | 1537 | struct vcpu_svm *svm = to_svm(vcpu); |
1251 | int i; | 1538 | int i; |
1252 | 1539 | ||
1540 | avic_vcpu_put(vcpu); | ||
1541 | |||
1253 | ++vcpu->stat.host_state_reload; | 1542 | ++vcpu->stat.host_state_reload; |
1254 | kvm_load_ldt(svm->host.ldt); | 1543 | kvm_load_ldt(svm->host.ldt); |
1255 | #ifdef CONFIG_X86_64 | 1544 | #ifdef CONFIG_X86_64 |
@@ -1265,6 +1554,16 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1265 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1554 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1266 | } | 1555 | } |
1267 | 1556 | ||
1557 | static void svm_vcpu_blocking(struct kvm_vcpu *vcpu) | ||
1558 | { | ||
1559 | avic_set_running(vcpu, false); | ||
1560 | } | ||
1561 | |||
1562 | static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) | ||
1563 | { | ||
1564 | avic_set_running(vcpu, true); | ||
1565 | } | ||
1566 | |||
1268 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 1567 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
1269 | { | 1568 | { |
1270 | return to_svm(vcpu)->vmcb->save.rflags; | 1569 | return to_svm(vcpu)->vmcb->save.rflags; |
@@ -2673,10 +2972,11 @@ static int clgi_interception(struct vcpu_svm *svm) | |||
2673 | disable_gif(svm); | 2972 | disable_gif(svm); |
2674 | 2973 | ||
2675 | /* After a CLGI no interrupts should come */ | 2974 | /* After a CLGI no interrupts should come */ |
2676 | svm_clear_vintr(svm); | 2975 | if (!kvm_vcpu_apicv_active(&svm->vcpu)) { |
2677 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 2976 | svm_clear_vintr(svm); |
2678 | 2977 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | |
2679 | mark_dirty(svm->vmcb, VMCB_INTR); | 2978 | mark_dirty(svm->vmcb, VMCB_INTR); |
2979 | } | ||
2680 | 2980 | ||
2681 | return 1; | 2981 | return 1; |
2682 | } | 2982 | } |
@@ -3212,6 +3512,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
3212 | case MSR_VM_IGNNE: | 3512 | case MSR_VM_IGNNE: |
3213 | vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 3513 | vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
3214 | break; | 3514 | break; |
3515 | case MSR_IA32_APICBASE: | ||
3516 | if (kvm_vcpu_apicv_active(vcpu)) | ||
3517 | avic_update_vapic_bar(to_svm(vcpu), data); | ||
3518 | /* Fall through */ | ||
3215 | default: | 3519 | default: |
3216 | return kvm_set_msr_common(vcpu, msr); | 3520 | return kvm_set_msr_common(vcpu, msr); |
3217 | } | 3521 | } |
@@ -3281,6 +3585,278 @@ static int mwait_interception(struct vcpu_svm *svm) | |||
3281 | return nop_interception(svm); | 3585 | return nop_interception(svm); |
3282 | } | 3586 | } |
3283 | 3587 | ||
3588 | enum avic_ipi_failure_cause { | ||
3589 | AVIC_IPI_FAILURE_INVALID_INT_TYPE, | ||
3590 | AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, | ||
3591 | AVIC_IPI_FAILURE_INVALID_TARGET, | ||
3592 | AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, | ||
3593 | }; | ||
3594 | |||
3595 | static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) | ||
3596 | { | ||
3597 | u32 icrh = svm->vmcb->control.exit_info_1 >> 32; | ||
3598 | u32 icrl = svm->vmcb->control.exit_info_1; | ||
3599 | u32 id = svm->vmcb->control.exit_info_2 >> 32; | ||
3600 | u32 index = svm->vmcb->control.exit_info_2 & 0xFF; | ||
3601 | struct kvm_lapic *apic = svm->vcpu.arch.apic; | ||
3602 | |||
3603 | trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index); | ||
3604 | |||
3605 | switch (id) { | ||
3606 | case AVIC_IPI_FAILURE_INVALID_INT_TYPE: | ||
3607 | /* | ||
3608 | * AVIC hardware handles the generation of | ||
3609 | * IPIs when the specified Message Type is Fixed | ||
3610 | * (also known as fixed delivery mode) and | ||
3611 | * the Trigger Mode is edge-triggered. The hardware | ||
3612 | * also supports self and broadcast delivery modes | ||
3613 | * specified via the Destination Shorthand (DSH) | ||
3614 | * field of the ICRL. Logical and physical APIC ID | ||
3615 | * formats are supported. All other IPI types cause | ||
3616 | * a #VMEXIT, which needs to be emulated. | ||
3617 | */ | ||
3618 | kvm_lapic_reg_write(apic, APIC_ICR2, icrh); | ||
3619 | kvm_lapic_reg_write(apic, APIC_ICR, icrl); | ||
3620 | break; | ||
3621 | case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { | ||
3622 | int i; | ||
3623 | struct kvm_vcpu *vcpu; | ||
3624 | struct kvm *kvm = svm->vcpu.kvm; | ||
3625 | struct kvm_lapic *apic = svm->vcpu.arch.apic; | ||
3626 | |||
3627 | /* | ||
3628 | * At this point, we expect that the AVIC HW has already | ||
3629 | * set the appropriate IRR bits on the valid target | ||
3630 | * vcpus. So, we just need to kick the appropriate vcpu. | ||
3631 | */ | ||
3632 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
3633 | bool m = kvm_apic_match_dest(vcpu, apic, | ||
3634 | icrl & KVM_APIC_SHORT_MASK, | ||
3635 | GET_APIC_DEST_FIELD(icrh), | ||
3636 | icrl & KVM_APIC_DEST_MASK); | ||
3637 | |||
3638 | if (m && !avic_vcpu_is_running(vcpu)) | ||
3639 | kvm_vcpu_wake_up(vcpu); | ||
3640 | } | ||
3641 | break; | ||
3642 | } | ||
3643 | case AVIC_IPI_FAILURE_INVALID_TARGET: | ||
3644 | break; | ||
3645 | case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: | ||
3646 | WARN_ONCE(1, "Invalid backing page\n"); | ||
3647 | break; | ||
3648 | default: | ||
3649 | pr_err("Unknown IPI interception\n"); | ||
3650 | } | ||
3651 | |||
3652 | return 1; | ||
3653 | } | ||
3654 | |||
3655 | static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) | ||
3656 | { | ||
3657 | struct kvm_arch *vm_data = &vcpu->kvm->arch; | ||
3658 | int index; | ||
3659 | u32 *logical_apic_id_table; | ||
3660 | int dlid = GET_APIC_LOGICAL_ID(ldr); | ||
3661 | |||
3662 | if (!dlid) | ||
3663 | return NULL; | ||
3664 | |||
3665 | if (flat) { /* flat */ | ||
3666 | index = ffs(dlid) - 1; | ||
3667 | if (index > 7) | ||
3668 | return NULL; | ||
3669 | } else { /* cluster */ | ||
3670 | int cluster = (dlid & 0xf0) >> 4; | ||
3671 | int apic = ffs(dlid & 0x0f) - 1; | ||
3672 | |||
3673 | if ((apic < 0) || (apic > 7) || | ||
3674 | (cluster >= 0xf)) | ||
3675 | return NULL; | ||
3676 | index = (cluster << 2) + apic; | ||
3677 | } | ||
3678 | |||
3679 | logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page); | ||
3680 | |||
3681 | return &logical_apic_id_table[index]; | ||
3682 | } | ||
3683 | |||
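Two worked index computations for the lookup above (illustrative): in flat mode a logical ID of 0x08 maps to index ffs(0x08) - 1 = 3; in cluster mode a logical ID of 0x24 (cluster 2, APIC bit 2) maps to index (2 << 2) + 2 = 10.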
3684 | static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, | ||
3685 | bool valid) | ||
3686 | { | ||
3687 | bool flat; | ||
3688 | u32 *entry, new_entry; | ||
3689 | |||
3690 | flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT; | ||
3691 | entry = avic_get_logical_id_entry(vcpu, ldr, flat); | ||
3692 | if (!entry) | ||
3693 | return -EINVAL; | ||
3694 | |||
3695 | new_entry = READ_ONCE(*entry); | ||
3696 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; | ||
3697 | new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); | ||
3698 | if (valid) | ||
3699 | new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; | ||
3700 | else | ||
3701 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK; | ||
3702 | WRITE_ONCE(*entry, new_entry); | ||
3703 | |||
3704 | return 0; | ||
3705 | } | ||
3706 | |||
3707 | static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) | ||
3708 | { | ||
3709 | int ret; | ||
3710 | struct vcpu_svm *svm = to_svm(vcpu); | ||
3711 | u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); | ||
3712 | |||
3713 | if (!ldr) | ||
3714 | return 1; | ||
3715 | |||
3716 | ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true); | ||
3717 | if (ret && svm->ldr_reg) { | ||
3718 | avic_ldr_write(vcpu, 0, svm->ldr_reg, false); | ||
3719 | svm->ldr_reg = 0; | ||
3720 | } else { | ||
3721 | svm->ldr_reg = ldr; | ||
3722 | } | ||
3723 | return ret; | ||
3724 | } | ||
3725 | |||
3726 | static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) | ||
3727 | { | ||
3728 | u64 *old, *new; | ||
3729 | struct vcpu_svm *svm = to_svm(vcpu); | ||
3730 | u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID); | ||
3731 | u32 id = (apic_id_reg >> 24) & 0xff; | ||
3732 | |||
3733 | if (vcpu->vcpu_id == id) | ||
3734 | return 0; | ||
3735 | |||
3736 | old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); | ||
3737 | new = avic_get_physical_id_entry(vcpu, id); | ||
3738 | if (!new || !old) | ||
3739 | return 1; | ||
3740 | |||
3741 | /* We need to move physical_id_entry to new offset */ | ||
3742 | *new = *old; | ||
3743 | *old = 0ULL; | ||
3744 | to_svm(vcpu)->avic_physical_id_cache = new; | ||
3745 | |||
3746 | /* | ||
3747 | * Also update the guest physical APIC ID in the logical | ||
3749 | * APIC ID table entry if the LDR has already been set up. | ||
3749 | */ | ||
3750 | if (svm->ldr_reg) | ||
3751 | avic_handle_ldr_update(vcpu); | ||
3752 | |||
3753 | return 0; | ||
3754 | } | ||
3755 | |||
3756 | static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) | ||
3757 | { | ||
3758 | struct vcpu_svm *svm = to_svm(vcpu); | ||
3759 | struct kvm_arch *vm_data = &vcpu->kvm->arch; | ||
3760 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); | ||
3761 | u32 mod = (dfr >> 28) & 0xf; | ||
3762 | |||
3763 | /* | ||
3764 | * We assume that all local APICs are using the same type. | ||
3765 | * If this changes, we need to flush the AVIC logical | ||
3766 | * APIC ID table. | ||
3767 | */ | ||
3768 | if (vm_data->ldr_mode == mod) | ||
3769 | return 0; | ||
3770 | |||
3771 | clear_page(page_address(vm_data->avic_logical_id_table_page)); | ||
3772 | vm_data->ldr_mode = mod; | ||
3773 | |||
3774 | if (svm->ldr_reg) | ||
3775 | avic_handle_ldr_update(vcpu); | ||
3776 | return 0; | ||
3777 | } | ||
3778 | |||
3779 | static int avic_unaccel_trap_write(struct vcpu_svm *svm) | ||
3780 | { | ||
3781 | struct kvm_lapic *apic = svm->vcpu.arch.apic; | ||
3782 | u32 offset = svm->vmcb->control.exit_info_1 & | ||
3783 | AVIC_UNACCEL_ACCESS_OFFSET_MASK; | ||
3784 | |||
3785 | switch (offset) { | ||
3786 | case APIC_ID: | ||
3787 | if (avic_handle_apic_id_update(&svm->vcpu)) | ||
3788 | return 0; | ||
3789 | break; | ||
3790 | case APIC_LDR: | ||
3791 | if (avic_handle_ldr_update(&svm->vcpu)) | ||
3792 | return 0; | ||
3793 | break; | ||
3794 | case APIC_DFR: | ||
3795 | avic_handle_dfr_update(&svm->vcpu); | ||
3796 | break; | ||
3797 | default: | ||
3798 | break; | ||
3799 | } | ||
3800 | |||
3801 | kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); | ||
3802 | |||
3803 | return 1; | ||
3804 | } | ||
3805 | |||
3806 | static bool is_avic_unaccelerated_access_trap(u32 offset) | ||
3807 | { | ||
3808 | bool ret = false; | ||
3809 | |||
3810 | switch (offset) { | ||
3811 | case APIC_ID: | ||
3812 | case APIC_EOI: | ||
3813 | case APIC_RRR: | ||
3814 | case APIC_LDR: | ||
3815 | case APIC_DFR: | ||
3816 | case APIC_SPIV: | ||
3817 | case APIC_ESR: | ||
3818 | case APIC_ICR: | ||
3819 | case APIC_LVTT: | ||
3820 | case APIC_LVTTHMR: | ||
3821 | case APIC_LVTPC: | ||
3822 | case APIC_LVT0: | ||
3823 | case APIC_LVT1: | ||
3824 | case APIC_LVTERR: | ||
3825 | case APIC_TMICT: | ||
3826 | case APIC_TDCR: | ||
3827 | ret = true; | ||
3828 | break; | ||
3829 | default: | ||
3830 | break; | ||
3831 | } | ||
3832 | return ret; | ||
3833 | } | ||
3834 | |||
3835 | static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) | ||
3836 | { | ||
3837 | int ret = 0; | ||
3838 | u32 offset = svm->vmcb->control.exit_info_1 & | ||
3839 | AVIC_UNACCEL_ACCESS_OFFSET_MASK; | ||
3840 | u32 vector = svm->vmcb->control.exit_info_2 & | ||
3841 | AVIC_UNACCEL_ACCESS_VECTOR_MASK; | ||
3842 | bool write = (svm->vmcb->control.exit_info_1 >> 32) & | ||
3843 | AVIC_UNACCEL_ACCESS_WRITE_MASK; | ||
3844 | bool trap = is_avic_unaccelerated_access_trap(offset); | ||
3845 | |||
3846 | trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset, | ||
3847 | trap, write, vector); | ||
3848 | if (trap) { | ||
3849 | /* Handling Trap */ | ||
3850 | WARN_ONCE(!write, "svm: Handling trap read.\n"); | ||
3851 | ret = avic_unaccel_trap_write(svm); | ||
3852 | } else { | ||
3853 | /* Handling Fault */ | ||
3854 | ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); | ||
3855 | } | ||
3856 | |||
3857 | return ret; | ||
3858 | } | ||
3859 | |||
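A concrete decode for the handler above (illustrative; APIC register offsets as in apicdef.h): a trapped guest write to the LDR arrives with bits 11:4 of exit_info_1 equal to 0xD0 (APIC_LDR) and bit 32 set (write), so the trap path runs avic_unaccel_trap_write() and ends up in avic_handle_ldr_update(); a register outside the trap list, e.g. APIC_TMCCT, takes the fault path and is emulated instead.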
3284 | static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | 3860 | static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { |
3285 | [SVM_EXIT_READ_CR0] = cr_interception, | 3861 | [SVM_EXIT_READ_CR0] = cr_interception, |
3286 | [SVM_EXIT_READ_CR3] = cr_interception, | 3862 | [SVM_EXIT_READ_CR3] = cr_interception, |
@@ -3344,6 +3920,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3344 | [SVM_EXIT_XSETBV] = xsetbv_interception, | 3920 | [SVM_EXIT_XSETBV] = xsetbv_interception, |
3345 | [SVM_EXIT_NPF] = pf_interception, | 3921 | [SVM_EXIT_NPF] = pf_interception, |
3346 | [SVM_EXIT_RSM] = emulate_on_interception, | 3922 | [SVM_EXIT_RSM] = emulate_on_interception, |
3923 | [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, | ||
3924 | [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, | ||
3347 | }; | 3925 | }; |
3348 | 3926 | ||
3349 | static void dump_vmcb(struct kvm_vcpu *vcpu) | 3927 | static void dump_vmcb(struct kvm_vcpu *vcpu) |
@@ -3375,10 +3953,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) | |||
3375 | pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); | 3953 | pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); |
3376 | pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); | 3954 | pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); |
3377 | pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); | 3955 | pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); |
3956 | pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); | ||
3378 | pr_err("%-20s%08x\n", "event_inj:", control->event_inj); | 3957 | pr_err("%-20s%08x\n", "event_inj:", control->event_inj); |
3379 | pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); | 3958 | pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); |
3380 | pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); | 3959 | pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); |
3381 | pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); | 3960 | pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); |
3961 | pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); | ||
3962 | pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); | ||
3963 | pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id); | ||
3382 | pr_err("VMCB State Save Area:\n"); | 3964 | pr_err("VMCB State Save Area:\n"); |
3383 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", | 3965 | pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", |
3384 | "es:", | 3966 | "es:", |
@@ -3562,6 +4144,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
3562 | { | 4144 | { |
3563 | struct vmcb_control_area *control; | 4145 | struct vmcb_control_area *control; |
3564 | 4146 | ||
4147 | /* The following fields are ignored when AVIC is enabled */ | ||
3565 | control = &svm->vmcb->control; | 4148 | control = &svm->vmcb->control; |
3566 | control->int_vector = irq; | 4149 | control->int_vector = irq; |
3567 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 4150 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
@@ -3583,11 +4166,17 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) | |||
3583 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; | 4166 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; |
3584 | } | 4167 | } |
3585 | 4168 | ||
4169 | static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) | ||
4170 | { | ||
4171 | return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK); | ||
4172 | } | ||
4173 | |||
3586 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | 4174 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) |
3587 | { | 4175 | { |
3588 | struct vcpu_svm *svm = to_svm(vcpu); | 4176 | struct vcpu_svm *svm = to_svm(vcpu); |
3589 | 4177 | ||
3590 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 4178 | if (svm_nested_virtualize_tpr(vcpu) || |
4179 | kvm_vcpu_apicv_active(vcpu)) | ||
3591 | return; | 4180 | return; |
3592 | 4181 | ||
3593 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); | 4182 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
@@ -3606,11 +4195,28 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | |||
3606 | 4195 | ||
3607 | static bool svm_get_enable_apicv(void) | 4196 | static bool svm_get_enable_apicv(void) |
3608 | { | 4197 | { |
3609 | return false; | 4198 | return avic; |
4199 | } | ||
4200 | |||
4201 | static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | ||
4202 | { | ||
3610 | } | 4203 | } |
3611 | 4204 | ||
4205 | static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) | ||
4206 | { | ||
4207 | } | ||
4208 | |||
4209 | /* Note: Currently only used by Hyper-V. */ | ||
3612 | static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) | 4210 | static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) |
3613 | { | 4211 | { |
4212 | struct vcpu_svm *svm = to_svm(vcpu); | ||
4213 | struct vmcb *vmcb = svm->vmcb; | ||
4214 | |||
4215 | if (!avic) | ||
4216 | return; | ||
4217 | |||
4218 | vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; | ||
4219 | mark_dirty(vmcb, VMCB_INTR); | ||
3614 | } | 4220 | } |
3615 | 4221 | ||
3616 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 4222 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
@@ -3623,6 +4229,18 @@ static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) | |||
3623 | return; | 4229 | return; |
3624 | } | 4230 | } |
3625 | 4231 | ||
4232 | static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) | ||
4233 | { | ||
4234 | kvm_lapic_set_irr(vec, vcpu->arch.apic); | ||
4235 | smp_mb__after_atomic(); | ||
4236 | |||
4237 | if (avic_vcpu_is_running(vcpu)) | ||
4238 | wrmsrl(SVM_AVIC_DOORBELL, | ||
4239 | __default_cpu_present_to_apicid(vcpu->cpu)); | ||
4240 | else | ||
4241 | kvm_vcpu_wake_up(vcpu); | ||
4242 | } | ||
4243 | |||
3626 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 4244 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
3627 | { | 4245 | { |
3628 | struct vcpu_svm *svm = to_svm(vcpu); | 4246 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -3677,6 +4295,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
3677 | { | 4295 | { |
3678 | struct vcpu_svm *svm = to_svm(vcpu); | 4296 | struct vcpu_svm *svm = to_svm(vcpu); |
3679 | 4297 | ||
4298 | if (kvm_vcpu_apicv_active(vcpu)) | ||
4299 | return; | ||
4300 | |||
3680 | /* | 4301 | /* |
3681 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | 4302 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes |
3682 | * 1, because that's a separate STGI/VMRUN intercept. The next time we | 4303 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
@@ -3728,7 +4349,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
3728 | { | 4349 | { |
3729 | struct vcpu_svm *svm = to_svm(vcpu); | 4350 | struct vcpu_svm *svm = to_svm(vcpu); |
3730 | 4351 | ||
3731 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 4352 | if (svm_nested_virtualize_tpr(vcpu)) |
3732 | return; | 4353 | return; |
3733 | 4354 | ||
3734 | if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { | 4355 | if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { |
@@ -3742,7 +4363,8 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
3742 | struct vcpu_svm *svm = to_svm(vcpu); | 4363 | struct vcpu_svm *svm = to_svm(vcpu); |
3743 | u64 cr8; | 4364 | u64 cr8; |
3744 | 4365 | ||
3745 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 4366 | if (svm_nested_virtualize_tpr(vcpu) || |
4367 | kvm_vcpu_apicv_active(vcpu)) | ||
3746 | return; | 4368 | return; |
3747 | 4369 | ||
3748 | cr8 = kvm_get_cr8(vcpu); | 4370 | cr8 = kvm_get_cr8(vcpu); |
@@ -4045,14 +4667,26 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
4045 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | 4667 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) |
4046 | { | 4668 | { |
4047 | struct vcpu_svm *svm = to_svm(vcpu); | 4669 | struct vcpu_svm *svm = to_svm(vcpu); |
4670 | struct kvm_cpuid_entry2 *entry; | ||
4048 | 4671 | ||
4049 | /* Update nrips enabled cache */ | 4672 | /* Update nrips enabled cache */ |
4050 | svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); | 4673 | svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); |
4674 | |||
4675 | if (!kvm_vcpu_apicv_active(vcpu)) | ||
4676 | return; | ||
4677 | |||
4678 | entry = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
4679 | if (entry) | ||
4680 | entry->ecx &= ~bit(X86_FEATURE_X2APIC); | ||
4051 | } | 4681 | } |
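Presumably the X2APIC bit is hidden here because this version of AVIC only accelerates the MMIO-based xAPIC interface; letting the guest switch to x2APIC MSR accesses would bypass the vAPIC backing page (this rationale is not spelled out in the patch itself).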
4052 | 4682 | ||
4053 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 4683 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
4054 | { | 4684 | { |
4055 | switch (func) { | 4685 | switch (func) { |
4686 | case 0x1: | ||
4687 | if (avic) | ||
4688 | entry->ecx &= ~bit(X86_FEATURE_X2APIC); | ||
4689 | break; | ||
4056 | case 0x80000001: | 4690 | case 0x80000001: |
4057 | if (nested) | 4691 | if (nested) |
4058 | entry->ecx |= (1 << 2); /* Set SVM bit */ | 4692 | entry->ecx |= (1 << 2); /* Set SVM bit */ |
@@ -4307,6 +4941,15 @@ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) | |||
4307 | { | 4941 | { |
4308 | } | 4942 | } |
4309 | 4943 | ||
4944 | static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) | ||
4945 | { | ||
4946 | if (avic_handle_apic_id_update(vcpu) != 0) | ||
4947 | return; | ||
4948 | if (avic_handle_dfr_update(vcpu) != 0) | ||
4949 | return; | ||
4950 | avic_handle_ldr_update(vcpu); | ||
4951 | } | ||
4952 | |||
4310 | static struct kvm_x86_ops svm_x86_ops = { | 4953 | static struct kvm_x86_ops svm_x86_ops = { |
4311 | .cpu_has_kvm_support = has_svm, | 4954 | .cpu_has_kvm_support = has_svm, |
4312 | .disabled_by_bios = is_disabled, | 4955 | .disabled_by_bios = is_disabled, |
@@ -4322,9 +4965,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4322 | .vcpu_free = svm_free_vcpu, | 4965 | .vcpu_free = svm_free_vcpu, |
4323 | .vcpu_reset = svm_vcpu_reset, | 4966 | .vcpu_reset = svm_vcpu_reset, |
4324 | 4967 | ||
4968 | .vm_init = avic_vm_init, | ||
4969 | .vm_destroy = avic_vm_destroy, | ||
4970 | |||
4325 | .prepare_guest_switch = svm_prepare_guest_switch, | 4971 | .prepare_guest_switch = svm_prepare_guest_switch, |
4326 | .vcpu_load = svm_vcpu_load, | 4972 | .vcpu_load = svm_vcpu_load, |
4327 | .vcpu_put = svm_vcpu_put, | 4973 | .vcpu_put = svm_vcpu_put, |
4974 | .vcpu_blocking = svm_vcpu_blocking, | ||
4975 | .vcpu_unblocking = svm_vcpu_unblocking, | ||
4328 | 4976 | ||
4329 | .update_bp_intercept = update_bp_intercept, | 4977 | .update_bp_intercept = update_bp_intercept, |
4330 | .get_msr = svm_get_msr, | 4978 | .get_msr = svm_get_msr, |
@@ -4382,6 +5030,9 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4382 | .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, | 5030 | .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, |
4383 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 5031 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
4384 | .sync_pir_to_irr = svm_sync_pir_to_irr, | 5032 | .sync_pir_to_irr = svm_sync_pir_to_irr, |
5033 | .hwapic_irr_update = svm_hwapic_irr_update, | ||
5034 | .hwapic_isr_update = svm_hwapic_isr_update, | ||
5035 | .apicv_post_state_restore = avic_post_state_restore, | ||
4385 | 5036 | ||
4386 | .set_tss_addr = svm_set_tss_addr, | 5037 | .set_tss_addr = svm_set_tss_addr, |
4387 | .get_tdp_level = get_npt_level, | 5038 | .get_tdp_level = get_npt_level, |
@@ -4415,6 +5066,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4415 | .sched_in = svm_sched_in, | 5066 | .sched_in = svm_sched_in, |
4416 | 5067 | ||
4417 | .pmu_ops = &amd_pmu_ops, | 5068 | .pmu_ops = &amd_pmu_ops, |
5069 | .deliver_posted_interrupt = svm_deliver_avic_intr, | ||
4418 | }; | 5070 | }; |
4419 | 5071 | ||
4420 | static int __init svm_init(void) | 5072 | static int __init svm_init(void) |