Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--   arch/x86/kvm/x86.c | 548
1 file changed, 472 insertions(+), 76 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f7641756be2..76f54461f7cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -46,6 +46,8 @@
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/pci.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/pvclock_gtod.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -158,7 +160,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
@@ -633,7 +637,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	}
 
 	if (is_long_mode(vcpu)) {
-		if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
 			if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
 				return 1;
 		} else
@@ -827,6 +831,7 @@ static u32 msrs_to_save[] = {
 static unsigned num_msrs_to_save;
 
 static const u32 emulated_msrs[] = {
+	MSR_IA32_TSC_ADJUST,
 	MSR_IA32_TSCDEADLINE,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
@@ -886,9 +891,9 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
-	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
+	return kvm_x86_ops->set_msr(vcpu, msr);
 }
 
 /*
@@ -896,9 +901,63 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
  */
 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 {
-	return kvm_set_msr(vcpu, index, *data);
+	struct msr_data msr;
+
+	msr.data = *data;
+	msr.index = index;
+	msr.host_initiated = true;
+	return kvm_set_msr(vcpu, &msr);
 }
 
+#ifdef CONFIG_X86_64
+struct pvclock_gtod_data {
+	seqcount_t	seq;
+
+	struct { /* extract of a clocksource struct */
+		int vclock_mode;
+		cycle_t	cycle_last;
+		cycle_t	mask;
+		u32	mult;
+		u32	shift;
+	} clock;
+
+	/* open coded 'struct timespec' */
+	u64		monotonic_time_snsec;
+	time_t		monotonic_time_sec;
+};
+
+static struct pvclock_gtod_data pvclock_gtod_data;
+
+static void update_pvclock_gtod(struct timekeeper *tk)
+{
+	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
+
+	write_seqcount_begin(&vdata->seq);
+
+	/* copy pvclock gtod data */
+	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
+	vdata->clock.cycle_last		= tk->clock->cycle_last;
+	vdata->clock.mask		= tk->clock->mask;
+	vdata->clock.mult		= tk->mult;
+	vdata->clock.shift		= tk->shift;
+
+	vdata->monotonic_time_sec	= tk->xtime_sec
+					+ tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_snsec	= tk->xtime_nsec
+					+ (tk->wall_to_monotonic.tv_nsec
+						<< tk->shift);
+	while (vdata->monotonic_time_snsec >=
+					(((u64)NSEC_PER_SEC) << tk->shift)) {
+		vdata->monotonic_time_snsec -=
+					((u64)NSEC_PER_SEC) << tk->shift;
+		vdata->monotonic_time_sec++;
+	}
+
+	write_seqcount_end(&vdata->seq);
+}
+#endif
+
+
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	int version;
@@ -995,6 +1054,10 @@ static inline u64 get_kernel_ns(void)
 	return timespec_to_ns(&ts);
 }
 
+#ifdef CONFIG_X86_64
+static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
+#endif
+
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 unsigned long max_tsc_khz;
 
@@ -1046,12 +1109,47 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 	return tsc;
 }
 
-void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
+void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	bool vcpus_matched;
+	bool do_request = false;
+	struct kvm_arch *ka = &vcpu->kvm->arch;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+			 atomic_read(&vcpu->kvm->online_vcpus));
+
+	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
+		if (!ka->use_master_clock)
+			do_request = 1;
+
+	if (!vcpus_matched && ka->use_master_clock)
+		do_request = 1;
+
+	if (do_request)
+		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
+	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
+			    atomic_read(&vcpu->kvm->online_vcpus),
+			    ka->use_master_clock, gtod->clock.vclock_mode);
+#endif
+}
+
+static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
+{
+	u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
+	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
+}
+
+void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
 	struct kvm *kvm = vcpu->kvm;
 	u64 offset, ns, elapsed;
 	unsigned long flags;
 	s64 usdiff;
+	bool matched;
+	u64 data = msr->data;
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
 	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
@@ -1094,6 +1192,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 			offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
 			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
 		}
+		matched = true;
 	} else {
 		/*
 		 * We split periods of matched TSC writes into generations.
@@ -1108,6 +1207,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 		kvm->arch.cur_tsc_nsec = ns;
 		kvm->arch.cur_tsc_write = data;
 		kvm->arch.cur_tsc_offset = offset;
+		matched = false;
 		pr_debug("kvm: new tsc generation %u, clock %llu\n",
 			 kvm->arch.cur_tsc_generation, data);
 	}
@@ -1129,26 +1229,195 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
 	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
 
+	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
+		update_ia32_tsc_adjust_msr(vcpu, offset);
 	kvm_x86_ops->write_tsc_offset(vcpu, offset);
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+
+	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+	if (matched)
+		kvm->arch.nr_vcpus_matched_tsc++;
+	else
+		kvm->arch.nr_vcpus_matched_tsc = 0;
+
+	kvm_track_tsc_matching(vcpu);
+	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
 }
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+#ifdef CONFIG_X86_64
+
+static cycle_t read_tsc(void)
+{
+	cycle_t ret;
+	u64 last;
+
+	/*
+	 * Empirically, a fence (of type that depends on the CPU)
+	 * before rdtsc is enough to ensure that rdtsc is ordered
+	 * with respect to loads. The various CPU manuals are unclear
+	 * as to whether rdtsc can be reordered with later loads,
+	 * but no one has ever seen it happen.
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+
+	last = pvclock_gtod_data.clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a function of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead. I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
+}
+
+static inline u64 vgettsc(cycle_t *cycle_now)
+{
+	long v;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	*cycle_now = read_tsc();
+
+	v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
+	return v * gtod->clock.mult;
+}
+
+static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	ts->tv_nsec = 0;
+	do {
+		seq = read_seqcount_begin(&gtod->seq);
+		mode = gtod->clock.vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_sec;
+		ns = gtod->monotonic_time_snsec;
+		ns += vgettsc(cycle_now);
+		ns >>= gtod->clock.shift;
+	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+	timespec_add_ns(ts, ns);
+
+	return mode;
+}
+
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+{
+	struct timespec ts;
+
+	/* checked again under seqlock below */
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return false;
+
+	if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
+		return false;
+
+	monotonic_to_bootbased(&ts);
+	*kernel_ns = timespec_to_ns(&ts);
+
+	return true;
+}
+#endif
+
+/*
+ *
+ * Assuming a stable TSC across physical CPUs, and a stable TSC
+ * across virtual CPUs, the following condition is possible.
+ * Each numbered line represents an event visible to both
+ * CPUs at the next numbered event.
+ *
+ * "timespecX" represents host monotonic time. "tscX" represents
+ * RDTSC value.
+ *
+ *		VCPU0 on CPU0			|	VCPU1 on CPU1
+ *
+ * 1. read timespec0,tsc0
+ * 2.					| timespec1 = timespec0 + N
+ *					| tsc1 = tsc0 + M
+ * 3. transition to guest		| transition to guest
+ * 4. ret0 = timespec0 + (rdtsc - tsc0) |
+ * 5.					| ret1 = timespec1 + (rdtsc - tsc1)
+ *					| ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
+ *
+ * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
+ *
+ *	- ret0 < ret1
+ *	- timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
+ *		...
+ *	- 0 < N - M => M < N
+ *
+ * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
+ * always the case (the difference between two distinct xtime instances
+ * might be smaller than the difference between corresponding TSC reads,
+ * when updating guest vcpus pvclock areas).
+ *
+ * To avoid that problem, do not allow visibility of distinct
+ * system_timestamp/tsc_timestamp values simultaneously: use a master
+ * copy of host monotonic time values. Update that master copy
+ * in lockstep.
+ *
+ * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
+ *
+ */
+
+static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	struct kvm_arch *ka = &kvm->arch;
+	int vclock_mode;
+	bool host_tsc_clocksource, vcpus_matched;
+
+	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+			 atomic_read(&kvm->online_vcpus));
+
+	/*
+	 * If the host uses TSC clock, then passthrough TSC as stable
+	 * to the guest.
+	 */
+	host_tsc_clocksource = kvm_get_time_and_clockread(
+					&ka->master_kernel_ns,
+					&ka->master_cycle_now);
+
+	ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
+
+	if (ka->use_master_clock)
+		atomic_set(&kvm_guest_has_master_clock, 1);
+
+	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
+	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
+					vcpus_matched);
+#endif
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
-	unsigned long flags;
+	unsigned long flags, this_tsc_khz;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
+	struct kvm_arch *ka = &v->kvm->arch;
 	void *shared_kaddr;
-	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
-	u64 tsc_timestamp;
+	u64 tsc_timestamp, host_tsc;
+	struct pvclock_vcpu_time_info *guest_hv_clock;
 	u8 pvclock_flags;
+	bool use_master_clock;
+
+	kernel_ns = 0;
+	host_tsc = 0;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
-	kernel_ns = get_kernel_ns();
 	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	if (unlikely(this_tsc_khz == 0)) {
 		local_irq_restore(flags);
@@ -1157,6 +1426,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	}
 
 	/*
+	 * If the host uses TSC clock, then passthrough TSC as stable
+	 * to the guest.
+	 */
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	use_master_clock = ka->use_master_clock;
+	if (use_master_clock) {
+		host_tsc = ka->master_cycle_now;
+		kernel_ns = ka->master_kernel_ns;
+	}
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+	if (!use_master_clock) {
+		host_tsc = native_read_tsc();
+		kernel_ns = get_kernel_ns();
+	}
+
+	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
+
+	/*
 	 * We may have to catch up the TSC to match elapsed wall clock
 	 * time for two reasons, even if kvmclock is used.
 	 * 1) CPU could have been running below the maximum TSC rate
@@ -1217,23 +1504,20 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->hw_tsc_khz = this_tsc_khz;
 	}
 
-	if (max_kernel_ns > kernel_ns)
-		kernel_ns = max_kernel_ns;
-
+	/* with a master <monotonic time, tsc value> tuple,
+	 * pvclock clock reads always increase at the (scaled) rate
+	 * of guest TSC - no need to deal with sampling errors.
+	 */
+	if (!use_master_clock) {
+		if (max_kernel_ns > kernel_ns)
+			kernel_ns = max_kernel_ns;
+	}
 	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_kernel_ns = kernel_ns;
 	vcpu->last_guest_tsc = tsc_timestamp;
 
-	pvclock_flags = 0;
-	if (vcpu->pvclock_set_guest_stopped_request) {
-		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
-		vcpu->pvclock_set_guest_stopped_request = false;
-	}
-
-	vcpu->hv_clock.flags = pvclock_flags;
-
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
@@ -1243,6 +1527,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	shared_kaddr = kmap_atomic(vcpu->time_page);
 
+	guest_hv_clock = shared_kaddr + vcpu->time_offset;
+
+	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	/* If the host uses TSC clocksource, then it is stable */
+	if (use_master_clock)
+		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
+
+	vcpu->hv_clock.flags = pvclock_flags;
+
 	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
 	       sizeof(vcpu->hv_clock));
 
@@ -1572,9 +1872,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
 }
 
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	bool pr = false;
+	u32 msr = msr_info->index;
+	u64 data = msr_info->data;
 
 	switch (msr) {
 	case MSR_EFER:
@@ -1625,6 +1927,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case MSR_IA32_TSCDEADLINE:
 		kvm_set_lapic_tscdeadline_msr(vcpu, data);
 		break;
+	case MSR_IA32_TSC_ADJUST:
+		if (guest_cpuid_has_tsc_adjust(vcpu)) {
+			if (!msr_info->host_initiated) {
+				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
+			}
+			vcpu->arch.ia32_tsc_adjust_msr = data;
+		}
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->arch.ia32_misc_enable_msr = data;
 		break;
@@ -1984,6 +2295,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_TSCDEADLINE:
 		data = kvm_get_lapic_tscdeadline_msr(vcpu);
 		break;
+	case MSR_IA32_TSC_ADJUST:
+		data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->arch.ia32_misc_enable_msr;
 		break;
@@ -2342,7 +2656,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			kvm_x86_ops->write_tsc_offset(vcpu, offset);
 			vcpu->arch.tsc_catchup = 1;
 		}
-		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+		/*
+		 * On a host with synchronized TSC, there is no need to update
+		 * kvmclock on vcpu->cpu migration
+		 */
+		if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
+			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 		if (vcpu->cpu != cpu)
 			kvm_migrate_timers(vcpu);
 		vcpu->cpu = cpu;
@@ -2691,15 +3010,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (!vcpu->arch.apic)
 			goto out;
 		u.lapic = memdup_user(argp, sizeof(*u.lapic));
-		if (IS_ERR(u.lapic)) {
-			r = PTR_ERR(u.lapic);
-			goto out;
-		}
+		if (IS_ERR(u.lapic))
+			return PTR_ERR(u.lapic);
 
 		r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_INTERRUPT: {
@@ -2709,16 +3023,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&irq, argp, sizeof irq))
 			goto out;
 		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_NMI: {
 		r = kvm_vcpu_ioctl_nmi(vcpu);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_SET_CPUID: {
@@ -2729,8 +3037,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_SET_CPUID2: {
@@ -2742,8 +3048,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
 					      cpuid_arg->entries);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_GET_CPUID2: {
@@ -2875,10 +3179,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 	case KVM_SET_XSAVE: {
 		u.xsave = memdup_user(argp, sizeof(*u.xsave));
-		if (IS_ERR(u.xsave)) {
-			r = PTR_ERR(u.xsave);
-			goto out;
-		}
+		if (IS_ERR(u.xsave))
+			return PTR_ERR(u.xsave);
 
 		r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
 		break;
@@ -2900,10 +3202,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 	case KVM_SET_XCRS: {
 		u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
-		if (IS_ERR(u.xcrs)) {
-			r = PTR_ERR(u.xcrs);
-			goto out;
-		}
+		if (IS_ERR(u.xcrs))
+			return PTR_ERR(u.xcrs);
 
 		r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
 		break;
@@ -2951,7 +3251,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
 	int ret;
 
 	if (addr > (unsigned int)(-3 * PAGE_SIZE))
-		return -1;
+		return -EINVAL;
 	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
 	return ret;
 }
@@ -3212,8 +3512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	switch (ioctl) {
 	case KVM_SET_TSS_ADDR:
 		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
-		if (r < 0)
-			goto out;
 		break;
 	case KVM_SET_IDENTITY_MAP_ADDR: {
 		u64 ident_addr;
@@ -3222,14 +3520,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
 			goto out;
 		r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
-		if (r < 0)
-			goto out;
 		break;
 	}
 	case KVM_SET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
-		if (r)
-			goto out;
 		break;
 	case KVM_GET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
@@ -3320,8 +3614,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 	get_irqchip_out:
 		kfree(chip);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_SET_IRQCHIP: {
@@ -3343,8 +3635,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 	set_irqchip_out:
 		kfree(chip);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_GET_PIT: {
@@ -3371,9 +3661,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (!kvm->arch.vpit)
 			goto out;
 		r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_GET_PIT2: {
@@ -3397,9 +3684,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (!kvm->arch.vpit)
 			goto out;
 		r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_REINJECT_CONTROL: {
@@ -3408,9 +3692,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&control, argp, sizeof(control)))
 			goto out;
 		r = kvm_vm_ioctl_reinject(kvm, &control);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_XEN_HVM_CONFIG: {
@@ -4273,7 +4554,12 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 			    u32 msr_index, u64 data)
 {
-	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+	struct msr_data msr;
+
+	msr.data = data;
+	msr.index = msr_index;
+	msr.host_initiated = false;
+	return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -4495,7 +4781,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	 * instruction -> ...
 	 */
 	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-	if (!is_error_pfn(pfn)) {
+	if (!is_error_noslot_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return true;
 	}
@@ -4881,6 +5167,50 @@ static void kvm_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask(mask);
 }
 
+#ifdef CONFIG_X86_64
+static void pvclock_gtod_update_fn(struct work_struct *work)
+{
+	struct kvm *kvm;
+
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	raw_spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+	atomic_set(&kvm_guest_has_master_clock, 0);
+	raw_spin_unlock(&kvm_lock);
+}
+
+static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
+
+/*
+ * Notification about pvclock gtod data update.
+ */
+static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
+			       void *priv)
+{
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+	struct timekeeper *tk = priv;
+
+	update_pvclock_gtod(tk);
+
+	/* disable master clock if host does not trust, or does not
+	 * use, TSC clocksource
+	 */
+	if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+	    atomic_read(&kvm_guest_has_master_clock) != 0)
+		queue_work(system_long_wq, &pvclock_gtod_work);
+
+	return 0;
+}
+
+static struct notifier_block pvclock_gtod_notifier = {
+	.notifier_call = pvclock_gtod_notify,
+};
+#endif
+
 int kvm_arch_init(void *opaque)
 {
 	int r;
@@ -4922,6 +5252,10 @@ int kvm_arch_init(void *opaque)
 	host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
 	kvm_lapic_init();
+#ifdef CONFIG_X86_64
+	pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+#endif
+
 	return 0;
 
 out:
@@ -4936,6 +5270,9 @@ void kvm_arch_exit(void)
 	cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
 				    CPUFREQ_TRANSITION_NOTIFIER);
 	unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+#ifdef CONFIG_X86_64
+	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+#endif
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
 }
@@ -5059,7 +5396,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	char instruction[3];
@@ -5235,6 +5572,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+static void kvm_gen_update_masterclock(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm_arch *ka = &kvm->arch;
+
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	kvm_make_mclock_inprogress_request(kvm);
+	/* no guest entries from this point */
+	pvclock_update_vm_gtod_copy(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+
+	/* guest entries allowed */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+#endif
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -5247,6 +5607,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
+		if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
+			kvm_gen_update_masterclock(vcpu->kvm);
 		if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
 			r = kvm_guest_time_update(vcpu);
 			if (unlikely(r))
@@ -5362,7 +5724,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (hw_breakpoint_active())
 		hw_breakpoint_restore();
 
-	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
+							   native_read_tsc());
 
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
@@ -5419,7 +5782,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			pr_debug("vcpu %d received sipi with vector # %x\n",
 				 vcpu->vcpu_id, vcpu->arch.sipi_vector);
 			kvm_lapic_reset(vcpu);
-			r = kvm_arch_vcpu_reset(vcpu);
+			r = kvm_vcpu_reset(vcpu);
 			if (r)
 				return r;
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6047,7 +6410,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	r = vcpu_load(vcpu);
 	if (r)
 		return r;
-	r = kvm_arch_vcpu_reset(vcpu);
+	r = kvm_vcpu_reset(vcpu);
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);
@@ -6055,6 +6418,23 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	int r;
+	struct msr_data msr;
+
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
+	msr.data = 0x0;
+	msr.index = MSR_IA32_TSC;
+	msr.host_initiated = true;
+	kvm_write_tsc(vcpu, &msr);
+	vcpu_put(vcpu);
+
+	return r;
+}
+
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -6069,7 +6449,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->vcpu_free(vcpu);
 }
 
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.nmi_queued, 0);
 	vcpu->arch.nmi_pending = 0;
@@ -6092,6 +6472,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	kvm_pmu_reset(vcpu);
 
+	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+	vcpu->arch.regs_avail = ~0;
+	vcpu->arch.regs_dirty = ~0;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -6168,6 +6552,8 @@ int kvm_arch_hardware_enable(void *garbage)
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			vcpu->arch.tsc_offset_adjustment += delta_cyc;
 			vcpu->arch.last_host_tsc = local_tsc;
+			set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
+				&vcpu->requests);
 		}
 
 		/*
@@ -6258,10 +6644,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
 		goto fail_free_mce_banks;
 
+	r = fx_init(vcpu);
+	if (r)
+		goto fail_free_wbinvd_dirty_mask;
+
+	vcpu->arch.ia32_tsc_adjust_msr = 0x0;
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
 	return 0;
+fail_free_wbinvd_dirty_mask:
+	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 fail_free_mce_banks:
 	kfree(vcpu->arch.mce_banks);
 fail_free_lapic:
@@ -6305,6 +6698,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
+	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+
+	pvclock_update_vm_gtod_copy(kvm);
 
 	return 0;
 }
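
For context, a minimal guest-side sketch of how a kvmclock reader typically consumes the PVCLOCK_TSC_STABLE_BIT that kvm_guest_time_update() sets above when the masterclock is in use. The structure layout and flag values follow the pvclock ABI, but rdtsc(), the compiler barriers and the last_value bookkeeping here are simplified stand-ins (the in-kernel reader uses rdtsc_barrier() and an atomic compare-and-swap), so treat this as an illustration rather than kernel code: when the stable bit is set, the host guarantees a single <system_time, tsc_timestamp> tuple across vcpus and the "never go backwards" clamp can be skipped.

#include <stdint.h>

struct pvclock_vcpu_time_info {
	uint32_t version;
	uint32_t pad0;
	uint64_t tsc_timestamp;
	uint64_t system_time;
	uint32_t tsc_to_system_mul;
	int8_t   tsc_shift;
	uint8_t  flags;
	uint8_t  pad[2];
};

#define PVCLOCK_TSC_STABLE_BIT	(1 << 0)
#define PVCLOCK_GUEST_STOPPED	(1 << 1)

static inline uint64_t rdtsc(void)
{
	uint32_t lo, hi;
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

/* scale a TSC delta by the host-provided mul/shift pair */
static uint64_t pvclock_scale_delta(uint64_t delta, uint32_t mul, int8_t shift)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;
	return (uint64_t)(((unsigned __int128)delta * mul) >> 32);
}

/* simplified, single-threaded stand-in for the atomic last-value clamp */
static uint64_t last_value;

uint64_t pvclock_read(volatile struct pvclock_vcpu_time_info *src)
{
	uint32_t version;
	uint64_t ns, delta;
	uint8_t flags;

	do {
		version = src->version;	/* odd => host update in progress */
		asm volatile("" ::: "memory");
		delta = rdtsc() - src->tsc_timestamp;
		ns = src->system_time +
		     pvclock_scale_delta(delta, src->tsc_to_system_mul,
					 src->tsc_shift);
		flags = src->flags;
		asm volatile("" ::: "memory");
	} while ((version & 1) || version != src->version);

	/* stable TSC: readings are already monotonic, no clamp needed */
	if (flags & PVCLOCK_TSC_STABLE_BIT)
		return ns;

	if (ns > last_value)
		last_value = ns;
	return last_value;
}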