Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c  |  548
1 file changed, 472 insertions(+), 76 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f7641756be2..76f54461f7cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -46,6 +46,8 @@
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/pci.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/pvclock_gtod.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -158,7 +160,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
@@ -633,7 +637,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	}
 
 	if (is_long_mode(vcpu)) {
-		if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
 			if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
 				return 1;
 		} else
@@ -827,6 +831,7 @@ static u32 msrs_to_save[] = {
 static unsigned num_msrs_to_save;
 
 static const u32 emulated_msrs[] = {
+	MSR_IA32_TSC_ADJUST,
 	MSR_IA32_TSCDEADLINE,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
@@ -886,9 +891,9 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
-	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
+	return kvm_x86_ops->set_msr(vcpu, msr);
 }
 
 /*
@@ -896,9 +901,63 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
  */
 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 {
-	return kvm_set_msr(vcpu, index, *data);
+	struct msr_data msr;
+
+	msr.data = *data;
+	msr.index = index;
+	msr.host_initiated = true;
+	return kvm_set_msr(vcpu, &msr);
 }
 
+#ifdef CONFIG_X86_64
+struct pvclock_gtod_data {
+	seqcount_t	seq;
+
+	struct { /* extract of a clocksource struct */
+		int vclock_mode;
+		cycle_t	cycle_last;
+		cycle_t	mask;
+		u32	mult;
+		u32	shift;
+	} clock;
+
+	/* open coded 'struct timespec' */
+	u64		monotonic_time_snsec;
+	time_t		monotonic_time_sec;
+};
+
+static struct pvclock_gtod_data pvclock_gtod_data;
+
+static void update_pvclock_gtod(struct timekeeper *tk)
+{
+	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
+
+	write_seqcount_begin(&vdata->seq);
+
+	/* copy pvclock gtod data */
+	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
+	vdata->clock.cycle_last		= tk->clock->cycle_last;
+	vdata->clock.mask		= tk->clock->mask;
+	vdata->clock.mult		= tk->mult;
+	vdata->clock.shift		= tk->shift;
+
+	vdata->monotonic_time_sec	= tk->xtime_sec
+					+ tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_snsec	= tk->xtime_nsec
+					+ (tk->wall_to_monotonic.tv_nsec
+						<< tk->shift);
+	while (vdata->monotonic_time_snsec >=
+					(((u64)NSEC_PER_SEC) << tk->shift)) {
+		vdata->monotonic_time_snsec -=
+					((u64)NSEC_PER_SEC) << tk->shift;
+		vdata->monotonic_time_sec++;
+	}
+
+	write_seqcount_end(&vdata->seq);
+}
+#endif
+
+
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	int version;
@@ -995,6 +1054,10 @@ static inline u64 get_kernel_ns(void)
 	return timespec_to_ns(&ts);
 }
 
+#ifdef CONFIG_X86_64
+static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
+#endif
+
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 unsigned long max_tsc_khz;
 
@@ -1046,12 +1109,47 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 	return tsc;
 }
 
-void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
+void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	bool vcpus_matched;
+	bool do_request = false;
+	struct kvm_arch *ka = &vcpu->kvm->arch;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+			 atomic_read(&vcpu->kvm->online_vcpus));
+
+	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
+		if (!ka->use_master_clock)
+			do_request = 1;
+
+	if (!vcpus_matched && ka->use_master_clock)
+		do_request = 1;
+
+	if (do_request)
+		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
+	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
+			    atomic_read(&vcpu->kvm->online_vcpus),
+			    ka->use_master_clock, gtod->clock.vclock_mode);
+#endif
+}
+
+static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
+{
+	u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
+	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
+}
+
+void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
 	struct kvm *kvm = vcpu->kvm;
 	u64 offset, ns, elapsed;
 	unsigned long flags;
 	s64 usdiff;
+	bool matched;
+	u64 data = msr->data;
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
 	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
@@ -1094,6 +1192,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 			offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
 			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
 		}
+		matched = true;
 	} else {
 		/*
 		 * We split periods of matched TSC writes into generations.
@@ -1108,6 +1207,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 		kvm->arch.cur_tsc_nsec = ns;
 		kvm->arch.cur_tsc_write = data;
 		kvm->arch.cur_tsc_offset = offset;
+		matched = false;
 		pr_debug("kvm: new tsc generation %u, clock %llu\n",
 			 kvm->arch.cur_tsc_generation, data);
 	}
@@ -1129,26 +1229,195 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
 	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
 
+	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
+		update_ia32_tsc_adjust_msr(vcpu, offset);
 	kvm_x86_ops->write_tsc_offset(vcpu, offset);
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+
+	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+	if (matched)
+		kvm->arch.nr_vcpus_matched_tsc++;
+	else
+		kvm->arch.nr_vcpus_matched_tsc = 0;
+
+	kvm_track_tsc_matching(vcpu);
+	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
 }
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+#ifdef CONFIG_X86_64
+
+static cycle_t read_tsc(void)
+{
+	cycle_t ret;
+	u64 last;
+
+	/*
+	 * Empirically, a fence (of type that depends on the CPU)
+	 * before rdtsc is enough to ensure that rdtsc is ordered
+	 * with respect to loads. The various CPU manuals are unclear
+	 * as to whether rdtsc can be reordered with later loads,
+	 * but no one has ever seen it happen.
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)vget_cycles();
+
+	last = pvclock_gtod_data.clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a function of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead. I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
+}
+
+static inline u64 vgettsc(cycle_t *cycle_now)
+{
+	long v;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	*cycle_now = read_tsc();
+
+	v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
+	return v * gtod->clock.mult;
+}
+
+static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
+{
+	unsigned long seq;
+	u64 ns;
+	int mode;
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+	ts->tv_nsec = 0;
+	do {
+		seq = read_seqcount_begin(&gtod->seq);
+		mode = gtod->clock.vclock_mode;
+		ts->tv_sec = gtod->monotonic_time_sec;
+		ns = gtod->monotonic_time_snsec;
+		ns += vgettsc(cycle_now);
+		ns >>= gtod->clock.shift;
+	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+	timespec_add_ns(ts, ns);
+
+	return mode;
+}
+
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+{
+	struct timespec ts;
+
+	/* checked again under seqlock below */
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return false;
+
+	if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
+		return false;
+
+	monotonic_to_bootbased(&ts);
+	*kernel_ns = timespec_to_ns(&ts);
+
+	return true;
+}
+#endif
+
+/*
+ *
+ * Assuming a stable TSC across physical CPUS, and a stable TSC
+ * across virtual CPUs, the following condition is possible.
+ * Each numbered line represents an event visible to both
+ * CPUs at the next numbered event.
+ *
+ * "timespecX" represents host monotonic time. "tscX" represents
+ * RDTSC value.
+ *
+ *		VCPU0 on CPU0			|	VCPU1 on CPU1
+ *
+ * 1. read timespec0,tsc0
+ * 2.					| timespec1 = timespec0 + N
+ *					| tsc1 = tsc0 + M
+ * 3. transition to guest		| transition to guest
+ * 4. ret0 = timespec0 + (rdtsc - tsc0) |
+ * 5.					| ret1 = timespec1 + (rdtsc - tsc1)
+ *					| ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
+ *
+ * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
+ *
+ *	- ret0 < ret1
+ *	- timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
+ *		...
+ *	- 0 < N - M => M < N
+ *
+ * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
+ * always the case (the difference between two distinct xtime instances
+ * might be smaller than the difference between corresponding TSC reads,
+ * when updating guest vcpus pvclock areas).
+ *
+ * To avoid that problem, do not allow visibility of distinct
+ * system_timestamp/tsc_timestamp values simultaneously: use a master
+ * copy of host monotonic time values. Update that master copy
+ * in lockstep.
+ *
+ * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
+ *
+ */
+
+static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	struct kvm_arch *ka = &kvm->arch;
+	int vclock_mode;
+	bool host_tsc_clocksource, vcpus_matched;
+
+	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+			atomic_read(&kvm->online_vcpus));
+
+	/*
+	 * If the host uses TSC clock, then passthrough TSC as stable
+	 * to the guest.
+	 */
+	host_tsc_clocksource = kvm_get_time_and_clockread(
+					&ka->master_kernel_ns,
+					&ka->master_cycle_now);
+
+	ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
+
+	if (ka->use_master_clock)
+		atomic_set(&kvm_guest_has_master_clock, 1);
+
+	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
+	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
+					vcpus_matched);
+#endif
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
-	unsigned long flags;
+	unsigned long flags, this_tsc_khz;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
+	struct kvm_arch *ka = &v->kvm->arch;
 	void *shared_kaddr;
-	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
-	u64 tsc_timestamp;
+	u64 tsc_timestamp, host_tsc;
+	struct pvclock_vcpu_time_info *guest_hv_clock;
 	u8 pvclock_flags;
+	bool use_master_clock;
+
+	kernel_ns = 0;
+	host_tsc = 0;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
-	kernel_ns = get_kernel_ns();
 	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	if (unlikely(this_tsc_khz == 0)) {
 		local_irq_restore(flags);
@@ -1157,6 +1426,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	}
 
 	/*
+	 * If the host uses TSC clock, then passthrough TSC as stable
+	 * to the guest.
+	 */
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	use_master_clock = ka->use_master_clock;
+	if (use_master_clock) {
+		host_tsc = ka->master_cycle_now;
+		kernel_ns = ka->master_kernel_ns;
+	}
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+	if (!use_master_clock) {
+		host_tsc = native_read_tsc();
+		kernel_ns = get_kernel_ns();
+	}
+
+	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
+
+	/*
 	 * We may have to catch up the TSC to match elapsed wall clock
 	 * time for two reasons, even if kvmclock is used.
 	 * 1) CPU could have been running below the maximum TSC rate
@@ -1217,23 +1504,20 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->hw_tsc_khz = this_tsc_khz;
 	}
 
-	if (max_kernel_ns > kernel_ns)
-		kernel_ns = max_kernel_ns;
-
+	/* with a master <monotonic time, tsc value> tuple,
+	 * pvclock clock reads always increase at the (scaled) rate
+	 * of guest TSC - no need to deal with sampling errors.
+	 */
+	if (!use_master_clock) {
+		if (max_kernel_ns > kernel_ns)
+			kernel_ns = max_kernel_ns;
+	}
 	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_kernel_ns = kernel_ns;
 	vcpu->last_guest_tsc = tsc_timestamp;
 
-	pvclock_flags = 0;
-	if (vcpu->pvclock_set_guest_stopped_request) {
-		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
-		vcpu->pvclock_set_guest_stopped_request = false;
-	}
-
-	vcpu->hv_clock.flags = pvclock_flags;
-
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
@@ -1243,6 +1527,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	shared_kaddr = kmap_atomic(vcpu->time_page);
 
+	guest_hv_clock = shared_kaddr + vcpu->time_offset;
+
+	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	/* If the host uses TSC clocksource, then it is stable */
+	if (use_master_clock)
+		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
+
+	vcpu->hv_clock.flags = pvclock_flags;
+
 	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
 	       sizeof(vcpu->hv_clock));
 
@@ -1572,9 +1872,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
 }
 
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	bool pr = false;
+	u32 msr = msr_info->index;
+	u64 data = msr_info->data;
 
 	switch (msr) {
 	case MSR_EFER:
@@ -1625,6 +1927,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case MSR_IA32_TSCDEADLINE:
 		kvm_set_lapic_tscdeadline_msr(vcpu, data);
 		break;
+	case MSR_IA32_TSC_ADJUST:
+		if (guest_cpuid_has_tsc_adjust(vcpu)) {
+			if (!msr_info->host_initiated) {
+				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
+			}
+			vcpu->arch.ia32_tsc_adjust_msr = data;
+		}
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->arch.ia32_misc_enable_msr = data;
 		break;
@@ -1984,6 +2295,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_TSCDEADLINE:
 		data = kvm_get_lapic_tscdeadline_msr(vcpu);
 		break;
+	case MSR_IA32_TSC_ADJUST:
+		data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->arch.ia32_misc_enable_msr;
 		break;
@@ -2342,7 +2656,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		kvm_x86_ops->write_tsc_offset(vcpu, offset);
 		vcpu->arch.tsc_catchup = 1;
 	}
-	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+	/*
+	 * On a host with synchronized TSC, there is no need to update
+	 * kvmclock on vcpu->cpu migration
+	 */
+	if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
+		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	if (vcpu->cpu != cpu)
 		kvm_migrate_timers(vcpu);
 	vcpu->cpu = cpu;
@@ -2691,15 +3010,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (!vcpu->arch.apic)
 			goto out;
 		u.lapic = memdup_user(argp, sizeof(*u.lapic));
-		if (IS_ERR(u.lapic)) {
-			r = PTR_ERR(u.lapic);
-			goto out;
-		}
+		if (IS_ERR(u.lapic))
+			return PTR_ERR(u.lapic);
 
 		r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_INTERRUPT: {
@@ -2709,16 +3023,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&irq, argp, sizeof irq))
 			goto out;
 		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_NMI: {
 		r = kvm_vcpu_ioctl_nmi(vcpu);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_SET_CPUID: {
@@ -2729,8 +3037,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_SET_CPUID2: {
@@ -2742,8 +3048,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
 					      cpuid_arg->entries);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_GET_CPUID2: {
@@ -2875,10 +3179,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 	case KVM_SET_XSAVE: {
 		u.xsave = memdup_user(argp, sizeof(*u.xsave));
-		if (IS_ERR(u.xsave)) {
-			r = PTR_ERR(u.xsave);
-			goto out;
-		}
+		if (IS_ERR(u.xsave))
+			return PTR_ERR(u.xsave);
 
 		r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
 		break;
@@ -2900,10 +3202,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 	case KVM_SET_XCRS: {
 		u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
-		if (IS_ERR(u.xcrs)) {
-			r = PTR_ERR(u.xcrs);
-			goto out;
-		}
+		if (IS_ERR(u.xcrs))
+			return PTR_ERR(u.xcrs);
 
 		r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
 		break;
@@ -2951,7 +3251,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
 	int ret;
 
 	if (addr > (unsigned int)(-3 * PAGE_SIZE))
-		return -1;
+		return -EINVAL;
 	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
 	return ret;
 }
@@ -3212,8 +3512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	switch (ioctl) {
 	case KVM_SET_TSS_ADDR:
 		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
-		if (r < 0)
-			goto out;
 		break;
 	case KVM_SET_IDENTITY_MAP_ADDR: {
 		u64 ident_addr;
@@ -3222,14 +3520,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
 			goto out;
 		r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
-		if (r < 0)
-			goto out;
 		break;
 	}
 	case KVM_SET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
-		if (r)
-			goto out;
 		break;
 	case KVM_GET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
@@ -3320,8 +3614,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 	get_irqchip_out:
 		kfree(chip);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_SET_IRQCHIP: {
@@ -3343,8 +3635,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 	set_irqchip_out:
 		kfree(chip);
-		if (r)
-			goto out;
 		break;
 	}
 	case KVM_GET_PIT: {
@@ -3371,9 +3661,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (!kvm->arch.vpit)
 			goto out;
 		r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_GET_PIT2: {
@@ -3397,9 +3684,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (!kvm->arch.vpit)
 			goto out;
 		r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_REINJECT_CONTROL: {
@@ -3408,9 +3692,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&control, argp, sizeof(control)))
 			goto out;
 		r = kvm_vm_ioctl_reinject(kvm, &control);
-		if (r)
-			goto out;
-		r = 0;
 		break;
 	}
 	case KVM_XEN_HVM_CONFIG: {
@@ -4273,7 +4554,12 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 			    u32 msr_index, u64 data)
 {
-	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+	struct msr_data msr;
+
+	msr.data = data;
+	msr.index = msr_index;
+	msr.host_initiated = false;
+	return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -4495,7 +4781,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	 * instruction -> ...
 	 */
 	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-	if (!is_error_pfn(pfn)) {
+	if (!is_error_noslot_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return true;
 	}
@@ -4881,6 +5167,50 @@ static void kvm_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask(mask);
 }
 
+#ifdef CONFIG_X86_64
+static void pvclock_gtod_update_fn(struct work_struct *work)
+{
+	struct kvm *kvm;
+
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	raw_spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+	atomic_set(&kvm_guest_has_master_clock, 0);
+	raw_spin_unlock(&kvm_lock);
+}
+
+static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
+
+/*
+ * Notification about pvclock gtod data update.
+ */
+static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
+			       void *priv)
+{
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+	struct timekeeper *tk = priv;
+
+	update_pvclock_gtod(tk);
+
+	/* disable master clock if host does not trust, or does not
+	 * use, TSC clocksource
+	 */
+	if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+	    atomic_read(&kvm_guest_has_master_clock) != 0)
+		queue_work(system_long_wq, &pvclock_gtod_work);
+
+	return 0;
+}
+
+static struct notifier_block pvclock_gtod_notifier = {
+	.notifier_call = pvclock_gtod_notify,
+};
+#endif
+
 int kvm_arch_init(void *opaque)
 {
 	int r;
@@ -4922,6 +5252,10 @@ int kvm_arch_init(void *opaque)
 	host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
 	kvm_lapic_init();
+#ifdef CONFIG_X86_64
+	pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+#endif
+
 	return 0;
 
 out:
@@ -4936,6 +5270,9 @@ void kvm_arch_exit(void)
 	cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
 				    CPUFREQ_TRANSITION_NOTIFIER);
 	unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+#ifdef CONFIG_X86_64
+	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+#endif
 	kvm_x86_ops = NULL;
 	kvm_mmu_module_exit();
 }
@@ -5059,7 +5396,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	char instruction[3];
@@ -5235,6 +5572,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+static void kvm_gen_update_masterclock(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm_arch *ka = &kvm->arch;
+
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	kvm_make_mclock_inprogress_request(kvm);
+	/* no guest entries from this point */
+	pvclock_update_vm_gtod_copy(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+
+	/* guest entries allowed */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+#endif
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -5247,6 +5607,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
+		if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
+			kvm_gen_update_masterclock(vcpu->kvm);
 		if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
 			r = kvm_guest_time_update(vcpu);
 			if (unlikely(r))
@@ -5362,7 +5724,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (hw_breakpoint_active())
 		hw_breakpoint_restore();
 
-	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
+							   native_read_tsc());
 
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
@@ -5419,7 +5782,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			pr_debug("vcpu %d received sipi with vector # %x\n",
 				 vcpu->vcpu_id, vcpu->arch.sipi_vector);
 			kvm_lapic_reset(vcpu);
-			r = kvm_arch_vcpu_reset(vcpu);
+			r = kvm_vcpu_reset(vcpu);
 			if (r)
 				return r;
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6047,7 +6410,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	r = vcpu_load(vcpu);
 	if (r)
 		return r;
-	r = kvm_arch_vcpu_reset(vcpu);
+	r = kvm_vcpu_reset(vcpu);
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);
@@ -6055,6 +6418,23 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	int r;
+	struct msr_data msr;
+
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
+	msr.data = 0x0;
+	msr.index = MSR_IA32_TSC;
+	msr.host_initiated = true;
+	kvm_write_tsc(vcpu, &msr);
+	vcpu_put(vcpu);
+
+	return r;
+}
+
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -6069,7 +6449,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->vcpu_free(vcpu);
 }
 
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.nmi_queued, 0);
 	vcpu->arch.nmi_pending = 0;
@@ -6092,6 +6472,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	kvm_pmu_reset(vcpu);
 
+	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+	vcpu->arch.regs_avail = ~0;
+	vcpu->arch.regs_dirty = ~0;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -6168,6 +6552,8 @@ int kvm_arch_hardware_enable(void *garbage)
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			vcpu->arch.tsc_offset_adjustment += delta_cyc;
 			vcpu->arch.last_host_tsc = local_tsc;
+			set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
+				&vcpu->requests);
 		}
 
 		/*
@@ -6258,10 +6644,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
 		goto fail_free_mce_banks;
 
+	r = fx_init(vcpu);
+	if (r)
+		goto fail_free_wbinvd_dirty_mask;
+
+	vcpu->arch.ia32_tsc_adjust_msr = 0x0;
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
 	return 0;
+fail_free_wbinvd_dirty_mask:
+	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 fail_free_mce_banks:
 	kfree(vcpu->arch.mce_banks);
 fail_free_lapic:
@@ -6305,6 +6698,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
+	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+
+	pvclock_update_vm_gtod_copy(kvm);
 
 	return 0;
 }