diff options
Diffstat (limited to 'Documentation/kvm/msr.txt')
-rw-r--r-- | Documentation/kvm/msr.txt | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt new file mode 100644 index 000000000000..8ddcfe84c09a --- /dev/null +++ b/Documentation/kvm/msr.txt | |||
@@ -0,0 +1,153 @@ | |||
1 | KVM-specific MSRs. | ||
2 | Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 | ||
3 | ===================================================== | ||
4 | |||
5 | KVM makes use of some custom MSRs to service some requests. | ||
6 | At present, this facility is only used by kvmclock. | ||
7 | |||
8 | Custom MSRs have a range reserved for them, that goes from | ||
9 | 0x4b564d00 to 0x4b564dff. There are MSRs outside this area, | ||
10 | but they are deprecated and their use is discouraged. | ||
11 | |||
12 | Custom MSR list | ||
13 | -------- | ||
14 | |||
15 | The current supported Custom MSR list is: | ||
16 | |||
17 | MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00 | ||
18 | |||
19 | data: 4-byte alignment physical address of a memory area which must be | ||
20 | in guest RAM. This memory is expected to hold a copy of the following | ||
21 | structure: | ||
22 | |||
23 | struct pvclock_wall_clock { | ||
24 | u32 version; | ||
25 | u32 sec; | ||
26 | u32 nsec; | ||
27 | } __attribute__((__packed__)); | ||
28 | |||
29 | whose data will be filled in by the hypervisor. The hypervisor is only | ||
30 | guaranteed to update this data at the moment of MSR write. | ||
31 | Users that want to reliably query this information more than once have | ||
32 | to write more than once to this MSR. Fields have the following meanings: | ||
33 | |||
34 | version: guest has to check version before and after grabbing | ||
35 | time information and check that they are both equal and even. | ||
36 | An odd version indicates an in-progress update. | ||
37 | |||
38 | sec: number of seconds for wallclock. | ||
39 | |||
40 | nsec: number of nanoseconds for wallclock. | ||
41 | |||
42 | Note that although MSRs are per-CPU entities, the effect of this | ||
43 | particular MSR is global. | ||
44 | |||
45 | Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid | ||
46 | leaf prior to usage. | ||
47 | |||
48 | MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01 | ||
49 | |||
50 | data: 4-byte aligned physical address of a memory area which must be in | ||
51 | guest RAM, plus an enable bit in bit 0. This memory is expected to hold | ||
52 | a copy of the following structure: | ||
53 | |||
54 | struct pvclock_vcpu_time_info { | ||
55 | u32 version; | ||
56 | u32 pad0; | ||
57 | u64 tsc_timestamp; | ||
58 | u64 system_time; | ||
59 | u32 tsc_to_system_mul; | ||
60 | s8 tsc_shift; | ||
61 | u8 flags; | ||
62 | u8 pad[2]; | ||
63 | } __attribute__((__packed__)); /* 32 bytes */ | ||
64 | |||
65 | whose data will be filled in by the hypervisor periodically. Only one | ||
66 | write, or registration, is needed for each VCPU. The interval between | ||
67 | updates of this structure is arbitrary and implementation-dependent. | ||
68 | The hypervisor may update this structure at any time it sees fit until | ||
69 | anything with bit0 == 0 is written to it. | ||
70 | |||
71 | Fields have the following meanings: | ||
72 | |||
73 | version: guest has to check version before and after grabbing | ||
74 | time information and check that they are both equal and even. | ||
75 | An odd version indicates an in-progress update. | ||
76 | |||
77 | tsc_timestamp: the tsc value at the current VCPU at the time | ||
78 | of the update of this structure. Guests can subtract this value | ||
79 | from current tsc to derive a notion of elapsed time since the | ||
80 | structure update. | ||
81 | |||
82 | system_time: a host notion of monotonic time, including sleep | ||
83 | time at the time this structure was last updated. Unit is | ||
84 | nanoseconds. | ||
85 | |||
86 | tsc_to_system_mul: a function of the tsc frequency. One has | ||
87 | to multiply any tsc-related quantity by this value to get | ||
88 | a value in nanoseconds, besides dividing by 2^tsc_shift | ||
89 | |||
90 | tsc_shift: cycle to nanosecond divider, as a power of two, to | ||
91 | allow for shift rights. One has to shift right any tsc-related | ||
92 | quantity by this value to get a value in nanoseconds, besides | ||
93 | multiplying by tsc_to_system_mul. | ||
94 | |||
95 | With this information, guests can derive per-CPU time by | ||
96 | doing: | ||
97 | |||
98 | time = (current_tsc - tsc_timestamp) | ||
99 | time = (time * tsc_to_system_mul) >> tsc_shift | ||
100 | time = time + system_time | ||
101 | |||
102 | flags: bits in this field indicate extended capabilities | ||
103 | coordinated between the guest and the hypervisor. Availability | ||
104 | of specific flags has to be checked in 0x40000001 cpuid leaf. | ||
105 | Current flags are: | ||
106 | |||
107 | flag bit | cpuid bit | meaning | ||
108 | ------------------------------------------------------------- | ||
109 | | | time measures taken across | ||
110 | 0 | 24 | multiple cpus are guaranteed to | ||
111 | | | be monotonic | ||
112 | ------------------------------------------------------------- | ||
113 | |||
114 | Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid | ||
115 | leaf prior to usage. | ||
116 | |||
117 | |||
118 | MSR_KVM_WALL_CLOCK: 0x11 | ||
119 | |||
120 | data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead. | ||
121 | |||
122 | This MSR falls outside the reserved KVM range and may be removed in the | ||
123 | future. Its usage is deprecated. | ||
124 | |||
125 | Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid | ||
126 | leaf prior to usage. | ||
127 | |||
128 | MSR_KVM_SYSTEM_TIME: 0x12 | ||
129 | |||
130 | data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead. | ||
131 | |||
132 | This MSR falls outside the reserved KVM range and may be removed in the | ||
133 | future. Its usage is deprecated. | ||
134 | |||
135 | Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid | ||
136 | leaf prior to usage. | ||
137 | |||
138 | The suggested algorithm for detecting kvmclock presence is then: | ||
139 | |||
140 | if (!kvm_para_available()) /* refer to cpuid.txt */ | ||
141 | return NON_PRESENT; | ||
142 | |||
143 | flags = cpuid_eax(0x40000001); | ||
144 | if (flags & 3) { | ||
145 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; | ||
146 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; | ||
147 | return PRESENT; | ||
148 | } else if (flags & 0) { | ||
149 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | ||
150 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | ||
151 | return PRESENT; | ||
152 | } else | ||
153 | return NON_PRESENT; | ||