diff options
author | Zhai, Edwin <edwin.zhai@intel.com> | 2009-10-09 06:03:20 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2009-12-03 02:32:17 -0500 |
commit | 4b8d54f9726f1159330201c5ed2ea30bce7e63ea (patch) | |
tree | 903eaf21f46359f3e42a6d4582ca792b73d4b7b9 /arch/x86 | |
parent | d255f4f2bac81eb798fcf76938147f1f6c756ae2 (diff) |
KVM: VMX: Add support for Pause-Loop Exiting
New NHM processors will support Pause-Loop Exiting by adding 2 VM-execution
control fields:
PLE_Gap - upper bound on the amount of time between two successive
executions of PAUSE in a loop.
PLE_Window - upper bound on the amount of time a guest is allowed to execute in
a PAUSE loop
If the time between this execution of PAUSE and the previous one exceeds
PLE_Gap, the processor considers this PAUSE to belong to a new loop.
Otherwise, the processor determines the total execution time of this loop (since
the 1st PAUSE in this loop), and triggers a VM exit if the total time exceeds
PLE_Window.
* Refer to SDM volume 3b, sections 21.6.13 & 22.1.3.
Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where one VP
is scheduled out while holding a spinlock, and other VPs waiting for the same
lock are then scheduled in, only to waste CPU time.
Our tests indicate that most spinlocks are held for less than 2^12 cycles.
Performance tests show that with 2X LP over-commitment we can get +2% perf
improvement for kernel build (even more perf gain with more LPs).
Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/vmx.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 51 |
2 files changed, 54 insertions, 1 deletions
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 272514c2d456..2b4945419a84 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -56,6 +56,7 @@ | |||
56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
59 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | ||
59 | 60 | ||
60 | 61 | ||
61 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 62 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
@@ -144,6 +145,8 @@ enum vmcs_field { | |||
144 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, | 145 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, |
145 | TPR_THRESHOLD = 0x0000401c, | 146 | TPR_THRESHOLD = 0x0000401c, |
146 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, | 147 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, |
148 | PLE_GAP = 0x00004020, | ||
149 | PLE_WINDOW = 0x00004022, | ||
147 | VM_INSTRUCTION_ERROR = 0x00004400, | 150 | VM_INSTRUCTION_ERROR = 0x00004400, |
148 | VM_EXIT_REASON = 0x00004402, | 151 | VM_EXIT_REASON = 0x00004402, |
149 | VM_EXIT_INTR_INFO = 0x00004404, | 152 | VM_EXIT_INTR_INFO = 0x00004404, |
@@ -248,6 +251,7 @@ enum vmcs_field { | |||
248 | #define EXIT_REASON_MSR_READ 31 | 251 | #define EXIT_REASON_MSR_READ 31 |
249 | #define EXIT_REASON_MSR_WRITE 32 | 252 | #define EXIT_REASON_MSR_WRITE 32 |
250 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 253 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
254 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | ||
251 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 255 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
252 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 256 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
253 | #define EXIT_REASON_APIC_ACCESS 44 | 257 | #define EXIT_REASON_APIC_ACCESS 44 |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70020e505c22..a4580d65af59 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -61,6 +61,25 @@ module_param_named(unrestricted_guest, | |||
61 | static int __read_mostly emulate_invalid_guest_state = 0; | 61 | static int __read_mostly emulate_invalid_guest_state = 0; |
62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
63 | 63 | ||
64 | /* | ||
65 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | ||
66 | * ple_gap: upper bound on the amount of time between two successive | ||
67 | * executions of PAUSE in a loop. Also indicate if ple enabled. | ||
68 | * According to test, this time is usually small than 41 cycles. | ||
69 | * ple_window: upper bound on the amount of time a guest is allowed to execute | ||
70 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | ||
71 | * less than 2^12 cycles | ||
72 | * Time is measured based on a counter that runs at the same rate as the TSC, | ||
73 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | ||
74 | */ | ||
75 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | ||
76 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
77 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | ||
78 | module_param(ple_gap, int, S_IRUGO); | ||
79 | |||
80 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | ||
81 | module_param(ple_window, int, S_IRUGO); | ||
82 | |||
64 | struct vmcs { | 83 | struct vmcs { |
65 | u32 revision_id; | 84 | u32 revision_id; |
66 | u32 abort; | 85 | u32 abort; |
@@ -319,6 +338,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void) | |||
319 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 338 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
320 | } | 339 | } |
321 | 340 | ||
341 | static inline int cpu_has_vmx_ple(void) | ||
342 | { | ||
343 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
344 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
345 | } | ||
346 | |||
322 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 347 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
323 | { | 348 | { |
324 | return flexpriority_enabled && | 349 | return flexpriority_enabled && |
@@ -1240,7 +1265,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1240 | SECONDARY_EXEC_WBINVD_EXITING | | 1265 | SECONDARY_EXEC_WBINVD_EXITING | |
1241 | SECONDARY_EXEC_ENABLE_VPID | | 1266 | SECONDARY_EXEC_ENABLE_VPID | |
1242 | SECONDARY_EXEC_ENABLE_EPT | | 1267 | SECONDARY_EXEC_ENABLE_EPT | |
1243 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 1268 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1269 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
1244 | if (adjust_vmx_controls(min2, opt2, | 1270 | if (adjust_vmx_controls(min2, opt2, |
1245 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1271 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1246 | &_cpu_based_2nd_exec_control) < 0) | 1272 | &_cpu_based_2nd_exec_control) < 0) |
@@ -1386,6 +1412,9 @@ static __init int hardware_setup(void) | |||
1386 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) | 1412 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) |
1387 | kvm_disable_largepages(); | 1413 | kvm_disable_largepages(); |
1388 | 1414 | ||
1415 | if (!cpu_has_vmx_ple()) | ||
1416 | ple_gap = 0; | ||
1417 | |||
1389 | return alloc_kvm_area(); | 1418 | return alloc_kvm_area(); |
1390 | } | 1419 | } |
1391 | 1420 | ||
@@ -2298,9 +2327,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2298 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2327 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
2299 | if (!enable_unrestricted_guest) | 2328 | if (!enable_unrestricted_guest) |
2300 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2329 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
2330 | if (!ple_gap) | ||
2331 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
2301 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2332 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
2302 | } | 2333 | } |
2303 | 2334 | ||
2335 | if (ple_gap) { | ||
2336 | vmcs_write32(PLE_GAP, ple_gap); | ||
2337 | vmcs_write32(PLE_WINDOW, ple_window); | ||
2338 | } | ||
2339 | |||
2304 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); | 2340 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); |
2305 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2341 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
2306 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2342 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
@@ -3348,6 +3384,18 @@ out: | |||
3348 | } | 3384 | } |
3349 | 3385 | ||
3350 | /* | 3386 | /* |
3387 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE | ||
3388 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. | ||
3389 | */ | ||
3390 | static int handle_pause(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
3391 | { | ||
3392 | skip_emulated_instruction(vcpu); | ||
3393 | kvm_vcpu_on_spin(vcpu); | ||
3394 | |||
3395 | return 1; | ||
3396 | } | ||
3397 | |||
3398 | /* | ||
3351 | * The exit handlers return 1 if the exit was handled fully and guest execution | 3399 | * The exit handlers return 1 if the exit was handled fully and guest execution |
3352 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3400 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
3353 | * to be done to userspace and return 0. | 3401 | * to be done to userspace and return 0. |
@@ -3383,6 +3431,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3383 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3431 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3384 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3432 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3385 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3433 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
3434 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | ||
3386 | }; | 3435 | }; |
3387 | 3436 | ||
3388 | static const int kvm_vmx_max_exit_handlers = | 3437 | static const int kvm_vmx_max_exit_handlers = |