aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorZhai, Edwin <edwin.zhai@intel.com>2009-10-09 06:03:20 -0400
committerAvi Kivity <avi@redhat.com>2009-12-03 02:32:17 -0500
commit4b8d54f9726f1159330201c5ed2ea30bce7e63ea (patch)
tree903eaf21f46359f3e42a6d4582ca792b73d4b7b9 /arch/x86
parentd255f4f2bac81eb798fcf76938147f1f6c756ae2 (diff)
KVM: VMX: Add support for Pause-Loop Exiting
New NHM processors will support Pause-Loop Exiting by adding 2 VM-execution control fields: PLE_Gap - upper bound on the amount of time between two successive executions of PAUSE in a loop. PLE_Window - upper bound on the amount of time a guest is allowed to execute in a PAUSE loop. If the time between this execution of PAUSE and the previous one exceeds PLE_Gap, the processor considers this PAUSE to belong to a new loop. Otherwise, the processor determines the total execution time of this loop (since the 1st PAUSE in this loop), and triggers a VM exit if the total time exceeds PLE_Window. * Refer to SDM volume 3b sections 21.6.13 & 22.1.3. Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where one VP is scheduled out while holding a spinlock, and then other VPs waiting for the same lock are scheduled in and waste CPU time. Our tests indicate that most spinlocks are held for less than 2^12 cycles. Performance tests show that with 2X LP over-commitment we can get a +2% perf improvement for kernel build (even more perf gain with more LPs). Signed-off-by: Zhai Edwin <edwin.zhai@intel.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/kvm/vmx.c51
2 files changed, 54 insertions, 1 deletions
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 272514c2d456..2b4945419a84 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -56,6 +56,7 @@
56#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 56#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
57#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 57#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
58#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 58#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
59#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
59 60
60 61
61#define PIN_BASED_EXT_INTR_MASK 0x00000001 62#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -144,6 +145,8 @@ enum vmcs_field {
144 VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, 145 VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
145 TPR_THRESHOLD = 0x0000401c, 146 TPR_THRESHOLD = 0x0000401c,
146 SECONDARY_VM_EXEC_CONTROL = 0x0000401e, 147 SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
148 PLE_GAP = 0x00004020,
149 PLE_WINDOW = 0x00004022,
147 VM_INSTRUCTION_ERROR = 0x00004400, 150 VM_INSTRUCTION_ERROR = 0x00004400,
148 VM_EXIT_REASON = 0x00004402, 151 VM_EXIT_REASON = 0x00004402,
149 VM_EXIT_INTR_INFO = 0x00004404, 152 VM_EXIT_INTR_INFO = 0x00004404,
@@ -248,6 +251,7 @@ enum vmcs_field {
248#define EXIT_REASON_MSR_READ 31 251#define EXIT_REASON_MSR_READ 31
249#define EXIT_REASON_MSR_WRITE 32 252#define EXIT_REASON_MSR_WRITE 32
250#define EXIT_REASON_MWAIT_INSTRUCTION 36 253#define EXIT_REASON_MWAIT_INSTRUCTION 36
254#define EXIT_REASON_PAUSE_INSTRUCTION 40
251#define EXIT_REASON_MCE_DURING_VMENTRY 41 255#define EXIT_REASON_MCE_DURING_VMENTRY 41
252#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 256#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
253#define EXIT_REASON_APIC_ACCESS 44 257#define EXIT_REASON_APIC_ACCESS 44
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 70020e505c22..a4580d65af59 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -61,6 +61,25 @@ module_param_named(unrestricted_guest,
61static int __read_mostly emulate_invalid_guest_state = 0; 61static int __read_mostly emulate_invalid_guest_state = 0;
62module_param(emulate_invalid_guest_state, bool, S_IRUGO); 62module_param(emulate_invalid_guest_state, bool, S_IRUGO);
63 63
64/*
65 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
66 * ple_gap: upper bound on the amount of time between two successive
67 * executions of PAUSE in a loop. Also indicates whether PLE is enabled.
68 * According to tests, this time is usually smaller than 41 cycles.
69 * ple_window: upper bound on the amount of time a guest is allowed to execute
70 * in a PAUSE loop. Tests indicate that most spinlocks are held for
71 * less than 2^12 cycles
72 * Time is measured based on a counter that runs at the same rate as the TSC,
73 * refer SDM volume 3b section 21.6.13 & 22.1.3.
74 */
75#define KVM_VMX_DEFAULT_PLE_GAP 41
76#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
77static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
78module_param(ple_gap, int, S_IRUGO);
79
80static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
81module_param(ple_window, int, S_IRUGO);
82
64struct vmcs { 83struct vmcs {
65 u32 revision_id; 84 u32 revision_id;
66 u32 abort; 85 u32 abort;
@@ -319,6 +338,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void)
319 SECONDARY_EXEC_UNRESTRICTED_GUEST; 338 SECONDARY_EXEC_UNRESTRICTED_GUEST;
320} 339}
321 340
341static inline int cpu_has_vmx_ple(void)
342{
343 return vmcs_config.cpu_based_2nd_exec_ctrl &
344 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
345}
346
322static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 347static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
323{ 348{
324 return flexpriority_enabled && 349 return flexpriority_enabled &&
@@ -1240,7 +1265,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1240 SECONDARY_EXEC_WBINVD_EXITING | 1265 SECONDARY_EXEC_WBINVD_EXITING |
1241 SECONDARY_EXEC_ENABLE_VPID | 1266 SECONDARY_EXEC_ENABLE_VPID |
1242 SECONDARY_EXEC_ENABLE_EPT | 1267 SECONDARY_EXEC_ENABLE_EPT |
1243 SECONDARY_EXEC_UNRESTRICTED_GUEST; 1268 SECONDARY_EXEC_UNRESTRICTED_GUEST |
1269 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
1244 if (adjust_vmx_controls(min2, opt2, 1270 if (adjust_vmx_controls(min2, opt2,
1245 MSR_IA32_VMX_PROCBASED_CTLS2, 1271 MSR_IA32_VMX_PROCBASED_CTLS2,
1246 &_cpu_based_2nd_exec_control) < 0) 1272 &_cpu_based_2nd_exec_control) < 0)
@@ -1386,6 +1412,9 @@ static __init int hardware_setup(void)
1386 if (enable_ept && !cpu_has_vmx_ept_2m_page()) 1412 if (enable_ept && !cpu_has_vmx_ept_2m_page())
1387 kvm_disable_largepages(); 1413 kvm_disable_largepages();
1388 1414
1415 if (!cpu_has_vmx_ple())
1416 ple_gap = 0;
1417
1389 return alloc_kvm_area(); 1418 return alloc_kvm_area();
1390} 1419}
1391 1420
@@ -2298,9 +2327,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2298 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; 2327 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
2299 if (!enable_unrestricted_guest) 2328 if (!enable_unrestricted_guest)
2300 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 2329 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2330 if (!ple_gap)
2331 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
2301 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 2332 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
2302 } 2333 }
2303 2334
2335 if (ple_gap) {
2336 vmcs_write32(PLE_GAP, ple_gap);
2337 vmcs_write32(PLE_WINDOW, ple_window);
2338 }
2339
2304 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); 2340 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
2305 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); 2341 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
2306 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ 2342 vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
@@ -3348,6 +3384,18 @@ out:
3348} 3384}
3349 3385
3350/* 3386/*
3387 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
3388 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
3389 */
3390static int handle_pause(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3391{
3392 skip_emulated_instruction(vcpu);
3393 kvm_vcpu_on_spin(vcpu);
3394
3395 return 1;
3396}
3397
3398/*
3351 * The exit handlers return 1 if the exit was handled fully and guest execution 3399 * The exit handlers return 1 if the exit was handled fully and guest execution
3352 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 3400 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
3353 * to be done to userspace and return 0. 3401 * to be done to userspace and return 0.
@@ -3383,6 +3431,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3383 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, 3431 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
3384 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 3432 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
3385 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 3433 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
3434 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
3386}; 3435};
3387 3436
3388static const int kvm_vmx_max_exit_handlers = 3437static const int kvm_vmx_max_exit_handlers =