aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt21
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt2
-rw-r--r--arch/parisc/kernel/Makefile2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kernel/ldt.c2
-rw-r--r--arch/x86/kvm/svm.c31
-rw-r--r--arch/x86/kvm/vmx.c95
-rw-r--r--arch/x86/kvm/x86.c15
-rw-r--r--drivers/block/rbd.c101
-rw-r--r--drivers/clocksource/Kconfig8
-rw-r--r--drivers/clocksource/Makefile1
-rw-r--r--drivers/clocksource/timer-imx-tpm.c43
-rw-r--r--drivers/clocksource/timer-npcm7xx.c215
-rw-r--r--fs/ceph/inode.c10
-rw-r--r--tools/testing/selftests/kvm/Makefile5
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h15
-rw-r--r--tools/testing/selftests/kvm/include/vmx.h494
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c20
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c4
-rw-r--r--tools/testing/selftests/kvm/lib/vmx.c243
-rw-r--r--tools/testing/selftests/kvm/vmx_tsc_adjust_test.c231
21 files changed, 1438 insertions, 121 deletions
diff --git a/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt
new file mode 100644
index 000000000000..ea22dfe485be
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt
@@ -0,0 +1,21 @@
1Nuvoton NPCM7xx timer
2
3Nuvoton NPCM7xx have three timer modules, each timer module provides five 24-bit
4timer counters.
5
6Required properties:
7- compatible : "nuvoton,npcm750-timer" for Poleg NPCM750.
8- reg : Offset and length of the register set for the device.
9- interrupts : Contain the timer interrupt with flags for
10 falling edge.
11- clocks : phandle of timer reference clock (usually a 25 MHz clock).
12
13Example:
14
15timer@f0008000 {
16 compatible = "nuvoton,npcm750-timer";
17 interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
18 reg = <0xf0008000 0x50>;
19 clocks = <&clk NPCM7XX_CLK_TIMER>;
20};
21
diff --git a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt
index b4aa7ddb5b13..f82087b220f4 100644
--- a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt
+++ b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt
@@ -15,7 +15,7 @@ Required properties:
15- interrupts : Should be the clock event device interrupt. 15- interrupts : Should be the clock event device interrupt.
16- clocks : The clocks provided by the SoC to drive the timer, must contain 16- clocks : The clocks provided by the SoC to drive the timer, must contain
17 an entry for each entry in clock-names. 17 an entry for each entry in clock-names.
18- clock-names : Must include the following entries: "igp" and "per". 18- clock-names : Must include the following entries: "ipg" and "per".
19 19
20Example: 20Example:
21tpm5: tpm@40260000 { 21tpm5: tpm@40260000 {
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index eafd06ab59ef..e5de34d00b1a 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_SMP) += smp.o
23obj-$(CONFIG_PA11) += pci-dma.o 23obj-$(CONFIG_PA11) += pci-dma.o
24obj-$(CONFIG_PCI) += pci.o 24obj-$(CONFIG_PCI) += pci.o
25obj-$(CONFIG_MODULES) += module.o 25obj-$(CONFIG_MODULES) += module.o
26obj-$(CONFIG_64BIT) += binfmt_elf32.o sys_parisc32.o signal32.o 26obj-$(CONFIG_64BIT) += sys_parisc32.o signal32.o
27obj-$(CONFIG_STACKTRACE)+= stacktrace.o 27obj-$(CONFIG_STACKTRACE)+= stacktrace.o
28obj-$(CONFIG_AUDIT) += audit.o 28obj-$(CONFIG_AUDIT) += audit.o
29obj64-$(CONFIG_AUDIT) += compat_audit.o 29obj64-$(CONFIG_AUDIT) += compat_audit.o
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 949c977bc4c9..c25775fad4ed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1013,6 +1013,7 @@ struct kvm_x86_ops {
1013 1013
1014 bool (*has_wbinvd_exit)(void); 1014 bool (*has_wbinvd_exit)(void);
1015 1015
1016 u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
1016 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); 1017 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
1017 1018
1018 void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); 1019 void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index d41d896481b8..c9b14020f4dd 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -166,7 +166,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
166 */ 166 */
167 pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL); 167 pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
168 /* Filter out unsuppored __PAGE_KERNEL* bits: */ 168 /* Filter out unsuppored __PAGE_KERNEL* bits: */
169 pgprot_val(pte_prot) |= __supported_pte_mask; 169 pgprot_val(pte_prot) &= __supported_pte_mask;
170 pte = pfn_pte(pfn, pte_prot); 170 pte = pfn_pte(pfn, pte_prot);
171 set_pte_at(mm, va, ptep, pte); 171 set_pte_at(mm, va, ptep, pte);
172 pte_unmap_unlock(ptep, ptl); 172 pte_unmap_unlock(ptep, ptl);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b58787daf9f8..1fc05e428aba 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1423,12 +1423,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1423 seg->base = 0; 1423 seg->base = 0;
1424} 1424}
1425 1425
1426static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
1427{
1428 struct vcpu_svm *svm = to_svm(vcpu);
1429
1430 if (is_guest_mode(vcpu))
1431 return svm->nested.hsave->control.tsc_offset;
1432
1433 return vcpu->arch.tsc_offset;
1434}
1435
1426static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 1436static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1427{ 1437{
1428 struct vcpu_svm *svm = to_svm(vcpu); 1438 struct vcpu_svm *svm = to_svm(vcpu);
1429 u64 g_tsc_offset = 0; 1439 u64 g_tsc_offset = 0;
1430 1440
1431 if (is_guest_mode(vcpu)) { 1441 if (is_guest_mode(vcpu)) {
1442 /* Write L1's TSC offset. */
1432 g_tsc_offset = svm->vmcb->control.tsc_offset - 1443 g_tsc_offset = svm->vmcb->control.tsc_offset -
1433 svm->nested.hsave->control.tsc_offset; 1444 svm->nested.hsave->control.tsc_offset;
1434 svm->nested.hsave->control.tsc_offset = offset; 1445 svm->nested.hsave->control.tsc_offset = offset;
@@ -3322,6 +3333,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
3322 /* Restore the original control entries */ 3333 /* Restore the original control entries */
3323 copy_vmcb_control_area(vmcb, hsave); 3334 copy_vmcb_control_area(vmcb, hsave);
3324 3335
3336 svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
3325 kvm_clear_exception_queue(&svm->vcpu); 3337 kvm_clear_exception_queue(&svm->vcpu);
3326 kvm_clear_interrupt_queue(&svm->vcpu); 3338 kvm_clear_interrupt_queue(&svm->vcpu);
3327 3339
@@ -3482,10 +3494,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
3482 /* We don't want to see VMMCALLs from a nested guest */ 3494 /* We don't want to see VMMCALLs from a nested guest */
3483 clr_intercept(svm, INTERCEPT_VMMCALL); 3495 clr_intercept(svm, INTERCEPT_VMMCALL);
3484 3496
3497 svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
3498 svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
3499
3485 svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; 3500 svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
3486 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; 3501 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
3487 svm->vmcb->control.int_state = nested_vmcb->control.int_state; 3502 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
3488 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
3489 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; 3503 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
3490 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; 3504 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
3491 3505
@@ -4035,12 +4049,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
4035 struct vcpu_svm *svm = to_svm(vcpu); 4049 struct vcpu_svm *svm = to_svm(vcpu);
4036 4050
4037 switch (msr_info->index) { 4051 switch (msr_info->index) {
4038 case MSR_IA32_TSC: {
4039 msr_info->data = svm->vmcb->control.tsc_offset +
4040 kvm_scale_tsc(vcpu, rdtsc());
4041
4042 break;
4043 }
4044 case MSR_STAR: 4052 case MSR_STAR:
4045 msr_info->data = svm->vmcb->save.star; 4053 msr_info->data = svm->vmcb->save.star;
4046 break; 4054 break;
@@ -4193,9 +4201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
4193 svm->vmcb->save.g_pat = data; 4201 svm->vmcb->save.g_pat = data;
4194 mark_dirty(svm->vmcb, VMCB_NPT); 4202 mark_dirty(svm->vmcb, VMCB_NPT);
4195 break; 4203 break;
4196 case MSR_IA32_TSC:
4197 kvm_write_tsc(vcpu, msr);
4198 break;
4199 case MSR_IA32_SPEC_CTRL: 4204 case MSR_IA32_SPEC_CTRL:
4200 if (!msr->host_initiated && 4205 if (!msr->host_initiated &&
4201 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) 4206 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
@@ -5265,9 +5270,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
5265 } 5270 }
5266 5271
5267 if (!ret && svm) { 5272 if (!ret && svm) {
5268 trace_kvm_pi_irte_update(svm->vcpu.vcpu_id, 5273 trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
5269 host_irq, e->gsi, 5274 e->gsi, vcpu_info.vector,
5270 vcpu_info.vector,
5271 vcpu_info.pi_desc_addr, set); 5275 vcpu_info.pi_desc_addr, set);
5272 } 5276 }
5273 5277
@@ -7102,6 +7106,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
7102 7106
7103 .has_wbinvd_exit = svm_has_wbinvd_exit, 7107 .has_wbinvd_exit = svm_has_wbinvd_exit,
7104 7108
7109 .read_l1_tsc_offset = svm_read_l1_tsc_offset,
7105 .write_tsc_offset = svm_write_tsc_offset, 7110 .write_tsc_offset = svm_write_tsc_offset,
7106 7111
7107 .set_tdp_cr3 = set_tdp_cr3, 7112 .set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aafcc9881e88..aa66ccd6ed6c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2880,18 +2880,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
2880 vmx_update_msr_bitmap(&vmx->vcpu); 2880 vmx_update_msr_bitmap(&vmx->vcpu);
2881} 2881}
2882 2882
2883/* 2883static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
2884 * reads and returns guest's timestamp counter "register"
2885 * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
2886 * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3
2887 */
2888static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
2889{ 2884{
2890 u64 host_tsc, tsc_offset; 2885 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2891 2886
2892 host_tsc = rdtsc(); 2887 if (is_guest_mode(vcpu) &&
2893 tsc_offset = vmcs_read64(TSC_OFFSET); 2888 (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
2894 return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; 2889 return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
2890
2891 return vcpu->arch.tsc_offset;
2895} 2892}
2896 2893
2897/* 2894/*
@@ -3524,9 +3521,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3524#endif 3521#endif
3525 case MSR_EFER: 3522 case MSR_EFER:
3526 return kvm_get_msr_common(vcpu, msr_info); 3523 return kvm_get_msr_common(vcpu, msr_info);
3527 case MSR_IA32_TSC:
3528 msr_info->data = guest_read_tsc(vcpu);
3529 break;
3530 case MSR_IA32_SPEC_CTRL: 3524 case MSR_IA32_SPEC_CTRL:
3531 if (!msr_info->host_initiated && 3525 if (!msr_info->host_initiated &&
3532 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && 3526 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
@@ -3646,9 +3640,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3646 return 1; 3640 return 1;
3647 vmcs_write64(GUEST_BNDCFGS, data); 3641 vmcs_write64(GUEST_BNDCFGS, data);
3648 break; 3642 break;
3649 case MSR_IA32_TSC:
3650 kvm_write_tsc(vcpu, msr_info);
3651 break;
3652 case MSR_IA32_SPEC_CTRL: 3643 case MSR_IA32_SPEC_CTRL:
3653 if (!msr_info->host_initiated && 3644 if (!msr_info->host_initiated &&
3654 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && 3645 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
@@ -10608,6 +10599,16 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
10608 return true; 10599 return true;
10609} 10600}
10610 10601
10602static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
10603 struct vmcs12 *vmcs12)
10604{
10605 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
10606 !page_address_valid(vcpu, vmcs12->apic_access_addr))
10607 return -EINVAL;
10608 else
10609 return 0;
10610}
10611
10611static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, 10612static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
10612 struct vmcs12 *vmcs12) 10613 struct vmcs12 *vmcs12)
10613{ 10614{
@@ -11176,11 +11177,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
11176 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); 11177 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
11177 } 11178 }
11178 11179
11179 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) 11180 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
11180 vmcs_write64(TSC_OFFSET, 11181
11181 vcpu->arch.tsc_offset + vmcs12->tsc_offset);
11182 else
11183 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
11184 if (kvm_has_tsc_control) 11182 if (kvm_has_tsc_control)
11185 decache_tsc_multiplier(vmx); 11183 decache_tsc_multiplier(vmx);
11186 11184
@@ -11299,6 +11297,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
11299 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) 11297 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
11300 return VMXERR_ENTRY_INVALID_CONTROL_FIELD; 11298 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
11301 11299
11300 if (nested_vmx_check_apic_access_controls(vcpu, vmcs12))
11301 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
11302
11302 if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) 11303 if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
11303 return VMXERR_ENTRY_INVALID_CONTROL_FIELD; 11304 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
11304 11305
@@ -11420,6 +11421,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
11420 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 11421 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
11421 u32 msr_entry_idx; 11422 u32 msr_entry_idx;
11422 u32 exit_qual; 11423 u32 exit_qual;
11424 int r;
11423 11425
11424 enter_guest_mode(vcpu); 11426 enter_guest_mode(vcpu);
11425 11427
@@ -11429,26 +11431,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
11429 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); 11431 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
11430 vmx_segment_cache_clear(vmx); 11432 vmx_segment_cache_clear(vmx);
11431 11433
11432 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { 11434 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
11433 leave_guest_mode(vcpu); 11435 vcpu->arch.tsc_offset += vmcs12->tsc_offset;
11434 vmx_switch_vmcs(vcpu, &vmx->vmcs01); 11436
11435 nested_vmx_entry_failure(vcpu, vmcs12, 11437 r = EXIT_REASON_INVALID_STATE;
11436 EXIT_REASON_INVALID_STATE, exit_qual); 11438 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
11437 return 1; 11439 goto fail;
11438 }
11439 11440
11440 nested_get_vmcs12_pages(vcpu, vmcs12); 11441 nested_get_vmcs12_pages(vcpu, vmcs12);
11441 11442
11443 r = EXIT_REASON_MSR_LOAD_FAIL;
11442 msr_entry_idx = nested_vmx_load_msr(vcpu, 11444 msr_entry_idx = nested_vmx_load_msr(vcpu,
11443 vmcs12->vm_entry_msr_load_addr, 11445 vmcs12->vm_entry_msr_load_addr,
11444 vmcs12->vm_entry_msr_load_count); 11446 vmcs12->vm_entry_msr_load_count);
11445 if (msr_entry_idx) { 11447 if (msr_entry_idx)
11446 leave_guest_mode(vcpu); 11448 goto fail;
11447 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
11448 nested_vmx_entry_failure(vcpu, vmcs12,
11449 EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
11450 return 1;
11451 }
11452 11449
11453 /* 11450 /*
11454 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 11451 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -11457,6 +11454,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
11457 * the success flag) when L2 exits (see nested_vmx_vmexit()). 11454 * the success flag) when L2 exits (see nested_vmx_vmexit()).
11458 */ 11455 */
11459 return 0; 11456 return 0;
11457
11458fail:
11459 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
11460 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
11461 leave_guest_mode(vcpu);
11462 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
11463 nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
11464 return 1;
11460} 11465}
11461 11466
11462/* 11467/*
@@ -12028,6 +12033,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
12028 12033
12029 leave_guest_mode(vcpu); 12034 leave_guest_mode(vcpu);
12030 12035
12036 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
12037 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
12038
12031 if (likely(!vmx->fail)) { 12039 if (likely(!vmx->fail)) {
12032 if (exit_reason == -1) 12040 if (exit_reason == -1)
12033 sync_vmcs12(vcpu, vmcs12); 12041 sync_vmcs12(vcpu, vmcs12);
@@ -12224,10 +12232,16 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
12224 12232
12225static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) 12233static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
12226{ 12234{
12227 struct vcpu_vmx *vmx = to_vmx(vcpu); 12235 struct vcpu_vmx *vmx;
12228 u64 tscl = rdtsc(); 12236 u64 tscl, guest_tscl, delta_tsc;
12229 u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl); 12237
12230 u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; 12238 if (kvm_mwait_in_guest(vcpu->kvm))
12239 return -EOPNOTSUPP;
12240
12241 vmx = to_vmx(vcpu);
12242 tscl = rdtsc();
12243 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
12244 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
12231 12245
12232 /* Convert to host delta tsc if tsc scaling is enabled */ 12246 /* Convert to host delta tsc if tsc scaling is enabled */
12233 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && 12247 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
@@ -12533,7 +12547,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
12533 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); 12547 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
12534 vcpu_info.vector = irq.vector; 12548 vcpu_info.vector = irq.vector;
12535 12549
12536 trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi, 12550 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
12537 vcpu_info.vector, vcpu_info.pi_desc_addr, set); 12551 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
12538 12552
12539 if (set) 12553 if (set)
@@ -12712,6 +12726,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
12712 12726
12713 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, 12727 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
12714 12728
12729 .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
12715 .write_tsc_offset = vmx_write_tsc_offset, 12730 .write_tsc_offset = vmx_write_tsc_offset,
12716 12731
12717 .set_tdp_cr3 = vmx_set_cr3, 12732 .set_tdp_cr3 = vmx_set_cr3,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b2ff74b12ec4..51ecd381793b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1490,7 +1490,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1490 1490
1491static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) 1491static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1492{ 1492{
1493 u64 curr_offset = vcpu->arch.tsc_offset; 1493 u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
1494 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; 1494 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1495} 1495}
1496 1496
@@ -1532,7 +1532,9 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1532 1532
1533u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 1533u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
1534{ 1534{
1535 return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc); 1535 u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
1536
1537 return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
1536} 1538}
1537EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); 1539EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
1538 1540
@@ -2362,6 +2364,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2362 return 1; 2364 return 1;
2363 vcpu->arch.smbase = data; 2365 vcpu->arch.smbase = data;
2364 break; 2366 break;
2367 case MSR_IA32_TSC:
2368 kvm_write_tsc(vcpu, msr_info);
2369 break;
2365 case MSR_SMI_COUNT: 2370 case MSR_SMI_COUNT:
2366 if (!msr_info->host_initiated) 2371 if (!msr_info->host_initiated)
2367 return 1; 2372 return 1;
@@ -2605,6 +2610,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2605 case MSR_IA32_UCODE_REV: 2610 case MSR_IA32_UCODE_REV:
2606 msr_info->data = vcpu->arch.microcode_version; 2611 msr_info->data = vcpu->arch.microcode_version;
2607 break; 2612 break;
2613 case MSR_IA32_TSC:
2614 msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
2615 break;
2608 case MSR_MTRRcap: 2616 case MSR_MTRRcap:
2609 case 0x200 ... 0x2ff: 2617 case 0x200 ... 0x2ff:
2610 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); 2618 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2819,7 +2827,8 @@ out:
2819static inline bool kvm_can_mwait_in_guest(void) 2827static inline bool kvm_can_mwait_in_guest(void)
2820{ 2828{
2821 return boot_cpu_has(X86_FEATURE_MWAIT) && 2829 return boot_cpu_has(X86_FEATURE_MWAIT) &&
2822 !boot_cpu_has_bug(X86_BUG_MONITOR); 2830 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
2831 boot_cpu_has(X86_FEATURE_ARAT);
2823} 2832}
2824 2833
2825int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 2834int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 07dc5419bd63..8e8b04cc569a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -732,6 +732,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
732 */ 732 */
733enum { 733enum {
734 Opt_queue_depth, 734 Opt_queue_depth,
735 Opt_lock_timeout,
735 Opt_last_int, 736 Opt_last_int,
736 /* int args above */ 737 /* int args above */
737 Opt_last_string, 738 Opt_last_string,
@@ -740,11 +741,13 @@ enum {
740 Opt_read_write, 741 Opt_read_write,
741 Opt_lock_on_read, 742 Opt_lock_on_read,
742 Opt_exclusive, 743 Opt_exclusive,
744 Opt_notrim,
743 Opt_err 745 Opt_err
744}; 746};
745 747
746static match_table_t rbd_opts_tokens = { 748static match_table_t rbd_opts_tokens = {
747 {Opt_queue_depth, "queue_depth=%d"}, 749 {Opt_queue_depth, "queue_depth=%d"},
750 {Opt_lock_timeout, "lock_timeout=%d"},
748 /* int args above */ 751 /* int args above */
749 /* string args above */ 752 /* string args above */
750 {Opt_read_only, "read_only"}, 753 {Opt_read_only, "read_only"},
@@ -753,20 +756,25 @@ static match_table_t rbd_opts_tokens = {
753 {Opt_read_write, "rw"}, /* Alternate spelling */ 756 {Opt_read_write, "rw"}, /* Alternate spelling */
754 {Opt_lock_on_read, "lock_on_read"}, 757 {Opt_lock_on_read, "lock_on_read"},
755 {Opt_exclusive, "exclusive"}, 758 {Opt_exclusive, "exclusive"},
759 {Opt_notrim, "notrim"},
756 {Opt_err, NULL} 760 {Opt_err, NULL}
757}; 761};
758 762
759struct rbd_options { 763struct rbd_options {
760 int queue_depth; 764 int queue_depth;
765 unsigned long lock_timeout;
761 bool read_only; 766 bool read_only;
762 bool lock_on_read; 767 bool lock_on_read;
763 bool exclusive; 768 bool exclusive;
769 bool trim;
764}; 770};
765 771
766#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ 772#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
773#define RBD_LOCK_TIMEOUT_DEFAULT 0 /* no timeout */
767#define RBD_READ_ONLY_DEFAULT false 774#define RBD_READ_ONLY_DEFAULT false
768#define RBD_LOCK_ON_READ_DEFAULT false 775#define RBD_LOCK_ON_READ_DEFAULT false
769#define RBD_EXCLUSIVE_DEFAULT false 776#define RBD_EXCLUSIVE_DEFAULT false
777#define RBD_TRIM_DEFAULT true
770 778
771static int parse_rbd_opts_token(char *c, void *private) 779static int parse_rbd_opts_token(char *c, void *private)
772{ 780{
@@ -796,6 +804,14 @@ static int parse_rbd_opts_token(char *c, void *private)
796 } 804 }
797 rbd_opts->queue_depth = intval; 805 rbd_opts->queue_depth = intval;
798 break; 806 break;
807 case Opt_lock_timeout:
808 /* 0 is "wait forever" (i.e. infinite timeout) */
809 if (intval < 0 || intval > INT_MAX / 1000) {
810 pr_err("lock_timeout out of range\n");
811 return -EINVAL;
812 }
813 rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000);
814 break;
799 case Opt_read_only: 815 case Opt_read_only:
800 rbd_opts->read_only = true; 816 rbd_opts->read_only = true;
801 break; 817 break;
@@ -808,6 +824,9 @@ static int parse_rbd_opts_token(char *c, void *private)
808 case Opt_exclusive: 824 case Opt_exclusive:
809 rbd_opts->exclusive = true; 825 rbd_opts->exclusive = true;
810 break; 826 break;
827 case Opt_notrim:
828 rbd_opts->trim = false;
829 break;
811 default: 830 default:
812 /* libceph prints "bad option" msg */ 831 /* libceph prints "bad option" msg */
813 return -EINVAL; 832 return -EINVAL;
@@ -1392,7 +1411,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req)
1392 case OBJ_OP_DISCARD: 1411 case OBJ_OP_DISCARD:
1393 return true; 1412 return true;
1394 default: 1413 default:
1395 rbd_assert(0); 1414 BUG();
1396 } 1415 }
1397} 1416}
1398 1417
@@ -2466,7 +2485,7 @@ again:
2466 } 2485 }
2467 return false; 2486 return false;
2468 default: 2487 default:
2469 rbd_assert(0); 2488 BUG();
2470 } 2489 }
2471} 2490}
2472 2491
@@ -2494,7 +2513,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
2494 } 2513 }
2495 return false; 2514 return false;
2496 default: 2515 default:
2497 rbd_assert(0); 2516 BUG();
2498 } 2517 }
2499} 2518}
2500 2519
@@ -3533,9 +3552,22 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
3533/* 3552/*
3534 * lock_rwsem must be held for read 3553 * lock_rwsem must be held for read
3535 */ 3554 */
3536static void rbd_wait_state_locked(struct rbd_device *rbd_dev) 3555static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire)
3537{ 3556{
3538 DEFINE_WAIT(wait); 3557 DEFINE_WAIT(wait);
3558 unsigned long timeout;
3559 int ret = 0;
3560
3561 if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
3562 return -EBLACKLISTED;
3563
3564 if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
3565 return 0;
3566
3567 if (!may_acquire) {
3568 rbd_warn(rbd_dev, "exclusive lock required");
3569 return -EROFS;
3570 }
3539 3571
3540 do { 3572 do {
3541 /* 3573 /*
@@ -3547,12 +3579,22 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
3547 prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait, 3579 prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait,
3548 TASK_UNINTERRUPTIBLE); 3580 TASK_UNINTERRUPTIBLE);
3549 up_read(&rbd_dev->lock_rwsem); 3581 up_read(&rbd_dev->lock_rwsem);
3550 schedule(); 3582 timeout = schedule_timeout(ceph_timeout_jiffies(
3583 rbd_dev->opts->lock_timeout));
3551 down_read(&rbd_dev->lock_rwsem); 3584 down_read(&rbd_dev->lock_rwsem);
3552 } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && 3585 if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
3553 !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)); 3586 ret = -EBLACKLISTED;
3587 break;
3588 }
3589 if (!timeout) {
3590 rbd_warn(rbd_dev, "timed out waiting for lock");
3591 ret = -ETIMEDOUT;
3592 break;
3593 }
3594 } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
3554 3595
3555 finish_wait(&rbd_dev->lock_waitq, &wait); 3596 finish_wait(&rbd_dev->lock_waitq, &wait);
3597 return ret;
3556} 3598}
3557 3599
3558static void rbd_queue_workfn(struct work_struct *work) 3600static void rbd_queue_workfn(struct work_struct *work)
@@ -3638,19 +3680,10 @@ static void rbd_queue_workfn(struct work_struct *work)
3638 (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read); 3680 (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read);
3639 if (must_be_locked) { 3681 if (must_be_locked) {
3640 down_read(&rbd_dev->lock_rwsem); 3682 down_read(&rbd_dev->lock_rwsem);
3641 if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && 3683 result = rbd_wait_state_locked(rbd_dev,
3642 !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { 3684 !rbd_dev->opts->exclusive);
3643 if (rbd_dev->opts->exclusive) { 3685 if (result)
3644 rbd_warn(rbd_dev, "exclusive lock required");
3645 result = -EROFS;
3646 goto err_unlock;
3647 }
3648 rbd_wait_state_locked(rbd_dev);
3649 }
3650 if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
3651 result = -EBLACKLISTED;
3652 goto err_unlock; 3686 goto err_unlock;
3653 }
3654 } 3687 }
3655 3688
3656 img_request = rbd_img_request_create(rbd_dev, op_type, snapc); 3689 img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
@@ -3902,7 +3935,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3902{ 3935{
3903 struct gendisk *disk; 3936 struct gendisk *disk;
3904 struct request_queue *q; 3937 struct request_queue *q;
3905 u64 segment_size; 3938 unsigned int objset_bytes =
3939 rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
3906 int err; 3940 int err;
3907 3941
3908 /* create gendisk info */ 3942 /* create gendisk info */
@@ -3942,20 +3976,19 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3942 blk_queue_flag_set(QUEUE_FLAG_NONROT, q); 3976 blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
3943 /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ 3977 /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
3944 3978
3945 /* set io sizes to object size */ 3979 blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT);
3946 segment_size = rbd_obj_bytes(&rbd_dev->header);
3947 blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
3948 q->limits.max_sectors = queue_max_hw_sectors(q); 3980 q->limits.max_sectors = queue_max_hw_sectors(q);
3949 blk_queue_max_segments(q, USHRT_MAX); 3981 blk_queue_max_segments(q, USHRT_MAX);
3950 blk_queue_max_segment_size(q, UINT_MAX); 3982 blk_queue_max_segment_size(q, UINT_MAX);
3951 blk_queue_io_min(q, segment_size); 3983 blk_queue_io_min(q, objset_bytes);
3952 blk_queue_io_opt(q, segment_size); 3984 blk_queue_io_opt(q, objset_bytes);
3953 3985
3954 /* enable the discard support */ 3986 if (rbd_dev->opts->trim) {
3955 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 3987 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
3956 q->limits.discard_granularity = segment_size; 3988 q->limits.discard_granularity = objset_bytes;
3957 blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); 3989 blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
3958 blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); 3990 blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
3991 }
3959 3992
3960 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) 3993 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
3961 q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; 3994 q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
@@ -5179,8 +5212,10 @@ static int rbd_add_parse_args(const char *buf,
5179 5212
5180 rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 5213 rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
5181 rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; 5214 rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
5215 rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
5182 rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; 5216 rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
5183 rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; 5217 rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
5218 rbd_opts->trim = RBD_TRIM_DEFAULT;
5184 5219
5185 copts = ceph_parse_options(options, mon_addrs, 5220 copts = ceph_parse_options(options, mon_addrs,
5186 mon_addrs + mon_addrs_size - 1, 5221 mon_addrs + mon_addrs_size - 1,
@@ -5216,6 +5251,8 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
5216 5251
5217static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) 5252static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
5218{ 5253{
5254 int ret;
5255
5219 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { 5256 if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
5220 rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); 5257 rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
5221 return -EINVAL; 5258 return -EINVAL;
@@ -5223,9 +5260,9 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
5223 5260
5224 /* FIXME: "rbd map --exclusive" should be in interruptible */ 5261 /* FIXME: "rbd map --exclusive" should be in interruptible */
5225 down_read(&rbd_dev->lock_rwsem); 5262 down_read(&rbd_dev->lock_rwsem);
5226 rbd_wait_state_locked(rbd_dev); 5263 ret = rbd_wait_state_locked(rbd_dev, true);
5227 up_read(&rbd_dev->lock_rwsem); 5264 up_read(&rbd_dev->lock_rwsem);
5228 if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { 5265 if (ret) {
5229 rbd_warn(rbd_dev, "failed to acquire exclusive lock"); 5266 rbd_warn(rbd_dev, "failed to acquire exclusive lock");
5230 return -EROFS; 5267 return -EROFS;
5231 } 5268 }
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 9ee2888275c1..8e8a09755d10 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -133,6 +133,14 @@ config VT8500_TIMER
133 help 133 help
134 Enables support for the VT8500 driver. 134 Enables support for the VT8500 driver.
135 135
136config NPCM7XX_TIMER
137 bool "NPCM7xx timer driver" if COMPILE_TEST
138 depends on HAS_IOMEM
139 select CLKSRC_MMIO
140 help
 141	  Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture,
 142	  where TIMER0 serves as clockevent and TIMER1 serves as clocksource.
143
136config CADENCE_TTC_TIMER 144config CADENCE_TTC_TIMER
137 bool "Cadence TTC timer driver" if COMPILE_TEST 145 bool "Cadence TTC timer driver" if COMPILE_TEST
138 depends on COMMON_CLK 146 depends on COMMON_CLK
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index e8e76dfef00b..00caf37e52f9 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o
56obj-$(CONFIG_OXNAS_RPS_TIMER) += timer-oxnas-rps.o 56obj-$(CONFIG_OXNAS_RPS_TIMER) += timer-oxnas-rps.o
57obj-$(CONFIG_OWL_TIMER) += owl-timer.o 57obj-$(CONFIG_OWL_TIMER) += owl-timer.o
58obj-$(CONFIG_SPRD_TIMER) += timer-sprd.o 58obj-$(CONFIG_SPRD_TIMER) += timer-sprd.o
59obj-$(CONFIG_NPCM7XX_TIMER) += timer-npcm7xx.o
59 60
60obj-$(CONFIG_ARC_TIMERS) += arc_timer.o 61obj-$(CONFIG_ARC_TIMERS) += arc_timer.o
61obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o 62obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 21bffdcb2f20..05d97a6871d8 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -17,9 +17,14 @@
17#include <linux/of_irq.h> 17#include <linux/of_irq.h>
18#include <linux/sched_clock.h> 18#include <linux/sched_clock.h>
19 19
20#define TPM_PARAM 0x4
21#define TPM_PARAM_WIDTH_SHIFT 16
22#define TPM_PARAM_WIDTH_MASK (0xff << 16)
20#define TPM_SC 0x10 23#define TPM_SC 0x10
21#define TPM_SC_CMOD_INC_PER_CNT (0x1 << 3) 24#define TPM_SC_CMOD_INC_PER_CNT (0x1 << 3)
22#define TPM_SC_CMOD_DIV_DEFAULT 0x3 25#define TPM_SC_CMOD_DIV_DEFAULT 0x3
26#define TPM_SC_CMOD_DIV_MAX 0x7
27#define TPM_SC_TOF_MASK (0x1 << 7)
23#define TPM_CNT 0x14 28#define TPM_CNT 0x14
24#define TPM_MOD 0x18 29#define TPM_MOD 0x18
25#define TPM_STATUS 0x1c 30#define TPM_STATUS 0x1c
@@ -29,8 +34,11 @@
29#define TPM_C0SC_MODE_SHIFT 2 34#define TPM_C0SC_MODE_SHIFT 2
30#define TPM_C0SC_MODE_MASK 0x3c 35#define TPM_C0SC_MODE_MASK 0x3c
31#define TPM_C0SC_MODE_SW_COMPARE 0x4 36#define TPM_C0SC_MODE_SW_COMPARE 0x4
37#define TPM_C0SC_CHF_MASK (0x1 << 7)
32#define TPM_C0V 0x24 38#define TPM_C0V 0x24
33 39
40static int counter_width;
41static int rating;
34static void __iomem *timer_base; 42static void __iomem *timer_base;
35static struct clock_event_device clockevent_tpm; 43static struct clock_event_device clockevent_tpm;
36 44
@@ -83,10 +91,11 @@ static int __init tpm_clocksource_init(unsigned long rate)
83 tpm_delay_timer.freq = rate; 91 tpm_delay_timer.freq = rate;
84 register_current_timer_delay(&tpm_delay_timer); 92 register_current_timer_delay(&tpm_delay_timer);
85 93
86 sched_clock_register(tpm_read_sched_clock, 32, rate); 94 sched_clock_register(tpm_read_sched_clock, counter_width, rate);
87 95
88 return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm", 96 return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm",
89 rate, 200, 32, clocksource_mmio_readl_up); 97 rate, rating, counter_width,
98 clocksource_mmio_readl_up);
90} 99}
91 100
92static int tpm_set_next_event(unsigned long delta, 101static int tpm_set_next_event(unsigned long delta,
@@ -139,7 +148,6 @@ static struct clock_event_device clockevent_tpm = {
139 .set_state_oneshot = tpm_set_state_oneshot, 148 .set_state_oneshot = tpm_set_state_oneshot,
140 .set_next_event = tpm_set_next_event, 149 .set_next_event = tpm_set_next_event,
141 .set_state_shutdown = tpm_set_state_shutdown, 150 .set_state_shutdown = tpm_set_state_shutdown,
142 .rating = 200,
143}; 151};
144 152
145static int __init tpm_clockevent_init(unsigned long rate, int irq) 153static int __init tpm_clockevent_init(unsigned long rate, int irq)
@@ -149,10 +157,11 @@ static int __init tpm_clockevent_init(unsigned long rate, int irq)
149 ret = request_irq(irq, tpm_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, 157 ret = request_irq(irq, tpm_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL,
150 "i.MX7ULP TPM Timer", &clockevent_tpm); 158 "i.MX7ULP TPM Timer", &clockevent_tpm);
151 159
160 clockevent_tpm.rating = rating;
152 clockevent_tpm.cpumask = cpumask_of(0); 161 clockevent_tpm.cpumask = cpumask_of(0);
153 clockevent_tpm.irq = irq; 162 clockevent_tpm.irq = irq;
154 clockevents_config_and_register(&clockevent_tpm, 163 clockevents_config_and_register(&clockevent_tpm, rate, 300,
155 rate, 300, 0xfffffffe); 164 GENMASK(counter_width - 1, 1));
156 165
157 return ret; 166 return ret;
158} 167}
@@ -179,7 +188,7 @@ static int __init tpm_timer_init(struct device_node *np)
179 ipg = of_clk_get_by_name(np, "ipg"); 188 ipg = of_clk_get_by_name(np, "ipg");
180 per = of_clk_get_by_name(np, "per"); 189 per = of_clk_get_by_name(np, "per");
181 if (IS_ERR(ipg) || IS_ERR(per)) { 190 if (IS_ERR(ipg) || IS_ERR(per)) {
182 pr_err("tpm: failed to get igp or per clk\n"); 191 pr_err("tpm: failed to get ipg or per clk\n");
183 ret = -ENODEV; 192 ret = -ENODEV;
184 goto err_clk_get; 193 goto err_clk_get;
185 } 194 }
@@ -197,6 +206,11 @@ static int __init tpm_timer_init(struct device_node *np)
197 goto err_per_clk_enable; 206 goto err_per_clk_enable;
198 } 207 }
199 208
209 counter_width = (readl(timer_base + TPM_PARAM) & TPM_PARAM_WIDTH_MASK)
210 >> TPM_PARAM_WIDTH_SHIFT;
211 /* use rating 200 for 32-bit counter and 150 for 16-bit counter */
212 rating = counter_width == 0x20 ? 200 : 150;
213
200 /* 214 /*
201 * Initialize tpm module to a known state 215 * Initialize tpm module to a known state
202 * 1) Counter disabled 216 * 1) Counter disabled
@@ -205,16 +219,25 @@ static int __init tpm_timer_init(struct device_node *np)
205 * 4) Channel0 disabled 219 * 4) Channel0 disabled
206 * 5) DMA transfers disabled 220 * 5) DMA transfers disabled
207 */ 221 */
222 /* make sure counter is disabled */
208 writel(0, timer_base + TPM_SC); 223 writel(0, timer_base + TPM_SC);
224 /* TOF is W1C */
225 writel(TPM_SC_TOF_MASK, timer_base + TPM_SC);
209 writel(0, timer_base + TPM_CNT); 226 writel(0, timer_base + TPM_CNT);
210 writel(0, timer_base + TPM_C0SC); 227 /* CHF is W1C */
228 writel(TPM_C0SC_CHF_MASK, timer_base + TPM_C0SC);
211 229
212 /* increase per cnt, div 8 by default */ 230 /*
213 writel(TPM_SC_CMOD_INC_PER_CNT | TPM_SC_CMOD_DIV_DEFAULT, 231 * increase per cnt,
232 * div 8 for 32-bit counter and div 128 for 16-bit counter
233 */
234 writel(TPM_SC_CMOD_INC_PER_CNT |
235 (counter_width == 0x20 ?
236 TPM_SC_CMOD_DIV_DEFAULT : TPM_SC_CMOD_DIV_MAX),
214 timer_base + TPM_SC); 237 timer_base + TPM_SC);
215 238
216 /* set MOD register to maximum for free running mode */ 239 /* set MOD register to maximum for free running mode */
217 writel(0xffffffff, timer_base + TPM_MOD); 240 writel(GENMASK(counter_width - 1, 0), timer_base + TPM_MOD);
218 241
219 rate = clk_get_rate(per) >> 3; 242 rate = clk_get_rate(per) >> 3;
220 ret = tpm_clocksource_init(rate); 243 ret = tpm_clocksource_init(rate);
diff --git a/drivers/clocksource/timer-npcm7xx.c b/drivers/clocksource/timer-npcm7xx.c
new file mode 100644
index 000000000000..7a9bb5532d99
--- /dev/null
+++ b/drivers/clocksource/timer-npcm7xx.c
@@ -0,0 +1,215 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2014-2018 Nuvoton Technologies tomer.maimon@nuvoton.com
4 * All rights reserved.
5 *
6 * Copyright 2017 Google, Inc.
7 */
8
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/init.h>
12#include <linux/interrupt.h>
13#include <linux/err.h>
14#include <linux/clk.h>
15#include <linux/io.h>
16#include <linux/clockchips.h>
17#include <linux/of_irq.h>
18#include <linux/of_address.h>
19#include "timer-of.h"
20
/* Timers registers */
#define NPCM7XX_REG_TCSR0	0x0 /* Timer 0 Control and Status Register */
#define NPCM7XX_REG_TICR0	0x8 /* Timer 0 Initial Count Register */
#define NPCM7XX_REG_TCSR1	0x4 /* Timer 1 Control and Status Register */
#define NPCM7XX_REG_TICR1	0xc /* Timer 1 Initial Count Register */
#define NPCM7XX_REG_TDR1	0x14 /* Timer 1 Data Register */
#define NPCM7XX_REG_TISR	0x18 /* Timer Interrupt Status Register */

/* Timers control */
#define NPCM7XX_Tx_RESETINT		0x1f
#define NPCM7XX_Tx_PERIOD		BIT(27)
#define NPCM7XX_Tx_INTEN		BIT(29)
#define NPCM7XX_Tx_COUNTEN		BIT(30)
#define NPCM7XX_Tx_ONESHOT		0x0
/*
 * Operating-mode field of TCSR spans bits [28:27]; GENMASK takes
 * (high, low).  The original GENMASK(3, 27) had the arguments
 * inverted and did not describe the mode field at all.
 */
#define NPCM7XX_Tx_OPER			GENMASK(28, 27)
#define NPCM7XX_Tx_MIN_PRESCALE		0x1
#define NPCM7XX_Tx_TDR_MASK_BITS	24
#define NPCM7XX_Tx_MAX_CNT		0xFFFFFF
#define NPCM7XX_T0_CLR_INT		0x1
#define NPCM7XX_Tx_CLR_CSR		0x0

/* Timers operating mode */
#define NPCM7XX_START_PERIODIC_Tx (NPCM7XX_Tx_PERIOD | NPCM7XX_Tx_COUNTEN | \
					NPCM7XX_Tx_INTEN | \
					NPCM7XX_Tx_MIN_PRESCALE)

#define NPCM7XX_START_ONESHOT_Tx (NPCM7XX_Tx_ONESHOT | NPCM7XX_Tx_COUNTEN | \
					NPCM7XX_Tx_INTEN | \
					NPCM7XX_Tx_MIN_PRESCALE)

#define NPCM7XX_START_Tx (NPCM7XX_Tx_COUNTEN | NPCM7XX_Tx_PERIOD | \
				NPCM7XX_Tx_MIN_PRESCALE)

#define NPCM7XX_DEFAULT_CSR (NPCM7XX_Tx_CLR_CSR | NPCM7XX_Tx_MIN_PRESCALE)
55
56static int npcm7xx_timer_resume(struct clock_event_device *evt)
57{
58 struct timer_of *to = to_timer_of(evt);
59 u32 val;
60
61 val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
62 val |= NPCM7XX_Tx_COUNTEN;
63 writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
64
65 return 0;
66}
67
68static int npcm7xx_timer_shutdown(struct clock_event_device *evt)
69{
70 struct timer_of *to = to_timer_of(evt);
71 u32 val;
72
73 val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
74 val &= ~NPCM7XX_Tx_COUNTEN;
75 writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
76
77 return 0;
78}
79
80static int npcm7xx_timer_oneshot(struct clock_event_device *evt)
81{
82 struct timer_of *to = to_timer_of(evt);
83 u32 val;
84
85 val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
86 val &= ~NPCM7XX_Tx_OPER;
87
88 val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
89 val |= NPCM7XX_START_ONESHOT_Tx;
90 writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
91
92 return 0;
93}
94
95static int npcm7xx_timer_periodic(struct clock_event_device *evt)
96{
97 struct timer_of *to = to_timer_of(evt);
98 u32 val;
99
100 val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
101 val &= ~NPCM7XX_Tx_OPER;
102
103 writel(timer_of_period(to), timer_of_base(to) + NPCM7XX_REG_TICR0);
104 val |= NPCM7XX_START_PERIODIC_Tx;
105
106 writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
107
108 return 0;
109}
110
111static int npcm7xx_clockevent_set_next_event(unsigned long evt,
112 struct clock_event_device *clk)
113{
114 struct timer_of *to = to_timer_of(clk);
115 u32 val;
116
117 writel(evt, timer_of_base(to) + NPCM7XX_REG_TICR0);
118 val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
119 val |= NPCM7XX_START_Tx;
120 writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
121
122 return 0;
123}
124
125static irqreturn_t npcm7xx_timer0_interrupt(int irq, void *dev_id)
126{
127 struct clock_event_device *evt = (struct clock_event_device *)dev_id;
128 struct timer_of *to = to_timer_of(evt);
129
130 writel(NPCM7XX_T0_CLR_INT, timer_of_base(to) + NPCM7XX_REG_TISR);
131
132 evt->event_handler(evt);
133
134 return IRQ_HANDLED;
135}
136
/*
 * timer_of descriptor: timer 0 drives the clockevent.  The IRQ, MMIO
 * base and input clock are taken from the device tree (TIMER_OF_*
 * flags below).
 */
static struct timer_of npcm7xx_to = {
	.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,

	.clkevt = {
		.name		    = "npcm7xx-timer0",
		.features	    = CLOCK_EVT_FEAT_PERIODIC |
				      CLOCK_EVT_FEAT_ONESHOT,
		.set_next_event	    = npcm7xx_clockevent_set_next_event,
		.set_state_shutdown = npcm7xx_timer_shutdown,
		.set_state_periodic = npcm7xx_timer_periodic,
		.set_state_oneshot  = npcm7xx_timer_oneshot,
		.tick_resume	    = npcm7xx_timer_resume,
		.rating		    = 300,
	},

	.of_irq = {
		.handler = npcm7xx_timer0_interrupt,
		.flags = IRQF_TIMER | IRQF_IRQPOLL,
	},
};
157
158static void __init npcm7xx_clockevents_init(void)
159{
160 writel(NPCM7XX_DEFAULT_CSR,
161 timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR0);
162
163 writel(NPCM7XX_Tx_RESETINT,
164 timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TISR);
165
166 npcm7xx_to.clkevt.cpumask = cpumask_of(0);
167 clockevents_config_and_register(&npcm7xx_to.clkevt,
168 timer_of_rate(&npcm7xx_to),
169 0x1, NPCM7XX_Tx_MAX_CNT);
170}
171
172static void __init npcm7xx_clocksource_init(void)
173{
174 u32 val;
175
176 writel(NPCM7XX_DEFAULT_CSR,
177 timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1);
178 writel(NPCM7XX_Tx_MAX_CNT,
179 timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TICR1);
180
181 val = readl(timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1);
182 val |= NPCM7XX_START_Tx;
183 writel(val, timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1);
184
185 clocksource_mmio_init(timer_of_base(&npcm7xx_to) +
186 NPCM7XX_REG_TDR1,
187 "npcm7xx-timer1", timer_of_rate(&npcm7xx_to),
188 200, (unsigned int)NPCM7XX_Tx_TDR_MASK_BITS,
189 clocksource_mmio_readl_down);
190}
191
192static int __init npcm7xx_timer_init(struct device_node *np)
193{
194 int ret;
195
196 ret = timer_of_init(np, &npcm7xx_to);
197 if (ret)
198 return ret;
199
200 /* Clock input is divided by PRESCALE + 1 before it is fed */
201 /* to the counter */
202 npcm7xx_to.of_clk.rate = npcm7xx_to.of_clk.rate /
203 (NPCM7XX_Tx_MIN_PRESCALE + 1);
204
205 npcm7xx_clocksource_init();
206 npcm7xx_clockevents_init();
207
208 pr_info("Enabling NPCM7xx clocksource timer base: %px, IRQ: %d ",
209 timer_of_base(&npcm7xx_to), timer_of_irq(&npcm7xx_to));
210
211 return 0;
212}
213
214TIMER_OF_DECLARE(npcm7xx, "nuvoton,npcm750-timer", npcm7xx_timer_init);
215
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8bf60250309e..ae056927080d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued,
669 CEPH_CAP_FILE_BUFFER| 669 CEPH_CAP_FILE_BUFFER|
670 CEPH_CAP_AUTH_EXCL| 670 CEPH_CAP_AUTH_EXCL|
671 CEPH_CAP_XATTR_EXCL)) { 671 CEPH_CAP_XATTR_EXCL)) {
672 if (timespec_compare(ctime, &inode->i_ctime) > 0) { 672 if (ci->i_version == 0 ||
673 timespec_compare(ctime, &inode->i_ctime) > 0) {
673 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", 674 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
674 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, 675 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
675 ctime->tv_sec, ctime->tv_nsec); 676 ctime->tv_sec, ctime->tv_nsec);
676 inode->i_ctime = *ctime; 677 inode->i_ctime = *ctime;
677 } 678 }
678 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { 679 if (ci->i_version == 0 ||
680 ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
679 /* the MDS did a utimes() */ 681 /* the MDS did a utimes() */
680 dout("mtime %ld.%09ld -> %ld.%09ld " 682 dout("mtime %ld.%09ld -> %ld.%09ld "
681 "tw %d -> %d\n", 683 "tw %d -> %d\n",
@@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
795 new_issued = ~issued & le32_to_cpu(info->cap.caps); 797 new_issued = ~issued & le32_to_cpu(info->cap.caps);
796 798
797 /* update inode */ 799 /* update inode */
798 ci->i_version = le64_to_cpu(info->version);
799 inode->i_rdev = le32_to_cpu(info->rdev); 800 inode->i_rdev = le32_to_cpu(info->rdev);
800 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 801 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
801 802
@@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
868 xattr_blob = NULL; 869 xattr_blob = NULL;
869 } 870 }
870 871
872 /* finally update i_version */
873 ci->i_version = le64_to_cpu(info->version);
874
871 inode->i_mapping->a_ops = &ceph_aops; 875 inode->i_mapping->a_ops = &ceph_aops;
872 876
873 switch (inode->i_mode & S_IFMT) { 877 switch (inode->i_mode & S_IFMT) {
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index dc44de904797..2ddcc96ae456 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -4,17 +4,18 @@ top_srcdir = ../../../../
4UNAME_M := $(shell uname -m) 4UNAME_M := $(shell uname -m)
5 5
6LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c 6LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
7LIBKVM_x86_64 = lib/x86.c 7LIBKVM_x86_64 = lib/x86.c lib/vmx.c
8 8
9TEST_GEN_PROGS_x86_64 = set_sregs_test 9TEST_GEN_PROGS_x86_64 = set_sregs_test
10TEST_GEN_PROGS_x86_64 += sync_regs_test 10TEST_GEN_PROGS_x86_64 += sync_regs_test
11TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
11 12
12TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) 13TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
13LIBKVM += $(LIBKVM_$(UNAME_M)) 14LIBKVM += $(LIBKVM_$(UNAME_M))
14 15
15INSTALL_HDR_PATH = $(top_srcdir)/usr 16INSTALL_HDR_PATH = $(top_srcdir)/usr
16LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ 17LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
17CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) 18CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
18 19
19# After inclusion, $(OUTPUT) is defined and 20# After inclusion, $(OUTPUT) is defined and
20# $(TEST_GEN_PROGS) starts with $(OUTPUT)/ 21# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 57974ad46373..637b7017b6ee 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -112,24 +112,27 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
112vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, 112vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
113 vm_paddr_t paddr_min, uint32_t memslot); 113 vm_paddr_t paddr_min, uint32_t memslot);
114 114
115void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); 115struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
116void vcpu_set_cpuid( 116void vcpu_set_cpuid(
117 struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); 117 struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
118 118
119struct kvm_cpuid2 *allocate_kvm_cpuid2(void);
120struct kvm_cpuid_entry2 * 119struct kvm_cpuid_entry2 *
121find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, 120kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
122 uint32_t index);
123 121
124static inline struct kvm_cpuid_entry2 * 122static inline struct kvm_cpuid_entry2 *
125find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) 123kvm_get_supported_cpuid_entry(uint32_t function)
126{ 124{
127 return find_cpuid_index_entry(cpuid, function, 0); 125 return kvm_get_supported_cpuid_index(function, 0);
128} 126}
129 127
130struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); 128struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
131void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); 129void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
132 130
131typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
132 vm_paddr_t vmxon_paddr,
133 vm_vaddr_t vmcs_vaddr,
134 vm_paddr_t vmcs_paddr);
135
133struct kvm_userspace_memory_region * 136struct kvm_userspace_memory_region *
134kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, 137kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
135 uint64_t end); 138 uint64_t end);
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h
new file mode 100644
index 000000000000..6ed8499807fd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/vmx.h
@@ -0,0 +1,494 @@
1/*
2 * tools/testing/selftests/kvm/include/vmx.h
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 *
8 */
9
10#ifndef SELFTEST_KVM_VMX_H
11#define SELFTEST_KVM_VMX_H
12
13#include <stdint.h>
14#include "x86.h"
15
16#define CPUID_VMX_BIT 5
17
18#define CPUID_VMX (1 << 5)
19
20/*
21 * Definitions of Primary Processor-Based VM-Execution Controls.
22 */
23#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
24#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
25#define CPU_BASED_HLT_EXITING 0x00000080
26#define CPU_BASED_INVLPG_EXITING 0x00000200
27#define CPU_BASED_MWAIT_EXITING 0x00000400
28#define CPU_BASED_RDPMC_EXITING 0x00000800
29#define CPU_BASED_RDTSC_EXITING 0x00001000
30#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
31#define CPU_BASED_CR3_STORE_EXITING 0x00010000
32#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
33#define CPU_BASED_CR8_STORE_EXITING 0x00100000
34#define CPU_BASED_TPR_SHADOW 0x00200000
35#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
36#define CPU_BASED_MOV_DR_EXITING 0x00800000
37#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
38#define CPU_BASED_USE_IO_BITMAPS 0x02000000
39#define CPU_BASED_MONITOR_TRAP 0x08000000
40#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
41#define CPU_BASED_MONITOR_EXITING 0x20000000
42#define CPU_BASED_PAUSE_EXITING 0x40000000
43#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
44
45#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
46
47/*
48 * Definitions of Secondary Processor-Based VM-Execution Controls.
49 */
50#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
51#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
52#define SECONDARY_EXEC_DESC 0x00000004
53#define SECONDARY_EXEC_RDTSCP 0x00000008
54#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
55#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
56#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
57#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
58#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
59#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
60#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
61#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
62#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
63#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
64#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
65#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
66#define SECONDARY_EXEC_ENABLE_PML 0x00020000
67#define SECONDARY_EPT_VE 0x00040000
68#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
69#define SECONDARY_EXEC_TSC_SCALING 0x02000000
70
71#define PIN_BASED_EXT_INTR_MASK 0x00000001
72#define PIN_BASED_NMI_EXITING 0x00000008
73#define PIN_BASED_VIRTUAL_NMIS 0x00000020
74#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
75#define PIN_BASED_POSTED_INTR 0x00000080
76
77#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
78
79#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
80#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
81#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
82#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
83#define VM_EXIT_SAVE_IA32_PAT 0x00040000
84#define VM_EXIT_LOAD_IA32_PAT 0x00080000
85#define VM_EXIT_SAVE_IA32_EFER 0x00100000
86#define VM_EXIT_LOAD_IA32_EFER 0x00200000
87#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
88
89#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
90
91#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
92#define VM_ENTRY_IA32E_MODE 0x00000200
93#define VM_ENTRY_SMM 0x00000400
94#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
95#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
96#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
97#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
98
99#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
100
101#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
102#define VMX_MISC_SAVE_EFER_LMA 0x00000020
103
104#define EXIT_REASON_FAILED_VMENTRY 0x80000000
105#define EXIT_REASON_EXCEPTION_NMI 0
106#define EXIT_REASON_EXTERNAL_INTERRUPT 1
107#define EXIT_REASON_TRIPLE_FAULT 2
108#define EXIT_REASON_PENDING_INTERRUPT 7
109#define EXIT_REASON_NMI_WINDOW 8
110#define EXIT_REASON_TASK_SWITCH 9
111#define EXIT_REASON_CPUID 10
112#define EXIT_REASON_HLT 12
113#define EXIT_REASON_INVD 13
114#define EXIT_REASON_INVLPG 14
115#define EXIT_REASON_RDPMC 15
116#define EXIT_REASON_RDTSC 16
117#define EXIT_REASON_VMCALL 18
118#define EXIT_REASON_VMCLEAR 19
119#define EXIT_REASON_VMLAUNCH 20
120#define EXIT_REASON_VMPTRLD 21
121#define EXIT_REASON_VMPTRST 22
122#define EXIT_REASON_VMREAD 23
123#define EXIT_REASON_VMRESUME 24
124#define EXIT_REASON_VMWRITE 25
125#define EXIT_REASON_VMOFF 26
126#define EXIT_REASON_VMON 27
127#define EXIT_REASON_CR_ACCESS 28
128#define EXIT_REASON_DR_ACCESS 29
129#define EXIT_REASON_IO_INSTRUCTION 30
130#define EXIT_REASON_MSR_READ 31
131#define EXIT_REASON_MSR_WRITE 32
132#define EXIT_REASON_INVALID_STATE 33
133#define EXIT_REASON_MWAIT_INSTRUCTION 36
134#define EXIT_REASON_MONITOR_INSTRUCTION 39
135#define EXIT_REASON_PAUSE_INSTRUCTION 40
136#define EXIT_REASON_MCE_DURING_VMENTRY 41
137#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
138#define EXIT_REASON_APIC_ACCESS 44
139#define EXIT_REASON_EOI_INDUCED 45
140#define EXIT_REASON_EPT_VIOLATION 48
141#define EXIT_REASON_EPT_MISCONFIG 49
142#define EXIT_REASON_INVEPT 50
143#define EXIT_REASON_RDTSCP 51
144#define EXIT_REASON_PREEMPTION_TIMER 52
145#define EXIT_REASON_INVVPID 53
146#define EXIT_REASON_WBINVD 54
147#define EXIT_REASON_XSETBV 55
148#define EXIT_REASON_APIC_WRITE 56
149#define EXIT_REASON_INVPCID 58
150#define EXIT_REASON_PML_FULL 62
151#define EXIT_REASON_XSAVES 63
152#define EXIT_REASON_XRSTORS 64
153#define LAST_EXIT_REASON 64
154
155enum vmcs_field {
156 VIRTUAL_PROCESSOR_ID = 0x00000000,
157 POSTED_INTR_NV = 0x00000002,
158 GUEST_ES_SELECTOR = 0x00000800,
159 GUEST_CS_SELECTOR = 0x00000802,
160 GUEST_SS_SELECTOR = 0x00000804,
161 GUEST_DS_SELECTOR = 0x00000806,
162 GUEST_FS_SELECTOR = 0x00000808,
163 GUEST_GS_SELECTOR = 0x0000080a,
164 GUEST_LDTR_SELECTOR = 0x0000080c,
165 GUEST_TR_SELECTOR = 0x0000080e,
166 GUEST_INTR_STATUS = 0x00000810,
167 GUEST_PML_INDEX = 0x00000812,
168 HOST_ES_SELECTOR = 0x00000c00,
169 HOST_CS_SELECTOR = 0x00000c02,
170 HOST_SS_SELECTOR = 0x00000c04,
171 HOST_DS_SELECTOR = 0x00000c06,
172 HOST_FS_SELECTOR = 0x00000c08,
173 HOST_GS_SELECTOR = 0x00000c0a,
174 HOST_TR_SELECTOR = 0x00000c0c,
175 IO_BITMAP_A = 0x00002000,
176 IO_BITMAP_A_HIGH = 0x00002001,
177 IO_BITMAP_B = 0x00002002,
178 IO_BITMAP_B_HIGH = 0x00002003,
179 MSR_BITMAP = 0x00002004,
180 MSR_BITMAP_HIGH = 0x00002005,
181 VM_EXIT_MSR_STORE_ADDR = 0x00002006,
182 VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
183 VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
184 VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
185 VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
186 VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
187 PML_ADDRESS = 0x0000200e,
188 PML_ADDRESS_HIGH = 0x0000200f,
189 TSC_OFFSET = 0x00002010,
190 TSC_OFFSET_HIGH = 0x00002011,
191 VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
192 VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
193 APIC_ACCESS_ADDR = 0x00002014,
194 APIC_ACCESS_ADDR_HIGH = 0x00002015,
195 POSTED_INTR_DESC_ADDR = 0x00002016,
196 POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
197 EPT_POINTER = 0x0000201a,
198 EPT_POINTER_HIGH = 0x0000201b,
199 EOI_EXIT_BITMAP0 = 0x0000201c,
200 EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
201 EOI_EXIT_BITMAP1 = 0x0000201e,
202 EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
203 EOI_EXIT_BITMAP2 = 0x00002020,
204 EOI_EXIT_BITMAP2_HIGH = 0x00002021,
205 EOI_EXIT_BITMAP3 = 0x00002022,
206 EOI_EXIT_BITMAP3_HIGH = 0x00002023,
207 VMREAD_BITMAP = 0x00002026,
208 VMREAD_BITMAP_HIGH = 0x00002027,
209 VMWRITE_BITMAP = 0x00002028,
210 VMWRITE_BITMAP_HIGH = 0x00002029,
211 XSS_EXIT_BITMAP = 0x0000202C,
212 XSS_EXIT_BITMAP_HIGH = 0x0000202D,
213 TSC_MULTIPLIER = 0x00002032,
214 TSC_MULTIPLIER_HIGH = 0x00002033,
215 GUEST_PHYSICAL_ADDRESS = 0x00002400,
216 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
217 VMCS_LINK_POINTER = 0x00002800,
218 VMCS_LINK_POINTER_HIGH = 0x00002801,
219 GUEST_IA32_DEBUGCTL = 0x00002802,
220 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
221 GUEST_IA32_PAT = 0x00002804,
222 GUEST_IA32_PAT_HIGH = 0x00002805,
223 GUEST_IA32_EFER = 0x00002806,
224 GUEST_IA32_EFER_HIGH = 0x00002807,
225 GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
226 GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
227 GUEST_PDPTR0 = 0x0000280a,
228 GUEST_PDPTR0_HIGH = 0x0000280b,
229 GUEST_PDPTR1 = 0x0000280c,
230 GUEST_PDPTR1_HIGH = 0x0000280d,
231 GUEST_PDPTR2 = 0x0000280e,
232 GUEST_PDPTR2_HIGH = 0x0000280f,
233 GUEST_PDPTR3 = 0x00002810,
234 GUEST_PDPTR3_HIGH = 0x00002811,
235 GUEST_BNDCFGS = 0x00002812,
236 GUEST_BNDCFGS_HIGH = 0x00002813,
237 HOST_IA32_PAT = 0x00002c00,
238 HOST_IA32_PAT_HIGH = 0x00002c01,
239 HOST_IA32_EFER = 0x00002c02,
240 HOST_IA32_EFER_HIGH = 0x00002c03,
241 HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
242 HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
243 PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
244 CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
245 EXCEPTION_BITMAP = 0x00004004,
246 PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
247 PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
248 CR3_TARGET_COUNT = 0x0000400a,
249 VM_EXIT_CONTROLS = 0x0000400c,
250 VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
251 VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
252 VM_ENTRY_CONTROLS = 0x00004012,
253 VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
254 VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
255 VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
256 VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
257 TPR_THRESHOLD = 0x0000401c,
258 SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
259 PLE_GAP = 0x00004020,
260 PLE_WINDOW = 0x00004022,
261 VM_INSTRUCTION_ERROR = 0x00004400,
262 VM_EXIT_REASON = 0x00004402,
263 VM_EXIT_INTR_INFO = 0x00004404,
264 VM_EXIT_INTR_ERROR_CODE = 0x00004406,
265 IDT_VECTORING_INFO_FIELD = 0x00004408,
266 IDT_VECTORING_ERROR_CODE = 0x0000440a,
267 VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
268 VMX_INSTRUCTION_INFO = 0x0000440e,
269 GUEST_ES_LIMIT = 0x00004800,
270 GUEST_CS_LIMIT = 0x00004802,
271 GUEST_SS_LIMIT = 0x00004804,
272 GUEST_DS_LIMIT = 0x00004806,
273 GUEST_FS_LIMIT = 0x00004808,
274 GUEST_GS_LIMIT = 0x0000480a,
275 GUEST_LDTR_LIMIT = 0x0000480c,
276 GUEST_TR_LIMIT = 0x0000480e,
277 GUEST_GDTR_LIMIT = 0x00004810,
278 GUEST_IDTR_LIMIT = 0x00004812,
279 GUEST_ES_AR_BYTES = 0x00004814,
280 GUEST_CS_AR_BYTES = 0x00004816,
281 GUEST_SS_AR_BYTES = 0x00004818,
282 GUEST_DS_AR_BYTES = 0x0000481a,
283 GUEST_FS_AR_BYTES = 0x0000481c,
284 GUEST_GS_AR_BYTES = 0x0000481e,
285 GUEST_LDTR_AR_BYTES = 0x00004820,
286 GUEST_TR_AR_BYTES = 0x00004822,
287 GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
288 GUEST_ACTIVITY_STATE = 0X00004826,
289 GUEST_SYSENTER_CS = 0x0000482A,
290 VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
291 HOST_IA32_SYSENTER_CS = 0x00004c00,
292 CR0_GUEST_HOST_MASK = 0x00006000,
293 CR4_GUEST_HOST_MASK = 0x00006002,
294 CR0_READ_SHADOW = 0x00006004,
295 CR4_READ_SHADOW = 0x00006006,
296 CR3_TARGET_VALUE0 = 0x00006008,
297 CR3_TARGET_VALUE1 = 0x0000600a,
298 CR3_TARGET_VALUE2 = 0x0000600c,
299 CR3_TARGET_VALUE3 = 0x0000600e,
300 EXIT_QUALIFICATION = 0x00006400,
301 GUEST_LINEAR_ADDRESS = 0x0000640a,
302 GUEST_CR0 = 0x00006800,
303 GUEST_CR3 = 0x00006802,
304 GUEST_CR4 = 0x00006804,
305 GUEST_ES_BASE = 0x00006806,
306 GUEST_CS_BASE = 0x00006808,
307 GUEST_SS_BASE = 0x0000680a,
308 GUEST_DS_BASE = 0x0000680c,
309 GUEST_FS_BASE = 0x0000680e,
310 GUEST_GS_BASE = 0x00006810,
311 GUEST_LDTR_BASE = 0x00006812,
312 GUEST_TR_BASE = 0x00006814,
313 GUEST_GDTR_BASE = 0x00006816,
314 GUEST_IDTR_BASE = 0x00006818,
315 GUEST_DR7 = 0x0000681a,
316 GUEST_RSP = 0x0000681c,
317 GUEST_RIP = 0x0000681e,
318 GUEST_RFLAGS = 0x00006820,
319 GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
320 GUEST_SYSENTER_ESP = 0x00006824,
321 GUEST_SYSENTER_EIP = 0x00006826,
322 HOST_CR0 = 0x00006c00,
323 HOST_CR3 = 0x00006c02,
324 HOST_CR4 = 0x00006c04,
325 HOST_FS_BASE = 0x00006c06,
326 HOST_GS_BASE = 0x00006c08,
327 HOST_TR_BASE = 0x00006c0a,
328 HOST_GDTR_BASE = 0x00006c0c,
329 HOST_IDTR_BASE = 0x00006c0e,
330 HOST_IA32_SYSENTER_ESP = 0x00006c10,
331 HOST_IA32_SYSENTER_EIP = 0x00006c12,
332 HOST_RSP = 0x00006c14,
333 HOST_RIP = 0x00006c16,
334};
335
/*
 * One entry of a VM-entry/VM-exit MSR-load/store area.  The layout
 * (32-bit MSR index, 32-bit reserved, 64-bit value) and the 16-byte
 * granularity follow the format defined by the SDM for these tables.
 */
struct vmx_msr_entry {
	uint32_t index;		/* MSR number */
	uint32_t reserved;	/* must be zero */
	uint64_t value;		/* value loaded/stored for the MSR */
} __attribute__ ((aligned(16)));
341
/*
 * Enter VMX root operation using the VMXON region at physical address
 * @phys.  Returns 0 on success, non-zero on VMfail (setna fires when
 * CF or ZF is set after the instruction).
 */
static inline int vmxon(uint64_t phys)
{
	uint8_t ret;

	__asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
		: [ret]"=rm"(ret)
		: [pa]"m"(phys)
		: "cc", "memory");

	return ret;
}
353
/* Leave VMX operation.  No error reporting; failures raise a fault. */
static inline void vmxoff(void)
{
	__asm__ __volatile__("vmxoff");
}
358
/*
 * Clear (deactivate and flush) the VMCS at physical address @vmcs_pa.
 * Returns 0 on success, non-zero on VMfail (setna: CF or ZF set).
 */
static inline int vmclear(uint64_t vmcs_pa)
{
	uint8_t ret;

	__asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
		: [ret]"=rm"(ret)
		: [pa]"m"(vmcs_pa)
		: "cc", "memory");

	return ret;
}
370
/*
 * Make the VMCS at physical address @vmcs_pa current and active.
 * Returns 0 on success, non-zero on VMfail (setna: CF or ZF set).
 */
static inline int vmptrld(uint64_t vmcs_pa)
{
	uint8_t ret;

	__asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
		: [ret]"=rm"(ret)
		: [pa]"m"(vmcs_pa)
		: "cc", "memory");

	return ret;
}
382
/*
 * No guest state (e.g. GPRs) is established by this vmlaunch.
 *
 * Registers the compiler may have live are saved around the
 * instruction, then a zero "result" slot is pushed.  HOST_RSP and
 * HOST_RIP are pointed at the current stack and at label 1, so a
 * successful launch re-enters at label 1 on VM exit with the slot
 * still zero, while a failed vmlaunch falls through to the
 * "incq (%rsp)" and leaves the slot non-zero.  The slot is popped
 * into rax and returned: 0 means a VM exit occurred, non-zero means
 * vmlaunch itself failed.
 */
static inline int vmlaunch(void)
{
	int ret;

	__asm__ __volatile__("push %%rbp;"
			     "push %%rcx;"
			     "push %%rdx;"
			     "push %%rsi;"
			     "push %%rdi;"
			     "push $0;"
			     "vmwrite %%rsp, %[host_rsp];"
			     "lea 1f(%%rip), %%rax;"
			     "vmwrite %%rax, %[host_rip];"
			     "vmlaunch;"
			     "incq (%%rsp);"
			     "1: pop %%rax;"
			     "pop %%rdi;"
			     "pop %%rsi;"
			     "pop %%rdx;"
			     "pop %%rcx;"
			     "pop %%rbp;"
			     : [ret]"=&a"(ret)
			     : [host_rsp]"r"((uint64_t)HOST_RSP),
			       [host_rip]"r"((uint64_t)HOST_RIP)
			     : "memory", "cc", "rbx", "r8", "r9", "r10",
			       "r11", "r12", "r13", "r14", "r15");
	return ret;
}
414
/*
 * No guest state (e.g. GPRs) is established by this vmresume.
 *
 * Same save/launch/restore dance as vmlaunch() above, but resuming a
 * launched VMCS: returns 0 if a VM exit brought us back to label 1,
 * non-zero if vmresume itself failed (the pushed slot was incremented
 * on fall-through).
 */
static inline int vmresume(void)
{
	int ret;

	__asm__ __volatile__("push %%rbp;"
			     "push %%rcx;"
			     "push %%rdx;"
			     "push %%rsi;"
			     "push %%rdi;"
			     "push $0;"
			     "vmwrite %%rsp, %[host_rsp];"
			     "lea 1f(%%rip), %%rax;"
			     "vmwrite %%rax, %[host_rip];"
			     "vmresume;"
			     "incq (%%rsp);"
			     "1: pop %%rax;"
			     "pop %%rdi;"
			     "pop %%rsi;"
			     "pop %%rdx;"
			     "pop %%rcx;"
			     "pop %%rbp;"
			     : [ret]"=&a"(ret)
			     : [host_rsp]"r"((uint64_t)HOST_RSP),
			       [host_rip]"r"((uint64_t)HOST_RIP)
			     : "memory", "cc", "rbx", "r8", "r9", "r10",
			       "r11", "r12", "r13", "r14", "r15");
	return ret;
}
446
/*
 * Read VMCS field @encoding into *@value.  Returns 0 on success,
 * non-zero on VMfail (setna: CF or ZF set).  *@value is written
 * unconditionally, from a local temporary.
 */
static inline int vmread(uint64_t encoding, uint64_t *value)
{
	uint64_t tmp;
	uint8_t ret;

	__asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
		: [value]"=rm"(tmp), [ret]"=rm"(ret)
		: [encoding]"r"(encoding)
		: "cc", "memory");

	*value = tmp;
	return ret;
}
460
/*
 * A wrapper around vmread that ignores errors and returns zero if the
 * vmread instruction fails.
 */
static inline uint64_t vmreadz(uint64_t encoding)
{
	uint64_t val = 0;

	(void)vmread(encoding, &val);
	return val;
}
471
/*
 * Write @value to VMCS field @encoding.  Returns 0 on success,
 * non-zero on VMfail (setna: CF or ZF set).
 */
static inline int vmwrite(uint64_t encoding, uint64_t value)
{
	uint8_t ret;

	__asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
		: [ret]"=rm"(ret)
		: [value]"rm"(value), [encoding]"r"(encoding)
		: "cc", "memory");

	return ret;
}
483
/*
 * VMCS revision identifier from IA32_VMX_BASIC.  The implicit
 * truncation to uint32_t keeps the MSR's low 32 bits; the identifier
 * lives in bits 30:0 and bit 31 reads as 0 per the SDM.
 */
static inline uint32_t vmcs_revision(void)
{
	return rdmsr(MSR_IA32_VMX_BASIC);
}
488
489void prepare_for_vmx_operation(void);
490void prepare_vmcs(void *guest_rip, void *guest_rsp);
491struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid,
492 vmx_guest_code_t guest_code);
493
494#endif /* !SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 7ca1bb40c498..2cedfda181d4 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -378,7 +378,7 @@ int kvm_memcmp_hva_gva(void *hva,
378 * complicated. This function uses a reasonable default length for 378 * complicated. This function uses a reasonable default length for
379 * the array and performs the appropriate allocation. 379 * the array and performs the appropriate allocation.
380 */ 380 */
381struct kvm_cpuid2 *allocate_kvm_cpuid2(void) 381static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
382{ 382{
383 struct kvm_cpuid2 *cpuid; 383 struct kvm_cpuid2 *cpuid;
384 int nent = 100; 384 int nent = 100;
@@ -402,17 +402,21 @@ struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
402 * Input Args: None 402 * Input Args: None
403 * 403 *
404 * Output Args: 404 * Output Args:
405 * cpuid - The supported KVM CPUID
406 * 405 *
407 * Return: void 406 * Return: The supported KVM CPUID
408 * 407 *
409 * Get the guest CPUID supported by KVM. 408 * Get the guest CPUID supported by KVM.
410 */ 409 */
411void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) 410struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
412{ 411{
412 static struct kvm_cpuid2 *cpuid;
413 int ret; 413 int ret;
414 int kvm_fd; 414 int kvm_fd;
415 415
416 if (cpuid)
417 return cpuid;
418
419 cpuid = allocate_kvm_cpuid2();
416 kvm_fd = open(KVM_DEV_PATH, O_RDONLY); 420 kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
417 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", 421 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
418 KVM_DEV_PATH, kvm_fd, errno); 422 KVM_DEV_PATH, kvm_fd, errno);
@@ -422,6 +426,7 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid)
422 ret, errno); 426 ret, errno);
423 427
424 close(kvm_fd); 428 close(kvm_fd);
429 return cpuid;
425} 430}
426 431
427/* Locate a cpuid entry. 432/* Locate a cpuid entry.
@@ -435,12 +440,13 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid)
435 * Return: A pointer to the cpuid entry. Never returns NULL. 440 * Return: A pointer to the cpuid entry. Never returns NULL.
436 */ 441 */
437struct kvm_cpuid_entry2 * 442struct kvm_cpuid_entry2 *
438find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, 443kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
439 uint32_t index)
440{ 444{
445 struct kvm_cpuid2 *cpuid;
441 struct kvm_cpuid_entry2 *entry = NULL; 446 struct kvm_cpuid_entry2 *entry = NULL;
442 int i; 447 int i;
443 448
449 cpuid = kvm_get_supported_cpuid();
444 for (i = 0; i < cpuid->nent; i++) { 450 for (i = 0; i < cpuid->nent; i++) {
445 if (cpuid->entries[i].function == function && 451 if (cpuid->entries[i].function == function &&
446 cpuid->entries[i].index == index) { 452 cpuid->entries[i].index == index) {
@@ -1435,7 +1441,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
1435 sparsebit_idx_t pg; 1441 sparsebit_idx_t pg;
1436 1442
1437 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " 1443 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
1438 "not divisable by page size.\n" 1444 "not divisible by page size.\n"
1439 " paddr_min: 0x%lx page_size: 0x%x", 1445 " paddr_min: 0x%lx page_size: 0x%x",
1440 paddr_min, vm->page_size); 1446 paddr_min, vm->page_size);
1441 1447
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index 0c5cf3e0cb6f..b132bc95d183 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -121,7 +121,7 @@
121 * avoided by moving the setting of the nodes mask bits into 121 * avoided by moving the setting of the nodes mask bits into
122 * the previous nodes num_after setting. 122 * the previous nodes num_after setting.
123 * 123 *
124 * + Node starting index is evenly divisable by the number of bits 124 * + Node starting index is evenly divisible by the number of bits
125 * within a nodes mask member. 125 * within a nodes mask member.
126 * 126 *
127 * + Nodes never represent a range of bits that wrap around the 127 * + Nodes never represent a range of bits that wrap around the
@@ -1741,7 +1741,7 @@ void sparsebit_validate_internal(struct sparsebit *s)
1741 1741
1742 /* Validate node index is divisible by the mask size */ 1742 /* Validate node index is divisible by the mask size */
1743 if (nodep->idx % MASK_BITS) { 1743 if (nodep->idx % MASK_BITS) {
1744 fprintf(stderr, "Node index not divisable by " 1744 fprintf(stderr, "Node index not divisible by "
1745 "mask size,\n" 1745 "mask size,\n"
1746 " nodep: %p nodep->idx: 0x%lx " 1746 " nodep: %p nodep->idx: 0x%lx "
1747 "MASK_BITS: %lu\n", 1747 "MASK_BITS: %lu\n",
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c
new file mode 100644
index 000000000000..0231bc0aae7b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/vmx.c
@@ -0,0 +1,243 @@
1/*
 2 * tools/testing/selftests/kvm/lib/vmx.c
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 */
8
9#define _GNU_SOURCE /* for program_invocation_name */
10
11#include "test_util.h"
12#include "kvm_util.h"
13#include "x86.h"
14#include "vmx.h"
15
16/* Create a default VM for VMX tests.
17 *
18 * Input Args:
19 * vcpuid - The id of the single VCPU to add to the VM.
20 * guest_code - The vCPU's entry point
21 *
22 * Output Args: None
23 *
24 * Return:
25 * Pointer to opaque structure that describes the created VM.
26 */
27struct kvm_vm *
28vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code)
29{
30 struct kvm_cpuid2 *cpuid;
31 struct kvm_vm *vm;
32 vm_vaddr_t vmxon_vaddr;
33 vm_paddr_t vmxon_paddr;
34 vm_vaddr_t vmcs_vaddr;
35 vm_paddr_t vmcs_paddr;
36
37 vm = vm_create_default(vcpuid, (void *) guest_code);
38
39 /* Enable nesting in CPUID */
40 vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
41
42 /* Setup of a region of guest memory for the vmxon region. */
43 vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
44 vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr);
45
46 /* Setup of a region of guest memory for a vmcs. */
47 vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
48 vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr);
49
50 vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr,
51 vmcs_paddr);
52
53 return vm;
54}
55
/*
 * Put the executing (guest) CPU into a state where VMXON will succeed:
 * conform CR0/CR4 to the VMX fixed-bit MSRs, set CR4.VMXE, and ensure
 * IA32_FEATURE_CONTROL is locked with VMXON-outside-SMX enabled.
 */
void prepare_for_vmx_operation(void)
{
	uint64_t feature_control;
	uint64_t required;
	unsigned long cr0;
	unsigned long cr4;

	/*
	 * Ensure bits in CR0 and CR4 are valid in VMX operation:
	 * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
	 * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
	 */
	__asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
	cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);	/* clear must-be-0 bits */
	cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);	/* set must-be-1 bits */
	__asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");

	__asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
	cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
	cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
	/* Enable VMX operation */
	cr4 |= X86_CR4_VMXE;
	__asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");

	/*
	 * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
	 *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
	 *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
	 *    outside of SMX causes a #GP.
	 */
	required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
	required |= FEATURE_CONTROL_LOCKED;
	feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
	if ((feature_control & required) != required)
		wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
}
92
/*
 * Initialize the control fields to the most basic settings possible.
 *
 * The low 32 bits of each IA32_VMX_*_CTLS capability MSR report the
 * controls that must be 1 (SDM appendix A.3), so writing that value
 * (truncated by vmwrite to the 32-bit field) yields the minimal
 * allowed configuration.
 */
static inline void init_vmcs_control_fields(void)
{
	vmwrite(VIRTUAL_PROCESSOR_ID, 0);
	vmwrite(POSTED_INTR_NV, 0);

	vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS));
	vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS));
	vmwrite(EXCEPTION_BITMAP, 0);	/* no exception intercepts */
	vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
	vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
	vmwrite(CR3_TARGET_COUNT, 0);
	vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
		VM_EXIT_HOST_ADDR_SPACE_SIZE);	  /* 64-bit host */
	vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
	vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
	vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
		VM_ENTRY_IA32E_MODE);		/* 64-bit guest */
	vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
	vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);	/* no event injection */
	vmwrite(TPR_THRESHOLD, 0);
	vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);

	/* Empty guest/host masks: the guest owns CR0/CR4 outright. */
	vmwrite(CR0_GUEST_HOST_MASK, 0);
	vmwrite(CR4_GUEST_HOST_MASK, 0);
	vmwrite(CR0_READ_SHADOW, get_cr0());
	vmwrite(CR4_READ_SHADOW, get_cr4());
}
123
/*
 * Initialize the host state fields based on the current host state, with
 * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
 * or vmresume.
 */
static inline void init_vmcs_host_state(void)
{
	/*
	 * Read the exit controls back so only the host MSR fields that
	 * VM-exit will actually load get filled in.
	 */
	uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);

	vmwrite(HOST_ES_SELECTOR, get_es());
	vmwrite(HOST_CS_SELECTOR, get_cs());
	vmwrite(HOST_SS_SELECTOR, get_ss());
	vmwrite(HOST_DS_SELECTOR, get_ds());
	vmwrite(HOST_FS_SELECTOR, get_fs());
	vmwrite(HOST_GS_SELECTOR, get_gs());
	vmwrite(HOST_TR_SELECTOR, get_tr());

	if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
		vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
	if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
		vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
	if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
		vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
			rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));

	vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));

	vmwrite(HOST_CR0, get_cr0());
	vmwrite(HOST_CR3, get_cr3());
	vmwrite(HOST_CR4, get_cr4());
	vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
	vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
	/* TR base comes from the descriptor the TR selector indexes. */
	vmwrite(HOST_TR_BASE,
		get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
	vmwrite(HOST_GDTR_BASE, get_gdt_base());
	vmwrite(HOST_IDTR_BASE, get_idt_base());
	vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
	vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
}
163
/*
 * Initialize the guest state fields essentially as a clone of
 * the host state fields. Some host state fields have fixed
 * values, and we set the corresponding guest state fields accordingly.
 */
static inline void init_vmcs_guest_state(void *rip, void *rsp)
{
	vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
	vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
	vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
	vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
	vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
	vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
	vmwrite(GUEST_LDTR_SELECTOR, 0);
	vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
	vmwrite(GUEST_INTR_STATUS, 0);
	vmwrite(GUEST_PML_INDEX, 0);

	vmwrite(VMCS_LINK_POINTER, -1ll);	/* no shadow VMCS */
	vmwrite(GUEST_IA32_DEBUGCTL, 0);
	vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
	vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
	vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
		vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));

	vmwrite(GUEST_ES_LIMIT, -1);
	vmwrite(GUEST_CS_LIMIT, -1);
	vmwrite(GUEST_SS_LIMIT, -1);
	vmwrite(GUEST_DS_LIMIT, -1);
	vmwrite(GUEST_FS_LIMIT, -1);
	vmwrite(GUEST_GS_LIMIT, -1);
	vmwrite(GUEST_LDTR_LIMIT, -1);
	vmwrite(GUEST_TR_LIMIT, 0x67);		/* minimal 64-bit TSS */
	vmwrite(GUEST_GDTR_LIMIT, 0xffff);
	vmwrite(GUEST_IDTR_LIMIT, 0xffff);
	/*
	 * Access rights (SDM "Guest Register State"): 0xc093 = present
	 * r/w data segment with G/DB set, 0xa09b = 64-bit code segment,
	 * 0x10000 sets the "unusable" bit (bit 16) for null selectors.
	 */
	vmwrite(GUEST_ES_AR_BYTES,
		vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
	vmwrite(GUEST_SS_AR_BYTES, 0xc093);
	vmwrite(GUEST_DS_AR_BYTES,
		vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_FS_AR_BYTES,
		vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_GS_AR_BYTES,
		vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
	vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);	/* unusable */
	vmwrite(GUEST_TR_AR_BYTES, 0x8b);	/* busy 64-bit TSS */
	vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
	vmwrite(GUEST_ACTIVITY_STATE, 0);	/* active */
	vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
	vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);

	vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
	vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
	vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
	vmwrite(GUEST_ES_BASE, 0);
	vmwrite(GUEST_CS_BASE, 0);
	vmwrite(GUEST_SS_BASE, 0);
	vmwrite(GUEST_DS_BASE, 0);
	vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
	vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
	vmwrite(GUEST_LDTR_BASE, 0);
	vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
	vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
	vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
	vmwrite(GUEST_DR7, 0x400);	/* architectural DR7 reset value */
	vmwrite(GUEST_RSP, (uint64_t)rsp);
	vmwrite(GUEST_RIP, (uint64_t)rip);
	vmwrite(GUEST_RFLAGS, 2);	/* only the reserved always-1 bit */
	vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
	vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
	vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
}
237
/*
 * Set up the current VMCS for a first vmlaunch: minimal controls, host
 * state cloned from the running environment, and guest state mirroring
 * the host except for RIP/RSP.  Order matters: the guest-state init
 * reads back host-state fields written by init_vmcs_host_state().
 */
void prepare_vmcs(void *guest_rip, void *guest_rsp)
{
	init_vmcs_control_fields();
	init_vmcs_host_state();
	init_vmcs_guest_state(guest_rip, guest_rsp);
}
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
new file mode 100644
index 000000000000..8f7f62093add
--- /dev/null
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -0,0 +1,231 @@
1/*
 2 * tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 *
8 *
9 * IA32_TSC_ADJUST test
10 *
11 * According to the SDM, "if an execution of WRMSR to the
12 * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
13 * the logical processor also adds (or subtracts) value X from the
14 * IA32_TSC_ADJUST MSR."
15 *
16 * Note that when L1 doesn't intercept writes to IA32_TSC, a
17 * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
18 * value.
19 *
20 * This test verifies that this unusual case is handled correctly.
21 */
22
23#include "test_util.h"
24#include "kvm_util.h"
25#include "x86.h"
26#include "vmx.h"
27
#include <inttypes.h>
#include <string.h>
#include <sys/ioctl.h>
30
/* Define locally if the system headers don't provide it. */
#ifndef MSR_IA32_TSC_ADJUST
#define MSR_IA32_TSC_ADJUST 0x3b
#endif

#define PAGE_SIZE 4096
#define VCPU_ID 5

/* Amount each WRMSR(IA32_TSC) in this test moves the TSC back by. */
#define TSC_ADJUST_VALUE (1ll << 32)
/* TSC_OFFSET written into L2's VMCS by l1_guest_code(). */
#define TSC_OFFSET_VALUE -(1ll << 48)

/* I/O ports the guest uses to talk to the userspace harness (RDI
 * carries the argument; see do_exit_to_l0() and main()). */
enum {
	PORT_ABORT = 0x1000,	/* rdi = failed-assert message */
	PORT_REPORT,		/* rdi = observed IA32_TSC_ADJUST */
	PORT_DONE,		/* test finished */
};

/* Guest-virtual and guest-physical address of one guest page. */
struct vmx_page {
	vm_vaddr_t virt;
	vm_paddr_t phys;
};

/* Indices into the vmx_pages array. */
enum {
	VMXON_PAGE = 0,		/* VMXON region */
	VMCS_PAGE,		/* VMCS used to run L2 */
	MSR_BITMAP_PAGE,	/* left zeroed => no MSR intercepts */

	NUM_VMX_PAGES,
};

/* KVM_SET_MSRS payload carrying exactly one MSR entry.
 * NOTE(review): not referenced in the code visible here — confirm use. */
struct kvm_single_msr {
	struct kvm_msrs header;
	struct kvm_msr_entry entry;
} __attribute__((packed));

/* The virtual machine object. */
static struct kvm_vm *vm;

/* Array of vmx_page descriptors that is shared with the guest. */
struct vmx_page *vmx_pages;
70
#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg))
/*
 * Exit from the guest to the L0 userspace harness by executing an IN
 * from @port, which KVM reports as a KVM_EXIT_IO.  @arg travels in
 * RDI ("D" constraint) and is retrieved by main() via KVM_GET_REGS.
 */
static void do_exit_to_l0(uint16_t port, unsigned long arg)
{
	__asm__ __volatile__("in %[port], %%al"
		:
		: [port]"d"(port), "D"(arg)
		: "rax");	/* al is clobbered by the IN */
}
79
80
/*
 * Assert in guest context: on failure, hand the stringified condition
 * to the harness via PORT_ABORT (main() turns it into a TEST_ASSERT).
 */
#define GUEST_ASSERT(_condition) do { \
	if (!(_condition)) \
		exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \
} while (0)
85
/*
 * Read IA32_TSC_ADJUST, report it to the harness, and assert it has
 * not exceeded @max (the adjustments in this test are all negative).
 */
static void check_ia32_tsc_adjust(int64_t max)
{
	int64_t adjust;

	adjust = rdmsr(MSR_IA32_TSC_ADJUST);
	exit_to_l0(PORT_REPORT, adjust);
	GUEST_ASSERT(adjust <= max);
}
94
/*
 * L2 guest: write L1's TSC back by TSC_ADJUST_VALUE and verify the
 * CPU mirrored the subtraction into IA32_TSC_ADJUST.
 */
static void l2_guest_code(void)
{
	/* L2's TSC reads include TSC_OFFSET_VALUE; undo it to get L1's. */
	uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;

	/*
	 * With a zeroed MSR bitmap there is no WRMSR(TSC) intercept, so
	 * this sets L1's TSC (not L2's perceived TSC) — the second
	 * -TSC_ADJUST_VALUE adjustment of the test.
	 */
	wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);

	/* Exit to L1 */
	__asm__ __volatile__("vmcall");
}
105
/*
 * L1 guest: performs one direct IA32_TSC write, then launches L2
 * (l2_guest_code) with TSC offsetting enabled and no MSR intercepts.
 * A deliberately failed VM entry (bogus GUEST_CR3) first checks that
 * an aborted entry leaves IA32_TSC_ADJUST untouched.
 */
static void l1_guest_code(struct vmx_page *vmx_pages)
{
#define L2_GUEST_STACK_SIZE 64
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	uint32_t control;
	uintptr_t save_cr3;

	/* First adjustment: pull our TSC back by TSC_ADJUST_VALUE. */
	GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
	wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);

	prepare_for_vmx_operation();

	/* Enter VMX root operation. */
	*(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision();
	GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys));

	/* Load a VMCS. */
	*(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision();
	GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys));
	GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys));

	/* Prepare the VMCS for L2 execution. */
	prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
	/* MSR_BITMAP_PAGE is zeroed, so no MSR (incl. TSC) intercepts. */
	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
	vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys);
	vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);

	/* Jump into L2. First, test failure to load guest CR3. */
	save_cr3 = vmreadz(GUEST_CR3);
	vmwrite(GUEST_CR3, -1ull);
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
		     (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
	/* The aborted entry must not have changed IA32_TSC_ADJUST. */
	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
	vmwrite(GUEST_CR3, save_cr3);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* L2's WRMSR(TSC) added the second -TSC_ADJUST_VALUE. */
	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);

	exit_to_l0(PORT_DONE, 0);
}
152
153static void allocate_vmx_page(struct vmx_page *page)
154{
155 vm_vaddr_t virt;
156
157 virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0);
158 memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE);
159
160 page->virt = virt;
161 page->phys = addr_gva2gpa(vm, virt);
162}
163
164static vm_vaddr_t allocate_vmx_pages(void)
165{
166 vm_vaddr_t vmx_pages_vaddr;
167 int i;
168
169 vmx_pages_vaddr = vm_vaddr_alloc(
170 vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0);
171
172 vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr);
173
174 for (i = 0; i < NUM_VMX_PAGES; i++)
175 allocate_vmx_page(&vmx_pages[i]);
176
177 return vmx_pages_vaddr;
178}
179
180void report(int64_t val)
181{
182 printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
183 val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
184}
185
186int main(int argc, char *argv[])
187{
188 vm_vaddr_t vmx_pages_vaddr;
189 struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
190
191 if (!(entry->ecx & CPUID_VMX)) {
192 printf("nested VMX not enabled, skipping test");
193 return 0;
194 }
195
196 vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
197
198 /* Allocate VMX pages and shared descriptors (vmx_pages). */
199 vmx_pages_vaddr = allocate_vmx_pages();
200 vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr);
201
202 for (;;) {
203 volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
204 struct kvm_regs regs;
205
206 vcpu_run(vm, VCPU_ID);
207 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
208 "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n",
209 run->exit_reason,
210 exit_reason_str(run->exit_reason));
211
212 vcpu_regs_get(vm, VCPU_ID, &regs);
213
214 switch (run->io.port) {
215 case PORT_ABORT:
216 TEST_ASSERT(false, "%s", (const char *) regs.rdi);
217 /* NOT REACHED */
218 case PORT_REPORT:
219 report(regs.rdi);
220 break;
221 case PORT_DONE:
222 goto done;
223 default:
224 TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
225 }
226 }
227
228 kvm_vm_free(vm);
229done:
230 return 0;
231}