 Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt |  21
 Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt          |   2
 arch/parisc/kernel/Makefile                                        |   2
 arch/x86/include/asm/kvm_host.h                                    |   1
 arch/x86/kernel/ldt.c                                              |   2
 arch/x86/kvm/svm.c                                                 |  31
 arch/x86/kvm/vmx.c                                                 |  95
 arch/x86/kvm/x86.c                                                 |  15
 drivers/block/rbd.c                                                | 101
 drivers/clocksource/Kconfig                                        |   8
 drivers/clocksource/Makefile                                       |   1
 drivers/clocksource/timer-imx-tpm.c                                |  43
 drivers/clocksource/timer-npcm7xx.c                                | 215
 fs/ceph/inode.c                                                    |  10
 tools/testing/selftests/kvm/Makefile                               |   5
 tools/testing/selftests/kvm/include/kvm_util.h                     |  15
 tools/testing/selftests/kvm/include/vmx.h                          | 494
 tools/testing/selftests/kvm/lib/kvm_util.c                         |  20
 tools/testing/selftests/kvm/lib/sparsebit.c                        |   4
 tools/testing/selftests/kvm/lib/vmx.c                              | 243
 tools/testing/selftests/kvm/vmx_tsc_adjust_test.c                  | 231
 21 files changed, 1438 insertions(+), 121 deletions(-)
diff --git a/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt
new file mode 100644
index 000000000000..ea22dfe485be
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt
@@ -0,0 +1,21 @@
+Nuvoton NPCM7xx timer
+
+Nuvoton NPCM7xx have three timer modules, each timer module provides five 24-bit
+timer counters.
+
+Required properties:
+- compatible      : "nuvoton,npcm750-timer" for Poleg NPCM750.
+- reg             : Offset and length of the register set for the device.
+- interrupts      : Contain the timer interrupt with flags for
+                    falling edge.
+- clocks          : phandle of timer reference clock (usually a 25 MHz clock).
+
+Example:
+
+timer@f0008000 {
+    compatible = "nuvoton,npcm750-timer";
+    interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+    reg = <0xf0008000 0x50>;
+    clocks = <&clk NPCM7XX_CLK_TIMER>;
+};
+
diff --git a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt
index b4aa7ddb5b13..f82087b220f4 100644
--- a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt
+++ b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt
@@ -15,7 +15,7 @@ Required properties:
 - interrupts :	Should be the clock event device interrupt.
 - clocks :	The clocks provided by the SoC to drive the timer, must contain
 		an entry for each entry in clock-names.
-- clock-names : Must include the following entries: "igp" and "per".
+- clock-names : Must include the following entries: "ipg" and "per".
 
 Example:
 tpm5: tpm@40260000 {
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index eafd06ab59ef..e5de34d00b1a 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PA11)	+= pci-dma.o
 obj-$(CONFIG_PCI)	+= pci.o
 obj-$(CONFIG_MODULES)	+= module.o
-obj-$(CONFIG_64BIT)	+= binfmt_elf32.o sys_parisc32.o signal32.o
+obj-$(CONFIG_64BIT)	+= sys_parisc32.o signal32.o
 obj-$(CONFIG_STACKTRACE)+= stacktrace.o
 obj-$(CONFIG_AUDIT)	+= audit.o
 obj64-$(CONFIG_AUDIT)	+= compat_audit.o
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 949c977bc4c9..c25775fad4ed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1013,6 +1013,7 @@ struct kvm_x86_ops {
 
 	bool (*has_wbinvd_exit)(void);
 
+	u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index d41d896481b8..c9b14020f4dd 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -166,7 +166,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 	 */
 	pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
 	/* Filter out unsuppored __PAGE_KERNEL* bits: */
-	pgprot_val(pte_prot) |= __supported_pte_mask;
+	pgprot_val(pte_prot) &= __supported_pte_mask;
 	pte = pfn_pte(pfn, pte_prot);
 	set_pte_at(mm, va, ptep, pte);
 	pte_unmap_unlock(ptep, ptl);
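The ldt.c change above is a one-character fix that is easy to misread, so here is a minimal userspace sketch (invented values, not kernel code) of why filtering must AND with the supported mask: OR-ing __supported_pte_mask, as the old code did, turns bits on instead of clearing the unsupported ones.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t prot      = 0x8000000000000061ULL; /* NX bit + some low flags */
		uint64_t supported = 0x00000000000000ffULL; /* pretend NX is unsupported */

		/* old bug: sets every "supported" bit on top of prot */
		printf("|= : %#llx\n", (unsigned long long)(prot | supported));
		/* fix: keeps only the bits the CPU actually supports */
		printf("&= : %#llx\n", (unsigned long long)(prot & supported));
		return 0;
	}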
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b58787daf9f8..1fc05e428aba 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1423,12 +1425,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 	seg->base = 0;
 }
 
+static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (is_guest_mode(vcpu))
+		return svm->nested.hsave->control.tsc_offset;
+
+	return vcpu->arch.tsc_offset;
+}
+
 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 g_tsc_offset = 0;
 
 	if (is_guest_mode(vcpu)) {
+		/* Write L1's TSC offset.  */
 		g_tsc_offset = svm->vmcb->control.tsc_offset -
 			       svm->nested.hsave->control.tsc_offset;
 		svm->nested.hsave->control.tsc_offset = offset;
@@ -3322,6 +3333,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	/* Restore the original control entries */
 	copy_vmcb_control_area(vmcb, hsave);
 
+	svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -3482,10 +3494,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
 	/* We don't want to see VMMCALLs from a nested guest */
 	clr_intercept(svm, INTERCEPT_VMMCALL);
 
+	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
+	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+
 	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
 	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
 	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
-	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
 	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
 	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
@@ -4035,12 +4049,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	switch (msr_info->index) {
-	case MSR_IA32_TSC: {
-		msr_info->data = svm->vmcb->control.tsc_offset +
-			kvm_scale_tsc(vcpu, rdtsc());
-
-		break;
-	}
 	case MSR_STAR:
 		msr_info->data = svm->vmcb->save.star;
 		break;
@@ -4193,9 +4201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->vmcb->save.g_pat = data;
 		mark_dirty(svm->vmcb, VMCB_NPT);
 		break;
-	case MSR_IA32_TSC:
-		kvm_write_tsc(vcpu, msr);
-		break;
 	case MSR_IA32_SPEC_CTRL:
 		if (!msr->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
@@ -5265,9 +5270,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 	}
 
 	if (!ret && svm) {
-		trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
-					 host_irq, e->gsi,
-					 vcpu_info.vector,
+		trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
+					 e->gsi, vcpu_info.vector,
 					 vcpu_info.pi_desc_addr, set);
 	}
 
@@ -7102,6 +7106,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
+	.read_l1_tsc_offset = svm_read_l1_tsc_offset,
 	.write_tsc_offset = svm_write_tsc_offset,
 
 	.set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aafcc9881e88..aa66ccd6ed6c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2880,18 +2880,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	vmx_update_msr_bitmap(&vmx->vcpu);
 }
 
-/*
- * reads and returns guest's timestamp counter "register"
- * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
- * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3
- */
-static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
+static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
 {
-	u64 host_tsc, tsc_offset;
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-	host_tsc = rdtsc();
-	tsc_offset = vmcs_read64(TSC_OFFSET);
-	return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
+	if (is_guest_mode(vcpu) &&
+	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+		return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
+
+	return vcpu->arch.tsc_offset;
 }
 
 /*
@@ -3524,9 +3521,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 #endif
 	case MSR_EFER:
 		return kvm_get_msr_common(vcpu, msr_info);
-	case MSR_IA32_TSC:
-		msr_info->data = guest_read_tsc(vcpu);
-		break;
 	case MSR_IA32_SPEC_CTRL:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
@@ -3646,9 +3640,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		vmcs_write64(GUEST_BNDCFGS, data);
 		break;
-	case MSR_IA32_TSC:
-		kvm_write_tsc(vcpu, msr_info);
-		break;
 	case MSR_IA32_SPEC_CTRL:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
@@ -10608,6 +10599,16 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
+					  struct vmcs12 *vmcs12)
+{
+	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
+	    !page_address_valid(vcpu, vmcs12->apic_access_addr))
+		return -EINVAL;
+	else
+		return 0;
+}
+
 static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
 					   struct vmcs12 *vmcs12)
 {
@@ -11176,11 +11177,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 	}
 
-	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
-		vmcs_write64(TSC_OFFSET,
-			vcpu->arch.tsc_offset + vmcs12->tsc_offset);
-	else
-		vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+
 	if (kvm_has_tsc_control)
 		decache_tsc_multiplier(vmx);
 
@@ -11299,6 +11297,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (nested_vmx_check_apic_access_controls(vcpu, vmcs12))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
@@ -11420,6 +11421,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	u32 msr_entry_idx;
 	u32 exit_qual;
+	int r;
 
 	enter_guest_mode(vcpu);
 
@@ -11429,26 +11431,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
 	vmx_segment_cache_clear(vmx);
 
-	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
-		leave_guest_mode(vcpu);
-		vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-		nested_vmx_entry_failure(vcpu, vmcs12,
-					 EXIT_REASON_INVALID_STATE, exit_qual);
-		return 1;
-	}
+	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
+
+	r = EXIT_REASON_INVALID_STATE;
+	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
+		goto fail;
 
 	nested_get_vmcs12_pages(vcpu, vmcs12);
 
+	r = EXIT_REASON_MSR_LOAD_FAIL;
 	msr_entry_idx = nested_vmx_load_msr(vcpu,
 					    vmcs12->vm_entry_msr_load_addr,
 					    vmcs12->vm_entry_msr_load_count);
-	if (msr_entry_idx) {
-		leave_guest_mode(vcpu);
-		vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-		nested_vmx_entry_failure(vcpu, vmcs12,
-					 EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
-		return 1;
-	}
+	if (msr_entry_idx)
+		goto fail;
 
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -11457,6 +11454,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
 	 */
 	return 0;
+
+fail:
+	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+	leave_guest_mode(vcpu);
+	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+	nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
+	return 1;
 }
 
 /*
@@ -12028,6 +12033,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
 	leave_guest_mode(vcpu);
 
+	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+
 	if (likely(!vmx->fail)) {
 		if (exit_reason == -1)
 			sync_vmcs12(vcpu, vmcs12);
@@ -12224,10 +12232,16 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
 
 static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u64 tscl = rdtsc();
-	u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
-	u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+	struct vcpu_vmx *vmx;
+	u64 tscl, guest_tscl, delta_tsc;
+
+	if (kvm_mwait_in_guest(vcpu->kvm))
+		return -EOPNOTSUPP;
+
+	vmx = to_vmx(vcpu);
+	tscl = rdtsc();
+	guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
+	delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
 
 	/* Convert to host delta tsc if tsc scaling is enabled */
 	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
@@ -12533,7 +12547,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
 		vcpu_info.vector = irq.vector;
 
-		trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
+		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
 				vcpu_info.vector, vcpu_info.pi_desc_addr, set);
 
 		if (set)
@@ -12712,6 +12726,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
+	.read_l1_tsc_offset = vmx_read_l1_tsc_offset,
 	.write_tsc_offset = vmx_write_tsc_offset,
 
 	.set_tdp_cr3 = vmx_set_cr3,
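Taken together, the svm.c and vmx.c hunks keep vcpu->arch.tsc_offset equal to the offset of the guest that is actually running, and recover L1's own offset on demand through the new read_l1_tsc_offset() hook. A simplified sketch of that bookkeeping (invented struct; the CPU_BASED_USE_TSC_OFFSETING check and the nested-SVM hsave details are elided):

	#include <stdint.h>

	struct vcpu {
		uint64_t tsc_offset;    /* offset of the running guest (L1 or L2) */
		uint64_t l2_tsc_offset; /* offset L1 programmed for L2            */
		int      guest_mode;    /* non-zero while L2 runs                 */
	};

	static void nested_entry(struct vcpu *v)  /* cf. enter_vmx_non_root_mode() */
	{
		v->tsc_offset += v->l2_tsc_offset;
		v->guest_mode = 1;
	}

	static void nested_exit(struct vcpu *v)   /* cf. nested_vmx_vmexit() */
	{
		v->tsc_offset -= v->l2_tsc_offset;
		v->guest_mode = 0;
	}

	static uint64_t read_l1_tsc_offset(const struct vcpu *v) /* the new hook */
	{
		return v->guest_mode ? v->tsc_offset - v->l2_tsc_offset
				     : v->tsc_offset;
	}

Note how the fail: path added to enter_vmx_non_root_mode() undoes the += before switching back to vmcs01, so a failed nested entry leaves the offset balanced.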
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b2ff74b12ec4..51ecd381793b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1490,7 +1490,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 
 static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
 {
-	u64 curr_offset = vcpu->arch.tsc_offset;
+	u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
 	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
 }
 
@@ -1532,7 +1532,9 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-	return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+	u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+
+	return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
@@ -2362,6 +2364,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		vcpu->arch.smbase = data;
 		break;
+	case MSR_IA32_TSC:
+		kvm_write_tsc(vcpu, msr_info);
+		break;
 	case MSR_SMI_COUNT:
 		if (!msr_info->host_initiated)
 			return 1;
@@ -2605,6 +2610,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_UCODE_REV:
 		msr_info->data = vcpu->arch.microcode_version;
 		break;
+	case MSR_IA32_TSC:
+		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
+		break;
 	case MSR_MTRRcap:
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2819,7 +2827,8 @@ out:
 static inline bool kvm_can_mwait_in_guest(void)
 {
 	return boot_cpu_has(X86_FEATURE_MWAIT) &&
-		!boot_cpu_has_bug(X86_BUG_MONITOR);
+		!boot_cpu_has_bug(X86_BUG_MONITOR) &&
+		boot_cpu_has(X86_FEATURE_ARAT);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
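With the MSR_IA32_TSC cases moved out of svm.c and vmx.c, the guest TSC read is computed once in kvm_get_msr_common() as kvm_scale_tsc(rdtsc()) + tsc_offset; the comment deleted from vmx.c cited the same formula, guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset, from the Intel TSC Scaling for Virtualization white paper. A sketch of that arithmetic (48 fractional bits as on VMX; unsigned __int128 is a GCC/Clang extension standing in for the kernel's 64x64->128 multiply helper):

	#include <stdint.h>

	static uint64_t scale_tsc(uint64_t host_tsc, uint64_t ratio,
				  unsigned int frac_bits)
	{
		/* widen to 128 bits, multiply, then drop the fractional bits */
		return (uint64_t)(((unsigned __int128)host_tsc * ratio) >> frac_bits);
	}

	static uint64_t guest_tsc(uint64_t host_tsc, uint64_t ratio, uint64_t offset)
	{
		return scale_tsc(host_tsc, ratio, 48) + offset;
	}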
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 07dc5419bd63..8e8b04cc569a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -732,6 +732,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
  */
 enum {
 	Opt_queue_depth,
+	Opt_lock_timeout,
 	Opt_last_int,
 	/* int args above */
 	Opt_last_string,
@@ -740,11 +741,13 @@ enum {
 	Opt_read_write,
 	Opt_lock_on_read,
 	Opt_exclusive,
+	Opt_notrim,
 	Opt_err
 };
 
 static match_table_t rbd_opts_tokens = {
 	{Opt_queue_depth, "queue_depth=%d"},
+	{Opt_lock_timeout, "lock_timeout=%d"},
 	/* int args above */
 	/* string args above */
 	{Opt_read_only, "read_only"},
@@ -753,20 +756,25 @@ static match_table_t rbd_opts_tokens = {
 	{Opt_read_write, "rw"},		/* Alternate spelling */
 	{Opt_lock_on_read, "lock_on_read"},
 	{Opt_exclusive, "exclusive"},
+	{Opt_notrim, "notrim"},
 	{Opt_err, NULL}
 };
 
 struct rbd_options {
 	int	queue_depth;
+	unsigned long	lock_timeout;
 	bool	read_only;
 	bool	lock_on_read;
 	bool	exclusive;
+	bool	trim;
 };
 
 #define RBD_QUEUE_DEPTH_DEFAULT	BLKDEV_MAX_RQ
+#define RBD_LOCK_TIMEOUT_DEFAULT	0  /* no timeout */
 #define RBD_READ_ONLY_DEFAULT	false
 #define RBD_LOCK_ON_READ_DEFAULT	false
 #define RBD_EXCLUSIVE_DEFAULT	false
+#define RBD_TRIM_DEFAULT	true
 
 static int parse_rbd_opts_token(char *c, void *private)
 {
@@ -796,6 +804,14 @@ static int parse_rbd_opts_token(char *c, void *private)
 		}
 		rbd_opts->queue_depth = intval;
 		break;
+	case Opt_lock_timeout:
+		/* 0 is "wait forever" (i.e. infinite timeout) */
+		if (intval < 0 || intval > INT_MAX / 1000) {
+			pr_err("lock_timeout out of range\n");
+			return -EINVAL;
+		}
+		rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000);
+		break;
 	case Opt_read_only:
 		rbd_opts->read_only = true;
 		break;
@@ -808,6 +824,9 @@ static int parse_rbd_opts_token(char *c, void *private)
 	case Opt_exclusive:
 		rbd_opts->exclusive = true;
 		break;
+	case Opt_notrim:
+		rbd_opts->trim = false;
+		break;
 	default:
 		/* libceph prints "bad option" msg */
 		return -EINVAL;
@@ -1392,7 +1411,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req)
 	case OBJ_OP_DISCARD:
 		return true;
 	default:
-		rbd_assert(0);
+		BUG();
 	}
 }
 
@@ -2466,7 +2485,7 @@ again:
 		}
 		return false;
 	default:
-		rbd_assert(0);
+		BUG();
 	}
 }
 
@@ -2494,7 +2513,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
 		}
 		return false;
 	default:
-		rbd_assert(0);
+		BUG();
 	}
 }
 
@@ -3533,9 +3552,22 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 /*
  * lock_rwsem must be held for read
  */
-static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
+static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire)
 {
 	DEFINE_WAIT(wait);
+	unsigned long timeout;
+	int ret = 0;
+
+	if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
+		return -EBLACKLISTED;
+
+	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
+		return 0;
+
+	if (!may_acquire) {
+		rbd_warn(rbd_dev, "exclusive lock required");
+		return -EROFS;
+	}
 
 	do {
 		/*
@@ -3547,12 +3579,22 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
 		prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait,
 					  TASK_UNINTERRUPTIBLE);
 		up_read(&rbd_dev->lock_rwsem);
-		schedule();
+		timeout = schedule_timeout(ceph_timeout_jiffies(
+						rbd_dev->opts->lock_timeout));
 		down_read(&rbd_dev->lock_rwsem);
-	} while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
-		 !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
+		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+			ret = -EBLACKLISTED;
+			break;
+		}
+		if (!timeout) {
+			rbd_warn(rbd_dev, "timed out waiting for lock");
+			ret = -ETIMEDOUT;
+			break;
+		}
+	} while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
 
 	finish_wait(&rbd_dev->lock_waitq, &wait);
+	return ret;
 }
 
 static void rbd_queue_workfn(struct work_struct *work)
@@ -3638,19 +3680,10 @@ static void rbd_queue_workfn(struct work_struct *work)
 		(op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read);
 	if (must_be_locked) {
 		down_read(&rbd_dev->lock_rwsem);
-		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
-		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
-			if (rbd_dev->opts->exclusive) {
-				rbd_warn(rbd_dev, "exclusive lock required");
-				result = -EROFS;
-				goto err_unlock;
-			}
-			rbd_wait_state_locked(rbd_dev);
-		}
-		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
-			result = -EBLACKLISTED;
+		result = rbd_wait_state_locked(rbd_dev,
+					       !rbd_dev->opts->exclusive);
+		if (result)
 			goto err_unlock;
-		}
 	}
 
 	img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
@@ -3902,7 +3935,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 {
 	struct gendisk *disk;
 	struct request_queue *q;
-	u64 segment_size;
+	unsigned int objset_bytes =
+	    rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
 	int err;
 
 	/* create gendisk info */
@@ -3942,20 +3976,19 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 	/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
 
-	/* set io sizes to object size */
-	segment_size = rbd_obj_bytes(&rbd_dev->header);
-	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+	blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT);
 	q->limits.max_sectors = queue_max_hw_sectors(q);
 	blk_queue_max_segments(q, USHRT_MAX);
 	blk_queue_max_segment_size(q, UINT_MAX);
-	blk_queue_io_min(q, segment_size);
-	blk_queue_io_opt(q, segment_size);
+	blk_queue_io_min(q, objset_bytes);
+	blk_queue_io_opt(q, objset_bytes);
 
-	/* enable the discard support */
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
-	q->limits.discard_granularity = segment_size;
-	blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
-	blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE);
+	if (rbd_dev->opts->trim) {
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+		q->limits.discard_granularity = objset_bytes;
+		blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
+		blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
+	}
 
 	if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
 		q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
@@ -5179,8 +5212,10 @@ static int rbd_add_parse_args(const char *buf,
 
 	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
 	rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
+	rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
 	rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
 	rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
+	rbd_opts->trim = RBD_TRIM_DEFAULT;
 
 	copts = ceph_parse_options(options, mon_addrs,
 					mon_addrs + mon_addrs_size - 1,
@@ -5216,6 +5251,8 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
 
 static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
 {
+	int ret;
+
 	if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
 		rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
 		return -EINVAL;
@@ -5223,9 +5260,9 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
 
 	/* FIXME: "rbd map --exclusive" should be in interruptible */
 	down_read(&rbd_dev->lock_rwsem);
-	rbd_wait_state_locked(rbd_dev);
+	ret = rbd_wait_state_locked(rbd_dev, true);
 	up_read(&rbd_dev->lock_rwsem);
-	if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+	if (ret) {
 		rbd_warn(rbd_dev, "failed to acquire exclusive lock");
 		return -EROFS;
 	}
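For context, the two new rbd options are given at map time; a hypothetical invocation (exact syntax depends on the rbd CLI and libceph option plumbing, not this diff) would be `rbd map -o lock_timeout=60,notrim pool/image`. lock_timeout=60 makes rbd_wait_state_locked() give up with -ETIMEDOUT after 60 seconds instead of sleeping forever, notrim leaves discard and write-zeroes support unconfigured in rbd_init_disk(), and the default lock_timeout of 0 keeps the old wait-forever behaviour.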
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 9ee2888275c1..8e8a09755d10 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -133,6 +133,14 @@ config VT8500_TIMER
 	help
 	  Enables support for the VT8500 driver.
 
+config NPCM7XX_TIMER
+	bool "NPCM7xx timer driver" if COMPILE_TEST
+	depends on HAS_IOMEM
+	select CLKSRC_MMIO
+	help
+	  Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture,
+	  While TIMER0 serves as clockevent and TIMER1 serves as clocksource.
+
 config CADENCE_TTC_TIMER
 	bool "Cadence TTC timer driver" if COMPILE_TEST
 	depends on COMMON_CLK
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index e8e76dfef00b..00caf37e52f9 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_CLKSRC_NPS)	+= timer-nps.o
 obj-$(CONFIG_OXNAS_RPS_TIMER)	+= timer-oxnas-rps.o
 obj-$(CONFIG_OWL_TIMER)		+= owl-timer.o
 obj-$(CONFIG_SPRD_TIMER)	+= timer-sprd.o
+obj-$(CONFIG_NPCM7XX_TIMER)	+= timer-npcm7xx.o
 
 obj-$(CONFIG_ARC_TIMERS)	+= arc_timer.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arm_arch_timer.o
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 21bffdcb2f20..05d97a6871d8 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -17,9 +17,14 @@
 #include <linux/of_irq.h>
 #include <linux/sched_clock.h>
 
+#define TPM_PARAM			0x4
+#define TPM_PARAM_WIDTH_SHIFT		16
+#define TPM_PARAM_WIDTH_MASK		(0xff << 16)
 #define TPM_SC				0x10
 #define TPM_SC_CMOD_INC_PER_CNT		(0x1 << 3)
 #define TPM_SC_CMOD_DIV_DEFAULT		0x3
+#define TPM_SC_CMOD_DIV_MAX		0x7
+#define TPM_SC_TOF_MASK			(0x1 << 7)
 #define TPM_CNT				0x14
 #define TPM_MOD				0x18
 #define TPM_STATUS			0x1c
@@ -29,8 +34,11 @@
 #define TPM_C0SC_MODE_SHIFT		2
 #define TPM_C0SC_MODE_MASK		0x3c
 #define TPM_C0SC_MODE_SW_COMPARE	0x4
+#define TPM_C0SC_CHF_MASK		(0x1 << 7)
 #define TPM_C0V				0x24
 
+static int counter_width;
+static int rating;
 static void __iomem *timer_base;
 static struct clock_event_device clockevent_tpm;
 
@@ -83,10 +91,11 @@ static int __init tpm_clocksource_init(unsigned long rate)
 	tpm_delay_timer.freq = rate;
 	register_current_timer_delay(&tpm_delay_timer);
 
-	sched_clock_register(tpm_read_sched_clock, 32, rate);
+	sched_clock_register(tpm_read_sched_clock, counter_width, rate);
 
 	return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm",
-				     rate, 200, 32, clocksource_mmio_readl_up);
+				     rate, rating, counter_width,
+				     clocksource_mmio_readl_up);
 }
 
 static int tpm_set_next_event(unsigned long delta,
@@ -139,7 +148,6 @@ static struct clock_event_device clockevent_tpm = {
 	.set_state_oneshot	= tpm_set_state_oneshot,
 	.set_next_event		= tpm_set_next_event,
 	.set_state_shutdown	= tpm_set_state_shutdown,
-	.rating			= 200,
 };
 
 static int __init tpm_clockevent_init(unsigned long rate, int irq)
@@ -149,10 +157,11 @@ static int __init tpm_clockevent_init(unsigned long rate, int irq)
 	ret = request_irq(irq, tpm_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL,
 			  "i.MX7ULP TPM Timer", &clockevent_tpm);
 
+	clockevent_tpm.rating = rating;
 	clockevent_tpm.cpumask = cpumask_of(0);
 	clockevent_tpm.irq = irq;
-	clockevents_config_and_register(&clockevent_tpm,
-					rate, 300, 0xfffffffe);
+	clockevents_config_and_register(&clockevent_tpm, rate, 300,
+					GENMASK(counter_width - 1, 1));
 
 	return ret;
 }
@@ -179,7 +188,7 @@ static int __init tpm_timer_init(struct device_node *np)
 	ipg = of_clk_get_by_name(np, "ipg");
 	per = of_clk_get_by_name(np, "per");
 	if (IS_ERR(ipg) || IS_ERR(per)) {
-		pr_err("tpm: failed to get igp or per clk\n");
+		pr_err("tpm: failed to get ipg or per clk\n");
 		ret = -ENODEV;
 		goto err_clk_get;
 	}
@@ -197,6 +206,11 @@ static int __init tpm_timer_init(struct device_node *np)
 		goto err_per_clk_enable;
 	}
 
+	counter_width = (readl(timer_base + TPM_PARAM) & TPM_PARAM_WIDTH_MASK)
+		>> TPM_PARAM_WIDTH_SHIFT;
+	/* use rating 200 for 32-bit counter and 150 for 16-bit counter */
+	rating = counter_width == 0x20 ? 200 : 150;
+
 	/*
	 * Initialize tpm module to a known state
	 * 1) Counter disabled
@@ -205,16 +219,25 @@ static int __init tpm_timer_init(struct device_node *np)
 	 * 4) Channel0 disabled
 	 * 5) DMA transfers disabled
 	 */
+	/* make sure counter is disabled */
 	writel(0, timer_base + TPM_SC);
+	/* TOF is W1C */
+	writel(TPM_SC_TOF_MASK, timer_base + TPM_SC);
 	writel(0, timer_base + TPM_CNT);
-	writel(0, timer_base + TPM_C0SC);
+	/* CHF is W1C */
+	writel(TPM_C0SC_CHF_MASK, timer_base + TPM_C0SC);
 
-	/* increase per cnt, div 8 by default */
-	writel(TPM_SC_CMOD_INC_PER_CNT | TPM_SC_CMOD_DIV_DEFAULT,
+	/*
+	 * increase per cnt,
+	 * div 8 for 32-bit counter and div 128 for 16-bit counter
+	 */
+	writel(TPM_SC_CMOD_INC_PER_CNT |
+		(counter_width == 0x20 ?
+		TPM_SC_CMOD_DIV_DEFAULT : TPM_SC_CMOD_DIV_MAX),
 	       timer_base + TPM_SC);
 
 	/* set MOD register to maximum for free running mode */
-	writel(0xffffffff, timer_base + TPM_MOD);
+	writel(GENMASK(counter_width - 1, 0), timer_base + TPM_MOD);
 
 	rate = clk_get_rate(per) >> 3;
 	ret = tpm_clocksource_init(rate);
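The TPM_PARAM probe above reads the counter width out of the IP block itself (0x20, i.e. 32 bits, on i.MX7ULP; 16-bit instances exist too), and the GENMASK() calls size the MOD value and the maximum clockevent delta accordingly. A small userspace sketch of the resulting values (64-bit long assumed; the GENMASK() spelling below is a simplified stand-in for the kernel macro but produces the same bits):

	#include <stdio.h>

	#define BITS_PER_LONG	64
	#define GENMASK(h, l) \
		((~0UL << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

	int main(void)
	{
		for (int width = 16; width <= 32; width += 16)
			printf("width %2d: TPM_MOD=%#lx max_delta=%#lx\n", width,
			       GENMASK(width - 1, 0),  /* free-running modulus */
			       GENMASK(width - 1, 1)); /* max clockevent delta */
		return 0;
	}

For a 32-bit counter this prints the familiar 0xffffffff and 0xfffffffe; a 16-bit counter gets 0xffff and 0xfffe and, per the hunk above, the divide-by-128 prescaler in place of divide-by-8.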
diff --git a/drivers/clocksource/timer-npcm7xx.c b/drivers/clocksource/timer-npcm7xx.c
new file mode 100644
index 000000000000..7a9bb5532d99
--- /dev/null
+++ b/drivers/clocksource/timer-npcm7xx.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2014-2018 Nuvoton Technologies tomer.maimon@nuvoton.com
+ * All rights reserved.
+ *
+ * Copyright 2017 Google, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/clockchips.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include "timer-of.h"
+
+/* Timers registers */
+#define NPCM7XX_REG_TCSR0	0x0 /* Timer 0 Control and Status Register */
+#define NPCM7XX_REG_TICR0	0x8 /* Timer 0 Initial Count Register */
+#define NPCM7XX_REG_TCSR1	0x4 /* Timer 1 Control and Status Register */
+#define NPCM7XX_REG_TICR1	0xc /* Timer 1 Initial Count Register */
+#define NPCM7XX_REG_TDR1	0x14 /* Timer 1 Data Register */
+#define NPCM7XX_REG_TISR	0x18 /* Timer Interrupt Status Register */
+
+/* Timers control */
+#define NPCM7XX_Tx_RESETINT		0x1f
+#define NPCM7XX_Tx_PERIOD		BIT(27)
+#define NPCM7XX_Tx_INTEN		BIT(29)
+#define NPCM7XX_Tx_COUNTEN		BIT(30)
+#define NPCM7XX_Tx_ONESHOT		0x0
+#define NPCM7XX_Tx_OPER			GENMASK(3, 27)
+#define NPCM7XX_Tx_MIN_PRESCALE		0x1
+#define NPCM7XX_Tx_TDR_MASK_BITS	24
+#define NPCM7XX_Tx_MAX_CNT		0xFFFFFF
+#define NPCM7XX_T0_CLR_INT		0x1
+#define NPCM7XX_Tx_CLR_CSR		0x0
+
+/* Timers operating mode */
+#define NPCM7XX_START_PERIODIC_Tx (NPCM7XX_Tx_PERIOD | NPCM7XX_Tx_COUNTEN | \
+					NPCM7XX_Tx_INTEN | \
+					NPCM7XX_Tx_MIN_PRESCALE)
+
+#define NPCM7XX_START_ONESHOT_Tx (NPCM7XX_Tx_ONESHOT | NPCM7XX_Tx_COUNTEN | \
+					NPCM7XX_Tx_INTEN | \
+					NPCM7XX_Tx_MIN_PRESCALE)
+
+#define NPCM7XX_START_Tx (NPCM7XX_Tx_COUNTEN | NPCM7XX_Tx_PERIOD | \
+				NPCM7XX_Tx_MIN_PRESCALE)
+
+#define NPCM7XX_DEFAULT_CSR (NPCM7XX_Tx_CLR_CSR | NPCM7XX_Tx_MIN_PRESCALE)
+
+static int npcm7xx_timer_resume(struct clock_event_device *evt)
+{
+	struct timer_of *to = to_timer_of(evt);
+	u32 val;
+
+	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
+	val |= NPCM7XX_Tx_COUNTEN;
+	writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
+
+	return 0;
+}
+
+static int npcm7xx_timer_shutdown(struct clock_event_device *evt)
+{
+	struct timer_of *to = to_timer_of(evt);
+	u32 val;
+
+	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
+	val &= ~NPCM7XX_Tx_COUNTEN;
+	writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
+
+	return 0;
+}
+
+static int npcm7xx_timer_oneshot(struct clock_event_device *evt)
+{
+	struct timer_of *to = to_timer_of(evt);
+	u32 val;
+
+	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
+	val &= ~NPCM7XX_Tx_OPER;
+
+	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
+	val |= NPCM7XX_START_ONESHOT_Tx;
+	writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
+
+	return 0;
+}
+
+static int npcm7xx_timer_periodic(struct clock_event_device *evt)
+{
+	struct timer_of *to = to_timer_of(evt);
+	u32 val;
+
+	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
+	val &= ~NPCM7XX_Tx_OPER;
+
+	writel(timer_of_period(to), timer_of_base(to) + NPCM7XX_REG_TICR0);
+	val |= NPCM7XX_START_PERIODIC_Tx;
+
+	writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
+
+	return 0;
+}
+
+static int npcm7xx_clockevent_set_next_event(unsigned long evt,
+		struct clock_event_device *clk)
+{
+	struct timer_of *to = to_timer_of(clk);
+	u32 val;
+
+	writel(evt, timer_of_base(to) + NPCM7XX_REG_TICR0);
+	val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0);
+	val |= NPCM7XX_START_Tx;
+	writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0);
+
+	return 0;
+}
+
+static irqreturn_t npcm7xx_timer0_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+	struct timer_of *to = to_timer_of(evt);
+
+	writel(NPCM7XX_T0_CLR_INT, timer_of_base(to) + NPCM7XX_REG_TISR);
+
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct timer_of npcm7xx_to = {
+	.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
+
+	.clkevt = {
+		.name		    = "npcm7xx-timer0",
+		.features	    = CLOCK_EVT_FEAT_PERIODIC |
+				      CLOCK_EVT_FEAT_ONESHOT,
+		.set_next_event	    = npcm7xx_clockevent_set_next_event,
+		.set_state_shutdown = npcm7xx_timer_shutdown,
+		.set_state_periodic = npcm7xx_timer_periodic,
+		.set_state_oneshot  = npcm7xx_timer_oneshot,
+		.tick_resume	    = npcm7xx_timer_resume,
+		.rating		    = 300,
+	},
+
+	.of_irq = {
+		.handler = npcm7xx_timer0_interrupt,
+		.flags = IRQF_TIMER | IRQF_IRQPOLL,
+	},
+};
+
+static void __init npcm7xx_clockevents_init(void)
+{
+	writel(NPCM7XX_DEFAULT_CSR,
+		timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR0);
+
+	writel(NPCM7XX_Tx_RESETINT,
+		timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TISR);
+
+	npcm7xx_to.clkevt.cpumask = cpumask_of(0);
+	clockevents_config_and_register(&npcm7xx_to.clkevt,
+					timer_of_rate(&npcm7xx_to),
+					0x1, NPCM7XX_Tx_MAX_CNT);
+}
+
+static void __init npcm7xx_clocksource_init(void)
+{
+	u32 val;
+
+	writel(NPCM7XX_DEFAULT_CSR,
+		timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1);
+	writel(NPCM7XX_Tx_MAX_CNT,
+		timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TICR1);
+
+	val = readl(timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1);
+	val |= NPCM7XX_START_Tx;
+	writel(val, timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1);
+
+	clocksource_mmio_init(timer_of_base(&npcm7xx_to) +
+				NPCM7XX_REG_TDR1,
+				"npcm7xx-timer1", timer_of_rate(&npcm7xx_to),
+				200, (unsigned int)NPCM7XX_Tx_TDR_MASK_BITS,
+				clocksource_mmio_readl_down);
+}
+
+static int __init npcm7xx_timer_init(struct device_node *np)
+{
+	int ret;
+
+	ret = timer_of_init(np, &npcm7xx_to);
+	if (ret)
+		return ret;
+
+	/* Clock input is divided by PRESCALE + 1 before it is fed */
+	/* to the counter */
+	npcm7xx_to.of_clk.rate = npcm7xx_to.of_clk.rate /
+		(NPCM7XX_Tx_MIN_PRESCALE + 1);
+
+	npcm7xx_clocksource_init();
+	npcm7xx_clockevents_init();
+
+	pr_info("Enabling NPCM7xx clocksource timer base: %px, IRQ: %d ",
+		timer_of_base(&npcm7xx_to), timer_of_irq(&npcm7xx_to));
+
+	return 0;
+}
+
+TIMER_OF_DECLARE(npcm7xx, "nuvoton,npcm750-timer", npcm7xx_timer_init);
+
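As a worked example of the prescaler adjustment at the end of npcm7xx_timer_init(): with the 25 MHz reference clock mentioned in the binding above and NPCM7XX_Tx_MIN_PRESCALE = 1, the counters tick at 25 MHz / (1 + 1) = 12.5 MHz, so the 24-bit clocksource (NPCM7XX_Tx_MAX_CNT = 0xFFFFFF) wraps roughly every 16777216 / 12500000 ≈ 1.34 seconds. As an aside, NPCM7XX_Tx_OPER is spelled GENMASK(3, 27) in the driver as merged; the kernel macro expects (high, low) arguments, so as written that mask evaluates to 0. This appears to be a quirk carried in the original submission rather than an extraction error here.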
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8bf60250309e..ae056927080d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
| 669 | CEPH_CAP_FILE_BUFFER| | 669 | CEPH_CAP_FILE_BUFFER| |
| 670 | CEPH_CAP_AUTH_EXCL| | 670 | CEPH_CAP_AUTH_EXCL| |
| 671 | CEPH_CAP_XATTR_EXCL)) { | 671 | CEPH_CAP_XATTR_EXCL)) { |
| 672 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { | 672 | if (ci->i_version == 0 || |
| 673 | timespec_compare(ctime, &inode->i_ctime) > 0) { | ||
| 673 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", | 674 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", |
| 674 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, | 675 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, |
| 675 | ctime->tv_sec, ctime->tv_nsec); | 676 | ctime->tv_sec, ctime->tv_nsec); |
| 676 | inode->i_ctime = *ctime; | 677 | inode->i_ctime = *ctime; |
| 677 | } | 678 | } |
| 678 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { | 679 | if (ci->i_version == 0 || |
| 680 | ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { | ||
| 679 | /* the MDS did a utimes() */ | 681 | /* the MDS did a utimes() */ |
| 680 | dout("mtime %ld.%09ld -> %ld.%09ld " | 682 | dout("mtime %ld.%09ld -> %ld.%09ld " |
| 681 | "tw %d -> %d\n", | 683 | "tw %d -> %d\n", |
| @@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
| 795 | new_issued = ~issued & le32_to_cpu(info->cap.caps); | 797 | new_issued = ~issued & le32_to_cpu(info->cap.caps); |
| 796 | 798 | ||
| 797 | /* update inode */ | 799 | /* update inode */ |
| 798 | ci->i_version = le64_to_cpu(info->version); | ||
| 799 | inode->i_rdev = le32_to_cpu(info->rdev); | 800 | inode->i_rdev = le32_to_cpu(info->rdev); |
| 800 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 801 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
| 801 | 802 | ||
| @@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
| 868 | xattr_blob = NULL; | 869 | xattr_blob = NULL; |
| 869 | } | 870 | } |
| 870 | 871 | ||
| 872 | /* finally update i_version */ | ||
| 873 | ci->i_version = le64_to_cpu(info->version); | ||
| 874 | |||
| 871 | inode->i_mapping->a_ops = &ceph_aops; | 875 | inode->i_mapping->a_ops = &ceph_aops; |
| 872 | 876 | ||
| 873 | switch (inode->i_mode & S_IFMT) { | 877 | switch (inode->i_mode & S_IFMT) { |
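The net effect of the two ceph hunks is ordering: timestamps are accepted unconditionally for a freshly instantiated inode (ci->i_version == 0), and i_version itself is only published once every other field has been filled in, so a version bump always implies the timestamps were already updated. A reduced sketch of that pattern, using a hypothetical simplified inode rather than the real ceph types:

#include <stdint.h>

struct demo_inode {
	uint64_t version;	/* 0: never populated from the server */
	int64_t ctime_ns;
};

/* Take the server's ctime if the inode is new or the value is newer,
 * and only then advance the version. */
static void demo_fill_inode(struct demo_inode *inode,
			    uint64_t srv_version, int64_t srv_ctime_ns)
{
	if (inode->version == 0 || srv_ctime_ns > inode->ctime_ns)
		inode->ctime_ns = srv_ctime_ns;

	inode->version = srv_version;	/* finally update the version */
}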
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index dc44de904797..2ddcc96ae456 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile | |||
| @@ -4,17 +4,18 @@ top_srcdir = ../../../../ | |||
| 4 | UNAME_M := $(shell uname -m) | 4 | UNAME_M := $(shell uname -m) |
| 5 | 5 | ||
| 6 | LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c | 6 | LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c |
| 7 | LIBKVM_x86_64 = lib/x86.c | 7 | LIBKVM_x86_64 = lib/x86.c lib/vmx.c |
| 8 | 8 | ||
| 9 | TEST_GEN_PROGS_x86_64 = set_sregs_test | 9 | TEST_GEN_PROGS_x86_64 = set_sregs_test |
| 10 | TEST_GEN_PROGS_x86_64 += sync_regs_test | 10 | TEST_GEN_PROGS_x86_64 += sync_regs_test |
| 11 | TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test | ||
| 11 | 12 | ||
| 12 | TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) | 13 | TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) |
| 13 | LIBKVM += $(LIBKVM_$(UNAME_M)) | 14 | LIBKVM += $(LIBKVM_$(UNAME_M)) |
| 14 | 15 | ||
| 15 | INSTALL_HDR_PATH = $(top_srcdir)/usr | 16 | INSTALL_HDR_PATH = $(top_srcdir)/usr |
| 16 | LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ | 17 | LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ |
| 17 | CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) | 18 | CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) |
| 18 | 19 | ||
| 19 | # After inclusion, $(OUTPUT) is defined and | 20 | # After inclusion, $(OUTPUT) is defined and |
| 20 | # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ | 21 | # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ |
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 57974ad46373..637b7017b6ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h | |||
| @@ -112,24 +112,27 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, | |||
| 112 | vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, | 112 | vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, |
| 113 | vm_paddr_t paddr_min, uint32_t memslot); | 113 | vm_paddr_t paddr_min, uint32_t memslot); |
| 114 | 114 | ||
| 115 | void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); | 115 | struct kvm_cpuid2 *kvm_get_supported_cpuid(void); |
| 116 | void vcpu_set_cpuid( | 116 | void vcpu_set_cpuid( |
| 117 | struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); | 117 | struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); |
| 118 | 118 | ||
| 119 | struct kvm_cpuid2 *allocate_kvm_cpuid2(void); | ||
| 120 | struct kvm_cpuid_entry2 * | 119 | struct kvm_cpuid_entry2 * |
| 121 | find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, | 120 | kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); |
| 122 | uint32_t index); | ||
| 123 | 121 | ||
| 124 | static inline struct kvm_cpuid_entry2 * | 122 | static inline struct kvm_cpuid_entry2 * |
| 125 | find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) | 123 | kvm_get_supported_cpuid_entry(uint32_t function) |
| 126 | { | 124 | { |
| 127 | return find_cpuid_index_entry(cpuid, function, 0); | 125 | return kvm_get_supported_cpuid_index(function, 0); |
| 128 | } | 126 | } |
| 129 | 127 | ||
| 130 | struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); | 128 | struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); |
| 131 | void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); | 129 | void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); |
| 132 | 130 | ||
| 131 | typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, | ||
| 132 | vm_paddr_t vmxon_paddr, | ||
| 133 | vm_vaddr_t vmcs_vaddr, | ||
| 134 | vm_paddr_t vmcs_paddr); | ||
| 135 | |||
| 133 | struct kvm_userspace_memory_region * | 136 | struct kvm_userspace_memory_region * |
| 134 | kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, | 137 | kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, |
| 135 | uint64_t end); | 138 | uint64_t end); |
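With the reworked helpers, callers no longer allocate a kvm_cpuid2 themselves; the library hands back a pointer it owns. A short sketch of the intended call pattern (the bit constant is the CPUID.1:ECX VMX bit, mirroring the check in the test added later in this series):

#include "kvm_util.h"

#define ECX_VMX_BIT (1u << 5)	/* CPUID.1:ECX.VMX */

static int host_cpu_has_vmx(void)
{
	/* No allocation or free: the library caches the result. */
	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);

	return !!(entry->ecx & ECX_VMX_BIT);
}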
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h new file mode 100644 index 000000000000..6ed8499807fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/vmx.h | |||
| @@ -0,0 +1,494 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/include/vmx.h | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef SELFTEST_KVM_VMX_H | ||
| 11 | #define SELFTEST_KVM_VMX_H | ||
| 12 | |||
| 13 | #include <stdint.h> | ||
| 14 | #include "x86.h" | ||
| 15 | |||
| 16 | #define CPUID_VMX_BIT 5 | ||
| 17 | |||
| 18 | #define CPUID_VMX (1 << 5) | ||
| 19 | |||
| 20 | /* | ||
| 21 | * Definitions of Primary Processor-Based VM-Execution Controls. | ||
| 22 | */ | ||
| 23 | #define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 | ||
| 24 | #define CPU_BASED_USE_TSC_OFFSETING 0x00000008 | ||
| 25 | #define CPU_BASED_HLT_EXITING 0x00000080 | ||
| 26 | #define CPU_BASED_INVLPG_EXITING 0x00000200 | ||
| 27 | #define CPU_BASED_MWAIT_EXITING 0x00000400 | ||
| 28 | #define CPU_BASED_RDPMC_EXITING 0x00000800 | ||
| 29 | #define CPU_BASED_RDTSC_EXITING 0x00001000 | ||
| 30 | #define CPU_BASED_CR3_LOAD_EXITING 0x00008000 | ||
| 31 | #define CPU_BASED_CR3_STORE_EXITING 0x00010000 | ||
| 32 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 | ||
| 33 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 | ||
| 34 | #define CPU_BASED_TPR_SHADOW 0x00200000 | ||
| 35 | #define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 | ||
| 36 | #define CPU_BASED_MOV_DR_EXITING 0x00800000 | ||
| 37 | #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 | ||
| 38 | #define CPU_BASED_USE_IO_BITMAPS 0x02000000 | ||
| 39 | #define CPU_BASED_MONITOR_TRAP 0x08000000 | ||
| 40 | #define CPU_BASED_USE_MSR_BITMAPS 0x10000000 | ||
| 41 | #define CPU_BASED_MONITOR_EXITING 0x20000000 | ||
| 42 | #define CPU_BASED_PAUSE_EXITING 0x40000000 | ||
| 43 | #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 | ||
| 44 | |||
| 45 | #define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Definitions of Secondary Processor-Based VM-Execution Controls. | ||
| 49 | */ | ||
| 50 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | ||
| 51 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 | ||
| 52 | #define SECONDARY_EXEC_DESC 0x00000004 | ||
| 53 | #define SECONDARY_EXEC_RDTSCP 0x00000008 | ||
| 54 | #define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 | ||
| 55 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | ||
| 56 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | ||
| 57 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | ||
| 58 | #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 | ||
| 59 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 | ||
| 60 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | ||
| 61 | #define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 | ||
| 62 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | ||
| 63 | #define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 | ||
| 64 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | ||
| 65 | #define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 | ||
| 66 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 | ||
| 67 | #define SECONDARY_EPT_VE 0x00040000 | ||
| 68 | #define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 | ||
| 69 | #define SECONDARY_EXEC_TSC_SCALING 0x02000000 | ||
| 70 | |||
| 71 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | ||
| 72 | #define PIN_BASED_NMI_EXITING 0x00000008 | ||
| 73 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 | ||
| 74 | #define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 | ||
| 75 | #define PIN_BASED_POSTED_INTR 0x00000080 | ||
| 76 | |||
| 77 | #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 | ||
| 78 | |||
| 79 | #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 | ||
| 80 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 | ||
| 81 | #define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 | ||
| 82 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 | ||
| 83 | #define VM_EXIT_SAVE_IA32_PAT 0x00040000 | ||
| 84 | #define VM_EXIT_LOAD_IA32_PAT 0x00080000 | ||
| 85 | #define VM_EXIT_SAVE_IA32_EFER 0x00100000 | ||
| 86 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 | ||
| 87 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 | ||
| 88 | |||
| 89 | #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff | ||
| 90 | |||
| 91 | #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 | ||
| 92 | #define VM_ENTRY_IA32E_MODE 0x00000200 | ||
| 93 | #define VM_ENTRY_SMM 0x00000400 | ||
| 94 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 | ||
| 95 | #define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 | ||
| 96 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 | ||
| 97 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 | ||
| 98 | |||
| 99 | #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff | ||
| 100 | |||
| 101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f | ||
| 102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | ||
| 103 | |||
| 104 | #define EXIT_REASON_FAILED_VMENTRY 0x80000000 | ||
| 105 | #define EXIT_REASON_EXCEPTION_NMI 0 | ||
| 106 | #define EXIT_REASON_EXTERNAL_INTERRUPT 1 | ||
| 107 | #define EXIT_REASON_TRIPLE_FAULT 2 | ||
| 108 | #define EXIT_REASON_PENDING_INTERRUPT 7 | ||
| 109 | #define EXIT_REASON_NMI_WINDOW 8 | ||
| 110 | #define EXIT_REASON_TASK_SWITCH 9 | ||
| 111 | #define EXIT_REASON_CPUID 10 | ||
| 112 | #define EXIT_REASON_HLT 12 | ||
| 113 | #define EXIT_REASON_INVD 13 | ||
| 114 | #define EXIT_REASON_INVLPG 14 | ||
| 115 | #define EXIT_REASON_RDPMC 15 | ||
| 116 | #define EXIT_REASON_RDTSC 16 | ||
| 117 | #define EXIT_REASON_VMCALL 18 | ||
| 118 | #define EXIT_REASON_VMCLEAR 19 | ||
| 119 | #define EXIT_REASON_VMLAUNCH 20 | ||
| 120 | #define EXIT_REASON_VMPTRLD 21 | ||
| 121 | #define EXIT_REASON_VMPTRST 22 | ||
| 122 | #define EXIT_REASON_VMREAD 23 | ||
| 123 | #define EXIT_REASON_VMRESUME 24 | ||
| 124 | #define EXIT_REASON_VMWRITE 25 | ||
| 125 | #define EXIT_REASON_VMOFF 26 | ||
| 126 | #define EXIT_REASON_VMON 27 | ||
| 127 | #define EXIT_REASON_CR_ACCESS 28 | ||
| 128 | #define EXIT_REASON_DR_ACCESS 29 | ||
| 129 | #define EXIT_REASON_IO_INSTRUCTION 30 | ||
| 130 | #define EXIT_REASON_MSR_READ 31 | ||
| 131 | #define EXIT_REASON_MSR_WRITE 32 | ||
| 132 | #define EXIT_REASON_INVALID_STATE 33 | ||
| 133 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | ||
| 134 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 | ||
| 135 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | ||
| 136 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | ||
| 137 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | ||
| 138 | #define EXIT_REASON_APIC_ACCESS 44 | ||
| 139 | #define EXIT_REASON_EOI_INDUCED 45 | ||
| 140 | #define EXIT_REASON_EPT_VIOLATION 48 | ||
| 141 | #define EXIT_REASON_EPT_MISCONFIG 49 | ||
| 142 | #define EXIT_REASON_INVEPT 50 | ||
| 143 | #define EXIT_REASON_RDTSCP 51 | ||
| 144 | #define EXIT_REASON_PREEMPTION_TIMER 52 | ||
| 145 | #define EXIT_REASON_INVVPID 53 | ||
| 146 | #define EXIT_REASON_WBINVD 54 | ||
| 147 | #define EXIT_REASON_XSETBV 55 | ||
| 148 | #define EXIT_REASON_APIC_WRITE 56 | ||
| 149 | #define EXIT_REASON_INVPCID 58 | ||
| 150 | #define EXIT_REASON_PML_FULL 62 | ||
| 151 | #define EXIT_REASON_XSAVES 63 | ||
| 152 | #define EXIT_REASON_XRSTORS 64 | ||
| 153 | #define LAST_EXIT_REASON 64 | ||
| 154 | |||
| 155 | enum vmcs_field { | ||
| 156 | VIRTUAL_PROCESSOR_ID = 0x00000000, | ||
| 157 | POSTED_INTR_NV = 0x00000002, | ||
| 158 | GUEST_ES_SELECTOR = 0x00000800, | ||
| 159 | GUEST_CS_SELECTOR = 0x00000802, | ||
| 160 | GUEST_SS_SELECTOR = 0x00000804, | ||
| 161 | GUEST_DS_SELECTOR = 0x00000806, | ||
| 162 | GUEST_FS_SELECTOR = 0x00000808, | ||
| 163 | GUEST_GS_SELECTOR = 0x0000080a, | ||
| 164 | GUEST_LDTR_SELECTOR = 0x0000080c, | ||
| 165 | GUEST_TR_SELECTOR = 0x0000080e, | ||
| 166 | GUEST_INTR_STATUS = 0x00000810, | ||
| 167 | GUEST_PML_INDEX = 0x00000812, | ||
| 168 | HOST_ES_SELECTOR = 0x00000c00, | ||
| 169 | HOST_CS_SELECTOR = 0x00000c02, | ||
| 170 | HOST_SS_SELECTOR = 0x00000c04, | ||
| 171 | HOST_DS_SELECTOR = 0x00000c06, | ||
| 172 | HOST_FS_SELECTOR = 0x00000c08, | ||
| 173 | HOST_GS_SELECTOR = 0x00000c0a, | ||
| 174 | HOST_TR_SELECTOR = 0x00000c0c, | ||
| 175 | IO_BITMAP_A = 0x00002000, | ||
| 176 | IO_BITMAP_A_HIGH = 0x00002001, | ||
| 177 | IO_BITMAP_B = 0x00002002, | ||
| 178 | IO_BITMAP_B_HIGH = 0x00002003, | ||
| 179 | MSR_BITMAP = 0x00002004, | ||
| 180 | MSR_BITMAP_HIGH = 0x00002005, | ||
| 181 | VM_EXIT_MSR_STORE_ADDR = 0x00002006, | ||
| 182 | VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, | ||
| 183 | VM_EXIT_MSR_LOAD_ADDR = 0x00002008, | ||
| 184 | VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, | ||
| 185 | VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, | ||
| 186 | VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, | ||
| 187 | PML_ADDRESS = 0x0000200e, | ||
| 188 | PML_ADDRESS_HIGH = 0x0000200f, | ||
| 189 | TSC_OFFSET = 0x00002010, | ||
| 190 | TSC_OFFSET_HIGH = 0x00002011, | ||
| 191 | VIRTUAL_APIC_PAGE_ADDR = 0x00002012, | ||
| 192 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, | ||
| 193 | APIC_ACCESS_ADDR = 0x00002014, | ||
| 194 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | ||
| 195 | POSTED_INTR_DESC_ADDR = 0x00002016, | ||
| 196 | POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, | ||
| 197 | EPT_POINTER = 0x0000201a, | ||
| 198 | EPT_POINTER_HIGH = 0x0000201b, | ||
| 199 | EOI_EXIT_BITMAP0 = 0x0000201c, | ||
| 200 | EOI_EXIT_BITMAP0_HIGH = 0x0000201d, | ||
| 201 | EOI_EXIT_BITMAP1 = 0x0000201e, | ||
| 202 | EOI_EXIT_BITMAP1_HIGH = 0x0000201f, | ||
| 203 | EOI_EXIT_BITMAP2 = 0x00002020, | ||
| 204 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, | ||
| 205 | EOI_EXIT_BITMAP3 = 0x00002022, | ||
| 206 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, | ||
| 207 | VMREAD_BITMAP = 0x00002026, | ||
| 208 | VMREAD_BITMAP_HIGH = 0x00002027, | ||
| 209 | VMWRITE_BITMAP = 0x00002028, | ||
| 210 | VMWRITE_BITMAP_HIGH = 0x00002029, | ||
| 211 | XSS_EXIT_BITMAP = 0x0000202C, | ||
| 212 | XSS_EXIT_BITMAP_HIGH = 0x0000202D, | ||
| 213 | TSC_MULTIPLIER = 0x00002032, | ||
| 214 | TSC_MULTIPLIER_HIGH = 0x00002033, | ||
| 215 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | ||
| 216 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | ||
| 217 | VMCS_LINK_POINTER = 0x00002800, | ||
| 218 | VMCS_LINK_POINTER_HIGH = 0x00002801, | ||
| 219 | GUEST_IA32_DEBUGCTL = 0x00002802, | ||
| 220 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | ||
| 221 | GUEST_IA32_PAT = 0x00002804, | ||
| 222 | GUEST_IA32_PAT_HIGH = 0x00002805, | ||
| 223 | GUEST_IA32_EFER = 0x00002806, | ||
| 224 | GUEST_IA32_EFER_HIGH = 0x00002807, | ||
| 225 | GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, | ||
| 226 | GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, | ||
| 227 | GUEST_PDPTR0 = 0x0000280a, | ||
| 228 | GUEST_PDPTR0_HIGH = 0x0000280b, | ||
| 229 | GUEST_PDPTR1 = 0x0000280c, | ||
| 230 | GUEST_PDPTR1_HIGH = 0x0000280d, | ||
| 231 | GUEST_PDPTR2 = 0x0000280e, | ||
| 232 | GUEST_PDPTR2_HIGH = 0x0000280f, | ||
| 233 | GUEST_PDPTR3 = 0x00002810, | ||
| 234 | GUEST_PDPTR3_HIGH = 0x00002811, | ||
| 235 | GUEST_BNDCFGS = 0x00002812, | ||
| 236 | GUEST_BNDCFGS_HIGH = 0x00002813, | ||
| 237 | HOST_IA32_PAT = 0x00002c00, | ||
| 238 | HOST_IA32_PAT_HIGH = 0x00002c01, | ||
| 239 | HOST_IA32_EFER = 0x00002c02, | ||
| 240 | HOST_IA32_EFER_HIGH = 0x00002c03, | ||
| 241 | HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, | ||
| 242 | HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, | ||
| 243 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | ||
| 244 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | ||
| 245 | EXCEPTION_BITMAP = 0x00004004, | ||
| 246 | PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, | ||
| 247 | PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, | ||
| 248 | CR3_TARGET_COUNT = 0x0000400a, | ||
| 249 | VM_EXIT_CONTROLS = 0x0000400c, | ||
| 250 | VM_EXIT_MSR_STORE_COUNT = 0x0000400e, | ||
| 251 | VM_EXIT_MSR_LOAD_COUNT = 0x00004010, | ||
| 252 | VM_ENTRY_CONTROLS = 0x00004012, | ||
| 253 | VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, | ||
| 254 | VM_ENTRY_INTR_INFO_FIELD = 0x00004016, | ||
| 255 | VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, | ||
| 256 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, | ||
| 257 | TPR_THRESHOLD = 0x0000401c, | ||
| 258 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, | ||
| 259 | PLE_GAP = 0x00004020, | ||
| 260 | PLE_WINDOW = 0x00004022, | ||
| 261 | VM_INSTRUCTION_ERROR = 0x00004400, | ||
| 262 | VM_EXIT_REASON = 0x00004402, | ||
| 263 | VM_EXIT_INTR_INFO = 0x00004404, | ||
| 264 | VM_EXIT_INTR_ERROR_CODE = 0x00004406, | ||
| 265 | IDT_VECTORING_INFO_FIELD = 0x00004408, | ||
| 266 | IDT_VECTORING_ERROR_CODE = 0x0000440a, | ||
| 267 | VM_EXIT_INSTRUCTION_LEN = 0x0000440c, | ||
| 268 | VMX_INSTRUCTION_INFO = 0x0000440e, | ||
| 269 | GUEST_ES_LIMIT = 0x00004800, | ||
| 270 | GUEST_CS_LIMIT = 0x00004802, | ||
| 271 | GUEST_SS_LIMIT = 0x00004804, | ||
| 272 | GUEST_DS_LIMIT = 0x00004806, | ||
| 273 | GUEST_FS_LIMIT = 0x00004808, | ||
| 274 | GUEST_GS_LIMIT = 0x0000480a, | ||
| 275 | GUEST_LDTR_LIMIT = 0x0000480c, | ||
| 276 | GUEST_TR_LIMIT = 0x0000480e, | ||
| 277 | GUEST_GDTR_LIMIT = 0x00004810, | ||
| 278 | GUEST_IDTR_LIMIT = 0x00004812, | ||
| 279 | GUEST_ES_AR_BYTES = 0x00004814, | ||
| 280 | GUEST_CS_AR_BYTES = 0x00004816, | ||
| 281 | GUEST_SS_AR_BYTES = 0x00004818, | ||
| 282 | GUEST_DS_AR_BYTES = 0x0000481a, | ||
| 283 | GUEST_FS_AR_BYTES = 0x0000481c, | ||
| 284 | GUEST_GS_AR_BYTES = 0x0000481e, | ||
| 285 | GUEST_LDTR_AR_BYTES = 0x00004820, | ||
| 286 | GUEST_TR_AR_BYTES = 0x00004822, | ||
| 287 | GUEST_INTERRUPTIBILITY_INFO = 0x00004824, | ||
| 288 | GUEST_ACTIVITY_STATE = 0x00004826, | ||
| 289 | GUEST_SYSENTER_CS = 0x0000482A, | ||
| 290 | VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, | ||
| 291 | HOST_IA32_SYSENTER_CS = 0x00004c00, | ||
| 292 | CR0_GUEST_HOST_MASK = 0x00006000, | ||
| 293 | CR4_GUEST_HOST_MASK = 0x00006002, | ||
| 294 | CR0_READ_SHADOW = 0x00006004, | ||
| 295 | CR4_READ_SHADOW = 0x00006006, | ||
| 296 | CR3_TARGET_VALUE0 = 0x00006008, | ||
| 297 | CR3_TARGET_VALUE1 = 0x0000600a, | ||
| 298 | CR3_TARGET_VALUE2 = 0x0000600c, | ||
| 299 | CR3_TARGET_VALUE3 = 0x0000600e, | ||
| 300 | EXIT_QUALIFICATION = 0x00006400, | ||
| 301 | GUEST_LINEAR_ADDRESS = 0x0000640a, | ||
| 302 | GUEST_CR0 = 0x00006800, | ||
| 303 | GUEST_CR3 = 0x00006802, | ||
| 304 | GUEST_CR4 = 0x00006804, | ||
| 305 | GUEST_ES_BASE = 0x00006806, | ||
| 306 | GUEST_CS_BASE = 0x00006808, | ||
| 307 | GUEST_SS_BASE = 0x0000680a, | ||
| 308 | GUEST_DS_BASE = 0x0000680c, | ||
| 309 | GUEST_FS_BASE = 0x0000680e, | ||
| 310 | GUEST_GS_BASE = 0x00006810, | ||
| 311 | GUEST_LDTR_BASE = 0x00006812, | ||
| 312 | GUEST_TR_BASE = 0x00006814, | ||
| 313 | GUEST_GDTR_BASE = 0x00006816, | ||
| 314 | GUEST_IDTR_BASE = 0x00006818, | ||
| 315 | GUEST_DR7 = 0x0000681a, | ||
| 316 | GUEST_RSP = 0x0000681c, | ||
| 317 | GUEST_RIP = 0x0000681e, | ||
| 318 | GUEST_RFLAGS = 0x00006820, | ||
| 319 | GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, | ||
| 320 | GUEST_SYSENTER_ESP = 0x00006824, | ||
| 321 | GUEST_SYSENTER_EIP = 0x00006826, | ||
| 322 | HOST_CR0 = 0x00006c00, | ||
| 323 | HOST_CR3 = 0x00006c02, | ||
| 324 | HOST_CR4 = 0x00006c04, | ||
| 325 | HOST_FS_BASE = 0x00006c06, | ||
| 326 | HOST_GS_BASE = 0x00006c08, | ||
| 327 | HOST_TR_BASE = 0x00006c0a, | ||
| 328 | HOST_GDTR_BASE = 0x00006c0c, | ||
| 329 | HOST_IDTR_BASE = 0x00006c0e, | ||
| 330 | HOST_IA32_SYSENTER_ESP = 0x00006c10, | ||
| 331 | HOST_IA32_SYSENTER_EIP = 0x00006c12, | ||
| 332 | HOST_RSP = 0x00006c14, | ||
| 333 | HOST_RIP = 0x00006c16, | ||
| 334 | }; | ||
| 335 | |||
| 336 | struct vmx_msr_entry { | ||
| 337 | uint32_t index; | ||
| 338 | uint32_t reserved; | ||
| 339 | uint64_t value; | ||
| 340 | } __attribute__ ((aligned(16))); | ||
| 341 | |||
| 342 | static inline int vmxon(uint64_t phys) | ||
| 343 | { | ||
| 344 | uint8_t ret; | ||
| 345 | |||
| 346 | __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" | ||
| 347 | : [ret]"=rm"(ret) | ||
| 348 | : [pa]"m"(phys) | ||
| 349 | : "cc", "memory"); | ||
| 350 | |||
| 351 | return ret; | ||
| 352 | } | ||
| 353 | |||
| 354 | static inline void vmxoff(void) | ||
| 355 | { | ||
| 356 | __asm__ __volatile__("vmxoff"); | ||
| 357 | } | ||
| 358 | |||
| 359 | static inline int vmclear(uint64_t vmcs_pa) | ||
| 360 | { | ||
| 361 | uint8_t ret; | ||
| 362 | |||
| 363 | __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" | ||
| 364 | : [ret]"=rm"(ret) | ||
| 365 | : [pa]"m"(vmcs_pa) | ||
| 366 | : "cc", "memory"); | ||
| 367 | |||
| 368 | return ret; | ||
| 369 | } | ||
| 370 | |||
| 371 | static inline int vmptrld(uint64_t vmcs_pa) | ||
| 372 | { | ||
| 373 | uint8_t ret; | ||
| 374 | |||
| 375 | __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" | ||
| 376 | : [ret]"=rm"(ret) | ||
| 377 | : [pa]"m"(vmcs_pa) | ||
| 378 | : "cc", "memory"); | ||
| 379 | |||
| 380 | return ret; | ||
| 381 | } | ||
| 382 | |||
| 383 | /* | ||
| 384 | * No guest state (e.g. GPRs) is established by this vmlaunch. | ||
| 385 | */ | ||
| 386 | static inline int vmlaunch(void) | ||
| 387 | { | ||
| 388 | int ret; | ||
| 389 | |||
| 390 | __asm__ __volatile__("push %%rbp;" | ||
| 391 | "push %%rcx;" | ||
| 392 | "push %%rdx;" | ||
| 393 | "push %%rsi;" | ||
| 394 | "push %%rdi;" | ||
| 395 | "push $0;" | ||
| 396 | "vmwrite %%rsp, %[host_rsp];" | ||
| 397 | "lea 1f(%%rip), %%rax;" | ||
| 398 | "vmwrite %%rax, %[host_rip];" | ||
| 399 | "vmlaunch;" | ||
| 400 | "incq (%%rsp);" | ||
| 401 | "1: pop %%rax;" | ||
| 402 | "pop %%rdi;" | ||
| 403 | "pop %%rsi;" | ||
| 404 | "pop %%rdx;" | ||
| 405 | "pop %%rcx;" | ||
| 406 | "pop %%rbp;" | ||
| 407 | : [ret]"=&a"(ret) | ||
| 408 | : [host_rsp]"r"((uint64_t)HOST_RSP), | ||
| 409 | [host_rip]"r"((uint64_t)HOST_RIP) | ||
| 410 | : "memory", "cc", "rbx", "r8", "r9", "r10", | ||
| 411 | "r11", "r12", "r13", "r14", "r15"); | ||
| 412 | return ret; | ||
| 413 | } | ||
| 414 | |||
| 415 | /* | ||
| 416 | * No guest state (e.g. GPRs) is established by this vmresume. | ||
| 417 | */ | ||
| 418 | static inline int vmresume(void) | ||
| 419 | { | ||
| 420 | int ret; | ||
| 421 | |||
| 422 | __asm__ __volatile__("push %%rbp;" | ||
| 423 | "push %%rcx;" | ||
| 424 | "push %%rdx;" | ||
| 425 | "push %%rsi;" | ||
| 426 | "push %%rdi;" | ||
| 427 | "push $0;" | ||
| 428 | "vmwrite %%rsp, %[host_rsp];" | ||
| 429 | "lea 1f(%%rip), %%rax;" | ||
| 430 | "vmwrite %%rax, %[host_rip];" | ||
| 431 | "vmresume;" | ||
| 432 | "incq (%%rsp);" | ||
| 433 | "1: pop %%rax;" | ||
| 434 | "pop %%rdi;" | ||
| 435 | "pop %%rsi;" | ||
| 436 | "pop %%rdx;" | ||
| 437 | "pop %%rcx;" | ||
| 438 | "pop %%rbp;" | ||
| 439 | : [ret]"=&a"(ret) | ||
| 440 | : [host_rsp]"r"((uint64_t)HOST_RSP), | ||
| 441 | [host_rip]"r"((uint64_t)HOST_RIP) | ||
| 442 | : "memory", "cc", "rbx", "r8", "r9", "r10", | ||
| 443 | "r11", "r12", "r13", "r14", "r15"); | ||
| 444 | return ret; | ||
| 445 | } | ||
| 446 | |||
| 447 | static inline int vmread(uint64_t encoding, uint64_t *value) | ||
| 448 | { | ||
| 449 | uint64_t tmp; | ||
| 450 | uint8_t ret; | ||
| 451 | |||
| 452 | __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" | ||
| 453 | : [value]"=rm"(tmp), [ret]"=rm"(ret) | ||
| 454 | : [encoding]"r"(encoding) | ||
| 455 | : "cc", "memory"); | ||
| 456 | |||
| 457 | *value = tmp; | ||
| 458 | return ret; | ||
| 459 | } | ||
| 460 | |||
| 461 | /* | ||
| 462 | * A wrapper around vmread that ignores errors and returns zero if the | ||
| 463 | * vmread instruction fails. | ||
| 464 | */ | ||
| 465 | static inline uint64_t vmreadz(uint64_t encoding) | ||
| 466 | { | ||
| 467 | uint64_t value = 0; | ||
| 468 | vmread(encoding, &value); | ||
| 469 | return value; | ||
| 470 | } | ||
| 471 | |||
| 472 | static inline int vmwrite(uint64_t encoding, uint64_t value) | ||
| 473 | { | ||
| 474 | uint8_t ret; | ||
| 475 | |||
| 476 | __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" | ||
| 477 | : [ret]"=rm"(ret) | ||
| 478 | : [value]"rm"(value), [encoding]"r"(encoding) | ||
| 479 | : "cc", "memory"); | ||
| 480 | |||
| 481 | return ret; | ||
| 482 | } | ||
| 483 | |||
| 484 | static inline uint32_t vmcs_revision(void) | ||
| 485 | { | ||
| 486 | return rdmsr(MSR_IA32_VMX_BASIC); | ||
| 487 | } | ||
| 488 | |||
| 489 | void prepare_for_vmx_operation(void); | ||
| 490 | void prepare_vmcs(void *guest_rip, void *guest_rsp); | ||
| 491 | struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, | ||
| 492 | vmx_guest_code_t guest_code); | ||
| 493 | |||
| 494 | #endif /* !SELFTEST_KVM_VMX_H */ | ||
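One subtlety in the header above: vmlaunch() and vmresume() return 0 when control comes back via a VM-exit (the exit lands on the label written into HOST_RIP, where the pushed zero is popped into the return value), and 1 when the instruction itself fails and falls through to the incq. A hedged sketch of how guest code built on these wrappers can tell the two cases apart, assuming a current, fully prepared VMCS:

static void demo_enter_guest(void)
{
	if (vmlaunch()) {
		/* VMLAUNCH failed outright; no VM-exit happened, so the
		 * cause is in VM_INSTRUCTION_ERROR (e.g. 7 means invalid
		 * control fields per the SDM). */
		uint64_t insn_err = vmreadz(VM_INSTRUCTION_ERROR);
		(void)insn_err;
	} else {
		/* We came back through HOST_RIP after a VM-exit. */
		uint64_t reason = vmreadz(VM_EXIT_REASON);
		(void)reason;
	}
}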
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7ca1bb40c498..2cedfda181d4 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c | |||
| @@ -378,7 +378,7 @@ int kvm_memcmp_hva_gva(void *hva, | |||
| 378 | * complicated. This function uses a reasonable default length for | 378 | * complicated. This function uses a reasonable default length for |
| 379 | * the array and performs the appropriate allocation. | 379 | * the array and performs the appropriate allocation. |
| 380 | */ | 380 | */ |
| 381 | struct kvm_cpuid2 *allocate_kvm_cpuid2(void) | 381 | static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) |
| 382 | { | 382 | { |
| 383 | struct kvm_cpuid2 *cpuid; | 383 | struct kvm_cpuid2 *cpuid; |
| 384 | int nent = 100; | 384 | int nent = 100; |
| @@ -402,17 +402,21 @@ struct kvm_cpuid2 *allocate_kvm_cpuid2(void) | |||
| 402 | * Input Args: None | 402 | * Input Args: None |
| 403 | * | 403 | * |
| 404 | * Output Args: | 404 | * Output Args: |
| 405 | * cpuid - The supported KVM CPUID | ||
| 406 | * | 405 | * |
| 407 | * Return: void | 406 | * Return: The supported KVM CPUID |
| 408 | * | 407 | * |
| 409 | * Get the guest CPUID supported by KVM. | 408 | * Get the guest CPUID supported by KVM. |
| 410 | */ | 409 | */ |
| 411 | void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) | 410 | struct kvm_cpuid2 *kvm_get_supported_cpuid(void) |
| 412 | { | 411 | { |
| 412 | static struct kvm_cpuid2 *cpuid; | ||
| 413 | int ret; | 413 | int ret; |
| 414 | int kvm_fd; | 414 | int kvm_fd; |
| 415 | 415 | ||
| 416 | if (cpuid) | ||
| 417 | return cpuid; | ||
| 418 | |||
| 419 | cpuid = allocate_kvm_cpuid2(); | ||
| 416 | kvm_fd = open(KVM_DEV_PATH, O_RDONLY); | 420 | kvm_fd = open(KVM_DEV_PATH, O_RDONLY); |
| 417 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", | 421 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", |
| 418 | KVM_DEV_PATH, kvm_fd, errno); | 422 | KVM_DEV_PATH, kvm_fd, errno); |
| @@ -422,6 +426,7 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) | |||
| 422 | ret, errno); | 426 | ret, errno); |
| 423 | 427 | ||
| 424 | close(kvm_fd); | 428 | close(kvm_fd); |
| 429 | return cpuid; | ||
| 425 | } | 430 | } |
| 426 | 431 | ||
| 427 | /* Locate a cpuid entry. | 432 | /* Locate a cpuid entry. |
| @@ -435,12 +440,13 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) | |||
| 435 | * Return: A pointer to the cpuid entry. Never returns NULL. | 440 | * Return: A pointer to the cpuid entry. Never returns NULL. |
| 436 | */ | 441 | */ |
| 437 | struct kvm_cpuid_entry2 * | 442 | struct kvm_cpuid_entry2 * |
| 438 | find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, | 443 | kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) |
| 439 | uint32_t index) | ||
| 440 | { | 444 | { |
| 445 | struct kvm_cpuid2 *cpuid; | ||
| 441 | struct kvm_cpuid_entry2 *entry = NULL; | 446 | struct kvm_cpuid_entry2 *entry = NULL; |
| 442 | int i; | 447 | int i; |
| 443 | 448 | ||
| 449 | cpuid = kvm_get_supported_cpuid(); | ||
| 444 | for (i = 0; i < cpuid->nent; i++) { | 450 | for (i = 0; i < cpuid->nent; i++) { |
| 445 | if (cpuid->entries[i].function == function && | 451 | if (cpuid->entries[i].function == function && |
| 446 | cpuid->entries[i].index == index) { | 452 | cpuid->entries[i].index == index) { |
| @@ -1435,7 +1441,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, | |||
| 1435 | sparsebit_idx_t pg; | 1441 | sparsebit_idx_t pg; |
| 1436 | 1442 | ||
| 1437 | TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " | 1443 | TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " |
| 1438 | "not divisable by page size.\n" | 1444 | "not divisible by page size.\n" |
| 1439 | " paddr_min: 0x%lx page_size: 0x%x", | 1445 | " paddr_min: 0x%lx page_size: 0x%x", |
| 1440 | paddr_min, vm->page_size); | 1446 | paddr_min, vm->page_size); |
| 1441 | 1447 | ||
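kvm_get_supported_cpuid() now caches its result in a function-local static, so the KVM_GET_SUPPORTED_CPUID ioctl is issued at most once per process. That is fine for these single-threaded tests but is not safe against concurrent first calls. The pattern, reduced to a sketch with a hypothetical expensive query standing in for the ioctl:

#include <stdlib.h>

struct query_result {
	int data;
};

/* First caller pays for the work; everyone else gets the cached
 * pointer. Deliberately not thread-safe, matching the test library. */
static struct query_result *get_query_result(void)
{
	static struct query_result *cached;

	if (cached)
		return cached;

	cached = calloc(1, sizeof(*cached));
	cached->data = 42;	/* stand-in for the real ioctl work */
	return cached;
}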
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 0c5cf3e0cb6f..b132bc95d183 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c | |||
| @@ -121,7 +121,7 @@ | |||
| 121 | * avoided by moving the setting of the nodes mask bits into | 121 | * avoided by moving the setting of the nodes mask bits into |
| 122 | * the previous nodes num_after setting. | 122 | * the previous nodes num_after setting. |
| 123 | * | 123 | * |
| 124 | * + Node starting index is evenly divisable by the number of bits | 124 | * + Node starting index is evenly divisible by the number of bits |
| 125 | * within a nodes mask member. | 125 | * within a nodes mask member. |
| 126 | * | 126 | * |
| 127 | * + Nodes never represent a range of bits that wrap around the | 127 | * + Nodes never represent a range of bits that wrap around the |
| @@ -1741,7 +1741,7 @@ void sparsebit_validate_internal(struct sparsebit *s) | |||
| 1741 | 1741 | ||
| 1742 | /* Validate node index is divisible by the mask size */ | 1742 | /* Validate node index is divisible by the mask size */ |
| 1743 | if (nodep->idx % MASK_BITS) { | 1743 | if (nodep->idx % MASK_BITS) { |
| 1744 | fprintf(stderr, "Node index not divisable by " | 1744 | fprintf(stderr, "Node index not divisible by " |
| 1745 | "mask size,\n" | 1745 | "mask size,\n" |
| 1746 | " nodep: %p nodep->idx: 0x%lx " | 1746 | " nodep: %p nodep->idx: 0x%lx " |
| 1747 | "MASK_BITS: %lu\n", | 1747 | "MASK_BITS: %lu\n", |
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c new file mode 100644 index 000000000000..0231bc0aae7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/vmx.c | |||
| @@ -0,0 +1,243 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/lib/vmx.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #define _GNU_SOURCE /* for program_invocation_name */ | ||
| 10 | |||
| 11 | #include "test_util.h" | ||
| 12 | #include "kvm_util.h" | ||
| 13 | #include "x86.h" | ||
| 14 | #include "vmx.h" | ||
| 15 | |||
| 16 | /* Create a default VM for VMX tests. | ||
| 17 | * | ||
| 18 | * Input Args: | ||
| 19 | * vcpuid - The id of the single VCPU to add to the VM. | ||
| 20 | * guest_code - The vCPU's entry point | ||
| 21 | * | ||
| 22 | * Output Args: None | ||
| 23 | * | ||
| 24 | * Return: | ||
| 25 | * Pointer to opaque structure that describes the created VM. | ||
| 26 | */ | ||
| 27 | struct kvm_vm * | ||
| 28 | vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) | ||
| 29 | { | ||
| 30 | struct kvm_cpuid2 *cpuid; | ||
| 31 | struct kvm_vm *vm; | ||
| 32 | vm_vaddr_t vmxon_vaddr; | ||
| 33 | vm_paddr_t vmxon_paddr; | ||
| 34 | vm_vaddr_t vmcs_vaddr; | ||
| 35 | vm_paddr_t vmcs_paddr; | ||
| 36 | |||
| 37 | vm = vm_create_default(vcpuid, (void *) guest_code); | ||
| 38 | |||
| 39 | /* Enable nesting in CPUID */ | ||
| 40 | vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); | ||
| 41 | |||
| 42 | /* Setup of a region of guest memory for the vmxon region. */ | ||
| 43 | vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); | ||
| 44 | vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); | ||
| 45 | |||
| 46 | /* Setup of a region of guest memory for a vmcs. */ | ||
| 47 | vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); | ||
| 48 | vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); | ||
| 49 | |||
| 50 | vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, | ||
| 51 | vmcs_paddr); | ||
| 52 | |||
| 53 | return vm; | ||
| 54 | } | ||
| 55 | |||
| 56 | void prepare_for_vmx_operation(void) | ||
| 57 | { | ||
| 58 | uint64_t feature_control; | ||
| 59 | uint64_t required; | ||
| 60 | unsigned long cr0; | ||
| 61 | unsigned long cr4; | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Ensure bits in CR0 and CR4 are valid in VMX operation: | ||
| 65 | * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. | ||
| 66 | * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. | ||
| 67 | */ | ||
| 68 | __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); | ||
| 69 | cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); | ||
| 70 | cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); | ||
| 71 | __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); | ||
| 72 | |||
| 73 | __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); | ||
| 74 | cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); | ||
| 75 | cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); | ||
| 76 | /* Enable VMX operation */ | ||
| 77 | cr4 |= X86_CR4_VMXE; | ||
| 78 | __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: | ||
| 82 | * Bit 0: Lock bit. If clear, VMXON causes a #GP. | ||
| 83 | * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON | ||
| 84 | * outside of SMX causes a #GP. | ||
| 85 | */ | ||
| 86 | required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | ||
| 87 | required |= FEATURE_CONTROL_LOCKED; | ||
| 88 | feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); | ||
| 89 | if ((feature_control & required) != required) | ||
| 90 | wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); | ||
| 91 | } | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Initialize the control fields to the most basic settings possible. | ||
| 95 | */ | ||
| 96 | static inline void init_vmcs_control_fields(void) | ||
| 97 | { | ||
| 98 | vmwrite(VIRTUAL_PROCESSOR_ID, 0); | ||
| 99 | vmwrite(POSTED_INTR_NV, 0); | ||
| 100 | |||
| 101 | vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); | ||
| 102 | vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); | ||
| 103 | vmwrite(EXCEPTION_BITMAP, 0); | ||
| 104 | vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); | ||
| 105 | vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ | ||
| 106 | vmwrite(CR3_TARGET_COUNT, 0); | ||
| 107 | vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | | ||
| 108 | VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ | ||
| 109 | vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); | ||
| 110 | vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); | ||
| 111 | vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | | ||
| 112 | VM_ENTRY_IA32E_MODE); /* 64-bit guest */ | ||
| 113 | vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); | ||
| 114 | vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); | ||
| 115 | vmwrite(TPR_THRESHOLD, 0); | ||
| 116 | vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); | ||
| 117 | |||
| 118 | vmwrite(CR0_GUEST_HOST_MASK, 0); | ||
| 119 | vmwrite(CR4_GUEST_HOST_MASK, 0); | ||
| 120 | vmwrite(CR0_READ_SHADOW, get_cr0()); | ||
| 121 | vmwrite(CR4_READ_SHADOW, get_cr4()); | ||
| 122 | } | ||
| 123 | |||
| 124 | /* | ||
| 125 | * Initialize the host state fields based on the current host state, with | ||
| 126 | * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch | ||
| 127 | * or vmresume. | ||
| 128 | */ | ||
| 129 | static inline void init_vmcs_host_state(void) | ||
| 130 | { | ||
| 131 | uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); | ||
| 132 | |||
| 133 | vmwrite(HOST_ES_SELECTOR, get_es()); | ||
| 134 | vmwrite(HOST_CS_SELECTOR, get_cs()); | ||
| 135 | vmwrite(HOST_SS_SELECTOR, get_ss()); | ||
| 136 | vmwrite(HOST_DS_SELECTOR, get_ds()); | ||
| 137 | vmwrite(HOST_FS_SELECTOR, get_fs()); | ||
| 138 | vmwrite(HOST_GS_SELECTOR, get_gs()); | ||
| 139 | vmwrite(HOST_TR_SELECTOR, get_tr()); | ||
| 140 | |||
| 141 | if (exit_controls & VM_EXIT_LOAD_IA32_PAT) | ||
| 142 | vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); | ||
| 143 | if (exit_controls & VM_EXIT_LOAD_IA32_EFER) | ||
| 144 | vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); | ||
| 145 | if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | ||
| 146 | vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, | ||
| 147 | rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); | ||
| 148 | |||
| 149 | vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); | ||
| 150 | |||
| 151 | vmwrite(HOST_CR0, get_cr0()); | ||
| 152 | vmwrite(HOST_CR3, get_cr3()); | ||
| 153 | vmwrite(HOST_CR4, get_cr4()); | ||
| 154 | vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); | ||
| 155 | vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); | ||
| 156 | vmwrite(HOST_TR_BASE, | ||
| 157 | get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); | ||
| 158 | vmwrite(HOST_GDTR_BASE, get_gdt_base()); | ||
| 159 | vmwrite(HOST_IDTR_BASE, get_idt_base()); | ||
| 160 | vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); | ||
| 161 | vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); | ||
| 162 | } | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Initialize the guest state fields essentially as a clone of | ||
| 166 | * the host state fields. Some host state fields have fixed | ||
| 167 | * values, and we set the corresponding guest state fields accordingly. | ||
| 168 | */ | ||
| 169 | static inline void init_vmcs_guest_state(void *rip, void *rsp) | ||
| 170 | { | ||
| 171 | vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); | ||
| 172 | vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); | ||
| 173 | vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); | ||
| 174 | vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); | ||
| 175 | vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); | ||
| 176 | vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); | ||
| 177 | vmwrite(GUEST_LDTR_SELECTOR, 0); | ||
| 178 | vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); | ||
| 179 | vmwrite(GUEST_INTR_STATUS, 0); | ||
| 180 | vmwrite(GUEST_PML_INDEX, 0); | ||
| 181 | |||
| 182 | vmwrite(VMCS_LINK_POINTER, -1ll); | ||
| 183 | vmwrite(GUEST_IA32_DEBUGCTL, 0); | ||
| 184 | vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); | ||
| 185 | vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); | ||
| 186 | vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, | ||
| 187 | vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); | ||
| 188 | |||
| 189 | vmwrite(GUEST_ES_LIMIT, -1); | ||
| 190 | vmwrite(GUEST_CS_LIMIT, -1); | ||
| 191 | vmwrite(GUEST_SS_LIMIT, -1); | ||
| 192 | vmwrite(GUEST_DS_LIMIT, -1); | ||
| 193 | vmwrite(GUEST_FS_LIMIT, -1); | ||
| 194 | vmwrite(GUEST_GS_LIMIT, -1); | ||
| 195 | vmwrite(GUEST_LDTR_LIMIT, -1); | ||
| 196 | vmwrite(GUEST_TR_LIMIT, 0x67); | ||
| 197 | vmwrite(GUEST_GDTR_LIMIT, 0xffff); | ||
| 198 | vmwrite(GUEST_IDTR_LIMIT, 0xffff); | ||
| 199 | vmwrite(GUEST_ES_AR_BYTES, | ||
| 200 | vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); | ||
| 201 | vmwrite(GUEST_CS_AR_BYTES, 0xa09b); | ||
| 202 | vmwrite(GUEST_SS_AR_BYTES, 0xc093); | ||
| 203 | vmwrite(GUEST_DS_AR_BYTES, | ||
| 204 | vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); | ||
| 205 | vmwrite(GUEST_FS_AR_BYTES, | ||
| 206 | vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); | ||
| 207 | vmwrite(GUEST_GS_AR_BYTES, | ||
| 208 | vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); | ||
| 209 | vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); | ||
| 210 | vmwrite(GUEST_TR_AR_BYTES, 0x8b); | ||
| 211 | vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); | ||
| 212 | vmwrite(GUEST_ACTIVITY_STATE, 0); | ||
| 213 | vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); | ||
| 214 | vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); | ||
| 215 | |||
| 216 | vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); | ||
| 217 | vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); | ||
| 218 | vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); | ||
| 219 | vmwrite(GUEST_ES_BASE, 0); | ||
| 220 | vmwrite(GUEST_CS_BASE, 0); | ||
| 221 | vmwrite(GUEST_SS_BASE, 0); | ||
| 222 | vmwrite(GUEST_DS_BASE, 0); | ||
| 223 | vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); | ||
| 224 | vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); | ||
| 225 | vmwrite(GUEST_LDTR_BASE, 0); | ||
| 226 | vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); | ||
| 227 | vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); | ||
| 228 | vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); | ||
| 229 | vmwrite(GUEST_DR7, 0x400); | ||
| 230 | vmwrite(GUEST_RSP, (uint64_t)rsp); | ||
| 231 | vmwrite(GUEST_RIP, (uint64_t)rip); | ||
| 232 | vmwrite(GUEST_RFLAGS, 2); | ||
| 233 | vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); | ||
| 234 | vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); | ||
| 235 | vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); | ||
| 236 | } | ||
| 237 | |||
| 238 | void prepare_vmcs(void *guest_rip, void *guest_rsp) | ||
| 239 | { | ||
| 240 | init_vmcs_control_fields(); | ||
| 241 | init_vmcs_host_state(); | ||
| 242 | init_vmcs_guest_state(guest_rip, guest_rsp); | ||
| 243 | } | ||
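The CR0/CR4 handling in prepare_for_vmx_operation() follows the SDM's fixed-bit rule: a bit set in the _FIXED0 MSR must be 1 in the register, and a bit clear in _FIXED1 must be 0. Factored out as a standalone helper, under the assumption that the caller supplies the two MSR values:

#include <stdint.h>

/* Clamp a control-register value to the VMX fixed-bit constraints:
 * AND with fixed1 clears must-be-zero bits, OR with fixed0 sets
 * must-be-one bits. */
static uint64_t vmx_apply_fixed_bits(uint64_t cr, uint64_t fixed0,
				     uint64_t fixed1)
{
	return (cr & fixed1) | fixed0;
}

/* Usage mirroring the code above (rdmsr() as in the selftest library):
 *   cr4 = vmx_apply_fixed_bits(cr4, rdmsr(MSR_IA32_VMX_CR4_FIXED0),
 *                              rdmsr(MSR_IA32_VMX_CR4_FIXED1));
 *   cr4 |= X86_CR4_VMXE;
 */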
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..8f7f62093add --- /dev/null +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c | |||
| @@ -0,0 +1,231 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/vmx_tsc_adjust_test.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | * | ||
| 9 | * IA32_TSC_ADJUST test | ||
| 10 | * | ||
| 11 | * According to the SDM, "if an execution of WRMSR to the | ||
| 12 | * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, | ||
| 13 | * the logical processor also adds (or subtracts) value X from the | ||
| 14 | * IA32_TSC_ADJUST MSR." | ||
| 15 | * | ||
| 16 | * Note that when L1 doesn't intercept writes to IA32_TSC, a | ||
| 17 | * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC | ||
| 18 | * value. | ||
| 19 | * | ||
| 20 | * This test verifies that this unusual case is handled correctly. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include "test_util.h" | ||
| 24 | #include "kvm_util.h" | ||
| 25 | #include "x86.h" | ||
| 26 | #include "vmx.h" | ||
| 27 | |||
| 28 | #include <string.h> | ||
| 29 | #include <sys/ioctl.h> | ||
| 30 | |||
| 31 | #ifndef MSR_IA32_TSC_ADJUST | ||
| 32 | #define MSR_IA32_TSC_ADJUST 0x3b | ||
| 33 | #endif | ||
| 34 | |||
| 35 | #define PAGE_SIZE 4096 | ||
| 36 | #define VCPU_ID 5 | ||
| 37 | |||
| 38 | #define TSC_ADJUST_VALUE (1ll << 32) | ||
| 39 | #define TSC_OFFSET_VALUE -(1ll << 48) | ||
| 40 | |||
| 41 | enum { | ||
| 42 | PORT_ABORT = 0x1000, | ||
| 43 | PORT_REPORT, | ||
| 44 | PORT_DONE, | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct vmx_page { | ||
| 48 | vm_vaddr_t virt; | ||
| 49 | vm_paddr_t phys; | ||
| 50 | }; | ||
| 51 | |||
| 52 | enum { | ||
| 53 | VMXON_PAGE = 0, | ||
| 54 | VMCS_PAGE, | ||
| 55 | MSR_BITMAP_PAGE, | ||
| 56 | |||
| 57 | NUM_VMX_PAGES, | ||
| 58 | }; | ||
| 59 | |||
| 60 | struct kvm_single_msr { | ||
| 61 | struct kvm_msrs header; | ||
| 62 | struct kvm_msr_entry entry; | ||
| 63 | } __attribute__((packed)); | ||
| 64 | |||
| 65 | /* The virtual machine object. */ | ||
| 66 | static struct kvm_vm *vm; | ||
| 67 | |||
| 68 | /* Array of vmx_page descriptors that is shared with the guest. */ | ||
| 69 | struct vmx_page *vmx_pages; | ||
| 70 | |||
| 71 | #define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) | ||
| 72 | static void do_exit_to_l0(uint16_t port, unsigned long arg) | ||
| 73 | { | ||
| 74 | __asm__ __volatile__("in %[port], %%al" | ||
| 75 | : | ||
| 76 | : [port]"d"(port), "D"(arg) | ||
| 77 | : "rax"); | ||
| 78 | } | ||
| 79 | |||
| 80 | |||
| 81 | #define GUEST_ASSERT(_condition) do { \ | ||
| 82 | if (!(_condition)) \ | ||
| 83 | exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \ | ||
| 84 | } while (0) | ||
| 85 | |||
| 86 | static void check_ia32_tsc_adjust(int64_t max) | ||
| 87 | { | ||
| 88 | int64_t adjust; | ||
| 89 | |||
| 90 | adjust = rdmsr(MSR_IA32_TSC_ADJUST); | ||
| 91 | exit_to_l0(PORT_REPORT, adjust); | ||
| 92 | GUEST_ASSERT(adjust <= max); | ||
| 93 | } | ||
| 94 | |||
| 95 | static void l2_guest_code(void) | ||
| 96 | { | ||
| 97 | uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; | ||
| 98 | |||
| 99 | wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); | ||
| 100 | check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); | ||
| 101 | |||
| 102 | /* Exit to L1 */ | ||
| 103 | __asm__ __volatile__("vmcall"); | ||
| 104 | } | ||
| 105 | |||
| 106 | static void l1_guest_code(struct vmx_page *vmx_pages) | ||
| 107 | { | ||
| 108 | #define L2_GUEST_STACK_SIZE 64 | ||
| 109 | unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; | ||
| 110 | uint32_t control; | ||
| 111 | uintptr_t save_cr3; | ||
| 112 | |||
| 113 | GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); | ||
| 114 | wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); | ||
| 115 | check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); | ||
| 116 | |||
| 117 | prepare_for_vmx_operation(); | ||
| 118 | |||
| 119 | /* Enter VMX root operation. */ | ||
| 120 | *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); | ||
| 121 | GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); | ||
| 122 | |||
| 123 | /* Load a VMCS. */ | ||
| 124 | *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); | ||
| 125 | GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); | ||
| 126 | GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); | ||
| 127 | |||
| 128 | /* Prepare the VMCS for L2 execution. */ | ||
| 129 | prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); | ||
| 130 | control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); | ||
| 131 | control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; | ||
| 132 | vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); | ||
| 133 | vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); | ||
| 134 | vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); | ||
| 135 | |||
| 136 | /* Jump into L2. First, test failure to load guest CR3. */ | ||
| 137 | save_cr3 = vmreadz(GUEST_CR3); | ||
| 138 | vmwrite(GUEST_CR3, -1ull); | ||
| 139 | GUEST_ASSERT(!vmlaunch()); | ||
| 140 | GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == | ||
| 141 | (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); | ||
| 142 | check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); | ||
| 143 | vmwrite(GUEST_CR3, save_cr3); | ||
| 144 | |||
| 145 | GUEST_ASSERT(!vmlaunch()); | ||
| 146 | GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); | ||
| 147 | |||
| 148 | check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); | ||
| 149 | |||
| 150 | exit_to_l0(PORT_DONE, 0); | ||
| 151 | } | ||
| 152 | |||
| 153 | static void allocate_vmx_page(struct vmx_page *page) | ||
| 154 | { | ||
| 155 | vm_vaddr_t virt; | ||
| 156 | |||
| 157 | virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); | ||
| 158 | memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); | ||
| 159 | |||
| 160 | page->virt = virt; | ||
| 161 | page->phys = addr_gva2gpa(vm, virt); | ||
| 162 | } | ||
| 163 | |||
| 164 | static vm_vaddr_t allocate_vmx_pages(void) | ||
| 165 | { | ||
| 166 | vm_vaddr_t vmx_pages_vaddr; | ||
| 167 | int i; | ||
| 168 | |||
| 169 | vmx_pages_vaddr = vm_vaddr_alloc( | ||
| 170 | vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); | ||
| 171 | |||
| 172 | vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); | ||
| 173 | |||
| 174 | for (i = 0; i < NUM_VMX_PAGES; i++) | ||
| 175 | allocate_vmx_page(&vmx_pages[i]); | ||
| 176 | |||
| 177 | return vmx_pages_vaddr; | ||
| 178 | } | ||
| 179 | |||
| 180 | void report(int64_t val) | ||
| 181 | { | ||
| 182 | printf("IA32_TSC_ADJUST is %lld (%lld * TSC_ADJUST_VALUE + %lld).\n", | ||
| 183 | (long long)val, (long long)(val / TSC_ADJUST_VALUE), (long long)(val % TSC_ADJUST_VALUE)); | ||
| 184 | } | ||
| 185 | |||
| 186 | int main(int argc, char *argv[]) | ||
| 187 | { | ||
| 188 | vm_vaddr_t vmx_pages_vaddr; | ||
| 189 | struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); | ||
| 190 | |||
| 191 | if (!(entry->ecx & CPUID_VMX)) { | ||
| 192 | printf("nested VMX not enabled, skipping test\n"); | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); | ||
| 197 | |||
| 198 | /* Allocate VMX pages and shared descriptors (vmx_pages). */ | ||
| 199 | vmx_pages_vaddr = allocate_vmx_pages(); | ||
| 200 | vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); | ||
| 201 | |||
| 202 | for (;;) { | ||
| 203 | volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); | ||
| 204 | struct kvm_regs regs; | ||
| 205 | |||
| 206 | vcpu_run(vm, VCPU_ID); | ||
| 207 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
| 208 | "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", | ||
| 209 | run->exit_reason, | ||
| 210 | exit_reason_str(run->exit_reason)); | ||
| 211 | |||
| 212 | vcpu_regs_get(vm, VCPU_ID, ®s); | ||
| 213 | |||
| 214 | switch (run->io.port) { | ||
| 215 | case PORT_ABORT: | ||
| 216 | TEST_ASSERT(false, "%s", (const char *) regs.rdi); | ||
| 217 | /* NOT REACHED */ | ||
| 218 | case PORT_REPORT: | ||
| 219 | report(regs.rdi); | ||
| 220 | break; | ||
| 221 | case PORT_DONE: | ||
| 222 | goto done; | ||
| 223 | default: | ||
| 224 | TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); | ||
| 225 | } | ||
| 226 | } | ||
| 227 | |||
| 228 | kvm_vm_free(vm); | ||
| 229 | done: | ||
| 230 | return 0; | ||
| 231 | } | ||
