author     Linus Torvalds <torvalds@linux-foundation.org>  2014-04-14 19:21:28 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-04-14 19:21:28 -0400
commit     55101e2d6ce1c780f6ee8fee5f37306971aac6cd (patch)
tree       348adcfd97517ee9b5041f31df15cdd7fedb8ea7 /arch
parent     dafe344d2288f0ebc0e3d4c6a5eb15bc82189c53 (diff)
parent     b351c39cc9e0151cee9b8d52a1e714928faabb38 (diff)
Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Marcelo Tosatti:
- Fix for guest triggerable BUG_ON (CVE-2014-0155)
- CR4.SMAP support
- Spurious WARN_ON() fix
* git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: remove WARN_ON from get_kernel_ns()
KVM: Rename variable smep to cr4_smep
KVM: expose SMAP feature to guest
KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode
KVM: Add SMAP support when setting CR4
KVM: Remove SMAP bit from CR4_RESERVED_BITS
KVM: ioapic: try to recover if pending_eoi goes out of range
KVM: ioapic: fix assignment of ioapic->rtc_status.pending_eoi (CVE-2014-0155)
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/kvm_host.h |  2
-rw-r--r--  arch/x86/kvm/cpuid.c            |  2
-rw-r--r--  arch/x86/kvm/cpuid.h            |  8
-rw-r--r--  arch/x86/kvm/mmu.c              | 38
-rw-r--r--  arch/x86/kvm/mmu.h              | 44
-rw-r--r--  arch/x86/kvm/paging_tmpl.h      |  2
-rw-r--r--  arch/x86/kvm/vmx.c              | 11
-rw-r--r--  arch/x86/kvm/x86.c              | 10
8 files changed, 94 insertions, 23 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcaf9c961265..7de069afb382 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
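Dropping X86_CR4_SMAP from CR4_RESERVED_BITS is what makes the bit settable at all: kvm_set_cr4() rejects any CR4 value that overlaps the reserved mask before the per-feature CPUID checks run. A minimal sketch of that gating pattern follows; the bit positions are the architectural ones, but the mask is truncated and the flow simplified, so treat it as an illustration rather than the kernel code.

/* Simplified sketch of the CR4 reserved-bits gate (illustrative subset). */
#define X86_CR4_VMXE (1UL << 13)
#define X86_CR4_SMEP (1UL << 20)
#define X86_CR4_SMAP (1UL << 21)

/* With SMAP in the allowed set (as after this patch), bit 21 is no longer
 * reserved; before the patch, any guest attempt to set CR4.SMAP failed here. */
#define CR4_ALLOWED_BITS  (X86_CR4_VMXE | X86_CR4_SMEP | X86_CR4_SMAP /* | ... */)
#define CR4_RESERVED_BITS (~(unsigned long)CR4_ALLOWED_BITS)

static int sketch_set_cr4(unsigned long cr4)
{
	if (cr4 & CR4_RESERVED_BITS)
		return 1;	/* caller injects #GP into the guest */
	/* ... per-feature guest CPUID checks follow, see the x86.c hunks below ... */
	return 0;
}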
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bea60671ef8a..f47a104a749c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -308,7 +308,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_supported_word9_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX);
+		F(ADX) | F(SMAP);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
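kvm_supported_word9_x86_features feeds the CPUID.(EAX=07H,ECX=0):EBX leaf reported to the guest, where SMEP is bit 7 and SMAP is bit 20, so adding F(SMAP) lets a guest see the feature whenever the host CPU offers it. A small host-side check of the same leaf (userspace sketch; assumes GCC/Clang's <cpuid.h> and its __get_cpuid_count() helper is available):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 7, subleaf 0: structured extended feature flags */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("SMEP: %u\n", (ebx >> 7) & 1);   /* EBX bit 7  */
	printf("SMAP: %u\n", (ebx >> 20) & 1);  /* EBX bit 20 */
	return 0;
}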
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7ed8c1..eeecbed26ac7 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
 	return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f5704d9e5ddc..813d31038b93 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 	}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
 		struct kvm_mmu *mmu, bool ept)
 {
 	unsigned bit, byte, pfec;
 	u8 map;
-	bool fault, x, w, u, wf, uf, ff, smep;
+	bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
 
-	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
 	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
 		pfec = byte << 1;
 		map = 0;
 		wf = pfec & PFERR_WRITE_MASK;
 		uf = pfec & PFERR_USER_MASK;
 		ff = pfec & PFERR_FETCH_MASK;
+		/*
+		 * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+		 * subject to SMAP restrictions, and cleared otherwise. The
+		 * bit is only meaningful if the SMAP bit is set in CR4.
+		 */
+		smapf = !(pfec & PFERR_RSVD_MASK);
 		for (bit = 0; bit < 8; ++bit) {
 			x = bit & ACC_EXEC_MASK;
 			w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 			/* Allow supervisor writes if !cr0.wp */
 			w |= !is_write_protection(vcpu) && !uf;
 			/* Disallow supervisor fetches of user code if cr4.smep */
-			x &= !(smep && u && !uf);
+			x &= !(cr4_smep && u && !uf);
+
+			/*
+			 * SMAP: kernel-mode data accesses from user-mode
+			 * mappings should fault. A fault is considered
+			 * as a SMAP violation if all of the following
+			 * conditions are true:
+			 *   - X86_CR4_SMAP is set in CR4
+			 *   - A user page is accessed
+			 *   - Page fault in kernel mode
+			 *   - if CPL = 3 or X86_EFLAGS_AC is clear
+			 *
+			 * Here, we cover the first three conditions.
+			 * The fourth is computed dynamically in
+			 * permission_fault() and is in smapf.
+			 *
+			 * Also, SMAP does not affect instruction
+			 * fetches, add the !ff check here to make it
+			 * clearer.
+			 */
+			smap = cr4_smap && u && !uf && !ff;
 		} else
 			/* Not really needed: no U/S accesses on ept */
 			u = 1;
 
-		fault = (ff && !x) || (uf && !u) || (wf && !w);
+		fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+			(smapf && smap);
 		map |= fault << bit;
 	}
 	mmu->permissions[byte] = map;
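The permissions table that this function fills has one byte per page-fault error-code combination (indexed by pfec >> 1, since the present bit is implied) and one fault bit per ACC_* access combination. With this patch the PFERR_RSVD slot of the index is repurposed: entries with that bit clear encode "SMAP enforced", entries with it set encode "SMAP overridden". The sketch below rebuilds the table for the non-EPT case and prints the fault bit for a kernel read of a user page under both halves; the ACC_* and PFERR_* values are assumed to mirror the kernel's, and this is a userspace model, not the kernel routine.

#include <stdio.h>

#define PFERR_WRITE_MASK (1u << 1)
#define PFERR_USER_MASK  (1u << 2)
#define PFERR_RSVD_MASK  (1u << 3)   /* repurposed: "SMAP check overridden" */
#define PFERR_FETCH_MASK (1u << 4)

#define ACC_EXEC_MASK  1
#define ACC_WRITE_MASK 2
#define ACC_USER_MASK  4

int main(void)
{
	int cr0_wp = 1, cr4_smep = 0, cr4_smap = 1;
	unsigned char permissions[16];

	for (unsigned byte = 0; byte < 16; ++byte) {
		unsigned pfec = byte << 1, map = 0;
		int wf = pfec & PFERR_WRITE_MASK;
		int uf = pfec & PFERR_USER_MASK;
		int ff = pfec & PFERR_FETCH_MASK;
		int smapf = !(pfec & PFERR_RSVD_MASK);

		for (unsigned bit = 0; bit < 8; ++bit) {
			int x = bit & ACC_EXEC_MASK;
			int w = bit & ACC_WRITE_MASK;
			int u = bit & ACC_USER_MASK;

			w |= !cr0_wp && !uf;              /* supervisor writes if !CR0.WP */
			x = x && !(cr4_smep && u && !uf); /* SMEP */
			int smap = cr4_smap && u && !uf && !ff;

			int fault = (ff && !x) || (uf && !u) || (wf && !w) ||
				    (smapf && smap);
			map |= fault << bit;
		}
		permissions[byte] = map;
	}

	/* Kernel read of a user page, pte_access = user|write|exec = 7:
	 * pfec = 0 keeps the RSVD slot clear (SMAP enforced -> fault),
	 * pfec = PFERR_RSVD_MASK marks the check overridden (no fault). */
	printf("enforced:   fault=%d\n", (permissions[0] >> 7) & 1);
	printf("overridden: fault=%d\n", (permissions[PFERR_RSVD_MASK >> 1] >> 7) & 1);
	return 0;
}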
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 292615274358..3842e70bdb7c 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
 
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 		bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+		bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -110,10 +118,30 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
  * Will a fault with a given page-fault error code (pfec) cause a permission
  * fault with the given access (in ACC_* format)?
  */
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
-				    unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+				    unsigned pte_access, unsigned pfec)
 {
-	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+	int cpl = kvm_x86_ops->get_cpl(vcpu);
+	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+	/*
+	 * If CPL < 3, SMAP prevention is disabled if EFLAGS.AC = 1.
+	 *
+	 * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+	 * (these are implicit supervisor accesses) regardless of the value
+	 * of EFLAGS.AC.
+	 *
+	 * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+	 * the result in X86_EFLAGS_AC. We then insert it in place of
+	 * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+	 * but it will be one in index if SMAP checks are being overridden.
+	 * It is important to keep this branchless.
+	 */
+	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+	int index = (pfec >> 1) +
+		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+	return (mmu->permissions[index] >> pte_access) & 1;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
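The arithmetic in permission_fault() is worth unpacking: X86_EFLAGS_AC is bit 18, so rflags & X86_EFLAGS_AC is either 0 or 0x40000; (cpl - 3) is negative for CPL 0-2 (its high bits, including bit 18, are set once widened) and zero for CPL 3, so the AND keeps AC only for supervisor CPLs. Shifting by X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1 = 18 - 3 + 1 = 16 turns 0x40000 into 4, which is exactly the PFERR_RSVD position after pfec has been shifted right by one for the table index. A standalone self-check of that computation, with the constants assumed from the x86 headers and the kvm_x86_ops accessors replaced by plain parameters:

#include <assert.h>
#include <stdio.h>

#define X86_EFLAGS_AC_BIT 18
#define X86_EFLAGS_AC     (1UL << X86_EFLAGS_AC_BIT)
#define PFERR_RSVD_BIT    3

static int smap_index(int cpl, unsigned long rflags, unsigned pfec)
{
	/* (cpl - 3) has its high bits (including bit 18) set for CPL 0-2
	 * and is zero for CPL 3, so AC only survives for supervisor CPLs. */
	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);

	return (pfec >> 1) +
	       (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
}

int main(void)
{
	/* CPL 0 with EFLAGS.AC set: SMAP overridden, RSVD slot (value 4) set. */
	assert(smap_index(0, X86_EFLAGS_AC, 0) == 4);
	/* CPL 0 with AC clear, or CPL 3 regardless of AC: SMAP enforced. */
	assert(smap_index(0, 0, 0) == 0);
	assert(smap_index(3, X86_EFLAGS_AC, 0) == 0);
	printf("index arithmetic ok\n");
	return 0;
}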
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b1e6c1bf68d3..123efd3ec29f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,7 +353,7 @@ retry_walk:
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
-	if (unlikely(permission_fault(mmu, pte_access, access))) {
+	if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1320e0f8e611..1f68c5831924 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3484,13 +3484,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		hw_cr4 &= ~X86_CR4_PAE;
 		hw_cr4 |= X86_CR4_PSE;
 		/*
-		 * SMEP is disabled if CPU is in non-paging mode in
-		 * hardware. However KVM always uses paging mode to
+		 * SMEP/SMAP is disabled if CPU is in non-paging mode
+		 * in hardware. However KVM always uses paging mode to
 		 * emulate guest non-paging mode with TDP.
-		 * To emulate this behavior, SMEP needs to be manually
-		 * disabled when guest switches to non-paging mode.
+		 * To emulate this behavior, SMEP/SMAP needs to be
+		 * manually disabled when guest switches to non-paging
+		 * mode.
 		 */
-		hw_cr4 &= ~X86_CR4_SMEP;
+		hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
 	} else if (!(cr4 & X86_CR4_PAE)) {
 		hw_cr4 &= ~X86_CR4_PAE;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d1b5cd4d34c..8b8fc0b792ba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
 		return 1;
 
+	if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+		return 1;
+
 	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
 		return 1;
 
@@ -680,6 +683,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
+	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
 	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
 		kvm_update_cpuid(vcpu);
 
@@ -1117,7 +1123,6 @@ static inline u64 get_kernel_ns(void)
 {
 	struct timespec ts;
 
-	WARN_ON(preemptible());
 	ktime_get_ts(&ts);
 	monotonic_to_bootbased(&ts);
 	return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 		| (write ? PFERR_WRITE_MASK : 0);
 
 	if (vcpu_match_mmio_gva(vcpu, gva)
-	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+				 vcpu->arch.access, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 			(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
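Taken together, the two kvm_set_cr4() hunks first refuse CR4.SMAP when the guest's CPUID does not advertise the feature, then rebuild the cached permission bitmap whenever the bit actually toggles, so stale SMAP policy is not left in mmu->permissions[]. A compact model of that ordering; guest_has_smap() and refresh_permissions() are hypothetical stand-ins for the KVM helpers, not real kernel symbols:

#include <stdbool.h>

#define X86_CR4_SMAP (1UL << 21)

struct vcpu_model {
	unsigned long cr4;
	bool has_smap_cpuid;
};

static bool guest_has_smap(struct vcpu_model *v) { return v->has_smap_cpuid; }
static void refresh_permissions(struct vcpu_model *v) { (void)v; /* rebuild bitmap */ }

static int sketch_set_cr4(struct vcpu_model *v, unsigned long cr4)
{
	unsigned long old_cr4 = v->cr4;

	if (!guest_has_smap(v) && (cr4 & X86_CR4_SMAP))
		return 1;			/* inject #GP */

	v->cr4 = cr4;

	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)	/* toggled in either direction */
		refresh_permissions(v);
	return 0;
}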