author     Linus Torvalds <torvalds@linux-foundation.org>  2014-04-14 19:21:28 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-04-14 19:21:28 -0400
commit     55101e2d6ce1c780f6ee8fee5f37306971aac6cd (patch)
tree       348adcfd97517ee9b5041f31df15cdd7fedb8ea7 /arch
parent     dafe344d2288f0ebc0e3d4c6a5eb15bc82189c53 (diff)
parent     b351c39cc9e0151cee9b8d52a1e714928faabb38 (diff)
Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Marcelo Tosatti:
 - Fix for guest triggerable BUG_ON (CVE-2014-0155)
 - CR4.SMAP support
 - Spurious WARN_ON() fix

* git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: remove WARN_ON from get_kernel_ns()
  KVM: Rename variable smep to cr4_smep
  KVM: expose SMAP feature to guest
  KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode
  KVM: Add SMAP support when setting CR4
  KVM: Remove SMAP bit from CR4_RESERVED_BITS
  KVM: ioapic: try to recover if pending_eoi goes out of range
  KVM: ioapic: fix assignment of ioapic->rtc_status.pending_eoi (CVE-2014-0155)
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/kvm_host.h   2
-rw-r--r--  arch/x86/kvm/cpuid.c              2
-rw-r--r--  arch/x86/kvm/cpuid.h              8
-rw-r--r--  arch/x86/kvm/mmu.c               38
-rw-r--r--  arch/x86/kvm/mmu.h               44
-rw-r--r--  arch/x86/kvm/paging_tmpl.h        2
-rw-r--r--  arch/x86/kvm/vmx.c               11
-rw-r--r--  arch/x86/kvm/x86.c               10
8 files changed, 94 insertions, 23 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcaf9c961265..7de069afb382 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
                           | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                           | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
                           | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bea60671ef8a..f47a104a749c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -308,7 +308,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         const u32 kvm_supported_word9_x86_features =
                 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-                F(ADX);
+                F(ADX) | F(SMAP);
 
         /* all calls to cpuid_count() should be made on the same cpu */
         get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7ed8c1..eeecbed26ac7 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
         return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+        struct kvm_cpuid_entry2 *best;
+
+        best = kvm_find_cpuid_entry(vcpu, 7, 0);
+        return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
         struct kvm_cpuid_entry2 *best;
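
The new guest_cpuid_has_smap() helper mirrors guest_cpuid_has_smep(): it looks up the guest's cached CPUID leaf 7 (subleaf 0) entry and tests the SMAP bit in EBX. As a point of reference, a minimal host-side sketch of the same CPUID test follows; it uses GCC/Clang's <cpuid.h> wrapper rather than KVM's cached guest entries, and everything beyond the leaf and bit numbers (which come from the SDM) is illustrative:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* CPUID.(EAX=7, ECX=0):EBX bit 20 advertises SMAP. */
        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                return 1;       /* leaf 7 not available on this CPU */

        printf("SMAP supported: %s\n", (ebx & (1u << 20)) ? "yes" : "no");
        return 0;
}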
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f5704d9e5ddc..813d31038b93 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
         }
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
                 struct kvm_mmu *mmu, bool ept)
 {
         unsigned bit, byte, pfec;
         u8 map;
-        bool fault, x, w, u, wf, uf, ff, smep;
+        bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
 
-        smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+        cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+        cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
         for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
                 pfec = byte << 1;
                 map = 0;
                 wf = pfec & PFERR_WRITE_MASK;
                 uf = pfec & PFERR_USER_MASK;
                 ff = pfec & PFERR_FETCH_MASK;
+                /*
+                 * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+                 * subject to SMAP restrictions, and cleared otherwise. The
+                 * bit is only meaningful if the SMAP bit is set in CR4.
+                 */
+                smapf = !(pfec & PFERR_RSVD_MASK);
                 for (bit = 0; bit < 8; ++bit) {
                         x = bit & ACC_EXEC_MASK;
                         w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
                                 /* Allow supervisor writes if !cr0.wp */
                                 w |= !is_write_protection(vcpu) && !uf;
                                 /* Disallow supervisor fetches of user code if cr4.smep */
-                                x &= !(smep && u && !uf);
+                                x &= !(cr4_smep && u && !uf);
+
+                                /*
+                                 * SMAP: kernel-mode data accesses from user-mode
+                                 * mappings should fault. A fault is considered
+                                 * as a SMAP violation if all of the following
+                                 * conditions are true:
+                                 * - X86_CR4_SMAP is set in CR4
+                                 * - A user page is accessed
+                                 * - Page fault in kernel mode
+                                 * - if CPL = 3 or X86_EFLAGS_AC is clear
+                                 *
+                                 * Here, we cover the first three conditions.
+                                 * The fourth is computed dynamically in
+                                 * permission_fault() and is in smapf.
+                                 *
+                                 * Also, SMAP does not affect instruction
+                                 * fetches, add the !ff check here to make it
+                                 * clearer.
+                                 */
+                                smap = cr4_smap && u && !uf && !ff;
                         } else
                                 /* Not really needed: no U/S accesses on ept */
                                 u = 1;
 
-                        fault = (ff && !x) || (uf && !u) || (wf && !w);
+                        fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+                                (smapf && smap);
                         map |= fault << bit;
                 }
                 mmu->permissions[byte] = map;
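
Read together, the two mmu.c hunks precompute the static part of the SMAP check once per permission-bitmap entry: the new fault term (smapf && smap) fires only for supervisor data accesses to user pages when CR4.SMAP is set and the caller did not set PFERR_RSVD_MASK in the error code. A compressed, standalone sketch of that per-entry computation follows; the names and constants are local stand-ins rather than the kernel's, the shadow-paging (non-EPT) case is assumed, and the SMEP/execute term is omitted for brevity:

#include <stdbool.h>
#include <stdio.h>

#define PFERR_WRITE_MASK (1u << 1)
#define PFERR_USER_MASK  (1u << 2)
#define PFERR_RSVD_MASK  (1u << 3)
#define PFERR_FETCH_MASK (1u << 4)

/* Does one (pfec, pte) combination fault? Shadow paging, SMEP omitted. */
static bool would_fault(unsigned pfec, bool pte_user, bool pte_write,
                        bool cr4_smap, bool cr0_wp)
{
        bool wf = pfec & PFERR_WRITE_MASK;      /* write access */
        bool uf = pfec & PFERR_USER_MASK;       /* user-mode access */
        bool ff = pfec & PFERR_FETCH_MASK;      /* instruction fetch */
        /* RSVD set in pfec means "SMAP checks overridden" (EFLAGS.AC case). */
        bool smapf = !(pfec & PFERR_RSVD_MASK);

        bool u = pte_user;
        bool w = pte_write || (!cr0_wp && !uf); /* !CR0.WP lets supervisor write */
        /* Static SMAP term: supervisor data access touching a user page. */
        bool smap = cr4_smap && u && !uf && !ff;

        return (uf && !u) || (wf && !w) || (smapf && smap);
}

int main(void)
{
        /* Supervisor read of a user page, CR4.SMAP=1, no AC override: faults. */
        printf("%d\n", would_fault(0, true, true, true, true));                /* 1 */
        /* Same access with the RSVD override bit set in pfec: allowed. */
        printf("%d\n", would_fault(PFERR_RSVD_MASK, true, true, true, true));  /* 0 */
        return 0;
}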
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 292615274358..3842e70bdb7c 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
 
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
                 bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+                bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -110,10 +118,30 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
  * Will a fault with a given page-fault error code (pfec) cause a permission
  * fault with the given access (in ACC_* format)?
  */
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
-                                    unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+                                    unsigned pte_access, unsigned pfec)
 {
-        return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+        int cpl = kvm_x86_ops->get_cpl(vcpu);
+        unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+        /*
+         * If CPL < 3, SMAP prevention is disabled if EFLAGS.AC = 1.
+         *
+         * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+         * (these are implicit supervisor accesses) regardless of the value
+         * of EFLAGS.AC.
+         *
+         * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+         * the result in X86_EFLAGS_AC. We then insert it in place of
+         * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+         * but it will be one in index if SMAP checks are being overridden.
+         * It is important to keep this branchless.
+         */
+        unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+        int index = (pfec >> 1) +
+                    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+        return (mmu->permissions[index] >> pte_access) & 1;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
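
The comment in this hunk covers the dynamic half of the check: whether EFLAGS.AC overrides SMAP is folded into the permissions[] index without a branch, by sliding the AC bit (bit 18 of RFLAGS) down onto the PFERR_RSVD position of the index. With the kernel's constants the shift is 18 - 3 + 1 = 16, so a supervisor access (CPL < 3) with AC set adds 4 (bit 2) to pfec >> 1. A small standalone sketch of just that arithmetic, with the constants copied from the patch and the rest purely illustrative:

#include <stdio.h>

#define X86_EFLAGS_AC_BIT 18
#define X86_EFLAGS_AC     (1UL << X86_EFLAGS_AC_BIT)
#define PFERR_RSVD_BIT    3

static int permission_index(int cpl, unsigned long rflags, unsigned pfec)
{
        /*
         * (cpl - 3) is negative (all high bits set) only when cpl < 3, so the
         * AND keeps EFLAGS.AC only for supervisor CPL; CPL 3 always yields 0.
         */
        unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);

        /*
         * Shift the AC bit down so it lands on the PFERR_RSVD position of the
         * index (pfec >> 1 already dropped the present bit, hence the "+ 1").
         */
        return (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
}

int main(void)
{
        /* CPL 0 with EFLAGS.AC set: index gains bit 2, selecting the
         * "SMAP overridden" half of the permission bitmap. */
        printf("%d\n", permission_index(0, X86_EFLAGS_AC, 0));  /* 4 */
        /* CPL 3: AC is ignored, index stays at pfec >> 1. */
        printf("%d\n", permission_index(3, X86_EFLAGS_AC, 0));  /* 0 */
        return 0;
}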
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b1e6c1bf68d3..123efd3ec29f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,7 +353,7 @@ retry_walk:
                 walker->ptes[walker->level - 1] = pte;
         } while (!is_last_gpte(mmu, walker->level, pte));
 
-        if (unlikely(permission_fault(mmu, pte_access, access))) {
+        if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
                 errcode |= PFERR_PRESENT_MASK;
                 goto error;
         }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1320e0f8e611..1f68c5831924 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3484,13 +3484,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                         hw_cr4 &= ~X86_CR4_PAE;
                         hw_cr4 |= X86_CR4_PSE;
                         /*
-                         * SMEP is disabled if CPU is in non-paging mode in
-                         * hardware. However KVM always uses paging mode to
+                         * SMEP/SMAP is disabled if CPU is in non-paging mode
+                         * in hardware. However KVM always uses paging mode to
                          * emulate guest non-paging mode with TDP.
-                         * To emulate this behavior, SMEP needs to be manually
-                         * disabled when guest switches to non-paging mode.
+                         * To emulate this behavior, SMEP/SMAP needs to be
+                         * manually disabled when guest switches to non-paging
+                         * mode.
                          */
-                        hw_cr4 &= ~X86_CR4_SMEP;
+                        hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
                 } else if (!(cr4 & X86_CR4_PAE)) {
                         hw_cr4 &= ~X86_CR4_PAE;
                 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d1b5cd4d34c..8b8fc0b792ba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
                 return 1;
 
+        if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+                return 1;
+
         if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
                 return 1;
 
@@ -680,6 +683,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
             (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
                 kvm_mmu_reset_context(vcpu);
 
+        if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+                update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
         if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
                 kvm_update_cpuid(vcpu);
 
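
The two kvm_set_cr4() hunks add the guest-visible gating: CR4.SMAP is refused (as if still reserved) when the guest's CPUID does not advertise SMAP, and toggling the bit only rebuilds the walk MMU's permission bitmap, since SMAP changes the software permission check rather than the page tables. A standalone sketch of that gating logic follows; CR4.SMAP is bit 21, and the helper below is illustrative rather than the kernel function:

#include <stdbool.h>
#include <stdio.h>

#define X86_CR4_SMAP (1UL << 21)

static int check_cr4_smap(unsigned long cr4, unsigned long old_cr4,
                          bool guest_has_smap, bool *rebuild_permissions)
{
        if (!guest_has_smap && (cr4 & X86_CR4_SMAP))
                return 1;       /* caller injects #GP, as for a reserved bit */

        /* a real kvm_set_cr4() performs its remaining checks and the write here */

        *rebuild_permissions = (cr4 ^ old_cr4) & X86_CR4_SMAP;
        return 0;
}

int main(void)
{
        bool rebuild;
        /* Guest without SMAP in CPUID trying to set CR4.SMAP: rejected. */
        printf("%d\n", check_cr4_smap(X86_CR4_SMAP, 0, false, &rebuild));       /* 1 */
        /* Guest with SMAP toggling the bit: accepted, bitmap rebuild requested. */
        printf("%d\n", check_cr4_smap(X86_CR4_SMAP, 0, true, &rebuild));        /* 0 */
        return 0;
}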
@@ -1117,7 +1123,6 @@ static inline u64 get_kernel_ns(void)
 {
         struct timespec ts;
 
-        WARN_ON(preemptible());
         ktime_get_ts(&ts);
         monotonic_to_bootbased(&ts);
         return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
                 | (write ? PFERR_WRITE_MASK : 0);
 
         if (vcpu_match_mmio_gva(vcpu, gva)
-            && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+            && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+                                 vcpu->arch.access, access)) {
                 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
                         (gva & (PAGE_SIZE - 1));
                 trace_vcpu_match_mmio(gva, *gpa, write, false);