author     Huang Ying <ying.huang@intel.com>    2010-05-31 02:28:19 -0400
committer  Avi Kivity <avi@redhat.com>          2010-08-01 03:35:26 -0400
commit     bf998156d24bcb127318ad5bf531ac3bdfcd6449 (patch)
tree       616c19474d7cb626ff9eebc54f6753563a4322cd
parent     540ad6b62b3a188a53b51cac81d8a60d40e29fbd (diff)
KVM: Avoid killing userspace through guest SRAO MCE on unmapped pages
In common cases, a guest SRAO MCE causes the corresponding poisoned page to be unmapped and a SIGBUS to be sent to QEMU-KVM, which then relays the MCE to the guest OS. But it has been reported that if the poisoned page is accessed in the guest after it is unmapped and before the MCE is relayed to the guest OS, userspace will be killed.

The reason is as follows. Because the poisoned page has been unmapped, the guest access causes a guest exit, and kvm_mmu_page_fault is called. kvm_mmu_page_fault cannot get the poisoned page for the fault address, so kernel and user space MMIO processing are tried in turn. During user space MMIO processing, the poisoned page is accessed again, and userspace is killed by force_sig_info.

To fix the bug, kvm_mmu_page_fault sends the HWPOISON signal to QEMU-KVM itself and does not attempt kernel or user space MMIO processing for the poisoned page.

[xiao: fix warning introduced by avi]

Reported-by: Max Asbock <masbock@linux.vnet.ibm.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
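For background on the relay path described above: the kernel delivers a hwpoison SIGBUS with si_code BUS_MCEERR_AR (poison consumed on access, the case this patch makes KVM's fault path raise via kvm_send_hwpoison_signal) or BUS_MCEERR_AO (poison reported asynchronously), and a VMM catches it with an SA_SIGINFO handler. Below is a minimal user-space sketch of that receiving side; the handler body and the mce_inject_into_guest() helper are illustrative placeholders under these assumptions, not QEMU-KVM's actual implementation.

#include <signal.h>
#include <stdlib.h>
#include <string.h>

/* Fallback definitions; these are the si_code values the kernel uses
 * for hwpoison SIGBUS (see asm-generic/siginfo.h). */
#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4	/* hardware memory error consumed on access */
#endif
#ifndef BUS_MCEERR_AO
#define BUS_MCEERR_AO 5	/* hardware memory error detected asynchronously */
#endif

/* Hypothetical helper: forward the poisoned host address to the guest
 * as a virtual machine check. Stands in for the VMM's real injection. */
static void mce_inject_into_guest(void *host_addr)
{
	(void)host_addr;	/* build the vMCE for the matching gpa here */
}

static void sigbus_handler(int sig, siginfo_t *info, void *ctx)
{
	(void)sig;
	(void)ctx;
	/* si_addr is the host virtual address of the poisoned page. */
	if (info->si_code == BUS_MCEERR_AR || info->si_code == BUS_MCEERR_AO)
		mce_inject_into_guest(info->si_addr);
	else
		abort();	/* SIGBUS for some unrelated reason */
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);
	/* ... create the VM and run VCPUs ... */
	return 0;
}

With this patch applied, a guest touch of the unmapped poisoned page reaches this handler as BUS_MCEERR_AR instead of killing userspace through force_sig_info in the MMIO path.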
-rw-r--r--  arch/x86/kvm/mmu.c          | 34
-rw-r--r--  arch/x86/kvm/paging_tmpl.h  |  7
-rw-r--r--  include/linux/kvm_host.h    |  1
-rw-r--r--  include/linux/mm.h          |  8
-rw-r--r--  mm/memory-failure.c         | 30
-rw-r--r--  virt/kvm/kvm_main.c         | 30
6 files changed, 95 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b1ed0a1a591..b666d8d106a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -32,6 +32,7 @@
 #include <linux/compiler.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -1960,6 +1961,27 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 	return pt_write;
 }
 
+static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
+{
+	char buf[1];
+	void __user *hva;
+	int r;
+
+	/* Touch the page, so send SIGBUS */
+	hva = (void __user *)gfn_to_hva(kvm, gfn);
+	r = copy_from_user(buf, hva, 1);
+}
+
+static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
+{
+	kvm_release_pfn_clean(pfn);
+	if (is_hwpoison_pfn(pfn)) {
+		kvm_send_hwpoison_signal(kvm, gfn);
+		return 0;
+	}
+	return 1;
+}
+
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
@@ -1983,10 +2005,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 
 	/* mmio */
-	if (is_error_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
-		return 1;
-	}
+	if (is_error_pfn(pfn))
+		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
@@ -2198,10 +2218,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	if (is_error_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
-		return 1;
-	}
+	if (is_error_pfn(pfn))
+		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 2331bdc2b54..c7f27779c99 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -431,11 +431,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 
 	/* mmio */
-	if (is_error_pfn(pfn)) {
-		pgprintk("gfn %lx is mmio\n", walker.gfn);
-		kvm_release_pfn_clean(pfn);
-		return 1;
-	}
+	if (is_error_pfn(pfn))
+		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7cb116afa1c..a0e019769f5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -266,6 +266,7 @@ extern pfn_t bad_pfn;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
+int is_hwpoison_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a2b48041b91..7a9ab7db197 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1465,6 +1465,14 @@ extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p, int access);
 extern atomic_long_t mce_bad_pages;
 extern int soft_offline_page(struct page *page, int flags);
+#ifdef CONFIG_MEMORY_FAILURE
+int is_hwpoison_address(unsigned long addr);
+#else
+static inline int is_hwpoison_address(unsigned long addr)
+{
+	return 0;
+}
+#endif
 
 extern void dump_page(struct page *page);
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 620b0b46159..378b0f61fd3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -45,6 +45,7 @@
 #include <linux/page-isolation.h>
 #include <linux/suspend.h>
 #include <linux/slab.h>
+#include <linux/swapops.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1296,3 +1297,32 @@ done:
 	/* keep elevated page count for bad page */
 	return ret;
 }
+
+int is_hwpoison_address(unsigned long addr)
+{
+	pgd_t *pgdp;
+	pud_t pud, *pudp;
+	pmd_t pmd, *pmdp;
+	pte_t pte, *ptep;
+	swp_entry_t entry;
+
+	pgdp = pgd_offset(current->mm, addr);
+	if (!pgd_present(*pgdp))
+		return 0;
+	pudp = pud_offset(pgdp, addr);
+	pud = *pudp;
+	if (!pud_present(pud) || pud_large(pud))
+		return 0;
+	pmdp = pmd_offset(pudp, addr);
+	pmd = *pmdp;
+	if (!pmd_present(pmd) || pmd_large(pmd))
+		return 0;
+	ptep = pte_offset_map(pmdp, addr);
+	pte = *ptep;
+	pte_unmap(ptep);
+	if (!is_swap_pte(pte))
+		return 0;
+	entry = pte_to_swp_entry(pte);
+	return is_hwpoison_entry(entry);
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_address);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f032806a212..187aa8d984a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -92,6 +92,9 @@ static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
 
+struct page *hwpoison_page;
+pfn_t hwpoison_pfn;
+
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -810,16 +813,22 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
 
 int is_error_page(struct page *page)
 {
-	return page == bad_page;
+	return page == bad_page || page == hwpoison_page;
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
 int is_error_pfn(pfn_t pfn)
 {
-	return pfn == bad_pfn;
+	return pfn == bad_pfn || pfn == hwpoison_pfn;
 }
 EXPORT_SYMBOL_GPL(is_error_pfn);
 
+int is_hwpoison_pfn(pfn_t pfn)
+{
+	return pfn == hwpoison_pfn;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -945,6 +954,11 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
+		if (is_hwpoison_address(addr)) {
+			get_page(hwpoison_page);
+			return page_to_pfn(hwpoison_page);
+		}
+
 		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, addr);
 
@@ -2197,6 +2211,15 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 	bad_pfn = page_to_pfn(bad_page);
 
+	hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (hwpoison_page == NULL) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
+	hwpoison_pfn = page_to_pfn(hwpoison_page);
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -2269,6 +2292,8 @@ out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
+	if (hwpoison_page)
+		__free_page(hwpoison_page);
 	__free_page(bad_page);
 out:
 	kvm_arch_exit();
@@ -2290,6 +2315,7 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	__free_page(hwpoison_page);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);