author    Andy Lutomirski <luto@kernel.org>  2015-07-30 17:31:31 -0400
committer Ingo Molnar <mingo@kernel.org>     2015-07-31 04:23:22 -0400
commit    aa1acff356bbedfd03b544051f5b371746735d89 (patch)
tree      5f0fbb4ebb793dc0dbd35669838967617c593840
parent    1adb9123f96a2553b5b373aff1eb6ae939d31d5c (diff)
x86/xen: Probe target addresses in set_aliased_prot() before the hypercall
The update_va_mapping hypercall can fail if the VA isn't present in
the guest's page tables.  Under certain loads, this can result in an
OOPS when the target address is in unpopulated vmap space.

While we're at it, add comments to help explain what's going on.

This isn't a great long-term fix.  This code should probably be
changed to use something like set_memory_ro.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Vrabel <dvrabel@cantab.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: security@kernel.org <security@kernel.org>
Cc: <stable@vger.kernel.org>
Cc: xen-devel <xen-devel@lists.xen.org>
Link: http://lkml.kernel.org/r/0b0e55b995cda11e7829f140b833ef932fcabe3a.1438291540.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
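[Editor's note: as a rough illustration of the long-term cleanup hinted at above, a
conversion might lean on the generic x86 pageattr API instead of issuing
update_va_mapping hypercalls by hand.  The helper below is a hypothetical sketch,
not part of this patch: the name mark_ldt_ro() is made up, it assumes the LDT
allocation is page-aligned, and a real conversion would still have to deal with
the direct-map alias that set_aliased_prot() handles today.]

    #include <linux/kernel.h>
    #include <linux/mm.h>
    #include <asm/cacheflush.h>     /* set_memory_ro()/set_memory_rw() on x86 */
    #include <asm/ldt.h>            /* LDT_ENTRY_SIZE */

    /*
     * Hypothetical sketch only -- NOT part of this patch.  Mark an LDT
     * allocation read-only via the pageattr API; assumes 'ldt' is
     * page-aligned, as the page-sized and vmalloc'd LDT allocations are.
     */
    static int mark_ldt_ro(void *ldt, unsigned int entries)
    {
            unsigned int nr_pages = DIV_ROUND_UP(entries * LDT_ENTRY_SIZE, PAGE_SIZE);

            return set_memory_ro((unsigned long)ldt, nr_pages);
    }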
-rw-r--r--  arch/x86/xen/enlighten.c | 40
1 file changed, 40 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0b95c9b8283f..11d6fb4e8483 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot)
         pte_t pte;
         unsigned long pfn;
         struct page *page;
+        unsigned char dummy;
 
         ptep = lookup_address((unsigned long)v, &level);
         BUG_ON(ptep == NULL);
@@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot)
 
         pte = pfn_pte(pfn, prot);
 
+        /*
+         * Careful: update_va_mapping() will fail if the virtual address
+         * we're poking isn't populated in the page tables.  We don't
+         * need to worry about the direct map (that's always in the page
+         * tables), but we need to be careful about vmap space.  In
+         * particular, the top level page table can lazily propagate
+         * entries between processes, so if we've switched mms since we
+         * vmapped the target in the first place, we might not have the
+         * top-level page table entry populated.
+         *
+         * We disable preemption because we want the same mm active when
+         * we probe the target and when we issue the hypercall.  We'll
+         * have the same nominal mm, but if we're a kernel thread, lazy
+         * mm dropping could change our pgd.
+         *
+         * Out of an abundance of caution, this uses __get_user() to fault
+         * in the target address just in case there's some obscure case
+         * in which the target address isn't readable.
+         */
+
+        preempt_disable();
+
+        pagefault_disable();    /* Avoid warnings due to being atomic. */
+        __get_user(dummy, (unsigned char __user __force *)v);
+        pagefault_enable();
+
         if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
                 BUG();
 
@@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot)
                         BUG();
         } else
                 kmap_flush_unused();
+
+        preempt_enable();
 }
 
 static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
@@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
         const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
         int i;
 
+        /*
+         * We need to mark all aliases of the LDT pages RO.  We
+         * don't need to call vm_flush_aliases(), though, since that's
+         * only responsible for flushing aliases out the TLBs, not the
+         * page tables, and Xen will flush the TLB for us if needed.
+         *
+         * To avoid confusing future readers: none of this is necessary
+         * to load the LDT.  The hypervisor only checks this when the
+         * LDT is faulted in due to subsequent descriptor access.
+         */
+
         for(i = 0; i < entries; i += entries_per_page)
                 set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
 }
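
[Editor's note: stripped of the surrounding Xen details, the probe-before-hypercall
idiom that the first two hunks add looks roughly like the sketch below.  It is
illustrative only: probe_then_remap() is a made-up name, and the real
set_aliased_prot() additionally handles the highmem/kmap alias visible in the
context lines of the third hunk.]

    #include <linux/preempt.h>
    #include <linux/uaccess.h>
    #include <linux/bug.h>
    #include <asm/pgtable.h>
    #include <asm/xen/hypercall.h>

    /*
     * Illustrative sketch of the idiom added by this patch; the function
     * name is hypothetical.  Probe the VA so that a lazily-unpopulated
     * top-level page-table entry for vmap space gets faulted in, then
     * issue the hypercall while preemption (and therefore the active mm)
     * stays fixed.
     */
    static void probe_then_remap(void *v, pte_t pte)
    {
            unsigned char dummy;

            preempt_disable();      /* same mm for the probe and the hypercall */

            pagefault_disable();    /* avoid might_fault() warnings; we're atomic */
            __get_user(dummy, (unsigned char __user __force *)v);
            pagefault_enable();

            /* The VA is now present in this pgd, so the update should succeed. */
            if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
                    BUG();

            preempt_enable();
    }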