xen: mark local pages as FOREIGN in the m2p_override

commit b9e0d95c041ca2d7ad297ee37c2e9cfab67a188f upstream. When the frontend and the backend reside on the same domain, even if we add pages to the m2p_override, these pages will never be returned by mfn_to_pfn because the check "get_phys_to_machine(pfn) != mfn" will always fail, so the pfn of the frontend will be returned instead (resulting in a deadlock because the frontend pages are already locked). INFO: task qemu-system-i38:1085 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. qemu-system-i38 D ffff8800cfc137c0 0 1085 1 0x00000000 ffff8800c47ed898 0000000000000282 ffff8800be4596b0 00000000000137c0 ffff8800c47edfd8 ffff8800c47ec010 00000000000137c0 00000000000137c0 ffff8800c47edfd8 00000000000137c0 ffffffff82213020 ffff8800be4596b0 Call Trace: [<ffffffff81101ee0>] ? __lock_page+0x70/0x70 [<ffffffff81a0fdd9>] schedule+0x29/0x70 [<ffffffff81a0fe80>] io_schedule+0x60/0x80 [<ffffffff81101eee>] sleep_on_page+0xe/0x20 [<ffffffff81a0e1ca>] __wait_on_bit_lock+0x5a/0xc0 [<ffffffff81101ed7>] __lock_page+0x67/0x70 [<ffffffff8106f750>] ? autoremove_wake_function+0x40/0x40 [<ffffffff811867e6>] ? bio_add_page+0x36/0x40 [<ffffffff8110b692>] set_page_dirty_lock+0x52/0x60 [<ffffffff81186021>] bio_set_pages_dirty+0x51/0x70 [<ffffffff8118c6b4>] do_blockdev_direct_IO+0xb24/0xeb0 [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00 [<ffffffff8118ca95>] __blockdev_direct_IO+0x55/0x60 [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00 [<ffffffff811e91c8>] ext3_direct_IO+0xf8/0x390 [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00 [<ffffffff81004b60>] ? xen_mc_flush+0xb0/0x1b0 [<ffffffff81104027>] generic_file_aio_read+0x737/0x780 [<ffffffff813bedeb>] ? gnttab_map_refs+0x15b/0x1e0 [<ffffffff811038f0>] ? find_get_pages+0x150/0x150 [<ffffffff8119736c>] aio_rw_vect_retry+0x7c/0x1d0 [<ffffffff811972f0>] ? lookup_ioctx+0x90/0x90 [<ffffffff81198856>] aio_run_iocb+0x66/0x1a0 [<ffffffff811998b8>] do_io_submit+0x708/0xb90 [<ffffffff81199d50>] sys_io_submit+0x10/0x20 [<ffffffff81a18d69>] system_call_fastpath+0x16/0x1b The explanation is in the comment within the code: We need to do this because the pages shared by the frontend (xen-blkfront) can be already locked (lock_page, called by do_read_cache_page); when the userspace backend tries to use them with direct_IO, mfn_to_pfn returns the pfn of the frontend, so do_blockdev_direct_IO is going to try to lock the same pages again resulting in a deadlock. A simplified call graph looks like this: pygrub QEMU ----------------------------------------------- do_read_cache_page io_submit | | lock_page ext3_direct_IO | bio_add_page | lock_page Internally the xen-blkback uses m2p_add_override to swizzle (temporarily) a 'struct page' to have a different MFN (so that it can point to another guest). It also can easily find out whether another pfn corresponding to the mfn exists in the m2p, and can set the FOREIGN bit in the p2m, making sure that mfn_to_pfn returns the pfn of the backend. This allows the backend to perform direct_IO on these pages, but as a side effect prevents the frontend from using get_user_pages_fast on them while they are being shared with the backend. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Stefano Stabellini <stefano.stabellini@eu.citrix.com> 2012-05-23 13:57:20 -0400
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2012-08-26 18:12:11 -0400
commit: 318095d39c962873a6aa0f31683745cb0420753e (patch)
tree: ba5614821ffbd28d20bc935628d64a4f75391f1e /arch/x86/xen
parent: bd697182ee0264c6a6c9ac108d5d765e24b82724 (diff)
1 files changed, 36 insertions, 0 deletions
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9d544..2f7847d1055 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -683,6 +683,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
+        int ret = 0;
        pfn = page_to_pfn(page);
        if (!PageHighMem(page)) {
@@ -706,6 +707,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
        list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
+        /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
+         * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
+         * pfn so that the following mfn_to_pfn(mfn) calls will return the
+         * pfn from the m2p_override (the backend pfn) instead.
+         * We need to do this because the pages shared by the frontend
+         * (xen-blkfront) can be already locked (lock_page, called by
+         * do_read_cache_page); when the userspace backend tries to use them
+         * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
+         * do_blockdev_direct_IO is going to try to lock the same pages
+         * again resulting in a deadlock.
+         * As a side effect get_user_pages_fast might not be safe on the
+         * frontend pages while they are being shared with the backend,
+         * because mfn_to_pfn (that ends up being called by GUPF) will
+         * return the backend pfn rather than the frontend pfn. */
+        ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+        if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+                set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
        return 0;
 }
 EXPORT_SYMBOL_GPL(m2p_add_override);
@@ -717,6 +736,7 @@ int m2p_remove_override(struct page *page, bool clear_pte)
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
+        int ret = 0;
        pfn = page_to_pfn(page);
        mfn = get_phys_to_machine(pfn);
@@ -743,6 +763,22 @@ int m2p_remove_override(struct page *page, bool clear_pte)
                /* No tlb flush necessary because the caller already
                 * left the pte unmapped. */
+        /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
+         * somewhere in this domain, even before being added to the
+         * m2p_override (see comment above in m2p_add_override).
+         * If there are no other entries in the m2p_override corresponding
+         * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
+         * the original pfn (the one shared by the frontend): the backend
+         * cannot do any IO on this page anymore because it has been
+         * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
+         * the original pfn causes mfn_to_pfn(mfn) to return the frontend
+         * pfn again. */
+        mfn &= ~FOREIGN_FRAME_BIT;
+        ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+        if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+                        m2p_find_override(mfn) == NULL)
+                set_phys_to_machine(pfn, mfn);
        return 0;
 }
 EXPORT_SYMBOL_GPL(m2p_remove_override);
author	Stefano Stabellini <stefano.stabellini@eu.citrix.com>	2012-05-23 13:57:20 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2012-08-26 18:12:11 -0400
commit	318095d39c962873a6aa0f31683745cb0420753e (patch)
tree	ba5614821ffbd28d20bc935628d64a4f75391f1e /arch/x86/xen
parent	bd697182ee0264c6a6c9ac108d5d765e24b82724 (diff)

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58efeb9d544..2f7847d1055 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c
@@ -683,6 +683,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
683	unsigned long uninitialized_var(address);	683	unsigned long uninitialized_var(address);
684	unsigned level;	684	unsigned level;
685	pte_t *ptep = NULL;	685	pte_t *ptep = NULL;
		686	int ret = 0;
686		687
687	pfn = page_to_pfn(page);	688	pfn = page_to_pfn(page);
688	if (!PageHighMem(page)) {	689	if (!PageHighMem(page)) {
@@ -706,6 +707,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
706	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);	707	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
707	spin_unlock_irqrestore(&m2p_override_lock, flags);	708	spin_unlock_irqrestore(&m2p_override_lock, flags);
708		709
		710	/* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
		711	* this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
		712	* pfn so that the following mfn_to_pfn(mfn) calls will return the
		713	* pfn from the m2p_override (the backend pfn) instead.
		714	* We need to do this because the pages shared by the frontend
		715	* (xen-blkfront) can be already locked (lock_page, called by
		716	* do_read_cache_page); when the userspace backend tries to use them
		717	* with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
		718	* do_blockdev_direct_IO is going to try to lock the same pages
		719	* again resulting in a deadlock.
		720	* As a side effect get_user_pages_fast might not be safe on the
		721	* frontend pages while they are being shared with the backend,
		722	* because mfn_to_pfn (that ends up being called by GUPF) will
		723	* return the backend pfn rather than the frontend pfn. */
		724	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
		725	if (ret == 0 && get_phys_to_machine(pfn) == mfn)
		726	set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
		727
709	return 0;	728	return 0;
710	}	729	}
711	EXPORT_SYMBOL_GPL(m2p_add_override);	730	EXPORT_SYMBOL_GPL(m2p_add_override);
@@ -717,6 +736,7 @@ int m2p_remove_override(struct page *page, bool clear_pte)
717	unsigned long uninitialized_var(address);	736	unsigned long uninitialized_var(address);
718	unsigned level;	737	unsigned level;
719	pte_t *ptep = NULL;	738	pte_t *ptep = NULL;
		739	int ret = 0;
720		740
721	pfn = page_to_pfn(page);	741	pfn = page_to_pfn(page);
722	mfn = get_phys_to_machine(pfn);	742	mfn = get_phys_to_machine(pfn);
@@ -743,6 +763,22 @@ int m2p_remove_override(struct page *page, bool clear_pte)
743	/* No tlb flush necessary because the caller already	763	/* No tlb flush necessary because the caller already
744	* left the pte unmapped. */	764	* left the pte unmapped. */
745		765
		766	/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
		767	* somewhere in this domain, even before being added to the
		768	* m2p_override (see comment above in m2p_add_override).
		769	* If there are no other entries in the m2p_override corresponding
		770	* to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
		771	* the original pfn (the one shared by the frontend): the backend
		772	* cannot do any IO on this page anymore because it has been
		773	* unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
		774	* the original pfn causes mfn_to_pfn(mfn) to return the frontend
		775	* pfn again. */
		776	mfn &= ~FOREIGN_FRAME_BIT;
		777	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
		778	if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
		779	m2p_find_override(mfn) == NULL)
		780	set_phys_to_machine(pfn, mfn);
		781
746	return 0;	782	return 0;
747	}	783	}
748	EXPORT_SYMBOL_GPL(m2p_remove_override);	784	EXPORT_SYMBOL_GPL(m2p_remove_override);