aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorStefano Stabellini <stefano.stabellini@eu.citrix.com>2012-05-23 13:57:20 -0400
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-06-14 14:03:41 -0400
commitb9e0d95c041ca2d7ad297ee37c2e9cfab67a188f (patch)
tree0b055245bfb1493545b5f4d5e7ba3a9fdade81cf /arch
parent5e626254206a709c6e937f3dda69bf26c7344f6f (diff)
xen: mark local pages as FOREIGN in the m2p_override
When the frontend and the backend reside on the same domain, even if we add pages to the m2p_override, these pages will never be returned by mfn_to_pfn because the check "get_phys_to_machine(pfn) != mfn" will always fail, so the pfn of the frontend will be returned instead (resulting in a deadlock because the frontend pages are already locked). INFO: task qemu-system-i38:1085 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. qemu-system-i38 D ffff8800cfc137c0 0 1085 1 0x00000000 ffff8800c47ed898 0000000000000282 ffff8800be4596b0 00000000000137c0 ffff8800c47edfd8 ffff8800c47ec010 00000000000137c0 00000000000137c0 ffff8800c47edfd8 00000000000137c0 ffffffff82213020 ffff8800be4596b0 Call Trace: [<ffffffff81101ee0>] ? __lock_page+0x70/0x70 [<ffffffff81a0fdd9>] schedule+0x29/0x70 [<ffffffff81a0fe80>] io_schedule+0x60/0x80 [<ffffffff81101eee>] sleep_on_page+0xe/0x20 [<ffffffff81a0e1ca>] __wait_on_bit_lock+0x5a/0xc0 [<ffffffff81101ed7>] __lock_page+0x67/0x70 [<ffffffff8106f750>] ? autoremove_wake_function+0x40/0x40 [<ffffffff811867e6>] ? bio_add_page+0x36/0x40 [<ffffffff8110b692>] set_page_dirty_lock+0x52/0x60 [<ffffffff81186021>] bio_set_pages_dirty+0x51/0x70 [<ffffffff8118c6b4>] do_blockdev_direct_IO+0xb24/0xeb0 [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00 [<ffffffff8118ca95>] __blockdev_direct_IO+0x55/0x60 [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00 [<ffffffff811e91c8>] ext3_direct_IO+0xf8/0x390 [<ffffffff811e71a0>] ? ext3_get_blocks_handle+0xe00/0xe00 [<ffffffff81004b60>] ? xen_mc_flush+0xb0/0x1b0 [<ffffffff81104027>] generic_file_aio_read+0x737/0x780 [<ffffffff813bedeb>] ? gnttab_map_refs+0x15b/0x1e0 [<ffffffff811038f0>] ? find_get_pages+0x150/0x150 [<ffffffff8119736c>] aio_rw_vect_retry+0x7c/0x1d0 [<ffffffff811972f0>] ? lookup_ioctx+0x90/0x90 [<ffffffff81198856>] aio_run_iocb+0x66/0x1a0 [<ffffffff811998b8>] do_io_submit+0x708/0xb90 [<ffffffff81199d50>] sys_io_submit+0x10/0x20 [<ffffffff81a18d69>] system_call_fastpath+0x16/0x1b The explanation is in the comment within the code: We need to do this because the pages shared by the frontend (xen-blkfront) can be already locked (lock_page, called by do_read_cache_page); when the userspace backend tries to use them with direct_IO, mfn_to_pfn returns the pfn of the frontend, so do_blockdev_direct_IO is going to try to lock the same pages again resulting in a deadlock. A simplified call graph looks like this: pygrub QEMU ----------------------------------------------- do_read_cache_page io_submit | | lock_page ext3_direct_IO | bio_add_page | lock_page Internally the xen-blkback uses m2p_add_override to swizzle (temporarily) a 'struct page' to have a different MFN (so that it can point to another guest). It also can easily find out whether another pfn corresponding to the mfn exists in the m2p, and can set the FOREIGN bit in the p2m, making sure that mfn_to_pfn returns the pfn of the backend. This allows the backend to perform direct_IO on these pages, but as a side effect prevents the frontend from using get_user_pages_fast on them while they are being shared with the backend. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/xen/p2m.c36
1 files changed, 36 insertions, 0 deletions
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index ffd08c414e91..64effdc6da94 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -706,6 +706,7 @@ int m2p_add_override(unsigned long mfn, struct page *page,
706 unsigned long uninitialized_var(address); 706 unsigned long uninitialized_var(address);
707 unsigned level; 707 unsigned level;
708 pte_t *ptep = NULL; 708 pte_t *ptep = NULL;
709 int ret = 0;
709 710
710 pfn = page_to_pfn(page); 711 pfn = page_to_pfn(page);
711 if (!PageHighMem(page)) { 712 if (!PageHighMem(page)) {
@@ -741,6 +742,24 @@ int m2p_add_override(unsigned long mfn, struct page *page,
741 list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); 742 list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
742 spin_unlock_irqrestore(&m2p_override_lock, flags); 743 spin_unlock_irqrestore(&m2p_override_lock, flags);
743 744
745 /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
746 * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
747 * pfn so that the following mfn_to_pfn(mfn) calls will return the
748 * pfn from the m2p_override (the backend pfn) instead.
749 * We need to do this because the pages shared by the frontend
750 * (xen-blkfront) can be already locked (lock_page, called by
751 * do_read_cache_page); when the userspace backend tries to use them
752 * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
753 * do_blockdev_direct_IO is going to try to lock the same pages
754 * again resulting in a deadlock.
755 * As a side effect get_user_pages_fast might not be safe on the
756 * frontend pages while they are being shared with the backend,
757 * because mfn_to_pfn (that ends up being called by GUPF) will
758 * return the backend pfn rather than the frontend pfn. */
759 ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
760 if (ret == 0 && get_phys_to_machine(pfn) == mfn)
761 set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
762
744 return 0; 763 return 0;
745} 764}
746EXPORT_SYMBOL_GPL(m2p_add_override); 765EXPORT_SYMBOL_GPL(m2p_add_override);
@@ -752,6 +771,7 @@ int m2p_remove_override(struct page *page, bool clear_pte)
752 unsigned long uninitialized_var(address); 771 unsigned long uninitialized_var(address);
753 unsigned level; 772 unsigned level;
754 pte_t *ptep = NULL; 773 pte_t *ptep = NULL;
774 int ret = 0;
755 775
756 pfn = page_to_pfn(page); 776 pfn = page_to_pfn(page);
757 mfn = get_phys_to_machine(pfn); 777 mfn = get_phys_to_machine(pfn);
@@ -821,6 +841,22 @@ int m2p_remove_override(struct page *page, bool clear_pte)
821 } else 841 } else
822 set_phys_to_machine(pfn, page->index); 842 set_phys_to_machine(pfn, page->index);
823 843
844 /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
845 * somewhere in this domain, even before being added to the
846 * m2p_override (see comment above in m2p_add_override).
847 * If there are no other entries in the m2p_override corresponding
848 * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
849 * the original pfn (the one shared by the frontend): the backend
850 * cannot do any IO on this page anymore because it has been
851 * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
852 * the original pfn causes mfn_to_pfn(mfn) to return the frontend
853 * pfn again. */
854 mfn &= ~FOREIGN_FRAME_BIT;
855 ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
856 if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
857 m2p_find_override(mfn) == NULL)
858 set_phys_to_machine(pfn, mfn);
859
824 return 0; 860 return 0;
825} 861}
826EXPORT_SYMBOL_GPL(m2p_remove_override); 862EXPORT_SYMBOL_GPL(m2p_remove_override);