author	Andrea Arcangeli <aarcange@redhat.com>	2011-01-13 18:47:06 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-01-13 20:32:45 -0500
commit	ce83d2174ea9c3d72d5821cf3ebc974e36391bf7 (patch)
tree	5e31167f84110551cf00ed3335b2cc3af317b33a /mm/huge_memory.c
parent	0bbbc0b33d141f78a0d9218a54a47f50621220d3 (diff)
thp: allocate memory in khugepaged outside of mmap_sem write mode
This tries to be more friendly to filesystems in userland, with userland backends that allocate memory in the I/O paths and that could deadlock if khugepaged allocates memory while holding the userland backend's mmap_sem in write mode. The allocation may wait for writeback I/O completion from the daemon, and the daemon may itself be blocked waiting for the mmap_sem in read mode if it takes a page fault and wasn't using mlock for the memory required for I/O submission and completion.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
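The ordering described above can be illustrated outside the kernel. What follows is a minimal user-space sketch, not the patch itself: a pthread rwlock stands in for mmap_sem, malloc() stands in for the hugepage allocation, and every name in it is hypothetical. It only shows the pattern the patch adopts, i.e. allocate while the lock is held for read, then drop it and take it for write, so a potentially blocking allocation never happens with the write lock held.

/*
 * Minimal user-space analogue (hypothetical names, not kernel code) of
 * the lock ordering this patch adopts: allocate under the read lock,
 * then drop it and take the write lock, so the allocation never blocks
 * other readers such as a userland I/O daemon.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_rwlock_t fake_mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Returns 0 or -ENOMEM; the lock is released on every return path. */
static int collapse_step(size_t size)
{
	void *new_page;

	pthread_rwlock_rdlock(&fake_mmap_sem);

	/* Allocate under the read lock: other readers (the "daemon" in the
	 * commit message) are not excluded while this blocks. */
	new_page = malloc(size);
	if (!new_page) {
		pthread_rwlock_unlock(&fake_mmap_sem);
		return -ENOMEM;
	}

	/* "Upgrade" by dropping the read lock and taking the write lock;
	 * the real code revalidates its view of the address space after
	 * this point before touching page tables. */
	pthread_rwlock_unlock(&fake_mmap_sem);
	pthread_rwlock_wrlock(&fake_mmap_sem);

	/* ...work that genuinely needs exclusive access goes here... */

	pthread_rwlock_unlock(&fake_mmap_sem);
	free(new_page);
	return 0;
}

int main(void)
{
	printf("collapse_step: %d\n", collapse_step(2UL << 20));
	return 0;
}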
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c | 56
1 file changed, 34 insertions(+), 22 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f6559e7711bd..bce6e12140e2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1664,9 +1664,9 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 
 static void collapse_huge_page(struct mm_struct *mm,
 			       unsigned long address,
-			       struct page **hpage)
+			       struct page **hpage,
+			       struct vm_area_struct *vma)
 {
-	struct vm_area_struct *vma;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd, _pmd;
@@ -1680,9 +1680,34 @@ static void collapse_huge_page(struct mm_struct *mm,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 #ifndef CONFIG_NUMA
 	VM_BUG_ON(!*hpage);
+	new_page = *hpage;
 #else
 	VM_BUG_ON(*hpage);
+	/*
+	 * Allocate the page while the vma is still valid and under
+	 * the mmap_sem read mode so there is no memory allocation
+	 * later when we take the mmap_sem in write mode. This is more
+	 * friendly behavior (OTOH it may actually hide bugs) to
+	 * filesystems in userland with daemons allocating memory in
+	 * the userland I/O paths. Allocating memory with the
+	 * mmap_sem in read mode is good idea also to allow greater
+	 * scalability.
+	 */
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	if (unlikely(!new_page)) {
+		up_read(&mm->mmap_sem);
+		*hpage = ERR_PTR(-ENOMEM);
+		return;
+	}
 #endif
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+		up_read(&mm->mmap_sem);
+		put_page(new_page);
+		return;
+	}
+
+	/* after allocating the hugepage upgrade to mmap_sem write mode */
+	up_read(&mm->mmap_sem);
 
 	/*
 	 * Prevent all access to pagetables with the exception of
@@ -1720,18 +1745,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
-#ifndef CONFIG_NUMA
-	new_page = *hpage;
-#else
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
-	if (unlikely(!new_page)) {
-		*hpage = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-#endif
-	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
-		goto out_put_page;
-
 	anon_vma_lock(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
@@ -1759,7 +1772,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
 		mem_cgroup_uncharge_page(new_page);
-		goto out_put_page;
+		goto out;
 	}
 
 	/*
@@ -1798,15 +1811,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	*hpage = NULL;
 #endif
 	khugepaged_pages_collapsed++;
-out:
+out_up_write:
 	up_write(&mm->mmap_sem);
 	return;
 
-out_put_page:
+out:
 #ifdef CONFIG_NUMA
 	put_page(new_page);
 #endif
-	goto out;
+	goto out_up_write;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -1865,10 +1878,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		ret = 1;
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret) {
-		up_read(&mm->mmap_sem);
-		collapse_huge_page(mm, address, hpage);
-	}
+	if (ret)
+		/* collapse_huge_page will return with the mmap_sem released */
+		collapse_huge_page(mm, address, hpage, vma);
 out:
 	return ret;
 }
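The last hunk also changes the locking contract between khugepaged_scan_pmd and collapse_huge_page: the caller no longer drops mmap_sem itself, because on the collapse path the callee now releases it on every return. Continuing the hypothetical user-space analogue above (pthread rwlock in place of mmap_sem, invented names), the caller-side convention looks roughly like this:

/*
 * Hypothetical sketch (not kernel code) of the convention the last hunk
 * introduces: when the scan decides to collapse, the lock is handed to
 * the callee, which releases it on every path, so the caller must not
 * unlock again.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t fake_mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Always returns with fake_mmap_sem released, mirroring the comment
 * "collapse_huge_page will return with the mmap_sem released". */
static void collapse_range(unsigned long address)
{
	(void)address;
	/* The actual collapse work is elided here; see the previous sketch
	 * for the allocate-then-upgrade ordering. */
	pthread_rwlock_unlock(&fake_mmap_sem);
}

static bool scan_range(unsigned long address)
{
	bool collapse;

	pthread_rwlock_rdlock(&fake_mmap_sem);
	/* Stand-in for the pte scan: collapse only 2MB-aligned addresses. */
	collapse = (address & ((2UL << 20) - 1)) == 0;

	if (collapse) {
		/* collapse_range returns with the lock released */
		collapse_range(address);
		return true;
	}

	pthread_rwlock_unlock(&fake_mmap_sem);
	return false;
}

int main(void)
{
	printf("collapsed: %d\n", scan_range(2UL << 20));
	return 0;
}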