author		Andrea Arcangeli <aarcange@redhat.com>	2011-01-13 18:47:06 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-01-13 20:32:45 -0500
commit		ce83d2174ea9c3d72d5821cf3ebc974e36391bf7 (patch)
tree		5e31167f84110551cf00ed3335b2cc3af317b33a /mm/huge_memory.c
parent		0bbbc0b33d141f78a0d9218a54a47f50621220d3 (diff)
thp: allocate memory in khugepaged outside of mmap_sem write mode
This tries to be more friendly to filesystems in userland, i.e. userland
backends that allocate memory in their I/O paths and that could deadlock if
khugepaged held the backend's mmap_sem in write mode while allocating
memory. The allocation may wait for writeback I/O completion from the
daemon, and the daemon in turn may block on the mmap_sem in read mode if it
takes a page fault and was not using mlock for the memory required for I/O
submission and completion.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
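The locking order the patch adopts is easiest to see outside the kernel. Below
is a minimal user-space sketch, assuming a pthread rwlock stands in for
mmap_sem and malloc() for alloc_hugepage_vma(); all helper names here are
hypothetical, not kernel APIs.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Stand-in for alloc_hugepage_vma(): may sleep waiting on writeback. */
static void *alloc_hugepage(void)
{
	return malloc(2 * 1024 * 1024);	/* one 2M "huge page" */
}

/* Mirrors the patched collapse_huge_page(): enter with the read side
 * held, allocate, then "upgrade" by dropping and retaking the lock. */
static void collapse(void)
{
	void *new_page;

	pthread_rwlock_rdlock(&mmap_sem);

	/* Allocate while only the read side is held: a daemon that
	 * faults during the allocation can still take the read side,
	 * so it cannot deadlock against us. */
	new_page = alloc_hugepage();
	if (!new_page) {
		pthread_rwlock_unlock(&mmap_sem);
		return;
	}

	/* Only after allocating, move to write mode.  The lock is
	 * dropped in between, so the mapping must be revalidated
	 * afterwards (the real code re-walks pgd/pud/pmd for the
	 * same reason). */
	pthread_rwlock_unlock(&mmap_sem);
	pthread_rwlock_wrlock(&mmap_sem);
	/* ... revalidate the vma and collapse the range here ... */
	pthread_rwlock_unlock(&mmap_sem);

	free(new_page);
}

int main(void)
{
	collapse();
	puts("collapsed without allocating under the write side");
	return 0;
}

The design point is that nothing inside the write-side critical section may
sleep on a memory allocation; the price is that the mapping has to be
revalidated once the lock is retaken in write mode.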
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	56
1 file changed, 34 insertions(+), 22 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f6559e7711bd..bce6e12140e2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1664,9 +1664,9 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 
 static void collapse_huge_page(struct mm_struct *mm,
 			       unsigned long address,
-			       struct page **hpage)
+			       struct page **hpage,
+			       struct vm_area_struct *vma)
 {
-	struct vm_area_struct *vma;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd, _pmd;
@@ -1680,9 +1680,34 @@ static void collapse_huge_page(struct mm_struct *mm,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 #ifndef CONFIG_NUMA
 	VM_BUG_ON(!*hpage);
+	new_page = *hpage;
 #else
 	VM_BUG_ON(*hpage);
+	/*
+	 * Allocate the page while the vma is still valid and under
+	 * the mmap_sem read mode so there is no memory allocation
+	 * later when we take the mmap_sem in write mode. This is more
+	 * friendly behavior (OTOH it may actually hide bugs) to
+	 * filesystems in userland with daemons allocating memory in
+	 * the userland I/O paths. Allocating memory with the
+	 * mmap_sem in read mode is good idea also to allow greater
+	 * scalability.
+	 */
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	if (unlikely(!new_page)) {
+		up_read(&mm->mmap_sem);
+		*hpage = ERR_PTR(-ENOMEM);
+		return;
+	}
 #endif
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+		up_read(&mm->mmap_sem);
+		put_page(new_page);
+		return;
+	}
+
+	/* after allocating the hugepage upgrade to mmap_sem write mode */
+	up_read(&mm->mmap_sem);
 
 	/*
 	 * Prevent all access to pagetables with the exception of
@@ -1720,18 +1745,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
-#ifndef CONFIG_NUMA
-	new_page = *hpage;
-#else
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
-	if (unlikely(!new_page)) {
-		*hpage = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-#endif
-	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
-		goto out_put_page;
-
 	anon_vma_lock(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
@@ -1759,7 +1772,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
 		mem_cgroup_uncharge_page(new_page);
-		goto out_put_page;
+		goto out;
 	}
 
 	/*
@@ -1798,15 +1811,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	*hpage = NULL;
 #endif
 	khugepaged_pages_collapsed++;
-out:
+out_up_write:
 	up_write(&mm->mmap_sem);
 	return;
 
-out_put_page:
+out:
 #ifdef CONFIG_NUMA
 	put_page(new_page);
 #endif
-	goto out;
+	goto out_up_write;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -1865,10 +1878,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	ret = 1;
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret) {
-		up_read(&mm->mmap_sem);
-		collapse_huge_page(mm, address, hpage);
-	}
+	if (ret)
+		/* collapse_huge_page will return with the mmap_sem released */
+		collapse_huge_page(mm, address, hpage, vma);
 out:
 	return ret;
 }
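For contrast, the ordering this patch removes can be made to show its failure
mode in the same user-space model. A hypothetical two-thread sketch, using
pthread_rwlock_tryrdlock() so the program reports the blocked acquisition
instead of actually deadlocking; again, none of this is kernel code.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/* The userland I/O daemon: a page fault on non-mlocked memory makes it
 * need the read side of mmap_sem before it can complete the writeback
 * that the allocator is waiting on. */
static void *daemon_fault(void *arg)
{
	(void)arg;
	if (pthread_rwlock_tryrdlock(&mmap_sem) == EBUSY) {
		puts("daemon would block in mmap_sem read mode: deadlock");
	} else {
		pthread_rwlock_unlock(&mmap_sem);
		puts("daemon took the read side: no deadlock");
	}
	return NULL;
}

int main(void)
{
	pthread_t daemon;

	/* Old scheme: khugepaged holds write mode while "allocating",
	 * i.e. while waiting on the daemon's writeback to finish. */
	pthread_rwlock_wrlock(&mmap_sem);
	pthread_create(&daemon, NULL, daemon_fault, NULL);
	pthread_join(daemon, NULL);
	pthread_rwlock_unlock(&mmap_sem);
	return 0;
}

With the write side held across the allocation, the daemon's read-side
acquisition can never succeed, which is exactly the cycle the patch breaks by
allocating, and charging the memcg, before mmap_sem is ever taken for write.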