author		Andrea Arcangeli <aarcange@redhat.com>	2011-01-13 18:47:05 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-01-13 20:32:45 -0500
commit		0bbbc0b33d141f78a0d9218a54a47f50621220d3 (patch)
tree		3ef3363c189ac536926119731eb86dcf989f4adb
parent		d39d33c332c611094f84cee39715866f4cbf79e2 (diff)
thp: add numa awareness to hugepage allocations
This is mostly a matter of introducing alloc_pages_vma and then replacing
alloc_pages with it.  khugepaged needs special handling: the allocation has
to happen inside collapse_huge_page, where the vma is known, and a failure
has to be returned to the outer loop so that it can sleep for
alloc_sleep_millisecs before retrying.  With CONFIG_NUMA=n the more
efficient logic of handling allocation failures directly in khugepaged is
retained.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/gfp.h	 7
-rw-r--r--	mm/huge_memory.c	87
-rw-r--r--	mm/mempolicy.c		13
3 files changed, 87 insertions(+), 20 deletions(-)
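For readers skimming the patch, here is a minimal user-space sketch of the error-signalling scheme described in the commit message for the CONFIG_NUMA case: collapse_huge_page() allocates the hugepage itself (so the target vma and node are known) and reports failure to the outer khugepaged loop through ERR_PTR(-ENOMEM), which then backs off before retrying. The ERR_PTR/IS_ERR helpers are re-implemented here only for illustration; fake_alloc_hugepage_vma(), collapse_one() and the usleep() call are hypothetical stand-ins for the kernel functions, not the patch itself.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

struct page;                            /* opaque stand-in for struct page */

/* Simplified versions of the kernel's ERR_PTR()/IS_ERR() helpers. */
static inline void *ERR_PTR(long error)
{
        return (void *)error;
}

static inline int IS_ERR(const void *ptr)
{
        return (uintptr_t)ptr >= (uintptr_t)-4095;
}

/* Stand-in for alloc_hugepage_vma(); pretend the node is out of hugepages. */
static struct page *fake_alloc_hugepage_vma(void)
{
        return NULL;
}

/* Mirrors the CONFIG_NUMA branch the patch adds to collapse_huge_page(). */
static void collapse_one(struct page **hpage)
{
        struct page *new_page = fake_alloc_hugepage_vma();

        if (!new_page) {
                *hpage = ERR_PTR(-ENOMEM);      /* tell the outer loop to back off */
                return;
        }
        /* ... the actual collapse work would happen here ... */
}

int main(void)
{
        struct page *hpage = NULL;
        int scans = 3;

        /* Mirrors the khugepaged_loop()/khugepaged_do_scan() interplay. */
        while (scans--) {
                if (IS_ERR(hpage)) {
                        /* analogue of khugepaged_alloc_sleep() */
                        printf("allocation failed, sleeping before retry\n");
                        usleep(1000);
                        hpage = NULL;
                }
                collapse_one(&hpage);
        }
        return 0;
}

Using an error pointer rather than NULL lets the NUMA path keep *hpage == NULL as the normal state, while still telling khugepaged_do_scan() and khugepaged_loop() to stop scanning and sleep when the node runs out of huge pages.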
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index d95082cc6f4a..a3b148a91874 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -331,14 +331,17 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	return alloc_pages_current(gfp_mask, order);
 }
-extern struct page *alloc_page_vma(gfp_t gfp_mask,
+extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#define alloc_pages_vma(gfp_mask, order, vma, addr) \
+	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
+#define alloc_page_vma(gfp_mask, vma, addr) \
+	alloc_pages_vma(gfp_mask, 0, vma, addr)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0415a83afd66..f6559e7711bd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -620,11 +620,26 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 	return ret;
 }
 
+static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+{
+	return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
+}
+
+static inline struct page *alloc_hugepage_vma(int defrag,
+					      struct vm_area_struct *vma,
+					      unsigned long haddr)
+{
+	return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
+			       HPAGE_PMD_ORDER, vma, haddr);
+}
+
+#ifndef CONFIG_NUMA
 static inline struct page *alloc_hugepage(int defrag)
 {
-	return alloc_pages(GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT),
+	return alloc_pages(alloc_hugepage_gfpmask(defrag),
 			   HPAGE_PMD_ORDER);
 }
+#endif
 
 int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			       unsigned long address, pmd_t *pmd,
@@ -639,7 +654,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	if (unlikely(khugepaged_enter(vma)))
 		return VM_FAULT_OOM;
-	page = alloc_hugepage(transparent_hugepage_defrag(vma));
+	page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+				  vma, haddr);
 	if (unlikely(!page))
 		goto out;
 	if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -862,7 +878,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
-		new_page = alloc_hugepage(transparent_hugepage_defrag(vma));
+		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+					      vma, haddr);
 	else
 		new_page = NULL;
 
@@ -1661,7 +1678,11 @@ static void collapse_huge_page(struct mm_struct *mm,
 	unsigned long hstart, hend;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+#ifndef CONFIG_NUMA
 	VM_BUG_ON(!*hpage);
+#else
+	VM_BUG_ON(*hpage);
+#endif
 
 	/*
 	 * Prevent all access to pagetables with the exception of
@@ -1699,9 +1720,17 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
+#ifndef CONFIG_NUMA
 	new_page = *hpage;
-	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+#else
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	if (unlikely(!new_page)) {
+		*hpage = ERR_PTR(-ENOMEM);
 		goto out;
+	}
+#endif
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+		goto out_put_page;
 
 	anon_vma_lock(vma->anon_vma);
 
@@ -1730,7 +1759,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
 		mem_cgroup_uncharge_page(new_page);
-		goto out;
+		goto out_put_page;
 	}
 
 	/*
@@ -1765,10 +1794,19 @@ static void collapse_huge_page(struct mm_struct *mm,
 	mm->nr_ptes--;
 	spin_unlock(&mm->page_table_lock);
 
+#ifndef CONFIG_NUMA
 	*hpage = NULL;
+#endif
 	khugepaged_pages_collapsed++;
 out:
 	up_write(&mm->mmap_sem);
+	return;
+
+out_put_page:
+#ifdef CONFIG_NUMA
+	put_page(new_page);
+#endif
+	goto out;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -2001,11 +2039,16 @@ static void khugepaged_do_scan(struct page **hpage)
 	while (progress < pages) {
 		cond_resched();
 
+#ifndef CONFIG_NUMA
 		if (!*hpage) {
 			*hpage = alloc_hugepage(khugepaged_defrag());
 			if (unlikely(!*hpage))
 				break;
 		}
+#else
+		if (IS_ERR(*hpage))
+			break;
+#endif
 
 		spin_lock(&khugepaged_mm_lock);
 		if (!khugepaged_scan.mm_slot)
@@ -2020,37 +2063,55 @@ static void khugepaged_do_scan(struct page **hpage)
 	}
 }
 
+static void khugepaged_alloc_sleep(void)
+{
+	DEFINE_WAIT(wait);
+	add_wait_queue(&khugepaged_wait, &wait);
+	schedule_timeout_interruptible(
+		msecs_to_jiffies(
+			khugepaged_alloc_sleep_millisecs));
+	remove_wait_queue(&khugepaged_wait, &wait);
+}
+
+#ifndef CONFIG_NUMA
 static struct page *khugepaged_alloc_hugepage(void)
 {
 	struct page *hpage;
 
 	do {
 		hpage = alloc_hugepage(khugepaged_defrag());
-		if (!hpage) {
-			DEFINE_WAIT(wait);
-			add_wait_queue(&khugepaged_wait, &wait);
-			schedule_timeout_interruptible(
-				msecs_to_jiffies(
-					khugepaged_alloc_sleep_millisecs));
-			remove_wait_queue(&khugepaged_wait, &wait);
-		}
+		if (!hpage)
+			khugepaged_alloc_sleep();
 	} while (unlikely(!hpage) &&
 		 likely(khugepaged_enabled()));
 	return hpage;
 }
+#endif
 
 static void khugepaged_loop(void)
 {
 	struct page *hpage;
 
+#ifdef CONFIG_NUMA
+	hpage = NULL;
+#endif
 	while (likely(khugepaged_enabled())) {
+#ifndef CONFIG_NUMA
 		hpage = khugepaged_alloc_hugepage();
 		if (unlikely(!hpage))
 			break;
+#else
+		if (IS_ERR(hpage)) {
+			khugepaged_alloc_sleep();
+			hpage = NULL;
+		}
+#endif
 
 		khugepaged_do_scan(&hpage);
+#ifndef CONFIG_NUMA
 		if (hpage)
 			put_page(hpage);
+#endif
 		if (khugepaged_has_work()) {
 			DEFINE_WAIT(wait);
 			if (!khugepaged_scan_sleep_millisecs)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83b7df309fc4..368fc9d23610 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1796,7 +1796,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 }
 
 /**
- * alloc_page_vma - Allocate a page for a VMA.
+ * alloc_pages_vma - Allocate a page for a VMA.
  *
  * @gfp:
  *      %GFP_USER    user allocation.
@@ -1805,6 +1805,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *      %GFP_FS      allocation should not call back into a file system.
  *      %GFP_ATOMIC  don't sleep.
  *
+ * @order:Order of the GFP allocation.
  * @vma:  Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  *
@@ -1818,7 +1819,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * Should be called with the mm_sem of the vma hold.
  */
 struct page *
-alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
+alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
+		unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 	struct zonelist *zl;
@@ -1830,7 +1832,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		mpol_cond_put(pol);
-		page = alloc_page_interleave(gfp, 0, nid);
+		page = alloc_page_interleave(gfp, order, nid);
 		put_mems_allowed();
 		return page;
 	}
@@ -1839,7 +1841,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 		/*
 		 * slow path: ref counted shared policy
 		 */
-		struct page *page = __alloc_pages_nodemask(gfp, 0,
+		struct page *page = __alloc_pages_nodemask(gfp, order,
 						zl, policy_nodemask(gfp, pol));
 		__mpol_put(pol);
 		put_mems_allowed();
@@ -1848,7 +1850,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 	/*
 	 * fast path: default or task policy
 	 */
-	page = __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+	page = __alloc_pages_nodemask(gfp, order, zl,
+				      policy_nodemask(gfp, pol));
 	put_mems_allowed();
 	return page;
 }