path: root/mm/huge_memory.c
author:    Andrea Arcangeli <aarcange@redhat.com>  2011-01-13 18:47:05 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>  2011-01-13 20:32:45 -0500
commit:    0bbbc0b33d141f78a0d9218a54a47f50621220d3
tree:      3ef3363c189ac536926119731eb86dcf989f4adb  /mm/huge_memory.c
parent:    d39d33c332c611094f84cee39715866f4cbf79e2
thp: add numa awareness to hugepage allocations
It's mostly a matter of replacing alloc_pages with alloc_pages_vma after
introducing alloc_pages_vma. khugepaged needs special handling as the
allocation has to happen inside collapse_huge_page where the vma is known,
and an error has to be returned to the outer loop to sleep
alloc_sleep_millisecs in case of failure. But it retains the more efficient
logic of handling allocation failures in khugepaged in case of
CONFIG_NUMA=n.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
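The ERR_PTR(-ENOMEM) round trip between collapse_huge_page() and the khugepaged outer loop is easiest to see in isolation. Below is a minimal, user-space sketch of that protocol only: try_numa_alloc() and collapse_one() are hypothetical stand-ins for alloc_hugepage_vma() and collapse_huge_page(), and ERR_PTR()/IS_ERR() are simplified re-implementations of the kernel macros from include/linux/err.h, included only to keep the example self-contained.

/*
 * User-space sketch (not kernel code): on allocation failure, the
 * collapse path hands ERR_PTR(-ENOMEM) back through *hpage so the
 * outer loop can sleep alloc_sleep_millisecs before trying again.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define ERR_PTR(err)  ((void *)(long)(err))
#define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-4095)

static void *try_numa_alloc(void)          /* stand-in for alloc_hugepage_vma() */
{
        return rand() % 2 ? malloc(2 * 1024 * 1024) : NULL;
}

static void collapse_one(void **hpage)     /* stand-in for collapse_huge_page() */
{
        void *new_page = try_numa_alloc();
        if (!new_page) {
                *hpage = ERR_PTR(-ENOMEM); /* signal failure to the outer loop */
                return;
        }
        /* ... collapse work would happen here ... */
        free(new_page);
}

int main(void)
{
        void *hpage = NULL;

        for (int i = 0; i < 4; i++) {      /* stand-in for khugepaged_loop() */
                if (IS_ERR(hpage)) {
                        puts("allocation failed, would sleep alloc_sleep_millisecs");
                        hpage = NULL;      /* retry after the sleep */
                }
                collapse_one(&hpage);
        }
        return 0;
}

In the real patch this reset-and-sleep happens in khugepaged_loop() under CONFIG_NUMA, while the CONFIG_NUMA=n build keeps the old preallocation logic in khugepaged_alloc_hugepage().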
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c | 87
1 file changed, 74 insertions(+), 13 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0415a83afd66..f6559e7711bd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -620,11 +620,26 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
         return ret;
 }
 
+static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+{
+        return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
+}
+
+static inline struct page *alloc_hugepage_vma(int defrag,
+                                              struct vm_area_struct *vma,
+                                              unsigned long haddr)
+{
+        return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
+                               HPAGE_PMD_ORDER, vma, haddr);
+}
+
+#ifndef CONFIG_NUMA
 static inline struct page *alloc_hugepage(int defrag)
 {
-        return alloc_pages(GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT),
+        return alloc_pages(alloc_hugepage_gfpmask(defrag),
                            HPAGE_PMD_ORDER);
 }
+#endif
 
 int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                unsigned long address, pmd_t *pmd,
@@ -639,7 +654,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 return VM_FAULT_OOM;
         if (unlikely(khugepaged_enter(vma)))
                 return VM_FAULT_OOM;
-        page = alloc_hugepage(transparent_hugepage_defrag(vma));
+        page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+                                  vma, haddr);
         if (unlikely(!page))
                 goto out;
         if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -862,7 +878,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
         if (transparent_hugepage_enabled(vma) &&
             !transparent_hugepage_debug_cow())
-                new_page = alloc_hugepage(transparent_hugepage_defrag(vma));
+                new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+                                              vma, haddr);
         else
                 new_page = NULL;
 
@@ -1661,7 +1678,11 @@ static void collapse_huge_page(struct mm_struct *mm,
         unsigned long hstart, hend;
 
         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+#ifndef CONFIG_NUMA
         VM_BUG_ON(!*hpage);
+#else
+        VM_BUG_ON(*hpage);
+#endif
 
         /*
          * Prevent all access to pagetables with the exception of
@@ -1699,9 +1720,17 @@ static void collapse_huge_page(struct mm_struct *mm,
         if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
                 goto out;
 
+#ifndef CONFIG_NUMA
         new_page = *hpage;
-        if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+#else
+        new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+        if (unlikely(!new_page)) {
+                *hpage = ERR_PTR(-ENOMEM);
                 goto out;
+        }
+#endif
+        if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+                goto out_put_page;
 
         anon_vma_lock(vma->anon_vma);
 
@@ -1730,7 +1759,7 @@ static void collapse_huge_page(struct mm_struct *mm,
                 spin_unlock(&mm->page_table_lock);
                 anon_vma_unlock(vma->anon_vma);
                 mem_cgroup_uncharge_page(new_page);
-                goto out;
+                goto out_put_page;
         }
 
         /*
@@ -1765,10 +1794,19 @@ static void collapse_huge_page(struct mm_struct *mm,
         mm->nr_ptes--;
         spin_unlock(&mm->page_table_lock);
 
+#ifndef CONFIG_NUMA
         *hpage = NULL;
+#endif
         khugepaged_pages_collapsed++;
 out:
         up_write(&mm->mmap_sem);
+        return;
+
+out_put_page:
+#ifdef CONFIG_NUMA
+        put_page(new_page);
+#endif
+        goto out;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -2001,11 +2039,16 @@ static void khugepaged_do_scan(struct page **hpage)
         while (progress < pages) {
                 cond_resched();
 
+#ifndef CONFIG_NUMA
                 if (!*hpage) {
                         *hpage = alloc_hugepage(khugepaged_defrag());
                         if (unlikely(!*hpage))
                                 break;
                 }
+#else
+                if (IS_ERR(*hpage))
+                        break;
+#endif
 
                 spin_lock(&khugepaged_mm_lock);
                 if (!khugepaged_scan.mm_slot)
@@ -2020,37 +2063,55 @@ static void khugepaged_do_scan(struct page **hpage)
         }
 }
 
+static void khugepaged_alloc_sleep(void)
+{
+        DEFINE_WAIT(wait);
+        add_wait_queue(&khugepaged_wait, &wait);
+        schedule_timeout_interruptible(
+                msecs_to_jiffies(
+                        khugepaged_alloc_sleep_millisecs));
+        remove_wait_queue(&khugepaged_wait, &wait);
+}
+
+#ifndef CONFIG_NUMA
 static struct page *khugepaged_alloc_hugepage(void)
 {
         struct page *hpage;
 
         do {
                 hpage = alloc_hugepage(khugepaged_defrag());
-                if (!hpage) {
-                        DEFINE_WAIT(wait);
-                        add_wait_queue(&khugepaged_wait, &wait);
-                        schedule_timeout_interruptible(
-                                msecs_to_jiffies(
-                                        khugepaged_alloc_sleep_millisecs));
-                        remove_wait_queue(&khugepaged_wait, &wait);
-                }
+                if (!hpage)
+                        khugepaged_alloc_sleep();
         } while (unlikely(!hpage) &&
                  likely(khugepaged_enabled()));
         return hpage;
 }
+#endif
 
 static void khugepaged_loop(void)
 {
         struct page *hpage;
 
+#ifdef CONFIG_NUMA
+        hpage = NULL;
+#endif
         while (likely(khugepaged_enabled())) {
+#ifndef CONFIG_NUMA
                 hpage = khugepaged_alloc_hugepage();
                 if (unlikely(!hpage))
                         break;
+#else
+                if (IS_ERR(hpage)) {
+                        khugepaged_alloc_sleep();
+                        hpage = NULL;
+                }
+#endif
 
                 khugepaged_do_scan(&hpage);
+#ifndef CONFIG_NUMA
                 if (hpage)
                         put_page(hpage);
+#endif
                 if (khugepaged_has_work()) {
                         DEFINE_WAIT(wait);
                         if (!khugepaged_scan_sleep_millisecs)