author     Andrea Arcangeli <aarcange@redhat.com>          2011-01-13 18:47:05 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-01-13 20:32:45 -0500
commit     0bbbc0b33d141f78a0d9218a54a47f50621220d3
tree       3ef3363c189ac536926119731eb86dcf989f4adb /mm/huge_memory.c
parent     d39d33c332c611094f84cee39715866f4cbf79e2
thp: add numa awareness to hugepage allocations
It's mostly a matter of replacing alloc_pages with alloc_pages_vma after
introducing alloc_pages_vma. khugepaged needs special handling, as the
allocation has to happen inside collapse_huge_page, where the vma is known,
and an error has to be returned to the outer loop so that it sleeps for
alloc_sleep_millisecs when the allocation fails. With CONFIG_NUMA=n,
however, khugepaged retains the more efficient logic of handling allocation
failures itself.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
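
For orientation before the diff: the patch boils down to the two helpers excerpted below (copied from the first hunk, with explanatory comments added; GFP_TRANSHUGE, __GFP_WAIT and the four-argument alloc_pages_vma() are as they existed in this 2.6.38-era tree, so treat this as an illustrative excerpt rather than standalone, buildable code).

/* Pick the gfp mask: clearing __GFP_WAIT when defrag is off keeps the
 * allocation from blocking on reclaim/compaction. */
static inline gfp_t alloc_hugepage_gfpmask(int defrag)
{
        return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
}

/* NUMA-aware variant: alloc_pages_vma() applies the mempolicy in effect
 * for this vma and address, instead of the policy-blind alloc_pages(). */
static inline struct page *alloc_hugepage_vma(int defrag,
                                              struct vm_area_struct *vma,
                                              unsigned long haddr)
{
        return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
                               HPAGE_PMD_ORDER, vma, haddr);
}

Page faults and COW call alloc_hugepage_vma() directly. khugepaged cannot: with CONFIG_NUMA=y it only knows the vma inside collapse_huge_page(), so the allocation moves there, failure is reported back as *hpage = ERR_PTR(-ENOMEM), and the outer loop sleeps via khugepaged_alloc_sleep(); with CONFIG_NUMA=n the old preallocation path through alloc_hugepage() is kept.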
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c  87
1 file changed, 74 insertions(+), 13 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0415a83afd66..f6559e7711bd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -620,11 +620,26 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
         return ret;
 }
 
+static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+{
+        return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
+}
+
+static inline struct page *alloc_hugepage_vma(int defrag,
+                                              struct vm_area_struct *vma,
+                                              unsigned long haddr)
+{
+        return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
+                               HPAGE_PMD_ORDER, vma, haddr);
+}
+
+#ifndef CONFIG_NUMA
 static inline struct page *alloc_hugepage(int defrag)
 {
-        return alloc_pages(GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT),
+        return alloc_pages(alloc_hugepage_gfpmask(defrag),
                            HPAGE_PMD_ORDER);
 }
+#endif
 
 int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                unsigned long address, pmd_t *pmd,
@@ -639,7 +654,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         return VM_FAULT_OOM;
                 if (unlikely(khugepaged_enter(vma)))
                         return VM_FAULT_OOM;
-                page = alloc_hugepage(transparent_hugepage_defrag(vma));
+                page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+                                          vma, haddr);
                 if (unlikely(!page))
                         goto out;
                 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -862,7 +878,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
         if (transparent_hugepage_enabled(vma) &&
             !transparent_hugepage_debug_cow())
-                new_page = alloc_hugepage(transparent_hugepage_defrag(vma));
+                new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+                                              vma, haddr);
         else
                 new_page = NULL;
 
@@ -1661,7 +1678,11 @@ static void collapse_huge_page(struct mm_struct *mm,
         unsigned long hstart, hend;
 
         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+#ifndef CONFIG_NUMA
         VM_BUG_ON(!*hpage);
+#else
+        VM_BUG_ON(*hpage);
+#endif
 
         /*
          * Prevent all access to pagetables with the exception of
@@ -1699,9 +1720,17 @@ static void collapse_huge_page(struct mm_struct *mm,
         if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
                 goto out;
 
+#ifndef CONFIG_NUMA
         new_page = *hpage;
-        if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+#else
+        new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+        if (unlikely(!new_page)) {
+                *hpage = ERR_PTR(-ENOMEM);
                 goto out;
+        }
+#endif
+        if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+                goto out_put_page;
 
         anon_vma_lock(vma->anon_vma);
 
@@ -1730,7 +1759,7 @@ static void collapse_huge_page(struct mm_struct *mm,
                 spin_unlock(&mm->page_table_lock);
                 anon_vma_unlock(vma->anon_vma);
                 mem_cgroup_uncharge_page(new_page);
-                goto out;
+                goto out_put_page;
         }
 
         /*
@@ -1765,10 +1794,19 @@ static void collapse_huge_page(struct mm_struct *mm,
         mm->nr_ptes--;
         spin_unlock(&mm->page_table_lock);
 
+#ifndef CONFIG_NUMA
         *hpage = NULL;
+#endif
         khugepaged_pages_collapsed++;
 out:
         up_write(&mm->mmap_sem);
+        return;
+
+out_put_page:
+#ifdef CONFIG_NUMA
+        put_page(new_page);
+#endif
+        goto out;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -2001,11 +2039,16 @@ static void khugepaged_do_scan(struct page **hpage)
         while (progress < pages) {
                 cond_resched();
 
+#ifndef CONFIG_NUMA
                 if (!*hpage) {
                         *hpage = alloc_hugepage(khugepaged_defrag());
                         if (unlikely(!*hpage))
                                 break;
                 }
+#else
+                if (IS_ERR(*hpage))
+                        break;
+#endif
 
                 spin_lock(&khugepaged_mm_lock);
                 if (!khugepaged_scan.mm_slot)
@@ -2020,37 +2063,55 @@ static void khugepaged_do_scan(struct page **hpage)
         }
 }
 
+static void khugepaged_alloc_sleep(void)
+{
+        DEFINE_WAIT(wait);
+        add_wait_queue(&khugepaged_wait, &wait);
+        schedule_timeout_interruptible(
+                msecs_to_jiffies(
+                        khugepaged_alloc_sleep_millisecs));
+        remove_wait_queue(&khugepaged_wait, &wait);
+}
+
+#ifndef CONFIG_NUMA
 static struct page *khugepaged_alloc_hugepage(void)
 {
         struct page *hpage;
 
         do {
                 hpage = alloc_hugepage(khugepaged_defrag());
-                if (!hpage) {
-                        DEFINE_WAIT(wait);
-                        add_wait_queue(&khugepaged_wait, &wait);
-                        schedule_timeout_interruptible(
-                                msecs_to_jiffies(
-                                        khugepaged_alloc_sleep_millisecs));
-                        remove_wait_queue(&khugepaged_wait, &wait);
-                }
+                if (!hpage)
+                        khugepaged_alloc_sleep();
         } while (unlikely(!hpage) &&
                  likely(khugepaged_enabled()));
         return hpage;
 }
+#endif
 
 static void khugepaged_loop(void)
 {
         struct page *hpage;
 
+#ifdef CONFIG_NUMA
+        hpage = NULL;
+#endif
         while (likely(khugepaged_enabled())) {
+#ifndef CONFIG_NUMA
                 hpage = khugepaged_alloc_hugepage();
                 if (unlikely(!hpage))
                         break;
+#else
+                if (IS_ERR(hpage)) {
+                        khugepaged_alloc_sleep();
+                        hpage = NULL;
+                }
+#endif
 
                 khugepaged_do_scan(&hpage);
+#ifndef CONFIG_NUMA
                 if (hpage)
                         put_page(hpage);
+#endif
                 if (khugepaged_has_work()) {
                         DEFINE_WAIT(wait);
                         if (!khugepaged_scan_sleep_millisecs)