Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c  78
1 file changed, 56 insertions, 22 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2612f60f53ee..0556c6a44959 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -27,11 +27,12 @@
 #include "internal.h"
 
 /*
- * By default transparent hugepage support is enabled for all mappings
- * and khugepaged scans all mappings. Defrag is only invoked by
- * khugepaged hugepage allocations and by page faults inside
- * MADV_HUGEPAGE regions to avoid the risk of slowing down short lived
- * allocations.
+ * By default transparent hugepage support is disabled in order that avoid
+ * to risk increase the memory footprint of applications without a guaranteed
+ * benefit. When transparent hugepage support is enabled, is for all mappings,
+ * and khugepaged scans all mappings.
+ * Defrag is invoked by khugepaged hugepage allocations and by page faults
+ * for all hugepage allocations.
  */
 unsigned long transparent_hugepage_flags __read_mostly =
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS
@@ -758,14 +759,6 @@ static inline struct page *alloc_hugepage_vma(int defrag,
 			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
-#ifndef CONFIG_NUMA
-static inline struct page *alloc_hugepage(int defrag)
-{
-	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
-			   HPAGE_PMD_ORDER);
-}
-#endif
-
 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
 		struct page *zero_page)
@@ -2198,7 +2191,34 @@ static void khugepaged_alloc_sleep(void)
 			msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
 }
 
+static int khugepaged_node_load[MAX_NUMNODES];
+
 #ifdef CONFIG_NUMA
+static int khugepaged_find_target_node(void)
+{
+	static int last_khugepaged_target_node = NUMA_NO_NODE;
+	int nid, target_node = 0, max_value = 0;
+
+	/* find first node with max normal pages hit */
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		if (khugepaged_node_load[nid] > max_value) {
+			max_value = khugepaged_node_load[nid];
+			target_node = nid;
+		}
+
+	/* do some balance if several nodes have the same hit record */
+	if (target_node <= last_khugepaged_target_node)
+		for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
+				nid++)
+			if (max_value == khugepaged_node_load[nid]) {
+				target_node = nid;
+				break;
+			}
+
+	last_khugepaged_target_node = target_node;
+	return target_node;
+}
+
 static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 {
 	if (IS_ERR(*hpage)) {
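
[Editor's note] The policy added in this hunk is simple enough to exercise outside the kernel. The sketch below is a minimal userspace model of the same idea, not the kernel code itself: MAX_NODES and NO_NODE are hypothetical stand-ins for MAX_NUMNODES and NUMA_NO_NODE, and node_load[] models khugepaged_node_load[]. It shows the two steps the new function performs: pick the first node with the highest hit count, then, when the same tie would win again, rotate to the next tied node so collapses are spread across those nodes.

    /* Standalone model of the khugepaged target-node policy (illustrative only). */
    #include <stdio.h>

    #define MAX_NODES  8        /* stand-in for MAX_NUMNODES */
    #define NO_NODE    (-1)     /* stand-in for NUMA_NO_NODE */

    static int node_load[MAX_NODES];    /* models khugepaged_node_load[] */

    static int find_target_node(void)
    {
            static int last_target = NO_NODE;
            int nid, target = 0, max_value = 0;

            /* find the first node with the highest hit count */
            for (nid = 0; nid < MAX_NODES; nid++)
                    if (node_load[nid] > max_value) {
                            max_value = node_load[nid];
                            target = nid;
                    }

            /* if the same node would win again, rotate to the next tied node */
            if (target <= last_target)
                    for (nid = last_target + 1; nid < MAX_NODES; nid++)
                            if (node_load[nid] == max_value) {
                                    target = nid;
                                    break;
                            }

            last_target = target;
            return target;
    }

    int main(void)
    {
            /* pretend a scan saw the same number of base pages on nodes 1 and 3 */
            node_load[1] = 256;
            node_load[3] = 256;

            printf("first pick:  node %d\n", find_target_node());
            printf("second pick: node %d\n", find_target_node());
            return 0;
    }

Compiled and run, the demo prints node 1 for the first pick and node 3 for the second, because last_target remembers the previous choice across calls.
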
@@ -2232,9 +2252,8 @@ static struct page
 	 * mmap_sem in read mode is good idea also to allow greater
 	 * scalability.
 	 */
-	*hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
-				    node, __GFP_OTHER_NODE);
-
+	*hpage = alloc_pages_exact_node(node, alloc_hugepage_gfpmask(
+		khugepaged_defrag(), __GFP_OTHER_NODE), HPAGE_PMD_ORDER);
 	/*
 	 * After allocating the hugepage, release the mmap_sem read lock in
 	 * preparation for taking it in write mode.
@@ -2250,6 +2269,17 @@ static struct page
 	return *hpage;
 }
 #else
+static int khugepaged_find_target_node(void)
+{
+	return 0;
+}
+
+static inline struct page *alloc_hugepage(int defrag)
+{
+	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
+			   HPAGE_PMD_ORDER);
+}
+
 static struct page *khugepaged_alloc_hugepage(bool *wait)
 {
 	struct page *hpage;
@@ -2456,6 +2486,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	if (pmd_trans_huge(*pmd))
 		goto out;
 
+	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, _address += PAGE_SIZE) {
@@ -2472,12 +2503,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		if (unlikely(!page))
 			goto out_unmap;
 		/*
-		 * Chose the node of the first page. This could
-		 * be more sophisticated and look at more pages,
-		 * but isn't for now.
+		 * Record which node the original page is from and save this
+		 * information to khugepaged_node_load[].
+		 * Khupaged will allocate hugepage from the node has the max
+		 * hit record.
 		 */
-		if (node == NUMA_NO_NODE)
-			node = page_to_nid(page);
+		node = page_to_nid(page);
+		khugepaged_node_load[node]++;
 		VM_BUG_ON(PageCompound(page));
 		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
 			goto out_unmap;
@@ -2492,9 +2524,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		ret = 1;
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret)
+	if (ret) {
+		node = khugepaged_find_target_node();
 		/* collapse_huge_page will return with the mmap_sem released */
 		collapse_huge_page(mm, address, hpage, vma, node);
+	}
 out:
 	return ret;
 }
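
[Editor's note] The scan-side changes above pair with that policy: khugepaged_scan_pmd() now zeroes the counters for every candidate PMD range, tallies the home node of each mapped base page during the PTE walk, and only asks for a target node once the walk has succeeded. Below is a minimal sketch of that bookkeeping, again in plain userspace C with hypothetical stand-ins: page_node() replaces page_to_nid(), PAGES_PER_PMD replaces HPAGE_PMD_NR (512 on x86-64 with 4K base pages), and a plain argmax replaces the tie-breaking picker shown earlier.

    /* Model of the per-PMD bookkeeping: reset, tally, then pick a target. */
    #include <stdio.h>
    #include <string.h>

    #define MAX_NODES     8
    #define PAGES_PER_PMD 512   /* stand-in for HPAGE_PMD_NR */

    static int node_load[MAX_NODES];

    /* fake "which node is this base page on" lookup for the demo */
    static int page_node(int page_index)
    {
            return (page_index < 300) ? 0 : 2;
    }

    int main(void)
    {
            int i, nid, target = 0, max_value = 0;

            /* counters are zeroed for every PMD range scanned */
            memset(node_load, 0, sizeof(node_load));

            /* PTE walk: record the home node of each mapped base page */
            for (i = 0; i < PAGES_PER_PMD; i++)
                    node_load[page_node(i)]++;

            /* simple argmax stands in for khugepaged_find_target_node() */
            for (nid = 0; nid < MAX_NODES; nid++)
                    if (node_load[nid] > max_value) {
                            max_value = node_load[nid];
                            target = nid;
                    }

            printf("collapse target: node %d (%d of %d pages)\n",
                   target, max_value, PAGES_PER_PMD);
            return 0;
    }
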