Diffstat (limited to 'mm/huge_memory.c')

 -rw-r--r--   mm/huge_memory.c | 78
 1 file changed, 56 insertions(+), 22 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2612f60f53ee..0556c6a44959 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -27,11 +27,12 @@
 #include "internal.h"
 
 /*
- * By default transparent hugepage support is enabled for all mappings
- * and khugepaged scans all mappings. Defrag is only invoked by
- * khugepaged hugepage allocations and by page faults inside
- * MADV_HUGEPAGE regions to avoid the risk of slowing down short lived
- * allocations.
+ * By default, transparent hugepage support is disabled in order to avoid
+ * risking an increased memory footprint for applications without a
+ * guaranteed benefit. When transparent hugepage support is enabled, it is
+ * enabled for all mappings, and khugepaged scans all mappings.
+ * Defrag is invoked by khugepaged hugepage allocations and by page faults
+ * for all hugepage allocations.
  */
 unsigned long transparent_hugepage_flags __read_mostly =
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS
@@ -758,14 +759,6 @@ static inline struct page *alloc_hugepage_vma(int defrag,
 			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
-#ifndef CONFIG_NUMA
-static inline struct page *alloc_hugepage(int defrag)
-{
-	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
-			   HPAGE_PMD_ORDER);
-}
-#endif
-
 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
 		struct page *zero_page)
@@ -2198,7 +2191,34 @@ static void khugepaged_alloc_sleep(void)
 			msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
 }
 
+static int khugepaged_node_load[MAX_NUMNODES];
+
 #ifdef CONFIG_NUMA
+static int khugepaged_find_target_node(void)
+{
+	static int last_khugepaged_target_node = NUMA_NO_NODE;
+	int nid, target_node = 0, max_value = 0;
+
+	/* find first node with max normal pages hit */
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		if (khugepaged_node_load[nid] > max_value) {
+			max_value = khugepaged_node_load[nid];
+			target_node = nid;
+		}
+
+	/* do some balance if several nodes have the same hit record */
+	if (target_node <= last_khugepaged_target_node)
+		for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
+				nid++)
+			if (max_value == khugepaged_node_load[nid]) {
+				target_node = nid;
+				break;
+			}
+
+	last_khugepaged_target_node = target_node;
+	return target_node;
+}
+
 static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 {
 	if (IS_ERR(*hpage)) {
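The policy encoded by the new khugepaged_find_target_node() is: pick the node that collected the most hits during the PTE scan, and when several nodes tie for the maximum, rotate past the node chosen last time so ties do not always favour the lowest-numbered node. Below is a minimal userspace sketch of that policy, not the kernel code itself; the 4-node MAX_NUMNODES and the sample load values are made up for illustration.

#include <stdio.h>

#define MAX_NUMNODES	4
#define NUMA_NO_NODE	(-1)

/* Stand-in for khugepaged_node_load[], filled by the PTE scan. */
static int node_load[MAX_NUMNODES];

/* Userspace model of the selection policy in khugepaged_find_target_node(). */
static int find_target_node(void)
{
	static int last_target_node = NUMA_NO_NODE;
	int nid, target_node = 0, max_value = 0;

	/* pick the first node with the highest hit count */
	for (nid = 0; nid < MAX_NUMNODES; nid++)
		if (node_load[nid] > max_value) {
			max_value = node_load[nid];
			target_node = nid;
		}

	/* on a tie, prefer a node after the one chosen last time */
	if (target_node <= last_target_node)
		for (nid = last_target_node + 1; nid < MAX_NUMNODES; nid++)
			if (node_load[nid] == max_value) {
				target_node = nid;
				break;
			}

	last_target_node = target_node;
	return target_node;
}

int main(void)
{
	/* hypothetical scan result: nodes 1 and 3 tie with 256 hits each */
	node_load[0] = 0;
	node_load[1] = 256;
	node_load[2] = 32;
	node_load[3] = 256;

	printf("first pick: node %d\n", find_target_node());	/* node 1 */
	printf("second pick: node %d\n", find_target_node());	/* node 3 */
	return 0;
}

With identical loads on consecutive scans, the static last_target_node is what alternates the choice between the tied nodes instead of always returning the same one.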
@@ -2232,9 +2252,8 @@ static struct page
 	 * mmap_sem in read mode is good idea also to allow greater
 	 * scalability.
 	 */
-	*hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
-				    node, __GFP_OTHER_NODE);
-
+	*hpage = alloc_pages_exact_node(node, alloc_hugepage_gfpmask(
+		khugepaged_defrag(), __GFP_OTHER_NODE), HPAGE_PMD_ORDER);
 	/*
 	 * After allocating the hugepage, release the mmap_sem read lock in
 	 * preparation for taking it in write mode.
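For context on the order argument: HPAGE_PMD_ORDER is the difference between the PMD shift and the base page shift, so the alloc_pages_exact_node() call above asks the chosen node for one contiguous block of 2^HPAGE_PMD_ORDER base pages. A small arithmetic sketch, assuming the common x86-64 values of 4KB base pages and 2MB PMD-sized huge pages:

#include <stdio.h>

int main(void)
{
	/* assumed values: 4KB base pages, 2MB PMD-sized huge pages */
	unsigned int page_shift = 12;		/* log2(4096)	 */
	unsigned int hpage_pmd_shift = 21;	/* log2(2097152) */
	unsigned int hpage_pmd_order = hpage_pmd_shift - page_shift;

	/*
	 * The hunk above requests one block of 2^order contiguous base
	 * pages from the node selected by khugepaged_find_target_node().
	 */
	printf("order %u -> %lu pages -> %lu KB\n",
	       hpage_pmd_order,
	       1UL << hpage_pmd_order,
	       (1UL << hpage_pmd_order) * 4);	/* order 9 -> 512 pages -> 2048 KB */
	return 0;
}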
@@ -2250,6 +2269,17 @@ static struct page
 	return *hpage;
 }
 #else
+static int khugepaged_find_target_node(void)
+{
+	return 0;
+}
+
+static inline struct page *alloc_hugepage(int defrag)
+{
+	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
+			   HPAGE_PMD_ORDER);
+}
+
 static struct page *khugepaged_alloc_hugepage(bool *wait)
 {
 	struct page *hpage;
@@ -2456,6 +2486,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	if (pmd_trans_huge(*pmd))
 		goto out;
 
+	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, _address += PAGE_SIZE) {
@@ -2472,12 +2503,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		if (unlikely(!page))
 			goto out_unmap;
 		/*
-		 * Chose the node of the first page. This could
-		 * be more sophisticated and look at more pages,
-		 * but isn't for now.
+		 * Record which node the original page is from and save this
+		 * information to khugepaged_node_load[].
+		 * Khugepaged will allocate the hugepage from the node that
+		 * has the maximum hit count.
 		 */
-		if (node == NUMA_NO_NODE)
-			node = page_to_nid(page);
+		node = page_to_nid(page);
+		khugepaged_node_load[node]++;
 		VM_BUG_ON(PageCompound(page));
 		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
 			goto out_unmap;
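A minimal sketch of the accounting this hunk introduces: instead of remembering only the node of the first page seen, every mapped page in the PMD range now adds one hit for the node it currently resides on, with the counters cleared before each scan by the memset() added earlier. The names below (count_node_hits, NR_PTES) are made up for illustration, and the page-to-node lookup is passed in as plain data rather than derived from struct page.

#include <string.h>

#define NR_PTES		512	/* HPAGE_PMD_NR with 4KB pages, 2MB huge pages */
#define MAX_NUMNODES	4	/* illustrative value */

/*
 * Userspace model of the per-PTE accounting: node_load[] plays the role of
 * khugepaged_node_load[], page_nid[] stands in for page_to_nid() on each
 * mapped page in the range.
 */
void count_node_hits(const int page_nid[NR_PTES], int node_load[MAX_NUMNODES])
{
	int i;

	/* mirrors the memset() added before the PTE scan */
	memset(node_load, 0, MAX_NUMNODES * sizeof(node_load[0]));

	/* mirrors khugepaged_node_load[node]++ for every present PTE */
	for (i = 0; i < NR_PTES; i++)
		node_load[page_nid[i]]++;
}

collapse_huge_page() then receives whichever node khugepaged_find_target_node() condenses these counters into, as the final hunk below shows.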
@@ -2492,9 +2524,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		ret = 1;
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret)
+	if (ret) {
+		node = khugepaged_find_target_node();
 		/* collapse_huge_page will return with the mmap_sem released */
 		collapse_huge_page(mm, address, hpage, vma, node);
+	}
 out:
 	return ret;
 }