Diffstat (limited to 'mm/huge_memory.c'):
 mm/huge_memory.c (-rw-r--r--) | 86
 1 file changed, 49 insertions(+), 37 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3afb5cbe1312..078832cf3636 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -67,6 +67,7 @@ static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
 
 static int khugepaged(void *none);
 static int khugepaged_slab_init(void);
+static void khugepaged_slab_exit(void);
 
 #define MM_SLOTS_HASH_BITS 10
 static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
@@ -109,9 +110,6 @@ static int set_recommended_min_free_kbytes(void)
 	int nr_zones = 0;
 	unsigned long recommended_min;
 
-	if (!khugepaged_enabled())
-		return 0;
-
 	for_each_populated_zone(zone)
 		nr_zones++;
 
@@ -143,9 +141,8 @@ static int set_recommended_min_free_kbytes(void)
 	setup_per_zone_wmarks();
 	return 0;
 }
-late_initcall(set_recommended_min_free_kbytes);
 
-static int start_khugepaged(void)
+static int start_stop_khugepaged(void)
 {
 	int err = 0;
 	if (khugepaged_enabled()) {
@@ -156,6 +153,7 @@ static int start_khugepaged(void)
 			pr_err("khugepaged: kthread_run(khugepaged) failed\n");
 			err = PTR_ERR(khugepaged_thread);
 			khugepaged_thread = NULL;
+			goto fail;
 		}
 
 		if (!list_empty(&khugepaged_scan.mm_head))
@@ -166,7 +164,7 @@ static int start_khugepaged(void)
 		kthread_stop(khugepaged_thread);
 		khugepaged_thread = NULL;
 	}
-
+fail:
 	return err;
 }
 
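
The two hunks above rename start_khugepaged() to start_stop_khugepaged() and make a failed kthread_run() bail out through the new fail: label instead of falling through into the wake/stop logic. A minimal user-space sketch of that start-or-stop control-flow shape, using hypothetical worker helpers rather than the kernel's kthread API, might look like this:

	/* Illustrative sketch only; names are hypothetical, not kernel APIs. */
	#include <stdio.h>

	static int enabled = 1;     /* stands in for khugepaged_enabled() */
	static void *worker;        /* NULL while the worker is not running */

	static void *start_worker(void) { static int dummy; return &dummy; }
	static void stop_worker(void *w) { (void)w; }

	static int start_stop_worker(void)
	{
		int err = 0;

		if (enabled) {
			if (!worker) {
				worker = start_worker();
				if (!worker) {
					fprintf(stderr, "failed to start worker\n");
					err = -1;
					goto fail;   /* skip the wake/stop logic below */
				}
			}
			/* ...wake the worker here... */
		} else if (worker) {
			stop_worker(worker);         /* feature disabled: stop the worker */
			worker = NULL;
		}
	fail:
		return err;
	}

	int main(void) { return start_stop_worker(); }
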
@@ -183,7 +181,7 @@ static struct page *get_huge_zero_page(void)
 	struct page *zero_page;
 retry:
 	if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
-		return ACCESS_ONCE(huge_zero_page);
+		return READ_ONCE(huge_zero_page);
 
 	zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
 			HPAGE_PMD_ORDER);
@@ -202,7 +200,7 @@ retry:
 	/* We take additional reference here. It will be put back by shrinker */
 	atomic_set(&huge_zero_refcount, 2);
 	preempt_enable();
-	return ACCESS_ONCE(huge_zero_page);
+	return READ_ONCE(huge_zero_page);
 }
 
 static void put_huge_zero_page(void)
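
Both hunks above are a mechanical ACCESS_ONCE() to READ_ONCE() conversion; the intent (read huge_zero_page exactly once, without the compiler caching or refetching it) is unchanged. Outside the kernel that effect is usually approximated with a volatile cast, as in this illustrative user-space helper (an approximation, not the kernel macro):

	#include <stdio.h>

	/* Rough user-space stand-in for a "read exactly once" annotation:
	 * the volatile-qualified access keeps the compiler from merging,
	 * caching, or re-issuing this load. Illustrative only.
	 */
	#define read_once(x) (*(volatile __typeof__(x) *)&(x))

	static int shared_flag;

	int main(void)
	{
		int seen = read_once(shared_flag);   /* exactly one load of shared_flag */
		printf("seen=%d\n", seen);
		return 0;
	}
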
@@ -300,7 +298,7 @@ static ssize_t enabled_store(struct kobject *kobj,
 		int err;
 
 		mutex_lock(&khugepaged_mutex);
-		err = start_khugepaged();
+		err = start_stop_khugepaged();
 		mutex_unlock(&khugepaged_mutex);
 
 		if (err)
@@ -634,27 +632,38 @@ static int __init hugepage_init(void)
 
 	err = hugepage_init_sysfs(&hugepage_kobj);
 	if (err)
-		return err;
+		goto err_sysfs;
 
 	err = khugepaged_slab_init();
 	if (err)
-		goto out;
+		goto err_slab;
 
-	register_shrinker(&huge_zero_page_shrinker);
+	err = register_shrinker(&huge_zero_page_shrinker);
+	if (err)
+		goto err_hzp_shrinker;
 
 	/*
 	 * By default disable transparent hugepages on smaller systems,
 	 * where the extra memory used could hurt more than TLB overhead
 	 * is likely to save. The admin can still enable it through /sys.
 	 */
-	if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
+	if (totalram_pages < (512 << (20 - PAGE_SHIFT))) {
 		transparent_hugepage_flags = 0;
+		return 0;
+	}
 
-	start_khugepaged();
+	err = start_stop_khugepaged();
+	if (err)
+		goto err_khugepaged;
 
 	return 0;
-out:
+err_khugepaged:
+	unregister_shrinker(&huge_zero_page_shrinker);
+err_hzp_shrinker:
+	khugepaged_slab_exit();
+err_slab:
 	hugepage_exit_sysfs(hugepage_kobj);
+err_sysfs:
 	return err;
 }
 subsys_initcall(hugepage_init);
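
The hugepage_init() hunk above turns a single out: label into a full unwind ladder: each label undoes only the steps that had already succeeded when the failure happened, in reverse order. A generic sketch of that pattern, with hypothetical setup/teardown steps standing in for the sysfs, slab and shrinker registration, is:

	/* Hypothetical init steps; each returns 0 on success. */
	static int setup_sysfs(void)    { return 0; }
	static int setup_slab(void)     { return 0; }
	static int setup_shrinker(void) { return 0; }
	static void teardown_slab(void)  { }
	static void teardown_sysfs(void) { }

	static int init_example(void)
	{
		int err;

		err = setup_sysfs();
		if (err)
			goto err_sysfs;            /* nothing succeeded yet */

		err = setup_slab();
		if (err)
			goto err_slab;             /* undo sysfs only */

		err = setup_shrinker();
		if (err)
			goto err_shrinker;         /* undo slab, then sysfs */

		return 0;

	err_shrinker:
		teardown_slab();
	err_slab:
		teardown_sysfs();
	err_sysfs:
		return err;
	}

	int main(void) { return init_example(); }
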
@@ -708,7 +717,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long haddr, pmd_t *pmd,
-					struct page *page)
+					struct page *page, gfp_t gfp)
 {
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
@@ -716,7 +725,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+	if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
 		return VM_FAULT_OOM;
 
 	pgtable = pte_alloc_one(mm, haddr);
@@ -822,7 +831,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
-	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1080,6 +1089,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
+	gfp_t huge_gfp;			/* for allocation and charge */
 
 	ptl = pmd_lockptr(mm, pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
@@ -1106,10 +1116,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		gfp_t gfp;
-
-		gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
-		new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
@@ -1130,8 +1138,7 @@ alloc:
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm,
-					   GFP_TRANSHUGE, &memcg))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg))) {
 		put_page(new_page);
 		if (page) {
 			split_huge_page(page);
@@ -1976,6 +1983,11 @@ static int __init khugepaged_slab_init(void)
 	return 0;
 }
 
+static void __init khugepaged_slab_exit(void)
+{
+	kmem_cache_destroy(mm_slot_cache);
+}
+
 static inline struct mm_slot *alloc_mm_slot(void)
 {
 	if (!mm_slot_cache)	/* initialization failed */
@@ -2323,19 +2335,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page
-*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
+static struct page *
+khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		       struct vm_area_struct *vma, unsigned long address,
 		       int node)
 {
-	gfp_t flags;
-
 	VM_BUG_ON_PAGE(*hpage, *hpage);
 
-	/* Only allocate from the target node */
-	flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
-		__GFP_THISNODE;
-
 	/*
 	 * Before allocating the hugepage, release the mmap_sem read lock.
 	 * The allocation can take potentially a long time if it involves
@@ -2344,7 +2350,7 @@ static struct page
 	 */
 	up_read(&mm->mmap_sem);
 
-	*hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER);
+	*hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
 	if (unlikely(!*hpage)) {
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		*hpage = ERR_PTR(-ENOMEM);
@@ -2397,13 +2403,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page
-*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
+static struct page *
+khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		       struct vm_area_struct *vma, unsigned long address,
 		       int node)
 {
 	up_read(&mm->mmap_sem);
 	VM_BUG_ON(!*hpage);
+
 	return *hpage;
 }
 #endif
@@ -2438,16 +2445,21 @@ static void collapse_huge_page(struct mm_struct *mm,
 	struct mem_cgroup *memcg;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
+	gfp_t gfp;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
+	/* Only allocate from the target node */
+	gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
+		__GFP_THISNODE;
+
 	/* release the mmap_sem read lock. */
-	new_page = khugepaged_alloc_page(hpage, mm, vma, address, node);
+	new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
 	if (!new_page)
 		return;
 
 	if (unlikely(mem_cgroup_try_charge(new_page, mm,
-					   GFP_TRANSHUGE, &memcg)))
+					   gfp, &memcg)))
 		return;
 
 	/*
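
Taken together, the remaining hunks move the GFP mask computation out of khugepaged_alloc_page() and into collapse_huge_page() (and, in the fault path, into do_huge_pmd_wp_page() as huge_gfp), so that one mask is computed once and then passed both to the allocator and to mem_cgroup_try_charge(), instead of charging with a hard-coded GFP_TRANSHUGE. A compact sketch of that "compute once, pass the same flags everywhere" shape, with hypothetical names, is:

	#include <stdbool.h>
	#include <stdio.h>

	typedef unsigned int alloc_flags_t;           /* stand-in for gfp_t */

	#define FLAG_RECLAIM   0x1u                   /* "defrag" behaviour */
	#define FLAG_THISNODE  0x2u                   /* node-local only */

	/* Hypothetical allocator and charge hooks; both must see the same flags. */
	static void *alloc_huge(alloc_flags_t flags)
	{
		static char fake_page[1];
		(void)flags;
		return fake_page;
	}

	static bool charge_huge(void *page, alloc_flags_t flags)
	{
		(void)page;
		(void)flags;
		return true;
	}

	static void collapse_example(bool defrag)
	{
		/* Compute the mask once in the caller... */
		alloc_flags_t flags = (defrag ? FLAG_RECLAIM : 0) | FLAG_THISNODE;

		/* ...then hand the same mask to allocation and to the charge. */
		void *page = alloc_huge(flags);
		if (!page)
			return;
		if (!charge_huge(page, flags))
			return;

		printf("collapsed with flags 0x%x\n", flags);
	}

	int main(void)
	{
		collapse_example(true);
		return 0;
	}
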