| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 19:39:15 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 19:39:15 -0400 |
| commit | eea3a00264cf243a28e4331566ce67b86059339d (patch) | |
| tree | 487f16389e0dfa32e9caa7604d1274a7dcda8f04 /mm/huge_memory.c | |
| parent | e7c82412433a8039616c7314533a0a1c025d99bf (diff) | |
| parent | e693d73c20ffdb06840c9378f367bad849ac0d5d (diff) | |
Merge branch 'akpm' (patches from Andrew)
Merge second patchbomb from Andrew Morton:
- the rest of MM
- various misc bits
- add ability to run /sbin/reboot at reboot time
- printk/vsprintf changes
- fiddle with seq_printf() return value
* akpm: (114 commits)
parisc: remove use of seq_printf return value
lru_cache: remove use of seq_printf return value
tracing: remove use of seq_printf return value
cgroup: remove use of seq_printf return value
proc: remove use of seq_printf return value
s390: remove use of seq_printf return value
cris fasttimer: remove use of seq_printf return value
cris: remove use of seq_printf return value
openrisc: remove use of seq_printf return value
ARM: plat-pxa: remove use of seq_printf return value
nios2: cpuinfo: remove use of seq_printf return value
microblaze: mb: remove use of seq_printf return value
ipc: remove use of seq_printf return value
rtc: remove use of seq_printf return value
power: wakeup: remove use of seq_printf return value
x86: mtrr: if: remove use of seq_printf return value
linux/bitmap.h: improve BITMAP_{LAST,FIRST}_WORD_MASK
MAINTAINERS: CREDITS: remove Stefano Brivio from B43
.mailmap: add Ricardo Ribalda
CREDITS: add Ricardo Ribalda Delgado
...
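Most of the "remove use of seq_printf return value" commits listed above apply one mechanical conversion: seq_printf() is being moved toward returning void, so callers must stop relying on its return value. A minimal sketch of that pattern follows; foo_show() and foo_value are made-up names for illustration, while seq_printf() and seq_has_overflowed() are the real seq_file helpers.

```c
#include <linux/seq_file.h>

static int foo_value;	/* stand-in for whatever the file reports */

/* Hypothetical show() callback illustrating the conversion pattern. */
static int foo_show(struct seq_file *m, void *v)
{
	/* old style: return seq_printf(m, "foo: %d\n", foo_value); */
	seq_printf(m, "foo: %d\n", foo_value);

	/*
	 * new style: the return value is ignored; the seq_file core
	 * handles buffer overflow by retrying with a larger buffer, and
	 * seq_has_overflowed(m) is available if a status is still needed.
	 */
	return 0;
}
```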
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c | 86
1 file changed, 49 insertions(+), 37 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3afb5cbe1312..078832cf3636 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -67,6 +67,7 @@ static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
 
 static int khugepaged(void *none);
 static int khugepaged_slab_init(void);
+static void khugepaged_slab_exit(void);
 
 #define MM_SLOTS_HASH_BITS 10
 static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
@@ -109,9 +110,6 @@ static int set_recommended_min_free_kbytes(void)
 	int nr_zones = 0;
 	unsigned long recommended_min;
 
-	if (!khugepaged_enabled())
-		return 0;
-
 	for_each_populated_zone(zone)
 		nr_zones++;
 
@@ -143,9 +141,8 @@ static int set_recommended_min_free_kbytes(void)
 	setup_per_zone_wmarks();
 	return 0;
 }
-late_initcall(set_recommended_min_free_kbytes);
 
-static int start_khugepaged(void)
+static int start_stop_khugepaged(void)
 {
 	int err = 0;
 	if (khugepaged_enabled()) {
@@ -156,6 +153,7 @@ static int start_khugepaged(void)
 			pr_err("khugepaged: kthread_run(khugepaged) failed\n");
 			err = PTR_ERR(khugepaged_thread);
 			khugepaged_thread = NULL;
+			goto fail;
 		}
 
 		if (!list_empty(&khugepaged_scan.mm_head))
@@ -166,7 +164,7 @@ static int start_khugepaged(void)
 		kthread_stop(khugepaged_thread);
 		khugepaged_thread = NULL;
 	}
-
+fail:
 	return err;
 }
 
@@ -183,7 +181,7 @@ static struct page *get_huge_zero_page(void)
 	struct page *zero_page;
 retry:
 	if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
-		return ACCESS_ONCE(huge_zero_page);
+		return READ_ONCE(huge_zero_page);
 
 	zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
 			HPAGE_PMD_ORDER);
@@ -202,7 +200,7 @@ retry:
 	/* We take additional reference here. It will be put back by shrinker */
 	atomic_set(&huge_zero_refcount, 2);
 	preempt_enable();
-	return ACCESS_ONCE(huge_zero_page);
+	return READ_ONCE(huge_zero_page);
 }
 
 static void put_huge_zero_page(void)
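Aside on the two hunks above: they swap ACCESS_ONCE() for READ_ONCE() when snapshotting huge_zero_page. READ_ONCE() is the replacement for ACCESS_ONCE(), whose volatile cast is only reliable for scalar-sized objects; for a pointer like this the intent is the same, namely forcing exactly one load that the compiler may not refetch or tear. A rough user-space stand-in is sketched below; READ_ONCE_ISH is a made-up macro that only mimics the scalar/pointer case, whereas the kernel's READ_ONCE() in <linux/compiler.h> also handles larger types.

```c
#include <stdio.h>

/* Simplified stand-in for the kernel's READ_ONCE(); assumes x is a
 * scalar or pointer, which is all this illustration needs. */
#define READ_ONCE_ISH(x)	(*(volatile __typeof__(x) *)&(x))

static int *shared_ptr;		/* imagine another thread publishes this */

static int *snapshot(void)
{
	return READ_ONCE_ISH(shared_ptr);	/* exactly one load, no refetch */
}

int main(void)
{
	int value = 42;

	shared_ptr = &value;
	printf("%d\n", *snapshot());
	return 0;
}
```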
@@ -300,7 +298,7 @@ static ssize_t enabled_store(struct kobject *kobj,
 		int err;
 
 		mutex_lock(&khugepaged_mutex);
-		err = start_khugepaged();
+		err = start_stop_khugepaged();
 		mutex_unlock(&khugepaged_mutex);
 
 		if (err)
@@ -634,27 +632,38 @@ static int __init hugepage_init(void)
 
 	err = hugepage_init_sysfs(&hugepage_kobj);
 	if (err)
-		return err;
+		goto err_sysfs;
 
 	err = khugepaged_slab_init();
 	if (err)
-		goto out;
+		goto err_slab;
 
-	register_shrinker(&huge_zero_page_shrinker);
+	err = register_shrinker(&huge_zero_page_shrinker);
+	if (err)
+		goto err_hzp_shrinker;
 
 	/*
 	 * By default disable transparent hugepages on smaller systems,
 	 * where the extra memory used could hurt more than TLB overhead
 	 * is likely to save. The admin can still enable it through /sys.
 	 */
-	if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
+	if (totalram_pages < (512 << (20 - PAGE_SHIFT))) {
 		transparent_hugepage_flags = 0;
+		return 0;
+	}
 
-	start_khugepaged();
+	err = start_stop_khugepaged();
+	if (err)
+		goto err_khugepaged;
 
 	return 0;
-out:
+err_khugepaged:
+	unregister_shrinker(&huge_zero_page_shrinker);
+err_hzp_shrinker:
+	khugepaged_slab_exit();
+err_slab:
 	hugepage_exit_sysfs(hugepage_kobj);
+err_sysfs:
 	return err;
 }
 subsys_initcall(hugepage_init);
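The reworked hugepage_init() above follows the usual kernel error-unwind idiom: each failure jumps to a label that tears down everything set up before it, in reverse order, and the early return 0 skips starting khugepaged when THP is disabled on small machines (the totalram_pages test is 512 MB expressed in pages). Below is a self-contained sketch of that idiom with made-up step_a()/step_b() helpers standing in for hugepage_init_sysfs(), khugepaged_slab_init(), and so on; it is not taken from this file.

```c
#include <stdbool.h>
#include <stdio.h>

/* Made-up setup steps; step_b() fails so the unwind path runs. */
static bool step_a(void) { puts("set up a"); return true; }
static bool step_b(void) { puts("set up b"); return false; }
static void undo_a(void) { puts("undo a"); }

static int demo_init(void)
{
	int err = -1;

	if (!step_a())
		goto err_a;
	if (!step_b())
		goto err_b;
	return 0;		/* fully initialized */

err_b:				/* unwind in reverse order of setup */
	undo_a();
err_a:
	return err;
}

int main(void)
{
	return demo_init() ? 1 : 0;
}
```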
@@ -708,7 +717,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long haddr, pmd_t *pmd,
-					struct page *page)
+					struct page *page, gfp_t gfp)
 {
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
@@ -716,7 +725,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+	if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
 		return VM_FAULT_OOM;
 
 	pgtable = pte_alloc_one(mm, haddr);
@@ -822,7 +831,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
-	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1080,6 +1089,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
+	gfp_t huge_gfp;			/* for allocation and charge */
 
 	ptl = pmd_lockptr(mm, pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
@@ -1106,10 +1116,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		gfp_t gfp;
-
-		gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
-		new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
@@ -1130,8 +1138,7 @@ alloc:
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm,
-					   GFP_TRANSHUGE, &memcg))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg))) {
 		put_page(new_page);
 		if (page) {
 			split_huge_page(page);
@@ -1976,6 +1983,11 @@ static int __init khugepaged_slab_init(void)
 	return 0;
 }
 
+static void __init khugepaged_slab_exit(void)
+{
+	kmem_cache_destroy(mm_slot_cache);
+}
+
 static inline struct mm_slot *alloc_mm_slot(void)
 {
 	if (!mm_slot_cache)	/* initialization failed */
@@ -2323,19 +2335,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page
-*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
+static struct page *
+khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		       struct vm_area_struct *vma, unsigned long address,
 		       int node)
 {
-	gfp_t flags;
-
 	VM_BUG_ON_PAGE(*hpage, *hpage);
 
-	/* Only allocate from the target node */
-	flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
-		__GFP_THISNODE;
-
 	/*
 	 * Before allocating the hugepage, release the mmap_sem read lock.
 	 * The allocation can take potentially a long time if it involves
@@ -2344,7 +2350,7 @@ static struct page
 	 */
 	up_read(&mm->mmap_sem);
 
-	*hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER);
+	*hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
 	if (unlikely(!*hpage)) {
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		*hpage = ERR_PTR(-ENOMEM);
@@ -2397,13 +2403,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page
-*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
+static struct page *
+khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		       struct vm_area_struct *vma, unsigned long address,
 		       int node)
 {
 	up_read(&mm->mmap_sem);
 	VM_BUG_ON(!*hpage);
+
 	return *hpage;
 }
 #endif
@@ -2438,16 +2445,21 @@ static void collapse_huge_page(struct mm_struct *mm,
 	struct mem_cgroup *memcg;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
+	gfp_t gfp;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
+	/* Only allocate from the target node */
+	gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
+		__GFP_THISNODE;
+
 	/* release the mmap_sem read lock. */
-	new_page = khugepaged_alloc_page(hpage, mm, vma, address, node);
+	new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
 	if (!new_page)
 		return;
 
 	if (unlikely(mem_cgroup_try_charge(new_page, mm,
-					   GFP_TRANSHUGE, &memcg)))
+					   gfp, &memcg)))
 		return;
 
 	/*