Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c      12
-rw-r--r--  mm/memcontrol.c       37
-rw-r--r--  mm/memory-failure.c   10
-rw-r--r--  mm/memory.c           40
-rw-r--r--  mm/migrate.c           8
-rw-r--r--  mm/page_io.c           6
-rw-r--r--  mm/swap.c              5
-rw-r--r--  mm/vmscan.c           30
8 files changed, 105 insertions(+), 43 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86f9f8b82f8e..df67b53ae3c5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -232,7 +232,7 @@ retry:
 	return READ_ONCE(huge_zero_page);
 }
 
-static void put_huge_zero_page(void)
+void put_huge_zero_page(void)
 {
 	/*
 	 * Counter should never go to zero here. Only shrinker can put
@@ -1684,12 +1684,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	if (vma_is_dax(vma)) {
 		spin_unlock(ptl);
 		if (is_huge_zero_pmd(orig_pmd))
-			put_huge_zero_page();
+			tlb_remove_page(tlb, pmd_page(orig_pmd));
 	} else if (is_huge_zero_pmd(orig_pmd)) {
 		pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
 		atomic_long_dec(&tlb->mm->nr_ptes);
 		spin_unlock(ptl);
-		put_huge_zero_page();
+		tlb_remove_page(tlb, pmd_page(orig_pmd));
 	} else {
 		struct page *page = pmd_page(orig_pmd);
 		page_remove_rmap(page, true);
@@ -1960,10 +1960,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_ops)
+	if (vma->vm_ops || (vm_flags & VM_NO_THP))
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -2352,8 +2351,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
 		return false;
 	if (is_vma_temporary_stack(vma))
 		return false;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
-	return true;
+	return !(vma->vm_flags & VM_NO_THP);
 }
 
 static void collapse_huge_page(struct mm_struct *mm,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36db05fa8acb..fe787f5c41bd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
 	spinlock_t lock; /* for from, to */
+	struct mm_struct *mm;
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long flags;
@@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)
 
 static void mem_cgroup_clear_mc(void)
 {
+	struct mm_struct *mm = mc.mm;
+
 	/*
 	 * we must clear moving_task before waking up waiters at the end of
 	 * task migration.
@@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
+	mc.mm = NULL;
 	spin_unlock(&mc.lock);
+
+	mmput(mm);
 }
 
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 	VM_BUG_ON(mc.moved_swap);
 
 	spin_lock(&mc.lock);
+	mc.mm = mm;
 	mc.from = from;
 	mc.to = memcg;
 	mc.flags = move_flags;
@@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 		ret = mem_cgroup_precharge_mc(mm);
 		if (ret)
 			mem_cgroup_clear_mc();
+	} else {
+		mmput(mm);
 	}
-	mmput(mm);
 	return ret;
 }
 
@@ -4852,11 +4860,11 @@ put: /* get_mctgt_type() gets the page */
 	return ret;
 }
 
-static void mem_cgroup_move_charge(struct mm_struct *mm)
+static void mem_cgroup_move_charge(void)
 {
 	struct mm_walk mem_cgroup_move_charge_walk = {
 		.pmd_entry = mem_cgroup_move_charge_pte_range,
-		.mm = mm,
+		.mm = mc.mm,
 	};
 
 	lru_add_drain_all();
@@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	atomic_inc(&mc.from->moving_account);
 	synchronize_rcu();
 retry:
-	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+	if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
 		/*
 		 * Someone who are holding the mmap_sem might be waiting in
 		 * waitq. So we cancel all extra charges, wake up all waiters,
@@ -4885,23 +4893,16 @@ retry:
 	 * additional charge, the page walk just aborts.
 	 */
 	walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
-	up_read(&mm->mmap_sem);
+	up_read(&mc.mm->mmap_sem);
 	atomic_dec(&mc.from->moving_account);
 }
 
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
-	struct cgroup_subsys_state *css;
-	struct task_struct *p = cgroup_taskset_first(tset, &css);
-	struct mm_struct *mm = get_task_mm(p);
-
-	if (mm) {
-		if (mc.to)
-			mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
-	if (mc.to)
+	if (mc.to) {
+		mem_cgroup_move_charge();
 		mem_cgroup_clear_mc();
+	}
 }
 #else	/* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
 }
 #endif
@@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.css_reset = mem_cgroup_css_reset,
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
-	.attach = mem_cgroup_move_task,
+	.post_attach = mem_cgroup_move_task,
 	.bind = mem_cgroup_bind,
 	.dfl_cftypes = memory_files,
 	.legacy_cftypes = mem_cgroup_legacy_files,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 78f5f2641b91..ca5acee53b7a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -888,7 +888,15 @@ int get_hwpoison_page(struct page *page)
 		}
 	}
 
-	return get_page_unless_zero(head);
+	if (get_page_unless_zero(head)) {
+		if (head == compound_head(page))
+			return 1;
+
+		pr_info("MCE: %#lx cannot catch tail\n", page_to_pfn(page));
+		put_page(head);
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(get_hwpoison_page);
 
diff --git a/mm/memory.c b/mm/memory.c
index 93897f23cc11..305537fc8640 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -789,6 +789,46 @@ out:
 	return pfn_to_page(pfn);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd)
+{
+	unsigned long pfn = pmd_pfn(pmd);
+
+	/*
+	 * There is no pmd_special() but there may be special pmds, e.g.
+	 * in a direct-access (dax) mapping, so let's just replicate the
+	 * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+	 */
+	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+		if (vma->vm_flags & VM_MIXEDMAP) {
+			if (!pfn_valid(pfn))
+				return NULL;
+			goto out;
+		} else {
+			unsigned long off;
+			off = (addr - vma->vm_start) >> PAGE_SHIFT;
+			if (pfn == vma->vm_pgoff + off)
+				return NULL;
+			if (!is_cow_mapping(vma->vm_flags))
+				return NULL;
+		}
+	}
+
+	if (is_zero_pfn(pfn))
+		return NULL;
+	if (unlikely(pfn > highest_memmap_pfn))
+		return NULL;
+
+	/*
+	 * NOTE! We still have PageReserved() pages in the page tables.
+	 * eg. VDSO mappings can cause them to exist.
+	 */
+out:
+	return pfn_to_page(pfn);
+}
+#endif
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
diff --git a/mm/migrate.c b/mm/migrate.c
index 6c822a7b27e0..f9dfb18a4eba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -975,7 +975,13 @@ out:
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
 		/* Soft-offlined page shouldn't go through lru cache list */
-		if (reason == MR_MEMORY_FAILURE) {
+		if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+			/*
+			 * With this release, we free successfully migrated
+			 * page and set PG_HWPoison on just freed page
+			 * intentionally. Although it's rather weird, it's how
+			 * HWPoison flag works at the moment.
+			 */
 			put_page(page);
 			if (!test_set_page_hwpoison(page))
 				num_poisoned_pages_inc();
diff --git a/mm/page_io.c b/mm/page_io.c
index cd92e3d67a32..985f23cfa79b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -353,7 +353,11 @@ int swap_readpage(struct page *page)
 
 	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
 	if (!ret) {
-		swap_slot_free_notify(page);
+		if (trylock_page(page)) {
+			swap_slot_free_notify(page);
+			unlock_page(page);
+		}
+
 		count_vm_event(PSWPIN);
 		return 0;
 	}
diff --git a/mm/swap.c b/mm/swap.c
index a0bc206b4ac6..03aacbcb013f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -728,6 +728,11 @@ void release_pages(struct page **pages, int nr, bool cold)
 			zone = NULL;
 		}
 
+		if (is_huge_zero_page(page)) {
+			put_huge_zero_page();
+			continue;
+		}
+
 		page = compound_head(page);
 		if (!put_page_testzero(page))
 			continue;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b934223eaa45..142cb61f4822 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2553,7 +2553,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2553 sc->gfp_mask |= __GFP_HIGHMEM; 2553 sc->gfp_mask |= __GFP_HIGHMEM;
2554 2554
2555 for_each_zone_zonelist_nodemask(zone, z, zonelist, 2555 for_each_zone_zonelist_nodemask(zone, z, zonelist,
2556 requested_highidx, sc->nodemask) { 2556 gfp_zone(sc->gfp_mask), sc->nodemask) {
2557 enum zone_type classzone_idx; 2557 enum zone_type classzone_idx;
2558 2558
2559 if (!populated_zone(zone)) 2559 if (!populated_zone(zone))
@@ -3318,6 +3318,20 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
 	/* Try to sleep for a short interval */
 	if (prepare_kswapd_sleep(pgdat, order, remaining,
 						balanced_classzone_idx)) {
+		/*
+		 * Compaction records what page blocks it recently failed to
+		 * isolate pages from and skips them in the future scanning.
+		 * When kswapd is going to sleep, it is reasonable to assume
+		 * that pages and compaction may succeed so reset the cache.
+		 */
+		reset_isolation_suitable(pgdat);
+
+		/*
+		 * We have freed the memory, now we should compact it to make
+		 * allocation of the requested order possible.
+		 */
+		wakeup_kcompactd(pgdat, order, classzone_idx);
+
 		remaining = schedule_timeout(HZ/10);
 		finish_wait(&pgdat->kswapd_wait, &wait);
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -3341,20 +3355,6 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
 		 */
 		set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
 
-		/*
-		 * Compaction records what page blocks it recently failed to
-		 * isolate pages from and skips them in the future scanning.
-		 * When kswapd is going to sleep, it is reasonable to assume
-		 * that pages and compaction may succeed so reset the cache.
-		 */
-		reset_isolation_suitable(pgdat);
-
-		/*
-		 * We have freed the memory, now we should compact it to make
-		 * allocation of the requested order possible.
-		 */
-		wakeup_kcompactd(pgdat, order, classzone_idx);
-
 		if (!kthread_should_stop())
 			schedule();
 