Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c    | 12
-rw-r--r--  mm/memcontrol.c     | 37
-rw-r--r--  mm/memory-failure.c | 10
-rw-r--r--  mm/memory.c         | 40
-rw-r--r--  mm/migrate.c        |  8
-rw-r--r--  mm/page_io.c        |  6
-rw-r--r--  mm/swap.c           |  5
-rw-r--r--  mm/vmscan.c         | 30
8 files changed, 105 insertions, 43 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86f9f8b82f8e..df67b53ae3c5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -232,7 +232,7 @@ retry:
         return READ_ONCE(huge_zero_page);
 }
 
-static void put_huge_zero_page(void)
+void put_huge_zero_page(void)
 {
         /*
          * Counter should never go to zero here. Only shrinker can put
@@ -1684,12 +1684,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
         if (vma_is_dax(vma)) {
                 spin_unlock(ptl);
                 if (is_huge_zero_pmd(orig_pmd))
-                        put_huge_zero_page();
+                        tlb_remove_page(tlb, pmd_page(orig_pmd));
         } else if (is_huge_zero_pmd(orig_pmd)) {
                 pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
                 atomic_long_dec(&tlb->mm->nr_ptes);
                 spin_unlock(ptl);
-                put_huge_zero_page();
+                tlb_remove_page(tlb, pmd_page(orig_pmd));
         } else {
                 struct page *page = pmd_page(orig_pmd);
                 page_remove_rmap(page, true);
@@ -1960,10 +1960,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
                  * page fault if needed.
                  */
                 return 0;
-        if (vma->vm_ops)
+        if (vma->vm_ops || (vm_flags & VM_NO_THP))
                 /* khugepaged not yet working on file or special mappings */
                 return 0;
-        VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
         hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
         hend = vma->vm_end & HPAGE_PMD_MASK;
         if (hstart < hend)
@@ -2352,8 +2351,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
                 return false;
         if (is_vma_temporary_stack(vma))
                 return false;
-        VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
-        return true;
+        return !(vma->vm_flags & VM_NO_THP);
 }
 
 static void collapse_huge_page(struct mm_struct *mm,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36db05fa8acb..fe787f5c41bd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
         spinlock_t lock; /* for from, to */
+        struct mm_struct *mm;
         struct mem_cgroup *from;
         struct mem_cgroup *to;
         unsigned long flags;
@@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)
 
 static void mem_cgroup_clear_mc(void)
 {
+        struct mm_struct *mm = mc.mm;
+
         /*
          * we must clear moving_task before waking up waiters at the end of
          * task migration.
@@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
         spin_lock(&mc.lock);
         mc.from = NULL;
         mc.to = NULL;
+        mc.mm = NULL;
         spin_unlock(&mc.lock);
+
+        mmput(mm);
 }
 
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
                 VM_BUG_ON(mc.moved_swap);
 
                 spin_lock(&mc.lock);
+                mc.mm = mm;
                 mc.from = from;
                 mc.to = memcg;
                 mc.flags = move_flags;
@@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
                 ret = mem_cgroup_precharge_mc(mm);
                 if (ret)
                         mem_cgroup_clear_mc();
+        } else {
+                mmput(mm);
         }
-        mmput(mm);
         return ret;
 }
 
@@ -4852,11 +4860,11 @@ put: /* get_mctgt_type() gets the page */
         return ret;
 }
 
-static void mem_cgroup_move_charge(struct mm_struct *mm)
+static void mem_cgroup_move_charge(void)
 {
         struct mm_walk mem_cgroup_move_charge_walk = {
                 .pmd_entry = mem_cgroup_move_charge_pte_range,
-                .mm = mm,
+                .mm = mc.mm,
         };
 
         lru_add_drain_all();
@@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
         atomic_inc(&mc.from->moving_account);
         synchronize_rcu();
 retry:
-        if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+        if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
                 /*
                  * Someone who are holding the mmap_sem might be waiting in
                  * waitq. So we cancel all extra charges, wake up all waiters,
@@ -4885,23 +4893,16 @@ retry:
          * additional charge, the page walk just aborts.
          */
         walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
-        up_read(&mm->mmap_sem);
+        up_read(&mc.mm->mmap_sem);
         atomic_dec(&mc.from->moving_account);
 }
 
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
-        struct cgroup_subsys_state *css;
-        struct task_struct *p = cgroup_taskset_first(tset, &css);
-        struct mm_struct *mm = get_task_mm(p);
-
-        if (mm) {
-                if (mc.to)
-                        mem_cgroup_move_charge(mm);
-                mmput(mm);
-        }
-        if (mc.to)
+        if (mc.to) {
+                mem_cgroup_move_charge();
                 mem_cgroup_clear_mc();
+        }
 }
 #else /* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
 }
 #endif
@@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
         .css_reset = mem_cgroup_css_reset,
         .can_attach = mem_cgroup_can_attach,
         .cancel_attach = mem_cgroup_cancel_attach,
-        .attach = mem_cgroup_move_task,
+        .post_attach = mem_cgroup_move_task,
         .bind = mem_cgroup_bind,
         .dfl_cftypes = memory_files,
         .legacy_cftypes = mem_cgroup_legacy_files,
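The memcontrol hunks above all serve one change: the target mm is now stashed in the shared mc state during ->can_attach, and the charge moving plus the final mmput() happen later, from the ->post_attach callback, once the task has actually moved. Below is a minimal userspace sketch of that ownership hand-off, not kernel code; the names object, pending, prepare and finish are invented for illustration.

/*
 * Illustrative only -- a "prepare" step pins an object and stashes it in
 * shared state; only the later "finish" step does the deferred work and
 * drops the pin, mirroring can_attach/post_attach in the patch.
 */
#include <stdio.h>
#include <stdlib.h>

struct object {                 /* stand-in for struct mm_struct */
        int refcount;
};

static struct {                 /* stand-in for the static "mc" state */
        struct object *obj;
} pending;

static struct object *object_get(struct object *obj)
{
        obj->refcount++;        /* the kernel would use an atomic/mmget() */
        return obj;
}

static void object_put(struct object *obj)
{
        if (--obj->refcount == 0)
                free(obj);
}

static void prepare(struct object *obj)  /* ~ can_attach: pin and stash */
{
        pending.obj = object_get(obj);
}

static void finish(void)                 /* ~ post_attach: work, then unpin */
{
        struct object *obj = pending.obj;

        if (!obj)
                return;
        /* ... deferred work against obj would run here ... */
        pending.obj = NULL;
        object_put(obj);        /* reference dropped here, not in prepare() */
}

int main(void)
{
        struct object *obj = calloc(1, sizeof(*obj));

        if (!obj)
                return 1;
        obj->refcount = 1;
        prepare(obj);
        object_put(obj);        /* the caller's own reference may go away early */
        finish();               /* shared state keeps the object alive until now */
        printf("done\n");
        return 0;
}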
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 78f5f2641b91..ca5acee53b7a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -888,7 +888,15 @@ int get_hwpoison_page(struct page *page)
                 }
         }
 
-        return get_page_unless_zero(head);
+        if (get_page_unless_zero(head)) {
+                if (head == compound_head(page))
+                        return 1;
+
+                pr_info("MCE: %#lx cannot catch tail\n", page_to_pfn(page));
+                put_page(head);
+        }
+
+        return 0;
 }
 EXPORT_SYMBOL_GPL(get_hwpoison_page);
 
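The new get_hwpoison_page() body follows a speculative-reference pattern: pin the presumed compound head, then re-check that the faulting page still belongs to it, and undo the pin on mismatch. A single-threaded sketch of that shape, with invented names (struct obj, try_get, put_ref), might look like this:

/*
 * Illustrative only -- not kernel code. Grab a reference on the presumed
 * head, re-validate the tail-to-head link, and back out on mismatch.
 */
#include <stdbool.h>
#include <stdio.h>

struct obj {
        struct obj *head;       /* stand-in for compound_head(page) */
        int refcount;
};

static bool try_get(struct obj *o)      /* ~ get_page_unless_zero() */
{
        if (o->refcount == 0)
                return false;
        o->refcount++;
        return true;
}

static void put_ref(struct obj *o)      /* ~ put_page() */
{
        o->refcount--;
}

static int get_ref_checked(struct obj *page)
{
        struct obj *head = page->head;

        if (try_get(head)) {
                if (head == page->head) /* head did not change under us */
                        return 1;

                fprintf(stderr, "cannot catch tail\n");
                put_ref(head);          /* undo the speculative reference */
        }
        return 0;
}

int main(void)
{
        struct obj head = { .head = &head, .refcount = 1 };
        struct obj tail = { .head = &head, .refcount = 0 };

        printf("got ref: %d\n", get_ref_checked(&tail));
        return 0;
}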
diff --git a/mm/memory.c b/mm/memory.c
index 93897f23cc11..305537fc8640 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -789,6 +789,46 @@ out:
         return pfn_to_page(pfn);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+                                pmd_t pmd)
+{
+        unsigned long pfn = pmd_pfn(pmd);
+
+        /*
+         * There is no pmd_special() but there may be special pmds, e.g.
+         * in a direct-access (dax) mapping, so let's just replicate the
+         * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+         */
+        if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+                if (vma->vm_flags & VM_MIXEDMAP) {
+                        if (!pfn_valid(pfn))
+                                return NULL;
+                        goto out;
+                } else {
+                        unsigned long off;
+                        off = (addr - vma->vm_start) >> PAGE_SHIFT;
+                        if (pfn == vma->vm_pgoff + off)
+                                return NULL;
+                        if (!is_cow_mapping(vma->vm_flags))
+                                return NULL;
+                }
+        }
+
+        if (is_zero_pfn(pfn))
+                return NULL;
+        if (unlikely(pfn > highest_memmap_pfn))
+                return NULL;
+
+        /*
+         * NOTE! We still have PageReserved() pages in the page tables.
+         * eg. VDSO mappings can cause them to exist.
+         */
+out:
+        return pfn_to_page(pfn);
+}
+#endif
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
diff --git a/mm/migrate.c b/mm/migrate.c
index 6c822a7b27e0..f9dfb18a4eba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -975,7 +975,13 @@ out:
                 dec_zone_page_state(page, NR_ISOLATED_ANON +
                                 page_is_file_cache(page));
                 /* Soft-offlined page shouldn't go through lru cache list */
-                if (reason == MR_MEMORY_FAILURE) {
+                if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+                        /*
+                         * With this release, we free successfully migrated
+                         * page and set PG_HWPoison on just freed page
+                         * intentionally. Although it's rather weird, it's how
+                         * HWPoison flag works at the moment.
+                         */
                         put_page(page);
                         if (!test_set_page_hwpoison(page))
                                 num_poisoned_pages_inc();
diff --git a/mm/page_io.c b/mm/page_io.c
index cd92e3d67a32..985f23cfa79b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -353,7 +353,11 @@ int swap_readpage(struct page *page)
 
         ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
         if (!ret) {
-                swap_slot_free_notify(page);
+                if (trylock_page(page)) {
+                        swap_slot_free_notify(page);
+                        unlock_page(page);
+                }
+
                 count_vm_event(PSWPIN);
                 return 0;
         }
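The swap_readpage() hunk turns an unconditional call into best-effort work done only when the page lock can be taken without sleeping. A small userspace analogue of that trylock shape, assuming invented names (page_lock, optional_notify, read_completed), could read:

/*
 * Illustrative only -- the optional notification runs only if the lock is
 * available right now; the unconditional bookkeeping still happens either way.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

static void optional_notify(void)       /* ~ swap_slot_free_notify() */
{
        printf("notify issued under the lock\n");
}

static void read_completed(void)        /* ~ the successful bdev_read_page() path */
{
        if (pthread_mutex_trylock(&page_lock) == 0) {
                optional_notify();
                pthread_mutex_unlock(&page_lock);
        }

        /* Accounting is not skippable, so it stays outside the trylock. */
        printf("PSWPIN accounted\n");
}

int main(void)
{
        read_completed();
        return 0;
}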
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -728,6 +728,11 @@ void release_pages(struct page **pages, int nr, bool cold)
                         zone = NULL;
                 }
 
+                if (is_huge_zero_page(page)) {
+                        put_huge_zero_page();
+                        continue;
+                }
+
                 page = compound_head(page);
                 if (!put_page_testzero(page))
                         continue;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b934223eaa45..142cb61f4822 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2553,7 +2553,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                 sc->gfp_mask |= __GFP_HIGHMEM;
 
         for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                         requested_highidx, sc->nodemask) {
-                                        requested_highidx, sc->nodemask) {
+                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
                 enum zone_type classzone_idx;
 
                 if (!populated_zone(zone))
@@ -3318,6 +3318,20 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
         /* Try to sleep for a short interval */
         if (prepare_kswapd_sleep(pgdat, order, remaining,
                                                 balanced_classzone_idx)) {
+                /*
+                 * Compaction records what page blocks it recently failed to
+                 * isolate pages from and skips them in the future scanning.
+                 * When kswapd is going to sleep, it is reasonable to assume
+                 * that pages and compaction may succeed so reset the cache.
+                 */
+                reset_isolation_suitable(pgdat);
+
+                /*
+                 * We have freed the memory, now we should compact it to make
+                 * allocation of the requested order possible.
+                 */
+                wakeup_kcompactd(pgdat, order, classzone_idx);
+
                 remaining = schedule_timeout(HZ/10);
                 finish_wait(&pgdat->kswapd_wait, &wait);
                 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -3341,20 +3355,6 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
                  */
                 set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
 
-                /*
-                 * Compaction records what page blocks it recently failed to
-                 * isolate pages from and skips them in the future scanning.
-                 * When kswapd is going to sleep, it is reasonable to assume
-                 * that pages and compaction may succeed so reset the cache.
-                 */
-                reset_isolation_suitable(pgdat);
-
-                /*
-                 * We have freed the memory, now we should compact it to make
-                 * allocation of the requested order possible.
-                 */
-                wakeup_kcompactd(pgdat, order, classzone_idx);
-
                 if (!kthread_should_stop())
                         schedule();
 