Diffstat (limited to 'mm')
-rw-r--r--   mm/backing-dev.c     |   1
-rw-r--r--   mm/debug.c           |   4
-rw-r--r--   mm/gup.c             |   3
-rw-r--r--   mm/hugetlb.c         |  84
-rw-r--r--   mm/kasan/Makefile    |   3
-rw-r--r--   mm/kasan/common.c    |  82
-rw-r--r--   mm/kasan/tags.c      |   2
-rw-r--r--   mm/kmemleak.c        |  10
-rw-r--r--   mm/memblock.c        |  11
-rw-r--r--   mm/memory-failure.c  |  19
-rw-r--r--   mm/memory.c          |  26
-rw-r--r--   mm/memory_hotplug.c  |  85
-rw-r--r--   mm/mempolicy.c       |   6
-rw-r--r--   mm/migrate.c         |  25
-rw-r--r--   mm/mincore.c         |  94
-rw-r--r--   mm/oom_kill.c        |  12
-rw-r--r--   mm/page_alloc.c      |  40
-rw-r--r--   mm/page_ext.c        |   4
-rw-r--r--   mm/rmap.c            |   8
-rw-r--r--   mm/shmem.c           |  10
-rw-r--r--   mm/slab.c            |  21
-rw-r--r--   mm/slab.h            |   7
-rw-r--r--   mm/slab_common.c     |   3
-rw-r--r--   mm/slub.c            |  61
-rw-r--r--   mm/swap.c            |  17
-rw-r--r--   mm/usercopy.c        |   9
-rw-r--r--   mm/userfaultfd.c     |  11
-rw-r--r--   mm/util.c            |   4
-rw-r--r--   mm/vmscan.c          |  10
29 files changed, 387 insertions, 285 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8a8bb8796c6c..72e6d0c55cfa 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -689,6 +689,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
 	bdi->cgwb_congested_tree = RB_ROOT;
 	mutex_init(&bdi->cgwb_release_mutex);
+	init_rwsem(&bdi->wb_switch_rwsem);
 
 	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (!ret) {
diff --git a/mm/debug.c b/mm/debug.c
index 0abb987dad9b..1611cf00a137 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -44,7 +44,7 @@ const struct trace_print_flags vmaflag_names[] = {
 
 void __dump_page(struct page *page, const char *reason)
 {
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping;
 	bool page_poisoned = PagePoisoned(page);
 	int mapcount;
 
@@ -58,6 +58,8 @@ void __dump_page(struct page *page, const char *reason)
 		goto hex_only;
 	}
 
+	mapping = page_mapping(page);
+
 	/*
 	 * Avoid VM_BUG_ON() in page_mapcount().
 	 * page->_mapcount space in struct page is used by sl[aou]b pages to
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1674,7 +1674,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		if (!pmd_present(pmd))
 			return 0;
 
-		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
+			     pmd_devmap(pmd))) {
 			/*
 			 * NUMA hinting faults need to be handled in the GUP
 			 * slowpath for accounting purposes and so that they
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 745088810965..afef61656c1e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3238,7 +3238,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	struct page *ptepage;
 	unsigned long addr;
 	int cow;
-	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
 	struct mmu_notifier_range range;
@@ -3250,23 +3249,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		mmu_notifier_range_init(&range, src, vma->vm_start,
 					vma->vm_end);
 		mmu_notifier_invalidate_range_start(&range);
-	} else {
-		/*
-		 * For shared mappings i_mmap_rwsem must be held to call
-		 * huge_pte_alloc, otherwise the returned ptep could go
-		 * away if part of a shared pmd and another thread calls
-		 * huge_pmd_unshare.
-		 */
-		i_mmap_lock_read(mapping);
 	}
 
 	for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
 		spinlock_t *src_ptl, *dst_ptl;
-
 		src_pte = huge_pte_offset(src, addr, sz);
 		if (!src_pte)
 			continue;
-
 		dst_pte = huge_pte_alloc(dst, addr, sz);
 		if (!dst_pte) {
 			ret = -ENOMEM;
@@ -3337,8 +3326,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 
 	if (cow)
 		mmu_notifier_invalidate_range_end(&range);
-	else
-		i_mmap_unlock_read(mapping);
 
 	return ret;
 }
@@ -3755,16 +3742,16 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	}
 
 	/*
-	 * We can not race with truncation due to holding i_mmap_rwsem.
-	 * Check once here for faults beyond end of file.
+	 * Use page lock to guard against racing truncation
+	 * before we get page_table_lock.
 	 */
-	size = i_size_read(mapping->host) >> huge_page_shift(h);
-	if (idx >= size)
-		goto out;
-
 retry:
 	page = find_lock_page(mapping, idx);
 	if (!page) {
+		size = i_size_read(mapping->host) >> huge_page_shift(h);
+		if (idx >= size)
+			goto out;
+
 		/*
 		 * Check for page in userfault range
 		 */
@@ -3784,18 +3771,14 @@ retry:
 			};
 
 			/*
-			 * hugetlb_fault_mutex and i_mmap_rwsem must be
-			 * dropped before handling userfault.  Reacquire
-			 * after handling fault to make calling code simpler.
+			 * hugetlb_fault_mutex must be dropped before
+			 * handling userfault. Reacquire after handling
+			 * fault to make calling code simpler.
 			 */
 			hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
 							idx, haddr);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-			i_mmap_unlock_read(mapping);
-
 			ret = handle_userfault(&vmf, VM_UFFD_MISSING);
-
-			i_mmap_lock_read(mapping);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 			goto out;
 		}
@@ -3854,6 +3837,9 @@ retry:
 	}
 
 	ptl = huge_pte_lock(h, mm, ptep);
+	size = i_size_read(mapping->host) >> huge_page_shift(h);
+	if (idx >= size)
+		goto backout;
 
 	ret = 0;
 	if (!huge_pte_none(huge_ptep_get(ptep)))
@@ -3940,11 +3926,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
 	if (ptep) {
-		/*
-		 * Since we hold no locks, ptep could be stale. That is
-		 * OK as we are only making decisions based on content and
-		 * not actually modifying content here.
-		 */
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
 			migration_entry_wait_huge(vma, mm, ptep);
@@ -3952,33 +3933,20 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(hstate_index(h));
+	} else {
+		ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
+		if (!ptep)
+			return VM_FAULT_OOM;
 	}
 
-	/*
-	 * Acquire i_mmap_rwsem before calling huge_pte_alloc and hold
-	 * until finished with ptep.  This serves two purposes:
-	 * 1) It prevents huge_pmd_unshare from being called elsewhere
-	 *    and making the ptep no longer valid.
-	 * 2) It synchronizes us with file truncation.
-	 *
-	 * ptep could have already be assigned via huge_pte_offset.  That
-	 * is OK, as huge_pte_alloc will return the same value unless
-	 * something changed.
-	 */
 	mapping = vma->vm_file->f_mapping;
-	i_mmap_lock_read(mapping);
-	ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
-	if (!ptep) {
-		i_mmap_unlock_read(mapping);
-		return VM_FAULT_OOM;
-	}
+	idx = vma_hugecache_offset(h, vma, haddr);
 
 	/*
 	 * Serialize hugepage allocation and instantiation, so that we don't
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
-	idx = vma_hugecache_offset(h, vma, haddr);
 	hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
@@ -4066,7 +4034,6 @@ out_ptl:
 	}
 out_mutex:
 	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-	i_mmap_unlock_read(mapping);
 	/*
 	 * Generally it's safe to hold refcount during waiting page lock. But
 	 * here we just wait to defer the next page fault to avoid busy loop and
@@ -4301,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				break;
 			}
 			if (ret & VM_FAULT_RETRY) {
-				if (nonblocking)
+				if (nonblocking &&
+				    !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
 					*nonblocking = 0;
 				*nr_pages = 0;
 				/*
@@ -4671,12 +4639,10 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
  * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
  * and returns the corresponding pte. While this is not necessary for the
  * !shared pmd case because we can allocate the pmd later as well, it makes the
- * code much cleaner.
- *
- * This routine must be called with i_mmap_rwsem held in at least read mode.
- * For hugetlbfs, this prevents removal of any page table entries associated
- * with the address space.  This is important as we are setting up sharing
- * based on existing page table entries (mappings).
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_rwsem section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
  */
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 {
@@ -4693,6 +4659,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!vma_shareable(vma, addr))
 		return (pte_t *)pmd_alloc(mm, pud, addr);
 
+	i_mmap_lock_write(mapping);
 	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
@@ -4722,6 +4689,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	spin_unlock(ptl);
 out:
 	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	i_mmap_unlock_write(mapping);
 	return pte;
 }
 
@@ -4732,7 +4700,7 @@ out:
  * indicated by page_count > 1, unmap is achieved by clearing pud and
  * decrementing the ref count. If count == 1, the pte page is not shared.
  *
- * Called with page table lock held and i_mmap_rwsem held in write mode.
+ * called with page table lock held.
  *
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 0a14fcff70ed..5d1065efbd47 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -5,7 +5,10 @@ UBSAN_SANITIZE_generic.o := n
 UBSAN_SANITIZE_tags.o := n
 KCOV_INSTRUMENT := n
 
+CFLAGS_REMOVE_common.o = -pg
 CFLAGS_REMOVE_generic.o = -pg
+CFLAGS_REMOVE_tags.o = -pg
+
 # Function splitter causes unnecessary splits in __asan_load1/__asan_store1
 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
 
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 03d5d1374ca7..09b534fbba17 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -298,8 +298,6 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
 		return;
 	}
 
-	cache->align = round_up(cache->align, KASAN_SHADOW_SCALE_SIZE);
-
 	*flags |= SLAB_KASAN;
 }
 
@@ -349,28 +347,48 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
 }
 
 /*
- * Since it's desirable to only call object contructors once during slab
- * allocation, we preassign tags to all such objects. Also preassign tags for
- * SLAB_TYPESAFE_BY_RCU slabs to avoid use-after-free reports.
- * For SLAB allocator we can't preassign tags randomly since the freelist is
- * stored as an array of indexes instead of a linked list. Assign tags based
- * on objects indexes, so that objects that are next to each other get
- * different tags.
- * After a tag is assigned, the object always gets allocated with the same tag.
- * The reason is that we can't change tags for objects with constructors on
- * reallocation (even for non-SLAB_TYPESAFE_BY_RCU), because the constructor
- * code can save the pointer to the object somewhere (e.g. in the object
- * itself). Then if we retag it, the old saved pointer will become invalid.
+ * This function assigns a tag to an object considering the following:
+ * 1. A cache might have a constructor, which might save a pointer to a slab
+ *    object somewhere (e.g. in the object itself). We preassign a tag for
+ *    each object in caches with constructors during slab creation and reuse
+ *    the same tag each time a particular object is allocated.
+ * 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
+ *    accessed after being freed. We preassign tags for objects in these
+ *    caches as well.
+ * 3. For SLAB allocator we can't preassign tags randomly since the freelist
+ *    is stored as an array of indexes instead of a linked list. Assign tags
+ *    based on objects indexes, so that objects that are next to each other
+ *    get different tags.
  */
-static u8 assign_tag(struct kmem_cache *cache, const void *object, bool new)
+static u8 assign_tag(struct kmem_cache *cache, const void *object,
+			bool init, bool keep_tag)
 {
+	/*
+	 * 1. When an object is kmalloc()'ed, two hooks are called:
+	 *    kasan_slab_alloc() and kasan_kmalloc(). We assign the
+	 *    tag only in the first one.
+	 * 2. We reuse the same tag for krealloc'ed objects.
+	 */
+	if (keep_tag)
+		return get_tag(object);
+
+	/*
+	 * If the cache neither has a constructor nor has SLAB_TYPESAFE_BY_RCU
+	 * set, assign a tag when the object is being allocated (init == false).
+	 */
 	if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
-		return new ? KASAN_TAG_KERNEL : random_tag();
+		return init ? KASAN_TAG_KERNEL : random_tag();
 
+	/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
 #ifdef CONFIG_SLAB
+	/* For SLAB assign tags based on the object index in the freelist. */
 	return (u8)obj_to_index(cache, virt_to_page(object), (void *)object);
 #else
-	return new ? random_tag() : get_tag(object);
+	/*
+	 * For SLUB assign a random tag during slab creation, otherwise reuse
+	 * the already assigned tag.
+	 */
+	return init ? random_tag() : get_tag(object);
 #endif
 }
 
@@ -386,17 +404,12 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache,
 	__memset(alloc_info, 0, sizeof(*alloc_info));
 
 	if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-		object = set_tag(object, assign_tag(cache, object, true));
+		object = set_tag(object,
+				assign_tag(cache, object, true, false));
 
 	return (void *)object;
 }
 
-void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
-					gfp_t flags)
-{
-	return kasan_kmalloc(cache, object, cache->object_size, flags);
-}
-
 static inline bool shadow_invalid(u8 tag, s8 shadow_byte)
 {
 	if (IS_ENABLED(CONFIG_KASAN_GENERIC))
@@ -452,8 +465,8 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
 	return __kasan_slab_free(cache, object, ip, true);
 }
 
-void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
-				size_t size, gfp_t flags)
+static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
+				size_t size, gfp_t flags, bool keep_tag)
 {
 	unsigned long redzone_start;
 	unsigned long redzone_end;
@@ -471,7 +484,7 @@ void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
 				KASAN_SHADOW_SCALE_SIZE);
 
 	if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-		tag = assign_tag(cache, object, false);
+		tag = assign_tag(cache, object, false, keep_tag);
 
 	/* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */
 	kasan_unpoison_shadow(set_tag(object, tag), size);
@@ -483,6 +496,18 @@ void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
 
 	return set_tag(object, tag);
 }
+
+void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
+					gfp_t flags)
+{
+	return __kasan_kmalloc(cache, object, cache->object_size, flags, false);
+}
+
+void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
+					size_t size, gfp_t flags)
+{
+	return __kasan_kmalloc(cache, object, size, flags, true);
+}
 EXPORT_SYMBOL(kasan_kmalloc);
 
 void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
@@ -522,7 +547,8 @@ void * __must_check kasan_krealloc(const void *object, size_t size, gfp_t flags)
 	if (unlikely(!PageSlab(page)))
 		return kasan_kmalloc_large(object, size, flags);
 	else
-		return kasan_kmalloc(page->slab_cache, object, size, flags);
+		return __kasan_kmalloc(page->slab_cache, object, size,
+						flags, true);
 }
 
 void kasan_poison_kfree(void *ptr, unsigned long ip)
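Note on the keep_tag split above: a kmalloc() allocation runs two hooks on the same object, kasan_slab_alloc() with keep_tag=false (may pick a tag) and then kasan_kmalloc() with keep_tag=true (must keep it). A toy stand-alone model of that contract, with made-up toy_* helpers rather than the kernel's get_tag()/random_tag():

#include <stdio.h>
#include <stdlib.h>

/* Toy model, not kernel code: keep_tag=0 may generate a tag, keep_tag=1 reuses it. */
static unsigned char toy_random_tag(void)
{
	return (unsigned char)(rand() & 0xff);
}

static unsigned char toy_assign_tag(unsigned char current_tag, int keep_tag)
{
	return keep_tag ? current_tag : toy_random_tag();
}

int main(void)
{
	unsigned char first  = toy_assign_tag(0, 0);     /* like kasan_slab_alloc() */
	unsigned char second = toy_assign_tag(first, 1); /* like kasan_kmalloc() or krealloc() */
	printf("first=%u second=%u (second always equals first)\n", first, second);
	return 0;
}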
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index 0777649e07c4..63fca3172659 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -46,7 +46,7 @@ void kasan_init_tags(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		per_cpu(prng_state, cpu) = get_random_u32();
+		per_cpu(prng_state, cpu) = (u32)get_cycles();
 }
 
 /*
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index f9d9dc250428..707fa5579f66 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -574,6 +574,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	unsigned long flags;
 	struct kmemleak_object *object, *parent;
 	struct rb_node **link, *rb_parent;
+	unsigned long untagged_ptr;
 
 	object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
 	if (!object) {
@@ -619,8 +620,9 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 
 	write_lock_irqsave(&kmemleak_lock, flags);
 
-	min_addr = min(min_addr, ptr);
-	max_addr = max(max_addr, ptr + size);
+	untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
+	min_addr = min(min_addr, untagged_ptr);
+	max_addr = max(max_addr, untagged_ptr + size);
 	link = &object_tree_root.rb_node;
 	rb_parent = NULL;
 	while (*link) {
@@ -1333,6 +1335,7 @@ static void scan_block(void *_start, void *_end,
 	unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
 	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
 	unsigned long flags;
+	unsigned long untagged_ptr;
 
 	read_lock_irqsave(&kmemleak_lock, flags);
 	for (ptr = start; ptr < end; ptr++) {
@@ -1347,7 +1350,8 @@ static void scan_block(void *_start, void *_end,
 		pointer = *ptr;
 		kasan_enable_current();
 
-		if (pointer < min_addr || pointer >= max_addr)
+		untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
+		if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
 			continue;
 
 		/*
diff --git a/mm/memblock.c b/mm/memblock.c
index 022d4cbb3618..ea31045ba704 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -26,6 +26,13 @@
 
 #include "internal.h"
 
+#define INIT_MEMBLOCK_REGIONS			128
+#define INIT_PHYSMEM_REGIONS			4
+
+#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
+# define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
+#endif
+
 /**
  * DOC: memblock overview
  *
@@ -92,7 +99,7 @@ unsigned long max_pfn;
 unsigned long long max_possible_pfn;
 
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
-static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
 static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
 #endif
@@ -105,7 +112,7 @@ struct memblock memblock __initdata_memblock = {
 
 	.reserved.regions	= memblock_reserved_init_regions,
 	.reserved.cnt		= 1,	/* empty dummy entry */
-	.reserved.max		= INIT_MEMBLOCK_REGIONS,
+	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
 	.reserved.name		= "reserved",
 
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
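The new #ifndef guard lets an architecture predefine INIT_MEMBLOCK_RESERVED_REGIONS before mm/memblock.c sizes its static reserved array. A minimal stand-alone sketch of that override pattern (hypothetical names and values, not kernel code):

#include <stdio.h>

#define INIT_MEMBLOCK_REGIONS 128

/* A hypothetical arch header could have done, earlier:
 *   #define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + 64)
 * otherwise we fall back to the default below. */
#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS
#endif

static int reserved_regions[INIT_MEMBLOCK_RESERVED_REGIONS];

int main(void)
{
	/* Prints 128 with the default, more if the override was defined first. */
	printf("%zu reserved slots\n",
	       sizeof(reserved_regions) / sizeof(reserved_regions[0]));
	return 0;
}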
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6379fff1a5ff..831be5ff5f4d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
 		if (fail || tk->addr_valid == 0) {
 			pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
 			       pfn, tk->tsk->comm, tk->tsk->pid);
-			force_sig(SIGKILL, tk->tsk);
+			do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
+					 tk->tsk, PIDTYPE_PID);
 		}
 
 		/*
@@ -966,7 +967,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
 	struct address_space *mapping;
 	LIST_HEAD(tokill);
-	bool unmap_success = true;
+	bool unmap_success;
 	int kill = 1, forcekill;
 	struct page *hpage = *hpagep;
 	bool mlocked = PageMlocked(hpage);
@@ -1028,19 +1029,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	if (kill)
 		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
 
-	if (!PageHuge(hpage)) {
-		unmap_success = try_to_unmap(hpage, ttu);
-	} else if (mapping) {
-		/*
-		 * For hugetlb pages, try_to_unmap could potentially call
-		 * huge_pmd_unshare.  Because of this, take semaphore in
-		 * write mode here and set TTU_RMAP_LOCKED to indicate we
-		 * have taken the lock at this higer level.
-		 */
-		i_mmap_lock_write(mapping);
-		unmap_success = try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
-		i_mmap_unlock_write(mapping);
-	}
+	unmap_success = try_to_unmap(hpage, ttu);
 	if (!unmap_success)
 		pr_err("Memory failure: %#lx: failed to unmap page (mapcount=%d)\n",
 		       pfn, page_mapcount(hpage));
diff --git a/mm/memory.c b/mm/memory.c
index a52663c0612d..e11ca9dd823f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2994,6 +2994,28 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	vm_fault_t ret;
 
+	/*
+	 * Preallocate pte before we take page_lock because this might lead to
+	 * deadlocks for memcg reclaim which waits for pages under writeback:
+	 *				lock_page(A)
+	 *				SetPageWriteback(A)
+	 *				unlock_page(A)
+	 * lock_page(B)
+	 *				lock_page(B)
+	 * pte_alloc_pne
+	 *   shrink_page_list
+	 *     wait_on_page_writeback(A)
+	 *				SetPageWriteback(B)
+	 *				unlock_page(B)
+	 *				# flush A, B to clear the writeback
+	 */
+	if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
+		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
+		if (!vmf->prealloc_pte)
+			return VM_FAULT_OOM;
+		smp_wmb(); /* See comment in __pte_alloc() */
+	}
+
 	ret = vma->vm_ops->fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
 			    VM_FAULT_DONE_COW)))
@@ -4077,8 +4099,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 		goto out;
 
 	if (range) {
-		range->start = address & PAGE_MASK;
-		range->end = range->start + PAGE_SIZE;
+		mmu_notifier_range_init(range, mm, address & PAGE_MASK,
+				     (address & PAGE_MASK) + PAGE_SIZE);
 		mmu_notifier_invalidate_range_start(range);
 	}
 	ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b9a667d36c55..1ad28323fb9f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1188,11 +1188,13 @@ static inline int pageblock_free(struct page *page)
 	return PageBuddy(page) && page_order(page) >= pageblock_order;
 }
 
-/* Return the start of the next active pageblock after a given page */
-static struct page *next_active_pageblock(struct page *page)
+/* Return the pfn of the start of the next active pageblock after a given pfn */
+static unsigned long next_active_pageblock(unsigned long pfn)
 {
+	struct page *page = pfn_to_page(pfn);
+
 	/* Ensure the starting page is pageblock-aligned */
-	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+	BUG_ON(pfn & (pageblock_nr_pages - 1));
 
 	/* If the entire pageblock is free, move to the end of free page */
 	if (pageblock_free(page)) {
@@ -1200,16 +1202,16 @@ static struct page *next_active_pageblock(struct page *page)
 		/* be careful. we don't have locks, page_order can be changed.*/
 		order = page_order(page);
 		if ((order < MAX_ORDER) && (order >= pageblock_order))
-			return page + (1 << order);
+			return pfn + (1 << order);
 	}
 
-	return page + pageblock_nr_pages;
+	return pfn + pageblock_nr_pages;
 }
 
-static bool is_pageblock_removable_nolock(struct page *page)
+static bool is_pageblock_removable_nolock(unsigned long pfn)
 {
+	struct page *page = pfn_to_page(pfn);
 	struct zone *zone;
-	unsigned long pfn;
 
 	/*
 	 * We have to be careful here because we are iterating over memory
@@ -1232,12 +1234,14 @@ static bool is_pageblock_removable_nolock(struct page *page)
 /* Checks if this range of memory is likely to be hot-removable. */
 bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 {
-	struct page *page = pfn_to_page(start_pfn);
-	struct page *end_page = page + nr_pages;
+	unsigned long end_pfn, pfn;
+
+	end_pfn = min(start_pfn + nr_pages,
+			zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
 
 	/* Check the starting page of each pageblock within the range */
-	for (; page < end_page; page = next_active_pageblock(page)) {
-		if (!is_pageblock_removable_nolock(page))
+	for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
+		if (!is_pageblock_removable_nolock(pfn))
 			return false;
 		cond_resched();
 	}
@@ -1273,6 +1277,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 				i++;
 			if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
 				continue;
+			/* Check if we got outside of the zone */
+			if (zone && !zone_spans_pfn(zone, pfn + i))
+				return 0;
 			page = pfn_to_page(pfn + i);
 			if (zone && page_zone(page) != zone)
 				return 0;
@@ -1301,23 +1308,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
-	struct page *page;
+
 	for (pfn = start; pfn < end; pfn++) {
-		if (pfn_valid(pfn)) {
-			page = pfn_to_page(pfn);
-			if (PageLRU(page))
-				return pfn;
-			if (__PageMovable(page))
-				return pfn;
-			if (PageHuge(page)) {
-				if (hugepage_migration_supported(page_hstate(page)) &&
-				    page_huge_active(page))
-					return pfn;
-				else
-					pfn = round_up(pfn + 1,
-						1 << compound_order(page)) - 1;
-			}
-		}
+		struct page *page, *head;
+		unsigned long skip;
+
+		if (!pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		if (PageLRU(page))
+			return pfn;
+		if (__PageMovable(page))
+			return pfn;
+
+		if (!PageHuge(page))
+			continue;
+		head = compound_head(page);
+		if (hugepage_migration_supported(page_hstate(head)) &&
+		    page_huge_active(head))
+			return pfn;
+		skip = (1 << compound_order(head)) - (page - head);
+		pfn += skip - 1;
 	}
 	return 0;
 }
@@ -1344,7 +1355,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long pfn;
 	struct page *page;
-	int not_managed = 0;
 	int ret = 0;
 	LIST_HEAD(source);
 
@@ -1392,7 +1402,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		else
 			ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
 		if (!ret) { /* Success */
-			put_page(page);
 			list_add_tail(&page->lru, &source);
 			if (!__PageMovable(page))
 				inc_node_page_state(page, NR_ISOLATED_ANON +
@@ -1401,22 +1410,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		} else {
 			pr_warn("failed to isolate pfn %lx\n", pfn);
 			dump_page(page, "isolation failed");
-			put_page(page);
-			/* Because we don't have big zone->lock. we should
-			   check this again here. */
-			if (page_count(page)) {
-				not_managed++;
-				ret = -EBUSY;
-				break;
-			}
 		}
+		put_page(page);
 	}
 	if (!list_empty(&source)) {
-		if (not_managed) {
-			putback_movable_pages(&source);
-			goto out;
-		}
-
 		/* Allocate a new page from the nearest neighbor node */
 		ret = migrate_pages(&source, new_node_page, NULL, 0,
 					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
@@ -1429,7 +1426,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 			putback_movable_pages(&source);
 		}
 	}
-out:
+
 	return ret;
 }
 
@@ -1576,7 +1573,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	   we assume this for now. .*/
 	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
 				  &valid_end)) {
-		mem_hotplug_done();
 		ret = -EINVAL;
 		reason = "multizone range";
 		goto failed_removal;
@@ -1591,7 +1587,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
 				       MIGRATE_MOVABLE,
 				       SKIP_HWPOISON | REPORT_FAILURE);
 	if (ret) {
-		mem_hotplug_done();
 		reason = "failure to isolate range";
 		goto failed_removal;
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d4496d9d34f5..ee2bce59d2bf 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1314,7 +1314,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 			      nodemask_t *nodes)
 {
 	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
-	const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
+	unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
 
 	if (copy > nbytes) {
 		if (copy > PAGE_SIZE)
@@ -1491,7 +1491,7 @@ static int kernel_get_mempolicy(int __user *policy,
 	int uninitialized_var(pval);
 	nodemask_t nodes;
 
-	if (nmask != NULL && maxnode < MAX_NUMNODES)
+	if (nmask != NULL && maxnode < nr_node_ids)
 		return -EINVAL;
 
 	err = do_get_mempolicy(&pval, &nodes, addr, flags);
@@ -1527,7 +1527,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 	unsigned long nr_bits, alloc_size;
 	DECLARE_BITMAP(bm, MAX_NUMNODES);
 
-	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+	nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
 	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
 
 	if (nmask)
diff --git a/mm/migrate.c b/mm/migrate.c
index ccf8966caf6f..d4fd680be3b0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
 	/* Simple case, sync compaction */
 	if (mode != MIGRATE_ASYNC) {
 		do {
-			get_bh(bh);
 			lock_buffer(bh);
 			bh = bh->b_this_page;
 
@@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
 
 	/* async case, we cannot block on lock_buffer so use trylock_buffer */
 	do {
-		get_bh(bh);
 		if (!trylock_buffer(bh)) {
 			/*
 			 * We failed to lock the buffer and cannot stall in
 			 * async migration. Release the taken locks
 			 */
 			struct buffer_head *failed_bh = bh;
-			put_bh(failed_bh);
 			bh = head;
 			while (bh != failed_bh) {
 				unlock_buffer(bh);
-				put_bh(bh);
 				bh = bh->b_this_page;
 			}
 			return false;
@@ -818,7 +814,6 @@ unlock_buffers:
 	bh = head;
 	do {
 		unlock_buffer(bh);
-		put_bh(bh);
 		bh = bh->b_this_page;
 
 	} while (bh != head);
@@ -1135,10 +1130,13 @@ out:
 	 * If migration is successful, decrease refcount of the newpage
 	 * which will not free the page because new page owner increased
 	 * refcounter. As well, if it is LRU page, add the page to LRU
-	 * list in here.
+	 * list in here. Use the old state of the isolated source page to
+	 * determine if we migrated a LRU page. newpage was already unlocked
+	 * and possibly modified by its owner - don't rely on the page
+	 * state.
 	 */
 	if (rc == MIGRATEPAGE_SUCCESS) {
-		if (unlikely(__PageMovable(newpage)))
+		if (unlikely(!is_lru))
 			put_page(newpage);
 		else
 			putback_lru_page(newpage);
@@ -1324,19 +1322,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 		goto put_anon;
 
 	if (page_mapped(hpage)) {
-		struct address_space *mapping = page_mapping(hpage);
-
-		/*
-		 * try_to_unmap could potentially call huge_pmd_unshare.
-		 * Because of this, take semaphore in write mode here and
-		 * set TTU_RMAP_LOCKED to let lower levels know we have
-		 * taken the lock.
-		 */
-		i_mmap_lock_write(mapping);
 		try_to_unmap(hpage,
-			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
-			TTU_RMAP_LOCKED);
-		i_mmap_unlock_write(mapping);
+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 		page_was_mapped = 1;
 	}
 
diff --git a/mm/mincore.c b/mm/mincore.c
index f0f91461a9f4..218099b5ed31 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -42,14 +42,72 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
 	return 0;
 }
 
-static int mincore_unmapped_range(unsigned long addr, unsigned long end,
-				   struct mm_walk *walk)
+/*
+ * Later we can get more picky about what "in core" means precisely.
+ * For now, simply check to see if the page is in the page cache,
+ * and is up to date; i.e. that no page-in operation would be required
+ * at this time if an application were to map and access this page.
+ */
+static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
+{
+	unsigned char present = 0;
+	struct page *page;
+
+	/*
+	 * When tmpfs swaps out a page from a file, any process mapping that
+	 * file will not get a swp_entry_t in its pte, but rather it is like
+	 * any other file mapping (ie. marked !present and faulted in with
+	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
+	 */
+#ifdef CONFIG_SWAP
+	if (shmem_mapping(mapping)) {
+		page = find_get_entry(mapping, pgoff);
+		/*
+		 * shmem/tmpfs may return swap: account for swapcache
+		 * page too.
+		 */
+		if (xa_is_value(page)) {
+			swp_entry_t swp = radix_to_swp_entry(page);
+			page = find_get_page(swap_address_space(swp),
+					     swp_offset(swp));
+		}
+	} else
+		page = find_get_page(mapping, pgoff);
+#else
+	page = find_get_page(mapping, pgoff);
+#endif
+	if (page) {
+		present = PageUptodate(page);
+		put_page(page);
+	}
+
+	return present;
+}
+
+static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
+				struct vm_area_struct *vma, unsigned char *vec)
 {
-	unsigned char *vec = walk->private;
 	unsigned long nr = (end - addr) >> PAGE_SHIFT;
+	int i;
 
-	memset(vec, 0, nr);
-	walk->private += nr;
+	if (vma->vm_file) {
+		pgoff_t pgoff;
+
+		pgoff = linear_page_index(vma, addr);
+		for (i = 0; i < nr; i++, pgoff++)
+			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+	} else {
+		for (i = 0; i < nr; i++)
+			vec[i] = 0;
+	}
+	return nr;
+}
+
+static int mincore_unmapped_range(unsigned long addr, unsigned long end,
+				   struct mm_walk *walk)
+{
+	walk->private += __mincore_unmapped_range(addr, end,
+						  walk->vma, walk->private);
 	return 0;
 }
 
@@ -69,9 +127,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		goto out;
 	}
 
-	/* We'll consider a THP page under construction to be there */
 	if (pmd_trans_unstable(pmd)) {
-		memset(vec, 1, nr);
+		__mincore_unmapped_range(addr, end, vma, vec);
 		goto out;
 	}
 
@@ -80,17 +137,28 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		pte_t pte = *ptep;
 
 		if (pte_none(pte))
-			*vec = 0;
+			__mincore_unmapped_range(addr, addr + PAGE_SIZE,
+						 vma, vec);
 		else if (pte_present(pte))
 			*vec = 1;
 		else { /* pte is a swap entry */
 			swp_entry_t entry = pte_to_swp_entry(pte);
 
-			/*
-			 * migration or hwpoison entries are always
-			 * uptodate
-			 */
-			*vec = !!non_swap_entry(entry);
+			if (non_swap_entry(entry)) {
+				/*
+				 * migration or hwpoison entries are always
+				 * uptodate
+				 */
+				*vec = 1;
+			} else {
+#ifdef CONFIG_SWAP
+				*vec = mincore_page(swap_address_space(entry),
+						    swp_offset(entry));
+#else
+				WARN_ON(1);
+				*vec = 1;
+#endif
+			}
 		}
 		vec++;
 	}
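From userspace, the bytes these helpers fill are what mincore(2) returns: one status byte per page, bit 0 set when the page (or, with the change above, its page-cache copy) is resident. A small usage sketch, assuming Linux and with error handling kept minimal:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	size_t len = 4 * (size_t)psz;
	unsigned char vec[4];			/* one status byte per page */

	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;

	memset(buf, 0, (size_t)psz);		/* touch only the first page */

	if (mincore(buf, len, vec) == 0)
		for (int i = 0; i < 4; i++)
			printf("page %d: %s\n", i,
			       (vec[i] & 1) ? "resident" : "not resident");

	munmap(buf, len);
	return 0;
}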
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f0e8cd9edb1a..26ea8636758f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -647,8 +647,8 @@ static int oom_reaper(void *unused)
 
 static void wake_oom_reaper(struct task_struct *tsk)
 {
-	/* tsk is already queued? */
-	if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+	/* mm is already queued? */
+	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
 		return;
 
 	get_task_struct(tsk);
@@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	 * still freeing memory.
 	 */
 	read_lock(&tasklist_lock);
+
+	/*
+	 * The task 'p' might have already exited before reaching here. The
+	 * put_task_struct() will free task_struct 'p' while the loop still try
+	 * to access the field of 'p', so, get an extra reference.
+	 */
+	get_task_struct(p);
 	for_each_thread(p, t) {
 		list_for_each_entry(child, &t->children, sibling) {
 			unsigned int child_points;
@@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 			}
 		}
 	}
+	put_task_struct(p);
 	read_unlock(&tasklist_lock);
 
 	/*
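The wake_oom_reaper() hunk above relies on test_and_set_bit() returning the bit's previous value, so only the first caller for a given mm actually queues it. A minimal userspace analogue of that queue-once idiom with C11 atomics (illustrative only, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong mm_flags;			/* stand-in for mm->flags */
#define QUEUED_BIT 0x1UL			/* stand-in for MMF_OOM_REAP_QUEUED */

/* Returns 1 only for the caller that flipped the bit from 0 to 1. */
static int queue_once(void)
{
	return !(atomic_fetch_or(&mm_flags, QUEUED_BIT) & QUEUED_BIT);
}

int main(void)
{
	printf("%d %d\n", queue_once(), queue_once());	/* prints "1 0" */
	return 0;
}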
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cde5dac6229a..0b9f577b1a2a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2170,6 +2170,18 @@ static inline void boost_watermark(struct zone *zone) | |||
2170 | 2170 | ||
2171 | max_boost = mult_frac(zone->_watermark[WMARK_HIGH], | 2171 | max_boost = mult_frac(zone->_watermark[WMARK_HIGH], |
2172 | watermark_boost_factor, 10000); | 2172 | watermark_boost_factor, 10000); |
2173 | |||
2174 | /* | ||
2175 | * high watermark may be uninitialised if fragmentation occurs | ||
2176 | * very early in boot so do not boost. We do not fall | ||
2177 | * through and boost by pageblock_nr_pages as failing | ||
2178 | * allocations that early means that reclaim is not going | ||
2179 | * to help and it may even be impossible to reclaim the | ||
2180 | * boosted watermark resulting in a hang. | ||
2181 | */ | ||
2182 | if (!max_boost) | ||
2183 | return; | ||
2184 | |||
2173 | max_boost = max(pageblock_nr_pages, max_boost); | 2185 | max_boost = max(pageblock_nr_pages, max_boost); |
2174 | 2186 | ||
2175 | zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, | 2187 | zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, |
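With the early return added above, boost_watermark() roughly reads as below. The initial watermark_boost_factor guard is an assumption about the surrounding, unshown lines; everything else comes from the hunk.

    static inline void boost_watermark(struct zone *zone)
    {
            unsigned long max_boost;

            if (!watermark_boost_factor)    /* assumed existing guard */
                    return;

            max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
                                  watermark_boost_factor, 10000);

            /* WMARK_HIGH can still be 0 very early in boot; boosting
             * from 0 would ask reclaim for memory it cannot deliver */
            if (!max_boost)
                    return;

            max_boost = max(pageblock_nr_pages, max_boost);
            zone->watermark_boost = min(zone->watermark_boost +
                                        pageblock_nr_pages, max_boost);
    }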
@@ -2214,7 +2226,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, | |||
2214 | */ | 2226 | */ |
2215 | boost_watermark(zone); | 2227 | boost_watermark(zone); |
2216 | if (alloc_flags & ALLOC_KSWAPD) | 2228 | if (alloc_flags & ALLOC_KSWAPD) |
2217 | wakeup_kswapd(zone, 0, 0, zone_idx(zone)); | 2229 | set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); |
2218 | 2230 | ||
2219 | /* We are not allowed to try stealing from the whole block */ | 2231 | /* We are not allowed to try stealing from the whole block */ |
2220 | if (!whole_block) | 2232 | if (!whole_block) |
@@ -3102,6 +3114,12 @@ struct page *rmqueue(struct zone *preferred_zone, | |||
3102 | local_irq_restore(flags); | 3114 | local_irq_restore(flags); |
3103 | 3115 | ||
3104 | out: | 3116 | out: |
3117 | /* Separate test+clear to avoid unnecessary atomics */ | ||
3118 | if (test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags)) { | ||
3119 | clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); | ||
3120 | wakeup_kswapd(zone, 0, 0, zone_idx(zone)); | ||
3121 | } | ||
3122 | |||
3105 | VM_BUG_ON_PAGE(page && bad_range(zone, page), page); | 3123 | VM_BUG_ON_PAGE(page && bad_range(zone, page), page); |
3106 | return page; | 3124 | return page; |
3107 | 3125 | ||
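The two page_alloc.c hunks above split one action across two places: steal_suitable_fallback(), which runs under zone->lock with IRQs disabled, now only records that a boost happened, and rmqueue() performs the actual kswapd wakeup after local_irq_restore(). Side by side, with the separate test/clear kept to avoid an unconditional atomic:

    /* producer: steal_suitable_fallback(), zone->lock held */
    boost_watermark(zone);
    if (alloc_flags & ALLOC_KSWAPD)
            set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);

    /* consumer: rmqueue(), after local_irq_restore() */
    if (test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags)) {
            clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
            wakeup_kswapd(zone, 0, 0, zone_idx(zone));
    }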
@@ -4669,11 +4687,11 @@ refill: | |||
4669 | /* Even if we own the page, we do not use atomic_set(). | 4687 | /* Even if we own the page, we do not use atomic_set(). |
4670 | * This would break get_page_unless_zero() users. | 4688 | * This would break get_page_unless_zero() users. |
4671 | */ | 4689 | */ |
4672 | page_ref_add(page, size - 1); | 4690 | page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); |
4673 | 4691 | ||
4674 | /* reset page count bias and offset to start of new frag */ | 4692 | /* reset page count bias and offset to start of new frag */ |
4675 | nc->pfmemalloc = page_is_pfmemalloc(page); | 4693 | nc->pfmemalloc = page_is_pfmemalloc(page); |
4676 | nc->pagecnt_bias = size; | 4694 | nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; |
4677 | nc->offset = size; | 4695 | nc->offset = size; |
4678 | } | 4696 | } |
4679 | 4697 | ||
@@ -4689,10 +4707,10 @@ refill: | |||
4689 | size = nc->size; | 4707 | size = nc->size; |
4690 | #endif | 4708 | #endif |
4691 | /* OK, page count is 0, we can safely set it */ | 4709 | /* OK, page count is 0, we can safely set it */ |
4692 | set_page_count(page, size); | 4710 | set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); |
4693 | 4711 | ||
4694 | /* reset page count bias and offset to start of new frag */ | 4712 | /* reset page count bias and offset to start of new frag */ |
4695 | nc->pagecnt_bias = size; | 4713 | nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; |
4696 | offset = size - fragsz; | 4714 | offset = size - fragsz; |
4697 | } | 4715 | } |
4698 | 4716 | ||
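Both page-frag hunks replace the variable size with the constant PAGE_FRAG_CACHE_MAX_SIZE + 1 in the refcount bias, so the bias set at refill always matches the bias the cache later subtracts, whether the refill got the large compound page or fell back to a single page. The resulting bookkeeping, as far as it can be read from the hunks:

    /* after refill (names from the diff):
     *   page_ref_count(page) == PAGE_FRAG_CACHE_MAX_SIZE + 1
     *   nc->pagecnt_bias     == PAGE_FRAG_CACHE_MAX_SIZE + 1
     * each fragment handed out decrements pagecnt_bias; the page is
     * freed only when the remaining bias, subtracted from the refcount,
     * brings it to zero */
    page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);   /* fresh page starts at 1 */
    nc->pfmemalloc = page_is_pfmemalloc(page);
    nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
    nc->offset = size;      /* still the size actually allocated */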
@@ -5695,18 +5713,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
5695 | cond_resched(); | 5713 | cond_resched(); |
5696 | } | 5714 | } |
5697 | } | 5715 | } |
5698 | #ifdef CONFIG_SPARSEMEM | ||
5699 | /* | ||
5700 | * If the zone does not span the rest of the section then | ||
5701 | * we should at least initialize those pages. Otherwise we | ||
5702 | * could blow up on a poisoned page in some paths which depend | ||
5703 | * on full sections being initialized (e.g. memory hotplug). | ||
5704 | */ | ||
5705 | while (end_pfn % PAGES_PER_SECTION) { | ||
5706 | __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid); | ||
5707 | end_pfn++; | ||
5708 | } | ||
5709 | #endif | ||
5710 | } | 5716 | } |
5711 | 5717 | ||
5712 | #ifdef CONFIG_ZONE_DEVICE | 5718 | #ifdef CONFIG_ZONE_DEVICE |
diff --git a/mm/page_ext.c b/mm/page_ext.c index ae44f7adbe07..8c78b8d45117 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c | |||
@@ -398,10 +398,8 @@ void __init page_ext_init(void) | |||
398 | * We know some arch can have a nodes layout such as | 398 | * We know some arch can have a nodes layout such as |
399 | * -------------pfn--------------> | 399 | * -------------pfn--------------> |
400 | * N0 | N1 | N2 | N0 | N1 | N2|.... | 400 | * N0 | N1 | N2 | N0 | N1 | N2|.... |
401 | * | ||
402 | * Take into account DEFERRED_STRUCT_PAGE_INIT. | ||
403 | */ | 401 | */ |
404 | if (early_pfn_to_nid(pfn) != nid) | 402 | if (pfn_to_nid(pfn) != nid) |
405 | continue; | 403 | continue; |
406 | if (init_section_page_ext(pfn, nid)) | 404 | if (init_section_page_ext(pfn, nid)) |
407 | goto oom; | 405 | goto oom; |
@@ -25,7 +25,6 @@ | |||
25 | * page->flags PG_locked (lock_page) | 25 | * page->flags PG_locked (lock_page) |
26 | * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share) | 26 | * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share) |
27 | * mapping->i_mmap_rwsem | 27 | * mapping->i_mmap_rwsem |
28 | * hugetlb_fault_mutex (hugetlbfs specific page fault mutex) | ||
29 | * anon_vma->rwsem | 28 | * anon_vma->rwsem |
30 | * mm->page_table_lock or pte_lock | 29 | * mm->page_table_lock or pte_lock |
31 | * zone_lru_lock (in mark_page_accessed, isolate_lru_page) | 30 | * zone_lru_lock (in mark_page_accessed, isolate_lru_page) |
@@ -1372,16 +1371,13 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
1372 | * Note that the page can not be free in this function as call of | 1371 | * Note that the page can not be free in this function as call of |
1373 | * try_to_unmap() must hold a reference on the page. | 1372 | * try_to_unmap() must hold a reference on the page. |
1374 | */ | 1373 | */ |
1375 | mmu_notifier_range_init(&range, vma->vm_mm, vma->vm_start, | 1374 | mmu_notifier_range_init(&range, vma->vm_mm, address, |
1376 | min(vma->vm_end, vma->vm_start + | 1375 | min(vma->vm_end, address + |
1377 | (PAGE_SIZE << compound_order(page)))); | 1376 | (PAGE_SIZE << compound_order(page)))); |
1378 | if (PageHuge(page)) { | 1377 | if (PageHuge(page)) { |
1379 | /* | 1378 | /* |
1380 | * If sharing is possible, start and end will be adjusted | 1379 | * If sharing is possible, start and end will be adjusted |
1381 | * accordingly. | 1380 | * accordingly. |
1382 | * | ||
1383 | * If called for a huge page, caller must hold i_mmap_rwsem | ||
1384 | * in write mode as it is possible to call huge_pmd_unshare. | ||
1385 | */ | 1381 | */ |
1386 | adjust_range_if_pmd_sharing_possible(vma, &range.start, | 1382 | adjust_range_if_pmd_sharing_possible(vma, &range.start, |
1387 | &range.end); | 1383 | &range.end); |
diff --git a/mm/shmem.c b/mm/shmem.c index 6ece1e2fe76e..0905215fb016 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2854,10 +2854,14 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr | |||
2854 | * No ordinary (disk based) filesystem counts links as inodes; | 2854 | * No ordinary (disk based) filesystem counts links as inodes; |
2855 | * but each new link needs a new dentry, pinning lowmem, and | 2855 | * but each new link needs a new dentry, pinning lowmem, and |
2856 | * tmpfs dentries cannot be pruned until they are unlinked. | 2856 | * tmpfs dentries cannot be pruned until they are unlinked. |
2857 | * But if an O_TMPFILE file is linked into the tmpfs, the | ||
2858 | * first link must skip that, to get the accounting right. | ||
2857 | */ | 2859 | */ |
2858 | ret = shmem_reserve_inode(inode->i_sb); | 2860 | if (inode->i_nlink) { |
2859 | if (ret) | 2861 | ret = shmem_reserve_inode(inode->i_sb); |
2860 | goto out; | 2862 | if (ret) |
2863 | goto out; | ||
2864 | } | ||
2861 | 2865 | ||
2862 | dir->i_size += BOGO_DIRENT_SIZE; | 2866 | dir->i_size += BOGO_DIRENT_SIZE; |
2863 | inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); | 2867 | inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); |
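The shmem_link() hunk only takes a new inode reservation when the inode already has a link; per the added comment, the first link of an O_TMPFILE inode must skip it to keep the accounting balanced with the reservation taken at create time (that last clause is an inference, not something visible in the hunk). Condensed:

    if (inode->i_nlink) {   /* skip for an O_TMPFILE's first link */
            ret = shmem_reserve_inode(inode->i_sb);
            if (ret)
                    goto out;
    }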
@@ -666,8 +666,10 @@ static struct alien_cache *__alloc_alien_cache(int node, int entries, | |||
666 | struct alien_cache *alc = NULL; | 666 | struct alien_cache *alc = NULL; |
667 | 667 | ||
668 | alc = kmalloc_node(memsize, gfp, node); | 668 | alc = kmalloc_node(memsize, gfp, node); |
669 | init_arraycache(&alc->ac, entries, batch); | 669 | if (alc) { |
670 | spin_lock_init(&alc->lock); | 670 | init_arraycache(&alc->ac, entries, batch); |
671 | spin_lock_init(&alc->lock); | ||
672 | } | ||
671 | return alc; | 673 | return alc; |
672 | } | 674 | } |
673 | 675 | ||
@@ -2357,7 +2359,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, | |||
2357 | void *freelist; | 2359 | void *freelist; |
2358 | void *addr = page_address(page); | 2360 | void *addr = page_address(page); |
2359 | 2361 | ||
2360 | page->s_mem = kasan_reset_tag(addr) + colour_off; | 2362 | page->s_mem = addr + colour_off; |
2361 | page->active = 0; | 2363 | page->active = 0; |
2362 | 2364 | ||
2363 | if (OBJFREELIST_SLAB(cachep)) | 2365 | if (OBJFREELIST_SLAB(cachep)) |
@@ -2366,6 +2368,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, | |||
2366 | /* Slab management obj is off-slab. */ | 2368 | /* Slab management obj is off-slab. */ |
2367 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, | 2369 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, |
2368 | local_flags, nodeid); | 2370 | local_flags, nodeid); |
2371 | freelist = kasan_reset_tag(freelist); | ||
2369 | if (!freelist) | 2372 | if (!freelist) |
2370 | return NULL; | 2373 | return NULL; |
2371 | } else { | 2374 | } else { |
@@ -2679,6 +2682,13 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep, | |||
2679 | 2682 | ||
2680 | offset *= cachep->colour_off; | 2683 | offset *= cachep->colour_off; |
2681 | 2684 | ||
2685 | /* | ||
2686 | * Call kasan_poison_slab() before calling alloc_slabmgmt(), so | ||
2687 | * page_address() in the latter returns a non-tagged pointer, | ||
2688 | * as it should be for slab pages. | ||
2689 | */ | ||
2690 | kasan_poison_slab(page); | ||
2691 | |||
2682 | /* Get slab management. */ | 2692 | /* Get slab management. */ |
2683 | freelist = alloc_slabmgmt(cachep, page, offset, | 2693 | freelist = alloc_slabmgmt(cachep, page, offset, |
2684 | local_flags & ~GFP_CONSTRAINT_MASK, page_node); | 2694 | local_flags & ~GFP_CONSTRAINT_MASK, page_node); |
@@ -2687,7 +2697,6 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep, | |||
2687 | 2697 | ||
2688 | slab_map_pages(cachep, page, freelist); | 2698 | slab_map_pages(cachep, page, freelist); |
2689 | 2699 | ||
2690 | kasan_poison_slab(page); | ||
2691 | cache_init_objs(cachep, page); | 2700 | cache_init_objs(cachep, page); |
2692 | 2701 | ||
2693 | if (gfpflags_allow_blocking(local_flags)) | 2702 | if (gfpflags_allow_blocking(local_flags)) |
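The mm/slab.c hunks reorder the KASAN hooks in cache_grow_begin(): the slab page is poisoned before alloc_slabmgmt() runs, so the page_address()-derived pointers it uses stay untagged, and an off-slab freelist allocation has its tag stripped explicitly. The resulting ordering, condensed and with error handling omitted:

    kasan_poison_slab(page);        /* now before alloc_slabmgmt() */

    freelist = alloc_slabmgmt(cachep, page, offset,
                              local_flags & ~GFP_CONSTRAINT_MASK, page_node);
    /* inside alloc_slabmgmt(): an off-slab freelist is untagged via
     * kasan_reset_tag() right after kmem_cache_alloc_node() */

    slab_map_pages(cachep, page, freelist);
    cache_init_objs(cachep, page);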
@@ -3538,7 +3547,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3538 | { | 3547 | { |
3539 | void *ret = slab_alloc(cachep, flags, _RET_IP_); | 3548 | void *ret = slab_alloc(cachep, flags, _RET_IP_); |
3540 | 3549 | ||
3541 | ret = kasan_slab_alloc(cachep, ret, flags); | ||
3542 | trace_kmem_cache_alloc(_RET_IP_, ret, | 3550 | trace_kmem_cache_alloc(_RET_IP_, ret, |
3543 | cachep->object_size, cachep->size, flags); | 3551 | cachep->object_size, cachep->size, flags); |
3544 | 3552 | ||
@@ -3628,7 +3636,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
3628 | { | 3636 | { |
3629 | void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); | 3637 | void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); |
3630 | 3638 | ||
3631 | ret = kasan_slab_alloc(cachep, ret, flags); | ||
3632 | trace_kmem_cache_alloc_node(_RET_IP_, ret, | 3639 | trace_kmem_cache_alloc_node(_RET_IP_, ret, |
3633 | cachep->object_size, cachep->size, | 3640 | cachep->object_size, cachep->size, |
3634 | flags, nodeid); | 3641 | flags, nodeid); |
@@ -4406,6 +4413,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, | |||
4406 | unsigned int objnr; | 4413 | unsigned int objnr; |
4407 | unsigned long offset; | 4414 | unsigned long offset; |
4408 | 4415 | ||
4416 | ptr = kasan_reset_tag(ptr); | ||
4417 | |||
4409 | /* Find and validate object. */ | 4418 | /* Find and validate object. */ |
4410 | cachep = page->slab_cache; | 4419 | cachep = page->slab_cache; |
4411 | objnr = obj_to_index(cachep, page, (void *)ptr); | 4420 | objnr = obj_to_index(cachep, page, (void *)ptr); |
@@ -437,11 +437,10 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | |||
437 | 437 | ||
438 | flags &= gfp_allowed_mask; | 438 | flags &= gfp_allowed_mask; |
439 | for (i = 0; i < size; i++) { | 439 | for (i = 0; i < size; i++) { |
440 | void *object = p[i]; | 440 | p[i] = kasan_slab_alloc(s, p[i], flags); |
441 | 441 | /* As p[i] might get tagged, call kmemleak hook after KASAN. */ | |
442 | kmemleak_alloc_recursive(object, s->object_size, 1, | 442 | kmemleak_alloc_recursive(p[i], s->object_size, 1, |
443 | s->flags, flags); | 443 | s->flags, flags); |
444 | p[i] = kasan_slab_alloc(s, object, flags); | ||
445 | } | 444 | } |
446 | 445 | ||
447 | if (memcg_kmem_enabled()) | 446 | if (memcg_kmem_enabled()) |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 81732d05e74a..f9d89c1b5977 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -1228,8 +1228,9 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) | |||
1228 | flags |= __GFP_COMP; | 1228 | flags |= __GFP_COMP; |
1229 | page = alloc_pages(flags, order); | 1229 | page = alloc_pages(flags, order); |
1230 | ret = page ? page_address(page) : NULL; | 1230 | ret = page ? page_address(page) : NULL; |
1231 | kmemleak_alloc(ret, size, 1, flags); | ||
1232 | ret = kasan_kmalloc_large(ret, size, flags); | 1231 | ret = kasan_kmalloc_large(ret, size, flags); |
1232 | /* As ret might get tagged, call kmemleak hook after KASAN. */ | ||
1233 | kmemleak_alloc(ret, size, 1, flags); | ||
1233 | return ret; | 1234 | return ret; |
1234 | } | 1235 | } |
1235 | EXPORT_SYMBOL(kmalloc_order); | 1236 | EXPORT_SYMBOL(kmalloc_order); |
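The slab.h and slab_common.c hunks apply the same pattern: let KASAN produce the (possibly tagged) pointer first, then register that exact pointer with kmemleak, so kmemleak tracks the address callers will actually hold. Written out once for the kmalloc_order() case:

    ret = kasan_kmalloc_large(ret, size, flags);
    /* As ret might get tagged, call kmemleak hook after KASAN. */
    kmemleak_alloc(ret, size, 1, flags);
    return ret;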
@@ -249,7 +249,18 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, | |||
249 | unsigned long ptr_addr) | 249 | unsigned long ptr_addr) |
250 | { | 250 | { |
251 | #ifdef CONFIG_SLAB_FREELIST_HARDENED | 251 | #ifdef CONFIG_SLAB_FREELIST_HARDENED |
252 | return (void *)((unsigned long)ptr ^ s->random ^ ptr_addr); | 252 | /* |
253 | * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged. | ||
254 | * Normally, this doesn't cause any issues, as both set_freepointer() | ||
255 | * and get_freepointer() are called with a pointer with the same tag. | ||
256 | * However, there are some issues with CONFIG_SLUB_DEBUG code. For | ||
257 | * example, when __free_slub() iterates over objects in a cache, it | ||
258 | * passes untagged pointers to check_object(). check_object() in turns | ||
259 | * calls get_freepointer() with an untagged pointer, which causes the | ||
260 | * freepointer to be restored incorrectly. | ||
261 | */ | ||
262 | return (void *)((unsigned long)ptr ^ s->random ^ | ||
263 | (unsigned long)kasan_reset_tag((void *)ptr_addr)); | ||
253 | #else | 264 | #else |
254 | return ptr; | 265 | return ptr; |
255 | #endif | 266 | #endif |
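The freelist_ptr() change strips the KASAN software tag from the slot address before folding it into the XOR mask, so tagged and untagged callers scramble and unscramble pointers identically. A small userspace model of the failure mode it avoids; all constants are made up, only the XOR structure mirrors the kernel, and a 64-bit build is assumed.

    #include <stdint.h>
    #include <stdio.h>

    /* toy CONFIG_SLAB_FREELIST_HARDENED-style pointer scrambling */
    static uintptr_t scramble(uintptr_t ptr, uintptr_t secret, uintptr_t slot)
    {
            return ptr ^ secret ^ slot;
    }

    int main(void)
    {
            uintptr_t secret = 0x5eed;
            uintptr_t obj = 0x1000;
            uintptr_t slot = 0x2000;
            uintptr_t slot_tagged = slot | (0xabULL << 56); /* SW tag in top byte */

            uintptr_t stored = scramble(obj, secret, slot_tagged);

            /* unscrambling with a slot address carrying a different tag does
             * not round-trip -- the mismatch the kernel avoids by stripping
             * the tag before the XOR */
            printf("same tag : %#lx\n", (unsigned long)scramble(stored, secret, slot_tagged));
            printf("mixed tag: %#lx\n", (unsigned long)scramble(stored, secret, slot));
            return 0;
    }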
@@ -303,15 +314,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
303 | __p < (__addr) + (__objects) * (__s)->size; \ | 314 | __p < (__addr) + (__objects) * (__s)->size; \ |
304 | __p += (__s)->size) | 315 | __p += (__s)->size) |
305 | 316 | ||
306 | #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ | ||
307 | for (__p = fixup_red_left(__s, __addr), __idx = 1; \ | ||
308 | __idx <= __objects; \ | ||
309 | __p += (__s)->size, __idx++) | ||
310 | |||
311 | /* Determine object index from a given position */ | 317 | /* Determine object index from a given position */ |
312 | static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) | 318 | static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) |
313 | { | 319 | { |
314 | return (p - addr) / s->size; | 320 | return (kasan_reset_tag(p) - addr) / s->size; |
315 | } | 321 | } |
316 | 322 | ||
317 | static inline unsigned int order_objects(unsigned int order, unsigned int size) | 323 | static inline unsigned int order_objects(unsigned int order, unsigned int size) |
@@ -507,6 +513,7 @@ static inline int check_valid_pointer(struct kmem_cache *s, | |||
507 | return 1; | 513 | return 1; |
508 | 514 | ||
509 | base = page_address(page); | 515 | base = page_address(page); |
516 | object = kasan_reset_tag(object); | ||
510 | object = restore_red_left(s, object); | 517 | object = restore_red_left(s, object); |
511 | if (object < base || object >= base + page->objects * s->size || | 518 | if (object < base || object >= base + page->objects * s->size || |
512 | (object - base) % s->size) { | 519 | (object - base) % s->size) { |
@@ -1075,6 +1082,16 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, | |||
1075 | init_tracking(s, object); | 1082 | init_tracking(s, object); |
1076 | } | 1083 | } |
1077 | 1084 | ||
1085 | static void setup_page_debug(struct kmem_cache *s, void *addr, int order) | ||
1086 | { | ||
1087 | if (!(s->flags & SLAB_POISON)) | ||
1088 | return; | ||
1089 | |||
1090 | metadata_access_enable(); | ||
1091 | memset(addr, POISON_INUSE, PAGE_SIZE << order); | ||
1092 | metadata_access_disable(); | ||
1093 | } | ||
1094 | |||
1078 | static inline int alloc_consistency_checks(struct kmem_cache *s, | 1095 | static inline int alloc_consistency_checks(struct kmem_cache *s, |
1079 | struct page *page, | 1096 | struct page *page, |
1080 | void *object, unsigned long addr) | 1097 | void *object, unsigned long addr) |
@@ -1330,6 +1347,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, | |||
1330 | #else /* !CONFIG_SLUB_DEBUG */ | 1347 | #else /* !CONFIG_SLUB_DEBUG */ |
1331 | static inline void setup_object_debug(struct kmem_cache *s, | 1348 | static inline void setup_object_debug(struct kmem_cache *s, |
1332 | struct page *page, void *object) {} | 1349 | struct page *page, void *object) {} |
1350 | static inline void setup_page_debug(struct kmem_cache *s, | ||
1351 | void *addr, int order) {} | ||
1333 | 1352 | ||
1334 | static inline int alloc_debug_processing(struct kmem_cache *s, | 1353 | static inline int alloc_debug_processing(struct kmem_cache *s, |
1335 | struct page *page, void *object, unsigned long addr) { return 0; } | 1354 | struct page *page, void *object, unsigned long addr) { return 0; } |
@@ -1374,8 +1393,10 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, | |||
1374 | */ | 1393 | */ |
1375 | static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) | 1394 | static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) |
1376 | { | 1395 | { |
1396 | ptr = kasan_kmalloc_large(ptr, size, flags); | ||
1397 | /* As ptr might get tagged, call kmemleak hook after KASAN. */ | ||
1377 | kmemleak_alloc(ptr, size, 1, flags); | 1398 | kmemleak_alloc(ptr, size, 1, flags); |
1378 | return kasan_kmalloc_large(ptr, size, flags); | 1399 | return ptr; |
1379 | } | 1400 | } |
1380 | 1401 | ||
1381 | static __always_inline void kfree_hook(void *x) | 1402 | static __always_inline void kfree_hook(void *x) |
@@ -1641,27 +1662,25 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1641 | if (page_is_pfmemalloc(page)) | 1662 | if (page_is_pfmemalloc(page)) |
1642 | SetPageSlabPfmemalloc(page); | 1663 | SetPageSlabPfmemalloc(page); |
1643 | 1664 | ||
1644 | start = page_address(page); | 1665 | kasan_poison_slab(page); |
1645 | 1666 | ||
1646 | if (unlikely(s->flags & SLAB_POISON)) | 1667 | start = page_address(page); |
1647 | memset(start, POISON_INUSE, PAGE_SIZE << order); | ||
1648 | 1668 | ||
1649 | kasan_poison_slab(page); | 1669 | setup_page_debug(s, start, order); |
1650 | 1670 | ||
1651 | shuffle = shuffle_freelist(s, page); | 1671 | shuffle = shuffle_freelist(s, page); |
1652 | 1672 | ||
1653 | if (!shuffle) { | 1673 | if (!shuffle) { |
1654 | for_each_object_idx(p, idx, s, start, page->objects) { | ||
1655 | if (likely(idx < page->objects)) { | ||
1656 | next = p + s->size; | ||
1657 | next = setup_object(s, page, next); | ||
1658 | set_freepointer(s, p, next); | ||
1659 | } else | ||
1660 | set_freepointer(s, p, NULL); | ||
1661 | } | ||
1662 | start = fixup_red_left(s, start); | 1674 | start = fixup_red_left(s, start); |
1663 | start = setup_object(s, page, start); | 1675 | start = setup_object(s, page, start); |
1664 | page->freelist = start; | 1676 | page->freelist = start; |
1677 | for (idx = 0, p = start; idx < page->objects - 1; idx++) { | ||
1678 | next = p + s->size; | ||
1679 | next = setup_object(s, page, next); | ||
1680 | set_freepointer(s, p, next); | ||
1681 | p = next; | ||
1682 | } | ||
1683 | set_freepointer(s, p, NULL); | ||
1665 | } | 1684 | } |
1666 | 1685 | ||
1667 | page->inuse = page->objects; | 1686 | page->inuse = page->objects; |
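The rewritten non-shuffled freelist construction above drops for_each_object_idx(), which stepped through objects by raw untagged addresses, and instead always advances through the pointer that setup_object() returns, which under CONFIG_KASAN_SW_TAGS carries the object's tag (that motivation is inferred from the surrounding KASAN changes). Without the diff columns:

    start = fixup_red_left(s, start);
    start = setup_object(s, page, start);
    page->freelist = start;
    for (idx = 0, p = start; idx < page->objects - 1; idx++) {
            next = p + s->size;
            next = setup_object(s, page, next);
            set_freepointer(s, p, next);    /* link p -> next */
            p = next;
    }
    set_freepointer(s, p, NULL);            /* last object terminates the list */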
@@ -3846,6 +3865,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, | |||
3846 | unsigned int offset; | 3865 | unsigned int offset; |
3847 | size_t object_size; | 3866 | size_t object_size; |
3848 | 3867 | ||
3868 | ptr = kasan_reset_tag(ptr); | ||
3869 | |||
3849 | /* Find object and usable object size. */ | 3870 | /* Find object and usable object size. */ |
3850 | s = page->slab_cache; | 3871 | s = page->slab_cache; |
3851 | 3872 | ||
@@ -320,11 +320,6 @@ static inline void activate_page_drain(int cpu) | |||
320 | { | 320 | { |
321 | } | 321 | } |
322 | 322 | ||
323 | static bool need_activate_page_drain(int cpu) | ||
324 | { | ||
325 | return false; | ||
326 | } | ||
327 | |||
328 | void activate_page(struct page *page) | 323 | void activate_page(struct page *page) |
329 | { | 324 | { |
330 | struct zone *zone = page_zone(page); | 325 | struct zone *zone = page_zone(page); |
@@ -653,13 +648,15 @@ void lru_add_drain(void) | |||
653 | put_cpu(); | 648 | put_cpu(); |
654 | } | 649 | } |
655 | 650 | ||
651 | #ifdef CONFIG_SMP | ||
652 | |||
653 | static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | ||
654 | |||
656 | static void lru_add_drain_per_cpu(struct work_struct *dummy) | 655 | static void lru_add_drain_per_cpu(struct work_struct *dummy) |
657 | { | 656 | { |
658 | lru_add_drain(); | 657 | lru_add_drain(); |
659 | } | 658 | } |
660 | 659 | ||
661 | static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | ||
662 | |||
663 | /* | 660 | /* |
664 | * Doesn't need any cpu hotplug locking because we do rely on per-cpu | 661 | * Doesn't need any cpu hotplug locking because we do rely on per-cpu |
665 | * kworkers being shut down before our page_alloc_cpu_dead callback is | 662 | * kworkers being shut down before our page_alloc_cpu_dead callback is |
@@ -702,6 +699,12 @@ void lru_add_drain_all(void) | |||
702 | 699 | ||
703 | mutex_unlock(&lock); | 700 | mutex_unlock(&lock); |
704 | } | 701 | } |
702 | #else | ||
703 | void lru_add_drain_all(void) | ||
704 | { | ||
705 | lru_add_drain(); | ||
706 | } | ||
707 | #endif | ||
705 | 708 | ||
706 | /** | 709 | /** |
707 | * release_pages - batched put_page() | 710 | * release_pages - batched put_page() |
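The swap.c hunks move the per-CPU work item and lru_add_drain_per_cpu() under CONFIG_SMP, where their only user lives (presumably avoiding defined-but-unused definitions on !SMP builds), and add a trivial UP variant:

    #ifdef CONFIG_SMP
    /* existing implementation: queue lru_add_drain_per_cpu() on each CPU
     * with pending pagevecs, then flush the work items */
    #else
    void lru_add_drain_all(void)
    {
            lru_add_drain();        /* one CPU: drain locally, nothing to schedule */
    }
    #endif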
diff --git a/mm/usercopy.c b/mm/usercopy.c index 852eb4e53f06..14faadcedd06 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c | |||
@@ -247,7 +247,8 @@ static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks); | |||
247 | /* | 247 | /* |
248 | * Validates that the given object is: | 248 | * Validates that the given object is: |
249 | * - not bogus address | 249 | * - not bogus address |
250 | * - known-safe heap or stack object | 250 | * - fully contained by stack (or stack frame, when available) |
251 | * - fully within SLAB object (or object whitelist area, when available) | ||
251 | * - not in kernel text | 252 | * - not in kernel text |
252 | */ | 253 | */ |
253 | void __check_object_size(const void *ptr, unsigned long n, bool to_user) | 254 | void __check_object_size(const void *ptr, unsigned long n, bool to_user) |
@@ -262,9 +263,6 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user) | |||
262 | /* Check for invalid addresses. */ | 263 | /* Check for invalid addresses. */ |
263 | check_bogus_address((const unsigned long)ptr, n, to_user); | 264 | check_bogus_address((const unsigned long)ptr, n, to_user); |
264 | 265 | ||
265 | /* Check for bad heap object. */ | ||
266 | check_heap_object(ptr, n, to_user); | ||
267 | |||
268 | /* Check for bad stack object. */ | 266 | /* Check for bad stack object. */ |
269 | switch (check_stack_object(ptr, n)) { | 267 | switch (check_stack_object(ptr, n)) { |
270 | case NOT_STACK: | 268 | case NOT_STACK: |
@@ -282,6 +280,9 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user) | |||
282 | usercopy_abort("process stack", NULL, to_user, 0, n); | 280 | usercopy_abort("process stack", NULL, to_user, 0, n); |
283 | } | 281 | } |
284 | 282 | ||
283 | /* Check for bad heap object. */ | ||
284 | check_heap_object(ptr, n, to_user); | ||
285 | |||
285 | /* Check for object in kernel to avoid text exposure. */ | 286 | /* Check for object in kernel to avoid text exposure. */ |
286 | check_kernel_text_object((const unsigned long)ptr, n, to_user); | 287 | check_kernel_text_object((const unsigned long)ptr, n, to_user); |
287 | } | 288 | } |
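After the usercopy.c hunks, __check_object_size() validates the stack before the heap, so an object fully contained by the current stack can be accepted without consulting the slab checks (an inference; the switch body is not shown in the hunk). The resulting sequence, with the switch body elided since it is unchanged:

    check_bogus_address((const unsigned long)ptr, n, to_user);

    switch (check_stack_object(ptr, n)) {
    /* ... unchanged: good stack objects pass, overlaps abort ... */
    }

    /* heap (slab) check now runs after the stack check */
    check_heap_object(ptr, n, to_user);

    check_kernel_text_object((const unsigned long)ptr, n, to_user);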
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 065c1ce191c4..d59b5a73dfb3 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c | |||
@@ -267,14 +267,10 @@ retry: | |||
267 | VM_BUG_ON(dst_addr & ~huge_page_mask(h)); | 267 | VM_BUG_ON(dst_addr & ~huge_page_mask(h)); |
268 | 268 | ||
269 | /* | 269 | /* |
270 | * Serialize via i_mmap_rwsem and hugetlb_fault_mutex. | 270 | * Serialize via hugetlb_fault_mutex |
271 | * i_mmap_rwsem ensures the dst_pte remains valid even | ||
272 | * in the case of shared pmds. fault mutex prevents | ||
273 | * races with other faulting threads. | ||
274 | */ | 271 | */ |
275 | mapping = dst_vma->vm_file->f_mapping; | ||
276 | i_mmap_lock_read(mapping); | ||
277 | idx = linear_page_index(dst_vma, dst_addr); | 272 | idx = linear_page_index(dst_vma, dst_addr); |
273 | mapping = dst_vma->vm_file->f_mapping; | ||
278 | hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping, | 274 | hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping, |
279 | idx, dst_addr); | 275 | idx, dst_addr); |
280 | mutex_lock(&hugetlb_fault_mutex_table[hash]); | 276 | mutex_lock(&hugetlb_fault_mutex_table[hash]); |
@@ -283,7 +279,6 @@ retry: | |||
283 | dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h)); | 279 | dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h)); |
284 | if (!dst_pte) { | 280 | if (!dst_pte) { |
285 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); | 281 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); |
286 | i_mmap_unlock_read(mapping); | ||
287 | goto out_unlock; | 282 | goto out_unlock; |
288 | } | 283 | } |
289 | 284 | ||
@@ -291,7 +286,6 @@ retry: | |||
291 | dst_pteval = huge_ptep_get(dst_pte); | 286 | dst_pteval = huge_ptep_get(dst_pte); |
292 | if (!huge_pte_none(dst_pteval)) { | 287 | if (!huge_pte_none(dst_pteval)) { |
293 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); | 288 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); |
294 | i_mmap_unlock_read(mapping); | ||
295 | goto out_unlock; | 289 | goto out_unlock; |
296 | } | 290 | } |
297 | 291 | ||
@@ -299,7 +293,6 @@ retry: | |||
299 | dst_addr, src_addr, &page); | 293 | dst_addr, src_addr, &page); |
300 | 294 | ||
301 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); | 295 | mutex_unlock(&hugetlb_fault_mutex_table[hash]); |
302 | i_mmap_unlock_read(mapping); | ||
303 | vm_alloc_shared = vm_shared; | 296 | vm_alloc_shared = vm_shared; |
304 | 297 | ||
305 | cond_resched(); | 298 | cond_resched(); |
@@ -150,7 +150,7 @@ void *memdup_user(const void __user *src, size_t len) | |||
150 | { | 150 | { |
151 | void *p; | 151 | void *p; |
152 | 152 | ||
153 | p = kmalloc_track_caller(len, GFP_USER); | 153 | p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); |
154 | if (!p) | 154 | if (!p) |
155 | return ERR_PTR(-ENOMEM); | 155 | return ERR_PTR(-ENOMEM); |
156 | 156 | ||
@@ -478,7 +478,7 @@ bool page_mapped(struct page *page) | |||
478 | return true; | 478 | return true; |
479 | if (PageHuge(page)) | 479 | if (PageHuge(page)) |
480 | return false; | 480 | return false; |
481 | for (i = 0; i < hpage_nr_pages(page); i++) { | 481 | for (i = 0; i < (1 << compound_order(page)); i++) { |
482 | if (atomic_read(&page[i]._mapcount) >= 0) | 482 | if (atomic_read(&page[i]._mapcount) >= 0) |
483 | return true; | 483 | return true; |
484 | } | 484 | } |
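The util.c hunk makes page_mapped() walk every page of a compound page rather than hpage_nr_pages() of them; hpage_nr_pages() only exceeds 1 for transparent huge pages (per its usual definition, not this hunk), so tail mappings of other compound pages were previously missed. The loop as it now stands:

    for (i = 0; i < (1 << compound_order(page)); i++) {
            if (atomic_read(&page[i]._mapcount) >= 0)
                    return true;    /* some sub-page is mapped */
    }
    return false;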
diff --git a/mm/vmscan.c b/mm/vmscan.c index a714c4f800e9..e979705bbf32 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -491,16 +491,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, | |||
491 | delta = freeable / 2; | 491 | delta = freeable / 2; |
492 | } | 492 | } |
493 | 493 | ||
494 | /* | ||
495 | * Make sure we apply some minimal pressure on default priority | ||
496 | * even on small cgroups. Stale objects are not only consuming memory | ||
497 | * by themselves, but can also hold a reference to a dying cgroup, | ||
498 | * preventing it from being reclaimed. A dying cgroup with all | ||
499 | * corresponding structures like per-cpu stats and kmem caches | ||
500 | * can be really big, so it may lead to a significant waste of memory. | ||
501 | */ | ||
502 | delta = max_t(unsigned long long, delta, min(freeable, batch_size)); | ||
503 | |||
504 | total_scan += delta; | 494 | total_scan += delta; |
505 | if (total_scan < 0) { | 495 | if (total_scan < 0) { |
506 | pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", | 496 | pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", |