Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig              |   2
-rw-r--r--   mm/filemap.c            |   4
-rw-r--r--   mm/frame_vector.c       |   9
-rw-r--r--   mm/gup.c                |  67
-rw-r--r--   mm/kasan/kasan.c        |  22
-rw-r--r--   mm/kmemleak.c           |   7
-rw-r--r--   mm/list_lru.c           |   2
-rw-r--r--   mm/memcontrol.c         |   9
-rw-r--r--   mm/memory.c             |  16
-rw-r--r--   mm/memory_hotplug.c     |  29
-rw-r--r--   mm/mempolicy.c          |   2
-rw-r--r--   mm/mprotect.c           |   1
-rw-r--r--   mm/nommu.c              |  40
-rw-r--r--   mm/page_alloc.c         | 133
-rw-r--r--   mm/process_vm_access.c  |   7
-rw-r--r--   mm/slab.c               |  45
-rw-r--r--   mm/slab.h               |   1
-rw-r--r--   mm/util.c               |  12
-rw-r--r--   mm/vmscan.c             |   2
19 files changed, 158 insertions(+), 252 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index be0ee11fa0d9..86e3e0e74d20 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -187,7 +187,7 @@ config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
 	depends on ARCH_ENABLE_MEMORY_HOTPLUG
-	depends on !KASAN
+	depends on COMPILE_TEST || !KASAN
 
 config MEMORY_HOTPLUG_SPARSE
 	def_bool y
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ab94a882de..db26ebc6c62f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -789,9 +789,7 @@ EXPORT_SYMBOL(__page_cache_alloc);
  */
 wait_queue_head_t *page_waitqueue(struct page *page)
 {
-	const struct zone *zone = page_zone(page);
-
-	return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
+	return bit_waitqueue(page, 0);
 }
 EXPORT_SYMBOL(page_waitqueue);
 
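Note: page_waitqueue() no longer indexes a per-zone wait table; it hands the page pointer to the shared bit_waitqueue() hash. For reference, the generic helper it now relies on looks roughly like the following in the same series (a sketch of the kernel/sched/wait.c side, not part of this mm/ diff):

#define WAIT_TABLE_BITS 8
#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;

wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
	unsigned long val = (unsigned long)word << shift | bit;

	/* hash the (word, bit) pair into one of 256 shared waitqueues */
	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
}

This is what makes the per-zone wait_table fields, their sizing helpers, and the hotplug bookkeeping removed later in this diff unnecessary.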
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 381bb07ed14f..db77dcb38afd 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -11,10 +11,7 @@
  * get_vaddr_frames() - map virtual addresses to pfns
  * @start:	starting user address
  * @nr_frames:	number of pages / pfns from start to map
- * @write:	whether pages will be written to by the caller
- * @force:	whether to force write access even if user mapping is
- *		readonly. See description of the same argument of
- *		get_user_pages().
+ * @gup_flags:	flags modifying lookup behaviour
  * @vec:	structure which receives pages / pfns of the addresses mapped.
  *		It should have space for at least nr_frames entries.
  *
@@ -34,7 +31,7 @@
  * This function takes care of grabbing mmap_sem as necessary.
  */
 int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
-		     bool write, bool force, struct frame_vector *vec)
+		     unsigned int gup_flags, struct frame_vector *vec)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
@@ -59,7 +56,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 		vec->got_ref = true;
 		vec->is_pfns = false;
 		ret = get_user_pages_locked(start, nr_frames,
-			write, force, (struct page **)(vec->ptrs), &locked);
+			gup_flags, (struct page **)(vec->ptrs), &locked);
 		goto out;
 	}
 
diff --git a/mm/gup.c b/mm/gup.c
index 96b2b2fd0fbd..ec4f82704b6f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -60,6 +60,16 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
 	return -EEXIST;
 }
 
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+	return pte_write(pte) ||
+		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
 static struct page *follow_page_pte(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmd, unsigned int flags)
 {
@@ -95,7 +105,7 @@ retry:
 	}
 	if ((flags & FOLL_NUMA) && pte_protnone(pte))
 		goto no_page;
-	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
+	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
 		pte_unmap_unlock(ptep, ptl);
 		return NULL;
 	}
@@ -412,7 +422,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 	 * reCOWed by userspace write).
 	 */
 	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
-		*flags &= ~FOLL_WRITE;
+		*flags |= FOLL_COW;
 	return 0;
 }
 
@@ -516,7 +526,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  * instead of __get_user_pages. __get_user_pages should be used only if
  * you need some special @gup_flags.
  */
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas, int *nonblocking)
@@ -621,7 +631,6 @@ next_page:
 	} while (nr_pages);
 	return i;
 }
-EXPORT_SYMBOL(__get_user_pages);
 
 bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
 {
@@ -729,7 +738,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
 						struct mm_struct *mm,
 						unsigned long start,
 						unsigned long nr_pages,
-						int write, int force,
 						struct page **pages,
 						struct vm_area_struct **vmas,
 						int *locked, bool notify_drop,
@@ -747,10 +755,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
 
 	if (pages)
 		flags |= FOLL_GET;
-	if (write)
-		flags |= FOLL_WRITE;
-	if (force)
-		flags |= FOLL_FORCE;
 
 	pages_done = 0;
 	lock_dropped = false;
@@ -843,12 +847,12 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
  *      up_read(&mm->mmap_sem);
  */
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-			   int write, int force, struct page **pages,
+			   unsigned int gup_flags, struct page **pages,
 			   int *locked)
 {
 	return __get_user_pages_locked(current, current->mm, start, nr_pages,
-				       write, force, pages, NULL, locked, true,
-				       FOLL_TOUCH);
+				       pages, NULL, locked, true,
+				       gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages_locked);
 
@@ -864,14 +868,14 @@ EXPORT_SYMBOL(get_user_pages_locked);
  */
 __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 					       unsigned long start, unsigned long nr_pages,
-					       int write, int force, struct page **pages,
-					       unsigned int gup_flags)
+					       struct page **pages, unsigned int gup_flags)
 {
 	long ret;
 	int locked = 1;
+
 	down_read(&mm->mmap_sem);
-	ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
-				      pages, NULL, &locked, false, gup_flags);
+	ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
+				      &locked, false, gup_flags);
 	if (locked)
 		up_read(&mm->mmap_sem);
 	return ret;
@@ -896,10 +900,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked);
  * "force" parameter).
  */
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
-			     int write, int force, struct page **pages)
+			     struct page **pages, unsigned int gup_flags)
 {
 	return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
-					 write, force, pages, FOLL_TOUCH);
+					 pages, gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
@@ -910,9 +914,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
- * @write:	whether pages will be written to by the caller
- * @force:	whether to force access even when user mapping is currently
- *		protected (but never forces write access to shared mapping).
+ * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
@@ -941,9 +943,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
- * If write=0, the page must not be written to. If the page is written to,
- * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
- * after the page is finished with, and before put_page is called.
+ * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
+ * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
+ * be called after the page is finished with, and before put_page is called.
 *
 * get_user_pages is typically used for fewer-copy IO operations, to get a
 * handle on the memory by some means other than accesses via the user virtual
@@ -960,12 +962,12 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
 */
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, unsigned long nr_pages,
-		int write, int force, struct page **pages,
+		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas)
 {
-	return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
-				       pages, vmas, NULL, false,
-				       FOLL_TOUCH | FOLL_REMOTE);
+	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+				       NULL, false,
+				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
@@ -976,12 +978,12 @@ EXPORT_SYMBOL(get_user_pages_remote);
 * obviously don't pass FOLL_REMOTE in here.
 */
 long get_user_pages(unsigned long start, unsigned long nr_pages,
-		int write, int force, struct page **pages,
+		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas)
 {
 	return __get_user_pages_locked(current, current->mm, start, nr_pages,
-				       write, force, pages, vmas, NULL, false,
-				       FOLL_TOUCH);
+				       pages, vmas, NULL, false,
+				       gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -1505,7 +1507,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 		start += nr << PAGE_SHIFT;
 		pages += nr;
 
-		ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
+		ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+					      write ? FOLL_WRITE : 0);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
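Note: the user-visible change in this file is that get_user_pages() and its variants take a single gup_flags word instead of the old write/force int pair. A minimal, hypothetical caller conversion looks like this (sketch only; pin_user_buffer() is not a real kernel function):

static long pin_user_buffer(unsigned long start, unsigned long nr_pages,
			    struct page **pages)
{
	/*
	 * Before this series this was:
	 *     get_user_pages(start, nr_pages, 1, 0, pages, NULL);
	 * i.e. write=1, force=0. Write intent is now a FOLL_* bit.
	 */
	return get_user_pages(start, nr_pages, FOLL_WRITE, pages, NULL);
}

Callers that previously passed force=1 add FOLL_FORCE to gup_flags instead, which keeps the flag visible at every call site.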
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 88af13c00d3c..70c009741aab 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -34,6 +34,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
+#include <linux/bug.h>
 
 #include "kasan.h"
 #include "../slab.h"
@@ -62,7 +63,7 @@ void kasan_unpoison_shadow(const void *address, size_t size)
 	}
 }
 
-static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+static void __kasan_unpoison_stack(struct task_struct *task, const void *sp)
 {
 	void *base = task_stack_page(task);
 	size_t size = sp - base;
@@ -77,9 +78,24 @@ void kasan_unpoison_task_stack(struct task_struct *task)
 }
 
 /* Unpoison the stack for the current task beyond a watermark sp value. */
-asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
 {
-	__kasan_unpoison_stack(current, sp);
+	__kasan_unpoison_stack(current, watermark);
+}
+
+/*
+ * Clear all poison for the region between the current SP and a provided
+ * watermark value, as is sometimes required prior to hand-crafted asm function
+ * returns in the middle of functions.
+ */
+void kasan_unpoison_stack_above_sp_to(const void *watermark)
+{
+	const void *sp = __builtin_frame_address(0);
+	size_t size = watermark - sp;
+
+	if (WARN_ON(sp > watermark))
+		return;
+	kasan_unpoison_shadow(sp, size);
 }
 
 /*
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index a5e453cf05c4..e5355a5b423f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1453,8 +1453,11 @@ static void kmemleak_scan(void)
 
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			scan_block(task_stack_page(p), task_stack_page(p) +
-				   THREAD_SIZE, NULL);
+			void *stack = try_get_task_stack(p);
+			if (stack) {
+				scan_block(stack, stack + THREAD_SIZE, NULL);
+				put_task_stack(p);
+			}
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	}
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 1d05cb9d363d..234676e31edd 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 	err = memcg_init_list_lru(lru, memcg_aware);
 	if (err) {
 		kfree(lru->node);
+		/* Do this so a list_lru_destroy() doesn't crash: */
+		lru->node = NULL;
 		goto out;
 	}
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae052b5e3315..0f870ba43942 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1917,6 +1917,15 @@ retry:
 		    current->flags & PF_EXITING))
 		goto force;
 
+	/*
+	 * Prevent unbounded recursion when reclaim operations need to
+	 * allocate memory. This might exceed the limits temporarily,
+	 * but we prefer facilitating memory reclaim and getting back
+	 * under the limit over triggering OOM kills in these cases.
+	 */
+	if (unlikely(current->flags & PF_MEMALLOC))
+		goto force;
+
 	if (unlikely(task_in_memcg_oom(current)))
 		goto nomem;
 
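Note: this guard pairs with the mm/vmscan.c hunk at the end of this diff, which makes memcg-targeted reclaim run with PF_MEMALLOC set. The recursion being cut off is roughly the following (an illustrative call-chain sketch, not code from this diff):

/*
 *   try_to_free_mem_cgroup_pages()   <- sets PF_MEMALLOC (see mm/vmscan.c below)
 *     do_try_to_free_pages()
 *       shrinkers / writeback allocate memory with GFP_KERNEL
 *         try_charge()               <- sees PF_MEMALLOC and jumps to "force"
 *                                       instead of re-entering reclaim
 */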
diff --git a/mm/memory.c b/mm/memory.c
index fc1987dfd8cc..e18c57bdc75c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3869,10 +3869,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
 * given task for page fault accounting.
 */
 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long addr, void *buf, int len, int write)
+		unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
 	struct vm_area_struct *vma;
 	void *old_buf = buf;
+	int write = gup_flags & FOLL_WRITE;
 
 	down_read(&mm->mmap_sem);
 	/* ignore errors, just check how much was successfully transferred */
@@ -3882,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 		struct page *page = NULL;
 
 		ret = get_user_pages_remote(tsk, mm, addr, 1,
-				write, 1, &page, &vma);
+				gup_flags, &page, &vma);
 		if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
 			break;
@@ -3934,14 +3935,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 * @addr:	start address to access
 * @buf:	source or destination buffer
 * @len:	number of bytes to transfer
- * @write:	whether the access is a write
+ * @gup_flags:	flags modifying lookup behaviour
 *
 * The caller must hold a reference on @mm.
 */
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
-		void *buf, int len, int write)
+		void *buf, int len, unsigned int gup_flags)
 {
-	return __access_remote_vm(NULL, mm, addr, buf, len, write);
+	return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
 }
 
 /*
@@ -3950,7 +3951,7 @@ int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 * Do not walk the page table directly, use get_user_pages
 */
 int access_process_vm(struct task_struct *tsk, unsigned long addr,
-		void *buf, int len, int write)
+		void *buf, int len, unsigned int gup_flags)
 {
 	struct mm_struct *mm;
 	int ret;
@@ -3959,7 +3960,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
 	if (!mm)
 		return 0;
 
-	ret = __access_remote_vm(tsk, mm, addr, buf, len, write);
+	ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
+
 	mmput(mm);
 
 	return ret;
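Note: callers of access_remote_vm()/access_process_vm() now pass FOLL_* bits instead of a write int. Since the old implementation always forced the access (it passed force=1 to get_user_pages_remote(), as visible above), converted in-tree callers keep that behaviour by passing FOLL_FORCE, as the mm/util.c hunk below does for get_cmdline(). A hypothetical write-access conversion, as a sketch (child, addr, val and ret are illustrative names only):

	/* was: ret = access_process_vm(child, addr, &val, sizeof(val), 1); */
	ret = access_process_vm(child, addr, &val, sizeof(val),
				FOLL_FORCE | FOLL_WRITE);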
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 962927309b6e..cad4b9125695 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -268,7 +268,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
 	unsigned long i, pfn, end_pfn, nr_pages;
 	int node = pgdat->node_id;
 	struct page *page;
-	struct zone *zone;
 
 	nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
 	page = virt_to_page(pgdat);
@@ -276,19 +275,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
 	for (i = 0; i < nr_pages; i++, page++)
 		get_page_bootmem(node, page, NODE_INFO);
 
-	zone = &pgdat->node_zones[0];
-	for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
-		if (zone_is_initialized(zone)) {
-			nr_pages = zone->wait_table_hash_nr_entries
-				* sizeof(wait_queue_head_t);
-			nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
-			page = virt_to_page(zone->wait_table);
-
-			for (i = 0; i < nr_pages; i++, page++)
-				get_page_bootmem(node, page, NODE_INFO);
-		}
-	}
-
 	pfn = pgdat->node_start_pfn;
 	end_pfn = pgdat_end_pfn(pgdat);
 
@@ -2131,7 +2117,6 @@ void try_offline_node(int nid)
 	unsigned long start_pfn = pgdat->node_start_pfn;
 	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
 	unsigned long pfn;
-	int i;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 		unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -2158,20 +2143,6 @@ void try_offline_node(int nid)
 	 */
 	node_set_offline(nid);
 	unregister_one_node(nid);
-
-	/* free waittable in each zone */
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		struct zone *zone = pgdat->node_zones + i;
-
-		/*
-		 * wait_table may be allocated from boot memory,
-		 * here only free if it's allocated by vmalloc.
-		 */
-		if (is_vmalloc_addr(zone->wait_table)) {
-			vfree(zone->wait_table);
-			zone->wait_table = NULL;
-		}
-	}
 }
 EXPORT_SYMBOL(try_offline_node);
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ad1c96ac313c..0b859af06b87 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -850,7 +850,7 @@ static int lookup_node(unsigned long addr)
 	struct page *p;
 	int err;
 
-	err = get_user_pages(addr & PAGE_MASK, 1, 0, 0, &p, NULL);
+	err = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL);
 	if (err >= 0) {
 		err = page_to_nid(p);
 		put_page(p);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index bcdbe62f3e6d..11936526b08b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -25,7 +25,6 @@
 #include <linux/perf_event.h>
 #include <linux/pkeys.h>
 #include <linux/ksm.h>
-#include <linux/pkeys.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
diff --git a/mm/nommu.c b/mm/nommu.c
index 95daf81a4855..8b8faaf2a9e9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -109,7 +109,7 @@ unsigned int kobjsize(const void *objp)
 	return PAGE_SIZE << compound_order(page);
 }
 
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		      unsigned long start, unsigned long nr_pages,
 		      unsigned int foll_flags, struct page **pages,
 		      struct vm_area_struct **vmas, int *nonblocking)
@@ -160,33 +160,25 @@ finish_or_fault:
 * - don't permit access to VMAs that don't support it, such as I/O mappings
 */
 long get_user_pages(unsigned long start, unsigned long nr_pages,
-		    int write, int force, struct page **pages,
+		    unsigned int gup_flags, struct page **pages,
 		    struct vm_area_struct **vmas)
 {
-	int flags = 0;
-
-	if (write)
-		flags |= FOLL_WRITE;
-	if (force)
-		flags |= FOLL_FORCE;
-
-	return __get_user_pages(current, current->mm, start, nr_pages, flags,
-				pages, vmas, NULL);
+	return __get_user_pages(current, current->mm, start, nr_pages,
+				gup_flags, pages, vmas, NULL);
 }
 EXPORT_SYMBOL(get_user_pages);
 
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-			   int write, int force, struct page **pages,
+			   unsigned int gup_flags, struct page **pages,
 			   int *locked)
 {
-	return get_user_pages(start, nr_pages, write, force, pages, NULL);
+	return get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 }
 EXPORT_SYMBOL(get_user_pages_locked);
 
 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 			       unsigned long start, unsigned long nr_pages,
-			       int write, int force, struct page **pages,
-			       unsigned int gup_flags)
+			       struct page **pages, unsigned int gup_flags)
 {
 	long ret;
 	down_read(&mm->mmap_sem);
@@ -198,10 +190,10 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 EXPORT_SYMBOL(__get_user_pages_unlocked);
 
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
-			     int write, int force, struct page **pages)
+			     struct page **pages, unsigned int gup_flags)
 {
 	return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
-					 write, force, pages, 0);
+					 pages, gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
@@ -1817,9 +1809,10 @@ void filemap_map_pages(struct fault_env *fe,
 EXPORT_SYMBOL(filemap_map_pages);
 
 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long addr, void *buf, int len, int write)
+		unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
 	struct vm_area_struct *vma;
+	int write = gup_flags & FOLL_WRITE;
 
 	down_read(&mm->mmap_sem);
 
@@ -1854,21 +1847,22 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 * @addr:	start address to access
 * @buf:	source or destination buffer
 * @len:	number of bytes to transfer
- * @write:	whether the access is a write
+ * @gup_flags:	flags modifying lookup behaviour
 *
 * The caller must hold a reference on @mm.
 */
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
-		void *buf, int len, int write)
+		void *buf, int len, unsigned int gup_flags)
 {
-	return __access_remote_vm(NULL, mm, addr, buf, len, write);
+	return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
 }
 
 /*
 * Access another process' address space.
 * - source/target buffer must be kernel space
 */
-int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
+		unsigned int gup_flags)
 {
 	struct mm_struct *mm;
 
@@ -1879,7 +1873,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	if (!mm)
 		return 0;
 
-	len = __access_remote_vm(tsk, mm, addr, buf, len, write);
+	len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
 
 	mmput(mm);
 	return len;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b3bf6767d54..072d791dce2d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -92,7 +92,7 @@ int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
-volatile u64 latent_entropy __latent_entropy;
+volatile unsigned long latent_entropy __latent_entropy;
 EXPORT_SYMBOL(latent_entropy);
 #endif
 
@@ -4224,7 +4224,7 @@ static void show_migration_types(unsigned char type)
 	}
 
 	*p = '\0';
-	printk("(%s) ", tmp);
+	printk(KERN_CONT "(%s) ", tmp);
 }
 
 /*
@@ -4335,7 +4335,8 @@ void show_free_areas(unsigned int filter)
 			free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
 
 		show_node(zone);
-		printk("%s"
+		printk(KERN_CONT
+			"%s"
 			" free:%lukB"
 			" min:%lukB"
 			" low:%lukB"
@@ -4382,8 +4383,8 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
 		printk("lowmem_reserve[]:");
 		for (i = 0; i < MAX_NR_ZONES; i++)
-			printk(" %ld", zone->lowmem_reserve[i]);
-		printk("\n");
+			printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+		printk(KERN_CONT "\n");
 	}
 
 	for_each_populated_zone(zone) {
@@ -4394,7 +4395,7 @@ void show_free_areas(unsigned int filter)
 		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
-		printk("%s: ", zone->name);
+		printk(KERN_CONT "%s: ", zone->name);
 
 		spin_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
@@ -4412,11 +4413,12 @@ void show_free_areas(unsigned int filter)
 		}
 		spin_unlock_irqrestore(&zone->lock, flags);
 		for (order = 0; order < MAX_ORDER; order++) {
-			printk("%lu*%lukB ", nr[order], K(1UL) << order);
+			printk(KERN_CONT "%lu*%lukB ",
+			       nr[order], K(1UL) << order);
 			if (nr[order])
 				show_migration_types(types[order]);
 		}
-		printk("= %lukB\n", K(total));
+		printk(KERN_CONT "= %lukB\n", K(total));
 	}
 
 	hugetlb_show_meminfo();
@@ -4977,72 +4979,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 }
 
 /*
- * Helper functions to size the waitqueue hash table.
- * Essentially these want to choose hash table sizes sufficiently
- * large so that collisions trying to wait on pages are rare.
- * But in fact, the number of active page waitqueues on typical
- * systems is ridiculously low, less than 200. So this is even
- * conservative, even though it seems large.
- *
- * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
- * waitqueues, i.e. the size of the waitq table given the number of pages.
- */
-#define PAGES_PER_WAITQUEUE	256
-
-#ifndef CONFIG_MEMORY_HOTPLUG
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-	unsigned long size = 1;
-
-	pages /= PAGES_PER_WAITQUEUE;
-
-	while (size < pages)
-		size <<= 1;
-
-	/*
-	 * Once we have dozens or even hundreds of threads sleeping
-	 * on IO we've got bigger problems than wait queue collision.
-	 * Limit the size of the wait table to a reasonable size.
-	 */
-	size = min(size, 4096UL);
-
-	return max(size, 4UL);
-}
-#else
-/*
- * A zone's size might be changed by hot-add, so it is not possible to determine
- * a suitable size for its wait_table. So we use the maximum size now.
- *
- * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie:
- *
- *    i386 (preemption config)    : 4096 x 16 = 64Kbyte.
- *    ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
- *    ia64, x86-64 (preemption)   : 4096 x 24 = 96Kbyte.
- *
- * The maximum entries are prepared when a zone's memory is (512K + 256) pages
- * or more by the traditional way. (See above). It equals:
- *
- *    i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte.
- *    ia64(16K page size)                 : = ( 8G + 4M)byte.
- *    powerpc (64K page size)             : = (32G +16M)byte.
- */
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-	return 4096UL;
-}
-#endif
-
-/*
- * This is an integer logarithm so that shifts can be used later
- * to extract the more random high bits from the multiplicative
- * hash function before the remainder is taken.
- */
-static inline unsigned long wait_table_bits(unsigned long size)
-{
-	return ffz(~size);
-}
-
-/*
 * Initially all pages are reserved - free ones are freed
 * up by free_all_bootmem() once the early boot process is
 * done. Non-atomic initialization, single-pass.
@@ -5304,49 +5240,6 @@ void __init setup_per_cpu_pageset(void)
 		alloc_percpu(struct per_cpu_nodestat);
 }
 
-static noinline __ref
-int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
-{
-	int i;
-	size_t alloc_size;
-
-	/*
-	 * The per-page waitqueue mechanism uses hashed waitqueues
-	 * per zone.
-	 */
-	zone->wait_table_hash_nr_entries =
-		 wait_table_hash_nr_entries(zone_size_pages);
-	zone->wait_table_bits =
-		wait_table_bits(zone->wait_table_hash_nr_entries);
-	alloc_size = zone->wait_table_hash_nr_entries
-					* sizeof(wait_queue_head_t);
-
-	if (!slab_is_available()) {
-		zone->wait_table = (wait_queue_head_t *)
-			memblock_virt_alloc_node_nopanic(
-				alloc_size, zone->zone_pgdat->node_id);
-	} else {
-		/*
-		 * This case means that a zone whose size was 0 gets new memory
-		 * via memory hot-add.
-		 * But it may be the case that a new node was hot-added. In
-		 * this case vmalloc() will not be able to use this new node's
-		 * memory - this wait_table must be initialized to use this new
-		 * node itself as well.
-		 * To use this new node's memory, further consideration will be
-		 * necessary.
-		 */
-		zone->wait_table = vmalloc(alloc_size);
-	}
-	if (!zone->wait_table)
-		return -ENOMEM;
-
-	for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
-		init_waitqueue_head(zone->wait_table + i);
-
-	return 0;
-}
-
 static __meminit void zone_pcp_init(struct zone *zone)
 {
 	/*
@@ -5367,10 +5260,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 					unsigned long size)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
-	int ret;
-	ret = zone_wait_table_init(zone, size);
-	if (ret)
-		return ret;
+
 	pgdat->nr_zones = zone_idx(zone) + 1;
 
 	zone->zone_start_pfn = zone_start_pfn;
@@ -5382,6 +5272,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 		zone_start_pfn, (zone_start_pfn + size));
 
 	zone_init_free_lists(zone);
+	zone->initialized = 1;
 
 	return 0;
 }
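Note: the printk() hunks above all add KERN_CONT. With the printk rework in this release, a printk() call without a level no longer reliably continues the previous line; each call starts a new record, so lines built piecewise must mark their continuations explicitly. A minimal sketch of the pattern these hunks follow (variable names illustrative only):

	printk(KERN_INFO "free pages by order:");
	for (order = 0; order < MAX_ORDER; order++)
		printk(KERN_CONT " %lu*%lukB", nr[order], K(1UL) << order);
	printk(KERN_CONT "\n");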
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 07514d41ebcc..be8dc8d1edb9 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -88,12 +88,16 @@ static int process_vm_rw_single_vec(unsigned long addr,
 	ssize_t rc = 0;
 	unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
 		/ sizeof(struct pages *);
+	unsigned int flags = FOLL_REMOTE;
 
 	/* Work out address and page range required */
 	if (len == 0)
 		return 0;
 	nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
 
+	if (vm_write)
+		flags |= FOLL_WRITE;
+
 	while (!rc && nr_pages && iov_iter_count(iter)) {
 		int pages = min(nr_pages, max_pages_per_loop);
 		size_t bytes;
@@ -104,8 +108,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
 		 * current/current->mm
 		 */
 		pages = __get_user_pages_unlocked(task, mm, pa, pages,
-						  vm_write, 0, process_pages,
-						  FOLL_REMOTE);
+						  process_pages, flags);
 		if (pages <= 0)
 			return -EFAULT;
 
diff --git a/mm/slab.c b/mm/slab.c
index 090fb26b3a39..0b0550ca85b4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
 	spin_lock_init(&parent->list_lock);
 	parent->free_objects = 0;
 	parent->free_touched = 0;
+	parent->num_slabs = 0;
 }
 
 #define MAKE_LIST(cachep, listp, slab, nodeid)				\
@@ -966,7 +967,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
 	 * guaranteed to be valid until irq is re-enabled, because it will be
 	 * freed after synchronize_sched().
 	 */
-	if (force_change)
+	if (old_shared && force_change)
 		synchronize_sched();
 
 fail:
@@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 	for_each_kmem_cache_node(cachep, node, n) {
 		unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
 		unsigned long active_slabs = 0, num_slabs = 0;
+		unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+		unsigned long num_slabs_full;
 
 		spin_lock_irqsave(&n->list_lock, flags);
-		list_for_each_entry(page, &n->slabs_full, lru) {
-			active_objs += cachep->num;
-			active_slabs++;
-		}
+		num_slabs = n->num_slabs;
 		list_for_each_entry(page, &n->slabs_partial, lru) {
 			active_objs += page->active;
-			active_slabs++;
+			num_slabs_partial++;
 		}
 		list_for_each_entry(page, &n->slabs_free, lru)
-			num_slabs++;
+			num_slabs_free++;
 
 		free_objects += n->free_objects;
 		spin_unlock_irqrestore(&n->list_lock, flags);
 
-		num_slabs += active_slabs;
 		num_objs = num_slabs * cachep->num;
+		active_slabs = num_slabs - num_slabs_free;
+		num_slabs_full = num_slabs -
+			(num_slabs_partial + num_slabs_free);
+		active_objs += (num_slabs_full * cachep->num);
+
 		pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
 			node, active_slabs, num_slabs, active_objs, num_objs,
 			free_objects);
@@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache,
 
 		page = list_entry(p, struct page, lru);
 		list_del(&page->lru);
+		n->num_slabs--;
 		/*
 		 * Safe to drop the lock. The slab is no longer linked
 		 * to the cache.
@@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
 		list_add_tail(&page->lru, &(n->slabs_free));
 	else
 		fixup_slab_list(cachep, n, page, &list);
+
+	n->num_slabs++;
 	STATS_INC_GROWN(cachep);
 	n->free_objects += cachep->num - page->active;
 	spin_unlock(&n->list_lock);
@@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
 
 		page = list_last_entry(&n->slabs_free, struct page, lru);
 		list_move(&page->lru, list);
+		n->num_slabs--;
 	}
 }
 
@@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 	unsigned long num_objs;
 	unsigned long active_slabs = 0;
 	unsigned long num_slabs, free_objects = 0, shared_avail = 0;
+	unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+	unsigned long num_slabs_full = 0;
 	const char *name;
 	char *error = NULL;
 	int node;
@@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 		check_irq_on();
 		spin_lock_irq(&n->list_lock);
 
-		list_for_each_entry(page, &n->slabs_full, lru) {
-			if (page->active != cachep->num && !error)
-				error = "slabs_full accounting error";
-			active_objs += cachep->num;
-			active_slabs++;
-		}
+		num_slabs += n->num_slabs;
+
 		list_for_each_entry(page, &n->slabs_partial, lru) {
 			if (page->active == cachep->num && !error)
 				error = "slabs_partial accounting error";
 			if (!page->active && !error)
 				error = "slabs_partial accounting error";
 			active_objs += page->active;
-			active_slabs++;
+			num_slabs_partial++;
 		}
+
 		list_for_each_entry(page, &n->slabs_free, lru) {
 			if (page->active && !error)
 				error = "slabs_free accounting error";
-			num_slabs++;
+			num_slabs_free++;
 		}
+
 		free_objects += n->free_objects;
 		if (n->shared)
 			shared_avail += n->shared->avail;
 
 		spin_unlock_irq(&n->list_lock);
 	}
-	num_slabs += active_slabs;
 	num_objs = num_slabs * cachep->num;
+	active_slabs = num_slabs - num_slabs_free;
+	num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
+	active_objs += (num_slabs_full * cachep->num);
+
 	if (num_objs - active_objs != free_objects && !error)
 		error = "free_objects accounting error";
 
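Note: with the per-node num_slabs counter added in mm/slab.h below, both reporting paths above stop walking the potentially huge slabs_full list and derive the full-slab and active-object counts instead. The arithmetic they rely on, as a tiny sketch (names mirror the diff):

static unsigned long full_slabs(unsigned long total, unsigned long partial,
				unsigned long free)
{
	/* total = full + partial + free  =>  full = total - partial - free */
	return total - (partial + free);
}

For example, a node with 10 slabs of which 3 are partial and 2 are free has 5 full slabs, contributing 5 * cachep->num active objects on top of the per-page counts gathered from the partial list.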
diff --git a/mm/slab.h b/mm/slab.h
index 9653f2e2591a..bc05fdc3edce 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -432,6 +432,7 @@ struct kmem_cache_node {
 	struct list_head slabs_partial;	/* partial list first, better asm code */
 	struct list_head slabs_full;
 	struct list_head slabs_free;
+	unsigned long num_slabs;
 	unsigned long free_objects;
 	unsigned int free_limit;
 	unsigned int colour_next;	/* Per-node cache coloring */
diff --git a/mm/util.c b/mm/util.c
index 662cddf914af..1a41553db866 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,8 +230,10 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /* Check if the vma is being used as a stack by this task */
-int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
+int vma_is_stack_for_current(struct vm_area_struct *vma)
 {
+	struct task_struct * __maybe_unused t = current;
+
 	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
@@ -283,7 +285,8 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast);
 int __weak get_user_pages_fast(unsigned long start,
 				int nr_pages, int write, struct page **pages)
 {
-	return get_user_pages_unlocked(start, nr_pages, write, 0, pages);
+	return get_user_pages_unlocked(start, nr_pages, pages,
+				       write ? FOLL_WRITE : 0);
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
@@ -623,7 +626,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
 	if (len > buflen)
 		len = buflen;
 
-	res = access_process_vm(task, arg_start, buffer, len, 0);
+	res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);
 
 	/*
 	 * If the nul at the end of args has been overwritten, then
@@ -638,7 +641,8 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
 			if (len > buflen - res)
 				len = buflen - res;
 			res += access_process_vm(task, env_start,
-						 buffer+res, len, 0);
+						 buffer+res, len,
+						 FOLL_FORCE);
 			res = strnlen(buffer, res);
 		}
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 744f926af442..76fda2268148 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3043,7 +3043,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					    sc.gfp_mask,
 					    sc.reclaim_idx);
 
+	current->flags |= PF_MEMALLOC;
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+	current->flags &= ~PF_MEMALLOC;
 
 	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 