author		Linus Torvalds <torvalds@linux-foundation.org>	2017-08-03 17:58:13 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-08-03 17:58:13 -0400
commit		995d03ae266d3f3bec8844d01b6c3ded19b9cc1c (patch)
tree		a98a9fa566b55def6d742578cd078afb21a44ed6
parent		8d3fe85f07a99cbf82f12c486414eda1d186bfa1 (diff)
parent		19ec8e48582670c021e998b9deb88e39a842ff45 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"15 fixes"
[ This does not merge the "fortify: use WARN instead of BUG for now"
patch, which needs a bit of extra work to build cleanly with all
configurations. Arnd is on it. - Linus ]
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
ocfs2: don't clear SGID when inheriting ACLs
mm: allow page_cache_get_speculative in interrupt context
userfaultfd: non-cooperative: flush event_wqh at release time
ipc: add missing container_of()s for randstruct
cpuset: fix a deadlock due to incomplete patching of cpusets_enabled()
userfaultfd_zeropage: return -ENOSPC in case mm has gone
mm: take memory hotplug lock within numa_zonelist_order_handler()
mm/page_io.c: fix oops during block io poll in swapin path
zram: do not free pool->size_class
kthread: fix documentation build warning
kasan: avoid -Wmaybe-uninitialized warning
userfaultfd: non-cooperative: notify about unmap of destination during mremap
mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries
pid: kill pidhash_size in pidhash_init()
mm/hugetlb.c: __get_user_pages ignores certain follow_hugetlb_page errors
-rw-r--r--  fs/ocfs2/acl.c           | 24
-rw-r--r--  fs/userfaultfd.c         |  5
-rw-r--r--  include/linux/cpuset.h   | 19
-rw-r--r--  include/linux/kthread.h  |  2
-rw-r--r--  include/linux/mm_types.h |  4
-rw-r--r--  include/linux/pagemap.h  |  2
-rw-r--r--  ipc/msg.c                |  3
-rw-r--r--  ipc/sem.c                |  3
-rw-r--r--  ipc/shm.c                |  4
-rw-r--r--  kernel/cgroup/cpuset.c   |  1
-rw-r--r--  kernel/pid.c             |  3
-rw-r--r--  mm/hugetlb.c             |  9
-rw-r--r--  mm/internal.h            |  5
-rw-r--r--  mm/kasan/report.c        |  1
-rw-r--r--  mm/madvise.c             |  1
-rw-r--r--  mm/memory.c              |  1
-rw-r--r--  mm/mprotect.c            |  1
-rw-r--r--  mm/mremap.c              |  8
-rw-r--r--  mm/page_alloc.c          |  2
-rw-r--r--  mm/page_io.c             |  7
-rw-r--r--  mm/rmap.c                | 36
-rw-r--r--  mm/zsmalloc.c            |  1
22 files changed, 109 insertions(+), 33 deletions(-)
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index dc22ba8c710f..e50a387959bf 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -240,18 +240,6 @@ int ocfs2_set_acl(handle_t *handle,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
-		if (acl) {
-			umode_t mode;
-
-			ret = posix_acl_update_mode(inode, &mode, &acl);
-			if (ret)
-				return ret;
-
-			ret = ocfs2_acl_set_mode(inode, di_bh,
-						 handle, mode);
-			if (ret)
-				return ret;
-		}
 		break;
 	case ACL_TYPE_DEFAULT:
 		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
@@ -289,7 +277,19 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
 	if (had_lock < 0)
 		return had_lock;
+	if (type == ACL_TYPE_ACCESS && acl) {
+		umode_t mode;
+
+		status = posix_acl_update_mode(inode, &mode, &acl);
+		if (status)
+			goto unlock;
+
+		status = ocfs2_acl_set_mode(inode, bh, NULL, mode);
+		if (status)
+			goto unlock;
+	}
 	status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
+unlock:
 	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
 	brelse(bh);
 	return status;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index cadcd12a3d35..06ea26b8c996 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -854,6 +854,9 @@ wakeup:
 	__wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
+	/* Flush pending events that may still wait on event_wqh */
+	wake_up_all(&ctx->event_wqh);
+
 	wake_up_poll(&ctx->fd_wqh, POLLHUP);
 	userfaultfd_ctx_put(ctx);
 	return 0;
@@ -1643,6 +1646,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 		ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
 				     uffdio_zeropage.range.len);
 		mmput(ctx->mm);
+	} else {
+		return -ENOSPC;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
 		return -EFAULT;
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 119a3f9604b0..898cfe2eeb42 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -18,6 +18,19 @@
 
 #ifdef CONFIG_CPUSETS
 
+/*
+ * Static branch rewrites can happen in an arbitrary order for a given
+ * key. In code paths where we need to loop with read_mems_allowed_begin() and
+ * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need
+ * to ensure that begin() always gets rewritten before retry() in the
+ * disabled -> enabled transition. If not, then if local irqs are disabled
+ * around the loop, we can deadlock since retry() would always be
+ * comparing the latest value of the mems_allowed seqcount against 0 as
+ * begin() still would see cpusets_enabled() as false. The enabled -> disabled
+ * transition should happen in reverse order for the same reasons (want to stop
+ * looking at real value of mems_allowed.sequence in retry() first).
+ */
+extern struct static_key_false cpusets_pre_enable_key;
 extern struct static_key_false cpusets_enabled_key;
 static inline bool cpusets_enabled(void)
 {
@@ -32,12 +45,14 @@ static inline int nr_cpusets(void)
 
 static inline void cpuset_inc(void)
 {
+	static_branch_inc(&cpusets_pre_enable_key);
 	static_branch_inc(&cpusets_enabled_key);
 }
 
 static inline void cpuset_dec(void)
 {
 	static_branch_dec(&cpusets_enabled_key);
+	static_branch_dec(&cpusets_pre_enable_key);
 }
 
 extern int cpuset_init(void);
@@ -115,7 +130,7 @@ extern void cpuset_print_current_mems_allowed(void);
  */
 static inline unsigned int read_mems_allowed_begin(void)
 {
-	if (!cpusets_enabled())
+	if (!static_branch_unlikely(&cpusets_pre_enable_key))
 		return 0;
 
 	return read_seqcount_begin(&current->mems_allowed_seq);
@@ -129,7 +144,7 @@ static inline unsigned int read_mems_allowed_begin(void)
  */
 static inline bool read_mems_allowed_retry(unsigned int seq)
 {
-	if (!cpusets_enabled())
+	if (!static_branch_unlikely(&cpusets_enabled_key))
 		return false;
 
 	return read_seqcount_retry(&current->mems_allowed_seq, seq);
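[ For context, not part of the patch: read_mems_allowed_begin()/retry() are
  used in seqcount retry loops such as the sketch below. Illustrative only;
  allocate_from_mems_allowed() is a hypothetical stand-in for whatever work is
  done under the snapshot (e.g. a page allocation). ]

static struct page *alloc_respecting_mems_allowed(gfp_t gfp, unsigned int order)
{
	struct page *page;
	unsigned int cpuset_mems_cookie;

	do {
		/* Snapshot mems_allowed_seq; returns 0 while cpusets are off */
		cpuset_mems_cookie = read_mems_allowed_begin();
		page = allocate_from_mems_allowed(gfp, order);
		/*
		 * A failed attempt may be spurious if mems_allowed changed
		 * concurrently, so retry. With local irqs disabled this loop
		 * only terminates if begin() is patched before retry(), which
		 * is exactly what cpusets_pre_enable_key guarantees.
		 */
	} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

	return page;
}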
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 4fec8b775895..82e197eeac91 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -15,7 +15,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
  * @threadfn: the function to run in the thread
  * @data: data pointer for @threadfn()
  * @namefmt: printf-style format string for the thread name
- * @...: arguments for @namefmt.
+ * @arg...: arguments for @namefmt.
  *
  * This macro will create a kthread on the current node, leaving it in
  * the stopped state. This is just a helper for kthread_create_on_node();
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ff151814a02d..7f384bb62d8e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -495,6 +495,10 @@ struct mm_struct {
 	 */
 	bool tlb_flush_pending;
 #endif
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	/* See flush_tlb_batched_pending() */
+	bool tlb_flush_batched;
+#endif
 	struct uprobes_state uprobes_state;
 #ifdef CONFIG_HUGETLB_PAGE
 	atomic_long_t hugetlb_usage;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index baa9344dcd10..79b36f57c3ba 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -163,8 +163,6 @@ void release_pages(struct page **pages, int nr, bool cold);
  */
 static inline int page_cache_get_speculative(struct page *page)
 {
-	VM_BUG_ON(in_interrupt());
-
 #ifdef CONFIG_TINY_RCU
 # ifdef CONFIG_PREEMPT_COUNT
 	VM_BUG_ON(!in_atomic() && !irqs_disabled());
diff --git a/ipc/msg.c b/ipc/msg.c
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1034,7 +1034,8 @@ void msg_exit_ns(struct ipc_namespace *ns)
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
 {
 	struct user_namespace *user_ns = seq_user_ns(s);
-	struct msg_queue *msq = it;
+	struct kern_ipc_perm *ipcp = it;
+	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 
 	seq_printf(s,
 		   "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
diff --git a/ipc/sem.c b/ipc/sem.c
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2179,7 +2179,8 @@ void exit_sem(struct task_struct *tsk)
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 {
 	struct user_namespace *user_ns = seq_user_ns(s);
-	struct sem_array *sma = it;
+	struct kern_ipc_perm *ipcp = it;
+	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 	time_t sem_otime;
 
 	/*
diff --git a/ipc/shm.c b/ipc/shm.c
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1380,9 +1380,11 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
 {
 	struct user_namespace *user_ns = seq_user_ns(s);
-	struct shmid_kernel *shp = it;
+	struct kern_ipc_perm *ipcp = it;
+	struct shmid_kernel *shp;
 	unsigned long rss = 0, swp = 0;
 
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 	shm_add_rss_swap(shp, &rss, &swp);
 
 #if BITS_PER_LONG <= 32
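[ For context, not part of the patch: with structure layout randomization
  (randstruct), the kern_ipc_perm member is no longer guaranteed to sit at
  offset zero of its containing object, so the old implicit cast of the
  void *it cursor silently becomes wrong; container_of() subtracts the real
  member offset. A stand-alone illustration with hypothetical struct names: ]

#include <stddef.h>

struct perm   { int id; };
struct object {
	int other;		/* randstruct may reorder members like this... */
	struct perm perm;	/* ...so 'perm' need not be at offset zero */
};

/* Wrong: assumes 'perm' is the first member; breaks under a randomized layout */
static struct object *object_from_perm_cast(void *it)
{
	return (struct object *)it;
}

/* Right: subtract the actual offset of 'perm', whatever the layout is */
static struct object *object_from_perm(void *it)
{
	return (struct object *)((char *)it - offsetof(struct object, perm));
}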
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ca8376e5008c..8d5151688504 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -63,6 +63,7 @@
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
+DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
 
 /* See "Frequency meter" comments, below. */
diff --git a/kernel/pid.c b/kernel/pid.c
index 731c4e528f4e..c69c30d827e5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -575,13 +575,10 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  */
 void __init pidhash_init(void)
 {
-	unsigned int pidhash_size;
-
 	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
 					   HASH_EARLY | HASH_SMALL | HASH_ZERO,
 					   &pidhash_shift, NULL,
 					   0, 4096);
-	pidhash_size = 1U << pidhash_shift;
 }
 
 void __init pidmap_init(void)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bc48ee783dd9..a1a0ac0ad6f6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4078,6 +4078,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long vaddr = *position;
 	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
+	int err = -EFAULT;
 
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
@@ -4154,11 +4155,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
 		if (ret & VM_FAULT_ERROR) {
-			int err = vm_fault_to_errno(ret, flags);
-
-			if (err)
-				return err;
-
+			err = vm_fault_to_errno(ret, flags);
 			remainder = 0;
 			break;
 		}
@@ -4213,7 +4210,7 @@ same_page:
 	 */
 	*position = vaddr;
 
-	return i ? i : -EFAULT;
+	return i ? i : err;
 }
 
 #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
diff --git a/mm/internal.h b/mm/internal.h
index 24d88f084705..4ef49fc55e58 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -498,6 +498,7 @@ extern struct workqueue_struct *mm_percpu_wq;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
+void flush_tlb_batched_pending(struct mm_struct *mm);
 #else
 static inline void try_to_unmap_flush(void)
 {
@@ -505,7 +506,9 @@ static inline void try_to_unmap_flush(void)
 static inline void try_to_unmap_flush_dirty(void)
 {
 }
-
+static inline void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 
 extern const struct trace_print_flags pageflag_names[];
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 04bb1d3eb9ec..6bcfb01ba038 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -401,6 +401,7 @@ void kasan_report(unsigned long addr, size_t size,
 	disable_trace_on_warning();
 
 	info.access_addr = (void *)addr;
+	info.first_bad_addr = (void *)addr;
 	info.access_size = size;
 	info.is_write = is_write;
 	info.ip = ip;
diff --git a/mm/madvise.c b/mm/madvise.c
index 9976852f1e1c..47d8d8a25eae 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -320,6 +320,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
diff --git a/mm/memory.c b/mm/memory.c
index 0e517be91a89..f65beaad319b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1197,6 +1197,7 @@ again:
 	init_rss_vec(rss);
 	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	pte = start_pte;
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1a8c9ca83e48..4180ad8cc9c5 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -64,6 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	    atomic_read(&vma->vm_mm->mm_users) == 1)
 		target_node = numa_node_id();
 
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
diff --git a/mm/mremap.c b/mm/mremap.c
index cd8a1b199ef9..3f23715d3c69 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -152,6 +152,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
@@ -428,6 +429,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
 		struct vm_userfaultfd_ctx *uf,
+		struct list_head *uf_unmap_early,
 		struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = current->mm;
@@ -446,7 +448,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (addr + old_len > new_addr && new_addr + new_len > addr)
 		goto out;
 
-	ret = do_munmap(mm, new_addr, new_len, NULL);
+	ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
 	if (ret)
 		goto out;
 
@@ -514,6 +516,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	unsigned long charged = 0;
 	bool locked = false;
 	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
+	LIST_HEAD(uf_unmap_early);
 	LIST_HEAD(uf_unmap);
 
 	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
@@ -541,7 +544,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 
 	if (flags & MREMAP_FIXED) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf, &uf_unmap);
+				&locked, &uf, &uf_unmap_early, &uf_unmap);
 		goto out;
 	}
 
@@ -621,6 +624,7 @@ out:
 	up_write(&current->mm->mmap_sem);
 	if (locked && new_len > old_len)
 		mm_populate(new_addr + old_len, new_len - old_len);
+	userfaultfd_unmap_complete(mm, &uf_unmap_early);
 	mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
 	userfaultfd_unmap_complete(mm, &uf_unmap);
 	return ret;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d30e914afb6..fc32aa81f359 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4891,9 +4891,11 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write,
 				NUMA_ZONELIST_ORDER_LEN);
 			user_zonelist_order = oldval;
 		} else if (oldval != user_zonelist_order) {
+			mem_hotplug_begin();
 			mutex_lock(&zonelists_mutex);
 			build_all_zonelists(NULL, NULL);
 			mutex_unlock(&zonelists_mutex);
+			mem_hotplug_done();
 		}
 	}
 out:
diff --git a/mm/page_io.c b/mm/page_io.c
index b6c4ac388209..5f61b54ee1f3 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -22,6 +22,7 @@
 #include <linux/frontswap.h>
 #include <linux/blkdev.h>
 #include <linux/uio.h>
+#include <linux/sched/task.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -136,6 +137,7 @@ out:
 	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
 	wake_up_process(waiter);
+	put_task_struct(waiter);
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -378,6 +380,11 @@ int swap_readpage(struct page *page, bool do_poll)
 		goto out;
 	}
 	bdev = bio->bi_bdev;
+	/*
+	 * Keep this task valid during swap readpage because the oom killer may
+	 * attempt to access it in the page fault retry time check.
+	 */
+	get_task_struct(current);
 	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 	count_vm_event(PSWPIN);
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -605,6 +605,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 	tlb_ubc->flush_required = true;
 
 	/*
+	 * Ensure compiler does not re-order the setting of tlb_flush_batched
+	 * before the PTE is cleared.
+	 */
+	barrier();
+	mm->tlb_flush_batched = true;
+
+	/*
 	 * If the PTE was dirty then it's best to assume it's writable. The
 	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
 	 * before the page is queued for IO.
@@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 
 	return should_defer;
 }
+
+/*
+ * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
+ * releasing the PTL if TLB flushes are batched. It's possible for a parallel
+ * operation such as mprotect or munmap to race between reclaim unmapping
+ * the page and flushing the page. If this race occurs, it potentially allows
+ * access to data via a stale TLB entry. Tracking all mm's that have TLB
+ * batching in flight would be expensive during reclaim so instead track
+ * whether TLB batching occurred in the past and if so then do a flush here
+ * if required. This will cost one additional flush per reclaim cycle paid
+ * by the first operation at risk such as mprotect and munmap.
+ *
+ * This must be called under the PTL so that an access to tlb_flush_batched
+ * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
+ * via the PTL.
+ */
+void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+	if (mm->tlb_flush_batched) {
+		flush_tlb_mm(mm);
+
+		/*
+		 * Do not allow the compiler to re-order the clearing of
+		 * tlb_flush_batched before the tlb is flushed.
+		 */
+		barrier();
+		mm->tlb_flush_batched = false;
+	}
+}
 #else
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
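[ For context, not part of the patch: a condensed illustration of the caller
  contract for flush_tlb_batched_pending(): the flush must be issued after the
  page-table lock is taken and before any PTEs in the range are read or
  modified. It mirrors the call sites added in mm/madvise.c, mm/memory.c,
  mm/mprotect.c and mm/mremap.c above; the PTE walk body is elided. ]

static void example_pte_range_walk(struct mm_struct *mm, pmd_t *pmd,
				   unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte = pte_offset_map_lock(mm, pmd, addr, &ptl);

	flush_tlb_batched_pending(mm);	/* serialised against reclaim by the PTL */
	arch_enter_lazy_mmu_mode();

	for (; addr != end; pte++, addr += PAGE_SIZE) {
		/* ... read or modify *pte ... */
	}

	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
}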
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 013eea76685e..308acb9d814b 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -2453,7 +2453,6 @@ void zs_destroy_pool(struct zs_pool *pool)
 	}
 
 	destroy_cache(pool);
-	kfree(pool->size_class);
 	kfree(pool->name);
 	kfree(pool);
 }