author		Linus Torvalds <torvalds@linux-foundation.org>	2018-05-11 21:04:12 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-05-11 21:04:12 -0400
commit		f0ab773f5c96c29a5227234c4b5a820f5591b74d (patch)
tree		a98d6190989c2190e522b8d1966727125bcbcbb3
parent		4bc871984f7cb5b2dec3ae64b570cb02f9ce2227 (diff)
parent		2075b16e32c26e4031b9fd3cbe26c54676a8fcb5 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "13 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  rbtree: include rcu.h
  scripts/faddr2line: fix error when addr2line output contains discriminator
  ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
  mm, oom: fix concurrent munlock and oom reaper unmap, v3
  mm: migrate: fix double call of radix_tree_replace_slot()
  proc/kcore: don't bounds check against address 0
  mm: don't show nr_indirectly_reclaimable in /proc/vmstat
  mm: sections are not offlined during memory hotremove
  z3fold: fix reclaim lock-ups
  init: fix false positives in W+X checking
  lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
  KASAN: prohibit KASAN+STRUCTLEAK combination
  MAINTAINERS: update Shuah's email address
-rw-r--r--  MAINTAINERS                       |  3
-rw-r--r--  arch/Kconfig                      |  4
-rw-r--r--  fs/ocfs2/refcounttree.c           | 14
-rw-r--r--  fs/proc/kcore.c                   | 23
-rw-r--r--  include/linux/oom.h               |  2
-rw-r--r--  include/linux/rbtree_augmented.h  |  1
-rw-r--r--  include/linux/rbtree_latch.h      |  1
-rw-r--r--  init/main.c                       |  7
-rw-r--r--  kernel/module.c                   |  5
-rw-r--r--  lib/find_bit_benchmark.c          |  7
-rw-r--r--  mm/migrate.c                      |  4
-rw-r--r--  mm/mmap.c                         | 44
-rw-r--r--  mm/oom_kill.c                     | 81
-rw-r--r--  mm/sparse.c                       |  2
-rw-r--r--  mm/vmstat.c                       |  6
-rw-r--r--  mm/z3fold.c                       | 42
-rwxr-xr-x  scripts/faddr2line                |  5
17 files changed, 164 insertions, 87 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f913c80c8c38..58b9861ccf99 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c

 CPU POWER MONITORING SUBSYSTEM
 M:	Thomas Renninger <trenn@suse.com>
-M:	Shuah Khan <shuahkh@osg.samsung.com>
 M:	Shuah Khan <shuah@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Maintained
@@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/
 F:	include/uapi/linux/sunrpc/

 KERNEL SELFTEST FRAMEWORK
-M:	Shuah Khan <shuahkh@osg.samsung.com>
 M:	Shuah Khan <shuah@kernel.org>
 L:	linux-kselftest@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
@@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c

 USB OVER IP DRIVER
 M:	Valentina Manea <valentina.manea.m@gmail.com>
-M:	Shuah Khan <shuahkh@osg.samsung.com>
 M:	Shuah Khan <shuah@kernel.org>
 L:	linux-usb@vger.kernel.org
 S:	Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index 8e0d665c8d53..75dd23acf133 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
 config GCC_PLUGIN_STRUCTLEAK
 	bool "Force initialization of variables containing userspace addresses"
 	depends on GCC_PLUGINS
+	# Currently STRUCTLEAK inserts initialization out of live scope of
+	# variables from KASAN point of view. This leads to KASAN false
+	# positive reports. Prohibit this combination for now.
+	depends on !KASAN_EXTRA
 	help
 	  This plugin zero-initializes any structures containing a
 	  __user attribute. This can prevent some classes of information
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 01c6b3894406..7869622af22a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4250,10 +4250,11 @@ out:
 static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 			 struct dentry *new_dentry, bool preserve)
 {
-	int error;
+	int error, had_lock;
 	struct inode *inode = d_inode(old_dentry);
 	struct buffer_head *old_bh = NULL;
 	struct inode *new_orphan_inode = NULL;
+	struct ocfs2_lock_holder oh;

 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
 		return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 		goto out;
 	}

+	had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+					    &oh);
+	if (had_lock < 0) {
+		error = had_lock;
+		mlog_errno(error);
+		goto out;
+	}
+
 	/* If the security isn't preserved, we need to re-initialize them. */
 	if (!preserve) {
 		error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 		if (error)
 			mlog_errno(error);
 	}
-out:
 	if (!error) {
 		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
 						       new_dentry);
 		if (error)
 			mlog_errno(error);
 	}
+	ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);

+out:
 	if (new_orphan_inode) {
 		/*
 		 * We need to open_unlock the inode no matter whether we
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d1e82761de81..e64ecb9f2720 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
 	struct list_head *head = (struct list_head *)arg;
 	struct kcore_list *ent;
+	struct page *p;
+
+	if (!pfn_valid(pfn))
+		return 1;
+
+	p = pfn_to_page(pfn);
+	if (!memmap_valid_within(pfn, p, page_zone(p)))
+		return 1;

 	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
 	if (!ent)
 		return -ENOMEM;
-	ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+	ent->addr = (unsigned long)page_to_virt(p);
 	ent->size = nr_pages << PAGE_SHIFT;

-	/* Sanity check: Can happen in 32bit arch...maybe */
-	if (ent->addr < (unsigned long) __va(0))
+	if (!virt_addr_valid(ent->addr))
 		goto free_out;

 	/* cut not-mapped area. ....from ppc-32 code. */
 	if (ULONG_MAX - ent->addr < ent->size)
 		ent->size = ULONG_MAX - ent->addr;

-	/* cut when vmalloc() area is higher than direct-map area */
-	if (VMALLOC_START > (unsigned long)__va(0)) {
-		if (ent->addr > VMALLOC_START)
-			goto free_out;
+	/*
+	 * We've already checked virt_addr_valid so we know this address
+	 * is a valid pointer, therefore we can check against it to determine
+	 * if we need to trim
+	 */
+	if (VMALLOC_START > ent->addr) {
 		if (VMALLOC_START - ent->addr < ent->size)
 			ent->size = VMALLOC_START - ent->addr;
 	}
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bad038ac012..6adac113e96d 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
 	return 0;
 }

+void __oom_reap_task_mm(struct mm_struct *mm);
+
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 6bfd2b581f75..af8a61be2d8d 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -26,6 +26,7 @@

 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/rcupdate.h>

 /*
  * Please note - only struct rb_augment_callbacks and the prototypes for
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index ece43e882b56..7d012faa509a 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -35,6 +35,7 @@

 #include <linux/rbtree.h>
 #include <linux/seqlock.h>
+#include <linux/rcupdate.h>

 struct latch_tree_node {
 	struct rb_node node[2];
diff --git a/init/main.c b/init/main.c
index a404936d85d8..fd37315835b4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
 static void mark_readonly(void)
 {
 	if (rodata_enabled) {
+		/*
+		 * load_module() results in W+X mappings, which are cleaned up
+		 * with call_rcu_sched(). Let's make sure that queued work is
+		 * flushed so that we don't hit false positives looking for
+		 * insecure pages which are W+X.
+		 */
+		rcu_barrier_sched();
 		mark_rodata_ro();
 		rodata_test();
 	} else
diff --git a/kernel/module.c b/kernel/module.c
index ce8066b88178..c9bea7f2b43e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
 	 * walking this with preempt disabled. In all the failure paths, we
 	 * call synchronize_sched(), but we don't want to slow down the success
 	 * path, so use actual RCU here.
+	 * Note that module_alloc() on most architectures creates W+X page
+	 * mappings which won't be cleaned up until do_free_init() runs. Any
+	 * code such as mark_rodata_ro() which depends on those mappings to
+	 * be cleaned up needs to sync with the queued work - ie
+	 * rcu_barrier_sched()
 	 */
 	call_rcu_sched(&freeinit->rcu, do_free_init);
 	mutex_unlock(&module_mutex);
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index 5985a25e6cbc..5367ffa5c18f 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
 	test_find_next_bit(bitmap, BITMAP_LEN);
 	test_find_next_zero_bit(bitmap, BITMAP_LEN);
 	test_find_last_bit(bitmap, BITMAP_LEN);
-	test_find_first_bit(bitmap, BITMAP_LEN);
+
+	/*
+	 * test_find_first_bit() may take some time, so
+	 * traverse only part of bitmap to avoid soft lockup.
+	 */
+	test_find_first_bit(bitmap, BITMAP_LEN / 10);
 	test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);

 	pr_err("\nStart testing find_bit() with sparse bitmap\n");
diff --git a/mm/migrate.c b/mm/migrate.c
index 568433023831..8c0af0f7cab1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		int i;
 		int index = page_index(page);

-		for (i = 0; i < HPAGE_PMD_NR; i++) {
+		for (i = 1; i < HPAGE_PMD_NR; i++) {
 			pslot = radix_tree_lookup_slot(&mapping->i_pages,
 						       index + i);
 			radix_tree_replace_slot(&mapping->i_pages, pslot,
 						newpage + i);
 		}
-	} else {
-		radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 	}

 	/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 6fc435760086..78e14facdb6e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
 	/* mm's last user has gone, and its about to be pulled down */
 	mmu_notifier_release(mm);

+	if (unlikely(mm_is_oom_victim(mm))) {
+		/*
+		 * Manually reap the mm to free as much memory as possible.
+		 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
+		 * this mm from further consideration.  Taking mm->mmap_sem for
+		 * write after setting MMF_OOM_SKIP will guarantee that the oom
+		 * reaper will not run on this mm again after mmap_sem is
+		 * dropped.
+		 *
+		 * Nothing can be holding mm->mmap_sem here and the above call
+		 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
+		 * __oom_reap_task_mm() will not block.
+		 *
+		 * This needs to be done before calling munlock_vma_pages_all(),
+		 * which clears VM_LOCKED, otherwise the oom reaper cannot
+		 * reliably test it.
+		 */
+		mutex_lock(&oom_lock);
+		__oom_reap_task_mm(mm);
+		mutex_unlock(&oom_lock);
+
+		set_bit(MMF_OOM_SKIP, &mm->flags);
+		down_write(&mm->mmap_sem);
+		up_write(&mm->mmap_sem);
+	}
+
 	if (mm->locked_vm) {
 		vma = mm->mmap;
 		while (vma) {
@@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
-
-	if (unlikely(mm_is_oom_victim(mm))) {
-		/*
-		 * Wait for oom_reap_task() to stop working on this
-		 * mm. Because MMF_OOM_SKIP is already set before
-		 * calling down_read(), oom_reap_task() will not run
-		 * on this "mm" post up_write().
-		 *
-		 * mm_is_oom_victim() cannot be set from under us
-		 * either because victim->mm is already set to NULL
-		 * under task_lock before calling mmput and oom_mm is
-		 * set not NULL by the OOM killer only if victim->mm
-		 * is found not NULL while holding the task_lock.
-		 */
-		set_bit(MMF_OOM_SKIP, &mm->flags);
-		down_write(&mm->mmap_sem);
-		up_write(&mm->mmap_sem);
-	}
 	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
 	tlb_finish_mmu(&tlb, 0, -1);

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ff992fa8760a..8ba6cb88cf58 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
 	return false;
 }

-
 #ifdef CONFIG_MMU
 /*
  * OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);

-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
 {
-	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
+
+	/*
+	 * Tell all users of get_user/copy_from_user etc... that the content
+	 * is no longer stable. No barriers really needed because unmapping
+	 * should imply barriers already and the reader would hit a page fault
+	 * if it stumbled over a reaped memory.
+	 */
+	set_bit(MMF_UNSTABLE, &mm->flags);
+
+	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+		if (!can_madv_dontneed_vma(vma))
+			continue;
+
+		/*
+		 * Only anonymous pages have a good chance to be dropped
+		 * without additional steps which we cannot afford as we
+		 * are OOM already.
+		 *
+		 * We do not even care about fs backed pages because all
+		 * which are reclaimable have already been reclaimed and
+		 * we do not want to block exit_mmap by keeping mm ref
+		 * count elevated without a good reason.
+		 */
+		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+			const unsigned long start = vma->vm_start;
+			const unsigned long end = vma->vm_end;
+			struct mmu_gather tlb;
+
+			tlb_gather_mmu(&tlb, mm, start, end);
+			mmu_notifier_invalidate_range_start(mm, start, end);
+			unmap_page_range(&tlb, vma, start, end, NULL);
+			mmu_notifier_invalidate_range_end(mm, start, end);
+			tlb_finish_mmu(&tlb, start, end);
+		}
+	}
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
 	bool ret = true;

 	/*
 	 * We have to make sure to not race with the victim exit path
 	 * and cause premature new oom victim selection:
-	 * __oom_reap_task_mm		exit_mm
+	 * oom_reap_task_mm		exit_mm
 	 *   mmget_not_zero
 	 *				  mmput
 	 *				    atomic_dec_and_test
@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)

 	trace_start_task_reaping(tsk->pid);

-	/*
-	 * Tell all users of get_user/copy_from_user etc... that the content
-	 * is no longer stable. No barriers really needed because unmapping
-	 * should imply barriers already and the reader would hit a page fault
-	 * if it stumbled over a reaped memory.
-	 */
-	set_bit(MMF_UNSTABLE, &mm->flags);
-
-	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-		if (!can_madv_dontneed_vma(vma))
-			continue;
+	__oom_reap_task_mm(mm);

-		/*
-		 * Only anonymous pages have a good chance to be dropped
-		 * without additional steps which we cannot afford as we
-		 * are OOM already.
-		 *
-		 * We do not even care about fs backed pages because all
-		 * which are reclaimable have already been reclaimed and
-		 * we do not want to block exit_mmap by keeping mm ref
-		 * count elevated without a good reason.
-		 */
-		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
-			const unsigned long start = vma->vm_start;
-			const unsigned long end = vma->vm_end;
-
-			tlb_gather_mmu(&tlb, mm, start, end);
-			mmu_notifier_invalidate_range_start(mm, start, end);
-			unmap_page_range(&tlb, vma, start, end, NULL);
-			mmu_notifier_invalidate_range_end(mm, start, end);
-			tlb_finish_mmu(&tlb, start, end);
-		}
-	}
 	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
 		task_pid_nr(tsk), tsk->comm,
 		K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
 	struct mm_struct *mm = tsk->signal->oom_mm;

 	/* Retry the down_read_trylock(mmap_sem) a few times */
-	while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+	while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
 		schedule_timeout_idle(HZ/10);

 	if (attempts <= MAX_OOM_REAP_RETRIES ||
 	    test_bit(MMF_OOM_SKIP, &mm->flags))
 		goto done;

-
 	pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
 		task_pid_nr(tsk), tsk->comm);
 	debug_show_all_locks();
diff --git a/mm/sparse.c b/mm/sparse.c
index 62eef264a7bd..73dc2fcc0eab 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 	unsigned long pfn;

 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-		unsigned long section_nr = pfn_to_section_nr(start_pfn);
+		unsigned long section_nr = pfn_to_section_nr(pfn);
 		struct mem_section *ms;

 		/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 536332e988b8..a2b9518980ce 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
1161 "nr_vmscan_immediate_reclaim", 1161 "nr_vmscan_immediate_reclaim",
1162 "nr_dirtied", 1162 "nr_dirtied",
1163 "nr_written", 1163 "nr_written",
1164 "nr_indirectly_reclaimable", 1164 "", /* nr_indirectly_reclaimable */
1165 1165
1166 /* enum writeback_stat_item counters */ 1166 /* enum writeback_stat_item counters */
1167 "nr_dirty_threshold", 1167 "nr_dirty_threshold",
@@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
 	unsigned long *l = arg;
 	unsigned long off = l - (unsigned long *)m->private;

+	/* Skip hidden vmstat items. */
+	if (*vmstat_text[off] == '\0')
+		return 0;
+
 	seq_puts(m, vmstat_text[off]);
 	seq_put_decimal_ull(m, " ", *l);
 	seq_putc(m, '\n');
diff --git a/mm/z3fold.c b/mm/z3fold.c
index c0bca6153b95..4b366d181f35 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -144,7 +144,8 @@ enum z3fold_page_flags {
 	PAGE_HEADLESS = 0,
 	MIDDLE_CHUNK_MAPPED,
 	NEEDS_COMPACTING,
-	PAGE_STALE
+	PAGE_STALE,
+	UNDER_RECLAIM
 };

 /*****************
@@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 	clear_bit(NEEDS_COMPACTING, &page->private);
 	clear_bit(PAGE_STALE, &page->private);
+	clear_bit(UNDER_RECLAIM, &page->private);

 	spin_lock_init(&zhdr->page_lock);
 	kref_init(&zhdr->refcount);
@@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		atomic64_dec(&pool->pages_nr);
 		return;
 	}
+	if (test_bit(UNDER_RECLAIM, &page->private)) {
+		z3fold_page_unlock(zhdr);
+		return;
+	}
 	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
 		z3fold_page_unlock(zhdr);
 		return;
@@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			kref_get(&zhdr->refcount);
 			list_del_init(&zhdr->buddy);
 			zhdr->cpu = -1;
+			set_bit(UNDER_RECLAIM, &page->private);
+			break;
 		}

 		list_del_init(&page->lru);
@@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		spin_lock(&pool->lock);
 		if (test_bit(PAGE_HEADLESS, &page->private)) {
 			if (ret == 0) {
-				spin_unlock(&pool->lock);
 				free_z3fold_page(page);
 				return 0;
 			}
-		} else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
-			atomic64_dec(&pool->pages_nr);
+			spin_lock(&pool->lock);
+			list_add(&page->lru, &pool->lru);
+			spin_unlock(&pool->lock);
+		} else {
+			z3fold_page_lock(zhdr);
+			clear_bit(UNDER_RECLAIM, &page->private);
+			if (kref_put(&zhdr->refcount,
+					release_z3fold_page_locked)) {
+				atomic64_dec(&pool->pages_nr);
+				return 0;
+			}
+			/*
+			 * if we are here, the page is still not completely
+			 * free. Take the global pool lock then to be able
+			 * to add it back to the lru list
+			 */
+			spin_lock(&pool->lock);
+			list_add(&page->lru, &pool->lru);
 			spin_unlock(&pool->lock);
-			return 0;
+			z3fold_page_unlock(zhdr);
 		}

-		/*
-		 * Add to the beginning of LRU.
-		 * Pool lock has to be kept here to ensure the page has
-		 * not already been released
-		 */
-		list_add(&page->lru, &pool->lru);
+		/* We started off locked to we need to lock the pool back */
+		spin_lock(&pool->lock);
 	}
 	spin_unlock(&pool->lock);
 	return -EAGAIN;
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 9e5735a4d3a5..1876a741087c 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -170,7 +170,10 @@ __faddr2line() {
170 echo "$file_lines" | while read -r line 170 echo "$file_lines" | while read -r line
171 do 171 do
172 echo $line 172 echo $line
173 eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}') 173 n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
174 n1=$[$n-5]
175 n2=$[$n+5]
176 f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
174 awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f 177 awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
175 done 178 done
176 179
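For context on the faddr2line change above: addr2line can append a "(discriminator N)" suffix to its file:line output, which broke the old awk parsing that assumed the line number was the last colon- or space-separated field. Below is a minimal sketch of how the new sed extraction behaves on such a line; the sample input line is hypothetical and the exact output format depends on the binutils version in use.

	# Hypothetical line as seen inside __faddr2line()'s read loop;
	# the trailing "(discriminator 3)" is what tripped the old $NF logic.
	line="meminfo_proc_show at fs/proc/meminfo.c:27 (discriminator 3)"

	# Line number: the digits after the last ':' (discriminator text is ignored).
	n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')    # -> 27

	# Source file: everything between "at " and the last ':'.
	f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')     # -> fs/proc/meminfo.c

	echo "$f:$n"                                       # -> fs/proc/meminfo.c:27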