author     Linus Torvalds <torvalds@linux-foundation.org>  2018-05-11 21:04:12 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-05-11 21:04:12 -0400
commit     f0ab773f5c96c29a5227234c4b5a820f5591b74d (patch)
tree       a98d6190989c2190e522b8d1966727125bcbcbb3
parent     4bc871984f7cb5b2dec3ae64b570cb02f9ce2227 (diff)
parent     2075b16e32c26e4031b9fd3cbe26c54676a8fcb5 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"13 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
rbtree: include rcu.h
scripts/faddr2line: fix error when addr2line output contains discriminator
ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
mm, oom: fix concurrent munlock and oom reaper unmap, v3
mm: migrate: fix double call of radix_tree_replace_slot()
proc/kcore: don't bounds check against address 0
mm: don't show nr_indirectly_reclaimable in /proc/vmstat
mm: sections are not offlined during memory hotremove
z3fold: fix reclaim lock-ups
init: fix false positives in W+X checking
lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
KASAN: prohibit KASAN+STRUCTLEAK combination
MAINTAINERS: update Shuah's email address
-rw-r--r--  MAINTAINERS                       |  3
-rw-r--r--  arch/Kconfig                      |  4
-rw-r--r--  fs/ocfs2/refcounttree.c           | 14
-rw-r--r--  fs/proc/kcore.c                   | 23
-rw-r--r--  include/linux/oom.h               |  2
-rw-r--r--  include/linux/rbtree_augmented.h  |  1
-rw-r--r--  include/linux/rbtree_latch.h      |  1
-rw-r--r--  init/main.c                       |  7
-rw-r--r--  kernel/module.c                   |  5
-rw-r--r--  lib/find_bit_benchmark.c          |  7
-rw-r--r--  mm/migrate.c                      |  4
-rw-r--r--  mm/mmap.c                         | 44
-rw-r--r--  mm/oom_kill.c                     | 81
-rw-r--r--  mm/sparse.c                       |  2
-rw-r--r--  mm/vmstat.c                       |  6
-rw-r--r--  mm/z3fold.c                       | 42
-rwxr-xr-x  scripts/faddr2line                |  5
17 files changed, 164 insertions, 87 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f913c80c8c38..58b9861ccf99 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c
 
 CPU POWER MONITORING SUBSYSTEM
 M: Thomas Renninger <trenn@suse.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
 M: Shuah Khan <shuah@kernel.org>
 L: linux-pm@vger.kernel.org
 S: Maintained
@@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/
 F: include/uapi/linux/sunrpc/
 
 KERNEL SELFTEST FRAMEWORK
-M: Shuah Khan <shuahkh@osg.samsung.com>
 M: Shuah Khan <shuah@kernel.org>
 L: linux-kselftest@vger.kernel.org
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
@@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c
 
 USB OVER IP DRIVER
 M: Valentina Manea <valentina.manea.m@gmail.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
 M: Shuah Khan <shuah@kernel.org>
 L: linux-usb@vger.kernel.org
 S: Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index 8e0d665c8d53..75dd23acf133 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
 config GCC_PLUGIN_STRUCTLEAK
         bool "Force initialization of variables containing userspace addresses"
         depends on GCC_PLUGINS
+        # Currently STRUCTLEAK inserts initialization out of live scope of
+        # variables from KASAN point of view. This leads to KASAN false
+        # positive reports. Prohibit this combination for now.
+        depends on !KASAN_EXTRA
         help
           This plugin zero-initializes any structures containing a
           __user attribute. This can prevent some classes of information
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 01c6b3894406..7869622af22a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4250,10 +4250,11 @@ out:
 static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                          struct dentry *new_dentry, bool preserve)
 {
-        int error;
+        int error, had_lock;
         struct inode *inode = d_inode(old_dentry);
         struct buffer_head *old_bh = NULL;
         struct inode *new_orphan_inode = NULL;
+        struct ocfs2_lock_holder oh;
 
         if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
                 return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                 goto out;
         }
 
+        had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+                                            &oh);
+        if (had_lock < 0) {
+                error = had_lock;
+                mlog_errno(error);
+                goto out;
+        }
+
         /* If the security isn't preserved, we need to re-initialize them. */
         if (!preserve) {
                 error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                 if (error)
                         mlog_errno(error);
         }
-out:
         if (!error) {
                 error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
                                                        new_dentry);
                 if (error)
                         mlog_errno(error);
         }
+        ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
 
+out:
         if (new_orphan_inode) {
                 /*
                  * We need to open_unlock the inode no matter whether we
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d1e82761de81..e64ecb9f2720 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
         struct list_head *head = (struct list_head *)arg;
         struct kcore_list *ent;
+        struct page *p;
+
+        if (!pfn_valid(pfn))
+                return 1;
+
+        p = pfn_to_page(pfn);
+        if (!memmap_valid_within(pfn, p, page_zone(p)))
+                return 1;
 
         ent = kmalloc(sizeof(*ent), GFP_KERNEL);
         if (!ent)
                 return -ENOMEM;
-        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+        ent->addr = (unsigned long)page_to_virt(p);
         ent->size = nr_pages << PAGE_SHIFT;
 
-        /* Sanity check: Can happen in 32bit arch...maybe */
-        if (ent->addr < (unsigned long) __va(0))
+        if (!virt_addr_valid(ent->addr))
                 goto free_out;
 
         /* cut not-mapped area. ....from ppc-32 code. */
         if (ULONG_MAX - ent->addr < ent->size)
                 ent->size = ULONG_MAX - ent->addr;
 
-        /* cut when vmalloc() area is higher than direct-map area */
-        if (VMALLOC_START > (unsigned long)__va(0)) {
-                if (ent->addr > VMALLOC_START)
-                        goto free_out;
+        /*
+         * We've already checked virt_addr_valid so we know this address
+         * is a valid pointer, therefore we can check against it to determine
+         * if we need to trim
+         */
+        if (VMALLOC_START > ent->addr) {
                 if (VMALLOC_START - ent->addr < ent->size)
                         ent->size = VMALLOC_START - ent->addr;
         }
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bad038ac012..6adac113e96d 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
         return 0;
 }
 
+void __oom_reap_task_mm(struct mm_struct *mm);
+
 extern unsigned long oom_badness(struct task_struct *p,
                 struct mem_cgroup *memcg, const nodemask_t *nodemask,
                 unsigned long totalpages);
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 6bfd2b581f75..af8a61be2d8d 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -26,6 +26,7 @@
 
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/rcupdate.h>
 
 /*
  * Please note - only struct rb_augment_callbacks and the prototypes for
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index ece43e882b56..7d012faa509a 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -35,6 +35,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/seqlock.h>
+#include <linux/rcupdate.h>
 
 struct latch_tree_node {
         struct rb_node node[2];
diff --git a/init/main.c b/init/main.c
index a404936d85d8..fd37315835b4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
 static void mark_readonly(void)
 {
         if (rodata_enabled) {
+                /*
+                 * load_module() results in W+X mappings, which are cleaned up
+                 * with call_rcu_sched(). Let's make sure that queued work is
+                 * flushed so that we don't hit false positives looking for
+                 * insecure pages which are W+X.
+                 */
+                rcu_barrier_sched();
                 mark_rodata_ro();
                 rodata_test();
         } else
diff --git a/kernel/module.c b/kernel/module.c
index ce8066b88178..c9bea7f2b43e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
          * walking this with preempt disabled. In all the failure paths, we
          * call synchronize_sched(), but we don't want to slow down the success
          * path, so use actual RCU here.
+         * Note that module_alloc() on most architectures creates W+X page
+         * mappings which won't be cleaned up until do_free_init() runs. Any
+         * code such as mark_rodata_ro() which depends on those mappings to
+         * be cleaned up needs to sync with the queued work - ie
+         * rcu_barrier_sched()
          */
         call_rcu_sched(&freeinit->rcu, do_free_init);
         mutex_unlock(&module_mutex);
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index 5985a25e6cbc..5367ffa5c18f 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
         test_find_next_bit(bitmap, BITMAP_LEN);
         test_find_next_zero_bit(bitmap, BITMAP_LEN);
         test_find_last_bit(bitmap, BITMAP_LEN);
-        test_find_first_bit(bitmap, BITMAP_LEN);
+
+        /*
+         * test_find_first_bit() may take some time, so
+         * traverse only part of bitmap to avoid soft lockup.
+         */
+        test_find_first_bit(bitmap, BITMAP_LEN / 10);
         test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
 
         pr_err("\nStart testing find_bit() with sparse bitmap\n");
diff --git a/mm/migrate.c b/mm/migrate.c
index 568433023831..8c0af0f7cab1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
                 int i;
                 int index = page_index(page);
 
-                for (i = 0; i < HPAGE_PMD_NR; i++) {
+                for (i = 1; i < HPAGE_PMD_NR; i++) {
                         pslot = radix_tree_lookup_slot(&mapping->i_pages,
                                                        index + i);
                         radix_tree_replace_slot(&mapping->i_pages, pslot,
                                         newpage + i);
                 }
-        } else {
-                radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
         }
 
         /*
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
         /* mm's last user has gone, and its about to be pulled down */
         mmu_notifier_release(mm);
 
+        if (unlikely(mm_is_oom_victim(mm))) {
+                /*
+                 * Manually reap the mm to free as much memory as possible.
+                 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
+                 * this mm from further consideration. Taking mm->mmap_sem for
+                 * write after setting MMF_OOM_SKIP will guarantee that the oom
+                 * reaper will not run on this mm again after mmap_sem is
+                 * dropped.
+                 *
+                 * Nothing can be holding mm->mmap_sem here and the above call
+                 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
+                 * __oom_reap_task_mm() will not block.
+                 *
+                 * This needs to be done before calling munlock_vma_pages_all(),
+                 * which clears VM_LOCKED, otherwise the oom reaper cannot
+                 * reliably test it.
+                 */
+                mutex_lock(&oom_lock);
+                __oom_reap_task_mm(mm);
+                mutex_unlock(&oom_lock);
+
+                set_bit(MMF_OOM_SKIP, &mm->flags);
+                down_write(&mm->mmap_sem);
+                up_write(&mm->mmap_sem);
+        }
+
         if (mm->locked_vm) {
                 vma = mm->mmap;
                 while (vma) {
@@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
         /* update_hiwater_rss(mm) here? but nobody should be looking */
         /* Use -1 here to ensure all VMAs in the mm are unmapped */
         unmap_vmas(&tlb, vma, 0, -1);
-
-        if (unlikely(mm_is_oom_victim(mm))) {
-                /*
-                 * Wait for oom_reap_task() to stop working on this
-                 * mm. Because MMF_OOM_SKIP is already set before
-                 * calling down_read(), oom_reap_task() will not run
-                 * on this "mm" post up_write().
-                 *
-                 * mm_is_oom_victim() cannot be set from under us
-                 * either because victim->mm is already set to NULL
-                 * under task_lock before calling mmput and oom_mm is
-                 * set not NULL by the OOM killer only if victim->mm
-                 * is found not NULL while holding the task_lock.
-                 */
-                set_bit(MMF_OOM_SKIP, &mm->flags);
-                down_write(&mm->mmap_sem);
-                up_write(&mm->mmap_sem);
-        }
         free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
         tlb_finish_mmu(&tlb, 0, -1);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ff992fa8760a..8ba6cb88cf58 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
         return false;
 }
 
-
 #ifdef CONFIG_MMU
 /*
  * OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
 {
-        struct mmu_gather tlb;
         struct vm_area_struct *vma;
+
+        /*
+         * Tell all users of get_user/copy_from_user etc... that the content
+         * is no longer stable. No barriers really needed because unmapping
+         * should imply barriers already and the reader would hit a page fault
+         * if it stumbled over a reaped memory.
+         */
+        set_bit(MMF_UNSTABLE, &mm->flags);
+
+        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+                if (!can_madv_dontneed_vma(vma))
+                        continue;
+
+                /*
+                 * Only anonymous pages have a good chance to be dropped
+                 * without additional steps which we cannot afford as we
+                 * are OOM already.
+                 *
+                 * We do not even care about fs backed pages because all
+                 * which are reclaimable have already been reclaimed and
+                 * we do not want to block exit_mmap by keeping mm ref
+                 * count elevated without a good reason.
+                 */
+                if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+                        const unsigned long start = vma->vm_start;
+                        const unsigned long end = vma->vm_end;
+                        struct mmu_gather tlb;
+
+                        tlb_gather_mmu(&tlb, mm, start, end);
+                        mmu_notifier_invalidate_range_start(mm, start, end);
+                        unmap_page_range(&tlb, vma, start, end, NULL);
+                        mmu_notifier_invalidate_range_end(mm, start, end);
+                        tlb_finish_mmu(&tlb, start, end);
+                }
+        }
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
         bool ret = true;
 
         /*
          * We have to make sure to not race with the victim exit path
          * and cause premature new oom victim selection:
-         * __oom_reap_task_mm           exit_mm
+         * oom_reap_task_mm             exit_mm
          *   mmget_not_zero
          *                                mmput
          *                                  atomic_dec_and_test
@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
         trace_start_task_reaping(tsk->pid);
 
-        /*
-         * Tell all users of get_user/copy_from_user etc... that the content
-         * is no longer stable. No barriers really needed because unmapping
-         * should imply barriers already and the reader would hit a page fault
-         * if it stumbled over a reaped memory.
-         */
-        set_bit(MMF_UNSTABLE, &mm->flags);
-
-        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-                if (!can_madv_dontneed_vma(vma))
-                        continue;
+        __oom_reap_task_mm(mm);
 
-                /*
-                 * Only anonymous pages have a good chance to be dropped
-                 * without additional steps which we cannot afford as we
-                 * are OOM already.
-                 *
-                 * We do not even care about fs backed pages because all
-                 * which are reclaimable have already been reclaimed and
-                 * we do not want to block exit_mmap by keeping mm ref
-                 * count elevated without a good reason.
-                 */
-                if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
-                        const unsigned long start = vma->vm_start;
-                        const unsigned long end = vma->vm_end;
-
-                        tlb_gather_mmu(&tlb, mm, start, end);
-                        mmu_notifier_invalidate_range_start(mm, start, end);
-                        unmap_page_range(&tlb, vma, start, end, NULL);
-                        mmu_notifier_invalidate_range_end(mm, start, end);
-                        tlb_finish_mmu(&tlb, start, end);
-                }
-        }
         pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                         task_pid_nr(tsk), tsk->comm,
                         K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
         struct mm_struct *mm = tsk->signal->oom_mm;
 
         /* Retry the down_read_trylock(mmap_sem) a few times */
-        while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+        while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
                 schedule_timeout_idle(HZ/10);
 
         if (attempts <= MAX_OOM_REAP_RETRIES ||
             test_bit(MMF_OOM_SKIP, &mm->flags))
                 goto done;
 
-
         pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
                 task_pid_nr(tsk), tsk->comm);
         debug_show_all_locks();
diff --git a/mm/sparse.c b/mm/sparse.c
index 62eef264a7bd..73dc2fcc0eab 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
         unsigned long pfn;
 
         for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-                unsigned long section_nr = pfn_to_section_nr(start_pfn);
+                unsigned long section_nr = pfn_to_section_nr(pfn);
                 struct mem_section *ms;
 
                 /*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 536332e988b8..a2b9518980ce 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
         "nr_vmscan_immediate_reclaim",
         "nr_dirtied",
         "nr_written",
-        "nr_indirectly_reclaimable",
+        "", /* nr_indirectly_reclaimable */
 
         /* enum writeback_stat_item counters */
         "nr_dirty_threshold",
@@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
         unsigned long *l = arg;
         unsigned long off = l - (unsigned long *)m->private;
 
+        /* Skip hidden vmstat items. */
+        if (*vmstat_text[off] == '\0')
+                return 0;
+
         seq_puts(m, vmstat_text[off]);
         seq_put_decimal_ull(m, " ", *l);
         seq_putc(m, '\n');
diff --git a/mm/z3fold.c b/mm/z3fold.c
index c0bca6153b95..4b366d181f35 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -144,7 +144,8 @@ enum z3fold_page_flags {
         PAGE_HEADLESS = 0,
         MIDDLE_CHUNK_MAPPED,
         NEEDS_COMPACTING,
-        PAGE_STALE
+        PAGE_STALE,
+        UNDER_RECLAIM
 };
 
 /*****************
@@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
         clear_bit(NEEDS_COMPACTING, &page->private);
         clear_bit(PAGE_STALE, &page->private);
+        clear_bit(UNDER_RECLAIM, &page->private);
 
         spin_lock_init(&zhdr->page_lock);
         kref_init(&zhdr->refcount);
@@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                 atomic64_dec(&pool->pages_nr);
                 return;
         }
+        if (test_bit(UNDER_RECLAIM, &page->private)) {
+                z3fold_page_unlock(zhdr);
+                return;
+        }
         if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
                 z3fold_page_unlock(zhdr);
                 return;
@@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                         kref_get(&zhdr->refcount);
                         list_del_init(&zhdr->buddy);
                         zhdr->cpu = -1;
+                        set_bit(UNDER_RECLAIM, &page->private);
+                        break;
                 }
 
                 list_del_init(&page->lru);
@@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                                 goto next;
                 }
 next:
-                spin_lock(&pool->lock);
                 if (test_bit(PAGE_HEADLESS, &page->private)) {
                         if (ret == 0) {
-                                spin_unlock(&pool->lock);
                                 free_z3fold_page(page);
                                 return 0;
                         }
-                } else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
-                        atomic64_dec(&pool->pages_nr);
+                        spin_lock(&pool->lock);
+                        list_add(&page->lru, &pool->lru);
+                        spin_unlock(&pool->lock);
+                } else {
+                        z3fold_page_lock(zhdr);
+                        clear_bit(UNDER_RECLAIM, &page->private);
+                        if (kref_put(&zhdr->refcount,
+                                        release_z3fold_page_locked)) {
+                                atomic64_dec(&pool->pages_nr);
+                                return 0;
+                        }
+                        /*
+                         * if we are here, the page is still not completely
+                         * free. Take the global pool lock then to be able
+                         * to add it back to the lru list
+                         */
+                        spin_lock(&pool->lock);
+                        list_add(&page->lru, &pool->lru);
                         spin_unlock(&pool->lock);
-                        return 0;
+                        z3fold_page_unlock(zhdr);
                 }
 
-                /*
-                 * Add to the beginning of LRU.
-                 * Pool lock has to be kept here to ensure the page has
-                 * not already been released
-                 */
-                list_add(&page->lru, &pool->lru);
+                /* We started off locked to we need to lock the pool back */
+                spin_lock(&pool->lock);
         }
         spin_unlock(&pool->lock);
         return -EAGAIN;
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 9e5735a4d3a5..1876a741087c 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -170,7 +170,10 @@ __faddr2line() {
                 echo "$file_lines" | while read -r line
                 do
                         echo $line
-                        eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}')
+                        n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
+                        n1=$[$n-5]
+                        n2=$[$n+5]
+                        f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
                         awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
                 done
 
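Background on the faddr2line change above: addr2line can append a "(discriminator N)" suffix to its "function at file:line" output, which breaks the old awk/eval parse that assumed the line number was the last space/colon-separated field. The snippet below is an illustration only, not part of the patch; the sample output line, path, and values are made up, and it assumes bash plus GNU sed/awk, as the script itself does.

#!/bin/bash
# Illustration only: compare the old and new parsing styles on a hypothetical
# addr2line output line that ends in a discriminator.
line="bar at /home/user/src/foo.c:123 (discriminator 3)"

# Old parse: split on spaces/colons and assume the line number is $NF and the
# file name is $(NF-1). The trailing "(discriminator 3)" shifts the fields,
# so nothing usable comes out (here the eval even trips a shell syntax error),
# which is the failure mode the patch addresses.
eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s", $NF-5, $NF+5, $(NF-1))}')
echo "awk parse: f=$f n1=$n1 n2=$n2"

# New parse: anchor on ":<digits>" for the line number and "at <file>:" for
# the file name, so a discriminator suffix is simply ignored.
n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
echo "sed parse: f=$f n1=$((n-5)) n2=$((n+5))"

Run directly with bash, the last line prints the source file and the 11-line window (line-5 to line+5) that __faddr2line() goes on to print with awk.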