diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-11 21:04:12 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-11 21:04:12 -0400 |
| commit | f0ab773f5c96c29a5227234c4b5a820f5591b74d (patch) | |
| tree | a98d6190989c2190e522b8d1966727125bcbcbb3 | |
| parent | 4bc871984f7cb5b2dec3ae64b570cb02f9ce2227 (diff) | |
| parent | 2075b16e32c26e4031b9fd3cbe26c54676a8fcb5 (diff) | |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"13 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
rbtree: include rcu.h
scripts/faddr2line: fix error when addr2line output contains discriminator
ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
mm, oom: fix concurrent munlock and oom reaper unmap, v3
mm: migrate: fix double call of radix_tree_replace_slot()
proc/kcore: don't bounds check against address 0
mm: don't show nr_indirectly_reclaimable in /proc/vmstat
mm: sections are not offlined during memory hotremove
z3fold: fix reclaim lock-ups
init: fix false positives in W+X checking
lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
KASAN: prohibit KASAN+STRUCTLEAK combination
MAINTAINERS: update Shuah's email address
| -rw-r--r-- | MAINTAINERS | 3 |
| -rw-r--r-- | arch/Kconfig | 4 |
| -rw-r--r-- | fs/ocfs2/refcounttree.c | 14 |
| -rw-r--r-- | fs/proc/kcore.c | 23 |
| -rw-r--r-- | include/linux/oom.h | 2 |
| -rw-r--r-- | include/linux/rbtree_augmented.h | 1 |
| -rw-r--r-- | include/linux/rbtree_latch.h | 1 |
| -rw-r--r-- | init/main.c | 7 |
| -rw-r--r-- | kernel/module.c | 5 |
| -rw-r--r-- | lib/find_bit_benchmark.c | 7 |
| -rw-r--r-- | mm/migrate.c | 4 |
| -rw-r--r-- | mm/mmap.c | 44 |
| -rw-r--r-- | mm/oom_kill.c | 81 |
| -rw-r--r-- | mm/sparse.c | 2 |
| -rw-r--r-- | mm/vmstat.c | 6 |
| -rw-r--r-- | mm/z3fold.c | 42 |
| -rwxr-xr-x | scripts/faddr2line | 5 |
17 files changed, 164 insertions, 87 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index f913c80c8c38..58b9861ccf99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c | |||
| 3691 | 3691 | ||
| 3692 | CPU POWER MONITORING SUBSYSTEM | 3692 | CPU POWER MONITORING SUBSYSTEM |
| 3693 | M: Thomas Renninger <trenn@suse.com> | 3693 | M: Thomas Renninger <trenn@suse.com> |
| 3694 | M: Shuah Khan <shuahkh@osg.samsung.com> | ||
| 3695 | M: Shuah Khan <shuah@kernel.org> | 3694 | M: Shuah Khan <shuah@kernel.org> |
| 3696 | L: linux-pm@vger.kernel.org | 3695 | L: linux-pm@vger.kernel.org |
| 3697 | S: Maintained | 3696 | S: Maintained |
| @@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/ | |||
| 7696 | F: include/uapi/linux/sunrpc/ | 7695 | F: include/uapi/linux/sunrpc/ |
| 7697 | 7696 | ||
| 7698 | KERNEL SELFTEST FRAMEWORK | 7697 | KERNEL SELFTEST FRAMEWORK |
| 7699 | M: Shuah Khan <shuahkh@osg.samsung.com> | ||
| 7700 | M: Shuah Khan <shuah@kernel.org> | 7698 | M: Shuah Khan <shuah@kernel.org> |
| 7701 | L: linux-kselftest@vger.kernel.org | 7699 | L: linux-kselftest@vger.kernel.org |
| 7702 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git | 7700 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git |
| @@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c | |||
| 14650 | 14648 | ||
| 14651 | USB OVER IP DRIVER | 14649 | USB OVER IP DRIVER |
| 14652 | M: Valentina Manea <valentina.manea.m@gmail.com> | 14650 | M: Valentina Manea <valentina.manea.m@gmail.com> |
| 14653 | M: Shuah Khan <shuahkh@osg.samsung.com> | ||
| 14654 | M: Shuah Khan <shuah@kernel.org> | 14651 | M: Shuah Khan <shuah@kernel.org> |
| 14655 | L: linux-usb@vger.kernel.org | 14652 | L: linux-usb@vger.kernel.org |
| 14656 | S: Maintained | 14653 | S: Maintained |
diff --git a/arch/Kconfig b/arch/Kconfig index 8e0d665c8d53..75dd23acf133 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
| @@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY | |||
| 464 | config GCC_PLUGIN_STRUCTLEAK | 464 | config GCC_PLUGIN_STRUCTLEAK |
| 465 | bool "Force initialization of variables containing userspace addresses" | 465 | bool "Force initialization of variables containing userspace addresses" |
| 466 | depends on GCC_PLUGINS | 466 | depends on GCC_PLUGINS |
| 467 | # Currently STRUCTLEAK inserts initialization out of live scope of | ||
| 468 | # variables from KASAN point of view. This leads to KASAN false | ||
| 469 | # positive reports. Prohibit this combination for now. | ||
| 470 | depends on !KASAN_EXTRA | ||
| 467 | help | 471 | help |
| 468 | This plugin zero-initializes any structures containing a | 472 | This plugin zero-initializes any structures containing a |
| 469 | __user attribute. This can prevent some classes of information | 473 | __user attribute. This can prevent some classes of information |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 01c6b3894406..7869622af22a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -4250,10 +4250,11 @@ out: | |||
| 4250 | static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | 4250 | static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, |
| 4251 | struct dentry *new_dentry, bool preserve) | 4251 | struct dentry *new_dentry, bool preserve) |
| 4252 | { | 4252 | { |
| 4253 | int error; | 4253 | int error, had_lock; |
| 4254 | struct inode *inode = d_inode(old_dentry); | 4254 | struct inode *inode = d_inode(old_dentry); |
| 4255 | struct buffer_head *old_bh = NULL; | 4255 | struct buffer_head *old_bh = NULL; |
| 4256 | struct inode *new_orphan_inode = NULL; | 4256 | struct inode *new_orphan_inode = NULL; |
| 4257 | struct ocfs2_lock_holder oh; | ||
| 4257 | 4258 | ||
| 4258 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) | 4259 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) |
| 4259 | return -EOPNOTSUPP; | 4260 | return -EOPNOTSUPP; |
| @@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | |||
| 4295 | goto out; | 4296 | goto out; |
| 4296 | } | 4297 | } |
| 4297 | 4298 | ||
| 4299 | had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1, | ||
| 4300 | &oh); | ||
| 4301 | if (had_lock < 0) { | ||
| 4302 | error = had_lock; | ||
| 4303 | mlog_errno(error); | ||
| 4304 | goto out; | ||
| 4305 | } | ||
| 4306 | |||
| 4298 | /* If the security isn't preserved, we need to re-initialize them. */ | 4307 | /* If the security isn't preserved, we need to re-initialize them. */ |
| 4299 | if (!preserve) { | 4308 | if (!preserve) { |
| 4300 | error = ocfs2_init_security_and_acl(dir, new_orphan_inode, | 4309 | error = ocfs2_init_security_and_acl(dir, new_orphan_inode, |
| @@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | |||
| 4302 | if (error) | 4311 | if (error) |
| 4303 | mlog_errno(error); | 4312 | mlog_errno(error); |
| 4304 | } | 4313 | } |
| 4305 | out: | ||
| 4306 | if (!error) { | 4314 | if (!error) { |
| 4307 | error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, | 4315 | error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, |
| 4308 | new_dentry); | 4316 | new_dentry); |
| 4309 | if (error) | 4317 | if (error) |
| 4310 | mlog_errno(error); | 4318 | mlog_errno(error); |
| 4311 | } | 4319 | } |
| 4320 | ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock); | ||
| 4312 | 4321 | ||
| 4322 | out: | ||
| 4313 | if (new_orphan_inode) { | 4323 | if (new_orphan_inode) { |
| 4314 | /* | 4324 | /* |
| 4315 | * We need to open_unlock the inode no matter whether we | 4325 | * We need to open_unlock the inode no matter whether we |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d1e82761de81..e64ecb9f2720 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
| @@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) | |||
| 209 | { | 209 | { |
| 210 | struct list_head *head = (struct list_head *)arg; | 210 | struct list_head *head = (struct list_head *)arg; |
| 211 | struct kcore_list *ent; | 211 | struct kcore_list *ent; |
| 212 | struct page *p; | ||
| 213 | |||
| 214 | if (!pfn_valid(pfn)) | ||
| 215 | return 1; | ||
| 216 | |||
| 217 | p = pfn_to_page(pfn); | ||
| 218 | if (!memmap_valid_within(pfn, p, page_zone(p))) | ||
| 219 | return 1; | ||
| 212 | 220 | ||
| 213 | ent = kmalloc(sizeof(*ent), GFP_KERNEL); | 221 | ent = kmalloc(sizeof(*ent), GFP_KERNEL); |
| 214 | if (!ent) | 222 | if (!ent) |
| 215 | return -ENOMEM; | 223 | return -ENOMEM; |
| 216 | ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); | 224 | ent->addr = (unsigned long)page_to_virt(p); |
| 217 | ent->size = nr_pages << PAGE_SHIFT; | 225 | ent->size = nr_pages << PAGE_SHIFT; |
| 218 | 226 | ||
| 219 | /* Sanity check: Can happen in 32bit arch...maybe */ | 227 | if (!virt_addr_valid(ent->addr)) |
| 220 | if (ent->addr < (unsigned long) __va(0)) | ||
| 221 | goto free_out; | 228 | goto free_out; |
| 222 | 229 | ||
| 223 | /* cut not-mapped area. ....from ppc-32 code. */ | 230 | /* cut not-mapped area. ....from ppc-32 code. */ |
| 224 | if (ULONG_MAX - ent->addr < ent->size) | 231 | if (ULONG_MAX - ent->addr < ent->size) |
| 225 | ent->size = ULONG_MAX - ent->addr; | 232 | ent->size = ULONG_MAX - ent->addr; |
| 226 | 233 | ||
| 227 | /* cut when vmalloc() area is higher than direct-map area */ | 234 | /* |
| 228 | if (VMALLOC_START > (unsigned long)__va(0)) { | 235 | * We've already checked virt_addr_valid so we know this address |
| 229 | if (ent->addr > VMALLOC_START) | 236 | * is a valid pointer, therefore we can check against it to determine |
| 230 | goto free_out; | 237 | * if we need to trim |
| 238 | */ | ||
| 239 | if (VMALLOC_START > ent->addr) { | ||
| 231 | if (VMALLOC_START - ent->addr < ent->size) | 240 | if (VMALLOC_START - ent->addr < ent->size) |
| 232 | ent->size = VMALLOC_START - ent->addr; | 241 | ent->size = VMALLOC_START - ent->addr; |
| 233 | } | 242 | } |
diff --git a/include/linux/oom.h b/include/linux/oom.h index 5bad038ac012..6adac113e96d 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h | |||
| @@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm) | |||
| 95 | return 0; | 95 | return 0; |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | void __oom_reap_task_mm(struct mm_struct *mm); | ||
| 99 | |||
| 98 | extern unsigned long oom_badness(struct task_struct *p, | 100 | extern unsigned long oom_badness(struct task_struct *p, |
| 99 | struct mem_cgroup *memcg, const nodemask_t *nodemask, | 101 | struct mem_cgroup *memcg, const nodemask_t *nodemask, |
| 100 | unsigned long totalpages); | 102 | unsigned long totalpages); |
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 6bfd2b581f75..af8a61be2d8d 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | 26 | ||
| 27 | #include <linux/compiler.h> | 27 | #include <linux/compiler.h> |
| 28 | #include <linux/rbtree.h> | 28 | #include <linux/rbtree.h> |
| 29 | #include <linux/rcupdate.h> | ||
| 29 | 30 | ||
| 30 | /* | 31 | /* |
| 31 | * Please note - only struct rb_augment_callbacks and the prototypes for | 32 | * Please note - only struct rb_augment_callbacks and the prototypes for |
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h index ece43e882b56..7d012faa509a 100644 --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | #include <linux/rbtree.h> | 36 | #include <linux/rbtree.h> |
| 37 | #include <linux/seqlock.h> | 37 | #include <linux/seqlock.h> |
| 38 | #include <linux/rcupdate.h> | ||
| 38 | 39 | ||
| 39 | struct latch_tree_node { | 40 | struct latch_tree_node { |
| 40 | struct rb_node node[2]; | 41 | struct rb_node node[2]; |
diff --git a/init/main.c b/init/main.c index a404936d85d8..fd37315835b4 100644 --- a/init/main.c +++ b/init/main.c | |||
| @@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata); | |||
| 1034 | static void mark_readonly(void) | 1034 | static void mark_readonly(void) |
| 1035 | { | 1035 | { |
| 1036 | if (rodata_enabled) { | 1036 | if (rodata_enabled) { |
| 1037 | /* | ||
| 1038 | * load_module() results in W+X mappings, which are cleaned up | ||
| 1039 | * with call_rcu_sched(). Let's make sure that queued work is | ||
| 1040 | * flushed so that we don't hit false positives looking for | ||
| 1041 | * insecure pages which are W+X. | ||
| 1042 | */ | ||
| 1043 | rcu_barrier_sched(); | ||
| 1037 | mark_rodata_ro(); | 1044 | mark_rodata_ro(); |
| 1038 | rodata_test(); | 1045 | rodata_test(); |
| 1039 | } else | 1046 | } else |
diff --git a/kernel/module.c b/kernel/module.c index ce8066b88178..c9bea7f2b43e 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod) | |||
| 3517 | * walking this with preempt disabled. In all the failure paths, we | 3517 | * walking this with preempt disabled. In all the failure paths, we |
| 3518 | * call synchronize_sched(), but we don't want to slow down the success | 3518 | * call synchronize_sched(), but we don't want to slow down the success |
| 3519 | * path, so use actual RCU here. | 3519 | * path, so use actual RCU here. |
| 3520 | * Note that module_alloc() on most architectures creates W+X page | ||
| 3521 | * mappings which won't be cleaned up until do_free_init() runs. Any | ||
| 3522 | * code such as mark_rodata_ro() which depends on those mappings to | ||
| 3523 | * be cleaned up needs to sync with the queued work - ie | ||
| 3524 | * rcu_barrier_sched() | ||
| 3520 | */ | 3525 | */ |
| 3521 | call_rcu_sched(&freeinit->rcu, do_free_init); | 3526 | call_rcu_sched(&freeinit->rcu, do_free_init); |
| 3522 | mutex_unlock(&module_mutex); | 3527 | mutex_unlock(&module_mutex); |
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 5985a25e6cbc..5367ffa5c18f 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c | |||
| @@ -132,7 +132,12 @@ static int __init find_bit_test(void) | |||
| 132 | test_find_next_bit(bitmap, BITMAP_LEN); | 132 | test_find_next_bit(bitmap, BITMAP_LEN); |
| 133 | test_find_next_zero_bit(bitmap, BITMAP_LEN); | 133 | test_find_next_zero_bit(bitmap, BITMAP_LEN); |
| 134 | test_find_last_bit(bitmap, BITMAP_LEN); | 134 | test_find_last_bit(bitmap, BITMAP_LEN); |
| 135 | test_find_first_bit(bitmap, BITMAP_LEN); | 135 | |
| 136 | /* | ||
| 137 | * test_find_first_bit() may take some time, so | ||
| 138 | * traverse only part of bitmap to avoid soft lockup. | ||
| 139 | */ | ||
| 140 | test_find_first_bit(bitmap, BITMAP_LEN / 10); | ||
| 136 | test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); | 141 | test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); |
| 137 | 142 | ||
| 138 | pr_err("\nStart testing find_bit() with sparse bitmap\n"); | 143 | pr_err("\nStart testing find_bit() with sparse bitmap\n"); |
diff --git a/mm/migrate.c b/mm/migrate.c index 568433023831..8c0af0f7cab1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping, | |||
| 528 | int i; | 528 | int i; |
| 529 | int index = page_index(page); | 529 | int index = page_index(page); |
| 530 | 530 | ||
| 531 | for (i = 0; i < HPAGE_PMD_NR; i++) { | 531 | for (i = 1; i < HPAGE_PMD_NR; i++) { |
| 532 | pslot = radix_tree_lookup_slot(&mapping->i_pages, | 532 | pslot = radix_tree_lookup_slot(&mapping->i_pages, |
| 533 | index + i); | 533 | index + i); |
| 534 | radix_tree_replace_slot(&mapping->i_pages, pslot, | 534 | radix_tree_replace_slot(&mapping->i_pages, pslot, |
| 535 | newpage + i); | 535 | newpage + i); |
| 536 | } | 536 | } |
| 537 | } else { | ||
| 538 | radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); | ||
| 539 | } | 537 | } |
| 540 | 538 | ||
| 541 | /* | 539 | /* |
| @@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm) | |||
| 3056 | /* mm's last user has gone, and its about to be pulled down */ | 3056 | /* mm's last user has gone, and its about to be pulled down */ |
| 3057 | mmu_notifier_release(mm); | 3057 | mmu_notifier_release(mm); |
| 3058 | 3058 | ||
| 3059 | if (unlikely(mm_is_oom_victim(mm))) { | ||
| 3060 | /* | ||
| 3061 | * Manually reap the mm to free as much memory as possible. | ||
| 3062 | * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard | ||
| 3063 | * this mm from further consideration. Taking mm->mmap_sem for | ||
| 3064 | * write after setting MMF_OOM_SKIP will guarantee that the oom | ||
| 3065 | * reaper will not run on this mm again after mmap_sem is | ||
| 3066 | * dropped. | ||
| 3067 | * | ||
| 3068 | * Nothing can be holding mm->mmap_sem here and the above call | ||
| 3069 | * to mmu_notifier_release(mm) ensures mmu notifier callbacks in | ||
| 3070 | * __oom_reap_task_mm() will not block. | ||
| 3071 | * | ||
| 3072 | * This needs to be done before calling munlock_vma_pages_all(), | ||
| 3073 | * which clears VM_LOCKED, otherwise the oom reaper cannot | ||
| 3074 | * reliably test it. | ||
| 3075 | */ | ||
| 3076 | mutex_lock(&oom_lock); | ||
| 3077 | __oom_reap_task_mm(mm); | ||
| 3078 | mutex_unlock(&oom_lock); | ||
| 3079 | |||
| 3080 | set_bit(MMF_OOM_SKIP, &mm->flags); | ||
| 3081 | down_write(&mm->mmap_sem); | ||
| 3082 | up_write(&mm->mmap_sem); | ||
| 3083 | } | ||
| 3084 | |||
| 3059 | if (mm->locked_vm) { | 3085 | if (mm->locked_vm) { |
| 3060 | vma = mm->mmap; | 3086 | vma = mm->mmap; |
| 3061 | while (vma) { | 3087 | while (vma) { |
| @@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm) | |||
| 3077 | /* update_hiwater_rss(mm) here? but nobody should be looking */ | 3103 | /* update_hiwater_rss(mm) here? but nobody should be looking */ |
| 3078 | /* Use -1 here to ensure all VMAs in the mm are unmapped */ | 3104 | /* Use -1 here to ensure all VMAs in the mm are unmapped */ |
| 3079 | unmap_vmas(&tlb, vma, 0, -1); | 3105 | unmap_vmas(&tlb, vma, 0, -1); |
| 3080 | |||
| 3081 | if (unlikely(mm_is_oom_victim(mm))) { | ||
| 3082 | /* | ||
| 3083 | * Wait for oom_reap_task() to stop working on this | ||
| 3084 | * mm. Because MMF_OOM_SKIP is already set before | ||
| 3085 | * calling down_read(), oom_reap_task() will not run | ||
| 3086 | * on this "mm" post up_write(). | ||
| 3087 | * | ||
| 3088 | * mm_is_oom_victim() cannot be set from under us | ||
| 3089 | * either because victim->mm is already set to NULL | ||
| 3090 | * under task_lock before calling mmput and oom_mm is | ||
| 3091 | * set not NULL by the OOM killer only if victim->mm | ||
| 3092 | * is found not NULL while holding the task_lock. | ||
| 3093 | */ | ||
| 3094 | set_bit(MMF_OOM_SKIP, &mm->flags); | ||
| 3095 | down_write(&mm->mmap_sem); | ||
| 3096 | up_write(&mm->mmap_sem); | ||
| 3097 | } | ||
| 3098 | free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); | 3106 | free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); |
| 3099 | tlb_finish_mmu(&tlb, 0, -1); | 3107 | tlb_finish_mmu(&tlb, 0, -1); |
| 3100 | 3108 | ||
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ff992fa8760a..8ba6cb88cf58 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) | |||
| 469 | return false; | 469 | return false; |
| 470 | } | 470 | } |
| 471 | 471 | ||
| 472 | |||
| 473 | #ifdef CONFIG_MMU | 472 | #ifdef CONFIG_MMU |
| 474 | /* | 473 | /* |
| 475 | * OOM Reaper kernel thread which tries to reap the memory used by the OOM | 474 | * OOM Reaper kernel thread which tries to reap the memory used by the OOM |
| @@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); | |||
| 480 | static struct task_struct *oom_reaper_list; | 479 | static struct task_struct *oom_reaper_list; |
| 481 | static DEFINE_SPINLOCK(oom_reaper_lock); | 480 | static DEFINE_SPINLOCK(oom_reaper_lock); |
| 482 | 481 | ||
| 483 | static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) | 482 | void __oom_reap_task_mm(struct mm_struct *mm) |
| 484 | { | 483 | { |
| 485 | struct mmu_gather tlb; | ||
| 486 | struct vm_area_struct *vma; | 484 | struct vm_area_struct *vma; |
| 485 | |||
| 486 | /* | ||
| 487 | * Tell all users of get_user/copy_from_user etc... that the content | ||
| 488 | * is no longer stable. No barriers really needed because unmapping | ||
| 489 | * should imply barriers already and the reader would hit a page fault | ||
| 490 | * if it stumbled over a reaped memory. | ||
| 491 | */ | ||
| 492 | set_bit(MMF_UNSTABLE, &mm->flags); | ||
| 493 | |||
| 494 | for (vma = mm->mmap ; vma; vma = vma->vm_next) { | ||
| 495 | if (!can_madv_dontneed_vma(vma)) | ||
| 496 | continue; | ||
| 497 | |||
| 498 | /* | ||
| 499 | * Only anonymous pages have a good chance to be dropped | ||
| 500 | * without additional steps which we cannot afford as we | ||
| 501 | * are OOM already. | ||
| 502 | * | ||
| 503 | * We do not even care about fs backed pages because all | ||
| 504 | * which are reclaimable have already been reclaimed and | ||
| 505 | * we do not want to block exit_mmap by keeping mm ref | ||
| 506 | * count elevated without a good reason. | ||
| 507 | */ | ||
| 508 | if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { | ||
| 509 | const unsigned long start = vma->vm_start; | ||
| 510 | const unsigned long end = vma->vm_end; | ||
| 511 | struct mmu_gather tlb; | ||
| 512 | |||
| 513 | tlb_gather_mmu(&tlb, mm, start, end); | ||
| 514 | mmu_notifier_invalidate_range_start(mm, start, end); | ||
| 515 | unmap_page_range(&tlb, vma, start, end, NULL); | ||
| 516 | mmu_notifier_invalidate_range_end(mm, start, end); | ||
| 517 | tlb_finish_mmu(&tlb, start, end); | ||
| 518 | } | ||
| 519 | } | ||
| 520 | } | ||
| 521 | |||
| 522 | static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) | ||
| 523 | { | ||
| 487 | bool ret = true; | 524 | bool ret = true; |
| 488 | 525 | ||
| 489 | /* | 526 | /* |
| 490 | * We have to make sure to not race with the victim exit path | 527 | * We have to make sure to not race with the victim exit path |
| 491 | * and cause premature new oom victim selection: | 528 | * and cause premature new oom victim selection: |
| 492 | * __oom_reap_task_mm exit_mm | 529 | * oom_reap_task_mm exit_mm |
| 493 | * mmget_not_zero | 530 | * mmget_not_zero |
| 494 | * mmput | 531 | * mmput |
| 495 | * atomic_dec_and_test | 532 | * atomic_dec_and_test |
| @@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) | |||
| 534 | 571 | ||
| 535 | trace_start_task_reaping(tsk->pid); | 572 | trace_start_task_reaping(tsk->pid); |
| 536 | 573 | ||
| 537 | /* | 574 | __oom_reap_task_mm(mm); |
| 538 | * Tell all users of get_user/copy_from_user etc... that the content | ||
| 539 | * is no longer stable. No barriers really needed because unmapping | ||
| 540 | * should imply barriers already and the reader would hit a page fault | ||
| 541 | * if it stumbled over a reaped memory. | ||
| 542 | */ | ||
| 543 | set_bit(MMF_UNSTABLE, &mm->flags); | ||
| 544 | |||
| 545 | for (vma = mm->mmap ; vma; vma = vma->vm_next) { | ||
| 546 | if (!can_madv_dontneed_vma(vma)) | ||
| 547 | continue; | ||
| 548 | 575 | ||
| 549 | /* | ||
| 550 | * Only anonymous pages have a good chance to be dropped | ||
| 551 | * without additional steps which we cannot afford as we | ||
| 552 | * are OOM already. | ||
| 553 | * | ||
| 554 | * We do not even care about fs backed pages because all | ||
| 555 | * which are reclaimable have already been reclaimed and | ||
| 556 | * we do not want to block exit_mmap by keeping mm ref | ||
| 557 | * count elevated without a good reason. | ||
| 558 | */ | ||
| 559 | if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { | ||
| 560 | const unsigned long start = vma->vm_start; | ||
| 561 | const unsigned long end = vma->vm_end; | ||
| 562 | |||
| 563 | tlb_gather_mmu(&tlb, mm, start, end); | ||
| 564 | mmu_notifier_invalidate_range_start(mm, start, end); | ||
| 565 | unmap_page_range(&tlb, vma, start, end, NULL); | ||
| 566 | mmu_notifier_invalidate_range_end(mm, start, end); | ||
| 567 | tlb_finish_mmu(&tlb, start, end); | ||
| 568 | } | ||
| 569 | } | ||
| 570 | pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", | 576 | pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", |
| 571 | task_pid_nr(tsk), tsk->comm, | 577 | task_pid_nr(tsk), tsk->comm, |
| 572 | K(get_mm_counter(mm, MM_ANONPAGES)), | 578 | K(get_mm_counter(mm, MM_ANONPAGES)), |
| @@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk) | |||
| 587 | struct mm_struct *mm = tsk->signal->oom_mm; | 593 | struct mm_struct *mm = tsk->signal->oom_mm; |
| 588 | 594 | ||
| 589 | /* Retry the down_read_trylock(mmap_sem) a few times */ | 595 | /* Retry the down_read_trylock(mmap_sem) a few times */ |
| 590 | while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) | 596 | while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) |
| 591 | schedule_timeout_idle(HZ/10); | 597 | schedule_timeout_idle(HZ/10); |
| 592 | 598 | ||
| 593 | if (attempts <= MAX_OOM_REAP_RETRIES || | 599 | if (attempts <= MAX_OOM_REAP_RETRIES || |
| 594 | test_bit(MMF_OOM_SKIP, &mm->flags)) | 600 | test_bit(MMF_OOM_SKIP, &mm->flags)) |
| 595 | goto done; | 601 | goto done; |
| 596 | 602 | ||
| 597 | |||
| 598 | pr_info("oom_reaper: unable to reap pid:%d (%s)\n", | 603 | pr_info("oom_reaper: unable to reap pid:%d (%s)\n", |
| 599 | task_pid_nr(tsk), tsk->comm); | 604 | task_pid_nr(tsk), tsk->comm); |
| 600 | debug_show_all_locks(); | 605 | debug_show_all_locks(); |
diff --git a/mm/sparse.c b/mm/sparse.c index 62eef264a7bd..73dc2fcc0eab 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
| @@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) | |||
| 629 | unsigned long pfn; | 629 | unsigned long pfn; |
| 630 | 630 | ||
| 631 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | 631 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { |
| 632 | unsigned long section_nr = pfn_to_section_nr(start_pfn); | 632 | unsigned long section_nr = pfn_to_section_nr(pfn); |
| 633 | struct mem_section *ms; | 633 | struct mem_section *ms; |
| 634 | 634 | ||
| 635 | /* | 635 | /* |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 536332e988b8..a2b9518980ce 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
| @@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = { | |||
| 1161 | "nr_vmscan_immediate_reclaim", | 1161 | "nr_vmscan_immediate_reclaim", |
| 1162 | "nr_dirtied", | 1162 | "nr_dirtied", |
| 1163 | "nr_written", | 1163 | "nr_written", |
| 1164 | "nr_indirectly_reclaimable", | 1164 | "", /* nr_indirectly_reclaimable */ |
| 1165 | 1165 | ||
| 1166 | /* enum writeback_stat_item counters */ | 1166 | /* enum writeback_stat_item counters */ |
| 1167 | "nr_dirty_threshold", | 1167 | "nr_dirty_threshold", |
| @@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg) | |||
| 1740 | unsigned long *l = arg; | 1740 | unsigned long *l = arg; |
| 1741 | unsigned long off = l - (unsigned long *)m->private; | 1741 | unsigned long off = l - (unsigned long *)m->private; |
| 1742 | 1742 | ||
| 1743 | /* Skip hidden vmstat items. */ | ||
| 1744 | if (*vmstat_text[off] == '\0') | ||
| 1745 | return 0; | ||
| 1746 | |||
| 1743 | seq_puts(m, vmstat_text[off]); | 1747 | seq_puts(m, vmstat_text[off]); |
| 1744 | seq_put_decimal_ull(m, " ", *l); | 1748 | seq_put_decimal_ull(m, " ", *l); |
| 1745 | seq_putc(m, '\n'); | 1749 | seq_putc(m, '\n'); |
diff --git a/mm/z3fold.c b/mm/z3fold.c index c0bca6153b95..4b366d181f35 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c | |||
| @@ -144,7 +144,8 @@ enum z3fold_page_flags { | |||
| 144 | PAGE_HEADLESS = 0, | 144 | PAGE_HEADLESS = 0, |
| 145 | MIDDLE_CHUNK_MAPPED, | 145 | MIDDLE_CHUNK_MAPPED, |
| 146 | NEEDS_COMPACTING, | 146 | NEEDS_COMPACTING, |
| 147 | PAGE_STALE | 147 | PAGE_STALE, |
| 148 | UNDER_RECLAIM | ||
| 148 | }; | 149 | }; |
| 149 | 150 | ||
| 150 | /***************** | 151 | /***************** |
| @@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, | |||
| 173 | clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); | 174 | clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); |
| 174 | clear_bit(NEEDS_COMPACTING, &page->private); | 175 | clear_bit(NEEDS_COMPACTING, &page->private); |
| 175 | clear_bit(PAGE_STALE, &page->private); | 176 | clear_bit(PAGE_STALE, &page->private); |
| 177 | clear_bit(UNDER_RECLAIM, &page->private); | ||
| 176 | 178 | ||
| 177 | spin_lock_init(&zhdr->page_lock); | 179 | spin_lock_init(&zhdr->page_lock); |
| 178 | kref_init(&zhdr->refcount); | 180 | kref_init(&zhdr->refcount); |
| @@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) | |||
| 756 | atomic64_dec(&pool->pages_nr); | 758 | atomic64_dec(&pool->pages_nr); |
| 757 | return; | 759 | return; |
| 758 | } | 760 | } |
| 761 | if (test_bit(UNDER_RECLAIM, &page->private)) { | ||
| 762 | z3fold_page_unlock(zhdr); | ||
| 763 | return; | ||
| 764 | } | ||
| 759 | if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { | 765 | if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { |
| 760 | z3fold_page_unlock(zhdr); | 766 | z3fold_page_unlock(zhdr); |
| 761 | return; | 767 | return; |
| @@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) | |||
| 840 | kref_get(&zhdr->refcount); | 846 | kref_get(&zhdr->refcount); |
| 841 | list_del_init(&zhdr->buddy); | 847 | list_del_init(&zhdr->buddy); |
| 842 | zhdr->cpu = -1; | 848 | zhdr->cpu = -1; |
| 849 | set_bit(UNDER_RECLAIM, &page->private); | ||
| 850 | break; | ||
| 843 | } | 851 | } |
| 844 | 852 | ||
| 845 | list_del_init(&page->lru); | 853 | list_del_init(&page->lru); |
| @@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) | |||
| 887 | goto next; | 895 | goto next; |
| 888 | } | 896 | } |
| 889 | next: | 897 | next: |
| 890 | spin_lock(&pool->lock); | ||
| 891 | if (test_bit(PAGE_HEADLESS, &page->private)) { | 898 | if (test_bit(PAGE_HEADLESS, &page->private)) { |
| 892 | if (ret == 0) { | 899 | if (ret == 0) { |
| 893 | spin_unlock(&pool->lock); | ||
| 894 | free_z3fold_page(page); | 900 | free_z3fold_page(page); |
| 895 | return 0; | 901 | return 0; |
| 896 | } | 902 | } |
| 897 | } else if (kref_put(&zhdr->refcount, release_z3fold_page)) { | 903 | spin_lock(&pool->lock); |
| 898 | atomic64_dec(&pool->pages_nr); | 904 | list_add(&page->lru, &pool->lru); |
| 905 | spin_unlock(&pool->lock); | ||
| 906 | } else { | ||
| 907 | z3fold_page_lock(zhdr); | ||
| 908 | clear_bit(UNDER_RECLAIM, &page->private); | ||
| 909 | if (kref_put(&zhdr->refcount, | ||
| 910 | release_z3fold_page_locked)) { | ||
| 911 | atomic64_dec(&pool->pages_nr); | ||
| 912 | return 0; | ||
| 913 | } | ||
| 914 | /* | ||
| 915 | * if we are here, the page is still not completely | ||
| 916 | * free. Take the global pool lock then to be able | ||
| 917 | * to add it back to the lru list | ||
| 918 | */ | ||
| 919 | spin_lock(&pool->lock); | ||
| 920 | list_add(&page->lru, &pool->lru); | ||
| 899 | spin_unlock(&pool->lock); | 921 | spin_unlock(&pool->lock); |
| 900 | return 0; | 922 | z3fold_page_unlock(zhdr); |
| 901 | } | 923 | } |
| 902 | 924 | ||
| 903 | /* | 925 | /* We started off locked to we need to lock the pool back */ |
| 904 | * Add to the beginning of LRU. | 926 | spin_lock(&pool->lock); |
| 905 | * Pool lock has to be kept here to ensure the page has | ||
| 906 | * not already been released | ||
| 907 | */ | ||
| 908 | list_add(&page->lru, &pool->lru); | ||
| 909 | } | 927 | } |
| 910 | spin_unlock(&pool->lock); | 928 | spin_unlock(&pool->lock); |
| 911 | return -EAGAIN; | 929 | return -EAGAIN; |
diff --git a/scripts/faddr2line b/scripts/faddr2line index 9e5735a4d3a5..1876a741087c 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line | |||
| @@ -170,7 +170,10 @@ __faddr2line() { | |||
| 170 | echo "$file_lines" | while read -r line | 170 | echo "$file_lines" | while read -r line |
| 171 | do | 171 | do |
| 172 | echo $line | 172 | echo $line |
| 173 | eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}') | 173 | n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g') |
| 174 | n1=$[$n-5] | ||
| 175 | n2=$[$n+5] | ||
| 176 | f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g') | ||
| 174 | awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f | 177 | awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f |
| 175 | done | 178 | done |
| 176 | 179 | ||
