-rw-r--r--	include/linux/mm.h	  2
-rw-r--r--	mm/internal.h		  5
-rw-r--r--	mm/memory.c		 17
-rw-r--r--	mm/oom_kill.c		151
4 files changed, 162 insertions, 13 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 450fc977ed02..ed6407d1b7b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1132,6 +1132,8 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
+	bool ignore_dirty;			/* Ignore dirty pages */
+	bool check_swap_entries;		/* Check also swap entries */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
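
For orientation, the two new flags are opt-in per call site. A minimal caller sketch, assuming this kernel era's zap_page_range(vma, start, size, details) signature (the flag combination mirrors what the oom reaper passes later in this patch):

	/* Zap a range but leave dirty file-backed pages in place while
	 * still dropping swap entries. */
	struct zap_details details = {
		.ignore_dirty = true,
		.check_swap_entries = true,
	};

	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start,
		       &details);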
diff --git a/mm/internal.h b/mm/internal.h
index 7449392c6faa..b79abb6721cf 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -38,6 +38,11 @@
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
+void unmap_page_range(struct mmu_gather *tlb,
+			     struct vm_area_struct *vma,
+			     unsigned long addr, unsigned long end,
+			     struct zap_details *details);
+
 extern int __do_page_cache_readahead(struct address_space *mapping,
 	struct file *filp, pgoff_t offset, unsigned long nr_to_read,
 	unsigned long lookahead_size);
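
Making unmap_page_range() visible to the rest of mm/ leaves the mmu_gather setup to the caller. A minimal sketch of that contract, assuming the caller already holds mmap_sem for read (this mirrors what __oom_reap_vmas() does below):

	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, 0, -1);	/* full address-space flush window */
	unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, NULL);
	tlb_finish_mmu(&tlb, 0, -1);		/* flush TLBs, free gathered pages */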
diff --git a/mm/memory.c b/mm/memory.c
index 81dca0083fcd..098f00d05461 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1102,6 +1102,12 @@ again:
 
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
+					/*
+					 * oom_reaper cannot tear down dirty
+					 * pages
+					 */
+					if (unlikely(details && details->ignore_dirty))
+						continue;
 					force_flush = 1;
 					set_page_dirty(page);
 				}
@@ -1120,8 +1126,8 @@ again:
 			}
 			continue;
 		}
-		/* If details->check_mapping, we leave swap entries. */
-		if (unlikely(details))
+		/* only check swap_entries if explicitly asked for in details */
+		if (unlikely(details && !details->check_swap_entries))
 			continue;
 
 		entry = pte_to_swp_entry(ptent);
@@ -1226,7 +1232,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	return addr;
 }
 
-static void unmap_page_range(struct mmu_gather *tlb,
+void unmap_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end,
 			     struct zap_details *details)
@@ -1234,9 +1240,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
 	pgd_t *pgd;
 	unsigned long next;
 
-	if (details && !details->check_mapping)
-		details = NULL;
-
 	BUG_ON(addr >= end);
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
@@ -2432,7 +2435,7 @@ static inline void unmap_mapping_range_tree(struct rb_root *root,
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows)
 {
-	struct zap_details details;
+	struct zap_details details = { };
 	pgoff_t hba = holebegin >> PAGE_SHIFT;
 	pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
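
The switch to an empty initializer is needed because unmap_page_range() no longer rewrites details to NULL when check_mapping is unset: a stack-allocated zap_details must now come out fully zeroed, or the two new bool flags would hold stack garbage. A reminder of the C semantics relied on here:

	struct zap_details details = { };	/* every member zero-initialized:
						 * .check_mapping = NULL,
						 * .ignore_dirty = false,
						 * .check_swap_entries = false */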
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 06f7e1707847..f7ed6ece0719 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -35,6 +35,11 @@
 #include <linux/freezer.h>
 #include <linux/ftrace.h>
 #include <linux/ratelimit.h>
+#include <linux/kthread.h>
+#include <linux/init.h>
+
+#include <asm/tlb.h>
+#include "internal.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/oom.h>
@@ -405,6 +410,133 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
 bool oom_killer_disabled __read_mostly;
 
+#ifdef CONFIG_MMU
+/*
+ * OOM Reaper kernel thread which tries to reap the memory used by the OOM
+ * victim (if that is possible) to help the OOM killer to move on.
+ */
+static struct task_struct *oom_reaper_th;
+static struct mm_struct *mm_to_reap;
+static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
+
+static bool __oom_reap_vmas(struct mm_struct *mm)
+{
+	struct mmu_gather tlb;
+	struct vm_area_struct *vma;
+	struct zap_details details = {.check_swap_entries = true,
+				      .ignore_dirty = true};
+	bool ret = true;
+
+	/* We might have raced with exit path */
+	if (!atomic_inc_not_zero(&mm->mm_users))
+		return true;
+
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		ret = false;
+		goto out;
+	}
+
+	tlb_gather_mmu(&tlb, mm, 0, -1);
+	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+		if (is_vm_hugetlb_page(vma))
+			continue;
+
+		/*
+		 * mlocked VMAs require explicit munlocking before unmap.
+		 * Let's keep it simple here and skip such VMAs.
+		 */
+		if (vma->vm_flags & VM_LOCKED)
+			continue;
+
+		/*
+		 * Only anonymous pages have a good chance to be dropped
+		 * without additional steps which we cannot afford as we
+		 * are OOM already.
+		 *
+		 * We do not even care about fs backed pages because all
+		 * which are reclaimable have already been reclaimed and
+		 * we do not want to block exit_mmap by keeping mm ref
+		 * count elevated without a good reason.
+		 */
+		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
+			unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
+					 &details);
+	}
+	tlb_finish_mmu(&tlb, 0, -1);
+	up_read(&mm->mmap_sem);
+out:
+	mmput(mm);
+	return ret;
+}
+
+static void oom_reap_vmas(struct mm_struct *mm)
+{
+	int attempts = 0;
+
+	/* Retry the down_read_trylock(mmap_sem) a few times */
+	while (attempts++ < 10 && !__oom_reap_vmas(mm))
+		schedule_timeout_idle(HZ/10);
+
+	/* Drop a reference taken by wake_oom_reaper */
+	mmdrop(mm);
+}
+
+static int oom_reaper(void *unused)
+{
+	while (true) {
+		struct mm_struct *mm;
+
+		wait_event_freezable(oom_reaper_wait,
+				     (mm = READ_ONCE(mm_to_reap)));
+		oom_reap_vmas(mm);
+		WRITE_ONCE(mm_to_reap, NULL);
+	}
+
+	return 0;
+}
+
+static void wake_oom_reaper(struct mm_struct *mm)
+{
+	struct mm_struct *old_mm;
+
+	if (!oom_reaper_th)
+		return;
+
+	/*
+	 * Pin the given mm. Use mm_count instead of mm_users because
+	 * we do not want to delay the address space tear down.
+	 */
+	atomic_inc(&mm->mm_count);
+
+	/*
+	 * Make sure that only a single mm is ever queued for the reaper
+	 * because multiple are not necessary and the operation might be
+	 * disruptive so better reduce it to the bare minimum.
+	 */
+	old_mm = cmpxchg(&mm_to_reap, NULL, mm);
+	if (!old_mm)
+		wake_up(&oom_reaper_wait);
+	else
+		mmdrop(mm);
+}
+
+static int __init oom_init(void)
+{
+	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
+	if (IS_ERR(oom_reaper_th)) {
+		pr_err("Unable to start OOM reaper %ld. Continuing regardless\n",
+				PTR_ERR(oom_reaper_th));
+		oom_reaper_th = NULL;
+	}
+	return 0;
+}
+subsys_initcall(oom_init)
+#else
+static void wake_oom_reaper(struct mm_struct *mm)
+{
+}
+#endif
+
 /**
  * mark_oom_victim - mark the given task as OOM victim
  * @tsk: task to mark
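
The mm_to_reap handoff above is a single-slot queue: producers publish with cmpxchg() and losers drop their pin immediately, while the consumer clears the slot only after the reap finishes, so at most one mm is ever in flight. A standalone userspace analogue of that protocol in C11 atomics (hypothetical names, sketch only):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct item { _Atomic int refcount; };

	static _Atomic(struct item *) slot;	/* plays the role of mm_to_reap */

	/* Producer side, like wake_oom_reaper(). */
	static bool publish(struct item *it)
	{
		struct item *expected = NULL;

		atomic_fetch_add(&it->refcount, 1);	/* pin, like atomic_inc(&mm->mm_count) */
		if (atomic_compare_exchange_strong(&slot, &expected, it))
			return true;			/* winner: wake the consumer */
		atomic_fetch_sub(&it->refcount, 1);	/* loser: drop the pin, like mmdrop() */
		return false;
	}

	/* Consumer side, like oom_reaper(): clear the slot only after the
	 * work is done, so concurrent publishers keep losing meanwhile. */
	static void consume(void (*reap)(struct item *))
	{
		struct item *it = atomic_load(&slot);

		if (it) {
			reap(it);
			atomic_store(&slot, NULL);
		}
	}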
@@ -510,6 +642,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 	unsigned int victim_points = 0;
 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
 					      DEFAULT_RATELIMIT_BURST);
+	bool can_oom_reap = true;
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -600,17 +733,23 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 			continue;
 		if (same_thread_group(p, victim))
 			continue;
-		if (unlikely(p->flags & PF_KTHREAD))
-			continue;
-		if (is_global_init(p))
-			continue;
-		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+		if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p) ||
+		    p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+			/*
+			 * We cannot use oom_reaper for the mm shared by this
+			 * process because it wouldn't get killed and so the
+			 * memory might be still used.
+			 */
+			can_oom_reap = false;
 			continue;
-
+		}
 		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
 	}
 	rcu_read_unlock();
 
+	if (can_oom_reap)
+		wake_oom_reaper(mm);
+
 	mmdrop(mm);
 	put_task_struct(victim);
 }
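
The gating rule in the last hunk can be read as a predicate over every process sharing the victim's mm: reaping is only safe when all of them will actually die, otherwise the reaper could free memory a surviving, unkillable task still uses. A hypothetical refactoring of that test (not part of this patch):

	static bool task_blocks_reaping(struct task_struct *p)
	{
		/* kthreads, init and OOM-immune tasks are never killed, so
		 * an mm they share must not be reaped out from under them. */
		return unlikely(p->flags & PF_KTHREAD) || is_global_init(p) ||
		       p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN;
	}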