author    David Rientjes <rientjes@google.com>    2011-10-31 20:07:15 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2011-10-31 20:30:45 -0400
commit    c9f01245b6a7d77d17deaa71af10f6aca14fa24e
tree      13ffde591a5bcefba39cb6393f09b27f1ebc1a30
parent    7b0d44fa49b1dcfdcf4897f12ddd12ddeab1a9d7
oom: remove oom_disable_count
This removes mm->oom_disable_count entirely since it's unnecessary and
currently buggy.  The counter was intended to be per-process but it's
currently decremented in the exit path for each thread that exits, causing
it to underflow.

The count was originally intended to prevent oom killing threads that
share memory with threads that cannot be killed since it doesn't lead to
future memory freeing.  The counter could be fixed to represent all
threads sharing the same mm, but it's better to remove the count since:

 - it is possible that the OOM_DISABLE thread sharing memory with the
   victim is waiting on that thread to exit and will actually cause future
   memory freeing, and

 - there is no guarantee that a thread is disabled from oom killing just
   because another thread sharing its mm is oom disabled.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
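[Editor's note: to make the underflow concrete, here is a minimal
user-space sketch of the accounting the changelog describes.  The struct
and field names only mirror the kernel's, the three-thread process is a
hypothetical example, and this is not kernel code.]

#include <stdio.h>

/* Illustrative stand-in for the kernel's mm_struct; not kernel code. */
struct mm_struct {
	int oom_disable_count;	/* shared by every thread using this mm */
};

int main(void)
{
	struct mm_struct mm = { .oom_disable_count = 0 };
	int nr_threads = 3;	/* hypothetical thread count */

	/*
	 * oom_score_adj lives in the per-process signal_struct, so writing
	 * OOM_SCORE_ADJ_MIN to /proc/pid/oom_score_adj increments the
	 * counter once for the whole process...
	 */
	mm.oom_disable_count++;

	/*
	 * ...but the exit path decremented once per exiting thread, so a
	 * three-thread process decrements three times.
	 */
	for (int i = 0; i < nr_threads; i++)
		mm.oom_disable_count--;

	/* Prints -2: the counter has underflowed. */
	printf("oom_disable_count = %d\n", mm.oom_disable_count);
	return 0;
}

The negative result here stands in for the underflowed atomic_t that a
multithreaded OOM_SCORE_ADJ_MIN process would leave behind at exit.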
 fs/exec.c                |  4 ----
 fs/proc/base.c           | 13 -------------
 include/linux/mm_types.h |  3 ---
 kernel/exit.c            |  2 --
 kernel/fork.c            | 10 +---------
 mm/oom_kill.c            | 23 +++++------------------
 6 files changed, 6 insertions(+), 49 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 25dcbe5fc35..36254645b7c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -841,10 +841,6 @@ static int exec_mmap(struct mm_struct *mm)
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
-	if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-		atomic_dec(&old_mm->oom_disable_count);
-		atomic_inc(&tsk->mm->oom_disable_count);
-	}
 	task_unlock(tsk);
 	arch_pick_mmap_layout(mm);
 	if (old_mm) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5eb02069e1b..8f0087e20e1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1107,13 +1107,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 		goto err_sighand;
 	}
 
-	if (oom_adjust != task->signal->oom_adj) {
-		if (oom_adjust == OOM_DISABLE)
-			atomic_inc(&task->mm->oom_disable_count);
-		if (task->signal->oom_adj == OOM_DISABLE)
-			atomic_dec(&task->mm->oom_disable_count);
-	}
-
 	/*
 	 * Warn that /proc/pid/oom_adj is deprecated, see
 	 * Documentation/feature-removal-schedule.txt.
@@ -1215,12 +1208,6 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 		goto err_sighand;
 	}
 
-	if (oom_score_adj != task->signal->oom_score_adj) {
-		if (oom_score_adj == OOM_SCORE_ADJ_MIN)
-			atomic_inc(&task->mm->oom_disable_count);
-		if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-			atomic_dec(&task->mm->oom_disable_count);
-	}
 	task->signal->oom_score_adj = oom_score_adj;
 	if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
 		task->signal->oom_score_adj_min = oom_score_adj;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c93d00a6e95..6456624aa96 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -336,9 +336,6 @@ struct mm_struct {
 	unsigned int token_priority;
 	unsigned int last_interval;
 
-	/* How many tasks sharing this mm are OOM_DISABLE */
-	atomic_t oom_disable_count;
-
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	struct core_state *core_state; /* coredumping support */
diff --git a/kernel/exit.c b/kernel/exit.c
index 2913b3509d4..d0b7d988f87 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -681,8 +681,6 @@ static void exit_mm(struct task_struct * tsk)
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
-	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-		atomic_dec(&mm->oom_disable_count);
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e6b6f4fb27..70d76191afb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -501,7 +501,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	mm->cached_hole_size = ~0UL;
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
-	atomic_set(&mm->oom_disable_count, 0);
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
@@ -816,8 +815,6 @@ good_mm:
 	/* Initializing for Swap token stuff */
 	mm->token_priority = 0;
 	mm->last_interval = 0;
-	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-		atomic_inc(&mm->oom_disable_count);
 
 	tsk->mm = mm;
 	tsk->active_mm = mm;
@@ -1391,13 +1388,8 @@ bad_fork_cleanup_io:
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
-	if (p->mm) {
-		task_lock(p);
-		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-			atomic_dec(&p->mm->oom_disable_count);
-		task_unlock(p);
+	if (p->mm)
 		mmput(p->mm);
-	}
 bad_fork_cleanup_signal:
 	if (!(clone_flags & CLONE_THREAD))
 		free_signal_struct(p->signal);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b0d8943bc9f..2b97e8f0460 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -54,13 +54,7 @@ int test_set_oom_score_adj(int new_val)
 
 	spin_lock_irq(&sighand->siglock);
 	old_val = current->signal->oom_score_adj;
-	if (new_val != old_val) {
-		if (new_val == OOM_SCORE_ADJ_MIN)
-			atomic_inc(&current->mm->oom_disable_count);
-		else if (old_val == OOM_SCORE_ADJ_MIN)
-			atomic_dec(&current->mm->oom_disable_count);
-		current->signal->oom_score_adj = new_val;
-	}
+	current->signal->oom_score_adj = new_val;
 	spin_unlock_irq(&sighand->siglock);
 
 	return old_val;
@@ -173,16 +167,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 		return 0;
 
 	/*
-	 * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN
-	 * so the entire heuristic doesn't need to be executed for something
-	 * that cannot be killed.
-	 */
-	if (atomic_read(&p->mm->oom_disable_count)) {
-		task_unlock(p);
-		return 0;
-	}
-
-	/*
 	 * The memory controller may have a limit of 0 bytes, so avoid a divide
 	 * by zero, if necessary.
 	 */
@@ -451,6 +435,9 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
 	for_each_process(q)
 		if (q->mm == mm && !same_thread_group(q, p) &&
 		    !(q->flags & PF_KTHREAD)) {
+			if (q->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+				continue;
+
 			task_lock(q);	/* Protect ->comm from prctl() */
 			pr_err("Kill process %d (%s) sharing same memory\n",
 				task_pid_nr(q), q->comm);
@@ -727,7 +714,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	read_lock(&tasklist_lock);
 	if (sysctl_oom_kill_allocating_task &&
 	    !oom_unkillable_task(current, NULL, nodemask) &&
-	    current->mm && !atomic_read(&current->mm->oom_disable_count)) {
+	    current->mm) {
 		/*
 		 * oom_kill_process() needs tasklist_lock held. If it returns
 		 * non-zero, current could not be killed so we must fallback to