diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2009-01-07 21:08:08 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:06 -0500 |
commit | a636b327f731143ccc544b966cfd8de6cb6d72c6 (patch) | |
tree | e53b03b64e8ebca20649c2d877bc4c3ef54ec34c | |
parent | 2e4d40915fb85207fe48cfc31201824ec6d7426e (diff) |
memcg: avoid unnecessary system-wide-oom-killer
Current mmotm has a new OOM function, pagefault_out_of_memory(). It was
added to select a bad process rather than killing current.
When a memcg hits its limit and calls OOM at page fault, this handler is
called and system-wide OOM handling happens (meaning the kernel panics if
panic_on_oom is true).
To avoid overkill, check memcg's recent behavior before starting
system-wide-oom.
This patch also guarantees "don't account against a process with
TIF_MEMDIE". This is necessary for smooth OOM.
[akpm@linux-foundation.org: build fix]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Jan Blunck <jblunck@suse.de>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 7 | ||||
-rw-r--r-- | mm/memcontrol.c | 33 | ||||
-rw-r--r-- | mm/oom_kill.c | 8 |
3 files changed, 44 insertions, 4 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2de6504e01fb..2fdd1380bf0a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -102,6 +102,8 @@ static inline bool mem_cgroup_disabled(void) | |||
102 | return false; | 102 | return false; |
103 | } | 103 | } |
104 | 104 | ||
105 | extern bool mem_cgroup_oom_called(struct task_struct *task); | ||
106 | |||
105 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | 107 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ |
106 | struct mem_cgroup; | 108 | struct mem_cgroup; |
107 | 109 | ||
@@ -234,6 +236,11 @@ static inline bool mem_cgroup_disabled(void) | |||
234 | { | 236 | { |
235 | return true; | 237 | return true; |
236 | } | 238 | } |
239 | |||
240 | static inline bool mem_cgroup_oom_called(struct task_struct *task) | ||
241 | { | ||
242 | return false; | ||
243 | } | ||
237 | #endif /* CONFIG_CGROUP_MEM_CONT */ | 244 | #endif /* CONFIG_CGROUP_MEM_CONT */ |
238 | 245 | ||
239 | #endif /* _LINUX_MEMCONTROL_H */ | 246 | #endif /* _LINUX_MEMCONTROL_H */ |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 886e2224c5fd..659b0c58f13e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -153,7 +153,7 @@ struct mem_cgroup { | |||
153 | * Should the accounting and control be hierarchical, per subtree? | 153 | * Should the accounting and control be hierarchical, per subtree? |
154 | */ | 154 | */ |
155 | bool use_hierarchy; | 155 | bool use_hierarchy; |
156 | 156 | unsigned long last_oom_jiffies; | |
157 | int obsolete; | 157 | int obsolete; |
158 | atomic_t refcnt; | 158 | atomic_t refcnt; |
159 | /* | 159 | /* |
@@ -615,6 +615,22 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
615 | return ret; | 615 | return ret; |
616 | } | 616 | } |
617 | 617 | ||
618 | bool mem_cgroup_oom_called(struct task_struct *task) | ||
619 | { | ||
620 | bool ret = false; | ||
621 | struct mem_cgroup *mem; | ||
622 | struct mm_struct *mm; | ||
623 | |||
624 | rcu_read_lock(); | ||
625 | mm = task->mm; | ||
626 | if (!mm) | ||
627 | mm = &init_mm; | ||
628 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | ||
629 | if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10)) | ||
630 | ret = true; | ||
631 | rcu_read_unlock(); | ||
632 | return ret; | ||
633 | } | ||
618 | /* | 634 | /* |
619 | * Unlike exported interface, "oom" parameter is added. if oom==true, | 635 | * Unlike exported interface, "oom" parameter is added. if oom==true, |
620 | * oom-killer can be invoked. | 636 | * oom-killer can be invoked. |
@@ -626,6 +642,13 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
626 | struct mem_cgroup *mem, *mem_over_limit; | 642 | struct mem_cgroup *mem, *mem_over_limit; |
627 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 643 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
628 | struct res_counter *fail_res; | 644 | struct res_counter *fail_res; |
645 | |||
646 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { | ||
647 | /* Don't account this! */ | ||
648 | *memcg = NULL; | ||
649 | return 0; | ||
650 | } | ||
651 | |||
629 | /* | 652 | /* |
630 | * We always charge the cgroup the mm_struct belongs to. | 653 | * We always charge the cgroup the mm_struct belongs to. |
631 | * The mm_struct's mem_cgroup changes on task migration if the | 654 | * The mm_struct's mem_cgroup changes on task migration if the |
@@ -694,8 +717,10 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
694 | continue; | 717 | continue; |
695 | 718 | ||
696 | if (!nr_retries--) { | 719 | if (!nr_retries--) { |
697 | if (oom) | 720 | if (oom) { |
698 | mem_cgroup_out_of_memory(mem, gfp_mask); | 721 | mem_cgroup_out_of_memory(mem, gfp_mask); |
722 | mem->last_oom_jiffies = jiffies; | ||
723 | } | ||
699 | goto nomem; | 724 | goto nomem; |
700 | } | 725 | } |
701 | } | 726 | } |
@@ -832,7 +857,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
832 | 857 | ||
833 | 858 | ||
834 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); | 859 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); |
835 | if (ret) | 860 | if (ret || !parent) |
836 | return ret; | 861 | return ret; |
837 | 862 | ||
838 | if (!get_page_unless_zero(page)) | 863 | if (!get_page_unless_zero(page)) |
@@ -883,7 +908,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
883 | 908 | ||
884 | mem = memcg; | 909 | mem = memcg; |
885 | ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); | 910 | ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); |
886 | if (ret) | 911 | if (ret || !mem) |
887 | return ret; | 912 | return ret; |
888 | 913 | ||
889 | __mem_cgroup_commit_charge(mem, pc, ctype); | 914 | __mem_cgroup_commit_charge(mem, pc, ctype); |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6b9e758c98a5..fd150e3a2567 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -560,6 +560,13 @@ void pagefault_out_of_memory(void) | |||
560 | /* Got some memory back in the last second. */ | 560 | /* Got some memory back in the last second. */ |
561 | return; | 561 | return; |
562 | 562 | ||
563 | /* | ||
564 | * If this is from memcg, oom-killer is already invoked. | ||
565 | * and not worth to go system-wide-oom. | ||
566 | */ | ||
567 | if (mem_cgroup_oom_called(current)) | ||
568 | goto rest_and_return; | ||
569 | |||
563 | if (sysctl_panic_on_oom) | 570 | if (sysctl_panic_on_oom) |
564 | panic("out of memory from page fault. panic_on_oom is selected.\n"); | 571 | panic("out of memory from page fault. panic_on_oom is selected.\n"); |
565 | 572 | ||
@@ -571,6 +578,7 @@ void pagefault_out_of_memory(void) | |||
571 | * Give "p" a good chance of killing itself before we | 578 | * Give "p" a good chance of killing itself before we |
572 | * retry to allocate memory. | 579 | * retry to allocate memory. |
573 | */ | 580 | */ |
581 | rest_and_return: | ||
574 | if (!test_thread_flag(TIF_MEMDIE)) | 582 | if (!test_thread_flag(TIF_MEMDIE)) |
575 | schedule_timeout_uninterruptible(1); | 583 | schedule_timeout_uninterruptible(1); |
576 | } | 584 | } |