aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Jackson <pj@sgi.com> 2005-09-06 18:18:13 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2005-09-07 19:57:40 -0400
commit ef08e3b4981aebf2ba9bd7025ef7210e8eec07ce (patch)
tree 3b5386e011c87dde384115c8eb0d6961c2536025
parent 9bf2229f8817677127a60c177aefce1badd22d7b (diff)
[PATCH] cpusets: confine oom_killer to mem_exclusive cpuset
Now the real motivation for this cpuset mem_exclusive patch series seems trivial.

This patch keeps a task in or under one mem_exclusive cpuset from provoking an oom kill of a task under a non-overlapping mem_exclusive cpuset. Since only interrupt and GFP_ATOMIC allocations are allowed to escape mem_exclusive containment, there is little to gain from oom killing a task under a non-overlapping mem_exclusive cpuset, as almost all kernel and user memory allocation must come from disjoint memory nodes.

This patch enables configuring a system so that a runaway job under one mem_exclusive cpuset cannot cause the killing of a job in another such cpuset that might be using very high compute and memory resources for a prolonged time.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/cpuset.h 6
-rw-r--r-- kernel/cpuset.c 33
-rw-r--r-- mm/oom_kill.c 5
3 files changed, 44 insertions, 0 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 1fe1c3ebad30..24062a1dbf61 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -24,6 +24,7 @@ void cpuset_update_current_mems_allowed(void);
24void cpuset_restrict_to_mems_allowed(unsigned long *nodes); 24void cpuset_restrict_to_mems_allowed(unsigned long *nodes);
25int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); 25int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
26extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); 26extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask);
27extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
27extern struct file_operations proc_cpuset_operations; 28extern struct file_operations proc_cpuset_operations;
28extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); 29extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
29 30
@@ -54,6 +55,11 @@ static inline int cpuset_zone_allowed(struct zone *z,
54 return 1; 55 return 1;
55} 56}
56 57
58static inline int cpuset_excl_nodes_overlap(const struct task_struct *p)
59{
60 return 1;
61}
62
57static inline char *cpuset_task_status_allowed(struct task_struct *task, 63static inline char *cpuset_task_status_allowed(struct task_struct *task,
58 char *buffer) 64 char *buffer)
59{ 65{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 214806deca99..40c6d801dd66 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1688,6 +1688,39 @@ done:
1688 return allowed; 1688 return allowed;
1689} 1689}
1690 1690
1691/**
1692 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
1693 * @p: pointer to task_struct of some other task.
1694 *
1695 * Description: Return true if the nearest mem_exclusive ancestor
1696 * cpusets of tasks @p and current overlap. Used by oom killer to
1697 * determine if task @p's memory usage might impact the memory
1698 * available to the current task.
1699 *
1700 * Acquires cpuset_sem - not suitable for calling from a fast path.
1701 **/
1702
1703int cpuset_excl_nodes_overlap(const struct task_struct *p)
1704{
1705 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
1706 int overlap = 0; /* do cpusets overlap? */
1707
1708 down(&cpuset_sem);
1709 cs1 = current->cpuset;
1710 if (!cs1)
1711 goto done; /* current task exiting */
1712 cs2 = p->cpuset;
1713 if (!cs2)
1714 goto done; /* task p is exiting */
1715 cs1 = nearest_exclusive_ancestor(cs1);
1716 cs2 = nearest_exclusive_ancestor(cs2);
1717 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
1718done:
1719 up(&cpuset_sem);
1720
1721 return overlap;
1722}
1723
1691/* 1724/*
1692 * proc_cpuset_show() 1725 * proc_cpuset_show()
1693 * - Print tasks cpuset path into seq_file. 1726 * - Print tasks cpuset path into seq_file.
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3a1d46502938..5ec8da12cfd9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -20,6 +20,7 @@
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/timex.h> 21#include <linux/timex.h>
22#include <linux/jiffies.h> 22#include <linux/jiffies.h>
23#include <linux/cpuset.h>
23 24
24/* #define DEBUG */ 25/* #define DEBUG */
25 26
@@ -152,6 +153,10 @@ static struct task_struct * select_bad_process(void)
152 continue; 153 continue;
153 if (p->oomkilladj == OOM_DISABLE) 154 if (p->oomkilladj == OOM_DISABLE)
154 continue; 155 continue;
156 /* If p's nodes don't overlap ours, it won't help to kill p. */
157 if (!cpuset_excl_nodes_overlap(p))
158 continue;
159
155 /* 160 /*
156 * This is in the process of releasing memory so for wait it 161 * This is in the process of releasing memory so for wait it
157 * to finish before killing some other task by mistake. 162 * to finish before killing some other task by mistake.