diff options
author | Paul Jackson <pj@sgi.com> | 2005-09-06 18:18:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-07 19:57:40 -0400 |
commit | ef08e3b4981aebf2ba9bd7025ef7210e8eec07ce (patch) | |
tree | 3b5386e011c87dde384115c8eb0d6961c2536025 | |
parent | 9bf2229f8817677127a60c177aefce1badd22d7b (diff) |
[PATCH] cpusets: confine oom_killer to mem_exclusive cpuset
Now the real motivation for this cpuset mem_exclusive patch series seems
trivial.
This patch keeps a task in or under one mem_exclusive cpuset from provoking an
oom kill of a task under a non-overlapping mem_exclusive cpuset. Since only
interrupt and GFP_ATOMIC allocations are allowed to escape mem_exclusive
containment, there is little to gain from oom killing a task under a
non-overlapping mem_exclusive cpuset, as almost all kernel and user memory
allocation must come from disjoint memory nodes.
This patch enables configuring a system so that a runaway job under one
mem_exclusive cpuset cannot cause the killing of a job in another such cpuset
that might be using very high compute and memory resources for a prolonged
time.
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/cpuset.h | 6 | ||||
-rw-r--r-- | kernel/cpuset.c | 33 | ||||
-rw-r--r-- | mm/oom_kill.c | 5 |
3 files changed, 44 insertions, 0 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1fe1c3ebad30..24062a1dbf61 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h | |||
@@ -24,6 +24,7 @@ void cpuset_update_current_mems_allowed(void); | |||
24 | void cpuset_restrict_to_mems_allowed(unsigned long *nodes); | 24 | void cpuset_restrict_to_mems_allowed(unsigned long *nodes); |
25 | int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); | 25 | int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); |
26 | extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); | 26 | extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); |
27 | extern int cpuset_excl_nodes_overlap(const struct task_struct *p); | ||
27 | extern struct file_operations proc_cpuset_operations; | 28 | extern struct file_operations proc_cpuset_operations; |
28 | extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); | 29 | extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); |
29 | 30 | ||
@@ -54,6 +55,11 @@ static inline int cpuset_zone_allowed(struct zone *z, | |||
54 | return 1; | 55 | return 1; |
55 | } | 56 | } |
56 | 57 | ||
58 | static inline int cpuset_excl_nodes_overlap(const struct task_struct *p) | ||
59 | { | ||
60 | return 1; | ||
61 | } | ||
62 | |||
57 | static inline char *cpuset_task_status_allowed(struct task_struct *task, | 63 | static inline char *cpuset_task_status_allowed(struct task_struct *task, |
58 | char *buffer) | 64 | char *buffer) |
59 | { | 65 | { |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 214806deca99..40c6d801dd66 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1688,6 +1688,39 @@ done: | |||
1688 | return allowed; | 1688 | return allowed; |
1689 | } | 1689 | } |
1690 | 1690 | ||
1691 | /** | ||
1692 | * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? | ||
1693 | * @p: pointer to task_struct of some other task. | ||
1694 | * | ||
1695 | * Description: Return true if the nearest mem_exclusive ancestor | ||
1696 | * cpusets of tasks @p and current overlap. Used by oom killer to | ||
1697 | * determine if task @p's memory usage might impact the memory | ||
1698 | * available to the current task. | ||
1699 | * | ||
1700 | * Acquires cpuset_sem - not suitable for calling from a fast path. | ||
1701 | **/ | ||
1702 | |||
1703 | int cpuset_excl_nodes_overlap(const struct task_struct *p) | ||
1704 | { | ||
1705 | const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ | ||
1706 | int overlap = 0; /* do cpusets overlap? */ | ||
1707 | |||
1708 | down(&cpuset_sem); | ||
1709 | cs1 = current->cpuset; | ||
1710 | if (!cs1) | ||
1711 | goto done; /* current task exiting */ | ||
1712 | cs2 = p->cpuset; | ||
1713 | if (!cs2) | ||
1714 | goto done; /* task p is exiting */ | ||
1715 | cs1 = nearest_exclusive_ancestor(cs1); | ||
1716 | cs2 = nearest_exclusive_ancestor(cs2); | ||
1717 | overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); | ||
1718 | done: | ||
1719 | up(&cpuset_sem); | ||
1720 | |||
1721 | return overlap; | ||
1722 | } | ||
1723 | |||
1691 | /* | 1724 | /* |
1692 | * proc_cpuset_show() | 1725 | * proc_cpuset_show() |
1693 | * - Print tasks cpuset path into seq_file. | 1726 | * - Print tasks cpuset path into seq_file. |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3a1d46502938..5ec8da12cfd9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
21 | #include <linux/timex.h> | 21 | #include <linux/timex.h> |
22 | #include <linux/jiffies.h> | 22 | #include <linux/jiffies.h> |
23 | #include <linux/cpuset.h> | ||
23 | 24 | ||
24 | /* #define DEBUG */ | 25 | /* #define DEBUG */ |
25 | 26 | ||
@@ -152,6 +153,10 @@ static struct task_struct * select_bad_process(void) | |||
152 | continue; | 153 | continue; |
153 | if (p->oomkilladj == OOM_DISABLE) | 154 | if (p->oomkilladj == OOM_DISABLE) |
154 | continue; | 155 | continue; |
156 | /* If p's nodes don't overlap ours, it won't help to kill p. */ | ||
157 | if (!cpuset_excl_nodes_overlap(p)) | ||
158 | continue; | ||
159 | |||
155 | /* | 160 | /* |
156 | * This is in the process of releasing memory so wait for it | 161 | * This is in the process of releasing memory so wait for it |
157 | * to finish before killing some other task by mistake. | 162 | * to finish before killing some other task by mistake. |