diff options
author | Christoph Lameter <clameter@sgi.com> | 2007-10-16 04:25:38 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:59 -0400 |
commit | 0e1e7c7a739562a321fda07c7cd2a97a7114f8f8 (patch) | |
tree | f2148e5b667152681625c19cf8b2a556500994ea | |
parent | 523b945855a1427000ffc707c610abe5947ae607 (diff) |
Memoryless nodes: Use N_HIGH_MEMORY for cpusets
cpusets try to ensure that any node added to a cpuset's mems_allowed is
on-line and contains memory. The assumption was that all online nodes contain
memory, which does not hold for memoryless nodes. Thus, it is possible to add
memoryless nodes to a cpuset and then add tasks to this cpuset. This results
in a continuous series of oom-kills and an apparent system hang.
Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a. node_memory_map] in
place of node_online_map when vetting memory nodes. Return an error if the
admin attempts to write a non-empty mems_allowed node mask containing only
memoryless nodes.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/cpusets.txt | 7 | ||||
-rw-r--r-- | include/linux/cpuset.h | 2 | ||||
-rw-r--r-- | kernel/cpuset.c | 56 |
3 files changed, 43 insertions, 22 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index f2c0a6842930..b875d231ac74 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt | |||
@@ -35,7 +35,8 @@ CONTENTS: | |||
35 | ---------------------- | 35 | ---------------------- |
36 | 36 | ||
37 | Cpusets provide a mechanism for assigning a set of CPUs and Memory | 37 | Cpusets provide a mechanism for assigning a set of CPUs and Memory |
38 | Nodes to a set of tasks. | 38 | Nodes to a set of tasks. In this document "Memory Node" refers to |
39 | an on-line node that contains memory. | ||
39 | 40 | ||
40 | Cpusets constrain the CPU and Memory placement of tasks to only | 41 | Cpusets constrain the CPU and Memory placement of tasks to only |
41 | the resources within a tasks current cpuset. They form a nested | 42 | the resources within a tasks current cpuset. They form a nested |
@@ -220,8 +221,8 @@ and name space for cpusets, with a minimum of additional kernel code. | |||
220 | The cpus and mems files in the root (top_cpuset) cpuset are | 221 | The cpus and mems files in the root (top_cpuset) cpuset are |
221 | read-only. The cpus file automatically tracks the value of | 222 | read-only. The cpus file automatically tracks the value of |
222 | cpu_online_map using a CPU hotplug notifier, and the mems file | 223 | cpu_online_map using a CPU hotplug notifier, and the mems file |
223 | automatically tracks the value of node_online_map using the | 224 | automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e., |
224 | cpuset_track_online_nodes() hook. | 225 | nodes with memory--using the cpuset_track_online_nodes() hook. |
225 | 226 | ||
226 | 227 | ||
227 | 1.4 What are exclusive cpusets ? | 228 | 1.4 What are exclusive cpusets ? |
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 826b15e914e2..9e633ea103ce 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h | |||
@@ -93,7 +93,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) | |||
93 | return node_possible_map; | 93 | return node_possible_map; |
94 | } | 94 | } |
95 | 95 | ||
96 | #define cpuset_current_mems_allowed (node_online_map) | 96 | #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY]) |
97 | static inline void cpuset_init_current_mems_allowed(void) {} | 97 | static inline void cpuset_init_current_mems_allowed(void) {} |
98 | static inline void cpuset_update_task_memory_state(void) {} | 98 | static inline void cpuset_update_task_memory_state(void) {} |
99 | #define cpuset_nodes_subset_current_mems_allowed(nodes) (1) | 99 | #define cpuset_nodes_subset_current_mems_allowed(nodes) (1) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 57e6448b171e..8b2daac4de83 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) | |||
581 | 581 | ||
582 | /* | 582 | /* |
583 | * Return in *pmask the portion of a cpusets's mems_allowed that | 583 | * Return in *pmask the portion of a cpusets's mems_allowed that |
584 | * are online. If none are online, walk up the cpuset hierarchy | 584 | * are online, with memory. If none are online with memory, walk |
585 | * until we find one that does have some online mems. If we get | 585 | * up the cpuset hierarchy until we find one that does have some |
586 | * all the way to the top and still haven't found any online mems, | 586 | * online mems. If we get all the way to the top and still haven't |
587 | * return node_online_map. | 587 | * found any online mems, return node_states[N_HIGH_MEMORY]. |
588 | * | 588 | * |
589 | * One way or another, we guarantee to return some non-empty subset | 589 | * One way or another, we guarantee to return some non-empty subset |
590 | * of node_online_map. | 590 | * of node_states[N_HIGH_MEMORY]. |
591 | * | 591 | * |
592 | * Call with callback_mutex held. | 592 | * Call with callback_mutex held. |
593 | */ | 593 | */ |
594 | 594 | ||
595 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | 595 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) |
596 | { | 596 | { |
597 | while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) | 597 | while (cs && !nodes_intersects(cs->mems_allowed, |
598 | node_states[N_HIGH_MEMORY])) | ||
598 | cs = cs->parent; | 599 | cs = cs->parent; |
599 | if (cs) | 600 | if (cs) |
600 | nodes_and(*pmask, cs->mems_allowed, node_online_map); | 601 | nodes_and(*pmask, cs->mems_allowed, |
602 | node_states[N_HIGH_MEMORY]); | ||
601 | else | 603 | else |
602 | *pmask = node_online_map; | 604 | *pmask = node_states[N_HIGH_MEMORY]; |
603 | BUG_ON(!nodes_intersects(*pmask, node_online_map)); | 605 | BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY])); |
604 | } | 606 | } |
605 | 607 | ||
606 | /** | 608 | /** |
@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
924 | int fudge; | 926 | int fudge; |
925 | int retval; | 927 | int retval; |
926 | 928 | ||
927 | /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ | 929 | /* |
930 | * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY]; | |
931 | * it's read-only | ||
932 | */ | ||
928 | if (cs == &top_cpuset) | 933 | if (cs == &top_cpuset) |
929 | return -EACCES; | 934 | return -EACCES; |
930 | 935 | ||
@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
941 | retval = nodelist_parse(buf, trialcs.mems_allowed); | 946 | retval = nodelist_parse(buf, trialcs.mems_allowed); |
942 | if (retval < 0) | 947 | if (retval < 0) |
943 | goto done; | 948 | goto done; |
949 | if (!nodes_intersects(trialcs.mems_allowed, | ||
950 | node_states[N_HIGH_MEMORY])) { | ||
951 | /* | ||
952 | * error if only memoryless nodes specified. | ||
953 | */ | ||
954 | retval = -ENOSPC; | ||
955 | goto done; | ||
956 | } | ||
944 | } | 957 | } |
945 | nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); | 958 | /* |
959 | * Exclude memoryless nodes. We know that trialcs.mems_allowed | ||
960 | * contains at least one node with memory. | ||
961 | */ | ||
962 | nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, | ||
963 | node_states[N_HIGH_MEMORY]); | ||
946 | oldmem = cs->mems_allowed; | 964 | oldmem = cs->mems_allowed; |
947 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | 965 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { |
948 | retval = 0; /* Too easy - nothing to do */ | 966 | retval = 0; /* Too easy - nothing to do */ |
@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) | |||
2098 | 2116 | ||
2099 | /* | 2117 | /* |
2100 | * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track | 2118 | * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track |
2101 | * cpu_online_map and node_online_map. Force the top cpuset to track | 2119 | * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to |
2102 | * whats online after any CPU or memory node hotplug or unplug event. | 2120 | * track what's online after any CPU or memory node hotplug or unplug |
2121 | * event. | ||
2103 | * | 2122 | * |
2104 | * To ensure that we don't remove a CPU or node from the top cpuset | 2123 | * To ensure that we don't remove a CPU or node from the top cpuset |
2105 | * that is currently in use by a child cpuset (which would violate | 2124 | * that is currently in use by a child cpuset (which would violate |
@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void) | |||
2119 | 2138 | ||
2120 | guarantee_online_cpus_mems_in_subtree(&top_cpuset); | 2139 | guarantee_online_cpus_mems_in_subtree(&top_cpuset); |
2121 | top_cpuset.cpus_allowed = cpu_online_map; | 2140 | top_cpuset.cpus_allowed = cpu_online_map; |
2122 | top_cpuset.mems_allowed = node_online_map; | 2141 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2123 | 2142 | ||
2124 | mutex_unlock(&callback_mutex); | 2143 | mutex_unlock(&callback_mutex); |
2125 | mutex_unlock(&manage_mutex); | 2144 | mutex_unlock(&manage_mutex); |
@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, | |||
2147 | 2166 | ||
2148 | #ifdef CONFIG_MEMORY_HOTPLUG | 2167 | #ifdef CONFIG_MEMORY_HOTPLUG |
2149 | /* | 2168 | /* |
2150 | * Keep top_cpuset.mems_allowed tracking node_online_map. | 2169 | * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. |
2151 | * Call this routine anytime after you change node_online_map. | 2170 | * Call this routine anytime after you change |
2171 | * node_states[N_HIGH_MEMORY]. | ||
2152 | * See also the previous routine cpuset_handle_cpuhp(). | 2172 | * See also the previous routine cpuset_handle_cpuhp(). |
2153 | */ | 2173 | */ |
2154 | 2174 | ||
@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void) | |||
2167 | void __init cpuset_init_smp(void) | 2187 | void __init cpuset_init_smp(void) |
2168 | { | 2188 | { |
2169 | top_cpuset.cpus_allowed = cpu_online_map; | 2189 | top_cpuset.cpus_allowed = cpu_online_map; |
2170 | top_cpuset.mems_allowed = node_online_map; | 2190 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2171 | 2191 | ||
2172 | hotcpu_notifier(cpuset_handle_cpuhp, 0); | 2192 | hotcpu_notifier(cpuset_handle_cpuhp, 0); |
2173 | } | 2193 | } |
@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void) | |||
2309 | * | 2329 | * |
2310 | * Description: Returns the nodemask_t mems_allowed of the cpuset | 2330 | * Description: Returns the nodemask_t mems_allowed of the cpuset |
2311 | * attached to the specified @tsk. Guaranteed to return some non-empty | 2331 | * attached to the specified @tsk. Guaranteed to return some non-empty |
2312 | * subset of node_online_map, even if this means going outside the | 2332 | * subset of node_states[N_HIGH_MEMORY], even if this means going outside the |
2313 | * tasks cpuset. | 2333 | * tasks cpuset. |
2314 | **/ | 2334 | **/ |
2315 | 2335 | ||