diff options
author | Christoph Lameter <clameter@sgi.com> | 2007-10-16 04:25:38 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:59 -0400 |
commit | 0e1e7c7a739562a321fda07c7cd2a97a7114f8f8 (patch) | |
tree | f2148e5b667152681625c19cf8b2a556500994ea /kernel/cpuset.c | |
parent | 523b945855a1427000ffc707c610abe5947ae607 (diff) |
Memoryless nodes: Use N_HIGH_MEMORY for cpusets
cpusets try to ensure that any node added to a cpuset's mems_allowed is
online and contains memory. However, the code only checked that the node was
online, on the assumption that every online node contains memory. That
assumption does not hold for memoryless nodes, so it is possible to add
memoryless nodes to a cpuset and then add tasks to that cpuset. This results
in a continuous series of OOM kills and an apparent system hang.
Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a. node_memory_map] in
place of node_online_map when vetting memory nodes. Return an error (-ENOSPC)
if the admin attempts to write a non-empty mems_allowed node mask that contains
only memoryless nodes.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 56 |
1 files changed, 38 insertions, 18 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 57e6448b171e..8b2daac4de83 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) | |||
581 | 581 | ||
582 | /* | 582 | /* |
583 | * Return in *pmask the portion of a cpusets's mems_allowed that | 583 | * Return in *pmask the portion of a cpusets's mems_allowed that |
584 | * are online. If none are online, walk up the cpuset hierarchy | 584 | * are online, with memory. If none are online with memory, walk |
585 | * until we find one that does have some online mems. If we get | 585 | * up the cpuset hierarchy until we find one that does have some |
586 | * all the way to the top and still haven't found any online mems, | 586 | * online mems. If we get all the way to the top and still haven't |
587 | * return node_online_map. | 587 | * found any online mems, return node_states[N_HIGH_MEMORY]. |
588 | * | 588 | * |
589 | * One way or another, we guarantee to return some non-empty subset | 589 | * One way or another, we guarantee to return some non-empty subset |
590 | * of node_online_map. | 590 | * of node_states[N_HIGH_MEMORY]. |
591 | * | 591 | * |
592 | * Call with callback_mutex held. | 592 | * Call with callback_mutex held. |
593 | */ | 593 | */ |
594 | 594 | ||
595 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | 595 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) |
596 | { | 596 | { |
597 | while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) | 597 | while (cs && !nodes_intersects(cs->mems_allowed, |
598 | node_states[N_HIGH_MEMORY])) | ||
598 | cs = cs->parent; | 599 | cs = cs->parent; |
599 | if (cs) | 600 | if (cs) |
600 | nodes_and(*pmask, cs->mems_allowed, node_online_map); | 601 | nodes_and(*pmask, cs->mems_allowed, |
602 | node_states[N_HIGH_MEMORY]); | ||
601 | else | 603 | else |
602 | *pmask = node_online_map; | 604 | *pmask = node_states[N_HIGH_MEMORY]; |
603 | BUG_ON(!nodes_intersects(*pmask, node_online_map)); | 605 | BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY])); |
604 | } | 606 | } |
605 | 607 | ||
606 | /** | 608 | /** |
@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
924 | int fudge; | 926 | int fudge; |
925 | int retval; | 927 | int retval; |
926 | 928 | ||
927 | /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ | 929 | /* |
930 | * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY]; | ||
931 | * it's read-only | ||
932 | */ | ||
928 | if (cs == &top_cpuset) | 933 | if (cs == &top_cpuset) |
929 | return -EACCES; | 934 | return -EACCES; |
930 | 935 | ||
@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
941 | retval = nodelist_parse(buf, trialcs.mems_allowed); | 946 | retval = nodelist_parse(buf, trialcs.mems_allowed); |
942 | if (retval < 0) | 947 | if (retval < 0) |
943 | goto done; | 948 | goto done; |
949 | if (!nodes_intersects(trialcs.mems_allowed, | ||
950 | node_states[N_HIGH_MEMORY])) { | ||
951 | /* | ||
952 | * error if only memoryless nodes specified. | ||
953 | */ | ||
954 | retval = -ENOSPC; | ||
955 | goto done; | ||
956 | } | ||
944 | } | 957 | } |
945 | nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); | 958 | /* |
959 | * Exclude memoryless nodes. We know that trialcs.mems_allowed | ||
960 | * contains at least one node with memory. | ||
961 | */ | ||
962 | nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, | ||
963 | node_states[N_HIGH_MEMORY]); | ||
946 | oldmem = cs->mems_allowed; | 964 | oldmem = cs->mems_allowed; |
947 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | 965 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { |
948 | retval = 0; /* Too easy - nothing to do */ | 966 | retval = 0; /* Too easy - nothing to do */ |
@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) | |||
2098 | 2116 | ||
2099 | /* | 2117 | /* |
2100 | * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track | 2118 | * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track |
2101 | * cpu_online_map and node_online_map. Force the top cpuset to track | 2119 | * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to |
2102 | * whats online after any CPU or memory node hotplug or unplug event. | 2120 | * track what's online after any CPU or memory node hotplug or unplug |
2121 | * event. | ||
2103 | * | 2122 | * |
2104 | * To ensure that we don't remove a CPU or node from the top cpuset | 2123 | * To ensure that we don't remove a CPU or node from the top cpuset |
2105 | * that is currently in use by a child cpuset (which would violate | 2124 | * that is currently in use by a child cpuset (which would violate |
@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void) | |||
2119 | 2138 | ||
2120 | guarantee_online_cpus_mems_in_subtree(&top_cpuset); | 2139 | guarantee_online_cpus_mems_in_subtree(&top_cpuset); |
2121 | top_cpuset.cpus_allowed = cpu_online_map; | 2140 | top_cpuset.cpus_allowed = cpu_online_map; |
2122 | top_cpuset.mems_allowed = node_online_map; | 2141 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2123 | 2142 | ||
2124 | mutex_unlock(&callback_mutex); | 2143 | mutex_unlock(&callback_mutex); |
2125 | mutex_unlock(&manage_mutex); | 2144 | mutex_unlock(&manage_mutex); |
@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, | |||
2147 | 2166 | ||
2148 | #ifdef CONFIG_MEMORY_HOTPLUG | 2167 | #ifdef CONFIG_MEMORY_HOTPLUG |
2149 | /* | 2168 | /* |
2150 | * Keep top_cpuset.mems_allowed tracking node_online_map. | 2169 | * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. |
2151 | * Call this routine anytime after you change node_online_map. | 2170 | * Call this routine anytime after you change |
2171 | * node_states[N_HIGH_MEMORY]. | ||
2152 | * See also the previous routine cpuset_handle_cpuhp(). | 2172 | * See also the previous routine cpuset_handle_cpuhp(). |
2153 | */ | 2173 | */ |
2154 | 2174 | ||
@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void) | |||
2167 | void __init cpuset_init_smp(void) | 2187 | void __init cpuset_init_smp(void) |
2168 | { | 2188 | { |
2169 | top_cpuset.cpus_allowed = cpu_online_map; | 2189 | top_cpuset.cpus_allowed = cpu_online_map; |
2170 | top_cpuset.mems_allowed = node_online_map; | 2190 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2171 | 2191 | ||
2172 | hotcpu_notifier(cpuset_handle_cpuhp, 0); | 2192 | hotcpu_notifier(cpuset_handle_cpuhp, 0); |
2173 | } | 2193 | } |
@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void) | |||
2309 | * | 2329 | * |
2310 | * Description: Returns the nodemask_t mems_allowed of the cpuset | 2330 | * Description: Returns the nodemask_t mems_allowed of the cpuset |
2311 | * attached to the specified @tsk. Guaranteed to return some non-empty | 2331 | * attached to the specified @tsk. Guaranteed to return some non-empty |
2312 | * subset of node_online_map, even if this means going outside the | 2332 | * subset of node_states[N_HIGH_MEMORY], even if this means going outside the |
2313 | * tasks cpuset. | 2333 | * tasks cpuset. |
2314 | **/ | 2334 | **/ |
2315 | 2335 | ||