aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2007-10-16 04:25:38 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-16 12:42:59 -0400
commit0e1e7c7a739562a321fda07c7cd2a97a7114f8f8 (patch)
treef2148e5b667152681625c19cf8b2a556500994ea /kernel/cpuset.c
parent523b945855a1427000ffc707c610abe5947ae607 (diff)
Memoryless nodes: Use N_HIGH_MEMORY for cpusets
cpusets try to ensure that any node added to a cpuset's mems_allowed is on-line and contains memory. The assumption was that online nodes contained memory, which does not hold for memoryless nodes. Thus, it is possible to add memoryless nodes to a cpuset and then add tasks to this cpuset. This results in a continuous series of oom-kills and an apparent system hang. Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a. node_memory_map] in place of node_online_map when vetting memories. Return an error if the admin attempts to write a non-empty mems_allowed node mask containing only memoryless nodes. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: Bob Picco <bob.picco@hp.com> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@skynet.ie> Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c56
1 files changed, 38 insertions, 18 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 57e6448b171e..8b2daac4de83 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
581 581
582/* 582/*
583 * Return in *pmask the portion of a cpusets's mems_allowed that 583 * Return in *pmask the portion of a cpusets's mems_allowed that
584 * are online. If none are online, walk up the cpuset hierarchy 584 * are online, with memory. If none are online with memory, walk
585 * until we find one that does have some online mems. If we get 585 * up the cpuset hierarchy until we find one that does have some
586 * all the way to the top and still haven't found any online mems, 586 * online mems. If we get all the way to the top and still haven't
587 * return node_online_map. 587 * found any online mems, return node_states[N_HIGH_MEMORY].
588 * 588 *
589 * One way or another, we guarantee to return some non-empty subset 589 * One way or another, we guarantee to return some non-empty subset
590 * of node_online_map. 590 * of node_states[N_HIGH_MEMORY].
591 * 591 *
592 * Call with callback_mutex held. 592 * Call with callback_mutex held.
593 */ 593 */
594 594
595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) 595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
596{ 596{
597 while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) 597 while (cs && !nodes_intersects(cs->mems_allowed,
598 node_states[N_HIGH_MEMORY]))
598 cs = cs->parent; 599 cs = cs->parent;
599 if (cs) 600 if (cs)
600 nodes_and(*pmask, cs->mems_allowed, node_online_map); 601 nodes_and(*pmask, cs->mems_allowed,
602 node_states[N_HIGH_MEMORY]);
601 else 603 else
602 *pmask = node_online_map; 604 *pmask = node_states[N_HIGH_MEMORY];
603 BUG_ON(!nodes_intersects(*pmask, node_online_map)); 605 BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
604} 606}
605 607
606/** 608/**
@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
924 int fudge; 926 int fudge;
925 int retval; 927 int retval;
926 928
927 /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ 929 /*
930 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
931 * it's read-only
932 */
928 if (cs == &top_cpuset) 933 if (cs == &top_cpuset)
929 return -EACCES; 934 return -EACCES;
930 935
@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
941 retval = nodelist_parse(buf, trialcs.mems_allowed); 946 retval = nodelist_parse(buf, trialcs.mems_allowed);
942 if (retval < 0) 947 if (retval < 0)
943 goto done; 948 goto done;
949 if (!nodes_intersects(trialcs.mems_allowed,
950 node_states[N_HIGH_MEMORY])) {
951 /*
952 * error if only memoryless nodes specified.
953 */
954 retval = -ENOSPC;
955 goto done;
956 }
944 } 957 }
945 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); 958 /*
959 * Exclude memoryless nodes. We know that trialcs.mems_allowed
960 * contains at least one node with memory.
961 */
962 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
963 node_states[N_HIGH_MEMORY]);
946 oldmem = cs->mems_allowed; 964 oldmem = cs->mems_allowed;
947 if (nodes_equal(oldmem, trialcs.mems_allowed)) { 965 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
948 retval = 0; /* Too easy - nothing to do */ 966 retval = 0; /* Too easy - nothing to do */
@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
2098 2116
2099/* 2117/*
2100 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track 2118 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
2101 * cpu_online_map and node_online_map. Force the top cpuset to track 2119 * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
2102 * whats online after any CPU or memory node hotplug or unplug event. 2120 * track what's online after any CPU or memory node hotplug or unplug
2121 * event.
2103 * 2122 *
2104 * To ensure that we don't remove a CPU or node from the top cpuset 2123 * To ensure that we don't remove a CPU or node from the top cpuset
2105 * that is currently in use by a child cpuset (which would violate 2124 * that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void)
2119 2138
2120 guarantee_online_cpus_mems_in_subtree(&top_cpuset); 2139 guarantee_online_cpus_mems_in_subtree(&top_cpuset);
2121 top_cpuset.cpus_allowed = cpu_online_map; 2140 top_cpuset.cpus_allowed = cpu_online_map;
2122 top_cpuset.mems_allowed = node_online_map; 2141 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2123 2142
2124 mutex_unlock(&callback_mutex); 2143 mutex_unlock(&callback_mutex);
2125 mutex_unlock(&manage_mutex); 2144 mutex_unlock(&manage_mutex);
@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
2147 2166
2148#ifdef CONFIG_MEMORY_HOTPLUG 2167#ifdef CONFIG_MEMORY_HOTPLUG
2149/* 2168/*
2150 * Keep top_cpuset.mems_allowed tracking node_online_map. 2169 * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
2151 * Call this routine anytime after you change node_online_map. 2170 * Call this routine anytime after you change
2171 * node_states[N_HIGH_MEMORY].
2152 * See also the previous routine cpuset_handle_cpuhp(). 2172 * See also the previous routine cpuset_handle_cpuhp().
2153 */ 2173 */
2154 2174
@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void)
2167void __init cpuset_init_smp(void) 2187void __init cpuset_init_smp(void)
2168{ 2188{
2169 top_cpuset.cpus_allowed = cpu_online_map; 2189 top_cpuset.cpus_allowed = cpu_online_map;
2170 top_cpuset.mems_allowed = node_online_map; 2190 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2171 2191
2172 hotcpu_notifier(cpuset_handle_cpuhp, 0); 2192 hotcpu_notifier(cpuset_handle_cpuhp, 0);
2173} 2193}
@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void)
2309 * 2329 *
2310 * Description: Returns the nodemask_t mems_allowed of the cpuset 2330 * Description: Returns the nodemask_t mems_allowed of the cpuset
2311 * attached to the specified @tsk. Guaranteed to return some non-empty 2331 * attached to the specified @tsk. Guaranteed to return some non-empty
2312 * subset of node_online_map, even if this means going outside the 2332 * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
2313 * tasks cpuset. 2333 * tasks cpuset.
2314 **/ 2334 **/
2315 2335