aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2007-10-16 04:25:38 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-16 12:42:59 -0400
commit0e1e7c7a739562a321fda07c7cd2a97a7114f8f8 (patch)
treef2148e5b667152681625c19cf8b2a556500994ea
parent523b945855a1427000ffc707c610abe5947ae607 (diff)
Memoryless nodes: Use N_HIGH_MEMORY for cpusets
cpusets try to ensure that any node added to a cpuset's mems_allowed is on-line and contains memory. The assumption was that online nodes contained memory. Thus, it is possible to add memoryless nodes to a cpuset and then add tasks to this cpuset. This results in a continuous series of oom-kills and an apparent system hang. Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a. node_memory_map] in place of node_online_map when vetting memories. Return error if admin attempts to write a non-empty mems_allowed node mask containing only memoryless-nodes. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: Bob Picco <bob.picco@hp.com> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@skynet.ie> Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/cpusets.txt7
-rw-r--r--include/linux/cpuset.h2
-rw-r--r--kernel/cpuset.c56
3 files changed, 43 insertions, 22 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index f2c0a6842930..b875d231ac74 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -35,7 +35,8 @@ CONTENTS:
35---------------------- 35----------------------
36 36
37Cpusets provide a mechanism for assigning a set of CPUs and Memory 37Cpusets provide a mechanism for assigning a set of CPUs and Memory
38Nodes to a set of tasks. 38Nodes to a set of tasks. In this document "Memory Node" refers to
39an on-line node that contains memory.
39 40
40Cpusets constrain the CPU and Memory placement of tasks to only 41Cpusets constrain the CPU and Memory placement of tasks to only
41the resources within a tasks current cpuset. They form a nested 42the resources within a tasks current cpuset. They form a nested
@@ -220,8 +221,8 @@ and name space for cpusets, with a minimum of additional kernel code.
220The cpus and mems files in the root (top_cpuset) cpuset are 221The cpus and mems files in the root (top_cpuset) cpuset are
221read-only. The cpus file automatically tracks the value of 222read-only. The cpus file automatically tracks the value of
222cpu_online_map using a CPU hotplug notifier, and the mems file 223cpu_online_map using a CPU hotplug notifier, and the mems file
223automatically tracks the value of node_online_map using the 224automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
224cpuset_track_online_nodes() hook. 225nodes with memory--using the cpuset_track_online_nodes() hook.
225 226
226 227
2271.4 What are exclusive cpusets ? 2281.4 What are exclusive cpusets ?
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 826b15e914e2..9e633ea103ce 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -93,7 +93,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
93 return node_possible_map; 93 return node_possible_map;
94} 94}
95 95
96#define cpuset_current_mems_allowed (node_online_map) 96#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
97static inline void cpuset_init_current_mems_allowed(void) {} 97static inline void cpuset_init_current_mems_allowed(void) {}
98static inline void cpuset_update_task_memory_state(void) {} 98static inline void cpuset_update_task_memory_state(void) {}
99#define cpuset_nodes_subset_current_mems_allowed(nodes) (1) 99#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 57e6448b171e..8b2daac4de83 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
581 581
582/* 582/*
583 * Return in *pmask the portion of a cpusets's mems_allowed that 583 * Return in *pmask the portion of a cpusets's mems_allowed that
584 * are online. If none are online, walk up the cpuset hierarchy 584 * are online, with memory. If none are online with memory, walk
585 * until we find one that does have some online mems. If we get 585 * up the cpuset hierarchy until we find one that does have some
586 * all the way to the top and still haven't found any online mems, 586 * online mems. If we get all the way to the top and still haven't
587 * return node_online_map. 587 * found any online mems, return node_states[N_HIGH_MEMORY].
588 * 588 *
589 * One way or another, we guarantee to return some non-empty subset 589 * One way or another, we guarantee to return some non-empty subset
590 * of node_online_map. 590 * of node_states[N_HIGH_MEMORY].
591 * 591 *
592 * Call with callback_mutex held. 592 * Call with callback_mutex held.
593 */ 593 */
594 594
595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) 595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
596{ 596{
597 while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) 597 while (cs && !nodes_intersects(cs->mems_allowed,
598 node_states[N_HIGH_MEMORY]))
598 cs = cs->parent; 599 cs = cs->parent;
599 if (cs) 600 if (cs)
600 nodes_and(*pmask, cs->mems_allowed, node_online_map); 601 nodes_and(*pmask, cs->mems_allowed,
602 node_states[N_HIGH_MEMORY]);
601 else 603 else
602 *pmask = node_online_map; 604 *pmask = node_states[N_HIGH_MEMORY];
603 BUG_ON(!nodes_intersects(*pmask, node_online_map)); 605 BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
604} 606}
605 607
606/** 608/**
@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
924 int fudge; 926 int fudge;
925 int retval; 927 int retval;
926 928
927 /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ 929 /*
930 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
931 * it's read-only
932 */
928 if (cs == &top_cpuset) 933 if (cs == &top_cpuset)
929 return -EACCES; 934 return -EACCES;
930 935
@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
941 retval = nodelist_parse(buf, trialcs.mems_allowed); 946 retval = nodelist_parse(buf, trialcs.mems_allowed);
942 if (retval < 0) 947 if (retval < 0)
943 goto done; 948 goto done;
949 if (!nodes_intersects(trialcs.mems_allowed,
950 node_states[N_HIGH_MEMORY])) {
951 /*
952 * error if only memoryless nodes specified.
953 */
954 retval = -ENOSPC;
955 goto done;
956 }
944 } 957 }
945 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); 958 /*
959 * Exclude memoryless nodes. We know that trialcs.mems_allowed
960 * contains at least one node with memory.
961 */
962 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
963 node_states[N_HIGH_MEMORY]);
946 oldmem = cs->mems_allowed; 964 oldmem = cs->mems_allowed;
947 if (nodes_equal(oldmem, trialcs.mems_allowed)) { 965 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
948 retval = 0; /* Too easy - nothing to do */ 966 retval = 0; /* Too easy - nothing to do */
@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
2098 2116
2099/* 2117/*
2100 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track 2118 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
2101 * cpu_online_map and node_online_map. Force the top cpuset to track 2119 * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
2102 * whats online after any CPU or memory node hotplug or unplug event. 2120 * track what's online after any CPU or memory node hotplug or unplug
2121 * event.
2103 * 2122 *
2104 * To ensure that we don't remove a CPU or node from the top cpuset 2123 * To ensure that we don't remove a CPU or node from the top cpuset
2105 * that is currently in use by a child cpuset (which would violate 2124 * that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void)
2119 2138
2120 guarantee_online_cpus_mems_in_subtree(&top_cpuset); 2139 guarantee_online_cpus_mems_in_subtree(&top_cpuset);
2121 top_cpuset.cpus_allowed = cpu_online_map; 2140 top_cpuset.cpus_allowed = cpu_online_map;
2122 top_cpuset.mems_allowed = node_online_map; 2141 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2123 2142
2124 mutex_unlock(&callback_mutex); 2143 mutex_unlock(&callback_mutex);
2125 mutex_unlock(&manage_mutex); 2144 mutex_unlock(&manage_mutex);
@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
2147 2166
2148#ifdef CONFIG_MEMORY_HOTPLUG 2167#ifdef CONFIG_MEMORY_HOTPLUG
2149/* 2168/*
2150 * Keep top_cpuset.mems_allowed tracking node_online_map. 2169 * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
2151 * Call this routine anytime after you change node_online_map. 2170 * Call this routine anytime after you change
2171 * node_states[N_HIGH_MEMORY].
2152 * See also the previous routine cpuset_handle_cpuhp(). 2172 * See also the previous routine cpuset_handle_cpuhp().
2153 */ 2173 */
2154 2174
@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void)
2167void __init cpuset_init_smp(void) 2187void __init cpuset_init_smp(void)
2168{ 2188{
2169 top_cpuset.cpus_allowed = cpu_online_map; 2189 top_cpuset.cpus_allowed = cpu_online_map;
2170 top_cpuset.mems_allowed = node_online_map; 2190 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2171 2191
2172 hotcpu_notifier(cpuset_handle_cpuhp, 0); 2192 hotcpu_notifier(cpuset_handle_cpuhp, 0);
2173} 2193}
@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void)
2309 * 2329 *
2310 * Description: Returns the nodemask_t mems_allowed of the cpuset 2330 * Description: Returns the nodemask_t mems_allowed of the cpuset
2311 * attached to the specified @tsk. Guaranteed to return some non-empty 2331 * attached to the specified @tsk. Guaranteed to return some non-empty
2312 * subset of node_online_map, even if this means going outside the 2332 * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
2313 * tasks cpuset. 2333 * tasks cpuset.
2314 **/ 2334 **/
2315 2335