aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Paul Jackson <pj@sgi.com> 2006-09-29 05:01:17 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-29 12:18:21 -0400
commit b1aac8bb824c658ddebd296b088a8bff5029c288 (patch)
tree 4b19b71fac4f769b7be7cff97ac7f6445d14d236 /kernel
parent 38837fc75acb7fa9b0e111b0241fe4fe76c5d4b3 (diff)
[PATCH] cpuset: hotunplug cpus and mems in all cpusets
The cpuset code handling hot unplug of CPUs or Memory Nodes was incorrect - it could remove a CPU or Node from the top cpuset, while leaving it still in some child cpusets.

One basic rule of cpusets is that each cpuset's cpus and mems are subsets of its parent's. The cpuset hot unplug code violated this rule.

So the cpuset hotunplug handler must walk down the tree, removing any removed CPU or Node from all cpusets.

However, it is not allowed to make a cpuset's cpus or mems become empty. They can only transition from empty to non-empty, not back.

So if the last CPU or Node would be removed from a cpuset by the above walk, we scan back up the cpuset hierarchy, finding the nearest ancestor that still has something online, and copy its CPU or Memory placement.

Signed-off-by: Paul Jackson <pj@sgi.com>
Cc: Nathan Lynch <ntl@pobox.com>
Cc: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cpuset.c 87
1 files changed, 70 insertions, 17 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 794af5024c2f..cc0395d7eba1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2040,6 +2040,73 @@ out:
2040 return err; 2040 return err;
2041} 2041}
2042 2042
2043#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
2044/*
2045 * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
2046 * or memory nodes, we need to walk over the cpuset hierarchy,
2047 * removing that CPU or node from all cpusets. If this removes the
2048 * last CPU or node from a cpuset, then the guarantee_online_cpus()
2049 * or guarantee_online_mems() code will use that emptied cpuset's
2050 * parent's online CPUs or nodes. Cpusets that were already empty of
2051 * CPUs or nodes are left empty.
2052 *
2053 * This routine is intentionally inefficient in a couple of regards.
2054 * It will check all cpusets in a subtree even if the top cpuset of
2055 * the subtree has no offline CPUs or nodes. It checks both CPUs and
2056 * nodes, even though the caller could have been coded to know that
2057 * only one of CPUs or nodes needed to be checked on a given call.
2058 * This was done to minimize text size rather than cpu cycles.
2059 *
2060 * Call with both manage_mutex and callback_mutex held.
2061 *
2062 * Recursive, on depth of cpuset subtree.
2063 */
2064
2065static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
2066{
2067 struct cpuset *c;
2068
2069 /* Each of our child cpusets' CPUs and mems must be online */
2070 list_for_each_entry(c, &cur->children, sibling) {
2071 guarantee_online_cpus_mems_in_subtree(c);
2072 if (!cpus_empty(c->cpus_allowed))
2073 guarantee_online_cpus(c, &c->cpus_allowed);
2074 if (!nodes_empty(c->mems_allowed))
2075 guarantee_online_mems(c, &c->mems_allowed);
2076 }
2077}
2078
2079/*
2080 * The cpus_allowed and mems_allowed masks in the top_cpuset track
2081 * cpu_online_map and node_online_map. Force the top cpuset to track
2082 * what's online after any CPU or memory node hotplug or unplug event.
2083 *
2084 * To ensure that we don't remove a CPU or node from the top cpuset
2085 * that is currently in use by a child cpuset (which would violate
2086 * the rule that cpusets must be subsets of their parent), we first
2087 * call the recursive routine guarantee_online_cpus_mems_in_subtree().
2088 *
2089 * Since there are two callers of this routine, one for CPU hotplug
2090 * events and one for memory node hotplug events, we could have coded
2091 * two separate routines here. We code it as a single common routine
2092 * in order to minimize text size.
2093 */
2094
2095static void common_cpu_mem_hotplug_unplug(void)
2096{
2097 mutex_lock(&manage_mutex);
2098 mutex_lock(&callback_mutex);
2099
2100 guarantee_online_cpus_mems_in_subtree(&top_cpuset);
2101 top_cpuset.cpus_allowed = cpu_online_map;
2102 top_cpuset.mems_allowed = node_online_map;
2103
2104 mutex_unlock(&callback_mutex);
2105 mutex_unlock(&manage_mutex);
2106}
2107#endif
2108
2109#ifdef CONFIG_HOTPLUG_CPU
2043/* 2110/*
2044 * The top_cpuset tracks what CPUs and Memory Nodes are online, 2111 * The top_cpuset tracks what CPUs and Memory Nodes are online,
2045 * period. This is necessary in order to make cpusets transparent 2112 * period. This is necessary in order to make cpusets transparent
@@ -2050,38 +2117,24 @@ out:
2050 * cpu_online_map on each CPU hotplug (cpuhp) event. 2117 * cpu_online_map on each CPU hotplug (cpuhp) event.
2051 */ 2118 */
2052 2119
2053#ifdef CONFIG_HOTPLUG_CPU
2054static int cpuset_handle_cpuhp(struct notifier_block *nb, 2120static int cpuset_handle_cpuhp(struct notifier_block *nb,
2055 unsigned long phase, void *cpu) 2121 unsigned long phase, void *cpu)
2056{ 2122{
2057 mutex_lock(&manage_mutex); 2123 common_cpu_mem_hotplug_unplug();
2058 mutex_lock(&callback_mutex);
2059
2060 top_cpuset.cpus_allowed = cpu_online_map;
2061
2062 mutex_unlock(&callback_mutex);
2063 mutex_unlock(&manage_mutex);
2064
2065 return 0; 2124 return 0;
2066} 2125}
2067#endif 2126#endif
2068 2127
2128#ifdef CONFIG_MEMORY_HOTPLUG
2069/* 2129/*
2070 * Keep top_cpuset.mems_allowed tracking node_online_map. 2130 * Keep top_cpuset.mems_allowed tracking node_online_map.
2071 * Call this routine anytime after you change node_online_map. 2131 * Call this routine anytime after you change node_online_map.
2072 * See also the previous routine cpuset_handle_cpuhp(). 2132 * See also the previous routine cpuset_handle_cpuhp().
2073 */ 2133 */
2074 2134
2075#ifdef CONFIG_MEMORY_HOTPLUG
2076void cpuset_track_online_nodes() 2135void cpuset_track_online_nodes()
2077{ 2136{
2078 mutex_lock(&manage_mutex); 2137 common_cpu_mem_hotplug_unplug();
2079 mutex_lock(&callback_mutex);
2080
2081 top_cpuset.mems_allowed = node_online_map;
2082
2083 mutex_unlock(&callback_mutex);
2084 mutex_unlock(&manage_mutex);
2085} 2138}
2086#endif 2139#endif
2087 2140