Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--	kernel/cpuset.c	113
1 file changed, 96 insertions(+), 17 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4ea6f0dc2fc5..8c3c400cce91 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,7 +240,7 @@ static struct super_block *cpuset_sb;
  * A cpuset can only be deleted if both its 'count' of using tasks
  * is zero, and its list of 'children' cpusets is empty. Since all
  * tasks in the system use _some_ cpuset, and since there is always at
- * least one task in the system (init, pid == 1), therefore, top_cpuset
+ * least one task in the system (init), therefore, top_cpuset
  * always has either children cpusets and/or using tasks. So we don't
  * need a special hack to ensure that top_cpuset cannot be deleted.
  *
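This invariant makes cpuset removal a purely local test, with no global accounting required. A minimal sketch of such a removability check, assuming the 'count' and 'children' fields the comment names (cpuset_is_removable() is a hypothetical helper; the real rmdir path performs more validation):

	/* Sketch: a cpuset may be deleted only when no task uses it
	 * and it has no child cpusets. */
	static int cpuset_is_removable(const struct cpuset *cs)
	{
		return atomic_read(&cs->count) == 0 &&
			list_empty(&cs->children);
	}
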
@@ -289,7 +289,6 @@ static struct inode *cpuset_new_inode(mode_t mode)
 	inode->i_mode = mode;
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
-	inode->i_blksize = PAGE_CACHE_SIZE;
 	inode->i_blocks = 0;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
@@ -913,6 +912,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	int fudge;
 	int retval;
 
+	/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
 	trialcs = *cs;
 	retval = nodelist_parse(buf, trialcs.mems_allowed);
 	if (retval < 0)
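With this check in place, any attempt to change the memory placement of the top cpuset fails with EACCES, since top_cpuset.mems_allowed is now owned by the hotplug code added later in this patch. A userspace sketch of the new behavior, assuming the cpuset filesystem is mounted at the conventional /dev/cpuset:

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/dev/cpuset/mems", O_WRONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Writing the root cpuset's mems now fails with EACCES. */
		if (write(fd, "0", 1) < 0)
			fprintf(stderr, "write: %s\n", strerror(errno));
		close(fd);
		return 0;
	}
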
@@ -1222,7 +1225,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 
 	task_lock(tsk);
 	oldcs = tsk->cpuset;
-	if (!oldcs) {
+	/*
+	 * After getting 'oldcs' cpuset ptr, be sure still not exiting.
+	 * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
+	 * then fail this attach_task(), to avoid breaking top_cpuset.count.
+	 */
+	if (tsk->flags & PF_EXITING) {
 		task_unlock(tsk);
 		mutex_unlock(&callback_mutex);
 		put_task_struct(tsk);
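For context, the_top_cpuset_hack named in the new comment is the exit path pointing a dying task at top_cpuset without incrementing its count, so cpuset pointer dereferences stay safe during teardown. A hedged sketch of that exit-side code (simplified; the in-tree cpuset_exit() also handles notify_on_release processing):

	void cpuset_exit(struct task_struct *tsk)
	{
		struct cpuset *cs;

		cs = tsk->cpuset;
		tsk->cpuset = &top_cpuset;	/* the_top_cpuset_hack */
		atomic_dec(&cs->count);		/* drop the task's reference */
	}

Because PF_EXITING is set before the exit path runs this, refusing PF_EXITING tasks in attach_task() means the attach logic never decrements a top_cpuset.count reference that was never taken.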
@@ -2037,33 +2045,104 @@ out:
 	return err;
 }
 
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
 /*
- * The top_cpuset tracks what CPUs and Memory Nodes are online,
- * period. This is necessary in order to make cpusets transparent
- * (of no affect) on systems that are actively using CPU hotplug
- * but making no active use of cpusets.
- *
- * This handles CPU hotplug (cpuhp) events. If someday Memory
- * Nodes can be hotplugged (dynamically changing node_online_map)
- * then we should handle that too, perhaps in a similar way.
+ * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets. If this removes the
+ * last CPU or node from a cpuset, then the guarantee_online_cpus()
+ * or guarantee_online_mems() code will use that emptied cpuset's
+ * parent online CPUs or nodes. Cpusets that were already empty of
+ * CPUs or nodes are left empty.
+ *
+ * This routine is intentionally inefficient in a couple of regards.
+ * It will check all cpusets in a subtree even if the top cpuset of
+ * the subtree has no offline CPUs or nodes. It checks both CPUs and
+ * nodes, even though the caller could have been coded to know that
+ * only one of CPUs or nodes needed to be checked on a given call.
+ * This was done to minimize text size rather than cpu cycles.
+ *
+ * Call with both manage_mutex and callback_mutex held.
+ *
+ * Recursive, on depth of cpuset subtree.
  */
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int cpuset_handle_cpuhp(struct notifier_block *nb,
-				unsigned long phase, void *cpu)
+static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
+{
+	struct cpuset *c;
+
+	/* Each of our child cpusets' mems must be online */
+	list_for_each_entry(c, &cur->children, sibling) {
+		guarantee_online_cpus_mems_in_subtree(c);
+		if (!cpus_empty(c->cpus_allowed))
+			guarantee_online_cpus(c, &c->cpus_allowed);
+		if (!nodes_empty(c->mems_allowed))
+			guarantee_online_mems(c, &c->mems_allowed);
+	}
+}
+
+/*
+ * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
+ * cpu_online_map and node_online_map. Force the top cpuset to track
+ * what's online after any CPU or memory node hotplug or unplug event.
+ *
+ * To ensure that we don't remove a CPU or node from the top cpuset
+ * that is currently in use by a child cpuset (which would violate
+ * the rule that cpusets must be subsets of their parent), we first
+ * call the recursive routine guarantee_online_cpus_mems_in_subtree().
+ *
+ * Since there are two callers of this routine, one for CPU hotplug
+ * events and one for memory node hotplug events, we could have coded
+ * two separate routines here. We code it as a single common routine
+ * in order to minimize text size.
+ */
+
+static void common_cpu_mem_hotplug_unplug(void)
 {
 	mutex_lock(&manage_mutex);
 	mutex_lock(&callback_mutex);
 
+	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
 	top_cpuset.cpus_allowed = cpu_online_map;
+	top_cpuset.mems_allowed = node_online_map;
 
 	mutex_unlock(&callback_mutex);
 	mutex_unlock(&manage_mutex);
+}
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * The top_cpuset tracks what CPUs and Memory Nodes are online,
+ * period. This is necessary in order to make cpusets transparent
+ * (of no effect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
+ * This routine ensures that top_cpuset.cpus_allowed tracks
+ * cpu_online_map on each CPU hotplug (cpuhp) event.
+ */
 
+static int cpuset_handle_cpuhp(struct notifier_block *nb,
+				unsigned long phase, void *cpu)
+{
+	common_cpu_mem_hotplug_unplug();
 	return 0;
 }
 #endif
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Keep top_cpuset.mems_allowed tracking node_online_map.
+ * Call this routine anytime after you change node_online_map.
+ * See also the previous routine cpuset_handle_cpuhp().
+ */
+
+void cpuset_track_online_nodes(void)
+{
+	common_cpu_mem_hotplug_unplug();
+}
+#endif
+
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
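The new subtree walk leans on guarantee_online_cpus() and guarantee_online_mems(), which are outside this hunk. For reference, a minimal sketch of the mems variant, under the assumption that it climbs to the nearest ancestor still holding online memory (the in-tree helper may differ in detail):

	static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
	{
		/* Walk up until an ancestor intersects the online map. */
		while (cs && !nodes_intersects(cs->mems_allowed, node_online_map))
			cs = cs->parent;
		if (cs)
			nodes_and(*pmask, cs->mems_allowed, node_online_map);
		else
			*pmask = node_online_map;	/* past the root: all online */
	}

The CPU variant would be the analogous cpus_and() walk over cpu_online_map. Note that neither entry point is registered in this hunk: cpuset_handle_cpuhp() would be wired to a CPU hotplug notifier (presumably hotcpu_notifier() from cpuset_init_smp()), and cpuset_track_online_nodes() is meant to be called by the memory hotplug code whenever node_online_map changes.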
@@ -2245,7 +2324,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	int i;
 
 	for (i = 0; zl->zones[i]; i++) {
-		int nid = zl->zones[i]->zone_pgdat->node_id;
+		int nid = zone_to_nid(zl->zones[i]);
 
 		if (node_isset(nid, current->mems_allowed))
 			return 1;
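zone_to_nid() replaces the open-coded pointer chase here and in the next hunk. A sketch of what such an accessor amounts to, assuming a zone still reaches its node through zone_pgdat (the real helper lives in the mm headers and may be defined differently):

	static inline int zone_to_nid(struct zone *zone)
	{
		return zone->zone_pgdat->node_id;
	}
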
@@ -2316,9 +2395,9 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	const struct cpuset *cs;	/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
 
-	if (in_interrupt())
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
-	node = z->zone_pgdat->node_id;
+	node = zone_to_nid(z);
 	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
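The added __GFP_THISNODE test lets node-pinned allocations bypass the cpuset memory check entirely: a caller that insists on one specific node has already made its placement decision. A hypothetical caller pattern that now short-circuits here:

	/* Allocate one page on a specific node; __GFP_THISNODE makes
	 * __cpuset_zone_allowed() return 1 without consulting
	 * current->mems_allowed. */
	static struct page *alloc_page_on_node(int nid)
	{
		return alloc_pages_node(nid, GFP_KERNEL | __GFP_THISNODE, 0);
	}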