Diffstat (limited to 'kernel/cpuset.c')
 -rw-r--r--  kernel/cpuset.c | 106
 1 file changed, 93 insertions(+), 13 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1b32c2c04c15..8c3c400cce91 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,7 +240,7 @@ static struct super_block *cpuset_sb;
  * A cpuset can only be deleted if both its 'count' of using tasks
  * is zero, and its list of 'children' cpusets is empty. Since all
  * tasks in the system use _some_ cpuset, and since there is always at
- * least one task in the system (init, pid == 1), therefore, top_cpuset
+ * least one task in the system (init), therefore, top_cpuset
  * always has either children cpusets and/or using tasks. So we don't
  * need a special hack to ensure that top_cpuset cannot be deleted.
  *
@@ -912,6 +912,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	int fudge;
 	int retval;
 
+	/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
 	trialcs = *cs;
 	retval = nodelist_parse(buf, trialcs.mems_allowed);
 	if (retval < 0)
@@ -1221,7 +1225,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 
 	task_lock(tsk);
 	oldcs = tsk->cpuset;
-	if (!oldcs) {
+	/*
+	 * After getting 'oldcs' cpuset ptr, be sure still not exiting.
+	 * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
+	 * then fail this attach_task(), to avoid breaking top_cpuset.count.
+	 */
+	if (tsk->flags & PF_EXITING) {
 		task_unlock(tsk);
 		mutex_unlock(&callback_mutex);
 		put_task_struct(tsk);
@@ -2036,33 +2045,104 @@ out:
 	return err;
 }
 
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
 /*
- * The top_cpuset tracks what CPUs and Memory Nodes are online,
- * period. This is necessary in order to make cpusets transparent
- * (of no affect) on systems that are actively using CPU hotplug
- * but making no active use of cpusets.
- *
- * This handles CPU hotplug (cpuhp) events. If someday Memory
- * Nodes can be hotplugged (dynamically changing node_online_map)
- * then we should handle that too, perhaps in a similar way.
+ * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets. If this removes the
+ * last CPU or node from a cpuset, then the guarantee_online_cpus()
+ * or guarantee_online_mems() code will use that emptied cpusets
+ * parent online CPUs or nodes. Cpusets that were already empty of
+ * CPUs or nodes are left empty.
+ *
+ * This routine is intentionally inefficient in a couple of regards.
+ * It will check all cpusets in a subtree even if the top cpuset of
+ * the subtree has no offline CPUs or nodes. It checks both CPUs and
+ * nodes, even though the caller could have been coded to know that
+ * only one of CPUs or nodes needed to be checked on a given call.
+ * This was done to minimize text size rather than cpu cycles.
+ *
+ * Call with both manage_mutex and callback_mutex held.
+ *
+ * Recursive, on depth of cpuset subtree.
  */
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int cpuset_handle_cpuhp(struct notifier_block *nb,
-				unsigned long phase, void *cpu)
+static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
+{
+	struct cpuset *c;
+
+	/* Each of our child cpusets mems must be online */
+	list_for_each_entry(c, &cur->children, sibling) {
+		guarantee_online_cpus_mems_in_subtree(c);
+		if (!cpus_empty(c->cpus_allowed))
+			guarantee_online_cpus(c, &c->cpus_allowed);
+		if (!nodes_empty(c->mems_allowed))
+			guarantee_online_mems(c, &c->mems_allowed);
+	}
+}
+
+/*
+ * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
+ * cpu_online_map and node_online_map. Force the top cpuset to track
+ * whats online after any CPU or memory node hotplug or unplug event.
+ *
+ * To ensure that we don't remove a CPU or node from the top cpuset
+ * that is currently in use by a child cpuset (which would violate
+ * the rule that cpusets must be subsets of their parent), we first
+ * call the recursive routine guarantee_online_cpus_mems_in_subtree().
+ *
+ * Since there are two callers of this routine, one for CPU hotplug
+ * events and one for memory node hotplug events, we could have coded
+ * two separate routines here. We code it as a single common routine
+ * in order to minimize text size.
+ */
+
+static void common_cpu_mem_hotplug_unplug(void)
 {
 	mutex_lock(&manage_mutex);
 	mutex_lock(&callback_mutex);
 
+	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
 	top_cpuset.cpus_allowed = cpu_online_map;
+	top_cpuset.mems_allowed = node_online_map;
 
 	mutex_unlock(&callback_mutex);
 	mutex_unlock(&manage_mutex);
+}
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * The top_cpuset tracks what CPUs and Memory Nodes are online,
+ * period. This is necessary in order to make cpusets transparent
+ * (of no affect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
+ * This routine ensures that top_cpuset.cpus_allowed tracks
+ * cpu_online_map on each CPU hotplug (cpuhp) event.
+ */
 
+static int cpuset_handle_cpuhp(struct notifier_block *nb,
+				unsigned long phase, void *cpu)
+{
+	common_cpu_mem_hotplug_unplug();
 	return 0;
 }
 #endif
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Keep top_cpuset.mems_allowed tracking node_online_map.
+ * Call this routine anytime after you change node_online_map.
+ * See also the previous routine cpuset_handle_cpuhp().
+ */
+
+void cpuset_track_online_nodes()
+{
+	common_cpu_mem_hotplug_unplug();
+}
+#endif
+
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
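
A note on the ordering in common_cpu_mem_hotplug_unplug(): the recursive
guarantee_online_cpus_mems_in_subtree() walk runs before top_cpuset's masks
are reset to the online maps, so any child cpuset that just lost its last
online CPU or node is repaired (by falling back to its parent's online CPUs
or nodes) while the parent masks are still valid. The stand-alone user-space
sketch below models only that CPU-side fallback on a toy two-level tree;
every identifier in it (toy_cpuset, effective_cpus, fixup_subtree, the 4-bit
masks) is invented for illustration and is not kernel code.

#include <stdio.h>

struct toy_cpuset {
	unsigned int cpus_allowed;	/* bitmask of "allowed" CPUs */
	struct toy_cpuset *parent;
	struct toy_cpuset *children[4];
	int nchildren;
};

static unsigned int online_cpus = 0x0f;	/* pretend CPUs 0-3 start online */

/* Fallback rule modeled on guarantee_online_cpus(): if a cpuset has no
 * online CPUs left, use the nearest ancestor that still has some. */
static unsigned int effective_cpus(struct toy_cpuset *cs)
{
	while (cs && !(cs->cpus_allowed & online_cpus))
		cs = cs->parent;
	return cs ? (cs->cpus_allowed & online_cpus) : online_cpus;
}

/* Depth-first walk modeled on guarantee_online_cpus_mems_in_subtree():
 * repair all descendants before the caller trims the top-level mask. */
static void fixup_subtree(struct toy_cpuset *cs)
{
	int i;

	for (i = 0; i < cs->nchildren; i++) {
		fixup_subtree(cs->children[i]);
		cs->children[i]->cpus_allowed = effective_cpus(cs->children[i]);
	}
}

int main(void)
{
	struct toy_cpuset top = { .cpus_allowed = 0x0f };
	struct toy_cpuset child = { .cpus_allowed = 0x08, .parent = &top };

	top.children[0] = &child;
	top.nchildren = 1;

	online_cpus = 0x07;		/* "unplug" CPU 3, the only CPU child had */
	fixup_subtree(&top);		/* child falls back to parent's online CPUs */
	top.cpus_allowed = online_cpus;	/* then trim the top-level mask */

	printf("child now allowed on mask 0x%x\n", child.cpus_allowed);
	return 0;
}

Running the sketch prints "child now allowed on mask 0x7": the child that was
pinned to the unplugged CPU inherits its parent's remaining online CPUs,
mirroring what guarantee_online_cpus() does for an emptied cpuset.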
