diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 130 |
1 files changed, 92 insertions, 38 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8c8bd652dd12..f33c7153b6d7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -147,6 +147,12 @@ typedef enum { | |||
147 | CS_SPREAD_SLAB, | 147 | CS_SPREAD_SLAB, |
148 | } cpuset_flagbits_t; | 148 | } cpuset_flagbits_t; |
149 | 149 | ||
150 | /* the type of hotplug event */ | ||
151 | enum hotplug_event { | ||
152 | CPUSET_CPU_OFFLINE, | ||
153 | CPUSET_MEM_OFFLINE, | ||
154 | }; | ||
155 | |||
150 | /* convenient tests for these bits */ | 156 | /* convenient tests for these bits */ |
151 | static inline int is_cpu_exclusive(const struct cpuset *cs) | 157 | static inline int is_cpu_exclusive(const struct cpuset *cs) |
152 | { | 158 | { |
@@ -1990,8 +1996,36 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
1990 | } | 1996 | } |
1991 | 1997 | ||
1992 | /* | 1998 | /* |
1993 | * Walk the specified cpuset subtree and look for empty cpusets. | 1999 | * Helper function to traverse cpusets. |
1994 | * The tasks of such cpuset must be moved to a parent cpuset. | 2000 | * It can be used to walk the cpuset tree from top to bottom, completing |
2001 | * one layer before dropping down to the next (thus always processing a | ||
2002 | * node before any of its children). | ||
2003 | */ | ||
2004 | static struct cpuset *cpuset_next(struct list_head *queue) | ||
2005 | { | ||
2006 | struct cpuset *cp; | ||
2007 | struct cpuset *child; /* scans child cpusets of cp */ | ||
2008 | struct cgroup *cont; | ||
2009 | |||
2010 | if (list_empty(queue)) | ||
2011 | return NULL; | ||
2012 | |||
2013 | cp = list_first_entry(queue, struct cpuset, stack_list); | ||
2014 | list_del(queue->next); | ||
2015 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | ||
2016 | child = cgroup_cs(cont); | ||
2017 | list_add_tail(&child->stack_list, queue); | ||
2018 | } | ||
2019 | |||
2020 | return cp; | ||
2021 | } | ||
2022 | |||
2023 | |||
2024 | /* | ||
2025 | * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory | ||
2026 | * online/offline) and update the cpusets accordingly. | ||
2027 | * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such | ||
2028 | * a cpuset must be moved to a parent cpuset. | |
1995 | * | 2029 | * |
1996 | * Called with cgroup_mutex held. We take callback_mutex to modify | 2030 | * Called with cgroup_mutex held. We take callback_mutex to modify |
1997 | * cpus_allowed and mems_allowed. | 2031 | * cpus_allowed and mems_allowed. |
@@ -2000,50 +2034,61 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
2000 | * before dropping down to the next. It always processes a node before | 2034 | * before dropping down to the next. It always processes a node before |
2001 | * any of its children. | 2035 | * any of its children. |
2002 | * | 2036 | * |
2003 | * For now, since we lack memory hot unplug, we'll never see a cpuset | 2037 | * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY |
2004 | * that has tasks along with an empty 'mems'. But if we did see such | 2038 | * if all present pages from a node are offlined. |
2005 | * a cpuset, we'd handle it just like we do if its 'cpus' was empty. | ||
2006 | */ | 2039 | */ |
2007 | static void scan_for_empty_cpusets(struct cpuset *root) | 2040 | static void |
2041 | scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event) | ||
2008 | { | 2042 | { |
2009 | LIST_HEAD(queue); | 2043 | LIST_HEAD(queue); |
2010 | struct cpuset *cp; /* scans cpusets being updated */ | 2044 | struct cpuset *cp; /* scans cpusets being updated */ |
2011 | struct cpuset *child; /* scans child cpusets of cp */ | ||
2012 | struct cgroup *cont; | ||
2013 | static nodemask_t oldmems; /* protected by cgroup_mutex */ | 2045 | static nodemask_t oldmems; /* protected by cgroup_mutex */ |
2014 | 2046 | ||
2015 | list_add_tail((struct list_head *)&root->stack_list, &queue); | 2047 | list_add_tail((struct list_head *)&root->stack_list, &queue); |
2016 | 2048 | ||
2017 | while (!list_empty(&queue)) { | 2049 | switch (event) { |
2018 | cp = list_first_entry(&queue, struct cpuset, stack_list); | 2050 | case CPUSET_CPU_OFFLINE: |
2019 | list_del(queue.next); | 2051 | while ((cp = cpuset_next(&queue)) != NULL) { |
2020 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | 2052 | |
2021 | child = cgroup_cs(cont); | 2053 | /* Continue past cpusets with all cpus online */ |
2022 | list_add_tail(&child->stack_list, &queue); | 2054 | if (cpumask_subset(cp->cpus_allowed, cpu_active_mask)) |
2055 | continue; | ||
2056 | |||
2057 | /* Remove offline cpus from this cpuset. */ | ||
2058 | mutex_lock(&callback_mutex); | ||
2059 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, | ||
2060 | cpu_active_mask); | ||
2061 | mutex_unlock(&callback_mutex); | ||
2062 | |||
2063 | /* Move tasks from the empty cpuset to a parent */ | ||
2064 | if (cpumask_empty(cp->cpus_allowed)) | ||
2065 | remove_tasks_in_empty_cpuset(cp); | ||
2066 | else | ||
2067 | update_tasks_cpumask(cp, NULL); | ||
2023 | } | 2068 | } |
2069 | break; | ||
2024 | 2070 | ||
2025 | /* Continue past cpusets with all cpus, mems online */ | 2071 | case CPUSET_MEM_OFFLINE: |
2026 | if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) && | 2072 | while ((cp = cpuset_next(&queue)) != NULL) { |
2027 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | ||
2028 | continue; | ||
2029 | 2073 | ||
2030 | oldmems = cp->mems_allowed; | 2074 | /* Continue past cpusets with all mems online */ |
2075 | if (nodes_subset(cp->mems_allowed, | ||
2076 | node_states[N_HIGH_MEMORY])) | ||
2077 | continue; | ||
2031 | 2078 | ||
2032 | /* Remove offline cpus and mems from this cpuset. */ | 2079 | oldmems = cp->mems_allowed; |
2033 | mutex_lock(&callback_mutex); | 2080 | |
2034 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, | 2081 | /* Remove offline mems from this cpuset. */ |
2035 | cpu_active_mask); | 2082 | mutex_lock(&callback_mutex); |
2036 | nodes_and(cp->mems_allowed, cp->mems_allowed, | 2083 | nodes_and(cp->mems_allowed, cp->mems_allowed, |
2037 | node_states[N_HIGH_MEMORY]); | 2084 | node_states[N_HIGH_MEMORY]); |
2038 | mutex_unlock(&callback_mutex); | 2085 | mutex_unlock(&callback_mutex); |
2039 | 2086 | ||
2040 | /* Move tasks from the empty cpuset to a parent */ | 2087 | /* Move tasks from the empty cpuset to a parent */ |
2041 | if (cpumask_empty(cp->cpus_allowed) || | 2088 | if (nodes_empty(cp->mems_allowed)) |
2042 | nodes_empty(cp->mems_allowed)) | 2089 | remove_tasks_in_empty_cpuset(cp); |
2043 | remove_tasks_in_empty_cpuset(cp); | 2090 | else |
2044 | else { | 2091 | update_tasks_nodemask(cp, &oldmems, NULL); |
2045 | update_tasks_cpumask(cp, NULL); | ||
2046 | update_tasks_nodemask(cp, &oldmems, NULL); | ||
2047 | } | 2092 | } |
2048 | } | 2093 | } |
2049 | } | 2094 | } |
@@ -2054,13 +2099,19 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2054 | * (of no effect) on systems that are actively using CPU hotplug | 2099 | * (of no effect) on systems that are actively using CPU hotplug |
2055 | * but making no active use of cpusets. | 2100 | * but making no active use of cpusets. |
2056 | * | 2101 | * |
2102 | * The only exception to this is suspend/resume, where we don't | ||
2103 | * modify cpusets at all. | ||
2104 | * | ||
2057 | * This routine ensures that top_cpuset.cpus_allowed tracks | 2105 | * This routine ensures that top_cpuset.cpus_allowed tracks |
2058 | * cpu_active_mask on each CPU hotplug (cpuhp) event. | 2106 | * cpu_active_mask on each CPU hotplug (cpuhp) event. |
2059 | * | 2107 | * |
2060 | * Called within get_online_cpus(). Needs to call cgroup_lock() | 2108 | * Called within get_online_cpus(). Needs to call cgroup_lock() |
2061 | * before calling generate_sched_domains(). | 2109 | * before calling generate_sched_domains(). |
2110 | * | ||
2111 | * @cpu_online: Indicates whether this is a CPU online event (true) or | ||
2112 | * a CPU offline event (false). | ||
2062 | */ | 2113 | */ |
2063 | void cpuset_update_active_cpus(void) | 2114 | void cpuset_update_active_cpus(bool cpu_online) |
2064 | { | 2115 | { |
2065 | struct sched_domain_attr *attr; | 2116 | struct sched_domain_attr *attr; |
2066 | cpumask_var_t *doms; | 2117 | cpumask_var_t *doms; |
@@ -2070,7 +2121,10 @@ void cpuset_update_active_cpus(void) | |||
2070 | mutex_lock(&callback_mutex); | 2121 | mutex_lock(&callback_mutex); |
2071 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); | 2122 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2072 | mutex_unlock(&callback_mutex); | 2123 | mutex_unlock(&callback_mutex); |
2073 | scan_for_empty_cpusets(&top_cpuset); | 2124 | |
2125 | if (!cpu_online) | ||
2126 | scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE); | ||
2127 | |||
2074 | ndoms = generate_sched_domains(&doms, &attr); | 2128 | ndoms = generate_sched_domains(&doms, &attr); |
2075 | cgroup_unlock(); | 2129 | cgroup_unlock(); |
2076 | 2130 | ||
@@ -2082,7 +2136,7 @@ void cpuset_update_active_cpus(void) | |||
2082 | /* | 2136 | /* |
2083 | * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. | 2137 | * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. |
2084 | * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. | 2138 | * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. |
2085 | * See also the previous routine cpuset_track_online_cpus(). | 2139 | * See cpuset_update_active_cpus() for CPU hotplug handling. |
2086 | */ | 2140 | */ |
2087 | static int cpuset_track_online_nodes(struct notifier_block *self, | 2141 | static int cpuset_track_online_nodes(struct notifier_block *self, |
2088 | unsigned long action, void *arg) | 2142 | unsigned long action, void *arg) |
@@ -2101,9 +2155,9 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2101 | case MEM_OFFLINE: | 2155 | case MEM_OFFLINE: |
2102 | /* | 2156 | /* |
2103 | * needn't update top_cpuset.mems_allowed explicitly because | 2157 | * needn't update top_cpuset.mems_allowed explicitly because |
2104 | * scan_for_empty_cpusets() will update it. | 2158 | * scan_cpusets_upon_hotplug() will update it. |
2105 | */ | 2159 | */ |
2106 | scan_for_empty_cpusets(&top_cpuset); | 2160 | scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE); |
2107 | break; | 2161 | break; |
2108 | default: | 2162 | default: |
2109 | break; | 2163 | break; |