Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--  kernel/cpuset.c  143
1 file changed, 43 insertions(+), 100 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4f9dfe43ecbd..12331120767c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -265,17 +265,6 @@ static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
 /*
- * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
- * buffers. They are statically allocated to prevent using excess stack
- * when calling cpuset_print_task_mems_allowed().
- */
-#define CPUSET_NAME_LEN		(128)
-#define CPUSET_NODELIST_LEN	(256)
-static char cpuset_name[CPUSET_NAME_LEN];
-static char cpuset_nodelist[CPUSET_NODELIST_LEN];
-static DEFINE_SPINLOCK(cpuset_buffer_lock);
-
-/*
  * CPU / memory hotplug is handled asynchronously.
  */
 static struct workqueue_struct *cpuset_propagate_hotplug_wq;
@@ -780,25 +769,26 @@ static void rebuild_sched_domains_locked(void)
 	lockdep_assert_held(&cpuset_mutex);
 	get_online_cpus();
 
+	/*
+	 * We have raced with CPU hotplug. Don't do anything to avoid
+	 * passing doms with offlined cpu to partition_sched_domains().
+	 * Anyways, hotplug work item will rebuild sched domains.
+	 */
+	if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask))
+		goto out;
+
 	/* Generate domain masks and attrs */
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
-
+out:
 	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
 static void rebuild_sched_domains_locked(void)
 {
 }
-
-static int generate_sched_domains(cpumask_var_t **domains,
-			struct sched_domain_attr **attributes)
-{
-	*domains = NULL;
-	return 1;
-}
 #endif /* CONFIG_SMP */
 
 void rebuild_sched_domains(void)
@@ -1388,16 +1378,16 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 
 	cgroup_taskset_for_each(task, cgrp, tset) {
 		/*
-		 * Kthreads bound to specific cpus cannot be moved to a new
-		 * cpuset; we cannot change their cpu affinity and
-		 * isolating such threads by their set of allowed nodes is
-		 * unnecessary. Thus, cpusets are not applicable for such
-		 * threads. This prevents checking for success of
-		 * set_cpus_allowed_ptr() on all attached tasks before
-		 * cpus_allowed may be changed.
+		 * Kthreads which disallow setaffinity shouldn't be moved
+		 * to a new cpuset; we don't want to change their cpu
+		 * affinity and isolating such threads by their set of
+		 * allowed nodes is unnecessary. Thus, cpusets are not
+		 * applicable for such threads. This prevents checking for
+		 * success of set_cpus_allowed_ptr() on all attached tasks
+		 * before cpus_allowed may be changed.
 		 */
 		ret = -EINVAL;
-		if (task->flags & PF_THREAD_BOUND)
+		if (task->flags & PF_NO_SETAFFINITY)
 			goto out_unlock;
 		ret = security_task_setscheduler(task);
 		if (ret)
@@ -2005,50 +1995,6 @@ int __init cpuset_init(void)
 	return 0;
 }
 
-/**
- * cpuset_do_move_task - move a given task to another cpuset
- * @tsk: pointer to task_struct the task to move
- * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
- *
- * Called by cgroup_scan_tasks() for each task in a cgroup.
- * Return nonzero to stop the walk through the tasks.
- */
-static void cpuset_do_move_task(struct task_struct *tsk,
-				struct cgroup_scanner *scan)
-{
-	struct cgroup *new_cgroup = scan->data;
-
-	cgroup_lock();
-	cgroup_attach_task(new_cgroup, tsk);
-	cgroup_unlock();
-}
-
-/**
- * move_member_tasks_to_cpuset - move tasks from one cpuset to another
- * @from: cpuset in which the tasks currently reside
- * @to: cpuset to which the tasks will be moved
- *
- * Called with cpuset_mutex held
- * callback_mutex must not be held, as cpuset_attach() will take it.
- *
- * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
- * calling callback functions for each.
- */
-static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
-{
-	struct cgroup_scanner scan;
-
-	scan.cg = from->css.cgroup;
-	scan.test_task = NULL; /* select all tasks in cgroup */
-	scan.process_task = cpuset_do_move_task;
-	scan.heap = NULL;
-	scan.data = to->css.cgroup;
-
-	if (cgroup_scan_tasks(&scan))
-		printk(KERN_ERR "move_member_tasks_to_cpuset: "
-				"cgroup_scan_tasks failed\n");
-}
-
 /*
  * If CPU and/or memory hotplug handlers, below, unplug any CPUs
  * or memory nodes, we need to walk over the cpuset hierarchy,
@@ -2069,7 +2015,12 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
 	       nodes_empty(parent->mems_allowed))
 		parent = parent_cs(parent);
 
-	move_member_tasks_to_cpuset(cs, parent);
+	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
+		rcu_read_lock();
+		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
+		       cgroup_name(cs->css.cgroup));
+		rcu_read_unlock();
+	}
 }
 
 /**
@@ -2222,17 +2173,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 	flush_workqueue(cpuset_propagate_hotplug_wq);
 
 	/* rebuild sched domains if cpus_allowed has changed */
-	if (cpus_updated) {
-		struct sched_domain_attr *attr;
-		cpumask_var_t *doms;
-		int ndoms;
-
-		mutex_lock(&cpuset_mutex);
-		ndoms = generate_sched_domains(&doms, &attr);
-		mutex_unlock(&cpuset_mutex);
-
-		partition_sched_domains(ndoms, doms, attr);
-	}
+	if (cpus_updated)
+		rebuild_sched_domains();
 }
 
 void cpuset_update_active_cpus(bool cpu_online)
@@ -2251,7 +2193,6 @@ void cpuset_update_active_cpus(bool cpu_online)
 	schedule_work(&cpuset_hotplug_work);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
  * Call this routine anytime after node_states[N_MEMORY] changes.
@@ -2263,20 +2204,23 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 	schedule_work(&cpuset_hotplug_work);
 	return NOTIFY_OK;
 }
-#endif
+
+static struct notifier_block cpuset_track_online_nodes_nb = {
+	.notifier_call = cpuset_track_online_nodes,
+	.priority = 10,		/* ??! */
+};
 
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
  * Description: Finish top cpuset after cpu, node maps are initialized
- **/
-
+ */
 void __init cpuset_init_smp(void)
 {
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_MEMORY];
 
-	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
 
 	cpuset_propagate_hotplug_wq =
 		alloc_ordered_workqueue("cpuset_hotplug", 0);
@@ -2592,6 +2536,8 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
 	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
 }
 
+#define CPUSET_NODELIST_LEN	(256)
+
 /**
  * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
  * @task: pointer to task_struct of some task.
@@ -2602,25 +2548,22 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
  */
 void cpuset_print_task_mems_allowed(struct task_struct *tsk)
 {
-	struct dentry *dentry;
+	/* Statically allocated to prevent using excess stack. */
+	static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+	static DEFINE_SPINLOCK(cpuset_buffer_lock);
 
-	dentry = task_cs(tsk)->css.cgroup->dentry;
-	spin_lock(&cpuset_buffer_lock);
+	struct cgroup *cgrp = task_cs(tsk)->css.cgroup;
 
-	if (!dentry) {
-		strcpy(cpuset_name, "/");
-	} else {
-		spin_lock(&dentry->d_lock);
-		strlcpy(cpuset_name, (const char *)dentry->d_name.name,
-			CPUSET_NAME_LEN);
-		spin_unlock(&dentry->d_lock);
-	}
+	rcu_read_lock();
+	spin_lock(&cpuset_buffer_lock);
 
 	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
 			   tsk->mems_allowed);
 	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
-	       tsk->comm, cpuset_name, cpuset_nodelist);
+	       tsk->comm, cgroup_name(cgrp), cpuset_nodelist);
+
 	spin_unlock(&cpuset_buffer_lock);
+	rcu_read_unlock();
 }
 
 /*