path: root/kernel/cpuset.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2013-04-29 22:14:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-04-29 22:14:20 -0400
commit    191a712090bb8a10e6f129360eeed2d68f3d4c9a (patch)
tree      17e2d6c27fb8a7c3a61828fbcc7c343a4966a0a9 /kernel/cpuset.c
parent    46d9be3e5eb01f71fc02653755d970247174b400 (diff)
parent    2a0010af17b1739ef8ea8cf02647a127241ee674 (diff)
Merge branch 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:

 - Fixes and a lot of cleanups.  Locking cleanup is finally complete.
   cgroup_mutex is no longer exposed to individual controllers, which
   used to cause nasty deadlock issues.  Li fixed and cleaned up quite a
   bit, including long standing ones like racy cgroup_path().

 - device cgroup now supports proper hierarchy thanks to Aristeu.

 - perf_event cgroup now supports proper hierarchy.

 - A new mount option "__DEVEL__sane_behavior" is added.  As indicated
   by the name, this option is to be used for development only at this
   point and generates a warning message when used.  Unfortunately, the
   cgroup interface currently has too many breakages and inconsistencies
   to implement a consistent and unified hierarchy on top.  The new flag
   is used to collect the behavior changes which are necessary to
   implement consistent unified hierarchy.  It's likely that this flag
   won't be used verbatim when it becomes ready, but will be enabled
   implicitly along with unified hierarchy.

   The option currently disables some of the broken behaviors in cgroup
   core and also the .use_hierarchy switch in memcg (will be routed
   through -mm), which can be used to make a very unusual hierarchy
   where nesting is partially honored.  It will also be used to
   implement hierarchy support for blk-throttle, which would otherwise
   be impossible without introducing a full separate set of control
   knobs.

   This is essentially versioning of the interface, which isn't very
   nice, but at this point I can't see any other option which would
   allow keeping the interface the same while moving towards hierarchy
   behavior which is at least somewhat sane.  The planned unified
   hierarchy is likely to require some level of adaptation from
   userland anyway, so I think it'd be best to take the chance and
   update the interface such that it's supportable in the long term.

   Maintaining the existing interface does complicate cgroup core but
   shouldn't put too much strain on individual controllers, and I think
   it'd be manageable for the foreseeable future.  Maybe we'll be able
   to drop it in a decade.

Fix up conflicts (including a semantic one adding a new #include to ppc
that was uncovered by the header file changes) as per Tejun.

* 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (45 commits)
  cpuset: fix compile warning when CONFIG_SMP=n
  cpuset: fix cpu hotplug vs rebuild_sched_domains() race
  cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn()
  cgroup: restore the call to eventfd->poll()
  cgroup: fix use-after-free when umounting cgroupfs
  cgroup: fix broken file xattrs
  devcg: remove parent_cgroup.
  memcg: force use_hierarchy if sane_behavior
  cgroup: remove cgrp->top_cgroup
  cgroup: introduce sane_behavior mount option
  move cgroupfs_root to include/linux/cgroup.h
  cgroup: convert cgroupfs_root flag bits to masks and add CGRP_ prefix
  cgroup: make cgroup_path() not print double slashes
  Revert "cgroup: remove bind() method from cgroup_subsys."
  perf: make perf_event cgroup hierarchical
  cgroup: implement cgroup_is_descendant()
  cgroup: make sure parent won't be destroyed before its children
  cgroup: remove bind() method from cgroup_subsys.
  devcg: remove broken_hierarchy tag
  cgroup: remove cgroup_lock_is_held()
  ...
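As a minimal sketch of how the "__DEVEL__sane_behavior" mount option described above might be exercised from userspace, the flag can be passed in the data string of mount(2) when mounting cgroupfs.  This example is not part of the patches; the mount point and the choice of the cpuset controller are illustrative assumptions, and the kernel prints a warning whenever the flag is used.

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/*
		 * Roughly equivalent to:
		 *   mount -t cgroup -o __DEVEL__sane_behavior,cpuset cgroup /mnt/cgroup
		 * The target directory must already exist; the controller
		 * selection here is an assumption for illustration only.
		 */
		if (mount("cgroup", "/mnt/cgroup", "cgroup", 0,
			  "__DEVEL__sane_behavior,cpuset") < 0) {
			perror("mount cgroup");
			return 1;
		}
		return 0;
	}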
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--	kernel/cpuset.c	115
1 file changed, 28 insertions(+), 87 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 027a6f65f2ad..12331120767c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -265,17 +265,6 @@ static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
 /*
- * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
- * buffers. They are statically allocated to prevent using excess stack
- * when calling cpuset_print_task_mems_allowed().
- */
-#define CPUSET_NAME_LEN		(128)
-#define CPUSET_NODELIST_LEN	(256)
-static char cpuset_name[CPUSET_NAME_LEN];
-static char cpuset_nodelist[CPUSET_NODELIST_LEN];
-static DEFINE_SPINLOCK(cpuset_buffer_lock);
-
-/*
  * CPU / memory hotplug is handled asynchronously.
  */
 static struct workqueue_struct *cpuset_propagate_hotplug_wq;
@@ -780,25 +769,26 @@ static void rebuild_sched_domains_locked(void)
 	lockdep_assert_held(&cpuset_mutex);
 	get_online_cpus();
 
+	/*
+	 * We have raced with CPU hotplug. Don't do anything to avoid
+	 * passing doms with offlined cpu to partition_sched_domains().
+	 * Anyways, hotplug work item will rebuild sched domains.
+	 */
+	if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask))
+		goto out;
+
 	/* Generate domain masks and attrs */
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
-
+out:
 	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
 static void rebuild_sched_domains_locked(void)
 {
 }
-
-static int generate_sched_domains(cpumask_var_t **domains,
-			struct sched_domain_attr **attributes)
-{
-	*domains = NULL;
-	return 1;
-}
 #endif /* CONFIG_SMP */
 
 void rebuild_sched_domains(void)
@@ -2005,50 +1995,6 @@ int __init cpuset_init(void)
 	return 0;
 }
 
-/**
- * cpuset_do_move_task - move a given task to another cpuset
- * @tsk: pointer to task_struct the task to move
- * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
- *
- * Called by cgroup_scan_tasks() for each task in a cgroup.
- * Return nonzero to stop the walk through the tasks.
- */
-static void cpuset_do_move_task(struct task_struct *tsk,
-				struct cgroup_scanner *scan)
-{
-	struct cgroup *new_cgroup = scan->data;
-
-	cgroup_lock();
-	cgroup_attach_task(new_cgroup, tsk);
-	cgroup_unlock();
-}
-
-/**
- * move_member_tasks_to_cpuset - move tasks from one cpuset to another
- * @from: cpuset in which the tasks currently reside
- * @to: cpuset to which the tasks will be moved
- *
- * Called with cpuset_mutex held
- * callback_mutex must not be held, as cpuset_attach() will take it.
- *
- * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
- * calling callback functions for each.
- */
-static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
-{
-	struct cgroup_scanner scan;
-
-	scan.cg = from->css.cgroup;
-	scan.test_task = NULL; /* select all tasks in cgroup */
-	scan.process_task = cpuset_do_move_task;
-	scan.heap = NULL;
-	scan.data = to->css.cgroup;
-
-	if (cgroup_scan_tasks(&scan))
-		printk(KERN_ERR "move_member_tasks_to_cpuset: "
-				"cgroup_scan_tasks failed\n");
-}
-
 /*
  * If CPU and/or memory hotplug handlers, below, unplug any CPUs
  * or memory nodes, we need to walk over the cpuset hierarchy,
@@ -2069,7 +2015,12 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
 	       nodes_empty(parent->mems_allowed))
 		parent = parent_cs(parent);
 
-	move_member_tasks_to_cpuset(cs, parent);
+	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
+		rcu_read_lock();
+		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
+		       cgroup_name(cs->css.cgroup));
+		rcu_read_unlock();
+	}
 }
 
 /**
@@ -2222,17 +2173,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 	flush_workqueue(cpuset_propagate_hotplug_wq);
 
 	/* rebuild sched domains if cpus_allowed has changed */
-	if (cpus_updated) {
-		struct sched_domain_attr *attr;
-		cpumask_var_t *doms;
-		int ndoms;
-
-		mutex_lock(&cpuset_mutex);
-		ndoms = generate_sched_domains(&doms, &attr);
-		mutex_unlock(&cpuset_mutex);
-
-		partition_sched_domains(ndoms, doms, attr);
-	}
+	if (cpus_updated)
+		rebuild_sched_domains();
 }
 
 void cpuset_update_active_cpus(bool cpu_online)
@@ -2594,6 +2536,8 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
 	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
 }
 
+#define CPUSET_NODELIST_LEN	(256)
+
 /**
  * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
  * @task: pointer to task_struct of some task.
@@ -2604,25 +2548,22 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
  */
 void cpuset_print_task_mems_allowed(struct task_struct *tsk)
 {
-	struct dentry *dentry;
+	/* Statically allocated to prevent using excess stack. */
+	static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+	static DEFINE_SPINLOCK(cpuset_buffer_lock);
 
-	dentry = task_cs(tsk)->css.cgroup->dentry;
-	spin_lock(&cpuset_buffer_lock);
+	struct cgroup *cgrp = task_cs(tsk)->css.cgroup;
 
-	if (!dentry) {
-		strcpy(cpuset_name, "/");
-	} else {
-		spin_lock(&dentry->d_lock);
-		strlcpy(cpuset_name, (const char *)dentry->d_name.name,
-			CPUSET_NAME_LEN);
-		spin_unlock(&dentry->d_lock);
-	}
+	rcu_read_lock();
+	spin_lock(&cpuset_buffer_lock);
 
 	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
 			   tsk->mems_allowed);
 	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
-	       tsk->comm, cpuset_name, cpuset_nodelist);
+	       tsk->comm, cgroup_name(cgrp), cpuset_nodelist);
+
 	spin_unlock(&cpuset_buffer_lock);
+	rcu_read_unlock();
 }
 
 /*