author    Linus Torvalds <torvalds@linux-foundation.org>  2013-04-29 22:14:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-04-29 22:14:20 -0400
commit    191a712090bb8a10e6f129360eeed2d68f3d4c9a (patch)
tree      17e2d6c27fb8a7c3a61828fbcc7c343a4966a0a9 /kernel/cpuset.c
parent    46d9be3e5eb01f71fc02653755d970247174b400 (diff)
parent    2a0010af17b1739ef8ea8cf02647a127241ee674 (diff)
Merge branch 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
- Fixes and a lot of cleanups. The locking cleanup is finally
complete: cgroup_mutex is no longer exposed to individual controllers,
which used to cause nasty deadlock issues. Li fixed and cleaned up
quite a bit, including long-standing issues like the racy
cgroup_path().
- device cgroup now supports proper hierarchy thanks to Aristeu.
- perf_event cgroup now supports proper hierarchy.
- A new mount option "__DEVEL__sane_behavior" is added. As indicated
by the name, this option is to be used for development only at this
point and generates a warning message when used (see the example
mount invocation after this message). Unfortunately, the cgroup
interface currently has too many breakages and inconsistencies to
implement a consistent and unified hierarchy on top. The new flag is
used to collect the behavior changes which are necessary to implement
a consistent unified hierarchy. It's likely that this flag won't be
used verbatim when it becomes ready, but will be enabled implicitly
along with the unified hierarchy.
The option currently disables some of the broken behaviors in cgroup
core and also the .use_hierarchy switch in memcg (will be routed
through -mm), which can be used to create very unusual hierarchies
where nesting is only partially honored. It will also be used to
implement hierarchy support for blk-throttle, which would otherwise be
impossible without introducing a fully separate set of control knobs.
This is essentially versioning of the interface, which isn't very
nice, but at this point I can't see any other option that would allow
keeping the interface the same while moving towards hierarchy behavior
that is at least somewhat sane. The planned unified hierarchy is
likely to require some level of adaptation from userland anyway, so I
think it'd be best to take the chance and update the interface such
that it's supportable in the long term.
Maintaining the existing interface does complicate cgroup core but
shouldn't put too much strain on individual controllers and I think
it'd be manageable for the foreseeable future. Maybe we'll be able
to drop it in a decade.
Fix up conflicts (including a semantic one adding a new #include to
ppc that was uncovered by the header file changes) as per Tejun.
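A rough usage sketch (not part of the original commit message): the
option name is taken verbatim from this merge, while the mount point,
controller choice, and the use_hierarchy note are illustrative
assumptions based on the commits listed below.

    # Mount the memory controller with the development-only flag; the
    # kernel prints a warning when it sees the option. Mount point and
    # controller choice are arbitrary for this sketch.
    mount -t cgroup -o __DEVEL__sane_behavior,memory none /sys/fs/cgroup/sane

    # Once the memcg change routed through -mm lands, sane_behavior
    # forces hierarchical accounting, so this knob should read 1.
    cat /sys/fs/cgroup/sane/memory.use_hierarchy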
* 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (45 commits)
cpuset: fix compile warning when CONFIG_SMP=n
cpuset: fix cpu hotplug vs rebuild_sched_domains() race
cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn()
cgroup: restore the call to eventfd->poll()
cgroup: fix use-after-free when umounting cgroupfs
cgroup: fix broken file xattrs
devcg: remove parent_cgroup.
memcg: force use_hierarchy if sane_behavior
cgroup: remove cgrp->top_cgroup
cgroup: introduce sane_behavior mount option
move cgroupfs_root to include/linux/cgroup.h
cgroup: convert cgroupfs_root flag bits to masks and add CGRP_ prefix
cgroup: make cgroup_path() not print double slashes
Revert "cgroup: remove bind() method from cgroup_subsys."
perf: make perf_event cgroup hierarchical
cgroup: implement cgroup_is_descendant()
cgroup: make sure parent won't be destroyed before its children
cgroup: remove bind() method from cgroup_subsys.
devcg: remove broken_hierarchy tag
cgroup: remove cgroup_lock_is_held()
...
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--  kernel/cpuset.c  115
1 file changed, 28 insertions(+), 87 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 027a6f65f2ad..12331120767c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -265,17 +265,6 @@ static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
 /*
- * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
- * buffers. They are statically allocated to prevent using excess stack
- * when calling cpuset_print_task_mems_allowed().
- */
-#define CPUSET_NAME_LEN		(128)
-#define CPUSET_NODELIST_LEN	(256)
-static char cpuset_name[CPUSET_NAME_LEN];
-static char cpuset_nodelist[CPUSET_NODELIST_LEN];
-static DEFINE_SPINLOCK(cpuset_buffer_lock);
-
-/*
  * CPU / memory hotplug is handled asynchronously.
  */
 static struct workqueue_struct *cpuset_propagate_hotplug_wq;
@@ -780,25 +769,26 @@ static void rebuild_sched_domains_locked(void)
 	lockdep_assert_held(&cpuset_mutex);
 	get_online_cpus();
 
+	/*
+	 * We have raced with CPU hotplug. Don't do anything to avoid
+	 * passing doms with offlined cpu to partition_sched_domains().
+	 * Anyways, hotplug work item will rebuild sched domains.
+	 */
+	if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask))
+		goto out;
+
 	/* Generate domain masks and attrs */
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
-
+out:
 	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
 static void rebuild_sched_domains_locked(void)
 {
 }
-
-static int generate_sched_domains(cpumask_var_t **domains,
-			struct sched_domain_attr **attributes)
-{
-	*domains = NULL;
-	return 1;
-}
 #endif /* CONFIG_SMP */
 
 void rebuild_sched_domains(void)
@@ -2005,50 +1995,6 @@ int __init cpuset_init(void)
 	return 0;
 }
 
-/**
- * cpuset_do_move_task - move a given task to another cpuset
- * @tsk: pointer to task_struct the task to move
- * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
- *
- * Called by cgroup_scan_tasks() for each task in a cgroup.
- * Return nonzero to stop the walk through the tasks.
- */
-static void cpuset_do_move_task(struct task_struct *tsk,
-				struct cgroup_scanner *scan)
-{
-	struct cgroup *new_cgroup = scan->data;
-
-	cgroup_lock();
-	cgroup_attach_task(new_cgroup, tsk);
-	cgroup_unlock();
-}
-
-/**
- * move_member_tasks_to_cpuset - move tasks from one cpuset to another
- * @from: cpuset in which the tasks currently reside
- * @to: cpuset to which the tasks will be moved
- *
- * Called with cpuset_mutex held
- * callback_mutex must not be held, as cpuset_attach() will take it.
- *
- * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
- * calling callback functions for each.
- */
-static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
-{
-	struct cgroup_scanner scan;
-
-	scan.cg = from->css.cgroup;
-	scan.test_task = NULL; /* select all tasks in cgroup */
-	scan.process_task = cpuset_do_move_task;
-	scan.heap = NULL;
-	scan.data = to->css.cgroup;
-
-	if (cgroup_scan_tasks(&scan))
-		printk(KERN_ERR "move_member_tasks_to_cpuset: "
-				"cgroup_scan_tasks failed\n");
-}
-
 /*
  * If CPU and/or memory hotplug handlers, below, unplug any CPUs
  * or memory nodes, we need to walk over the cpuset hierarchy,
@@ -2069,7 +2015,12 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
 	       nodes_empty(parent->mems_allowed))
 		parent = parent_cs(parent);
 
-	move_member_tasks_to_cpuset(cs, parent);
+	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
+		rcu_read_lock();
+		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
+		       cgroup_name(cs->css.cgroup));
+		rcu_read_unlock();
+	}
 }
 
 /**
@@ -2222,17 +2173,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 	flush_workqueue(cpuset_propagate_hotplug_wq);
 
 	/* rebuild sched domains if cpus_allowed has changed */
-	if (cpus_updated) {
-		struct sched_domain_attr *attr;
-		cpumask_var_t *doms;
-		int ndoms;
-
-		mutex_lock(&cpuset_mutex);
-		ndoms = generate_sched_domains(&doms, &attr);
-		mutex_unlock(&cpuset_mutex);
-
-		partition_sched_domains(ndoms, doms, attr);
-	}
+	if (cpus_updated)
+		rebuild_sched_domains();
 }
 
 void cpuset_update_active_cpus(bool cpu_online)
@@ -2594,6 +2536,8 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
 	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
 }
 
+#define CPUSET_NODELIST_LEN	(256)
+
 /**
  * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
  * @task: pointer to task_struct of some task.
@@ -2604,25 +2548,22 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
  */
 void cpuset_print_task_mems_allowed(struct task_struct *tsk)
 {
-	struct dentry *dentry;
+	/* Statically allocated to prevent using excess stack. */
+	static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+	static DEFINE_SPINLOCK(cpuset_buffer_lock);
 
-	dentry = task_cs(tsk)->css.cgroup->dentry;
-	spin_lock(&cpuset_buffer_lock);
+	struct cgroup *cgrp = task_cs(tsk)->css.cgroup;
 
-	if (!dentry) {
-		strcpy(cpuset_name, "/");
-	} else {
-		spin_lock(&dentry->d_lock);
-		strlcpy(cpuset_name, (const char *)dentry->d_name.name,
-			CPUSET_NAME_LEN);
-		spin_unlock(&dentry->d_lock);
-	}
+	rcu_read_lock();
+	spin_lock(&cpuset_buffer_lock);
 
 	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
 			   tsk->mems_allowed);
 	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
-	       tsk->comm, cpuset_name, cpuset_nodelist);
+	       tsk->comm, cgroup_name(cgrp), cpuset_nodelist);
+
 	spin_unlock(&cpuset_buffer_lock);
+	rcu_read_unlock();
 }
 
 /*