diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-29 22:14:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-29 22:14:20 -0400 |
commit | 191a712090bb8a10e6f129360eeed2d68f3d4c9a (patch) | |
tree | 17e2d6c27fb8a7c3a61828fbcc7c343a4966a0a9 /mm/memcontrol.c | |
parent | 46d9be3e5eb01f71fc02653755d970247174b400 (diff) | |
parent | 2a0010af17b1739ef8ea8cf02647a127241ee674 (diff) |
Merge branch 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
- Fixes and a lot of cleanups. Locking cleanup is finally complete.
cgroup_mutex is no longer exposed to individual controllers which
used to cause nasty deadlock issues. Li fixed and cleaned up quite a
bit including long standing ones like racy cgroup_path().
- device cgroup now supports proper hierarchy thanks to Aristeu.
- perf_event cgroup now supports proper hierarchy.
- A new mount option "__DEVEL__sane_behavior" is added. As indicated
by the name, this option is to be used for development only at this
point and generates a warning message when used. Unfortunately,
cgroup interface currently has too many breakages and inconsistencies
to implement a consistent and unified hierarchy on top. The new flag
is used to collect the behavior changes which are necessary to
implement consistent unified hierarchy. It's likely that this flag
won't be used verbatim when it becomes ready but will be enabled
implicitly along with unified hierarchy.
The option currently disables some of the broken behaviors in cgroup core
and also .use_hierarchy switch in memcg (will be routed through -mm),
which can be used to make very unusual hierarchy where nesting is
partially honored. It will also be used to implement hierarchy
support for blk-throttle which would be impossible otherwise without
introducing a full separate set of control knobs.
This is essentially versioning of interface which isn't very nice but
at this point I can't see any other options which would allow keeping
the interface the same while moving towards hierarchy behavior which
is at least somewhat sane. The planned unified hierarchy is likely
to require some level of adaptation from userland anyway, so I think
it'd be best to take the chance and update the interface such that
it's supportable in the long term.
Maintaining the existing interface does complicate cgroup core but
shouldn't put too much strain on individual controllers and I think
it'd be manageable for the foreseeable future. Maybe we'll be able
to drop it in a decade.
Fix up conflicts (including a semantic one adding a new #include to ppc
that was uncovered by the header file changes) as per Tejun.
* 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (45 commits)
cpuset: fix compile warning when CONFIG_SMP=n
cpuset: fix cpu hotplug vs rebuild_sched_domains() race
cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn()
cgroup: restore the call to eventfd->poll()
cgroup: fix use-after-free when umounting cgroupfs
cgroup: fix broken file xattrs
devcg: remove parent_cgroup.
memcg: force use_hierarchy if sane_behavior
cgroup: remove cgrp->top_cgroup
cgroup: introduce sane_behavior mount option
move cgroupfs_root to include/linux/cgroup.h
cgroup: convert cgroupfs_root flag bits to masks and add CGRP_ prefix
cgroup: make cgroup_path() not print double slashes
Revert "cgroup: remove bind() method from cgroup_subsys."
perf: make perf_event cgroup hierarchical
cgroup: implement cgroup_is_descendant()
cgroup: make sure parent won't be destroyed before its children
cgroup: remove bind() method from cgroup_subsys.
devcg: remove broken_hierarchy tag
cgroup: remove cgroup_lock_is_held()
...
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 80 |
1 files changed, 49 insertions, 31 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b8dc8e4cbf6a..0f1d92163f30 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -3321,52 +3321,53 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) | |||
3321 | schedule_work(&cachep->memcg_params->destroy); | 3321 | schedule_work(&cachep->memcg_params->destroy); |
3322 | } | 3322 | } |
3323 | 3323 | ||
3324 | static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s) | 3324 | /* |
3325 | { | 3325 | * This lock protects updaters, not readers. We want readers to be as fast as |
3326 | char *name; | 3326 | * they can, and they will either see NULL or a valid cache value. Our model |
3327 | struct dentry *dentry; | 3327 | * allow them to see NULL, in which case the root memcg will be selected. |
3328 | 3328 | * | |
3329 | rcu_read_lock(); | 3329 | * We need this lock because multiple allocations to the same cache from a non |
3330 | dentry = rcu_dereference(memcg->css.cgroup->dentry); | 3330 | * will span more than one worker. Only one of them can create the cache. |
3331 | rcu_read_unlock(); | 3331 | */ |
3332 | 3332 | static DEFINE_MUTEX(memcg_cache_mutex); | |
3333 | BUG_ON(dentry == NULL); | ||
3334 | |||
3335 | name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name, | ||
3336 | memcg_cache_id(memcg), dentry->d_name.name); | ||
3337 | |||
3338 | return name; | ||
3339 | } | ||
3340 | 3333 | ||
3334 | /* | ||
3335 | * Called with memcg_cache_mutex held | ||
3336 | */ | ||
3341 | static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, | 3337 | static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, |
3342 | struct kmem_cache *s) | 3338 | struct kmem_cache *s) |
3343 | { | 3339 | { |
3344 | char *name; | ||
3345 | struct kmem_cache *new; | 3340 | struct kmem_cache *new; |
3341 | static char *tmp_name = NULL; | ||
3346 | 3342 | ||
3347 | name = memcg_cache_name(memcg, s); | 3343 | lockdep_assert_held(&memcg_cache_mutex); |
3348 | if (!name) | 3344 | |
3349 | return NULL; | 3345 | /* |
3346 | * kmem_cache_create_memcg duplicates the given name and | ||
3347 | * cgroup_name for this name requires RCU context. | ||
3348 | * This static temporary buffer is used to prevent from | ||
3349 | * pointless shortliving allocation. | ||
3350 | */ | ||
3351 | if (!tmp_name) { | ||
3352 | tmp_name = kmalloc(PATH_MAX, GFP_KERNEL); | ||
3353 | if (!tmp_name) | ||
3354 | return NULL; | ||
3355 | } | ||
3356 | |||
3357 | rcu_read_lock(); | ||
3358 | snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name, | ||
3359 | memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup)); | ||
3360 | rcu_read_unlock(); | ||
3350 | 3361 | ||
3351 | new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align, | 3362 | new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align, |
3352 | (s->flags & ~SLAB_PANIC), s->ctor, s); | 3363 | (s->flags & ~SLAB_PANIC), s->ctor, s); |
3353 | 3364 | ||
3354 | if (new) | 3365 | if (new) |
3355 | new->allocflags |= __GFP_KMEMCG; | 3366 | new->allocflags |= __GFP_KMEMCG; |
3356 | 3367 | ||
3357 | kfree(name); | ||
3358 | return new; | 3368 | return new; |
3359 | } | 3369 | } |
3360 | 3370 | ||
3361 | /* | ||
3362 | * This lock protects updaters, not readers. We want readers to be as fast as | ||
3363 | * they can, and they will either see NULL or a valid cache value. Our model | ||
3364 | * allow them to see NULL, in which case the root memcg will be selected. | ||
3365 | * | ||
3366 | * We need this lock because multiple allocations to the same cache from a non | ||
3367 | * will span more than one worker. Only one of them can create the cache. | ||
3368 | */ | ||
3369 | static DEFINE_MUTEX(memcg_cache_mutex); | ||
3370 | static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, | 3371 | static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, |
3371 | struct kmem_cache *cachep) | 3372 | struct kmem_cache *cachep) |
3372 | { | 3373 | { |
@@ -5912,6 +5913,7 @@ static struct cftype mem_cgroup_files[] = { | |||
5912 | }, | 5913 | }, |
5913 | { | 5914 | { |
5914 | .name = "use_hierarchy", | 5915 | .name = "use_hierarchy", |
5916 | .flags = CFTYPE_INSANE, | ||
5915 | .write_u64 = mem_cgroup_hierarchy_write, | 5917 | .write_u64 = mem_cgroup_hierarchy_write, |
5916 | .read_u64 = mem_cgroup_hierarchy_read, | 5918 | .read_u64 = mem_cgroup_hierarchy_read, |
5917 | }, | 5919 | }, |
@@ -6907,6 +6909,21 @@ static void mem_cgroup_move_task(struct cgroup *cont, | |||
6907 | } | 6909 | } |
6908 | #endif | 6910 | #endif |
6909 | 6911 | ||
6912 | /* | ||
6913 | * Cgroup retains root cgroups across [un]mount cycles making it necessary | ||
6914 | * to verify sane_behavior flag on each mount attempt. | ||
6915 | */ | ||
6916 | static void mem_cgroup_bind(struct cgroup *root) | ||
6917 | { | ||
6918 | /* | ||
6919 | * use_hierarchy is forced with sane_behavior. cgroup core | ||
6920 | * guarantees that @root doesn't have any children, so turning it | ||
6921 | * on for the root memcg is enough. | ||
6922 | */ | ||
6923 | if (cgroup_sane_behavior(root)) | ||
6924 | mem_cgroup_from_cont(root)->use_hierarchy = true; | ||
6925 | } | ||
6926 | |||
6910 | struct cgroup_subsys mem_cgroup_subsys = { | 6927 | struct cgroup_subsys mem_cgroup_subsys = { |
6911 | .name = "memory", | 6928 | .name = "memory", |
6912 | .subsys_id = mem_cgroup_subsys_id, | 6929 | .subsys_id = mem_cgroup_subsys_id, |
@@ -6917,6 +6934,7 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
6917 | .can_attach = mem_cgroup_can_attach, | 6934 | .can_attach = mem_cgroup_can_attach, |
6918 | .cancel_attach = mem_cgroup_cancel_attach, | 6935 | .cancel_attach = mem_cgroup_cancel_attach, |
6919 | .attach = mem_cgroup_move_task, | 6936 | .attach = mem_cgroup_move_task, |
6937 | .bind = mem_cgroup_bind, | ||
6920 | .base_cftypes = mem_cgroup_files, | 6938 | .base_cftypes = mem_cgroup_files, |
6921 | .early_init = 0, | 6939 | .early_init = 0, |
6922 | .use_id = 1, | 6940 | .use_id = 1, |