aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-04-29 22:14:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-29 22:14:20 -0400
commit191a712090bb8a10e6f129360eeed2d68f3d4c9a (patch)
tree17e2d6c27fb8a7c3a61828fbcc7c343a4966a0a9 /mm/memcontrol.c
parent46d9be3e5eb01f71fc02653755d970247174b400 (diff)
parent2a0010af17b1739ef8ea8cf02647a127241ee674 (diff)
Merge branch 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - Fixes and a lot of cleanups. Locking cleanup is finally complete. cgroup_mutex is no longer exposed to individual controllers which used to cause nasty deadlock issues. Li fixed and cleaned up quite a bit including long standing ones like racy cgroup_path(). - device cgroup now supports proper hierarchy thanks to Aristeu. - perf_event cgroup now supports proper hierarchy. - A new mount option "__DEVEL__sane_behavior" is added. As indicated by the name, this option is to be used for development only at this point and generates a warning message when used. Unfortunately, cgroup interface currently has too many breakages and inconsistencies to implement a consistent and unified hierarchy on top. The new flag is used to collect the behavior changes which are necessary to implement consistent and unified hierarchy. It's likely that this flag won't be used verbatim when it becomes ready but will be enabled implicitly along with unified hierarchy. The option currently disables some of broken behaviors in cgroup core and also .use_hierarchy switch in memcg (will be routed through -mm), which can be used to make very unusual hierarchy where nesting is partially honored. It will also be used to implement hierarchy support for blk-throttle which would be impossible otherwise without introducing a full separate set of control knobs. This is essentially versioning of interface which isn't very nice but at this point I can't see any other options which would allow keeping the interface the same while moving towards hierarchy behavior which is at least somewhat sane. The planned unified hierarchy is likely to require some level of adaptation from userland anyway, so I think it'd be best to take the chance and update the interface such that it's supportable in the long term. 
Maintaining the existing interface does complicate cgroup core but shouldn't put too much strain on individual controllers and I think it'd be manageable for the foreseeable future. Maybe we'll be able to drop it in a decade. Fix up conflicts (including a semantic one adding a new #include to ppc that was uncovered by the header file changes) as per Tejun. * 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (45 commits) cpuset: fix compile warning when CONFIG_SMP=n cpuset: fix cpu hotplug vs rebuild_sched_domains() race cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn() cgroup: restore the call to eventfd->poll() cgroup: fix use-after-free when umounting cgroupfs cgroup: fix broken file xattrs devcg: remove parent_cgroup. memcg: force use_hierarchy if sane_behavior cgroup: remove cgrp->top_cgroup cgroup: introduce sane_behavior mount option move cgroupfs_root to include/linux/cgroup.h cgroup: convert cgroupfs_root flag bits to masks and add CGRP_ prefix cgroup: make cgroup_path() not print double slashes Revert "cgroup: remove bind() method from cgroup_subsys." perf: make perf_event cgroup hierarchical cgroup: implement cgroup_is_descendant() cgroup: make sure parent won't be destroyed before its children cgroup: remove bind() method from cgroup_subsys. devcg: remove broken_hierarchy tag cgroup: remove cgroup_lock_is_held() ...
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c80
1 files changed, 49 insertions, 31 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b8dc8e4cbf6a..0f1d92163f30 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3321,52 +3321,53 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
3321 schedule_work(&cachep->memcg_params->destroy); 3321 schedule_work(&cachep->memcg_params->destroy);
3322} 3322}
3323 3323
3324static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s) 3324/*
3325{ 3325 * This lock protects updaters, not readers. We want readers to be as fast as
3326 char *name; 3326 * they can, and they will either see NULL or a valid cache value. Our model
3327 struct dentry *dentry; 3327 * allow them to see NULL, in which case the root memcg will be selected.
3328 3328 *
3329 rcu_read_lock(); 3329 * We need this lock because multiple allocations to the same cache from a non
3330 dentry = rcu_dereference(memcg->css.cgroup->dentry); 3330 * will span more than one worker. Only one of them can create the cache.
3331 rcu_read_unlock(); 3331 */
3332 3332static DEFINE_MUTEX(memcg_cache_mutex);
3333 BUG_ON(dentry == NULL);
3334
3335 name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
3336 memcg_cache_id(memcg), dentry->d_name.name);
3337
3338 return name;
3339}
3340 3333
3334/*
3335 * Called with memcg_cache_mutex held
3336 */
3341static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, 3337static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
3342 struct kmem_cache *s) 3338 struct kmem_cache *s)
3343{ 3339{
3344 char *name;
3345 struct kmem_cache *new; 3340 struct kmem_cache *new;
3341 static char *tmp_name = NULL;
3346 3342
3347 name = memcg_cache_name(memcg, s); 3343 lockdep_assert_held(&memcg_cache_mutex);
3348 if (!name) 3344
3349 return NULL; 3345 /*
3346 * kmem_cache_create_memcg duplicates the given name and
3347 * cgroup_name for this name requires RCU context.
3348 * This static temporary buffer is used to prevent from
3349 * pointless shortliving allocation.
3350 */
3351 if (!tmp_name) {
3352 tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
3353 if (!tmp_name)
3354 return NULL;
3355 }
3356
3357 rcu_read_lock();
3358 snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name,
3359 memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
3360 rcu_read_unlock();
3350 3361
3351 new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align, 3362 new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
3352 (s->flags & ~SLAB_PANIC), s->ctor, s); 3363 (s->flags & ~SLAB_PANIC), s->ctor, s);
3353 3364
3354 if (new) 3365 if (new)
3355 new->allocflags |= __GFP_KMEMCG; 3366 new->allocflags |= __GFP_KMEMCG;
3356 3367
3357 kfree(name);
3358 return new; 3368 return new;
3359} 3369}
3360 3370
3361/*
3362 * This lock protects updaters, not readers. We want readers to be as fast as
3363 * they can, and they will either see NULL or a valid cache value. Our model
3364 * allow them to see NULL, in which case the root memcg will be selected.
3365 *
3366 * We need this lock because multiple allocations to the same cache from a non
3367 * will span more than one worker. Only one of them can create the cache.
3368 */
3369static DEFINE_MUTEX(memcg_cache_mutex);
3370static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, 3371static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
3371 struct kmem_cache *cachep) 3372 struct kmem_cache *cachep)
3372{ 3373{
@@ -5912,6 +5913,7 @@ static struct cftype mem_cgroup_files[] = {
5912 }, 5913 },
5913 { 5914 {
5914 .name = "use_hierarchy", 5915 .name = "use_hierarchy",
5916 .flags = CFTYPE_INSANE,
5915 .write_u64 = mem_cgroup_hierarchy_write, 5917 .write_u64 = mem_cgroup_hierarchy_write,
5916 .read_u64 = mem_cgroup_hierarchy_read, 5918 .read_u64 = mem_cgroup_hierarchy_read,
5917 }, 5919 },
@@ -6907,6 +6909,21 @@ static void mem_cgroup_move_task(struct cgroup *cont,
6907} 6909}
6908#endif 6910#endif
6909 6911
6912/*
6913 * Cgroup retains root cgroups across [un]mount cycles making it necessary
6914 * to verify sane_behavior flag on each mount attempt.
6915 */
6916static void mem_cgroup_bind(struct cgroup *root)
6917{
6918 /*
6919 * use_hierarchy is forced with sane_behavior. cgroup core
6920 * guarantees that @root doesn't have any children, so turning it
6921 * on for the root memcg is enough.
6922 */
6923 if (cgroup_sane_behavior(root))
6924 mem_cgroup_from_cont(root)->use_hierarchy = true;
6925}
6926
6910struct cgroup_subsys mem_cgroup_subsys = { 6927struct cgroup_subsys mem_cgroup_subsys = {
6911 .name = "memory", 6928 .name = "memory",
6912 .subsys_id = mem_cgroup_subsys_id, 6929 .subsys_id = mem_cgroup_subsys_id,
@@ -6917,6 +6934,7 @@ struct cgroup_subsys mem_cgroup_subsys = {
6917 .can_attach = mem_cgroup_can_attach, 6934 .can_attach = mem_cgroup_can_attach,
6918 .cancel_attach = mem_cgroup_cancel_attach, 6935 .cancel_attach = mem_cgroup_cancel_attach,
6919 .attach = mem_cgroup_move_task, 6936 .attach = mem_cgroup_move_task,
6937 .bind = mem_cgroup_bind,
6920 .base_cftypes = mem_cgroup_files, 6938 .base_cftypes = mem_cgroup_files,
6921 .early_init = 0, 6939 .early_init = 0,
6922 .use_id = 1, 6940 .use_id = 1,