aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--init/Kconfig1
-rw-r--r--mm/memcontrol.c126
2 files changed, 124 insertions, 3 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 675d8a2326cf..19ccb33c99d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -882,6 +882,7 @@ config MEMCG_SWAP_ENABLED
882config MEMCG_KMEM 882config MEMCG_KMEM
883 bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)" 883 bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
884 depends on MEMCG && EXPERIMENTAL 884 depends on MEMCG && EXPERIMENTAL
885 depends on SLUB || SLAB
885 default n 886 default n
886 help 887 help
887 The Kernel Memory extension for Memory Resource Controller can limit 888 The Kernel Memory extension for Memory Resource Controller can limit
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c7b0b1b803a5..bba1cb4bbb82 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -268,6 +268,10 @@ struct mem_cgroup {
268 }; 268 };
269 269
270 /* 270 /*
271 * the counter to account for kernel memory usage.
272 */
273 struct res_counter kmem;
274 /*
271 * Per cgroup active and inactive list, similar to the 275 * Per cgroup active and inactive list, similar to the
272 * per zone LRU lists. 276 * per zone LRU lists.
273 */ 277 */
@@ -282,6 +286,7 @@ struct mem_cgroup {
282 * Should the accounting and control be hierarchical, per subtree? 286 * Should the accounting and control be hierarchical, per subtree?
283 */ 287 */
284 bool use_hierarchy; 288 bool use_hierarchy;
289 unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */
285 290
286 bool oom_lock; 291 bool oom_lock;
287 atomic_t under_oom; 292 atomic_t under_oom;
@@ -334,6 +339,20 @@ struct mem_cgroup {
334#endif 339#endif
335}; 340};
336 341
342/* Internal-only bit numbers describing the kmem accounting state stored in kmem_account_flags. */
343enum {
344 KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
345};
346
/* Bit mask built from the KMEM_ACCOUNTED_* bit numbers above (only the ACTIVE bit is defined here). */
347#define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE)
348
349#ifdef CONFIG_MEMCG_KMEM
/*
 * Mark kmem accounting as active for @memcg by setting the
 * KMEM_ACCOUNTED_ACTIVE bit in memcg->kmem_account_flags.
 */
350static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
351{
352 set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
353}
354#endif
355
337/* Stuff for moving charges at task migration. */ 356/* Stuff for moving charges at task migration. */
338/* 357/*
339 * Types of charges to be moved. "move_charge_at_immigrate" is treated as a 358 * Types of charges to be moved. "move_charge_at_immigrate" is treated as a
@@ -392,6 +411,7 @@ enum res_type {
392 _MEM, 411 _MEM,
393 _MEMSWAP, 412 _MEMSWAP,
394 _OOM_TYPE, 413 _OOM_TYPE,
414 _KMEM,
395}; 415};
396 416
397#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) 417#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val))
@@ -1456,6 +1476,10 @@ done:
1456 res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10, 1476 res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10,
1457 res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10, 1477 res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10,
1458 res_counter_read_u64(&memcg->memsw, RES_FAILCNT)); 1478 res_counter_read_u64(&memcg->memsw, RES_FAILCNT));
1479 printk(KERN_INFO "kmem: usage %llukB, limit %llukB, failcnt %llu\n",
1480 res_counter_read_u64(&memcg->kmem, RES_USAGE) >> 10,
1481 res_counter_read_u64(&memcg->kmem, RES_LIMIT) >> 10,
1482 res_counter_read_u64(&memcg->kmem, RES_FAILCNT));
1459} 1483}
1460 1484
1461/* 1485/*
@@ -3977,6 +4001,9 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
3977 else 4001 else
3978 val = res_counter_read_u64(&memcg->memsw, name); 4002 val = res_counter_read_u64(&memcg->memsw, name);
3979 break; 4003 break;
4004 case _KMEM:
4005 val = res_counter_read_u64(&memcg->kmem, name);
4006 break;
3980 default: 4007 default:
3981 BUG(); 4008 BUG();
3982 } 4009 }
@@ -3984,6 +4011,59 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
3984 len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); 4011 len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
3985 return simple_read_from_buffer(buf, nbytes, ppos, str, len); 4012 return simple_read_from_buffer(buf, nbytes, ppos, str, len);
3986} 4013}
4014
/*
 * memcg_update_kmem_limit - set the kernel-memory limit of a memory cgroup.
 * @cont: cgroup whose mem_cgroup is updated
 * @val:  new limit, handed to res_counter_set_limit() on memcg->kmem
 *
 * Returns:
 *   -EINVAL  when CONFIG_MEMCG_KMEM is not set (the body is compiled out and
 *            the initial value of ret is returned unchanged);
 *   -EBUSY   on first activation if the cgroup already has tasks, or has
 *            children while use_hierarchy is set;
 *   otherwise the return value of res_counter_set_limit().
 *
 * Locking: takes cgroup_lock() and then set_limit_mutex, and releases them in
 * the reverse order on every path, including the -EBUSY exit via "out".
 */
4015static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
4016{
4017 int ret = -EINVAL;
4018#ifdef CONFIG_MEMCG_KMEM
4019 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
4020 /*
4021 * For simplicity, we won't allow this to be disabled. It also can't
4022 * be changed if the cgroup has children already, or if tasks had
4023 * already joined.
4024 *
4025 * If tasks join before we set the limit, a person looking at
4026 * kmem.usage_in_bytes will have no way to determine when it took
4027 * place, which makes the value quite meaningless.
4028 *
4029 * After it first became limited, changes in the value of the limit are
4030 * of course permitted.
4031 *
4032 * Taking the cgroup_lock is really offensive, but it is so far the only
4033 * way to guarantee that no children will appear. There are plenty of
4034 * other offenders, and they should all go away. Fine grained locking
4035 * is probably the way to go here. When we are fully hierarchical, we
4036 * can also get rid of the use_hierarchy check.
4037 */
4038 cgroup_lock();
4039 mutex_lock(&set_limit_mutex);
/* First activation: no accounting flag is set yet and the requested limit is not RESOURCE_MAX. */
4040 if (!memcg->kmem_account_flags && val != RESOURCE_MAX) {
4041 if (cgroup_task_count(cont) || (memcg->use_hierarchy &&
4042 !list_empty(&cont->children))) {
4043 ret = -EBUSY;
4044 goto out;
4045 }
4046 ret = res_counter_set_limit(&memcg->kmem, val);
/* NOTE(review): the first limit update is asserted to succeed, presumably because nothing has been charged to kmem yet -- confirm. */
4047 VM_BUG_ON(ret);
4048
4049 memcg_kmem_set_active(memcg);
4050 } else
/* Flags already set, or RESOURCE_MAX requested: only the limit value is updated; the accounting flags are left untouched. */
4051 ret = res_counter_set_limit(&memcg->kmem, val);
4052out:
4053 mutex_unlock(&set_limit_mutex);
4054 cgroup_unlock();
4055#endif
4056 return ret;
4057}
4058
/*
 * Copy the parent's kmem_account_flags into @memcg, so that a newly created
 * memcg starts with the same kmem accounting state as its parent. Does
 * nothing when parent_mem_cgroup() returns NULL.
 *
 * NOTE(review): this helper is defined outside CONFIG_MEMCG_KMEM, while the
 * only caller visible in this patch (memcg_init_kmem) is inside that #ifdef;
 * it may trigger an unused-function warning when the option is off -- confirm.
 */
4059static void memcg_propagate_kmem(struct mem_cgroup *memcg)
4060{
4061 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
/* No parent: nothing to inherit. */
4062 if (!parent)
4063 return;
4064 memcg->kmem_account_flags = parent->kmem_account_flags;
4065}
4066
3987/* 4067/*
3988 * The user of this function is... 4068 * The user of this function is...
3989 * RES_LIMIT. 4069 * RES_LIMIT.
@@ -4015,8 +4095,12 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
4015 break; 4095 break;
4016 if (type == _MEM) 4096 if (type == _MEM)
4017 ret = mem_cgroup_resize_limit(memcg, val); 4097 ret = mem_cgroup_resize_limit(memcg, val);
4018 else 4098 else if (type == _MEMSWAP)
4019 ret = mem_cgroup_resize_memsw_limit(memcg, val); 4099 ret = mem_cgroup_resize_memsw_limit(memcg, val);
4100 else if (type == _KMEM)
4101 ret = memcg_update_kmem_limit(cont, val);
4102 else
4103 return -EINVAL;
4020 break; 4104 break;
4021 case RES_SOFT_LIMIT: 4105 case RES_SOFT_LIMIT:
4022 ret = res_counter_memparse_write_strategy(buffer, &val); 4106 ret = res_counter_memparse_write_strategy(buffer, &val);
@@ -4082,14 +4166,22 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
4082 case RES_MAX_USAGE: 4166 case RES_MAX_USAGE:
4083 if (type == _MEM) 4167 if (type == _MEM)
4084 res_counter_reset_max(&memcg->res); 4168 res_counter_reset_max(&memcg->res);
4085 else 4169 else if (type == _MEMSWAP)
4086 res_counter_reset_max(&memcg->memsw); 4170 res_counter_reset_max(&memcg->memsw);
4171 else if (type == _KMEM)
4172 res_counter_reset_max(&memcg->kmem);
4173 else
4174 return -EINVAL;
4087 break; 4175 break;
4088 case RES_FAILCNT: 4176 case RES_FAILCNT:
4089 if (type == _MEM) 4177 if (type == _MEM)
4090 res_counter_reset_failcnt(&memcg->res); 4178 res_counter_reset_failcnt(&memcg->res);
4091 else 4179 else if (type == _MEMSWAP)
4092 res_counter_reset_failcnt(&memcg->memsw); 4180 res_counter_reset_failcnt(&memcg->memsw);
4181 else if (type == _KMEM)
4182 res_counter_reset_failcnt(&memcg->kmem);
4183 else
4184 return -EINVAL;
4093 break; 4185 break;
4094 } 4186 }
4095 4187
@@ -4651,6 +4743,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
4651#ifdef CONFIG_MEMCG_KMEM 4743#ifdef CONFIG_MEMCG_KMEM
4652static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 4744static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4653{ 4745{
4746 memcg_propagate_kmem(memcg);
4654 return mem_cgroup_sockets_init(memcg, ss); 4747 return mem_cgroup_sockets_init(memcg, ss);
4655}; 4748};
4656 4749
@@ -4765,6 +4858,31 @@ static struct cftype mem_cgroup_files[] = {
4765 .read = mem_cgroup_read, 4858 .read = mem_cgroup_read,
4766 }, 4859 },
4767#endif 4860#endif
4861#ifdef CONFIG_MEMCG_KMEM
4862 {
4863 .name = "kmem.limit_in_bytes",
4864 .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
4865 .write_string = mem_cgroup_write,
4866 .read = mem_cgroup_read,
4867 },
4868 {
4869 .name = "kmem.usage_in_bytes",
4870 .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
4871 .read = mem_cgroup_read,
4872 },
4873 {
4874 .name = "kmem.failcnt",
4875 .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
4876 .trigger = mem_cgroup_reset,
4877 .read = mem_cgroup_read,
4878 },
4879 {
4880 .name = "kmem.max_usage_in_bytes",
4881 .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
4882 .trigger = mem_cgroup_reset,
4883 .read = mem_cgroup_read,
4884 },
4885#endif
4768 { }, /* terminate */ 4886 { }, /* terminate */
4769}; 4887};
4770 4888
@@ -5010,6 +5128,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
5010 if (parent && parent->use_hierarchy) { 5128 if (parent && parent->use_hierarchy) {
5011 res_counter_init(&memcg->res, &parent->res); 5129 res_counter_init(&memcg->res, &parent->res);
5012 res_counter_init(&memcg->memsw, &parent->memsw); 5130 res_counter_init(&memcg->memsw, &parent->memsw);
5131 res_counter_init(&memcg->kmem, &parent->kmem);
5013 /* 5132 /*
5014 * We increment refcnt of the parent to ensure that we can 5133 * We increment refcnt of the parent to ensure that we can
5015 * safely access it on res_counter_charge/uncharge. 5134 * safely access it on res_counter_charge/uncharge.
@@ -5020,6 +5139,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
5020 } else { 5139 } else {
5021 res_counter_init(&memcg->res, NULL); 5140 res_counter_init(&memcg->res, NULL);
5022 res_counter_init(&memcg->memsw, NULL); 5141 res_counter_init(&memcg->memsw, NULL);
5142 res_counter_init(&memcg->kmem, NULL);
5023 /* 5143 /*
5024 * Deeper hierarchy with use_hierarchy == false doesn't make 5144 * Deeper hierarchy with use_hierarchy == false doesn't make
5025 * much sense so let cgroup subsystem know about this 5145 * much sense so let cgroup subsystem know about this