diff options
-rw-r--r-- | init/Kconfig | 1 | ||||
-rw-r--r-- | mm/memcontrol.c | 126 |
2 files changed, 124 insertions, 3 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 675d8a2326cf..19ccb33c99d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -882,6 +882,7 @@ config MEMCG_SWAP_ENABLED | |||
882 | config MEMCG_KMEM | 882 | config MEMCG_KMEM |
883 | bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)" | 883 | bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)" |
884 | depends on MEMCG && EXPERIMENTAL | 884 | depends on MEMCG && EXPERIMENTAL |
885 | depends on SLUB || SLAB | ||
885 | default n | 886 | default n |
886 | help | 887 | help |
887 | The Kernel Memory extension for Memory Resource Controller can limit | 888 | The Kernel Memory extension for Memory Resource Controller can limit |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c7b0b1b803a5..bba1cb4bbb82 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -268,6 +268,10 @@ struct mem_cgroup { | |||
268 | }; | 268 | }; |
269 | 269 | ||
270 | /* | 270 | /* |
271 | * the counter to account for kernel memory usage. | ||
272 | */ | ||
273 | struct res_counter kmem; | ||
274 | /* | ||
271 | * Per cgroup active and inactive list, similar to the | 275 | * Per cgroup active and inactive list, similar to the |
272 | * per zone LRU lists. | 276 | * per zone LRU lists. |
273 | */ | 277 | */ |
@@ -282,6 +286,7 @@ struct mem_cgroup { | |||
282 | * Should the accounting and control be hierarchical, per subtree? | 286 | * Should the accounting and control be hierarchical, per subtree? |
283 | */ | 287 | */ |
284 | bool use_hierarchy; | 288 | bool use_hierarchy; |
289 | unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */ | ||
285 | 290 | ||
286 | bool oom_lock; | 291 | bool oom_lock; |
287 | atomic_t under_oom; | 292 | atomic_t under_oom; |
@@ -334,6 +339,20 @@ struct mem_cgroup { | |||
334 | #endif | 339 | #endif |
335 | }; | 340 | }; |
336 | 341 | ||
342 | /* internal only representation about the status of kmem accounting. */ | ||
343 | enum { | ||
344 | KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */ | ||
345 | }; | ||
346 | |||
347 | #define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE) | ||
348 | |||
349 | #ifdef CONFIG_MEMCG_KMEM | ||
350 | static inline void memcg_kmem_set_active(struct mem_cgroup *memcg) | ||
351 | { | ||
352 | set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags); | ||
353 | } | ||
354 | #endif | ||
355 | |||
337 | /* Stuffs for move charges at task migration. */ | 356 | /* Stuffs for move charges at task migration. */ |
338 | /* | 357 | /* |
339 | * Types of charges to be moved. "move_charge_at_immigrate" is treated as a | 358 | * Types of charges to be moved. "move_charge_at_immigrate" is treated as a |
@@ -392,6 +411,7 @@ enum res_type { | |||
392 | _MEM, | 411 | _MEM, |
393 | _MEMSWAP, | 412 | _MEMSWAP, |
394 | _OOM_TYPE, | 413 | _OOM_TYPE, |
414 | _KMEM, | ||
395 | }; | 415 | }; |
396 | 416 | ||
397 | #define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) | 417 | #define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) |
@@ -1456,6 +1476,10 @@ done: | |||
1456 | res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10, | 1476 | res_counter_read_u64(&memcg->memsw, RES_USAGE) >> 10, |
1457 | res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10, | 1477 | res_counter_read_u64(&memcg->memsw, RES_LIMIT) >> 10, |
1458 | res_counter_read_u64(&memcg->memsw, RES_FAILCNT)); | 1478 | res_counter_read_u64(&memcg->memsw, RES_FAILCNT)); |
1479 | printk(KERN_INFO "kmem: usage %llukB, limit %llukB, failcnt %llu\n", | ||
1480 | res_counter_read_u64(&memcg->kmem, RES_USAGE) >> 10, | ||
1481 | res_counter_read_u64(&memcg->kmem, RES_LIMIT) >> 10, | ||
1482 | res_counter_read_u64(&memcg->kmem, RES_FAILCNT)); | ||
1459 | } | 1483 | } |
1460 | 1484 | ||
1461 | /* | 1485 | /* |
@@ -3977,6 +4001,9 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, | |||
3977 | else | 4001 | else |
3978 | val = res_counter_read_u64(&memcg->memsw, name); | 4002 | val = res_counter_read_u64(&memcg->memsw, name); |
3979 | break; | 4003 | break; |
4004 | case _KMEM: | ||
4005 | val = res_counter_read_u64(&memcg->kmem, name); | ||
4006 | break; | ||
3980 | default: | 4007 | default: |
3981 | BUG(); | 4008 | BUG(); |
3982 | } | 4009 | } |
@@ -3984,6 +4011,59 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, | |||
3984 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); | 4011 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); |
3985 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | 4012 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); |
3986 | } | 4013 | } |
4014 | |||
4015 | static int memcg_update_kmem_limit(struct cgroup *cont, u64 val) | ||
4016 | { | ||
4017 | int ret = -EINVAL; | ||
4018 | #ifdef CONFIG_MEMCG_KMEM | ||
4019 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | ||
4020 | /* | ||
4021 | * For simplicity, we won't allow this to be disabled. It also can't | ||
4022 | * be changed if the cgroup has children already, or if tasks had | ||
4023 | * already joined. | ||
4024 | * | ||
4025 | * If tasks join before we set the limit, a person looking at | ||
4026 | * kmem.usage_in_bytes will have no way to determine when it took | ||
4027 | * place, which makes the value quite meaningless. | ||
4028 | * | ||
4029 | * After it first became limited, changes in the value of the limit are | ||
4030 | * of course permitted. | ||
4031 | * | ||
4032 | * Taking the cgroup_lock is really offensive, but it is so far the only | ||
4033 | * way to guarantee that no children will appear. There are plenty of | ||
4034 | * other offenders, and they should all go away. Fine grained locking | ||
4035 | * is probably the way to go here. When we are fully hierarchical, we | ||
4036 | * can also get rid of the use_hierarchy check. | ||
4037 | */ | ||
4038 | cgroup_lock(); | ||
4039 | mutex_lock(&set_limit_mutex); | ||
4040 | if (!memcg->kmem_account_flags && val != RESOURCE_MAX) { | ||
4041 | if (cgroup_task_count(cont) || (memcg->use_hierarchy && | ||
4042 | !list_empty(&cont->children))) { | ||
4043 | ret = -EBUSY; | ||
4044 | goto out; | ||
4045 | } | ||
4046 | ret = res_counter_set_limit(&memcg->kmem, val); | ||
4047 | VM_BUG_ON(ret); | ||
4048 | |||
4049 | memcg_kmem_set_active(memcg); | ||
4050 | } else | ||
4051 | ret = res_counter_set_limit(&memcg->kmem, val); | ||
4052 | out: | ||
4053 | mutex_unlock(&set_limit_mutex); | ||
4054 | cgroup_unlock(); | ||
4055 | #endif | ||
4056 | return ret; | ||
4057 | } | ||
4058 | |||
4059 | static void memcg_propagate_kmem(struct mem_cgroup *memcg) | ||
4060 | { | ||
4061 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | ||
4062 | if (!parent) | ||
4063 | return; | ||
4064 | memcg->kmem_account_flags = parent->kmem_account_flags; | ||
4065 | } | ||
4066 | |||
3987 | /* | 4067 | /* |
3988 | * The user of this function is... | 4068 | * The user of this function is... |
3989 | * RES_LIMIT. | 4069 | * RES_LIMIT. |
@@ -4015,8 +4095,12 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | |||
4015 | break; | 4095 | break; |
4016 | if (type == _MEM) | 4096 | if (type == _MEM) |
4017 | ret = mem_cgroup_resize_limit(memcg, val); | 4097 | ret = mem_cgroup_resize_limit(memcg, val); |
4018 | else | 4098 | else if (type == _MEMSWAP) |
4019 | ret = mem_cgroup_resize_memsw_limit(memcg, val); | 4099 | ret = mem_cgroup_resize_memsw_limit(memcg, val); |
4100 | else if (type == _KMEM) | ||
4101 | ret = memcg_update_kmem_limit(cont, val); | ||
4102 | else | ||
4103 | return -EINVAL; | ||
4020 | break; | 4104 | break; |
4021 | case RES_SOFT_LIMIT: | 4105 | case RES_SOFT_LIMIT: |
4022 | ret = res_counter_memparse_write_strategy(buffer, &val); | 4106 | ret = res_counter_memparse_write_strategy(buffer, &val); |
@@ -4082,14 +4166,22 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | |||
4082 | case RES_MAX_USAGE: | 4166 | case RES_MAX_USAGE: |
4083 | if (type == _MEM) | 4167 | if (type == _MEM) |
4084 | res_counter_reset_max(&memcg->res); | 4168 | res_counter_reset_max(&memcg->res); |
4085 | else | 4169 | else if (type == _MEMSWAP) |
4086 | res_counter_reset_max(&memcg->memsw); | 4170 | res_counter_reset_max(&memcg->memsw); |
4171 | else if (type == _KMEM) | ||
4172 | res_counter_reset_max(&memcg->kmem); | ||
4173 | else | ||
4174 | return -EINVAL; | ||
4087 | break; | 4175 | break; |
4088 | case RES_FAILCNT: | 4176 | case RES_FAILCNT: |
4089 | if (type == _MEM) | 4177 | if (type == _MEM) |
4090 | res_counter_reset_failcnt(&memcg->res); | 4178 | res_counter_reset_failcnt(&memcg->res); |
4091 | else | 4179 | else if (type == _MEMSWAP) |
4092 | res_counter_reset_failcnt(&memcg->memsw); | 4180 | res_counter_reset_failcnt(&memcg->memsw); |
4181 | else if (type == _KMEM) | ||
4182 | res_counter_reset_failcnt(&memcg->kmem); | ||
4183 | else | ||
4184 | return -EINVAL; | ||
4093 | break; | 4185 | break; |
4094 | } | 4186 | } |
4095 | 4187 | ||
@@ -4651,6 +4743,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, | |||
4651 | #ifdef CONFIG_MEMCG_KMEM | 4743 | #ifdef CONFIG_MEMCG_KMEM |
4652 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | 4744 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
4653 | { | 4745 | { |
4746 | memcg_propagate_kmem(memcg); | ||
4654 | return mem_cgroup_sockets_init(memcg, ss); | 4747 | return mem_cgroup_sockets_init(memcg, ss); |
4655 | }; | 4748 | }; |
4656 | 4749 | ||
@@ -4765,6 +4858,31 @@ static struct cftype mem_cgroup_files[] = { | |||
4765 | .read = mem_cgroup_read, | 4858 | .read = mem_cgroup_read, |
4766 | }, | 4859 | }, |
4767 | #endif | 4860 | #endif |
4861 | #ifdef CONFIG_MEMCG_KMEM | ||
4862 | { | ||
4863 | .name = "kmem.limit_in_bytes", | ||
4864 | .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), | ||
4865 | .write_string = mem_cgroup_write, | ||
4866 | .read = mem_cgroup_read, | ||
4867 | }, | ||
4868 | { | ||
4869 | .name = "kmem.usage_in_bytes", | ||
4870 | .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), | ||
4871 | .read = mem_cgroup_read, | ||
4872 | }, | ||
4873 | { | ||
4874 | .name = "kmem.failcnt", | ||
4875 | .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT), | ||
4876 | .trigger = mem_cgroup_reset, | ||
4877 | .read = mem_cgroup_read, | ||
4878 | }, | ||
4879 | { | ||
4880 | .name = "kmem.max_usage_in_bytes", | ||
4881 | .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE), | ||
4882 | .trigger = mem_cgroup_reset, | ||
4883 | .read = mem_cgroup_read, | ||
4884 | }, | ||
4885 | #endif | ||
4768 | { }, /* terminate */ | 4886 | { }, /* terminate */ |
4769 | }; | 4887 | }; |
4770 | 4888 | ||
@@ -5010,6 +5128,7 @@ mem_cgroup_css_alloc(struct cgroup *cont) | |||
5010 | if (parent && parent->use_hierarchy) { | 5128 | if (parent && parent->use_hierarchy) { |
5011 | res_counter_init(&memcg->res, &parent->res); | 5129 | res_counter_init(&memcg->res, &parent->res); |
5012 | res_counter_init(&memcg->memsw, &parent->memsw); | 5130 | res_counter_init(&memcg->memsw, &parent->memsw); |
5131 | res_counter_init(&memcg->kmem, &parent->kmem); | ||
5013 | /* | 5132 | /* |
5014 | * We increment refcnt of the parent to ensure that we can | 5133 | * We increment refcnt of the parent to ensure that we can |
5015 | * safely access it on res_counter_charge/uncharge. | 5134 | * safely access it on res_counter_charge/uncharge. |
@@ -5020,6 +5139,7 @@ mem_cgroup_css_alloc(struct cgroup *cont) | |||
5020 | } else { | 5139 | } else { |
5021 | res_counter_init(&memcg->res, NULL); | 5140 | res_counter_init(&memcg->res, NULL); |
5022 | res_counter_init(&memcg->memsw, NULL); | 5141 | res_counter_init(&memcg->memsw, NULL); |
5142 | res_counter_init(&memcg->kmem, NULL); | ||
5023 | /* | 5143 | /* |
5024 | * Deeper hierarchy with use_hierarchy == false doesn't make | 5144 | * Deeper hierarchy with use_hierarchy == false doesn't make |
5025 | * much sense so let cgroup subsystem know about this | 5145 | * much sense so let cgroup subsystem know about this |