aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGlauber Costa <glommer@parallels.com>2011-12-21 20:02:27 -0500
committerDavid S. Miller <davem@davemloft.net>2011-12-22 22:37:18 -0500
commit65c64ce8ee642eb330a4c4d94b664725f2902b44 (patch)
treec4ad98c2a7f839f3b968fae6eddeec8b5752ce49
parent7d6c429b263c2f735f3df8c282cc77a8659e5d74 (diff)
Partial revert "Basic kernel memory functionality for the Memory Controller"
This reverts commit e5671dfae59b165e2adfd4dfbdeab11ac8db5bda. After a follow-up discussion with Michal, it was agreed it would be better to leave the kmem controller with just the tcp files, deferring the behavior of the other general memory.kmem.* files for a later time, when more caches are controlled. This is because generic kmem files are not used by tcp accounting and it is not clear how other slab caches would fit into the scheme. We are reverting the original commit so we can track the reference. Part of the patch is kept, because it was used by the later tcp code. Conflicts are shown at the bottom. init/Kconfig is removed from the revert entirely. Signed-off-by: Glauber Costa <glommer@parallels.com> Acked-by: Michal Hocko <mhocko@suse.cz> CC: Kirill A. Shutemov <kirill@shutemov.name> CC: Paul Menage <paul@paulmenage.org> CC: Greg Thelen <gthelen@google.com> CC: Johannes Weiner <jweiner@redhat.com> CC: David S. Miller <davem@davemloft.net> Conflicts: Documentation/cgroups/memory.txt mm/memcontrol.c Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/cgroups/memory.txt22
-rw-r--r--mm/memcontrol.c93
2 files changed, 8 insertions, 107 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 6922b6cb58e3..4d8774f6f48a 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -44,9 +44,8 @@ Features:
44 - oom-killer disable knob and oom-notifier 44 - oom-killer disable knob and oom-notifier
45 - Root cgroup has no limit controls. 45 - Root cgroup has no limit controls.
46 46
47 Hugepages is not under control yet. We just manage pages on LRU. To add more 47 Kernel memory support is work in progress, and the current version provides
48 controls, we have to take care of performance. Kernel memory support is work 48 basically functionality. (See Section 2.7)
49 in progress, and the current version provides basically functionality.
50 49
51Brief summary of control files. 50Brief summary of control files.
52 51
@@ -57,11 +56,8 @@ Brief summary of control files.
57 (See 5.5 for details) 56 (See 5.5 for details)
58 memory.memsw.usage_in_bytes # show current res_counter usage for memory+Swap 57 memory.memsw.usage_in_bytes # show current res_counter usage for memory+Swap
59 (See 5.5 for details) 58 (See 5.5 for details)
60 memory.kmem.usage_in_bytes # show current res_counter usage for kmem only.
61 (See 2.7 for details)
62 memory.limit_in_bytes # set/show limit of memory usage 59 memory.limit_in_bytes # set/show limit of memory usage
63 memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage 60 memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage
64 memory.kmem.limit_in_bytes # if allowed, set/show limit of kernel memory
65 memory.failcnt # show the number of memory usage hits limits 61 memory.failcnt # show the number of memory usage hits limits
66 memory.memsw.failcnt # show the number of memory+Swap hits limits 62 memory.memsw.failcnt # show the number of memory+Swap hits limits
67 memory.max_usage_in_bytes # show max memory usage recorded 63 memory.max_usage_in_bytes # show max memory usage recorded
@@ -76,8 +72,6 @@ Brief summary of control files.
76 memory.oom_control # set/show oom controls. 72 memory.oom_control # set/show oom controls.
77 memory.numa_stat # show the number of memory usage per numa node 73 memory.numa_stat # show the number of memory usage per numa node
78 74
79 memory.independent_kmem_limit # select whether or not kernel memory limits are
80 independent of user limits
81 memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory 75 memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory
82 memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation 76 memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation
83 77
@@ -271,21 +265,9 @@ the amount of kernel memory used by the system. Kernel memory is fundamentally
271different than user memory, since it can't be swapped out, which makes it 265different than user memory, since it can't be swapped out, which makes it
272possible to DoS the system by consuming too much of this precious resource. 266possible to DoS the system by consuming too much of this precious resource.
273 267
274Some kernel memory resources may be accounted and limited separately from the
275main "kmem" resource. For instance, a slab cache that is considered important
276enough to be limited separately may have its own knobs.
277
278Kernel memory limits are not imposed for the root cgroup. Usage for the root 268Kernel memory limits are not imposed for the root cgroup. Usage for the root
279cgroup may or may not be accounted. 269cgroup may or may not be accounted.
280 270
281Memory limits as specified by the standard Memory Controller may or may not
282take kernel memory into consideration. This is achieved through the file
283memory.independent_kmem_limit. A Value different than 0 will allow for kernel
284memory to be controlled separately.
285
286When kernel memory limits are not independent, the limit values set in
287memory.kmem files are ignored.
288
289Currently no soft limit is implemented for kernel memory. It is future work 271Currently no soft limit is implemented for kernel memory. It is future work
290to trigger slab reclaim when those limits are reached. 272to trigger slab reclaim when those limits are reached.
291 273
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7266202fa7cf..8cdc9156455c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -229,10 +229,6 @@ struct mem_cgroup {
229 */ 229 */
230 struct res_counter memsw; 230 struct res_counter memsw;
231 /* 231 /*
232 * the counter to account for kmem usage.
233 */
234 struct res_counter kmem;
235 /*
236 * Per cgroup active and inactive list, similar to the 232 * Per cgroup active and inactive list, similar to the
237 * per zone LRU lists. 233 * per zone LRU lists.
238 */ 234 */
@@ -283,11 +279,6 @@ struct mem_cgroup {
283 */ 279 */
284 unsigned long move_charge_at_immigrate; 280 unsigned long move_charge_at_immigrate;
285 /* 281 /*
286 * Should kernel memory limits be stabilished independently
287 * from user memory ?
288 */
289 int kmem_independent_accounting;
290 /*
291 * percpu counter. 282 * percpu counter.
292 */ 283 */
293 struct mem_cgroup_stat_cpu *stat; 284 struct mem_cgroup_stat_cpu *stat;
@@ -359,14 +350,9 @@ enum charge_type {
359}; 350};
360 351
361/* for encoding cft->private value on file */ 352/* for encoding cft->private value on file */
362 353#define _MEM (0)
363enum mem_type { 354#define _MEMSWAP (1)
364 _MEM = 0, 355#define _OOM_TYPE (2)
365 _MEMSWAP,
366 _OOM_TYPE,
367 _KMEM,
368};
369
370#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) 356#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
371#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) 357#define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff)
372#define MEMFILE_ATTR(val) ((val) & 0xffff) 358#define MEMFILE_ATTR(val) ((val) & 0xffff)
@@ -3919,17 +3905,10 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
3919 u64 val; 3905 u64 val;
3920 3906
3921 if (!mem_cgroup_is_root(memcg)) { 3907 if (!mem_cgroup_is_root(memcg)) {
3922 val = 0;
3923#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
3924 if (!memcg->kmem_independent_accounting)
3925 val = res_counter_read_u64(&memcg->kmem, RES_USAGE);
3926#endif
3927 if (!swap) 3908 if (!swap)
3928 val += res_counter_read_u64(&memcg->res, RES_USAGE); 3909 return res_counter_read_u64(&memcg->res, RES_USAGE);
3929 else 3910 else
3930 val += res_counter_read_u64(&memcg->memsw, RES_USAGE); 3911 return res_counter_read_u64(&memcg->memsw, RES_USAGE);
3931
3932 return val;
3933 } 3912 }
3934 3913
3935 val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); 3914 val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
@@ -3962,11 +3941,6 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
3962 else 3941 else
3963 val = res_counter_read_u64(&memcg->memsw, name); 3942 val = res_counter_read_u64(&memcg->memsw, name);
3964 break; 3943 break;
3965#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
3966 case _KMEM:
3967 val = res_counter_read_u64(&memcg->kmem, name);
3968 break;
3969#endif
3970 default: 3944 default:
3971 BUG(); 3945 BUG();
3972 break; 3946 break;
@@ -4696,59 +4670,8 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
4696#endif /* CONFIG_NUMA */ 4670#endif /* CONFIG_NUMA */
4697 4671
4698#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 4672#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
4699static u64 kmem_limit_independent_read(struct cgroup *cgroup, struct cftype *cft)
4700{
4701 return mem_cgroup_from_cont(cgroup)->kmem_independent_accounting;
4702}
4703
4704static int kmem_limit_independent_write(struct cgroup *cgroup, struct cftype *cft,
4705 u64 val)
4706{
4707 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
4708 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
4709
4710 val = !!val;
4711
4712 /*
4713 * This follows the same hierarchy restrictions than
4714 * mem_cgroup_hierarchy_write()
4715 */
4716 if (!parent || !parent->use_hierarchy) {
4717 if (list_empty(&cgroup->children))
4718 memcg->kmem_independent_accounting = val;
4719 else
4720 return -EBUSY;
4721 }
4722 else
4723 return -EINVAL;
4724
4725 return 0;
4726}
4727static struct cftype kmem_cgroup_files[] = {
4728 {
4729 .name = "independent_kmem_limit",
4730 .read_u64 = kmem_limit_independent_read,
4731 .write_u64 = kmem_limit_independent_write,
4732 },
4733 {
4734 .name = "kmem.usage_in_bytes",
4735 .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
4736 .read_u64 = mem_cgroup_read,
4737 },
4738 {
4739 .name = "kmem.limit_in_bytes",
4740 .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
4741 .read_u64 = mem_cgroup_read,
4742 },
4743};
4744
4745static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) 4673static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
4746{ 4674{
4747 int ret = 0;
4748
4749 ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
4750 ARRAY_SIZE(kmem_cgroup_files));
4751
4752 /* 4675 /*
4753 * Part of this would be better living in a separate allocation 4676 * Part of this would be better living in a separate allocation
4754 * function, leaving us with just the cgroup tree population work. 4677 * function, leaving us with just the cgroup tree population work.
@@ -4756,9 +4679,7 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
4756 * is only initialized after cgroup creation. I found the less 4679 * is only initialized after cgroup creation. I found the less
4757 * cumbersome way to deal with it to defer it all to populate time 4680 * cumbersome way to deal with it to defer it all to populate time
4758 */ 4681 */
4759 if (!ret) 4682 return mem_cgroup_sockets_init(cont, ss);
4760 ret = mem_cgroup_sockets_init(cont, ss);
4761 return ret;
4762}; 4683};
4763 4684
4764static void kmem_cgroup_destroy(struct cgroup_subsys *ss, 4685static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
@@ -5092,7 +5013,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5092 if (parent && parent->use_hierarchy) { 5013 if (parent && parent->use_hierarchy) {
5093 res_counter_init(&memcg->res, &parent->res); 5014 res_counter_init(&memcg->res, &parent->res);
5094 res_counter_init(&memcg->memsw, &parent->memsw); 5015 res_counter_init(&memcg->memsw, &parent->memsw);
5095 res_counter_init(&memcg->kmem, &parent->kmem);
5096 /* 5016 /*
5097 * We increment refcnt of the parent to ensure that we can 5017 * We increment refcnt of the parent to ensure that we can
5098 * safely access it on res_counter_charge/uncharge. 5018 * safely access it on res_counter_charge/uncharge.
@@ -5103,7 +5023,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5103 } else { 5023 } else {
5104 res_counter_init(&memcg->res, NULL); 5024 res_counter_init(&memcg->res, NULL);
5105 res_counter_init(&memcg->memsw, NULL); 5025 res_counter_init(&memcg->memsw, NULL);
5106 res_counter_init(&memcg->kmem, NULL);
5107 } 5026 }
5108 memcg->last_scanned_child = 0; 5027 memcg->last_scanned_child = 0;
5109 memcg->last_scanned_node = MAX_NUMNODES; 5028 memcg->last_scanned_node = MAX_NUMNODES;