aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorAnton Vorontsov <anton.vorontsov@linaro.org>2013-04-29 18:08:31 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-29 18:54:38 -0400
commit70ddf637eebe47e61fb2be08a59315581b6d2f38 (patch)
tree7fdb9e04da11c191daa225cad2314e440effc176 /mm/memcontrol.c
parent84d96d897671cfb386e722acbefdb3a79e115a8a (diff)
memcg: add memory.pressure_level events
With this patch, userland applications that want to maintain the interactivity/memory allocation cost can use the pressure level notifications. The levels are defined like this: The "low" level means that the system is reclaiming memory for new allocations. Monitoring this reclaiming activity might be useful for maintaining cache level. Upon notification, the program (typically "Activity Manager") might analyze vmstat and act in advance (i.e. prematurely shut down unimportant services). The "medium" level means that the system is experiencing medium memory pressure; the system might be making swap, paging out active file caches, etc. Upon this event, applications may decide to further analyze vmstat/zoneinfo/memcg or internal memory usage statistics and free any resources that can be easily reconstructed or re-read from a disk. The "critical" level means that the system is actively thrashing, it is about to run out of memory (OOM), or even the in-kernel OOM killer is on its way to trigger. Applications should do whatever they can to help the system. It might be too late to consult with vmstat or any other statistics, so it's advisable to take immediate action. The events are propagated upward until the event is handled, i.e. the events are not pass-through. Here is what this means: for example, suppose you have three cgroups: A->B->C. Now you set up an event listener on cgroups A, B and C, and suppose group C experiences some pressure. In this situation, only group C will receive the notification, i.e. groups A and B will not receive it. This is done to avoid excessive "broadcasting" of messages, which disturbs the system and which is especially bad if we are low on memory or thrashing. So, organize the cgroups wisely, or propagate the events manually (or ask us to implement the pass-through events, explaining why you would need them). 
Performance-wise, the memory pressure notifications feature itself is lightweight and does not require much bookkeeping, in contrast to the rest of the memcg features. Unfortunately, in the current memcg implementation, page accounting is an inseparable part and cannot be turned off. The good news is that there are some efforts[1] to improve the situation; plus, implementing the same, fully API-compatible[2] interface for the CONFIG_MEMCG=n case (e.g. embedded) is also a viable option, so it will not require any changes on the userland side. [1] http://permalink.gmane.org/gmane.linux.kernel.cgroups/6291 [2] http://lkml.org/lkml/2013/2/21/454 [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix CONFIG_CGROUPS=n warnings] Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org> Acked-by: Kirill A. Shutemov <kirill@shutemov.name> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Tejun Heo <tj@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Glauber Costa <glommer@parallels.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Luiz Capitulino <lcapitulino@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: Leonid Moiseichuk <leonid.moiseichuk@nokia.com> Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com> Cc: John Stultz <john.stultz@linaro.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c29
1 files changed, 29 insertions, 0 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7e5bc43c2d1f..360464f40e96 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -49,6 +49,7 @@
49#include <linux/fs.h> 49#include <linux/fs.h>
50#include <linux/seq_file.h> 50#include <linux/seq_file.h>
51#include <linux/vmalloc.h> 51#include <linux/vmalloc.h>
52#include <linux/vmpressure.h>
52#include <linux/mm_inline.h> 53#include <linux/mm_inline.h>
53#include <linux/page_cgroup.h> 54#include <linux/page_cgroup.h>
54#include <linux/cpu.h> 55#include <linux/cpu.h>
@@ -261,6 +262,9 @@ struct mem_cgroup {
261 */ 262 */
262 struct res_counter res; 263 struct res_counter res;
263 264
265 /* vmpressure notifications */
266 struct vmpressure vmpressure;
267
264 union { 268 union {
265 /* 269 /*
266 * the counter to account for mem+swap usage. 270 * the counter to account for mem+swap usage.
@@ -359,6 +363,7 @@ struct mem_cgroup {
359 atomic_t numainfo_events; 363 atomic_t numainfo_events;
360 atomic_t numainfo_updating; 364 atomic_t numainfo_updating;
361#endif 365#endif
366
362 /* 367 /*
363 * Per cgroup active and inactive list, similar to the 368 * Per cgroup active and inactive list, similar to the
364 * per zone LRU lists. 369 * per zone LRU lists.
@@ -510,6 +515,24 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
510 return container_of(s, struct mem_cgroup, css); 515 return container_of(s, struct mem_cgroup, css);
511} 516}
512 517
518/* Some nice accessors for the vmpressure. */
519struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
520{
521 if (!memcg)
522 memcg = root_mem_cgroup;
523 return &memcg->vmpressure;
524}
525
526struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
527{
528 return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
529}
530
531struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css)
532{
533 return &mem_cgroup_from_css(css)->vmpressure;
534}
535
513static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) 536static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
514{ 537{
515 return (memcg == root_mem_cgroup); 538 return (memcg == root_mem_cgroup);
@@ -5907,6 +5930,11 @@ static struct cftype mem_cgroup_files[] = {
5907 .unregister_event = mem_cgroup_oom_unregister_event, 5930 .unregister_event = mem_cgroup_oom_unregister_event,
5908 .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), 5931 .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
5909 }, 5932 },
5933 {
5934 .name = "pressure_level",
5935 .register_event = vmpressure_register_event,
5936 .unregister_event = vmpressure_unregister_event,
5937 },
5910#ifdef CONFIG_NUMA 5938#ifdef CONFIG_NUMA
5911 { 5939 {
5912 .name = "numa_stat", 5940 .name = "numa_stat",
@@ -6188,6 +6216,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
6188 memcg->move_charge_at_immigrate = 0; 6216 memcg->move_charge_at_immigrate = 0;
6189 mutex_init(&memcg->thresholds_lock); 6217 mutex_init(&memcg->thresholds_lock);
6190 spin_lock_init(&memcg->move_lock); 6218 spin_lock_init(&memcg->move_lock);
6219 vmpressure_init(&memcg->vmpressure);
6191 6220
6192 return &memcg->css; 6221 return &memcg->css;
6193 6222