about summary refs log tree commit diff stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorJohannes Weiner <jweiner@redhat.com>2012-01-12 20:17:55 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 23:13:04 -0500
commit527a5ec9a53471d855291ba9f1fdf1dd4e12a184 (patch)
tree156a8be57e66e1c6f54e498beda0f2d8412cee12 /mm/memcontrol.c
parentf16015fbf2f6ac45505d6ad21455ff9f6c14473d (diff)
mm: memcg: per-priority per-zone hierarchy scan generations
Memory cgroup limit reclaim currently picks one memory cgroup out of the target hierarchy, remembers it as the last scanned child, and reclaims all zones in it with decreasing priority levels.

The new hierarchy reclaim code will pick memory cgroups from the same hierarchy concurrently from different zones and priority levels, it becomes necessary that hierarchy roots not only remember the last scanned child, but do so for each zone and priority level.

Until now, we reclaimed memcgs like this:

    mem = mem_cgroup_iter(root)
    for each priority level:
        for each zone in zonelist:
            reclaim(mem, zone)

But subsequent patches will move the memcg iteration inside the loop over the zones:

    for each priority level:
        for each zone in zonelist:
            mem = mem_cgroup_iter(root)
            reclaim(mem, zone)

And to keep with the original scan order - memcg -> priority -> zone - the last scanned memcg has to be remembered per zone and per priority level.

Furthermore, global reclaim will be switched to the hierarchy walk as well. Different from limit reclaim, which can just recheck the limit after some reclaim progress, its target is to scan all memcgs for the desired zone pages, proportional to the memcg size, and so reliably detecting a full hierarchy round-trip will become crucial.

Currently, the code relies on one reclaimer encountering the same memcg twice, but that is error-prone with concurrent reclaimers. Instead, use a generation counter that is increased every time the child with the highest ID has been visited, so that reclaimers can stop when the generation changes.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c | 65
1 file changed, 47 insertions(+), 18 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6edef95fecf4..bec451da7def 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -123,6 +123,13 @@ struct mem_cgroup_stat_cpu {
123 unsigned long targets[MEM_CGROUP_NTARGETS]; 123 unsigned long targets[MEM_CGROUP_NTARGETS];
124}; 124};
125 125
126struct mem_cgroup_reclaim_iter {
127 /* css_id of the last scanned hierarchy member */
128 int position;
129 /* scan generation, increased every round-trip */
130 unsigned int generation;
131};
132
126/* 133/*
127 * per-zone information in memory controller. 134 * per-zone information in memory controller.
128 */ 135 */
@@ -133,6 +140,8 @@ struct mem_cgroup_per_zone {
133 struct list_head lists[NR_LRU_LISTS]; 140 struct list_head lists[NR_LRU_LISTS];
134 unsigned long count[NR_LRU_LISTS]; 141 unsigned long count[NR_LRU_LISTS];
135 142
143 struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
144
136 struct zone_reclaim_stat reclaim_stat; 145 struct zone_reclaim_stat reclaim_stat;
137 struct rb_node tree_node; /* RB tree node */ 146 struct rb_node tree_node; /* RB tree node */
138 unsigned long long usage_in_excess;/* Set to the value by which */ 147 unsigned long long usage_in_excess;/* Set to the value by which */
@@ -233,11 +242,6 @@ struct mem_cgroup {
233 * per zone LRU lists. 242 * per zone LRU lists.
234 */ 243 */
235 struct mem_cgroup_lru_info info; 244 struct mem_cgroup_lru_info info;
236 /*
237 * While reclaiming in a hierarchy, we cache the last child we
238 * reclaimed from.
239 */
240 int last_scanned_child;
241 int last_scanned_node; 245 int last_scanned_node;
242#if MAX_NUMNODES > 1 246#if MAX_NUMNODES > 1
243 nodemask_t scan_nodes; 247 nodemask_t scan_nodes;
@@ -853,9 +857,16 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
853 return memcg; 857 return memcg;
854} 858}
855 859
856static struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, 860struct mem_cgroup_reclaim_cookie {
857 struct mem_cgroup *prev, 861 struct zone *zone;
858 bool reclaim) 862 int priority;
863 unsigned int generation;
864};
865
866static struct mem_cgroup *
867mem_cgroup_iter(struct mem_cgroup *root,
868 struct mem_cgroup *prev,
869 struct mem_cgroup_reclaim_cookie *reclaim)
859{ 870{
860 struct mem_cgroup *memcg = NULL; 871 struct mem_cgroup *memcg = NULL;
861 int id = 0; 872 int id = 0;
@@ -876,10 +887,20 @@ static struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
876 } 887 }
877 888
878 while (!memcg) { 889 while (!memcg) {
890 struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
879 struct cgroup_subsys_state *css; 891 struct cgroup_subsys_state *css;
880 892
881 if (reclaim) 893 if (reclaim) {
882 id = root->last_scanned_child; 894 int nid = zone_to_nid(reclaim->zone);
895 int zid = zone_idx(reclaim->zone);
896 struct mem_cgroup_per_zone *mz;
897
898 mz = mem_cgroup_zoneinfo(root, nid, zid);
899 iter = &mz->reclaim_iter[reclaim->priority];
900 if (prev && reclaim->generation != iter->generation)
901 return NULL;
902 id = iter->position;
903 }
883 904
884 rcu_read_lock(); 905 rcu_read_lock();
885 css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id); 906 css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);
@@ -891,8 +912,13 @@ static struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
891 id = 0; 912 id = 0;
892 rcu_read_unlock(); 913 rcu_read_unlock();
893 914
894 if (reclaim) 915 if (reclaim) {
895 root->last_scanned_child = id; 916 iter->position = id;
917 if (!css)
918 iter->generation++;
919 else if (!prev && memcg)
920 reclaim->generation = iter->generation;
921 }
896 922
897 if (prev && !css) 923 if (prev && !css)
898 return NULL; 924 return NULL;
@@ -915,14 +941,14 @@ static void mem_cgroup_iter_break(struct mem_cgroup *root,
915 * be used for reference counting. 941 * be used for reference counting.
916 */ 942 */
917#define for_each_mem_cgroup_tree(iter, root) \ 943#define for_each_mem_cgroup_tree(iter, root) \
918 for (iter = mem_cgroup_iter(root, NULL, false); \ 944 for (iter = mem_cgroup_iter(root, NULL, NULL); \
919 iter != NULL; \ 945 iter != NULL; \
920 iter = mem_cgroup_iter(root, iter, false)) 946 iter = mem_cgroup_iter(root, iter, NULL))
921 947
922#define for_each_mem_cgroup(iter) \ 948#define for_each_mem_cgroup(iter) \
923 for (iter = mem_cgroup_iter(NULL, NULL, false); \ 949 for (iter = mem_cgroup_iter(NULL, NULL, NULL); \
924 iter != NULL; \ 950 iter != NULL; \
925 iter = mem_cgroup_iter(NULL, iter, false)) 951 iter = mem_cgroup_iter(NULL, iter, NULL))
926 952
927static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) 953static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
928{ 954{
@@ -1692,6 +1718,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
1692 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; 1718 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1693 unsigned long excess; 1719 unsigned long excess;
1694 unsigned long nr_scanned; 1720 unsigned long nr_scanned;
1721 struct mem_cgroup_reclaim_cookie reclaim = {
1722 .zone = zone,
1723 .priority = 0,
1724 };
1695 1725
1696 excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; 1726 excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
1697 1727
@@ -1700,7 +1730,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
1700 noswap = true; 1730 noswap = true;
1701 1731
1702 while (1) { 1732 while (1) {
1703 victim = mem_cgroup_iter(root_memcg, victim, true); 1733 victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
1704 if (!victim) { 1734 if (!victim) {
1705 loop++; 1735 loop++;
1706 /* 1736 /*
@@ -5028,7 +5058,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5028 res_counter_init(&memcg->res, NULL); 5058 res_counter_init(&memcg->res, NULL);
5029 res_counter_init(&memcg->memsw, NULL); 5059 res_counter_init(&memcg->memsw, NULL);
5030 } 5060 }
5031 memcg->last_scanned_child = 0;
5032 memcg->last_scanned_node = MAX_NUMNODES; 5061 memcg->last_scanned_node = MAX_NUMNODES;
5033 INIT_LIST_HEAD(&memcg->oom_notify); 5062 INIT_LIST_HEAD(&memcg->oom_notify);
5034 5063