aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.cz>2013-09-12 18:13:26 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-12 18:38:00 -0400
commitde57780dc659f95b17ccb649f003278dde0b5b86 (patch)
treed2493cc412c16946f3ead9158a61b26dd1f0c45a /mm/memcontrol.c
parenta5b7c87f92076352dbff2fe0423ec255e1c9a71b (diff)
memcg: enhance memcg iterator to support predicates
The caller of the iterator might know that some nodes or even subtrees should be skipped, but there is no way to tell iterators about that, so the only choice left is to let iterators visit each node and do the selection outside of the iterating code. This, however, doesn't scale well for hierarchies with many groups where only a few groups are interesting. This patch adds a mem_cgroup_iter_cond variant of the iterator with a callback which gets called for every visited node. There are three possible ways the callback can influence the walk: either the node is visited, it is skipped but the tree walk continues down the tree, or the whole subtree of the current group is skipped. [hughd@google.com: fix memcg-less page reclaim] Signed-off-by: Michal Hocko <mhocko@suse.cz> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Glauber Costa <glommer@openvz.org> Cc: Greg Thelen <gthelen@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Michel Lespinasse <walken@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: Ying Han <yinghan@google.com> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c70
1 files changed, 55 insertions, 15 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c016e001c5b2..a4bb857d902c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -875,6 +875,15 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
875 return memcg; 875 return memcg;
876} 876}
877 877
878static enum mem_cgroup_filter_t
879mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
880 mem_cgroup_iter_filter cond)
881{
882 if (!cond)
883 return VISIT;
884 return cond(memcg, root);
885}
886
878/* 887/*
879 * Returns a next (in a pre-order walk) alive memcg (with elevated css 888 * Returns a next (in a pre-order walk) alive memcg (with elevated css
880 * ref. count) or NULL if the whole root's subtree has been visited. 889 * ref. count) or NULL if the whole root's subtree has been visited.
@@ -882,7 +891,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
882 * helper function to be used by mem_cgroup_iter 891 * helper function to be used by mem_cgroup_iter
883 */ 892 */
884static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, 893static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
885 struct mem_cgroup *last_visited) 894 struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
886{ 895{
887 struct cgroup_subsys_state *prev_css, *next_css; 896 struct cgroup_subsys_state *prev_css, *next_css;
888 897
@@ -900,11 +909,31 @@ skip_node:
900 if (next_css) { 909 if (next_css) {
901 struct mem_cgroup *mem = mem_cgroup_from_css(next_css); 910 struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
902 911
903 if (css_tryget(&mem->css)) 912 switch (mem_cgroup_filter(mem, root, cond)) {
904 return mem; 913 case SKIP:
905 else {
906 prev_css = next_css; 914 prev_css = next_css;
907 goto skip_node; 915 goto skip_node;
916 case SKIP_TREE:
917 if (mem == root)
918 return NULL;
919 /*
920 * css_rightmost_descendant is not an optimal way to
921 * skip through a subtree (especially for imbalanced
922 * trees leaning to right) but that's what we have right
923 * now. More effective solution would be traversing
924 * right-up for first non-NULL without calling
925 * css_next_descendant_pre afterwards.
926 */
927 prev_css = css_rightmost_descendant(next_css);
928 goto skip_node;
929 case VISIT:
930 if (css_tryget(&mem->css))
931 return mem;
932 else {
933 prev_css = next_css;
934 goto skip_node;
935 }
936 break;
908 } 937 }
909 } 938 }
910 939
@@ -968,6 +997,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
968 * @root: hierarchy root 997 * @root: hierarchy root
969 * @prev: previously returned memcg, NULL on first invocation 998 * @prev: previously returned memcg, NULL on first invocation
970 * @reclaim: cookie for shared reclaim walks, NULL for full walks 999 * @reclaim: cookie for shared reclaim walks, NULL for full walks
1000 * @cond: filter for visited nodes, NULL for no filter
971 * 1001 *
972 * Returns references to children of the hierarchy below @root, or 1002 * Returns references to children of the hierarchy below @root, or
973 * @root itself, or %NULL after a full round-trip. 1003 * @root itself, or %NULL after a full round-trip.
@@ -980,15 +1010,18 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
980 * divide up the memcgs in the hierarchy among all concurrent 1010 * divide up the memcgs in the hierarchy among all concurrent
981 * reclaimers operating on the same zone and priority. 1011 * reclaimers operating on the same zone and priority.
982 */ 1012 */
983struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, 1013struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
984 struct mem_cgroup *prev, 1014 struct mem_cgroup *prev,
985 struct mem_cgroup_reclaim_cookie *reclaim) 1015 struct mem_cgroup_reclaim_cookie *reclaim,
1016 mem_cgroup_iter_filter cond)
986{ 1017{
987 struct mem_cgroup *memcg = NULL; 1018 struct mem_cgroup *memcg = NULL;
988 struct mem_cgroup *last_visited = NULL; 1019 struct mem_cgroup *last_visited = NULL;
989 1020
990 if (mem_cgroup_disabled()) 1021 if (mem_cgroup_disabled()) {
991 return NULL; 1022 /* first call must return non-NULL, second return NULL */
1023 return (struct mem_cgroup *)(unsigned long)!prev;
1024 }
992 1025
993 if (!root) 1026 if (!root)
994 root = root_mem_cgroup; 1027 root = root_mem_cgroup;
@@ -999,7 +1032,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
999 if (!root->use_hierarchy && root != root_mem_cgroup) { 1032 if (!root->use_hierarchy && root != root_mem_cgroup) {
1000 if (prev) 1033 if (prev)
1001 goto out_css_put; 1034 goto out_css_put;
1002 return root; 1035 if (mem_cgroup_filter(root, root, cond) == VISIT)
1036 return root;
1037 return NULL;
1003 } 1038 }
1004 1039
1005 rcu_read_lock(); 1040 rcu_read_lock();
@@ -1022,7 +1057,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
1022 last_visited = mem_cgroup_iter_load(iter, root, &seq); 1057 last_visited = mem_cgroup_iter_load(iter, root, &seq);
1023 } 1058 }
1024 1059
1025 memcg = __mem_cgroup_iter_next(root, last_visited); 1060 memcg = __mem_cgroup_iter_next(root, last_visited, cond);
1026 1061
1027 if (reclaim) { 1062 if (reclaim) {
1028 mem_cgroup_iter_update(iter, last_visited, memcg, seq); 1063 mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1033,7 +1068,11 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
1033 reclaim->generation = iter->generation; 1068 reclaim->generation = iter->generation;
1034 } 1069 }
1035 1070
1036 if (prev && !memcg) 1071 /*
1072 * We have finished the whole tree walk or no group has been
1073 * visited because filter told us to skip the root node.
1074 */
1075 if (!memcg && (prev || (cond && !last_visited)))
1037 goto out_unlock; 1076 goto out_unlock;
1038 } 1077 }
1039out_unlock: 1078out_unlock:
@@ -1778,13 +1817,14 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1778 * a) it is over its soft limit 1817 * a) it is over its soft limit
1779 * b) any parent up the hierarchy is over its soft limit 1818 * b) any parent up the hierarchy is over its soft limit
1780 */ 1819 */
1781bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, 1820enum mem_cgroup_filter_t
1821mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
1782 struct mem_cgroup *root) 1822 struct mem_cgroup *root)
1783{ 1823{
1784 struct mem_cgroup *parent = memcg; 1824 struct mem_cgroup *parent = memcg;
1785 1825
1786 if (res_counter_soft_limit_excess(&memcg->res)) 1826 if (res_counter_soft_limit_excess(&memcg->res))
1787 return true; 1827 return VISIT;
1788 1828
1789 /* 1829 /*
1790 * If any parent up to the root in the hierarchy is over its soft limit 1830 * If any parent up to the root in the hierarchy is over its soft limit
@@ -1792,12 +1832,12 @@ bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
1792 */ 1832 */
1793 while((parent = parent_mem_cgroup(parent))) { 1833 while((parent = parent_mem_cgroup(parent))) {
1794 if (res_counter_soft_limit_excess(&parent->res)) 1834 if (res_counter_soft_limit_excess(&parent->res))
1795 return true; 1835 return VISIT;
1796 if (parent == root) 1836 if (parent == root)
1797 break; 1837 break;
1798 } 1838 }
1799 1839
1800 return false; 1840 return SKIP;
1801} 1841}
1802 1842
1803/* 1843/*