-rw-r--r--  include/linux/memcontrol.h | 49
-rw-r--r--  mm/memcontrol.c            | 70
-rw-r--r--  mm/vmscan.c                | 16
3 files changed, 103 insertions(+), 32 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d8dd6560621b..d4d1f9b0dbba 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -41,6 +41,23 @@ struct mem_cgroup_reclaim_cookie {
 	unsigned int generation;
 };
 
+enum mem_cgroup_filter_t {
+	VISIT,		/* visit current node */
+	SKIP,		/* skip the current node and continue traversal */
+	SKIP_TREE,	/* skip the whole subtree and continue traversal */
+};
+
+/*
+ * A mem_cgroup_filter_t predicate tells mem_cgroup_iter_cond how to iterate
+ * through the hierarchy tree. Each tree element is checked by the predicate
+ * before it is returned by the iterator. If the filter returns SKIP or
+ * SKIP_TREE, the iterator continues the traversal (with the next node down
+ * the hierarchy or with the next node that does not belong under the memcg's
+ * subtree, respectively).
+ */
+typedef enum mem_cgroup_filter_t
+(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
+
 #ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -108,9 +125,18 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
-				   struct mem_cgroup *,
-				   struct mem_cgroup_reclaim_cookie *);
+struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim,
+				   mem_cgroup_iter_filter cond);
+
+static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
+}
+
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
 /*
@@ -180,7 +206,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
@@ -295,6 +322,15 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 		struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
+static inline struct mem_cgroup *
+mem_cgroup_iter_cond(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim,
+		mem_cgroup_iter_filter cond)
+{
+	/* first call must return non-NULL, second return NULL */
+	return (struct mem_cgroup *)(unsigned long)!prev;
+}
 
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
@@ -358,10 +394,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root)
 {
-	return false;
+	return VISIT;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
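
For context (not part of the patch): with the declarations above, a caller that wants to visit only some groups passes a predicate to mem_cgroup_iter_cond, while passing NULL keeps the old unfiltered mem_cgroup_iter semantics. A minimal sketch of the intended calling pattern, where do_reclaim() is a hypothetical stand-in for the per-group work (such as shrink_lruvec() in the mm/vmscan.c hunk below):

/*
 * Illustrative sketch only, not part of the patch. do_reclaim() is a
 * hypothetical helper standing in for per-memcg work.
 */
static void walk_eligible_groups(struct mem_cgroup *root,
				 struct mem_cgroup_reclaim_cookie *reclaim)
{
	struct mem_cgroup *memcg = NULL;

	/* groups for which the filter returns SKIP are never returned */
	while ((memcg = mem_cgroup_iter_cond(root, memcg, reclaim,
					mem_cgroup_soft_reclaim_eligible))) {
		do_reclaim(memcg);	/* hypothetical per-group work */
	}
}
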
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c016e001c5b2..a4bb857d902c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -875,6 +875,15 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
+static enum mem_cgroup_filter_t
+mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
+		mem_cgroup_iter_filter cond)
+{
+	if (!cond)
+		return VISIT;
+	return cond(memcg, root);
+}
+
 /*
  * Returns a next (in a pre-order walk) alive memcg (with elevated css
  * ref. count) or NULL if the whole root's subtree has been visited.
@@ -882,7 +891,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
  * helper function to be used by mem_cgroup_iter
  */
 static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
-		struct mem_cgroup *last_visited)
+		struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
 {
 	struct cgroup_subsys_state *prev_css, *next_css;
 
@@ -900,11 +909,31 @@ skip_node:
 	if (next_css) {
 		struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
 
-		if (css_tryget(&mem->css))
-			return mem;
-		else {
+		switch (mem_cgroup_filter(mem, root, cond)) {
+		case SKIP:
 			prev_css = next_css;
 			goto skip_node;
+		case SKIP_TREE:
+			if (mem == root)
+				return NULL;
+			/*
+			 * css_rightmost_descendant is not an optimal way to
+			 * skip through a subtree (especially for imbalanced
+			 * trees leaning to the right), but it is what we have
+			 * right now. A more effective solution would be to
+			 * traverse right-up to the first non-NULL node without
+			 * calling css_next_descendant_pre afterwards.
+			 */
+			prev_css = css_rightmost_descendant(next_css);
+			goto skip_node;
+		case VISIT:
+			if (css_tryget(&mem->css))
+				return mem;
+			else {
+				prev_css = next_css;
+				goto skip_node;
+			}
+			break;
 		}
 	}
 
@@ -968,6 +997,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
  * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ * @cond: filter for visited nodes, NULL for no filter
  *
  * Returns references to children of the hierarchy below @root, or
  * @root itself, or %NULL after a full round-trip.
@@ -980,15 +1010,18 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * divide up the memcgs in the hierarchy among all concurrent
  * reclaimers operating on the same zone and priority.
  */
-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
 				   struct mem_cgroup *prev,
-				   struct mem_cgroup_reclaim_cookie *reclaim)
+				   struct mem_cgroup_reclaim_cookie *reclaim,
+				   mem_cgroup_iter_filter cond)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *last_visited = NULL;
 
-	if (mem_cgroup_disabled())
-		return NULL;
+	if (mem_cgroup_disabled()) {
+		/* first call must return non-NULL, second return NULL */
+		return (struct mem_cgroup *)(unsigned long)!prev;
+	}
 
 	if (!root)
 		root = root_mem_cgroup;
@@ -999,7 +1032,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 	if (!root->use_hierarchy && root != root_mem_cgroup) {
 		if (prev)
 			goto out_css_put;
-		return root;
+		if (mem_cgroup_filter(root, root, cond) == VISIT)
+			return root;
+		return NULL;
 	}
 
 	rcu_read_lock();
@@ -1022,7 +1057,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 			last_visited = mem_cgroup_iter_load(iter, root, &seq);
 		}
 
-		memcg = __mem_cgroup_iter_next(root, last_visited);
+		memcg = __mem_cgroup_iter_next(root, last_visited, cond);
 
 		if (reclaim) {
 			mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1033,7 +1068,11 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 				reclaim->generation = iter->generation;
 		}
 
-		if (prev && !memcg)
+		/*
+		 * Either we have finished the whole tree walk, or no group
+		 * has been visited because the filter told us to skip the
+		 * root node.
+		 */
+		if (!memcg && (prev || (cond && !last_visited)))
 			goto out_unlock;
 	}
 out_unlock:
@@ -1778,13 +1817,14 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  * a) it is over its soft limit
  * b) any parent up the hierarchy is over its soft limit
  */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root)
 {
 	struct mem_cgroup *parent = memcg;
 
 	if (res_counter_soft_limit_excess(&memcg->res))
-		return true;
+		return VISIT;
 
 	/*
 	 * If any parent up to the root in the hierarchy is over its soft limit
@@ -1792,12 +1832,12 @@ bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 	 */
 	while((parent = parent_mem_cgroup(parent))) {
 		if (res_counter_soft_limit_excess(&parent->res))
-			return true;
+			return VISIT;
 		if (parent == root)
 			break;
 	}
 
-	return false;
+	return SKIP;
 }
 
 /*
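
An aside, not part of the patch: the predicate above only ever returns VISIT or SKIP. A filter may also return SKIP_TREE to prune a group together with its whole subtree; a hypothetical sketch of such a predicate, valid only if ineligibility of a parent really implies ineligibility of all of its descendants (which is not the case for the soft-limit check above):

/*
 * Hypothetical example, not part of the patch: prune an ineligible group
 * and everything below it instead of checking each child individually.
 */
static enum mem_cgroup_filter_t
skip_whole_subtree_filter(struct mem_cgroup *memcg, struct mem_cgroup *root)
{
	if (res_counter_soft_limit_excess(&memcg->res))
		return VISIT;		/* visit this group */
	return SKIP_TREE;		/* prune memcg and its subtree */
}
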
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1896e7ca494b..f2e35099508b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2151,21 +2151,16 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 		.zone = zone,
 		.priority = sc->priority,
 	};
-	struct mem_cgroup *memcg;
+	struct mem_cgroup *memcg = NULL;
+	mem_cgroup_iter_filter filter = (soft_reclaim) ?
+		mem_cgroup_soft_reclaim_eligible : NULL;
 
 	nr_reclaimed = sc->nr_reclaimed;
 	nr_scanned = sc->nr_scanned;
 
-	memcg = mem_cgroup_iter(root, NULL, &reclaim);
-	do {
+	while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
 		struct lruvec *lruvec;
 
-		if (soft_reclaim &&
-		    !mem_cgroup_soft_reclaim_eligible(memcg, root)) {
-			memcg = mem_cgroup_iter(root, memcg, &reclaim);
-			continue;
-		}
-
 		lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 		shrink_lruvec(lruvec, sc);
@@ -2185,8 +2180,7 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 			mem_cgroup_iter_break(root, memcg);
 			break;
 		}
-		memcg = mem_cgroup_iter(root, memcg, &reclaim);
-	} while (memcg);
+	}
 
 	vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
 		   sc->nr_scanned - nr_scanned,
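
One closing note, again not part of the patch: when memcg is disabled (or with the !CONFIG_MEMCG stub), mem_cgroup_iter_cond() returns a non-NULL token on the first call and NULL on the second, so the rewritten while loop above still runs exactly one iteration against the global LRU. A minimal sketch of that calling contract in isolation:

/*
 * Illustration only, not part of the patch: the disabled/stub iterator
 * degenerates to "run the loop body exactly once".
 */
static void iterate_once_when_disabled(struct mem_cgroup *root)
{
	struct mem_cgroup *memcg = NULL;
	int visits = 0;

	/*
	 * 1st call: !prev is true  -> non-NULL token, body runs
	 * 2nd call: !prev is false -> NULL, loop terminates
	 */
	while ((memcg = mem_cgroup_iter_cond(root, memcg, NULL, NULL)))
		visits++;

	/* visits == 1 here */
}
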