 include/linux/memcontrol.h | 49 ++++++++++++++++++++++++++++++++++-----
 mm/memcontrol.c            | 70 +++++++++++++++++++++++++++----------
 mm/vmscan.c                | 16 +++------
 3 files changed, 103 insertions(+), 32 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d8dd6560621b..d4d1f9b0dbba 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -41,6 +41,23 @@ struct mem_cgroup_reclaim_cookie {
 	unsigned int generation;
 };
 
+enum mem_cgroup_filter_t {
+	VISIT,		/* visit current node */
+	SKIP,		/* skip the current node and continue traversal */
+	SKIP_TREE,	/* skip the whole subtree and continue traversal */
+};
+
+/*
+ * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
+ * iterate through the hierarchy tree. Each tree element is checked by the
+ * predicate before it is returned by the iterator. If a filter returns
+ * SKIP or SKIP_TREE then the iterator code continues traversal (with the
+ * next node down the hierarchy or the next node that doesn't belong under the
+ * memcg's subtree).
+ */
+typedef enum mem_cgroup_filter_t
+(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
+
 #ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -108,9 +125,18 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
-				   struct mem_cgroup *,
-				   struct mem_cgroup_reclaim_cookie *);
+struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim,
+				   mem_cgroup_iter_filter cond);
+
+static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
+}
+
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
 /*
@@ -180,7 +206,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
@@ -295,6 +322,15 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 		struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
+static inline struct mem_cgroup *
+mem_cgroup_iter_cond(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim,
+		mem_cgroup_iter_filter cond)
+{
+	/* first call must return non-NULL, second return NULL */
+	return (struct mem_cgroup *)(unsigned long)!prev;
+}
 
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
@@ -358,10 +394,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root)
 {
-	return false;
+	return VISIT;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
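
The header changes above are the whole API: a predicate type, the new mem_cgroup_iter_cond(), and stubs for !CONFIG_MEMCG builds. A minimal sketch of how a caller-supplied filter is meant to plug in follows; it is illustrative only and not part of the patch, and memcg_is_noisy() and process() are hypothetical stand-ins.

/* Illustrative sketch, not from the patch. */
static enum mem_cgroup_filter_t
my_filter(struct mem_cgroup *memcg, struct mem_cgroup *root)
{
	if (memcg_is_noisy(memcg))	/* hypothetical predicate */
		return SKIP_TREE;	/* prune memcg and its whole subtree */
	return VISIT;
}

static void walk_filtered(struct mem_cgroup *root)
{
	struct mem_cgroup *memcg = NULL;

	/* NULL cookie: a full, non-shared walk of root's subtree */
	while ((memcg = mem_cgroup_iter_cond(root, memcg, NULL, my_filter)))
		process(memcg);		/* hypothetical per-group work */
}

Note the !CONFIG_MEMCG stub of mem_cgroup_iter_cond(): !prev evaluates to 1 on the first call and 0 on later ones, so the cast hands out a single dummy non-NULL pointer and then NULL, which keeps loops like the one above terminating even without a real hierarchy.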
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c016e001c5b2..a4bb857d902c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -875,6 +875,15 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
+static enum mem_cgroup_filter_t
+mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
+		mem_cgroup_iter_filter cond)
+{
+	if (!cond)
+		return VISIT;
+	return cond(memcg, root);
+}
+
 /*
  * Returns a next (in a pre-order walk) alive memcg (with elevated css
  * ref. count) or NULL if the whole root's subtree has been visited.
@@ -882,7 +891,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
  * helper function to be used by mem_cgroup_iter
  */
 static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
-		struct mem_cgroup *last_visited)
+		struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
 {
 	struct cgroup_subsys_state *prev_css, *next_css;
 
@@ -900,11 +909,31 @@ skip_node:
 	if (next_css) {
 		struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
 
-		if (css_tryget(&mem->css))
-			return mem;
-		else {
+		switch (mem_cgroup_filter(mem, root, cond)) {
+		case SKIP:
 			prev_css = next_css;
 			goto skip_node;
+		case SKIP_TREE:
+			if (mem == root)
+				return NULL;
+			/*
+			 * css_rightmost_descendant is not an optimal way to
+			 * skip through a subtree (especially for imbalanced
+			 * trees leaning to right) but that's what we have right
+			 * now. More effective solution would be traversing
+			 * right-up for first non-NULL without calling
+			 * css_next_descendant_pre afterwards.
+			 */
+			prev_css = css_rightmost_descendant(next_css);
+			goto skip_node;
+		case VISIT:
+			if (css_tryget(&mem->css))
+				return mem;
+			else {
+				prev_css = next_css;
+				goto skip_node;
+			}
+			break;
 		}
 	}
 
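
The SKIP_TREE arm leans on a property of pre-order walks: resuming the walk from a subtree's rightmost descendant lands on the first node outside that subtree, which is exactly what the css_rightmost_descendant() call above exploits. A toy analogy in plain C (not kernel code) makes the trick explicit:

#include <stddef.h>

/* Minimal tree with the same shape of links the css walk relies on. */
struct node {
	struct node *parent, *first_child, *next_sibling;
};

static struct node *rightmost_descendant(struct node *n)
{
	while (n->first_child) {		/* descend ... */
		n = n->first_child;
		while (n->next_sibling)		/* ... staying rightmost */
			n = n->next_sibling;
	}
	return n;
}

static struct node *next_preorder(struct node *prev, struct node *root)
{
	if (!prev)
		return root;
	if (prev->first_child)			/* go deeper first */
		return prev->first_child;
	while (prev != root) {			/* then right, then up */
		if (prev->next_sibling)
			return prev->next_sibling;
		prev = prev->parent;
	}
	return NULL;				/* subtree exhausted */
}

/*
 * Skipping prev's whole subtree is then simply
 *	next_preorder(rightmost_descendant(prev), root)
 * because the rightmost descendant has no children, so the next
 * pre-order node is the first one outside prev's subtree.
 */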
@@ -968,6 +997,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
  * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ * @cond: filter for visited nodes, NULL for no filter
  *
  * Returns references to children of the hierarchy below @root, or
  * @root itself, or %NULL after a full round-trip.
@@ -980,15 +1010,18 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * divide up the memcgs in the hierarchy among all concurrent
  * reclaimers operating on the same zone and priority.
  */
-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
 		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+		struct mem_cgroup_reclaim_cookie *reclaim,
+		mem_cgroup_iter_filter cond)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *last_visited = NULL;
 
-	if (mem_cgroup_disabled())
-		return NULL;
+	if (mem_cgroup_disabled()) {
+		/* first call must return non-NULL, second return NULL */
+		return (struct mem_cgroup *)(unsigned long)!prev;
+	}
 
 	if (!root)
 		root = root_mem_cgroup;
@@ -999,7 +1032,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 	if (!root->use_hierarchy && root != root_mem_cgroup) {
 		if (prev)
 			goto out_css_put;
-		return root;
+		if (mem_cgroup_filter(root, root, cond) == VISIT)
+			return root;
+		return NULL;
 	}
 
 	rcu_read_lock();
@@ -1022,7 +1057,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		last_visited = mem_cgroup_iter_load(iter, root, &seq);
 	}
 
-	memcg = __mem_cgroup_iter_next(root, last_visited);
+	memcg = __mem_cgroup_iter_next(root, last_visited, cond);
 
 	if (reclaim) {
 		mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1033,7 +1068,11 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 			reclaim->generation = iter->generation;
 		}
 
-		if (prev && !memcg)
+		/*
+		 * We have finished the whole tree walk or no group has been
+		 * visited because filter told us to skip the root node.
+		 */
+		if (!memcg && (prev || (cond && !last_visited)))
 			goto out_unlock;
 	}
 out_unlock:
@@ -1778,13 +1817,14 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  * a) it is over its soft limit
  * b) any parent up the hierarchy is over its soft limit
  */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 		struct mem_cgroup *root)
 {
 	struct mem_cgroup *parent = memcg;
 
 	if (res_counter_soft_limit_excess(&memcg->res))
-		return true;
+		return VISIT;
 
 	/*
 	 * If any parent up to the root in the hierarchy is over its soft limit
@@ -1792,12 +1832,12 @@ bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 	 */
 	while((parent = parent_mem_cgroup(parent))) {
 		if (res_counter_soft_limit_excess(&parent->res))
-			return true;
+			return VISIT;
 		if (parent == root)
 			break;
 	}
 
-	return false;
+	return SKIP;
 }
 
 /*
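
On the predicate itself: the three-way filter collapses to two answers here, VISIT or SKIP. A worked example on a hypothetical hierarchy (the excess states are invented for illustration):

/*
 *	root			(not over its soft limit)
 *	├── A (over limit)	-> VISIT: A itself exceeds its soft limit
 *	│    └── A1		-> VISIT: ancestor A exceeds its limit
 *	└── B			-> SKIP: neither B nor any ancestor up to
 *				   root exceeds its limit
 */

Returning SKIP rather than SKIP_TREE is the safe choice here: B being under its soft limit says nothing about B's children, each of which may exceed its own limit and must still be examined.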
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1896e7ca494b..f2e35099508b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2151,21 +2151,16 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 		.zone = zone,
 		.priority = sc->priority,
 	};
-	struct mem_cgroup *memcg;
+	struct mem_cgroup *memcg = NULL;
+	mem_cgroup_iter_filter filter = (soft_reclaim) ?
+		mem_cgroup_soft_reclaim_eligible : NULL;
 
 	nr_reclaimed = sc->nr_reclaimed;
 	nr_scanned = sc->nr_scanned;
 
-	memcg = mem_cgroup_iter(root, NULL, &reclaim);
-	do {
+	while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
 		struct lruvec *lruvec;
 
-		if (soft_reclaim &&
-		    !mem_cgroup_soft_reclaim_eligible(memcg, root)) {
-			memcg = mem_cgroup_iter(root, memcg, &reclaim);
-			continue;
-		}
-
 		lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 		shrink_lruvec(lruvec, sc);
@@ -2185,8 +2180,7 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 			mem_cgroup_iter_break(root, memcg);
 			break;
 		}
-		memcg = mem_cgroup_iter(root, memcg, &reclaim);
-	} while (memcg);
+	}
 
 	vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
 		   sc->nr_scanned - nr_scanned,
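
The vmscan side is the consumer of the new API: the eligibility test moves out of the loop body and into the iterator, so the caller no longer open-codes the skip-and-advance dance. A condensed sketch of the resulting pattern, simplified from __shrink_zone() above (enough_reclaimed() is a hypothetical stand-in for the target check next to the mem_cgroup_iter_break() call):

struct mem_cgroup *memcg = NULL;
mem_cgroup_iter_filter filter = soft_reclaim ?
	mem_cgroup_soft_reclaim_eligible : NULL;

/* The iterator now hands out only groups the filter lets through. */
while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
	shrink_lruvec(mem_cgroup_zone_lruvec(zone, memcg), sc);

	if (enough_reclaimed(sc)) {	/* hypothetical early-exit check */
		/* drop the css reference the iterator still holds */
		mem_cgroup_iter_break(root, memcg);
		break;
	}
}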