author		Andrew Morton <akpm@linux-foundation.org>	2013-09-24 18:27:41 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-24 20:00:26 -0400
commit		0608f43da64a1f1c42507304b5f25bc8b1227aa4 (patch)
tree		3138a251439bee1f12bd176f41f3fc63e194f455 /mm
parent		bb4cc1a8b5eaf3b9e5707d7c270400b05d11a2b7 (diff)
revert "memcg, vmscan: integrate soft reclaim tighter with zone shrinking code"

Revert commit 3b38722efd9f ("memcg, vmscan: integrate soft reclaim
tighter with zone shrinking code")

I merged this prematurely - Michal and Johannes still disagree about the
overall design direction and the future remains unclear.

Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	163
-rw-r--r--	mm/vmscan.c	62
2 files changed, 168 insertions, 57 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 852dbec07ce6..1c52ddbc839b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1991,28 +1991,57 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * A group is eligible for the soft limit reclaim if
- *	a) it is over its soft limit
- *	b) any parent up the hierarchy is over its soft limit
- */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
-{
-	struct mem_cgroup *parent = memcg;
-
-	if (res_counter_soft_limit_excess(&memcg->res))
-		return true;
-
-	/*
-	 * If any parent up the hierarchy is over its soft limit then we
-	 * have to obey and reclaim from this group as well.
-	 */
-	while ((parent = parent_mem_cgroup(parent))) {
-		if (res_counter_soft_limit_excess(&parent->res))
-			return true;
-	}
-
-	return false;
-}
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
+{
+	struct mem_cgroup *victim = NULL;
+	int total = 0;
+	int loop = 0;
+	unsigned long excess;
+	unsigned long nr_scanned;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = 0,
+	};
+
+	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+	while (1) {
+		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+		if (!victim) {
+			loop++;
+			if (loop >= 2) {
+				/*
+				 * If we have not been able to reclaim
+				 * anything, it might because there are
+				 * no reclaimable pages under this hierarchy
+				 */
+				if (!total)
+					break;
+				/*
+				 * We want to do more targeted reclaim.
+				 * excess >> 2 is not to excessive so as to
+				 * reclaim too much, nor too less that we keep
+				 * coming back to reclaim from this cgroup
+				 */
+				if (total >= (excess >> 2) ||
+					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+					break;
+			}
+			continue;
+		}
+		if (!mem_cgroup_reclaimable(victim, false))
+			continue;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
+			break;
+	}
+	mem_cgroup_iter_break(root_memcg, victim);
+	return total;
+}
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -4761,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 	return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
+{
+	unsigned long nr_reclaimed = 0;
+	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+	unsigned long reclaimed;
+	int loop = 0;
+	struct mem_cgroup_tree_per_zone *mctz;
+	unsigned long long excess;
+	unsigned long nr_scanned;
+
+	if (order > 0)
+		return 0;
+
+	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+	/*
+	 * This loop can run a while, specially if mem_cgroup's continuously
+	 * keep exceeding their soft limit and putting the system under
+	 * pressure
+	 */
+	do {
+		if (next_mz)
+			mz = next_mz;
+		else
+			mz = mem_cgroup_largest_soft_limit_node(mctz);
+		if (!mz)
+			break;
+
+		nr_scanned = 0;
+		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+						    gfp_mask, &nr_scanned);
+		nr_reclaimed += reclaimed;
+		*total_scanned += nr_scanned;
+		spin_lock(&mctz->lock);
+
+		/*
+		 * If we failed to reclaim anything from this memory cgroup
+		 * it is time to move on to the next cgroup
+		 */
+		next_mz = NULL;
+		if (!reclaimed) {
+			do {
+				/*
+				 * Loop until we find yet another one.
+				 *
+				 * By the time we get the soft_limit lock
+				 * again, someone might have aded the
+				 * group back on the RB tree. Iterate to
+				 * make sure we get a different mem.
+				 * mem_cgroup_largest_soft_limit_node returns
+				 * NULL if no other cgroup is present on
+				 * the tree
+				 */
+				next_mz =
+				__mem_cgroup_largest_soft_limit_node(mctz);
+				if (next_mz == mz)
+					css_put(&next_mz->memcg->css);
+				else /* next_mz == NULL or other memcg */
+					break;
+			} while (1);
+		}
+		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+		excess = res_counter_soft_limit_excess(&mz->memcg->res);
+		/*
+		 * One school of thought says that we should not add
+		 * back the node to the tree if reclaim returns 0.
+		 * But our reclaim could return 0, simply because due
+		 * to priority we are exposing a smaller subset of
+		 * memory to reclaim from. Consider this as a longer
+		 * term TODO.
+		 */
+		/* If excess == 0, no tree ops */
+		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+		spin_unlock(&mctz->lock);
+		css_put(&mz->memcg->css);
+		loop++;
+		/*
+		 * Could not reclaim anything and there are no more
+		 * mem cgroups to try or we seem to be looping without
+		 * reclaiming anything.
+		 */
+		if (!nr_reclaimed &&
+			(next_mz == NULL ||
+			loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+			break;
+	} while (!nr_reclaimed);
+	if (next_mz)
+		css_put(&next_mz->memcg->css);
+	return nr_reclaimed;
+}
+
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0e081cada4ba..beb35778c69f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,21 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return !mem_cgroup_disabled() && global_reclaim(sc);
-}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return false;
-}
 #endif
 
 unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2174,8 +2164,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	}
 }
 
-static void
-__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long nr_reclaimed, nr_scanned;
 
@@ -2194,12 +2183,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 	do {
 		struct lruvec *lruvec;
 
-		if (soft_reclaim &&
-		    !mem_cgroup_soft_reclaim_eligible(memcg)) {
-			memcg = mem_cgroup_iter(root, memcg, &reclaim);
-			continue;
-		}
-
 		lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 		shrink_lruvec(lruvec, sc);
@@ -2230,24 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 					sc->nr_scanned - nr_scanned, sc));
 }
 
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-	bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
-	unsigned long nr_scanned = sc->nr_scanned;
-
-	__shrink_zone(zone, sc, do_soft_reclaim);
-
-	/*
-	 * No group is over the soft limit or those that are do not have
-	 * pages in the zone we are reclaiming so we have to reclaim everybody
-	 */
-	if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
-		__shrink_zone(zone, sc, false);
-		return;
-	}
-}
-
 /* Returns true if compaction should go ahead for a high-order request */
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
@@ -2309,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	bool aborted_reclaim = false;
 
 	/*
@@ -2348,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 					continue;
 				}
 			}
+			/*
+			 * This steals pages from memory cgroups over softlimit
+			 * and returns the number of reclaimed pages and
+			 * scanned pages. This works for global memory pressure
+			 * and balancing, not for a memcg's limit.
+			 */
+			nr_soft_scanned = 0;
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+						sc->order, sc->gfp_mask,
+						&nr_soft_scanned);
+			sc->nr_reclaimed += nr_soft_reclaimed;
+			sc->nr_scanned += nr_soft_scanned;
 			/* need some check for avoid more shrink_zone() */
 		}
 
@@ -2941,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.priority = DEF_PRIORITY,
@@ -3055,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 
 			sc.nr_scanned = 0;
 
+			nr_soft_scanned = 0;
+			/*
+			 * Call soft limit reclaim before calling shrink_zone.
+			 */
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+							order, sc.gfp_mask,
+							&nr_soft_scanned);
+			sc.nr_reclaimed += nr_soft_reclaimed;
+
 			/*
 			 * There should be no need to raise the scanning
 			 * priority if enough pages are already being scanned