author	Andrew Morton <akpm@linux-foundation.org>	2013-09-24 18:27:41 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-24 20:00:26 -0400
commit	0608f43da64a1f1c42507304b5f25bc8b1227aa4 (patch)
tree	3138a251439bee1f12bd176f41f3fc63e194f455 /mm
parent	bb4cc1a8b5eaf3b9e5707d7c270400b05d11a2b7 (diff)
revert "memcg, vmscan: integrate soft reclaim tighter with zone shrinking code"
Revert commit 3b38722efd9f ("memcg, vmscan: integrate soft reclaim tighter
with zone shrinking code")

I merged this prematurely - Michal and Johannes still disagree about the
overall design direction and the future remains unclear.

Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  163
-rw-r--r--  mm/vmscan.c       62
2 files changed, 168 insertions(+), 57 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 852dbec07ce6..1c52ddbc839b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1991,28 +1991,57 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * A group is eligible for the soft limit reclaim if
- *      a) it is over its soft limit
- *      b) any parent up the hierarchy is over its soft limit
- */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
-{
-        struct mem_cgroup *parent = memcg;
-
-        if (res_counter_soft_limit_excess(&memcg->res))
-                return true;
-
-        /*
-         * If any parent up the hierarchy is over its soft limit then we
-         * have to obey and reclaim from this group as well.
-         */
-        while ((parent = parent_mem_cgroup(parent))) {
-                if (res_counter_soft_limit_excess(&parent->res))
-                        return true;
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+                                   struct zone *zone,
+                                   gfp_t gfp_mask,
+                                   unsigned long *total_scanned)
+{
+        struct mem_cgroup *victim = NULL;
+        int total = 0;
+        int loop = 0;
+        unsigned long excess;
+        unsigned long nr_scanned;
+        struct mem_cgroup_reclaim_cookie reclaim = {
+                .zone = zone,
+                .priority = 0,
+        };
+
+        excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+        while (1) {
+                victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+                if (!victim) {
+                        loop++;
+                        if (loop >= 2) {
+                                /*
+                                 * If we have not been able to reclaim
+                                 * anything, it might because there are
+                                 * no reclaimable pages under this hierarchy
+                                 */
+                                if (!total)
+                                        break;
+                                /*
+                                 * We want to do more targeted reclaim.
+                                 * excess >> 2 is not to excessive so as to
+                                 * reclaim too much, nor too less that we keep
+                                 * coming back to reclaim from this cgroup
+                                 */
+                                if (total >= (excess >> 2) ||
+                                        (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+                                        break;
+                        }
+                        continue;
+                }
+                if (!mem_cgroup_reclaimable(victim, false))
+                        continue;
+                total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+                                                     zone, &nr_scanned);
+                *total_scanned += nr_scanned;
+                if (!res_counter_soft_limit_excess(&root_memcg->res))
+                        break;
         }
-
-        return false;
+        mem_cgroup_iter_break(root_memcg, victim);
+        return total;
 }
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -4761,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
         return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                            gfp_t gfp_mask,
+                                            unsigned long *total_scanned)
+{
+        unsigned long nr_reclaimed = 0;
+        struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+        unsigned long reclaimed;
+        int loop = 0;
+        struct mem_cgroup_tree_per_zone *mctz;
+        unsigned long long excess;
+        unsigned long nr_scanned;
+
+        if (order > 0)
+                return 0;
+
+        mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+        /*
+         * This loop can run a while, specially if mem_cgroup's continuously
+         * keep exceeding their soft limit and putting the system under
+         * pressure
+         */
+        do {
+                if (next_mz)
+                        mz = next_mz;
+                else
+                        mz = mem_cgroup_largest_soft_limit_node(mctz);
+                if (!mz)
+                        break;
+
+                nr_scanned = 0;
+                reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+                                                    gfp_mask, &nr_scanned);
+                nr_reclaimed += reclaimed;
+                *total_scanned += nr_scanned;
+                spin_lock(&mctz->lock);
+
+                /*
+                 * If we failed to reclaim anything from this memory cgroup
+                 * it is time to move on to the next cgroup
+                 */
+                next_mz = NULL;
+                if (!reclaimed) {
+                        do {
+                                /*
+                                 * Loop until we find yet another one.
+                                 *
+                                 * By the time we get the soft_limit lock
+                                 * again, someone might have aded the
+                                 * group back on the RB tree. Iterate to
+                                 * make sure we get a different mem.
+                                 * mem_cgroup_largest_soft_limit_node returns
+                                 * NULL if no other cgroup is present on
+                                 * the tree
+                                 */
+                                next_mz =
+                                __mem_cgroup_largest_soft_limit_node(mctz);
+                                if (next_mz == mz)
+                                        css_put(&next_mz->memcg->css);
+                                else /* next_mz == NULL or other memcg */
+                                        break;
+                        } while (1);
+                }
+                __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+                excess = res_counter_soft_limit_excess(&mz->memcg->res);
+                /*
+                 * One school of thought says that we should not add
+                 * back the node to the tree if reclaim returns 0.
+                 * But our reclaim could return 0, simply because due
+                 * to priority we are exposing a smaller subset of
+                 * memory to reclaim from. Consider this as a longer
+                 * term TODO.
+                 */
+                /* If excess == 0, no tree ops */
+                __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+                spin_unlock(&mctz->lock);
+                css_put(&mz->memcg->css);
+                loop++;
+                /*
+                 * Could not reclaim anything and there are no more
+                 * mem cgroups to try or we seem to be looping without
+                 * reclaiming anything.
+                 */
+                if (!nr_reclaimed &&
+                        (next_mz == NULL ||
+                        loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+                        break;
+        } while (!nr_reclaimed);
+        if (next_mz)
+                css_put(&next_mz->memcg->css);
+        return nr_reclaimed;
+}
+
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0e081cada4ba..beb35778c69f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,21 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
 {
         return !sc->target_mem_cgroup;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-        return !mem_cgroup_disabled() && global_reclaim(sc);
-}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
         return true;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-        return false;
-}
 #endif
 
 unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2174,8 +2164,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
         }
 }
 
-static void
-__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
         unsigned long nr_reclaimed, nr_scanned;
 
@@ -2194,12 +2183,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
         do {
                 struct lruvec *lruvec;
 
-                if (soft_reclaim &&
-                    !mem_cgroup_soft_reclaim_eligible(memcg)) {
-                        memcg = mem_cgroup_iter(root, memcg, &reclaim);
-                        continue;
-                }
-
                 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
                 shrink_lruvec(lruvec, sc);
@@ -2230,24 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
                                          sc->nr_scanned - nr_scanned, sc));
 }
 
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-        bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
-        unsigned long nr_scanned = sc->nr_scanned;
-
-        __shrink_zone(zone, sc, do_soft_reclaim);
-
-        /*
-         * No group is over the soft limit or those that are do not have
-         * pages in the zone we are reclaiming so we have to reclaim everybody
-         */
-        if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
-                __shrink_zone(zone, sc, false);
-                return;
-        }
-}
-
 /* Returns true if compaction should go ahead for a high-order request */
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
@@ -2309,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
         struct zoneref *z;
         struct zone *zone;
+        unsigned long nr_soft_reclaimed;
+        unsigned long nr_soft_scanned;
         bool aborted_reclaim = false;
 
         /*
@@ -2348,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                 continue;
                         }
                 }
+                /*
+                 * This steals pages from memory cgroups over softlimit
+                 * and returns the number of reclaimed pages and
+                 * scanned pages. This works for global memory pressure
+                 * and balancing, not for a memcg's limit.
+                 */
+                nr_soft_scanned = 0;
+                nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+                                                sc->order, sc->gfp_mask,
+                                                &nr_soft_scanned);
+                sc->nr_reclaimed += nr_soft_reclaimed;
+                sc->nr_scanned += nr_soft_scanned;
                 /* need some check for avoid more shrink_zone() */
         }
 
@@ -2941,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
         int i;
         int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
+        unsigned long nr_soft_reclaimed;
+        unsigned long nr_soft_scanned;
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
                 .priority = DEF_PRIORITY,
@@ -3055,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 
                         sc.nr_scanned = 0;
 
+                        nr_soft_scanned = 0;
+                        /*
+                         * Call soft limit reclaim before calling shrink_zone.
+                         */
+                        nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+                                                        order, sc.gfp_mask,
+                                                        &nr_soft_scanned);
+                        sc.nr_reclaimed += nr_soft_reclaimed;
+
                         /*
                          * There should be no need to raise the scanning
                          * priority if enough pages are already being scanned