 include/linux/memcontrol.h |  10 +-
 mm/memcontrol.c            | 163 ++----------
 mm/vmscan.c                |  62 +++++---
 3 files changed, 60 insertions(+), 175 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c416092e324..4b78661c68d0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -180,9 +180,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-					    gfp_t gfp_mask,
-					    unsigned long *total_scanned);
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -359,11 +357,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-					    gfp_t gfp_mask,
-					    unsigned long *total_scanned)
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
 {
-	return 0;
+	return false;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
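For context, the !CONFIG_MEMCG branch above relies on the usual stub pattern: when the feature is compiled out, the predicate collapses to a static inline that returns false, so callers need no #ifdef of their own. A minimal userspace sketch of that pattern follows; the type, config macro, and values are illustrative, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

struct mem_cgroup_model { long soft_excess; };	/* toy stand-in */

#ifdef FEATURE_MEMCG			/* hypothetical config switch */
static inline bool soft_reclaim_eligible(struct mem_cgroup_model *m)
{
	return m && m->soft_excess > 0;
}
#else
static inline bool soft_reclaim_eligible(struct mem_cgroup_model *m)
{
	(void)m;	/* feature compiled out: never eligible */
	return false;
}
#endif

int main(void)
{
	struct mem_cgroup_model m = { .soft_excess = 4096 };

	/* prints 1 when built with -DFEATURE_MEMCG, 0 otherwise */
	printf("eligible: %d\n", soft_reclaim_eligible(&m));
	return 0;
}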
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fe6b9f96abdd..6c32271a31c5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2004,57 +2004,28 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
-				   struct zone *zone,
-				   gfp_t gfp_mask,
-				   unsigned long *total_scanned)
-{
-	struct mem_cgroup *victim = NULL;
-	int total = 0;
-	int loop = 0;
-	unsigned long excess;
-	unsigned long nr_scanned;
-	struct mem_cgroup_reclaim_cookie reclaim = {
-		.zone = zone,
-		.priority = 0,
-	};
-
-	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
-
-	while (1) {
-		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
-		if (!victim) {
-			loop++;
-			if (loop >= 2) {
-				/*
-				 * If we have not been able to reclaim
-				 * anything, it might be because there are
-				 * no reclaimable pages under this hierarchy
-				 */
-				if (!total)
-					break;
-				/*
-				 * We want to do more targeted reclaim.
-				 * excess >> 2 is not too excessive, so we
-				 * reclaim neither too much nor so little
-				 * that we keep coming back to this cgroup
-				 */
-				if (total >= (excess >> 2) ||
-				    (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
-					break;
-			}
-			continue;
-		}
-		if (!mem_cgroup_reclaimable(victim, false))
-			continue;
-		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
-						     zone, &nr_scanned);
-		*total_scanned += nr_scanned;
-		if (!res_counter_soft_limit_excess(&root_memcg->res))
-			break;
+/*
+ * A group is eligible for the soft limit reclaim if
+ * a) it is over its soft limit, or
+ * b) any parent up the hierarchy is over its soft limit
+ */
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
+{
+	struct mem_cgroup *parent = memcg;
+
+	if (res_counter_soft_limit_excess(&memcg->res))
+		return true;
+
+	/*
+	 * If any parent up the hierarchy is over its soft limit then we
+	 * have to obey and reclaim from this group as well.
+	 */
+	while ((parent = parent_mem_cgroup(parent))) {
+		if (res_counter_soft_limit_excess(&parent->res))
+			return true;
 	}
-	mem_cgroup_iter_break(root_memcg, victim);
-	return total;
+
+	return false;
 }
 
 /*
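The function added above turns soft reclaim eligibility into a pure hierarchy check: a group qualifies if it, or any ancestor, exceeds its soft limit. Below is a small userspace model of that walk, with toy types standing in for res_counter_soft_limit_excess() and parent_mem_cgroup(); all names and numbers are made up for illustration.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct group {
	struct group *parent;
	long usage;
	long soft_limit;
};

static long soft_limit_excess(const struct group *g)
{
	return g->usage > g->soft_limit ? g->usage - g->soft_limit : 0;
}

static bool soft_reclaim_eligible(const struct group *g)
{
	/* check the group itself, then each ancestor in turn */
	for (; g; g = g->parent)
		if (soft_limit_excess(g))
			return true;
	return false;
}

int main(void)
{
	struct group root  = { NULL,  900, 500 };	/* over its limit */
	struct group child = { &root, 100, 200 };	/* under, but root is over */

	/* prints 1: the child is dragged in by its parent's excess */
	printf("child eligible: %d\n", soft_reclaim_eligible(&child));
	return 0;
}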
@@ -4727,98 +4698,6 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 	return ret;
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-					    gfp_t gfp_mask,
-					    unsigned long *total_scanned)
-{
-	unsigned long nr_reclaimed = 0;
-	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
-	unsigned long reclaimed;
-	int loop = 0;
-	struct mem_cgroup_tree_per_zone *mctz;
-	unsigned long long excess;
-	unsigned long nr_scanned;
-
-	if (order > 0)
-		return 0;
-
-	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
-	/*
-	 * This loop can run for a while, especially if mem_cgroups
-	 * continuously keep exceeding their soft limit and putting the
-	 * system under pressure
-	 */
-	do {
-		if (next_mz)
-			mz = next_mz;
-		else
-			mz = mem_cgroup_largest_soft_limit_node(mctz);
-		if (!mz)
-			break;
-
-		nr_scanned = 0;
-		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
-						    gfp_mask, &nr_scanned);
-		nr_reclaimed += reclaimed;
-		*total_scanned += nr_scanned;
-		spin_lock(&mctz->lock);
-
-		/*
-		 * If we failed to reclaim anything from this memory cgroup
-		 * it is time to move on to the next cgroup
-		 */
-		next_mz = NULL;
-		if (!reclaimed) {
-			do {
-				/*
-				 * Loop until we find yet another one.
-				 *
-				 * By the time we get the soft_limit lock
-				 * again, someone might have added the
-				 * group back on the RB tree. Iterate to
-				 * make sure we get a different memcg.
-				 * mem_cgroup_largest_soft_limit_node returns
-				 * NULL if no other cgroup is present on
-				 * the tree
-				 */
-				next_mz =
-				__mem_cgroup_largest_soft_limit_node(mctz);
-				if (next_mz == mz)
-					css_put(&next_mz->memcg->css);
-				else /* next_mz == NULL or other memcg */
-					break;
-			} while (1);
-		}
-		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
-		excess = res_counter_soft_limit_excess(&mz->memcg->res);
-		/*
-		 * One school of thought says that we should not add
-		 * back the node to the tree if reclaim returns 0.
-		 * But our reclaim could return 0 simply because, due
-		 * to priority, we are exposing a smaller subset of
-		 * memory to reclaim from. Consider this as a longer
-		 * term TODO.
-		 */
-		/* If excess == 0, no tree ops */
-		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
-		spin_unlock(&mctz->lock);
-		css_put(&mz->memcg->css);
-		loop++;
-		/*
-		 * Could not reclaim anything and there are no more
-		 * mem cgroups to try or we seem to be looping without
-		 * reclaiming anything.
-		 */
-		if (!nr_reclaimed &&
-		    (next_mz == NULL ||
-		     loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
-			break;
-	} while (!nr_reclaimed);
-	if (next_mz)
-		css_put(&next_mz->memcg->css);
-	return nr_reclaimed;
-}
-
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
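For reference, the code removed above implemented a "largest excess first" policy: repeatedly pick the group that exceeds its soft limit the most, reclaim from it, and re-pick. The sketch below models only that selection order, with a linear scan standing in for the kernel's per-zone RB tree; names and numbers are invented.

#include <stddef.h>
#include <stdio.h>

struct victim { const char *name; long excess; };

static struct victim *largest_excess(struct victim *v, size_t n)
{
	struct victim *best = NULL;
	size_t i;

	for (i = 0; i < n; i++)
		if (v[i].excess > 0 && (!best || v[i].excess > best->excess))
			best = &v[i];
	return best;	/* NULL once nobody is over its soft limit */
}

int main(void)
{
	struct victim groups[] = { { "a", 300 }, { "b", 0 }, { "c", 700 } };
	struct victim *mz;

	/* mimic the removed do/while: hit the worst offender, then re-pick */
	while ((mz = largest_excess(groups, 3))) {
		printf("reclaim from %s (excess %ld)\n", mz->name, mz->excess);
		mz->excess -= 400;	/* pretend reclaim shrank the excess */
		if (mz->excess < 0)
			mz->excess = 0;
	}
	return 0;
}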
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fe715daeb8bc..cf4643807ec2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,11 +139,21 @@ static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
 }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+	return !mem_cgroup_disabled() && global_reclaim(sc);
+}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
 }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+	return false;
+}
 #endif
 
 unsigned long zone_reclaimable_pages(struct zone *zone)
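The helper introduced above gates soft reclaim on two conditions: memcg must be enabled and the reclaim must be global, i.e. have no target_mem_cgroup. A toy userspace model of that guard, with illustrative names throughout:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct scan_control_model {
	const void *target_mem_cgroup;	/* NULL means global reclaim */
};

static bool memcg_disabled;		/* stand-in for mem_cgroup_disabled() */

static bool should_soft_reclaim(const struct scan_control_model *sc)
{
	return !memcg_disabled && sc->target_mem_cgroup == NULL;
}

int main(void)
{
	struct scan_control_model global = { NULL };
	struct scan_control_model targeted = { &global };	/* any non-NULL target */

	/* prints "global: 1, targeted: 0" */
	printf("global: %d, targeted: %d\n",
	       should_soft_reclaim(&global), should_soft_reclaim(&targeted));
	return 0;
}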
@@ -2130,7 +2140,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	}
 }
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static void
+__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 {
 	unsigned long nr_reclaimed, nr_scanned;
 
@@ -2149,6 +2160,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
 	do {
 		struct lruvec *lruvec;
 
+		if (soft_reclaim &&
+		    !mem_cgroup_soft_reclaim_eligible(memcg)) {
+			memcg = mem_cgroup_iter(root, memcg, &reclaim);
+			continue;
+		}
+
 		lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 		shrink_lruvec(lruvec, sc);
@@ -2179,6 +2196,24 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
 					 sc->nr_scanned - nr_scanned, sc));
 }
 
+
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
+{
+	bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
+	unsigned long nr_scanned = sc->nr_scanned;
+
+	__shrink_zone(zone, sc, do_soft_reclaim);
+
+	/*
+	 * No group is over the soft limit, or those that are don't have
+	 * pages in the zone we are reclaiming, so reclaim from everybody
+	 */
+	if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
+		__shrink_zone(zone, sc, false);
+		return;
+	}
+}
+
 /* Returns true if compaction should go ahead for a high-order request */
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
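The rewritten shrink_zone() above is a two-pass scheme: the first pass restricts reclaim to soft-limit-eligible groups, and if that pass scanned nothing, it repeats without the restriction so reclaim cannot stall. A compact userspace model of that control flow; every name here is a stand-in:

#include <stdbool.h>
#include <stdio.h>

struct zone_model { unsigned long nr_scanned; };

static void shrink_pass(struct zone_model *z, bool soft_only)
{
	/* pretend no group was eligible during the soft-only pass */
	if (!soft_only)
		z->nr_scanned += 32;
}

static void shrink(struct zone_model *z, bool do_soft)
{
	unsigned long before = z->nr_scanned;

	shrink_pass(z, do_soft);
	if (do_soft && z->nr_scanned == before)	/* first pass made no progress */
		shrink_pass(z, false);		/* fall back to everybody */
}

int main(void)
{
	struct zone_model z = { 0 };

	shrink(&z, true);
	printf("scanned: %lu\n", z.nr_scanned);	/* 32, via the fallback */
	return 0;
}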
@@ -2240,8 +2275,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
-	unsigned long nr_soft_reclaimed;
-	unsigned long nr_soft_scanned;
 	bool aborted_reclaim = false;
 
 	/*
@@ -2281,18 +2314,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 				continue;
 			}
 		}
-		/*
-		 * This steals pages from memory cgroups over softlimit
-		 * and returns the number of reclaimed pages and
-		 * scanned pages. This works for global memory pressure
-		 * and balancing, not for a memcg's limit.
-		 */
-		nr_soft_scanned = 0;
-		nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-						sc->order, sc->gfp_mask,
-						&nr_soft_scanned);
-		sc->nr_reclaimed += nr_soft_reclaimed;
-		sc->nr_scanned += nr_soft_scanned;
 		/* need some check for avoid more shrink_zone() */
 	}
 
@@ -2880,8 +2901,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
-	unsigned long nr_soft_reclaimed;
-	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.priority = DEF_PRIORITY,
@@ -2996,15 +3015,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 
 			sc.nr_scanned = 0;
 
-			nr_soft_scanned = 0;
-			/*
-			 * Call soft limit reclaim before calling shrink_zone.
-			 */
-			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-							order, sc.gfp_mask,
-							&nr_soft_scanned);
-			sc.nr_reclaimed += nr_soft_reclaimed;
-
 			/*
 			 * There should be no need to raise the scanning
 			 * priority if enough pages are already being scanned