-rw-r--r--	include/linux/memcontrol.h |  10
-rw-r--r--	mm/memcontrol.c            | 163
-rw-r--r--	mm/vmscan.c                |  62
3 files changed, 175 insertions, 60 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6054c9f3a5e8..ecc82b37c4cc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -234,7 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -434,9 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
-	return false;
+	return 0;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
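
The hunks above restore the old soft-limit reclaim entry point in the header (with a stub that returns 0 when memcg is not configured). As a reader aid, a minimal sketch of the calling convention the restored prototype implies, mirroring the mm/vmscan.c hunks later in this patch; `zone` and the scan_control pointer `sc` belong to the caller, and nothing here is new kernel code:

	unsigned long nr_soft_scanned = 0;
	unsigned long nr_soft_reclaimed;

	/* reclaim from groups over their soft limit before the ordinary shrink */
	nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, sc->order,
							  sc->gfp_mask,
							  &nr_soft_scanned);
	sc->nr_reclaimed += nr_soft_reclaimed;
	sc->nr_scanned += nr_soft_scanned;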
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 852dbec07ce6..1c52ddbc839b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1991,28 +1991,57 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * A group is eligible for the soft limit reclaim if
- *	a) it is over its soft limit
- *	b) any parent up the hierarchy is over its soft limit
- */
-bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
-{
-	struct mem_cgroup *parent = memcg;
-
-	if (res_counter_soft_limit_excess(&memcg->res))
-		return true;
-
-	/*
-	 * If any parent up the hierarchy is over its soft limit then we
-	 * have to obey and reclaim from this group as well.
-	 */
-	while ((parent = parent_mem_cgroup(parent))) {
-		if (res_counter_soft_limit_excess(&parent->res))
-			return true;
-	}
-
-	return false;
-}
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
+{
+	struct mem_cgroup *victim = NULL;
+	int total = 0;
+	int loop = 0;
+	unsigned long excess;
+	unsigned long nr_scanned;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = 0,
+	};
+
+	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+	while (1) {
+		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+		if (!victim) {
+			loop++;
+			if (loop >= 2) {
+				/*
+				 * If we have not been able to reclaim
+				 * anything, it might because there are
+				 * no reclaimable pages under this hierarchy
+				 */
+				if (!total)
+					break;
+				/*
+				 * We want to do more targeted reclaim.
+				 * excess >> 2 is not to excessive so as to
+				 * reclaim too much, nor too less that we keep
+				 * coming back to reclaim from this cgroup
+				 */
+				if (total >= (excess >> 2) ||
+					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+					break;
+			}
+			continue;
+		}
+		if (!mem_cgroup_reclaimable(victim, false))
+			continue;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
+			break;
+	}
+	mem_cgroup_iter_break(root_memcg, victim);
+	return total;
+}
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -4761,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 	return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
+{
+	unsigned long nr_reclaimed = 0;
+	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+	unsigned long reclaimed;
+	int loop = 0;
+	struct mem_cgroup_tree_per_zone *mctz;
+	unsigned long long excess;
+	unsigned long nr_scanned;
+
+	if (order > 0)
+		return 0;
+
+	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+	/*
+	 * This loop can run a while, specially if mem_cgroup's continuously
+	 * keep exceeding their soft limit and putting the system under
+	 * pressure
+	 */
+	do {
+		if (next_mz)
+			mz = next_mz;
+		else
+			mz = mem_cgroup_largest_soft_limit_node(mctz);
+		if (!mz)
+			break;
+
+		nr_scanned = 0;
+		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+						    gfp_mask, &nr_scanned);
+		nr_reclaimed += reclaimed;
+		*total_scanned += nr_scanned;
+		spin_lock(&mctz->lock);
+
+		/*
+		 * If we failed to reclaim anything from this memory cgroup
+		 * it is time to move on to the next cgroup
+		 */
+		next_mz = NULL;
+		if (!reclaimed) {
+			do {
+				/*
+				 * Loop until we find yet another one.
+				 *
+				 * By the time we get the soft_limit lock
+				 * again, someone might have aded the
+				 * group back on the RB tree. Iterate to
+				 * make sure we get a different mem.
+				 * mem_cgroup_largest_soft_limit_node returns
+				 * NULL if no other cgroup is present on
+				 * the tree
+				 */
+				next_mz =
+				__mem_cgroup_largest_soft_limit_node(mctz);
+				if (next_mz == mz)
+					css_put(&next_mz->memcg->css);
+				else /* next_mz == NULL or other memcg */
+					break;
+			} while (1);
+		}
+		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+		excess = res_counter_soft_limit_excess(&mz->memcg->res);
+		/*
+		 * One school of thought says that we should not add
+		 * back the node to the tree if reclaim returns 0.
+		 * But our reclaim could return 0, simply because due
+		 * to priority we are exposing a smaller subset of
+		 * memory to reclaim from. Consider this as a longer
+		 * term TODO.
+		 */
+		/* If excess == 0, no tree ops */
+		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+		spin_unlock(&mctz->lock);
+		css_put(&mz->memcg->css);
+		loop++;
+		/*
+		 * Could not reclaim anything and there are no more
+		 * mem cgroups to try or we seem to be looping without
+		 * reclaiming anything.
+		 */
+		if (!nr_reclaimed &&
+			(next_mz == NULL ||
+			loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+			break;
+	} while (!nr_reclaimed);
+	if (next_mz)
+		css_put(&next_mz->memcg->css);
+	return nr_reclaimed;
+}
+
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
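
For a sense of scale of the `excess >> 2` target used by mem_cgroup_soft_reclaim() above: the soft-limit excess is converted from bytes to pages with PAGE_SHIFT, and each visit to a group tries to free at least a quarter of it before moving on. A small standalone sketch of that arithmetic (the 4 KiB page size and the 8 MiB excess are illustrative assumptions, not values taken from this patch):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages for illustration */

int main(void)
{
	/* hypothetical cgroup that is 8 MiB over its soft limit */
	unsigned long long excess_bytes = 8ULL << 20;

	/* same conversion as mem_cgroup_soft_reclaim(): bytes -> pages */
	unsigned long excess = excess_bytes >> PAGE_SHIFT;	/* 2048 pages */

	/* per-visit target: stop once at least a quarter of the excess is gone */
	unsigned long target = excess >> 2;			/* 512 pages */

	printf("excess: %lu pages, per-visit target: %lu pages\n", excess, target);
	return 0;
}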
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0e081cada4ba..beb35778c69f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,21 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return !mem_cgroup_disabled() && global_reclaim(sc);
-}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-	return false;
-}
 #endif
 
 unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2174,8 +2164,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	}
 }
 
-static void
-__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long nr_reclaimed, nr_scanned;
 
@@ -2194,12 +2183,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 	do {
 		struct lruvec *lruvec;
 
-		if (soft_reclaim &&
-		    !mem_cgroup_soft_reclaim_eligible(memcg)) {
-			memcg = mem_cgroup_iter(root, memcg, &reclaim);
-			continue;
-		}
-
 		lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 		shrink_lruvec(lruvec, sc);
@@ -2230,24 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 			 sc->nr_scanned - nr_scanned, sc));
 }
 
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-	bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
-	unsigned long nr_scanned = sc->nr_scanned;
-
-	__shrink_zone(zone, sc, do_soft_reclaim);
-
-	/*
-	 * No group is over the soft limit or those that are do not have
-	 * pages in the zone we are reclaiming so we have to reclaim everybody
-	 */
-	if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
-		__shrink_zone(zone, sc, false);
-		return;
-	}
-}
-
 /* Returns true if compaction should go ahead for a high-order request */
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
@@ -2309,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	bool aborted_reclaim = false;
 
 	/*
@@ -2348,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 				continue;
 			}
 		}
+		/*
+		 * This steals pages from memory cgroups over softlimit
+		 * and returns the number of reclaimed pages and
+		 * scanned pages. This works for global memory pressure
+		 * and balancing, not for a memcg's limit.
+		 */
+		nr_soft_scanned = 0;
+		nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+					sc->order, sc->gfp_mask,
+					&nr_soft_scanned);
+		sc->nr_reclaimed += nr_soft_reclaimed;
+		sc->nr_scanned += nr_soft_scanned;
 		/* need some check for avoid more shrink_zone() */
 	}
 
@@ -2941,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
+	unsigned long nr_soft_reclaimed;
+	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.priority = DEF_PRIORITY,
@@ -3055,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 
 			sc.nr_scanned = 0;
 
+			nr_soft_scanned = 0;
+			/*
+			 * Call soft limit reclaim before calling shrink_zone.
+			 */
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+							order, sc.gfp_mask,
+							&nr_soft_scanned);
+			sc.nr_reclaimed += nr_soft_reclaimed;
+
 			/*
 			 * There should be no need to raise the scanning
 			 * priority if enough pages are already being scanned
