author     Mel Gorman <mgorman@techsingularity.net>        2016-07-28 18:46:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-07-28 19:07:41 -0400
commit     a9dd0a83104c01269ea36a9b4ec42b51edf85427 (patch)
tree       2d3329f49b0c91376945e96838bab1307e33b57d /mm/vmscan.c
parent     86c79f6b5426ce118d32c73fa9e328f0a86ab590 (diff)
mm, vmscan: make shrink_node decisions more node-centric
Earlier patches focused on having direct reclaim and kswapd use node-centric
data for reclaiming, but shrink_node() itself still uses too much zone
information.  This patch removes the unnecessary zone-based information; the
most important decision affected is whether to continue reclaim or not.  Some
memcg APIs are adjusted as a result, even though memcg itself still uses some
zone information.
[mgorman@techsingularity.net: optimization]
Link: http://lkml.kernel.org/r/1468588165-12461-2-git-send-email-mgorman@techsingularity.net
Link: http://lkml.kernel.org/r/1467970510-21195-14-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  61
1 file changed, 35 insertions(+), 26 deletions(-)
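
The pivotal change is in should_continue_reclaim(): rather than asking
compaction_suitable() about a single zone, it now walks every populated zone
of the node up to sc->reclaim_idx and stops reclaim as soon as any of them is
ready for compaction.  The sketch below is a simplified, userspace-compilable
model of that control flow only; the struct and enum definitions are stand-ins
for the real kernel types, and compaction_ready_somewhere() is a hypothetical
helper name used purely for illustration (in the patch the loop lives inline
in should_continue_reclaim(), as the diff below shows).

#include <stdbool.h>

enum compact_result { COMPACT_SKIPPED, COMPACT_CONTINUE, COMPACT_PARTIAL };

#define MAX_NR_ZONES 4

struct zone {
	unsigned long managed_pages;	/* stand-in for "is this zone populated?" */
};

struct pglist_data {
	struct zone node_zones[MAX_NR_ZONES];
};

struct scan_control {
	int order;
	int reclaim_idx;	/* highest zone index usable by the allocation */
};

static bool populated_zone(struct zone *zone)
{
	return zone->managed_pages != 0;
}

/* Stand-in for the kernel's compaction_suitable(); always defers here. */
static enum compact_result compaction_suitable(struct zone *zone, int order,
					       int alloc_flags, int classzone_idx)
{
	(void)zone; (void)order; (void)alloc_flags; (void)classzone_idx;
	return COMPACT_SKIPPED;
}

/*
 * Walk the node's populated zones up to sc->reclaim_idx and report whether
 * any of them is already suitable for compaction (or could satisfy the
 * allocation outright).  should_continue_reclaim() returns false, i.e.
 * stops reclaiming, exactly when this is the case.
 */
bool compaction_ready_somewhere(struct pglist_data *pgdat, struct scan_control *sc)
{
	int z;

	for (z = 0; z <= sc->reclaim_idx; z++) {
		struct zone *zone = &pgdat->node_zones[z];

		if (!populated_zone(zone))
			continue;

		switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
		case COMPACT_PARTIAL:
		case COMPACT_CONTINUE:
			return true;	/* stop reclaim; let compaction run */
		default:
			break;		/* check the next zone */
		}
	}
	return false;			/* keep reclaiming for this node */
}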
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7a276f4b1b0..46f7a71ed13b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2224,12 +2224,13 @@ static inline void init_tlb_ubc(void)
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 
 /*
- * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+ * This is a basic per-node page freer. Used by both kswapd and direct reclaim.
  */
-static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
+static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
 			      struct scan_control *sc, unsigned long *lru_pages)
 {
-	struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+	struct zone *zone = &pgdat->node_zones[sc->reclaim_idx];
+	struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long targets[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
@@ -2362,13 +2363,14 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  * calls try_to_compact_zone() that it will have enough free pages to succeed.
  * It will give up earlier than that if there is difficulty reclaiming pages.
  */
-static inline bool should_continue_reclaim(struct zone *zone,
+static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 					unsigned long nr_reclaimed,
 					unsigned long nr_scanned,
 					struct scan_control *sc)
 {
 	unsigned long pages_for_compaction;
 	unsigned long inactive_lru_pages;
+	int z;
 
 	/* If not in reclaim/compaction mode, stop */
 	if (!in_reclaim_compaction(sc))
@@ -2402,21 +2404,29 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE);
+	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
 	if (get_nr_swap_pages() > 0)
-		inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
+		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(zone, sc->order, 0, 0)) {
-	case COMPACT_PARTIAL:
-	case COMPACT_CONTINUE:
-		return false;
-	default:
-		return true;
+	for (z = 0; z <= sc->reclaim_idx; z++) {
+		struct zone *zone = &pgdat->node_zones[z];
+		if (!populated_zone(zone))
+			continue;
+
+		switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
+		case COMPACT_PARTIAL:
+		case COMPACT_CONTINUE:
+			return false;
+		default:
+			/* check next zone */
+			;
+		}
 	}
+	return true;
 }
 
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
@@ -2425,15 +2435,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
-	struct zone *zone = &pgdat->node_zones[classzone_idx];
 
 	do {
 		struct mem_cgroup *root = sc->target_mem_cgroup;
 		struct mem_cgroup_reclaim_cookie reclaim = {
-			.zone = zone,
+			.zone = &pgdat->node_zones[classzone_idx],
 			.priority = sc->priority,
 		};
-		unsigned long zone_lru_pages = 0;
+		unsigned long node_lru_pages = 0;
 		struct mem_cgroup *memcg;
 
 		nr_reclaimed = sc->nr_reclaimed;
@@ -2454,11 +2463,11 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
 			reclaimed = sc->nr_reclaimed;
 			scanned = sc->nr_scanned;
 
-			shrink_zone_memcg(zone, memcg, sc, &lru_pages);
-			zone_lru_pages += lru_pages;
+			shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
+			node_lru_pages += lru_pages;
 
 			if (!global_reclaim(sc))
-				shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+				shrink_slab(sc->gfp_mask, pgdat->node_id,
 					    memcg, sc->nr_scanned - scanned,
 					    lru_pages);
 
@@ -2470,7 +2479,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
 			/*
 			 * Direct reclaim and kswapd have to scan all memory
 			 * cgroups to fulfill the overall scan target for the
-			 * zone.
+			 * node.
 			 *
 			 * Limit reclaim, on the other hand, only cares about
 			 * nr_to_reclaim pages to be reclaimed and it will
@@ -2489,9 +2498,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
 		 * the eligible LRU pages were scanned.
 		 */
 		if (global_reclaim(sc))
-			shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+			shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
 				    sc->nr_scanned - nr_scanned,
-				    zone_lru_pages);
+				    node_lru_pages);
 
 		if (reclaim_state) {
 			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -2506,7 +2515,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
 		if (sc->nr_reclaimed - nr_reclaimed)
 			reclaimable = true;
 
-	} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
+	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));
 
 	return reclaimable;
@@ -2906,7 +2915,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #ifdef CONFIG_MEMCG
 
-unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
+unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 						gfp_t gfp_mask, bool noswap,
 						struct zone *zone,
 						unsigned long *nr_scanned)
@@ -2931,11 +2940,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
 	/*
 	 * NOTE: Although we can get the priority field, using it
 	 * here is not a good idea, since it limits the pages we can scan.
-	 * if we don't reclaim here, the shrink_zone from balance_pgdat
+	 * if we don't reclaim here, the shrink_node from balance_pgdat
 	 * will pick up pages from other mem cgroup's as well. We hack
 	 * the priority and make it zero.
 	 */
-	shrink_zone_memcg(zone, memcg, &sc, &lru_pages);
+	shrink_node_memcg(zone->zone_pgdat, memcg, &sc, &lru_pages);
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2994,7 +3003,7 @@ static void age_active_anon(struct pglist_data *pgdat,
 
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
-		struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
 
 		if (inactive_list_is_low(lruvec, false))
 			shrink_active_list(SWAP_CLUSTER_MAX, lruvec,