diff options
author | Mel Gorman <mgorman@suse.de> | 2013-07-03 18:01:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-03 19:07:28 -0400 |
commit | b8e83b942a16eb73e63406592d3178207a4f07a1 (patch) | |
tree | 2fc7f3c0989b924b6e2a7cd9b81b0d11690c5294 /mm | |
parent | e82e0561dae9f3ae5a21fc2d3d3ccbe69d90be46 (diff) |
mm: vmscan: flatten kswapd priority loop
kswapd stops raising the scanning priority when at least
SWAP_CLUSTER_MAX pages have been reclaimed or the pgdat is considered
balanced. It then rechecks if it needs to restart at DEF_PRIORITY and
whether high-order reclaim needs to be reset. This is not wrong per se
but it is confusing to follow and forcing kswapd to stay at DEF_PRIORITY
may require several restarts before it has scanned enough pages to meet
the high watermark even at 100% efficiency. This patch irons out the
logic a bit by controlling when priority is raised and removing the
"goto loop_again".
This patch has kswapd raise the scanning priority until it is scanning
enough pages that it could meet the high watermark in one shrink of the
LRU lists if it is able to reclaim at 100% efficiency. It will not
raise the scanning priority higher unless it is failing to reclaim any
pages.
To avoid infinite looping for high-order allocation requests kswapd will
not reclaim for high-order allocations when it has reclaimed at least
twice the number of pages as the allocation request.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Tested-by: Zlatko Calusic <zcalusic@bitsync.net>
Cc: dormando <dormando@rydia.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/vmscan.c | 86 |
1 files changed, 41 insertions, 45 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 26ad67f1962c..1c10ee512215 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -2654,8 +2654,12 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, | |||
2654 | /* | 2654 | /* |
2655 | * kswapd shrinks the zone by the number of pages required to reach | 2655 | * kswapd shrinks the zone by the number of pages required to reach |
2656 | * the high watermark. | 2656 | * the high watermark. |
2657 | * | ||
2658 | * Returns true if kswapd scanned at least the requested number of pages to | ||
2659 | * reclaim. This is used to determine if the scanning priority needs to be | ||
2660 | * raised. | ||
2657 | */ | 2661 | */ |
2658 | static void kswapd_shrink_zone(struct zone *zone, | 2662 | static bool kswapd_shrink_zone(struct zone *zone, |
2659 | struct scan_control *sc, | 2663 | struct scan_control *sc, |
2660 | unsigned long lru_pages) | 2664 | unsigned long lru_pages) |
2661 | { | 2665 | { |
@@ -2675,6 +2679,8 @@ static void kswapd_shrink_zone(struct zone *zone, | |||
2675 | 2679 | ||
2676 | if (nr_slab == 0 && !zone_reclaimable(zone)) | 2680 | if (nr_slab == 0 && !zone_reclaimable(zone)) |
2677 | zone->all_unreclaimable = 1; | 2681 | zone->all_unreclaimable = 1; |
2682 | |||
2683 | return sc->nr_scanned >= sc->nr_to_reclaim; | ||
2678 | } | 2684 | } |
2679 | 2685 | ||
2680 | /* | 2686 | /* |
@@ -2701,26 +2707,26 @@ static void kswapd_shrink_zone(struct zone *zone, | |||
2701 | static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | 2707 | static unsigned long balance_pgdat(pg_data_t *pgdat, int order, |
2702 | int *classzone_idx) | 2708 | int *classzone_idx) |
2703 | { | 2709 | { |
2704 | bool pgdat_is_balanced = false; | ||
2705 | int i; | 2710 | int i; |
2706 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2711 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
2707 | unsigned long nr_soft_reclaimed; | 2712 | unsigned long nr_soft_reclaimed; |
2708 | unsigned long nr_soft_scanned; | 2713 | unsigned long nr_soft_scanned; |
2709 | struct scan_control sc = { | 2714 | struct scan_control sc = { |
2710 | .gfp_mask = GFP_KERNEL, | 2715 | .gfp_mask = GFP_KERNEL, |
2716 | .priority = DEF_PRIORITY, | ||
2711 | .may_unmap = 1, | 2717 | .may_unmap = 1, |
2712 | .may_swap = 1, | 2718 | .may_swap = 1, |
2719 | .may_writepage = !laptop_mode, | ||
2713 | .order = order, | 2720 | .order = order, |
2714 | .target_mem_cgroup = NULL, | 2721 | .target_mem_cgroup = NULL, |
2715 | }; | 2722 | }; |
2716 | loop_again: | ||
2717 | sc.priority = DEF_PRIORITY; | ||
2718 | sc.nr_reclaimed = 0; | ||
2719 | sc.may_writepage = !laptop_mode; | ||
2720 | count_vm_event(PAGEOUTRUN); | 2723 | count_vm_event(PAGEOUTRUN); |
2721 | 2724 | ||
2722 | do { | 2725 | do { |
2723 | unsigned long lru_pages = 0; | 2726 | unsigned long lru_pages = 0; |
2727 | bool raise_priority = true; | ||
2728 | |||
2729 | sc.nr_reclaimed = 0; | ||
2724 | 2730 | ||
2725 | /* | 2731 | /* |
2726 | * Scan in the highmem->dma direction for the highest | 2732 | * Scan in the highmem->dma direction for the highest |
@@ -2762,10 +2768,8 @@ loop_again: | |||
2762 | } | 2768 | } |
2763 | } | 2769 | } |
2764 | 2770 | ||
2765 | if (i < 0) { | 2771 | if (i < 0) |
2766 | pgdat_is_balanced = true; | ||
2767 | goto out; | 2772 | goto out; |
2768 | } | ||
2769 | 2773 | ||
2770 | for (i = 0; i <= end_zone; i++) { | 2774 | for (i = 0; i <= end_zone; i++) { |
2771 | struct zone *zone = pgdat->node_zones + i; | 2775 | struct zone *zone = pgdat->node_zones + i; |
@@ -2832,8 +2836,16 @@ loop_again: | |||
2832 | 2836 | ||
2833 | if ((buffer_heads_over_limit && is_highmem_idx(i)) || | 2837 | if ((buffer_heads_over_limit && is_highmem_idx(i)) || |
2834 | !zone_balanced(zone, testorder, | 2838 | !zone_balanced(zone, testorder, |
2835 | balance_gap, end_zone)) | 2839 | balance_gap, end_zone)) { |
2836 | kswapd_shrink_zone(zone, &sc, lru_pages); | 2840 | /* |
2841 | * There should be no need to raise the | ||
2842 | * scanning priority if enough pages are | ||
2843 | * already being scanned that high | ||
2844 | * watermark would be met at 100% efficiency. | ||
2845 | */ | ||
2846 | if (kswapd_shrink_zone(zone, &sc, lru_pages)) | ||
2847 | raise_priority = false; | ||
2848 | } | ||
2837 | 2849 | ||
2838 | /* | 2850 | /* |
2839 | * If we're getting trouble reclaiming, start doing | 2851 | * If we're getting trouble reclaiming, start doing |
@@ -2868,46 +2880,29 @@ loop_again: | |||
2868 | pfmemalloc_watermark_ok(pgdat)) | 2880 | pfmemalloc_watermark_ok(pgdat)) |
2869 | wake_up(&pgdat->pfmemalloc_wait); | 2881 | wake_up(&pgdat->pfmemalloc_wait); |
2870 | 2882 | ||
2871 | if (pgdat_balanced(pgdat, order, *classzone_idx)) { | ||
2872 | pgdat_is_balanced = true; | ||
2873 | break; /* kswapd: all done */ | ||
2874 | } | ||
2875 | |||
2876 | /* | 2883 | /* |
2877 | * We do this so kswapd doesn't build up large priorities for | 2884 | * Fragmentation may mean that the system cannot be rebalanced |
2878 | * example when it is freeing in parallel with allocators. It | 2885 | * for high-order allocations in all zones. If twice the |
2879 | * matches the direct reclaim path behaviour in terms of impact | 2886 | * allocation size has been reclaimed and the zones are still |
2880 | * on zone->*_priority. | 2887 | * not balanced then recheck the watermarks at order-0 to |
2888 | * prevent kswapd reclaiming excessively. Assume that a | ||
2889 | * process requested a high-order can direct reclaim/compact. | ||
2881 | */ | 2890 | */ |
2882 | if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) | 2891 | if (order && sc.nr_reclaimed >= 2UL << order) |
2883 | break; | 2892 | order = sc.order = 0; |
2884 | } while (--sc.priority >= 0); | ||
2885 | |||
2886 | out: | ||
2887 | if (!pgdat_is_balanced) { | ||
2888 | cond_resched(); | ||
2889 | 2893 | ||
2890 | try_to_freeze(); | 2894 | /* Check if kswapd should be suspending */ |
2895 | if (try_to_freeze() || kthread_should_stop()) | ||
2896 | break; | ||
2891 | 2897 | ||
2892 | /* | 2898 | /* |
2893 | * Fragmentation may mean that the system cannot be | 2899 | * Raise priority if scanning rate is too low or there was no |
2894 | * rebalanced for high-order allocations in all zones. | 2900 | * progress in reclaiming pages |
2895 | * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, | ||
2896 | * it means the zones have been fully scanned and are still | ||
2897 | * not balanced. For high-order allocations, there is | ||
2898 | * little point trying all over again as kswapd may | ||
2899 | * infinite loop. | ||
2900 | * | ||
2901 | * Instead, recheck all watermarks at order-0 as they | ||
2902 | * are the most important. If watermarks are ok, kswapd will go | ||
2903 | * back to sleep. High-order users can still perform direct | ||
2904 | * reclaim if they wish. | ||
2905 | */ | 2901 | */ |
2906 | if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) | 2902 | if (raise_priority || !sc.nr_reclaimed) |
2907 | order = sc.order = 0; | 2903 | sc.priority--; |
2908 | 2904 | } while (sc.priority >= 0 && | |
2909 | goto loop_again; | 2905 | !pgdat_balanced(pgdat, order, *classzone_idx)); |
2910 | } | ||
2911 | 2906 | ||
2912 | /* | 2907 | /* |
2913 | * If kswapd was reclaiming at a higher order, it has the option of | 2908 | * If kswapd was reclaiming at a higher order, it has the option of |
@@ -2936,6 +2931,7 @@ out: | |||
2936 | compact_pgdat(pgdat, order); | 2931 | compact_pgdat(pgdat, order); |
2937 | } | 2932 | } |
2938 | 2933 | ||
2934 | out: | ||
2939 | /* | 2935 | /* |
2940 | * Return the order we were reclaiming at so prepare_kswapd_sleep() | 2936 | * Return the order we were reclaiming at so prepare_kswapd_sleep() |
2941 | * makes a decision on the order we were last reclaiming at. However, | 2937 | * makes a decision on the order we were last reclaiming at. However, |