aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2013-07-03 18:01:45 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-03 19:07:28 -0400
commitb8e83b942a16eb73e63406592d3178207a4f07a1 (patch)
tree2fc7f3c0989b924b6e2a7cd9b81b0d11690c5294 /mm
parente82e0561dae9f3ae5a21fc2d3d3ccbe69d90be46 (diff)
mm: vmscan: flatten kswapd priority loop
kswapd stops raising the scanning priority when at least SWAP_CLUSTER_MAX pages have been reclaimed or the pgdat is considered balanced. It then rechecks if it needs to restart at DEF_PRIORITY and whether high-order reclaim needs to be reset. This is not wrong per se but it is confusing to follow and forcing kswapd to stay at DEF_PRIORITY may require several restarts before it has scanned enough pages to meet the high watermark even at 100% efficiency. This patch irons out the logic a bit by controlling when priority is raised and removing the "goto loop_again". This patch has kswapd raise the scanning priority until it is scanning enough pages that it could meet the high watermark in one shrink of the LRU lists if it is able to reclaim at 100% efficiency. It will not raise the scanning priority higher unless it is failing to reclaim any pages. To avoid infinite looping for high-order allocation requests kswapd will not reclaim for high-order allocations when it has reclaimed at least twice the number of pages as the allocation request. Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Michal Hocko <mhocko@suse.cz> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Rik van Riel <riel@redhat.com> Cc: Jiri Slaby <jslaby@suse.cz> Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu> Tested-by: Zlatko Calusic <zcalusic@bitsync.net> Cc: dormando <dormando@rydia.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/vmscan.c86
1 files changed, 41 insertions, 45 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26ad67f1962c..1c10ee512215 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2654,8 +2654,12 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
2654/* 2654/*
2655 * kswapd shrinks the zone by the number of pages required to reach 2655 * kswapd shrinks the zone by the number of pages required to reach
2656 * the high watermark. 2656 * the high watermark.
2657 *
2658 * Returns true if kswapd scanned at least the requested number of pages to
2659 * reclaim. This is used to determine if the scanning priority needs to be
2660 * raised.
2657 */ 2661 */
2658static void kswapd_shrink_zone(struct zone *zone, 2662static bool kswapd_shrink_zone(struct zone *zone,
2659 struct scan_control *sc, 2663 struct scan_control *sc,
2660 unsigned long lru_pages) 2664 unsigned long lru_pages)
2661{ 2665{
@@ -2675,6 +2679,8 @@ static void kswapd_shrink_zone(struct zone *zone,
2675 2679
2676 if (nr_slab == 0 && !zone_reclaimable(zone)) 2680 if (nr_slab == 0 && !zone_reclaimable(zone))
2677 zone->all_unreclaimable = 1; 2681 zone->all_unreclaimable = 1;
2682
2683 return sc->nr_scanned >= sc->nr_to_reclaim;
2678} 2684}
2679 2685
2680/* 2686/*
@@ -2701,26 +2707,26 @@ static void kswapd_shrink_zone(struct zone *zone,
2701static unsigned long balance_pgdat(pg_data_t *pgdat, int order, 2707static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2702 int *classzone_idx) 2708 int *classzone_idx)
2703{ 2709{
2704 bool pgdat_is_balanced = false;
2705 int i; 2710 int i;
2706 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ 2711 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
2707 unsigned long nr_soft_reclaimed; 2712 unsigned long nr_soft_reclaimed;
2708 unsigned long nr_soft_scanned; 2713 unsigned long nr_soft_scanned;
2709 struct scan_control sc = { 2714 struct scan_control sc = {
2710 .gfp_mask = GFP_KERNEL, 2715 .gfp_mask = GFP_KERNEL,
2716 .priority = DEF_PRIORITY,
2711 .may_unmap = 1, 2717 .may_unmap = 1,
2712 .may_swap = 1, 2718 .may_swap = 1,
2719 .may_writepage = !laptop_mode,
2713 .order = order, 2720 .order = order,
2714 .target_mem_cgroup = NULL, 2721 .target_mem_cgroup = NULL,
2715 }; 2722 };
2716loop_again:
2717 sc.priority = DEF_PRIORITY;
2718 sc.nr_reclaimed = 0;
2719 sc.may_writepage = !laptop_mode;
2720 count_vm_event(PAGEOUTRUN); 2723 count_vm_event(PAGEOUTRUN);
2721 2724
2722 do { 2725 do {
2723 unsigned long lru_pages = 0; 2726 unsigned long lru_pages = 0;
2727 bool raise_priority = true;
2728
2729 sc.nr_reclaimed = 0;
2724 2730
2725 /* 2731 /*
2726 * Scan in the highmem->dma direction for the highest 2732 * Scan in the highmem->dma direction for the highest
@@ -2762,10 +2768,8 @@ loop_again:
2762 } 2768 }
2763 } 2769 }
2764 2770
2765 if (i < 0) { 2771 if (i < 0)
2766 pgdat_is_balanced = true;
2767 goto out; 2772 goto out;
2768 }
2769 2773
2770 for (i = 0; i <= end_zone; i++) { 2774 for (i = 0; i <= end_zone; i++) {
2771 struct zone *zone = pgdat->node_zones + i; 2775 struct zone *zone = pgdat->node_zones + i;
@@ -2832,8 +2836,16 @@ loop_again:
2832 2836
2833 if ((buffer_heads_over_limit && is_highmem_idx(i)) || 2837 if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
2834 !zone_balanced(zone, testorder, 2838 !zone_balanced(zone, testorder,
2835 balance_gap, end_zone)) 2839 balance_gap, end_zone)) {
2836 kswapd_shrink_zone(zone, &sc, lru_pages); 2840 /*
2841 * There should be no need to raise the
2842 * scanning priority if enough pages are
2843 * already being scanned that high
2844 * watermark would be met at 100% efficiency.
2845 */
2846 if (kswapd_shrink_zone(zone, &sc, lru_pages))
2847 raise_priority = false;
2848 }
2837 2849
2838 /* 2850 /*
2839 * If we're getting trouble reclaiming, start doing 2851 * If we're getting trouble reclaiming, start doing
@@ -2868,46 +2880,29 @@ loop_again:
2868 pfmemalloc_watermark_ok(pgdat)) 2880 pfmemalloc_watermark_ok(pgdat))
2869 wake_up(&pgdat->pfmemalloc_wait); 2881 wake_up(&pgdat->pfmemalloc_wait);
2870 2882
2871 if (pgdat_balanced(pgdat, order, *classzone_idx)) {
2872 pgdat_is_balanced = true;
2873 break; /* kswapd: all done */
2874 }
2875
2876 /* 2883 /*
2877 * We do this so kswapd doesn't build up large priorities for 2884 * Fragmentation may mean that the system cannot be rebalanced
2878 * example when it is freeing in parallel with allocators. It 2885 * for high-order allocations in all zones. If twice the
2879 * matches the direct reclaim path behaviour in terms of impact 2886 * allocation size has been reclaimed and the zones are still
2880 * on zone->*_priority. 2887 * not balanced then recheck the watermarks at order-0 to
2888 * prevent kswapd reclaiming excessively. Assume that a
2889 * process requested a high-order can direct reclaim/compact.
2881 */ 2890 */
2882 if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) 2891 if (order && sc.nr_reclaimed >= 2UL << order)
2883 break; 2892 order = sc.order = 0;
2884 } while (--sc.priority >= 0);
2885
2886out:
2887 if (!pgdat_is_balanced) {
2888 cond_resched();
2889 2893
2890 try_to_freeze(); 2894 /* Check if kswapd should be suspending */
2895 if (try_to_freeze() || kthread_should_stop())
2896 break;
2891 2897
2892 /* 2898 /*
2893 * Fragmentation may mean that the system cannot be 2899 * Raise priority if scanning rate is too low or there was no
2894 * rebalanced for high-order allocations in all zones. 2900 * progress in reclaiming pages
2895 * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
2896 * it means the zones have been fully scanned and are still
2897 * not balanced. For high-order allocations, there is
2898 * little point trying all over again as kswapd may
2899 * infinite loop.
2900 *
2901 * Instead, recheck all watermarks at order-0 as they
2902 * are the most important. If watermarks are ok, kswapd will go
2903 * back to sleep. High-order users can still perform direct
2904 * reclaim if they wish.
2905 */ 2901 */
2906 if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) 2902 if (raise_priority || !sc.nr_reclaimed)
2907 order = sc.order = 0; 2903 sc.priority--;
2908 2904 } while (sc.priority >= 0 &&
2909 goto loop_again; 2905 !pgdat_balanced(pgdat, order, *classzone_idx));
2910 }
2911 2906
2912 /* 2907 /*
2913 * If kswapd was reclaiming at a higher order, it has the option of 2908 * If kswapd was reclaiming at a higher order, it has the option of
@@ -2936,6 +2931,7 @@ out:
2936 compact_pgdat(pgdat, order); 2931 compact_pgdat(pgdat, order);
2937 } 2932 }
2938 2933
2934out:
2939 /* 2935 /*
2940 * Return the order we were reclaiming at so prepare_kswapd_sleep() 2936 * Return the order we were reclaiming at so prepare_kswapd_sleep()
2941 * makes a decision on the order we were last reclaiming at. However, 2937 * makes a decision on the order we were last reclaiming at. However,