author    Zlatko Calusic <zlatko.calusic@iskon.hr>    2012-12-19 18:25:13 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2012-12-20 10:06:56 -0500
commit    cda73a10eb3f493871ed39f468db50a65ebeddce (patch)
tree      fe486de989b58130ba20d534606f1091e367c8ec /mm
parent    f01af9f85855e38fbd601e033a8eac204cc4cc1c (diff)
mm: do not sleep in balance_pgdat if there's no i/o congestion
On a 4GB RAM machine, where the Normal zone is much smaller than the DMA32 zone, the Normal zone gets fragmented over time. This requires relatively more pressure in balance_pgdat to get the zone above the required watermark. Unfortunately, the congestion_wait() call there slows it down for a completely wrong reason, expecting that there's a lot of writeback/swapout, even when there's none (the much more common case). After a few days, as fragmentation progresses, this flawed logic translates to very high CPU iowait times, even though there's no I/O congestion at all. If THP is enabled, the problem occurs sooner, but I was able to see it even on !THP kernels, just by giving it a bit more time to occur.

The proper way to deal with this is to not wait unless there is actual congestion. Thanks to Mel Gorman, we already have the function that perfectly fits the job. The patch was tested on a machine which nicely revealed the problem after only 1 day of uptime, and it's been working great.

Signed-off-by: Zlatko Calusic <zlatko.calusic@iskon.hr>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
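The essence of the change is visible in the diff below: the unconditional congestion_wait() back-off is replaced with wait_iff_congested() on the zone that failed its watermark check, so kswapd only naps when that zone is actually marked congested and otherwise returns almost immediately. As a rough userspace illustration of the policy difference (not kernel code; struct zone_model, old_backoff and new_backoff are made-up names for this sketch, and the congested flag stands in for the zone's writeback congestion state):

/*
 * Userspace model of the back-off policy change, NOT kernel code.
 * The "congested" flag is an illustrative stand-in for the zone's
 * BDI/writeback congestion state.
 */
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct zone_model {
	const char *name;
	bool congested;
};

/* Old policy: always sleep ~100 ms, like congestion_wait(BLK_RW_ASYNC, HZ/10). */
static void old_backoff(void)
{
	usleep(100 * 1000);
}

/*
 * New policy: sleep only if the zone that failed its watermark check is
 * actually congested, mirroring wait_iff_congested(unbalanced_zone, ...).
 */
static void new_backoff(const struct zone_model *unbalanced_zone)
{
	if (unbalanced_zone && unbalanced_zone->congested)
		usleep(100 * 1000);
}

int main(void)
{
	struct zone_model normal = { .name = "Normal", .congested = false };

	old_backoff();        /* always stalls, even with no I/O in flight */
	new_backoff(&normal); /* returns immediately: nothing to wait out */
	printf("zone %s: congested=%d, new policy did not sleep\n",
	       normal.name, normal.congested);
	return 0;
}

In the fragmented-but-idle case the author describes, the old path charged kswapd with a 100 ms iowait stall on every loop iteration; the new path simply retries reclaim right away when there is no I/O congestion to wait out.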
Diffstat (limited to 'mm')
-rw-r--r--  mm/vmscan.c  12
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 828530e2794a..adc7e9058181 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2570,7 +2570,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 						int *classzone_idx)
 {
-	int all_zones_ok;
+	struct zone *unbalanced_zone;
 	unsigned long balanced;
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
@@ -2604,7 +2604,7 @@ loop_again:
 		unsigned long lru_pages = 0;
 		int has_under_min_watermark_zone = 0;
 
-		all_zones_ok = 1;
+		unbalanced_zone = NULL;
 		balanced = 0;
 
 		/*
@@ -2743,7 +2743,7 @@ loop_again:
 			}
 
 			if (!zone_balanced(zone, testorder, 0, end_zone)) {
-				all_zones_ok = 0;
+				unbalanced_zone = zone;
 				/*
 				 * We are still under min water mark.  This
 				 * means that we have a GFP_ATOMIC allocation
@@ -2776,7 +2776,7 @@ loop_again:
 						pfmemalloc_watermark_ok(pgdat))
 			wake_up(&pgdat->pfmemalloc_wait);
 
-		if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
+		if (!unbalanced_zone || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
 			break;		/* kswapd: all done */
 		/*
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
@@ -2786,7 +2786,7 @@ loop_again:
 		if (has_under_min_watermark_zone)
 			count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
 		else
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+			wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
 	}
 
 	/*
@@ -2805,7 +2805,7 @@ out:
 	 * high-order: Balanced zones must make up at least 25% of the node
 	 *             for the node to be balanced
 	 */
-	if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) {
+	if (unbalanced_zone && (!order || !pgdat_balanced(pgdat, balanced, *classzone_idx))) {
 		cond_resched();
 
 		try_to_freeze();