author     Mel Gorman <mgorman@techsingularity.net>        2016-07-28 18:47:05 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-07-28 19:07:41 -0400
commit     bca6759258dbef378bcf5b872177bcd2259ceb68
tree       17b2a1307ab70e4fb6f8f2cf4b535240b2433d5d  /mm/page_alloc.c
parent     e2ecc8a79ed49f7838b4fdf352c4c48cec9424ac
mm, vmstat: remove zone and node double accounting by approximating retries
The number of LRU pages, dirty pages and writeback pages must be accounted for on both zones and nodes because the reclaim retry logic, the compaction retry logic and the highmem calculations all depend on per-zone stats.

Many lowmem allocations are immune from OOM kill due to a check in __alloc_pages_may_oom for (ac->high_zoneidx < ZONE_NORMAL) since commit 03668b3ceb0c ("oom: avoid oom killer for lowmem allocations"). The exceptions are costly high-order allocations and allocations that cannot fail. If __alloc_pages_may_oom avoids OOM-kill for a low-order lowmem allocation, it falls through to __alloc_pages_direct_compact.

This patch blindly retries reclaim for zone-constrained allocations in should_reclaim_retry up to MAX_RECLAIM_RETRIES. This is not ideal, but without per-zone stats there are not many alternatives. The impact is that zone-constrained allocations may be delayed before the OOM killer is considered.

As there is no guarantee that enough memory can ever be freed to satisfy compaction, this patch avoids retrying compaction for zone-constrained allocations.

In combination, that means the per-node stats can be used as a rough approximation when deciding whether to continue reclaim. While it is possible this will make the wrong decision on occasion, it will not loop forever because the number of reclaim attempts is capped by MAX_RECLAIM_RETRIES.

The final step is calculating the number of dirtyable highmem pages. As those calculations only care about the global count of file pages in highmem, a global counter is sufficient and is used instead of per-zone stats.

In combination, this allows the per-zone LRU and dirty state counters to be removed.

[mgorman@techsingularity.net: fix acct_highmem_file_pages()]
Link: http://lkml.kernel.org/r/1468853426-12858-4-git-send-email-mgorman@techsingularity.net
Link: http://lkml.kernel.org/r/1467970510-21195-35-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Suggested-by: Michal Hocko <mhocko@kernel.org>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
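For illustration only, and not part of the patch: a minimal userspace sketch of the linear back-off described above. The helper name and the sample page counts are invented for the example; the DIV_ROUND_UP() discount and the retry cap mirror the should_reclaim_retry() hunk further down, assuming MAX_RECLAIM_RETRIES is 16 as in kernels of this era.

#include <stdio.h>

/* Mirrors the kernel's MAX_RECLAIM_RETRIES (16 at the time of this patch). */
#define MAX_RECLAIM_RETRIES 16

/* Kernel-style round-up division, as used in the hunk below. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/*
 * Discount the reclaimable estimate linearly with the number of
 * no-progress reclaim retries, the same back-off should_reclaim_retry()
 * applies: after MAX_RECLAIM_RETRIES loops the reclaimable contribution
 * reaches zero and only free pages remain in the estimate.
 */
static unsigned long discounted_available(unsigned long reclaimable,
                                          unsigned long free_pages,
                                          int no_progress_loops)
{
        unsigned long available = reclaimable;

        available -= DIV_ROUND_UP((unsigned long)no_progress_loops * available,
                                  MAX_RECLAIM_RETRIES);
        return available + free_pages;
}

int main(void)
{
        /* Hypothetical node: 120000 reclaimable pages, 2000 free pages. */
        unsigned long reclaimable = 120000, free_pages = 2000;

        for (int loops = 0; loops <= MAX_RECLAIM_RETRIES; loops += 4)
                printf("no_progress_loops=%2d -> available=%lu\n",
                       loops, discounted_available(reclaimable, free_pages, loops));
        return 0;
}

Compiled with any plain C compiler, this prints how the reclaimable contribution shrinks to zero by the final retry, leaving only the free pages to satisfy the watermark check.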
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 55
1 file changed, 43 insertions(+), 12 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 03e67f2dfdaa..f1b5a0bc11f2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3402,6 +3402,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 {
         struct zone *zone;
         struct zoneref *z;
+        pg_data_t *current_pgdat = NULL;
 
         /*
          * Make sure we converge to OOM if we cannot make any progress
@@ -3411,27 +3412,56 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                 return false;
 
         /*
-         * Keep reclaiming pages while there is a chance this will lead somewhere.
-         * If none of the target zones can satisfy our allocation request even
-         * if all reclaimable pages are considered then we are screwed and have
-         * to go OOM.
+         * Blindly retry lowmem allocation requests that are often ignored by
+         * the OOM killer up to MAX_RECLAIM_RETRIES as we not have a reliable
+         * and fast means of calculating reclaimable, dirty and writeback pages
+         * in eligible zones.
+         */
+        if (ac->high_zoneidx < ZONE_NORMAL)
+                goto out;
+
+        /*
+         * Keep reclaiming pages while there is a chance this will lead
+         * somewhere. If none of the target zones can satisfy our allocation
+         * request even if all reclaimable pages are considered then we are
+         * screwed and have to go OOM.
          */
         for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
                                         ac->nodemask) {
                 unsigned long available;
                 unsigned long reclaimable;
+                int zid;
 
-                available = reclaimable = zone_reclaimable_pages(zone);
+                if (current_pgdat == zone->zone_pgdat)
+                        continue;
+
+                current_pgdat = zone->zone_pgdat;
+                available = reclaimable = pgdat_reclaimable_pages(current_pgdat);
                 available -= DIV_ROUND_UP(no_progress_loops * available,
                                           MAX_RECLAIM_RETRIES);
-                available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+                /* Account for all free pages on eligible zones */
+                for (zid = 0; zid <= zone_idx(zone); zid++) {
+                        struct zone *acct_zone = &current_pgdat->node_zones[zid];
+
+                        available += zone_page_state_snapshot(acct_zone, NR_FREE_PAGES);
+                }
 
                 /*
                  * Would the allocation succeed if we reclaimed the whole
-                 * available?
+                 * available? This is approximate because there is no
+                 * accurate count of reclaimable pages per zone.
                  */
-                if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
-                                ac_classzone_idx(ac), alloc_flags, available)) {
+                for (zid = 0; zid <= zone_idx(zone); zid++) {
+                        struct zone *check_zone = &current_pgdat->node_zones[zid];
+                        unsigned long estimate;
+
+                        estimate = min(check_zone->managed_pages, available);
+                        if (!__zone_watermark_ok(check_zone, order,
+                                        min_wmark_pages(check_zone), ac_classzone_idx(ac),
+                                        alloc_flags, estimate))
+                                continue;
+
                         /*
                          * If we didn't make any progress and have a lot of
                          * dirty + writeback pages then we should wait for
@@ -3441,15 +3471,16 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                         if (!did_some_progress) {
                                 unsigned long write_pending;
 
-                                write_pending = zone_page_state_snapshot(zone,
-                                                        NR_ZONE_WRITE_PENDING);
+                                write_pending =
+                                        node_page_state(current_pgdat, NR_WRITEBACK) +
+                                        node_page_state(current_pgdat, NR_FILE_DIRTY);
 
                                 if (2 * write_pending > reclaimable) {
                                         congestion_wait(BLK_RW_ASYNC, HZ/10);
                                         return true;
                                 }
                         }
-
+out:
                         /*
                          * Memory allocation/reclaim might be called from a WQ
                          * context and the current implementation of the WQ
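To make the per-node approximation in the hunk above easier to follow outside the kernel, the following self-contained sketch reproduces only the shape of the new logic: sum the free pages of every eligible zone into one "available" figure, then clamp each zone's estimate to min(managed_pages, available) before the watermark comparison. struct fake_zone, the sample numbers and the bare estimate-versus-watermark test are invented stand-ins; the real __zone_watermark_ok() is considerably more involved.

#include <stdio.h>

#define MAX_NR_ZONES 4  /* e.g. DMA, DMA32, Normal, Movable on x86-64 */

/* Hypothetical stand-in for struct zone: only the fields the sketch needs. */
struct fake_zone {
        const char *name;
        unsigned long managed_pages;
        unsigned long free_pages;
};

/*
 * Mirror of the approximation in the patched should_reclaim_retry():
 * "available" is the node-wide discounted reclaimable count plus the free
 * pages of every zone up to and including the highest eligible zone, and
 * each zone's check is fed min(managed_pages, available) so a small lowmem
 * zone is never credited with more pages than it can hold.
 */
static int would_any_zone_pass(struct fake_zone *zones, int high_zoneidx,
                               unsigned long node_reclaimable,
                               unsigned long min_watermark)
{
        unsigned long available = node_reclaimable;

        /* Account for all free pages on eligible zones. */
        for (int zid = 0; zid <= high_zoneidx; zid++)
                available += zones[zid].free_pages;

        /* Approximate per-zone check, clamped to each zone's capacity. */
        for (int zid = 0; zid <= high_zoneidx; zid++) {
                unsigned long estimate = zones[zid].managed_pages < available ?
                                         zones[zid].managed_pages : available;

                if (estimate > min_watermark) {
                        printf("zone %s could still satisfy the request\n",
                               zones[zid].name);
                        return 1;
                }
        }
        return 0;
}

int main(void)
{
        struct fake_zone zones[MAX_NR_ZONES] = {
                { "DMA",        4096,   32 },
                { "DMA32",    491520, 1024 },
                { "Normal",  3932160, 2048 },
                { "Movable",       0,    0 },
        };

        /* Hypothetical inputs: 50000 discounted reclaimable pages, watermark 5000. */
        return !would_any_zone_pass(zones, 2, 50000, 5000);
}

Without the min() clamp, a small lowmem zone such as DMA would be credited with the whole node's reclaimable memory, and the retry loop could keep revisiting allocations that zone can never satisfy.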