author     Mel Gorman <mgorman@techsingularity.net>          2016-07-28 18:47:31 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2016-07-28 19:07:41 -0400
commit     5a1c84b404a7176b8b36e2a0041b6f0adb3151a3 (patch)
tree       ff98e242c5d4d3a24ca49f6ddc707028aeb938f9
parent     bb4cc2bea6df7854d629bff114ca03237cc718d6 (diff)
mm: remove reclaim and compaction retry approximations
If per-zone LRU accounting is available then there is no point
approximating whether reclaim and compaction should retry based on pgdat
statistics. This is effectively a revert of "mm, vmstat: remove zone
and node double accounting by approximating retries" with the difference
that inactive/active stats are still available. This preserves the
history of why the approximation was retried and why it had to be
reverted to handle OOM kills on 32-bit systems.
Link: http://lkml.kernel.org/r/1469110261-7365-4-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
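To make the hunks below easier to follow, here is a minimal, self-contained userspace sketch of the per-zone retry heuristic that should_reclaim_retry() is left with after this patch. The struct model_zone, the model_* helper names, the sample numbers and the simplified watermark comparison are illustrative assumptions only, not kernel API; in the kernel the same inputs come from zone_reclaimable_pages(), zone_page_state_snapshot() on NR_FREE_PAGES / NR_ZONE_WRITE_PENDING, and __zone_watermark_ok().

/*
 * Standalone model of the per-zone retry decision: names, fields and
 * sample numbers are illustrative, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_RECLAIM_RETRIES	16
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

struct model_zone {
	unsigned long inactive_file;	/* NR_ZONE_INACTIVE_FILE */
	unsigned long active_file;	/* NR_ZONE_ACTIVE_FILE */
	unsigned long free_pages;	/* NR_FREE_PAGES */
	unsigned long write_pending;	/* NR_ZONE_WRITE_PENDING */
	unsigned long min_wmark;	/* min watermark, in pages */
};

/* Per-zone reclaimable estimate; file LRU only, i.e. no swap configured. */
static unsigned long model_reclaimable(const struct model_zone *z)
{
	return z->inactive_file + z->active_file;
}

/*
 * Retry reclaim if freeing everything still on this zone's LRUs could
 * satisfy the watermark, discounted by how many retries already failed.
 * Returns false when the zone looks hopeless (the caller would move
 * towards OOM).
 */
static bool model_should_retry(const struct model_zone *z,
			       int no_progress_loops, bool made_progress)
{
	unsigned long reclaimable = model_reclaimable(z);
	unsigned long available = reclaimable;

	available -= DIV_ROUND_UP(no_progress_loops * available,
				  MAX_RECLAIM_RETRIES);
	available += z->free_pages;

	/* Simplified stand-in for __zone_watermark_ok() */
	if (available <= z->min_wmark)
		return false;

	/*
	 * If no progress was made and most reclaimable pages are dirty or
	 * under writeback, wait for the flushers rather than give up
	 * (congestion_wait() in the kernel) before retrying.
	 */
	if (!made_progress && 2 * z->write_pending > reclaimable)
		printf("mostly write-pending: throttle before retrying\n");

	return true;
}

int main(void)
{
	struct model_zone z = {
		.inactive_file = 4096, .active_file = 2048,
		.free_pages = 512, .write_pending = 3500, .min_wmark = 1024,
	};

	for (int loops = 0; loops <= MAX_RECLAIM_RETRIES; loops++)
		printf("loops=%2d retry=%d\n", loops,
		       model_should_retry(&z, loops, false));
	return 0;
}

The point of the model: with exact per-zone LRU and write-pending counters, there is no need to approximate from pgdat totals or to special-case lowmem requests, which is exactly what the hunks below delete.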
-rw-r--r--   include/linux/mmzone.h |  1
-rw-r--r--   include/linux/swap.h   |  1
-rw-r--r--   mm/compaction.c        | 20
-rw-r--r--   mm/migrate.c           |  2
-rw-r--r--   mm/page-writeback.c    |  5
-rw-r--r--   mm/page_alloc.c        | 49
-rw-r--r--   mm/vmscan.c            | 18
-rw-r--r--   mm/vmstat.c            |  1
8 files changed, 39 insertions, 58 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1a813ad335f4..ca0fbc483441 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -116,6 +116,7 @@ enum zone_stat_item {
 	NR_ZONE_INACTIVE_FILE,
 	NR_ZONE_ACTIVE_FILE,
 	NR_ZONE_UNEVICTABLE,
+	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cc753c639e3d..b17cc4830fa6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -307,6 +307,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
 					    struct vm_area_struct *vma);
 
 /* linux/mm/vmscan.c */
+extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
diff --git a/mm/compaction.c b/mm/compaction.c
index cd93ea24c565..e5995f38d677 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1438,11 +1438,6 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 {
 	struct zone *zone;
 	struct zoneref *z;
-	pg_data_t *last_pgdat = NULL;
-
-	/* Do not retry compaction for zone-constrained allocations */
-	if (ac->high_zoneidx < ZONE_NORMAL)
-		return false;
 
 	/*
 	 * Make sure at least one zone would pass __compaction_suitable if we continue
@@ -1453,27 +1448,14 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 		unsigned long available;
 		enum compact_result compact_result;
 
-		if (last_pgdat == zone->zone_pgdat)
-			continue;
-
-		/*
-		 * This over-estimates the number of pages available for
-		 * reclaim/compaction but walking the LRU would take too
-		 * long. The consequences are that compaction may retry
-		 * longer than it should for a zone-constrained allocation
-		 * request.
-		 */
-		last_pgdat = zone->zone_pgdat;
-		available = pgdat_reclaimable_pages(zone->zone_pgdat) / order;
-
 		/*
 		 * Do not consider all the reclaimable memory because we do not
 		 * want to trash just for a single high order allocation which
 		 * is even not guaranteed to appear even if __compaction_suitable
 		 * is happy about the watermark check.
 		 */
+		available = zone_reclaimable_pages(zone) / order;
 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
-		available = min(zone->managed_pages, available);
 		compact_result = __compaction_suitable(zone, order, alloc_flags,
 				ac_classzone_idx(ac), available);
 		if (compact_result != COMPACT_SKIPPED &&
diff --git a/mm/migrate.c b/mm/migrate.c
index ed2f85e61de1..ed0268268e93 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -513,7 +513,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		}
 		if (dirty && mapping_cap_account_dirty(mapping)) {
 			__dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
+			__dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
 			__inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
+			__inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
 		}
 	}
 	local_irq_enable();
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7b5920a3500f..f4cd7d8005c9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2462,6 +2462,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 
 		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
 		__inc_node_page_state(page, NR_FILE_DIRTY);
+		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		__inc_node_page_state(page, NR_DIRTIED);
 		__inc_wb_stat(wb, WB_RECLAIMABLE);
 		__inc_wb_stat(wb, WB_DIRTIED);
@@ -2483,6 +2484,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 	if (mapping_cap_account_dirty(mapping)) {
 		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
 		dec_node_page_state(page, NR_FILE_DIRTY);
+		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		dec_wb_stat(wb, WB_RECLAIMABLE);
 		task_io_account_cancelled_write(PAGE_SIZE);
 	}
@@ -2739,6 +2741,7 @@ int clear_page_dirty_for_io(struct page *page)
 		if (TestClearPageDirty(page)) {
 			mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
 			dec_node_page_state(page, NR_FILE_DIRTY);
+			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 			dec_wb_stat(wb, WB_RECLAIMABLE);
 			ret = 1;
 		}
@@ -2785,6 +2788,7 @@ int test_clear_page_writeback(struct page *page)
 	if (ret) {
 		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
 		dec_node_page_state(page, NR_WRITEBACK);
+		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		inc_node_page_state(page, NR_WRITTEN);
 	}
 	unlock_page_memcg(page);
@@ -2839,6 +2843,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 	if (!ret) {
 		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
 		inc_node_page_state(page, NR_WRITEBACK);
+		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 	}
 	unlock_page_memcg(page);
 	return ret;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 759cfa8cbbeb..dfdb608f7b3d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3402,7 +3402,6 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 {
 	struct zone *zone;
 	struct zoneref *z;
-	pg_data_t *current_pgdat = NULL;
 
 	/*
 	 * Make sure we converge to OOM if we cannot make any progress
@@ -3412,15 +3411,6 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 		return false;
 
 	/*
-	 * Blindly retry lowmem allocation requests that are often ignored by
-	 * the OOM killer up to MAX_RECLAIM_RETRIES as we not have a reliable
-	 * and fast means of calculating reclaimable, dirty and writeback pages
-	 * in eligible zones.
-	 */
-	if (ac->high_zoneidx < ZONE_NORMAL)
-		goto out;
-
-	/*
 	 * Keep reclaiming pages while there is a chance this will lead
 	 * somewhere. If none of the target zones can satisfy our allocation
 	 * request even if all reclaimable pages are considered then we are
@@ -3430,38 +3420,18 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 					ac->nodemask) {
 		unsigned long available;
 		unsigned long reclaimable;
-		int zid;
 
-		if (current_pgdat == zone->zone_pgdat)
-			continue;
-
-		current_pgdat = zone->zone_pgdat;
-		available = reclaimable = pgdat_reclaimable_pages(current_pgdat);
+		available = reclaimable = zone_reclaimable_pages(zone);
 		available -= DIV_ROUND_UP(no_progress_loops * available,
 					  MAX_RECLAIM_RETRIES);
-
-		/* Account for all free pages on eligible zones */
-		for (zid = 0; zid <= zone_idx(zone); zid++) {
-			struct zone *acct_zone = &current_pgdat->node_zones[zid];
-
-			available += zone_page_state_snapshot(acct_zone, NR_FREE_PAGES);
-		}
+		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 
 		/*
 		 * Would the allocation succeed if we reclaimed the whole
-		 * available? This is approximate because there is no
-		 * accurate count of reclaimable pages per zone.
+		 * available?
 		 */
-		for (zid = 0; zid <= zone_idx(zone); zid++) {
-			struct zone *check_zone = &current_pgdat->node_zones[zid];
-			unsigned long estimate;
-
-			estimate = min(check_zone->managed_pages, available);
-			if (!__zone_watermark_ok(check_zone, order,
-				min_wmark_pages(check_zone), ac_classzone_idx(ac),
-				alloc_flags, estimate))
-				continue;
-
+		if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
+				ac_classzone_idx(ac), alloc_flags, available)) {
 			/*
 			 * If we didn't make any progress and have a lot of
 			 * dirty + writeback pages then we should wait for
@@ -3471,16 +3441,15 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 			if (!did_some_progress) {
 				unsigned long write_pending;
 
-				write_pending =
-					node_page_state(current_pgdat, NR_WRITEBACK) +
-					node_page_state(current_pgdat, NR_FILE_DIRTY);
+				write_pending = zone_page_state_snapshot(zone,
+							NR_ZONE_WRITE_PENDING);
 
 				if (2 * write_pending > reclaimable) {
 					congestion_wait(BLK_RW_ASYNC, HZ/10);
 					return true;
 				}
 			}
-out:
+
 		/*
 		 * Memory allocation/reclaim might be called from a WQ
 		 * context and the current implementation of the WQ
@@ -4361,6 +4330,7 @@ void show_free_areas(unsigned int filter)
 			" active_file:%lukB"
 			" inactive_file:%lukB"
 			" unevictable:%lukB"
+			" writepending:%lukB"
 			" present:%lukB"
 			" managed:%lukB"
 			" mlocked:%lukB"
@@ -4383,6 +4353,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
 			K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
 			K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
+			K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
 			K(zone->present_pages),
 			K(zone->managed_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 222d5403dd4b..134381a20099 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -194,6 +194,24 @@ static bool sane_reclaim(struct scan_control *sc)
 }
 #endif
 
+/*
+ * This misses isolated pages which are not accounted for to save counters.
+ * As the data only determines if reclaim or compaction continues, it is
+ * not expected that isolated pages will be a dominating factor.
+ */
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+	unsigned long nr;
+
+	nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
+		zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
+	if (get_nr_swap_pages() > 0)
+		nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
+			zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
+
+	return nr;
+}
+
 unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat)
 {
 	unsigned long nr;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 053075ac67b8..89cec42d19ff 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -926,6 +926,7 @@ const char * const vmstat_text[] = {
 	"nr_zone_inactive_file",
 	"nr_zone_active_file",
 	"nr_zone_unevictable",
+	"nr_zone_write_pending",
 	"nr_mlock",
 	"nr_slab_reclaimable",
 	"nr_slab_unreclaimable",