Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--   mm/page_alloc.c   247
1 file changed, 145 insertions(+), 102 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 40e29429e7b0..1b849500640c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1032,11 +1032,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
 static int fallbacks[MIGRATE_TYPES][4] = {
 	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
 	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
+	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 #ifdef CONFIG_CMA
-	[MIGRATE_MOVABLE]     = { MIGRATE_CMA,         MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 	[MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
-#else
-	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 #endif
 	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
 #ifdef CONFIG_MEMORY_ISOLATION
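
For orientation while reading the rest of the patch: the table is walked row by row by the fallback path, and after this change the MIGRATE_MOVABLE row no longer names MIGRATE_CMA; CMA pageblocks are instead tried through a dedicated helper added in the next hunk. A hedged illustration, not part of the patch:

/*
 * Illustration only: the order in which an unmovable request raids other
 * free lists once its own list is empty.
 *
 *   fallbacks[MIGRATE_UNMOVABLE][0] == MIGRATE_RECLAIMABLE
 *   fallbacks[MIGRATE_UNMOVABLE][1] == MIGRATE_MOVABLE
 *   fallbacks[MIGRATE_UNMOVABLE][2] == MIGRATE_RESERVE  (terminates the walk)
 */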
@@ -1044,6 +1042,17 @@ static int fallbacks[MIGRATE_TYPES][4] = {
 #endif
 };
 
+#ifdef CONFIG_CMA
+static struct page *__rmqueue_cma_fallback(struct zone *zone,
+					unsigned int order)
+{
+	return __rmqueue_smallest(zone, order, MIGRATE_CMA);
+}
+#else
+static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
+					unsigned int order) { return NULL; }
+#endif
+
 /*
  * Move the free pages in a range to the free lists of the requested type.
  * Note that start_page and end_pages are not aligned on a pageblock
@@ -1136,14 +1145,40 @@ static void change_pageblock_range(struct page *pageblock_page,
  * as fragmentation caused by those allocations polluting movable pageblocks
  * is worse than movable allocations stealing from unmovable and reclaimable
  * pageblocks.
- *
- * If we claim more than half of the pageblock, change pageblock's migratetype
- * as well.
  */
-static void try_to_steal_freepages(struct zone *zone, struct page *page,
-				  int start_type, int fallback_type)
+static bool can_steal_fallback(unsigned int order, int start_mt)
+{
+	/*
+	 * Leaving this order check is intended, although there is
+	 * relaxed order check in next check. The reason is that
+	 * we can actually steal whole pageblock if this condition met,
+	 * but, below check doesn't guarantee it and that is just heuristic
+	 * so could be changed anytime.
+	 */
+	if (order >= pageblock_order)
+		return true;
+
+	if (order >= pageblock_order / 2 ||
+		start_mt == MIGRATE_RECLAIMABLE ||
+		start_mt == MIGRATE_UNMOVABLE ||
+		page_group_by_mobility_disabled)
+		return true;
+
+	return false;
+}
+
+/*
+ * This function implements actual steal behaviour. If order is large enough,
+ * we can steal whole pageblock. If not, we first move freepages in this
+ * pageblock and check whether half of pages are moved or not. If half of
+ * pages are moved, we can change migratetype of pageblock and permanently
+ * use it's pages as requested migratetype in the future.
+ */
+static void steal_suitable_fallback(struct zone *zone, struct page *page,
+							  int start_type)
 {
 	int current_order = page_order(page);
+	int pages;
 
 	/* Take ownership for orders >= pageblock_order */
 	if (current_order >= pageblock_order) {
@@ -1151,19 +1186,49 @@ static void try_to_steal_freepages(struct zone *zone, struct page *page,
 		return;
 	}
 
-	if (current_order >= pageblock_order / 2 ||
-	    start_type == MIGRATE_RECLAIMABLE ||
-	    start_type == MIGRATE_UNMOVABLE ||
-	    page_group_by_mobility_disabled) {
-		int pages;
+	pages = move_freepages_block(zone, page, start_type);
+
+	/* Claim the whole block if over half of it is free */
+	if (pages >= (1 << (pageblock_order-1)) ||
+			page_group_by_mobility_disabled)
+		set_pageblock_migratetype(page, start_type);
+}
+
+/*
+ * Check whether there is a suitable fallback freepage with requested order.
+ * If only_stealable is true, this function returns fallback_mt only if
+ * we can steal other freepages all together. This would help to reduce
+ * fragmentation due to mixed migratetype pages in one pageblock.
+ */
+int find_suitable_fallback(struct free_area *area, unsigned int order,
+			int migratetype, bool only_stealable, bool *can_steal)
+{
+	int i;
+	int fallback_mt;
+
+	if (area->nr_free == 0)
+		return -1;
+
+	*can_steal = false;
+	for (i = 0;; i++) {
+		fallback_mt = fallbacks[migratetype][i];
+		if (fallback_mt == MIGRATE_RESERVE)
+			break;
+
+		if (list_empty(&area->free_list[fallback_mt]))
+			continue;
 
-		pages = move_freepages_block(zone, page, start_type);
+		if (can_steal_fallback(order, migratetype))
+			*can_steal = true;
 
-		/* Claim the whole block if over half of it is free */
-		if (pages >= (1 << (pageblock_order-1)) ||
-				page_group_by_mobility_disabled)
-			set_pageblock_migratetype(page, start_type);
+		if (!only_stealable)
+			return fallback_mt;
+
+		if (*can_steal)
+			return fallback_mt;
 	}
+
+	return -1;
 }
 
 /* Remove an element from the buddy allocator from the fallback list */
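
To make the stealing heuristic above concrete, here is a small userspace sketch (illustrative only, not kernel code) that mirrors can_steal_fallback(); it assumes pageblock_order == 9, i.e. 2MB pageblocks with 4KB pages, and ignores the page_group_by_mobility_disabled knob:

#include <stdbool.h>
#include <stdio.h>

enum { MOVABLE, RECLAIMABLE, UNMOVABLE };	/* stand-ins for the kernel enums */
static const unsigned int pageblock_order = 9;	/* assumption: 2MB pageblocks */

/* Mirrors the can_steal_fallback() logic from the hunk above. */
static bool can_steal(unsigned int order, int start_mt)
{
	if (order >= pageblock_order)
		return true;	/* whole pageblock can be claimed outright */

	return order >= pageblock_order / 2 ||
	       start_mt == RECLAIMABLE ||
	       start_mt == UNMOVABLE;
}

int main(void)
{
	printf("order 3 movable:     %d\n", can_steal(3, MOVABLE));	/* 0: too small */
	printf("order 5 movable:     %d\n", can_steal(5, MOVABLE));	/* 1: >= pageblock_order/2 */
	printf("order 3 unmovable:   %d\n", can_steal(3, UNMOVABLE));	/* 1: keep unmovable together */
	printf("order 3 reclaimable: %d\n", can_steal(3, RECLAIMABLE));	/* 1 */
	printf("order 9 movable:     %d\n", can_steal(9, MOVABLE));	/* 1: whole block */
	return 0;
}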
@@ -1173,64 +1238,45 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 	struct free_area *area;
 	unsigned int current_order;
 	struct page *page;
+	int fallback_mt;
+	bool can_steal;
 
 	/* Find the largest possible block of pages in the other list */
 	for (current_order = MAX_ORDER-1;
 				current_order >= order && current_order <= MAX_ORDER-1;
 				--current_order) {
-		int i;
-		for (i = 0;; i++) {
-			int migratetype = fallbacks[start_migratetype][i];
-			int buddy_type = start_migratetype;
-
-			/* MIGRATE_RESERVE handled later if necessary */
-			if (migratetype == MIGRATE_RESERVE)
-				break;
-
-			area = &(zone->free_area[current_order]);
-			if (list_empty(&area->free_list[migratetype]))
-				continue;
-
-			page = list_entry(area->free_list[migratetype].next,
-					struct page, lru);
-			area->nr_free--;
-
-			if (!is_migrate_cma(migratetype)) {
-				try_to_steal_freepages(zone, page,
-							start_migratetype,
-							migratetype);
-			} else {
-				/*
-				 * When borrowing from MIGRATE_CMA, we need to
-				 * release the excess buddy pages to CMA
-				 * itself, and we do not try to steal extra
-				 * free pages.
-				 */
-				buddy_type = migratetype;
-			}
+		area = &(zone->free_area[current_order]);
+		fallback_mt = find_suitable_fallback(area, current_order,
+				start_migratetype, false, &can_steal);
+		if (fallback_mt == -1)
+			continue;
 
-			/* Remove the page from the freelists */
-			list_del(&page->lru);
-			rmv_page_order(page);
+		page = list_entry(area->free_list[fallback_mt].next,
+						struct page, lru);
+		if (can_steal)
+			steal_suitable_fallback(zone, page, start_migratetype);
 
-			expand(zone, page, order, current_order, area,
-					buddy_type);
+		/* Remove the page from the freelists */
+		area->nr_free--;
+		list_del(&page->lru);
+		rmv_page_order(page);
 
-			/*
-			 * The freepage_migratetype may differ from pageblock's
-			 * migratetype depending on the decisions in
-			 * try_to_steal_freepages(). This is OK as long as it
-			 * does not differ for MIGRATE_CMA pageblocks. For CMA
-			 * we need to make sure unallocated pages flushed from
-			 * pcp lists are returned to the correct freelist.
-			 */
-			set_freepage_migratetype(page, buddy_type);
+		expand(zone, page, order, current_order, area,
+					start_migratetype);
+		/*
+		 * The freepage_migratetype may differ from pageblock's
+		 * migratetype depending on the decisions in
+		 * try_to_steal_freepages(). This is OK as long as it
+		 * does not differ for MIGRATE_CMA pageblocks. For CMA
+		 * we need to make sure unallocated pages flushed from
+		 * pcp lists are returned to the correct freelist.
+		 */
+		set_freepage_migratetype(page, start_migratetype);
 
 		trace_mm_page_alloc_extfrag(page, order, current_order,
-				start_migratetype, migratetype);
+			start_migratetype, fallback_mt);
 
 		return page;
-		}
 	}
 
 	return NULL;
@@ -1249,7 +1295,11 @@ retry_reserve:
 	page = __rmqueue_smallest(zone, order, migratetype);
 
 	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
-		page = __rmqueue_fallback(zone, order, migratetype);
+		if (migratetype == MIGRATE_MOVABLE)
+			page = __rmqueue_cma_fallback(zone, order);
+
+		if (!page)
+			page = __rmqueue_fallback(zone, order, migratetype);
 
 		/*
 		 * Use MIGRATE_RESERVE rather than fail an allocation. goto
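
Taken together with the helpers introduced earlier, the per-zone allocation order after this series is roughly the following. This is a condensed sketch of __rmqueue(), not the verbatim function: locking, statistics and tracing are omitted, and the MIGRATE_RESERVE retry shown here is unchanged by the patch.

/* Sketch only. */
static struct page *__rmqueue_sketch(struct zone *zone, unsigned int order,
						int migratetype)
{
	struct page *page;

retry_reserve:
	page = __rmqueue_smallest(zone, order, migratetype);	/* 1. own free list */

	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
		if (migratetype == MIGRATE_MOVABLE)
			page = __rmqueue_cma_fallback(zone, order);	/* 2. CMA, movable only */

		if (!page)
			page = __rmqueue_fallback(zone, order, migratetype); /* 3. fallbacks[] walk */

		if (!page) {
			migratetype = MIGRATE_RESERVE;	/* 4. last resort */
			goto retry_reserve;
		}
	}
	return page;
}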
@@ -2362,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			*did_some_progress = 1;
 			goto out;
 		}
-		/*
-		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
-		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
-		 * The caller should handle page allocation failure by itself if
-		 * it specifies __GFP_THISNODE.
-		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
-		 */
+		/* The OOM killer may not free memory on a specific node */
 		if (gfp_mask & __GFP_THISNODE)
 			goto out;
 	}
@@ -2623,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
-	 * __GFP_NOWARN set) should not cause reclaim since the subsystem
-	 * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
-	 * using a larger set of nodes after it has established that the
-	 * allowed per node queues are empty and that nodes are
-	 * over allocated.
+	 * If this allocation cannot block and it is for a specific node, then
+	 * fail early. There's no need to wakeup kswapd or retry for a
+	 * speculative node-specific allocation.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) &&
-	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
 		goto nopage;
 
 retry:
@@ -2824,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	/*
 	 * Check the zones suitable for the gfp_mask contain at least one
 	 * valid zone. It's possible to have an empty zonelist as a result
-	 * of GFP_THISNODE and a memoryless node
+	 * of __GFP_THISNODE and a memoryless node
 	 */
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
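
The practical effect of the __GFP_THISNODE change in the slowpath above: any node-specific allocation that cannot block (!wait) now fails fast instead of waking kswapd and retrying, whereas previously only the exact GFP_THISNODE combination (__GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN) took that early exit. A hedged example of an affected caller; the flag combination is illustrative and not taken from the patch:

/* Speculative, node-local, non-blocking allocation: with this patch it
 * returns NULL immediately when node 'nid' is out of pages, instead of
 * waking kswapd or looping in the slowpath. */
struct page *page = alloc_pages_node(nid,
		GFP_NOWAIT | __GFP_THISNODE | __GFP_NOWARN, 0);
if (!page)
	page = alloc_pages(GFP_KERNEL, 0);	/* optional: fall back to any node */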
@@ -3201,38 +3241,31 @@ static void show_migration_types(unsigned char type)
  * Show free area list (used inside shift_scroll-lock stuff)
  * We also calculate the percentage fragmentation. We do this by counting the
  * memory on each free list with the exception of the first item on the list.
- * Suppresses nodes that are not allowed by current's cpuset if
- * SHOW_MEM_FILTER_NODES is passed.
+ *
+ * Bits in @filter:
+ * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
+ *   cpuset.
  */
 void show_free_areas(unsigned int filter)
 {
+	unsigned long free_pcp = 0;
 	int cpu;
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
 		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
-		show_node(zone);
-		printk("%s per-cpu:\n", zone->name);
 
-		for_each_online_cpu(cpu) {
-			struct per_cpu_pageset *pageset;
-
-			pageset = per_cpu_ptr(zone->pageset, cpu);
-
-			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
-				cpu, pageset->pcp.high,
-				pageset->pcp.batch, pageset->pcp.count);
-		}
+		for_each_online_cpu(cpu)
+			free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
 	}
 
 	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
 		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
-		" unevictable:%lu"
-		" dirty:%lu writeback:%lu unstable:%lu\n"
-		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+		" unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+		" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
 		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
-		" free_cma:%lu\n",
+		" free:%lu free_pcp:%lu free_cma:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_INACTIVE_ANON),
 		global_page_state(NR_ISOLATED_ANON),
@@ -3243,13 +3276,14 @@ void show_free_areas(unsigned int filter)
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
-		global_page_state(NR_FREE_PAGES),
 		global_page_state(NR_SLAB_RECLAIMABLE),
 		global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE),
+		global_page_state(NR_FREE_PAGES),
+		free_pcp,
 		global_page_state(NR_FREE_CMA_PAGES));
 
 	for_each_populated_zone(zone) {
@@ -3257,6 +3291,11 @@ void show_free_areas(unsigned int filter)
 
 		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
+
+		free_pcp = 0;
+		for_each_online_cpu(cpu)
+			free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
+
 		show_node(zone);
 		printk("%s"
 			" free:%lukB"
@@ -3283,6 +3322,8 @@ void show_free_areas(unsigned int filter)
 			" pagetables:%lukB"
 			" unstable:%lukB"
 			" bounce:%lukB"
+			" free_pcp:%lukB"
+			" local_pcp:%ukB"
 			" free_cma:%lukB"
 			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
@@ -3314,6 +3355,8 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
 			K(zone_page_state(zone, NR_BOUNCE)),
+			K(free_pcp),
+			K(this_cpu_read(zone->pageset->pcp.count)),
 			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			K(zone_page_state(zone, NR_PAGES_SCANNED)),
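
For readers of the show_free_areas() changes above: NR_FREE_PAGES only covers pages sitting in the buddy free lists, so the patch reports pages parked on the per-cpu lists separately (free_pcp summed over all CPUs, local_pcp for the printing CPU). A hedged sketch of that relationship; the helper name is made up for illustration and does not exist in the patch:

/* Hypothetical helper, mirroring the summation done inline in the patch:
 * pages cached on a zone's per-cpu lists, which NR_FREE_PAGES does not
 * include. */
static unsigned long zone_pcp_free_pages(struct zone *zone)
{
	unsigned long free_pcp = 0;
	int cpu;

	for_each_online_cpu(cpu)
		free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;

	return free_pcp;	/* printed as free_pcp:%lukB, K() converts pages to kB */
}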
@@ -5717,7 +5760,7 @@ static void __setup_per_zone_wmarks(void)
 			 * value here.
 			 *
 			 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
-			 * deltas controls asynch page reclaim, and so should
+			 * deltas control asynch page reclaim, and so should
 			 * not be capped for highmem.
 			 */
 			unsigned long min_pages;