author    Joonsoo Kim <iamjoonsoo.kim@lge.com>              2016-03-15 17:57:51 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2016-03-15 19:55:16 -0400
commit    7cf91a98e607c2f935dbcc177d70011e95b8faff (patch)
tree      8a57a26127dc9c96059ceedebc2cf13e5d124e3c /mm
parent    e1409c325fdc1fef7b3d8025c51892355f065d15 (diff)
mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous
There is a performance regression report for hugepage allocation in which half of the CPU time is spent in pageblock_pfn_to_page() during compaction [1]. In that workload, compaction is triggered to make hugepages, but most pageblocks are unavailable for compaction due to their pageblock type and skip bit, so compaction usually fails. The most costly operation in this case is finding a valid pageblock while scanning the whole zone range. To check whether a pageblock is valid to compact, a valid pfn within the pageblock is required, and it can be obtained by calling pageblock_pfn_to_page(). This function checks whether the pageblock lies in a single zone and returns a valid pfn if possible. The problem is that this check has to be done every time before scanning a pageblock, even on re-visits, and it turns out to be very expensive in this workload.

Although there is no way to skip this pageblock check on systems where holes exist at arbitrary positions, we can cache the zone's continuity and just do pfn_to_page() on systems where no hole exists. This optimization considerably speeds up the above workload:

       Before       After
Max:   1096 MB/s    1325 MB/s
Min:    635 MB/s    1015 MB/s
Avg:    899 MB/s    1194 MB/s

Avg is improved by roughly 30% [2].

[1]: http://www.spinics.net/lists/linux-mm/msg97378.html
[2]: https://lkml.org/lkml/2015/12/9/23

[akpm@linux-foundation.org: don't forget to restore zone->contiguous on error path, per Vlastimil]
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Reported-by: Aaron Lu <aaron.lu@intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Aaron Lu <aaron.lu@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
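In short, the patch caches whether a zone is hole-free in a new zone->contiguous flag and adds a thin inline wrapper so the common case avoids the pfn_valid()/page_zone() checks entirely. A condensed view of that fast path follows, with explanatory comments added here for this summary; all names are taken from the patch itself (see the mm/internal.h hunk below):

/* Fast path introduced by this patch (mirrors the mm/internal.h hunk below). */
static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
                                unsigned long end_pfn, struct zone *zone)
{
        /* Zone known to have no holes: a single flag test plus pfn_to_page(). */
        if (zone->contiguous)
                return pfn_to_page(start_pfn);

        /* Otherwise validate the pageblock's first and last pfn as before. */
        return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}

zone->contiguous is computed once per zone in set_zone_contiguous() (called from page_alloc_init_late() and re-evaluated around memory hotplug, as the memory_hotplug.c hunks show), so in the contiguous case the per-pageblock cost drops to the flag test above.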
Diffstat (limited to 'mm')
-rw-r--r--   mm/compaction.c       43
-rw-r--r--   mm/internal.h         12
-rw-r--r--   mm/memory_hotplug.c   13
-rw-r--r--   mm/page_alloc.c       78
4 files changed, 100 insertions(+), 46 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 8ce36ebc8d15..93f71d968098 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -71,49 +71,6 @@ static inline bool migrate_async_suitable(int migratetype)
         return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
 }
 
-/*
- * Check that the whole (or subset of) a pageblock given by the interval of
- * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner. The scanners then need to
- * use only pfn_valid_within() check for arches that allow holes within
- * pageblocks.
- *
- * Return struct page pointer of start_pfn, or NULL if checks were not passed.
- *
- * It's possible on some configurations to have a setup like node0 node1 node0
- * i.e. it's possible that all pages within a zones range of pages do not
- * belong to a single zone. We assume that a border between node0 and node1
- * can occur within a single pageblock, but not a node0 node1 node0
- * interleaving within a single pageblock. It is therefore sufficient to check
- * the first and last page of a pageblock and avoid checking each individual
- * page in a pageblock.
- */
-static struct page *pageblock_pfn_to_page(unsigned long start_pfn,
-                                unsigned long end_pfn, struct zone *zone)
-{
-        struct page *start_page;
-        struct page *end_page;
-
-        /* end_pfn is one past the range we are checking */
-        end_pfn--;
-
-        if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
-                return NULL;
-
-        start_page = pfn_to_page(start_pfn);
-
-        if (page_zone(start_page) != zone)
-                return NULL;
-
-        end_page = pfn_to_page(end_pfn);
-
-        /* This gives a shorter code than deriving page_zone(end_page) */
-        if (page_zone_id(start_page) != page_zone_id(end_page))
-                return NULL;
-
-        return start_page;
-}
-
 #ifdef CONFIG_COMPACTION
 
 /* Do not skip compaction more than 64 times */
diff --git a/mm/internal.h b/mm/internal.h
index 6636e1d3ecf0..ad9400d759c8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -132,6 +132,18 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
         return page_idx ^ (1 << order);
 }
 
+extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone);
+
+static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone)
+{
+        if (zone->contiguous)
+                return pfn_to_page(start_pfn);
+
+        return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
+}
+
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
                                         unsigned int order);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 484e86761b3e..24ea06393816 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -512,6 +512,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
         int start_sec, end_sec;
         struct vmem_altmap *altmap;
 
+        clear_zone_contiguous(zone);
+
         /* during initialize mem_map, align hot-added range to section */
         start_sec = pfn_to_section_nr(phys_start_pfn);
         end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
@@ -524,7 +526,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
                 if (altmap->base_pfn != phys_start_pfn
                                 || vmem_altmap_offset(altmap) > nr_pages) {
                         pr_warn_once("memory add fail, invalid altmap\n");
-                        return -EINVAL;
+                        err = -EINVAL;
+                        goto out;
                 }
                 altmap->alloc = 0;
         }
@@ -542,7 +545,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
                 err = 0;
         }
         vmemmap_populate_print_last();
-
+out:
+        set_zone_contiguous(zone);
         return err;
 }
 EXPORT_SYMBOL_GPL(__add_pages);
@@ -814,6 +818,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
                 }
         }
 
+        clear_zone_contiguous(zone);
+
         /*
          * We can only remove entire sections
          */
@@ -829,6 +835,9 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
                 if (ret)
                         break;
         }
+
+        set_zone_contiguous(zone);
+
         return ret;
 }
 EXPORT_SYMBOL_GPL(__remove_pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 50897dcaefdb..c46b75d14b6f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1128,6 +1128,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
         return __free_pages_boot_core(page, pfn, order);
 }
 
+/*
+ * Check that the whole (or subset of) a pageblock given by the interval of
+ * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
+ * with the migration of free compaction scanner. The scanners then need to
+ * use only pfn_valid_within() check for arches that allow holes within
+ * pageblocks.
+ *
+ * Return struct page pointer of start_pfn, or NULL if checks were not passed.
+ *
+ * It's possible on some configurations to have a setup like node0 node1 node0
+ * i.e. it's possible that all pages within a zones range of pages do not
+ * belong to a single zone. We assume that a border between node0 and node1
+ * can occur within a single pageblock, but not a node0 node1 node0
+ * interleaving within a single pageblock. It is therefore sufficient to check
+ * the first and last page of a pageblock and avoid checking each individual
+ * page in a pageblock.
+ */
+struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone)
+{
+        struct page *start_page;
+        struct page *end_page;
+
+        /* end_pfn is one past the range we are checking */
+        end_pfn--;
+
+        if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
+                return NULL;
+
+        start_page = pfn_to_page(start_pfn);
+
+        if (page_zone(start_page) != zone)
+                return NULL;
+
+        end_page = pfn_to_page(end_pfn);
+
+        /* This gives a shorter code than deriving page_zone(end_page) */
+        if (page_zone_id(start_page) != page_zone_id(end_page))
+                return NULL;
+
+        return start_page;
+}
+
+void set_zone_contiguous(struct zone *zone)
+{
+        unsigned long block_start_pfn = zone->zone_start_pfn;
+        unsigned long block_end_pfn;
+
+        block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+        for (; block_start_pfn < zone_end_pfn(zone);
+                        block_start_pfn = block_end_pfn,
+                         block_end_pfn += pageblock_nr_pages) {
+
+                block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+                if (!__pageblock_pfn_to_page(block_start_pfn,
+                                        block_end_pfn, zone))
+                        return;
+        }
+
+        /* We confirm that there is no hole */
+        zone->contiguous = true;
+}
+
+void clear_zone_contiguous(struct zone *zone)
+{
+        zone->contiguous = false;
+}
+
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static void __init deferred_free_range(struct page *page,
                                         unsigned long pfn, int nr_pages)
@@ -1278,9 +1347,13 @@ free_range:
         pgdat_init_report_one_done();
         return 0;
 }
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 void __init page_alloc_init_late(void)
 {
+        struct zone *zone;
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
         int nid;
 
         /* There will be num_node_state(N_MEMORY) threads */
@@ -1294,8 +1367,11 @@ void __init page_alloc_init_late(void)
 
         /* Reinit limits that are based on free pages after the kernel is up */
         files_maxfiles_init();
+#endif
+
+        for_each_populated_zone(zone)
+                set_zone_contiguous(zone);
 }
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */