author    Joonsoo Kim <iamjoonsoo.kim@lge.com>              2016-03-15 17:57:51 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2016-03-15 19:55:16 -0400
commit    7cf91a98e607c2f935dbcc177d70011e95b8faff (patch)
tree      8a57a26127dc9c96059ceedebc2cf13e5d124e3c /mm
parent    e1409c325fdc1fef7b3d8025c51892355f065d15 (diff)
mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous
There is a performance regression report for hugepage allocation in which half of the CPU time is spent in pageblock_pfn_to_page() during compaction [1]. In that workload, compaction is triggered to make hugepages, but most pageblocks are unavailable for compaction due to their pageblock type and skip bit, so compaction usually fails. The most costly operation in this case is finding a valid pageblock while scanning the whole zone range. To check whether a pageblock is valid to compact, a valid pfn within the pageblock is required, and it can be obtained by calling pageblock_pfn_to_page(). This function checks whether the pageblock lies in a single zone and returns a valid pfn if possible. The problem is that this check has to be done every time before scanning a pageblock, even on re-visits, and it turns out to be very expensive in this workload.

Although there is no way to skip this pageblock check on systems where holes exist at arbitrary positions, we can cache the zone's continuity and just do pfn_to_page() on systems where no hole exists. This optimization considerably speeds up the above workload:

       Before       After
Max:   1096 MB/s    1325 MB/s
Min:    635 MB/s    1015 MB/s
Avg:    899 MB/s    1194 MB/s

Avg is improved by roughly 30% [2].

[1]: http://www.spinics.net/lists/linux-mm/msg97378.html
[2]: https://lkml.org/lkml/2015/12/9/23

[akpm@linux-foundation.org: don't forget to restore zone->contiguous on error path, per Vlastimil]
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Reported-by: Aaron Lu <aaron.lu@intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Aaron Lu <aaron.lu@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
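In short, the patch caches whether a zone is hole-free in a new zone->contiguous flag and adds a thin inline wrapper so the common case avoids the pfn_valid()/page_zone() checks entirely. A condensed view of that fast path follows, with explanatory comments added here for this summary; all names are taken from the patch itself (see the mm/internal.h hunk below):

/* Fast path introduced by this patch (mirrors the mm/internal.h hunk below). */
static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
                                unsigned long end_pfn, struct zone *zone)
{
        /* Zone known to have no holes: a single flag test plus pfn_to_page(). */
        if (zone->contiguous)
                return pfn_to_page(start_pfn);

        /* Otherwise validate the pageblock's first and last pfn as before. */
        return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}

zone->contiguous is computed once per zone in set_zone_contiguous() (called from page_alloc_init_late() and re-evaluated around memory hotplug, as the memory_hotplug.c hunks show), so in the contiguous case the per-pageblock cost drops to the flag test above.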
Diffstat (limited to 'mm')
-rw-r--r--   mm/compaction.c       43
-rw-r--r--   mm/internal.h         12
-rw-r--r--   mm/memory_hotplug.c   13
-rw-r--r--   mm/page_alloc.c       78
4 files changed, 100 insertions(+), 46 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 8ce36ebc8d15..93f71d968098 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -71,49 +71,6 @@ static inline bool migrate_async_suitable(int migratetype)
         return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
 }
 
-/*
- * Check that the whole (or subset of) a pageblock given by the interval of
- * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner. The scanners then need to
- * use only pfn_valid_within() check for arches that allow holes within
- * pageblocks.
- *
- * Return struct page pointer of start_pfn, or NULL if checks were not passed.
- *
- * It's possible on some configurations to have a setup like node0 node1 node0
- * i.e. it's possible that all pages within a zones range of pages do not
- * belong to a single zone. We assume that a border between node0 and node1
- * can occur within a single pageblock, but not a node0 node1 node0
- * interleaving within a single pageblock. It is therefore sufficient to check
- * the first and last page of a pageblock and avoid checking each individual
- * page in a pageblock.
- */
-static struct page *pageblock_pfn_to_page(unsigned long start_pfn,
-                                unsigned long end_pfn, struct zone *zone)
-{
-        struct page *start_page;
-        struct page *end_page;
-
-        /* end_pfn is one past the range we are checking */
-        end_pfn--;
-
-        if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
-                return NULL;
-
-        start_page = pfn_to_page(start_pfn);
-
-        if (page_zone(start_page) != zone)
-                return NULL;
-
-        end_page = pfn_to_page(end_pfn);
-
-        /* This gives a shorter code than deriving page_zone(end_page) */
-        if (page_zone_id(start_page) != page_zone_id(end_page))
-                return NULL;
-
-        return start_page;
-}
-
 #ifdef CONFIG_COMPACTION
 
 /* Do not skip compaction more than 64 times */
diff --git a/mm/internal.h b/mm/internal.h
index 6636e1d3ecf0..ad9400d759c8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -132,6 +132,18 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
         return page_idx ^ (1 << order);
 }
 
+extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone);
+
+static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone)
+{
+        if (zone->contiguous)
+                return pfn_to_page(start_pfn);
+
+        return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
+}
+
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
                                         unsigned int order);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 484e86761b3e..24ea06393816 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -512,6 +512,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
         int start_sec, end_sec;
         struct vmem_altmap *altmap;
 
+        clear_zone_contiguous(zone);
+
         /* during initialize mem_map, align hot-added range to section */
         start_sec = pfn_to_section_nr(phys_start_pfn);
         end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
@@ -524,7 +526,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
                 if (altmap->base_pfn != phys_start_pfn
                                 || vmem_altmap_offset(altmap) > nr_pages) {
                         pr_warn_once("memory add fail, invalid altmap\n");
-                        return -EINVAL;
+                        err = -EINVAL;
+                        goto out;
                 }
                 altmap->alloc = 0;
         }
@@ -542,7 +545,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
                 err = 0;
         }
         vmemmap_populate_print_last();
-
+out:
+        set_zone_contiguous(zone);
         return err;
 }
 EXPORT_SYMBOL_GPL(__add_pages);
@@ -814,6 +818,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
                 }
         }
 
+        clear_zone_contiguous(zone);
+
         /*
          * We can only remove entire sections
          */
@@ -829,6 +835,9 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
                 if (ret)
                         break;
         }
+
+        set_zone_contiguous(zone);
+
         return ret;
 }
 EXPORT_SYMBOL_GPL(__remove_pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 50897dcaefdb..c46b75d14b6f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1128,6 +1128,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
         return __free_pages_boot_core(page, pfn, order);
 }
 
+/*
+ * Check that the whole (or subset of) a pageblock given by the interval of
+ * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
+ * with the migration of free compaction scanner. The scanners then need to
+ * use only pfn_valid_within() check for arches that allow holes within
+ * pageblocks.
+ *
+ * Return struct page pointer of start_pfn, or NULL if checks were not passed.
+ *
+ * It's possible on some configurations to have a setup like node0 node1 node0
+ * i.e. it's possible that all pages within a zones range of pages do not
+ * belong to a single zone. We assume that a border between node0 and node1
+ * can occur within a single pageblock, but not a node0 node1 node0
+ * interleaving within a single pageblock. It is therefore sufficient to check
+ * the first and last page of a pageblock and avoid checking each individual
+ * page in a pageblock.
+ */
+struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone)
+{
+        struct page *start_page;
+        struct page *end_page;
+
+        /* end_pfn is one past the range we are checking */
+        end_pfn--;
+
+        if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
+                return NULL;
+
+        start_page = pfn_to_page(start_pfn);
+
+        if (page_zone(start_page) != zone)
+                return NULL;
+
+        end_page = pfn_to_page(end_pfn);
+
+        /* This gives a shorter code than deriving page_zone(end_page) */
+        if (page_zone_id(start_page) != page_zone_id(end_page))
+                return NULL;
+
+        return start_page;
+}
+
+void set_zone_contiguous(struct zone *zone)
+{
+        unsigned long block_start_pfn = zone->zone_start_pfn;
+        unsigned long block_end_pfn;
+
+        block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+        for (; block_start_pfn < zone_end_pfn(zone);
+                        block_start_pfn = block_end_pfn,
+                         block_end_pfn += pageblock_nr_pages) {
+
+                block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+                if (!__pageblock_pfn_to_page(block_start_pfn,
+                                        block_end_pfn, zone))
+                        return;
+        }
+
+        /* We confirm that there is no hole */
+        zone->contiguous = true;
+}
+
+void clear_zone_contiguous(struct zone *zone)
+{
+        zone->contiguous = false;
+}
+
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static void __init deferred_free_range(struct page *page,
                                         unsigned long pfn, int nr_pages)
@@ -1278,9 +1347,13 @@ free_range:
         pgdat_init_report_one_done();
         return 0;
 }
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 void __init page_alloc_init_late(void)
 {
+        struct zone *zone;
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
         int nid;
 
         /* There will be num_node_state(N_MEMORY) threads */
@@ -1294,8 +1367,11 @@ void __init page_alloc_init_late(void)
 
         /* Reinit limits that are based on free pages after the kernel is up */
         files_maxfiles_init();
+#endif
+
+        for_each_populated_zone(zone)
+                set_zone_contiguous(zone);
 }
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */