path: root/mm/page_alloc.c
author	Mel Gorman <mgorman@suse.de>	2015-06-30 17:57:05 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-30 22:44:56 -0400
commit	7e18adb4f80bea90d30b62158694d97c31f71d37 (patch)
tree	381b23cf9315c747752e9610c71b38f5a9a58db8 /mm/page_alloc.c
parent	3a80a7fa7989fbb6aa56bb6ad31811b62cf99e60 (diff)
mm: meminit: initialise remaining struct pages in parallel with kswapd
Only a subset of struct pages are initialised at the moment. When this patch is applied, kswapd initialises the remaining struct pages in parallel.

The system should boot faster because the work is spread across multiple CPUs and the data being initialised is local to each CPU. The user-visible effect on large machines is that free memory will appear to increase rapidly early in the lifetime of the system, until kswapd reports in the kernel log that all memory has been initialised. Once initialisation completes there should be no other user-visible effects.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
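As background for the diff below, here is a small stand-alone C sketch of the deferred-initialisation idea this patch relies on: early boot initialises struct pages only up to a per-node first_deferred_pfn mark, and a later pass (kswapd, after this patch) walks the remaining pfns and initialises the rest. This is a simplified user-space analogue, not kernel code: NODE_PAGES, EARLY_PAGES, page_initialised and early_init_memmap() are invented for illustration, and only the names first_deferred_pfn, early_page_uninitialised() and deferred_init_memmap() echo the patch.

/*
 * Simplified, user-space analogue of deferred struct page initialisation.
 * Not kernel code: the "memmap" is just a boolean array and a single node
 * is assumed.  Only first_deferred_pfn, early_page_uninitialised() and
 * deferred_init_memmap() mirror names used in the patch.
 */
#include <stdbool.h>
#include <stdio.h>

#define NODE_PAGES	1024	/* pretend pfn span of the node (illustrative) */
#define EARLY_PAGES	128	/* pfns initialised during early boot (illustrative) */

static bool page_initialised[NODE_PAGES];
static unsigned long first_deferred_pfn = EARLY_PAGES;

/* Everything at or beyond the deferred mark has no valid struct page yet. */
static bool early_page_uninitialised(unsigned long pfn)
{
	return pfn >= first_deferred_pfn;
}

/* Early boot: initialise only the leading subset of the node's pages. */
static void early_init_memmap(void)
{
	unsigned long pfn;

	for (pfn = 0; pfn < first_deferred_pfn; pfn++)
		page_initialised[pfn] = true;
}

/* Conceptual equivalent of deferred_init_memmap(), run later by kswapd. */
static unsigned long deferred_init_memmap(void)
{
	unsigned long pfn, nr_pages = 0;

	for (pfn = first_deferred_pfn; pfn < NODE_PAGES; pfn++) {
		if (page_initialised[pfn])	/* e.g. already set up as reserved */
			continue;
		page_initialised[pfn] = true;
		nr_pages++;
	}
	first_deferred_pfn = NODE_PAGES;	/* nothing left to defer */
	return nr_pages;
}

int main(void)
{
	early_init_memmap();
	printf("pfn 200 uninitialised after early boot? %d\n",
	       early_page_uninitialised(200));

	printf("deferred pass initialised %lu pages\n", deferred_init_memmap());
	return 0;
}

In the kernel the real work is per-node and per-zone, and the deferred pass frees each newly initialised page to the buddy allocator rather than merely marking it, which is why free memory appears to grow while kswapd runs.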
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	123
1 file changed, 119 insertions(+), 4 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7af45b2e8870..c30f5a0535fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -252,6 +252,14 @@ static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
 	return false;
 }
 
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
 /*
  * Returns false when the remaining initialisation should be deferred until
  * later in the boot cycle when it can be parallelised.
@@ -284,6 +292,11 @@ static inline bool early_page_uninitialised(unsigned long pfn)
 	return false;
 }
 
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	return false;
+}
+
 static inline bool update_defer_init(pg_data_t *pgdat,
 				unsigned long pfn, unsigned long zone_end,
 				unsigned long *nr_initialised)
@@ -866,20 +879,51 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
 	return __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
 }
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void init_reserved_page(unsigned long pfn)
+{
+	pg_data_t *pgdat;
+	int nid, zid;
+
+	if (!early_page_uninitialised(pfn))
+		return;
+
+	nid = early_pfn_to_nid(pfn);
+	pgdat = NODE_DATA(nid);
+
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		struct zone *zone = &pgdat->node_zones[zid];
+
+		if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+			break;
+	}
+	__init_single_pfn(pfn, zid, nid);
+}
+#else
+static inline void init_reserved_page(unsigned long pfn)
+{
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 /*
  * Initialised pages do not have PageReserved set. This function is
  * called for each range allocated by the bootmem allocator and
  * marks the pages PageReserved. The remaining valid pages are later
  * sent to the buddy page allocator.
  */
-void reserve_bootmem_region(unsigned long start, unsigned long end)
+void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long end_pfn = PFN_UP(end);
 
-	for (; start_pfn < end_pfn; start_pfn++)
-		if (pfn_valid(start_pfn))
-			SetPageReserved(pfn_to_page(start_pfn));
+	for (; start_pfn < end_pfn; start_pfn++) {
+		if (pfn_valid(start_pfn)) {
+			struct page *page = pfn_to_page(start_pfn);
+
+			init_reserved_page(start_pfn);
+			SetPageReserved(page);
+		}
+	}
 }
 
 static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -1017,6 +1061,74 @@ void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
 	return __free_pages_boot_core(page, pfn, order);
 }
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/* Initialise remaining memory on a node */
+void __defermem_init deferred_init_memmap(int nid)
+{
+	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long start = jiffies;
+	unsigned long nr_pages = 0;
+	unsigned long walk_start, walk_end;
+	int i, zid;
+	struct zone *zone;
+	pg_data_t *pgdat = NODE_DATA(nid);
+	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+
+	if (first_init_pfn == ULONG_MAX)
+		return;
+
+	/* Sanity check boundaries */
+	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
+	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
+	pgdat->first_deferred_pfn = ULONG_MAX;
+
+	/* Only the highest zone is deferred so find it */
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		zone = pgdat->node_zones + zid;
+		if (first_init_pfn < zone_end_pfn(zone))
+			break;
+	}
+
+	for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
+		unsigned long pfn, end_pfn;
+
+		end_pfn = min(walk_end, zone_end_pfn(zone));
+		pfn = first_init_pfn;
+		if (pfn < walk_start)
+			pfn = walk_start;
+		if (pfn < zone->zone_start_pfn)
+			pfn = zone->zone_start_pfn;
+
+		for (; pfn < end_pfn; pfn++) {
+			struct page *page;
+
+			if (!pfn_valid(pfn))
+				continue;
+
+			if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state))
+				continue;
+
+			if (page->flags) {
+				VM_BUG_ON(page_zone(page) != zone);
+				continue;
+			}
+
+			__init_single_page(page, pfn, zid, nid);
+			__free_pages_boot_core(page, pfn, 0);
+			nr_pages++;
+			cond_resched();
+		}
+		first_init_pfn = max(end_pfn, first_init_pfn);
+	}
+
+	/* Sanity check that the next zone really is unpopulated */
+	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+
+	pr_info("kswapd %d initialised %lu pages in %ums\n", nid, nr_pages,
+					jiffies_to_msecs(jiffies - start));
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
@@ -4329,6 +4441,9 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
+			return;
+
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);