path: root/mm/page_alloc.c
author	Mel Gorman <mgorman@suse.de>	2015-06-30 17:57:05 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-30 22:44:56 -0400
commit	7e18adb4f80bea90d30b62158694d97c31f71d37 (patch)
tree	381b23cf9315c747752e9610c71b38f5a9a58db8 /mm/page_alloc.c
parent	3a80a7fa7989fbb6aa56bb6ad31811b62cf99e60 (diff)
mm: meminit: initialise remaining struct pages in parallel with kswapd
Only a subset of struct pages are initialised at the moment. When this patch is applied, kswapd initialises the remaining struct pages in parallel.

The system should boot faster because the work is spread across multiple CPUs and the data being initialised is local to each CPU. The user-visible effect on large machines is that free memory will appear to increase rapidly early in the lifetime of the system, until kswapd reports in the kernel log that all memory has been initialised. Once initialisation completes there should be no other user-visible effects.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
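As background for the diff below, here is a small stand-alone C sketch of the deferred-initialisation idea this patch relies on: early boot initialises struct pages only up to a per-node first_deferred_pfn mark, and a later pass (kswapd, after this patch) walks the remaining pfns and initialises the rest. This is a simplified user-space analogue, not kernel code: NODE_PAGES, EARLY_PAGES, page_initialised and early_init_memmap() are invented for illustration, and only the names first_deferred_pfn, early_page_uninitialised() and deferred_init_memmap() echo the patch.

/*
 * Simplified, user-space analogue of deferred struct page initialisation.
 * Not kernel code: the "memmap" is just a boolean array and a single node
 * is assumed.  Only first_deferred_pfn, early_page_uninitialised() and
 * deferred_init_memmap() mirror names used in the patch.
 */
#include <stdbool.h>
#include <stdio.h>

#define NODE_PAGES	1024	/* pretend pfn span of the node (illustrative) */
#define EARLY_PAGES	128	/* pfns initialised during early boot (illustrative) */

static bool page_initialised[NODE_PAGES];
static unsigned long first_deferred_pfn = EARLY_PAGES;

/* Everything at or beyond the deferred mark has no valid struct page yet. */
static bool early_page_uninitialised(unsigned long pfn)
{
	return pfn >= first_deferred_pfn;
}

/* Early boot: initialise only the leading subset of the node's pages. */
static void early_init_memmap(void)
{
	unsigned long pfn;

	for (pfn = 0; pfn < first_deferred_pfn; pfn++)
		page_initialised[pfn] = true;
}

/* Conceptual equivalent of deferred_init_memmap(), run later by kswapd. */
static unsigned long deferred_init_memmap(void)
{
	unsigned long pfn, nr_pages = 0;

	for (pfn = first_deferred_pfn; pfn < NODE_PAGES; pfn++) {
		if (page_initialised[pfn])	/* e.g. already set up as reserved */
			continue;
		page_initialised[pfn] = true;
		nr_pages++;
	}
	first_deferred_pfn = NODE_PAGES;	/* nothing left to defer */
	return nr_pages;
}

int main(void)
{
	early_init_memmap();
	printf("pfn 200 uninitialised after early boot? %d\n",
	       early_page_uninitialised(200));

	printf("deferred pass initialised %lu pages\n", deferred_init_memmap());
	return 0;
}

In the kernel the real work is per-node and per-zone, and the deferred pass frees each newly initialised page to the buddy allocator rather than merely marking it, which is why free memory appears to grow while kswapd runs.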
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	123
1 file changed, 119 insertions(+), 4 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7af45b2e8870..c30f5a0535fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -252,6 +252,14 @@ static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
 	return false;
 }
 
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
 /*
  * Returns false when the remaining initialisation should be deferred until
  * later in the boot cycle when it can be parallelised.
@@ -284,6 +292,11 @@ static inline bool early_page_uninitialised(unsigned long pfn)
 	return false;
 }
 
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	return false;
+}
+
 static inline bool update_defer_init(pg_data_t *pgdat,
 				unsigned long pfn, unsigned long zone_end,
 				unsigned long *nr_initialised)
@@ -866,20 +879,51 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
 	return __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
 }
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void init_reserved_page(unsigned long pfn)
+{
+	pg_data_t *pgdat;
+	int nid, zid;
+
+	if (!early_page_uninitialised(pfn))
+		return;
+
+	nid = early_pfn_to_nid(pfn);
+	pgdat = NODE_DATA(nid);
+
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		struct zone *zone = &pgdat->node_zones[zid];
+
+		if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+			break;
+	}
+	__init_single_pfn(pfn, zid, nid);
+}
+#else
+static inline void init_reserved_page(unsigned long pfn)
+{
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 /*
  * Initialised pages do not have PageReserved set. This function is
  * called for each range allocated by the bootmem allocator and
  * marks the pages PageReserved. The remaining valid pages are later
  * sent to the buddy page allocator.
  */
-void reserve_bootmem_region(unsigned long start, unsigned long end)
+void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long end_pfn = PFN_UP(end);
 
-	for (; start_pfn < end_pfn; start_pfn++)
-		if (pfn_valid(start_pfn))
-			SetPageReserved(pfn_to_page(start_pfn));
+	for (; start_pfn < end_pfn; start_pfn++) {
+		if (pfn_valid(start_pfn)) {
+			struct page *page = pfn_to_page(start_pfn);
+
+			init_reserved_page(start_pfn);
+			SetPageReserved(page);
+		}
+	}
 }
 
 static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -1017,6 +1061,74 @@ void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
 	return __free_pages_boot_core(page, pfn, order);
 }
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/* Initialise remaining memory on a node */
+void __defermem_init deferred_init_memmap(int nid)
+{
+	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long start = jiffies;
+	unsigned long nr_pages = 0;
+	unsigned long walk_start, walk_end;
+	int i, zid;
+	struct zone *zone;
+	pg_data_t *pgdat = NODE_DATA(nid);
+	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+
+	if (first_init_pfn == ULONG_MAX)
+		return;
+
+	/* Sanity check boundaries */
+	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
+	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
+	pgdat->first_deferred_pfn = ULONG_MAX;
+
+	/* Only the highest zone is deferred so find it */
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		zone = pgdat->node_zones + zid;
+		if (first_init_pfn < zone_end_pfn(zone))
+			break;
+	}
+
+	for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
+		unsigned long pfn, end_pfn;
+
+		end_pfn = min(walk_end, zone_end_pfn(zone));
+		pfn = first_init_pfn;
+		if (pfn < walk_start)
+			pfn = walk_start;
+		if (pfn < zone->zone_start_pfn)
+			pfn = zone->zone_start_pfn;
+
+		for (; pfn < end_pfn; pfn++) {
+			struct page *page;
+
+			if (!pfn_valid(pfn))
+				continue;
+
+			if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state))
+				continue;
+
+			if (page->flags) {
+				VM_BUG_ON(page_zone(page) != zone);
+				continue;
+			}
+
+			__init_single_page(page, pfn, zid, nid);
+			__free_pages_boot_core(page, pfn, 0);
+			nr_pages++;
+			cond_resched();
+		}
+		first_init_pfn = max(end_pfn, first_init_pfn);
+	}
+
+	/* Sanity check that the next zone really is unpopulated */
+	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+
+	pr_info("kswapd %d initialised %lu pages in %ums\n", nid, nr_pages,
+					jiffies_to_msecs(jiffies - start));
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
@@ -4329,6 +4441,9 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
+			return;
+
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);