summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2015-06-30 17:57:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-30 22:44:56 -0400
commit0e1cc95b4cc7293bb7b39175035e7f7e45c90977 (patch)
tree61b1173bce0ef2738aec4259956c1ecc52280dfa /mm/page_alloc.c
parent74033a798f5a5db368126ee6f690111cf019bf7a (diff)
mm: meminit: finish initialisation of struct pages before basic setup
Waiman Long reported that 24TB machines hit OOM during basic setup when struct page initialisation was deferred. One approach is to initialise memory on demand but it interferes with page allocator paths. This patch creates dedicated threads to initialise memory before basic setup. It then blocks on a rw_semaphore until completion as a wait_queue and counter is overkill. This may be slower to boot but it's simpler overall and also gets rid of a section mangling which existed so kswapd could do the initialisation. [akpm@linux-foundation.org: include rwsem.h, use DECLARE_RWSEM, fix comment, remove unneeded cast] Signed-off-by: Mel Gorman <mgorman@suse.de> Cc: Waiman Long <waiman.long@hp.com> Cc: Nathan Zimmer <nzimmer@sgi.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Scott Norton <scott.norton@hp.com> Tested-by: Daniel J Blueman <daniel@numascale.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c46
1 file changed, 37 insertions, 9 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5a38e39b30d1..506eac8b38af 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,6 +18,7 @@
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/swap.h> 19#include <linux/swap.h>
20#include <linux/interrupt.h> 20#include <linux/interrupt.h>
21#include <linux/rwsem.h>
21#include <linux/pagemap.h> 22#include <linux/pagemap.h>
22#include <linux/jiffies.h> 23#include <linux/jiffies.h>
23#include <linux/bootmem.h> 24#include <linux/bootmem.h>
@@ -61,6 +62,7 @@
61#include <linux/hugetlb.h> 62#include <linux/hugetlb.h>
62#include <linux/sched/rt.h> 63#include <linux/sched/rt.h>
63#include <linux/page_owner.h> 64#include <linux/page_owner.h>
65#include <linux/kthread.h>
64 66
65#include <asm/sections.h> 67#include <asm/sections.h>
66#include <asm/tlbflush.h> 68#include <asm/tlbflush.h>
@@ -242,7 +244,7 @@ static inline void reset_deferred_meminit(pg_data_t *pgdat)
242} 244}
243 245
244/* Returns true if the struct page for the pfn is uninitialised */ 246/* Returns true if the struct page for the pfn is uninitialised */
245static inline bool __defermem_init early_page_uninitialised(unsigned long pfn) 247static inline bool __meminit early_page_uninitialised(unsigned long pfn)
246{ 248{
247 int nid = early_pfn_to_nid(pfn); 249 int nid = early_pfn_to_nid(pfn);
248 250
@@ -958,7 +960,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
958 local_irq_restore(flags); 960 local_irq_restore(flags);
959} 961}
960 962
961static void __defer_init __free_pages_boot_core(struct page *page, 963static void __init __free_pages_boot_core(struct page *page,
962 unsigned long pfn, unsigned int order) 964 unsigned long pfn, unsigned int order)
963{ 965{
964 unsigned int nr_pages = 1 << order; 966 unsigned int nr_pages = 1 << order;
@@ -1031,7 +1033,7 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
1031#endif 1033#endif
1032 1034
1033 1035
1034void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn, 1036void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
1035 unsigned int order) 1037 unsigned int order)
1036{ 1038{
1037 if (early_page_uninitialised(pfn)) 1039 if (early_page_uninitialised(pfn))
@@ -1040,7 +1042,7 @@ void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
1040} 1042}
1041 1043
1042#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 1044#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
1043static void __defermem_init deferred_free_range(struct page *page, 1045static void __init deferred_free_range(struct page *page,
1044 unsigned long pfn, int nr_pages) 1046 unsigned long pfn, int nr_pages)
1045{ 1047{
1046 int i; 1048 int i;
@@ -1060,20 +1062,30 @@ static void __defermem_init deferred_free_range(struct page *page,
1060 __free_pages_boot_core(page, pfn, 0); 1062 __free_pages_boot_core(page, pfn, 0);
1061} 1063}
1062 1064
1065static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
1066
1063/* Initialise remaining memory on a node */ 1067/* Initialise remaining memory on a node */
1064void __defermem_init deferred_init_memmap(int nid) 1068static int __init deferred_init_memmap(void *data)
1065{ 1069{
1070 pg_data_t *pgdat = data;
1071 int nid = pgdat->node_id;
1066 struct mminit_pfnnid_cache nid_init_state = { }; 1072 struct mminit_pfnnid_cache nid_init_state = { };
1067 unsigned long start = jiffies; 1073 unsigned long start = jiffies;
1068 unsigned long nr_pages = 0; 1074 unsigned long nr_pages = 0;
1069 unsigned long walk_start, walk_end; 1075 unsigned long walk_start, walk_end;
1070 int i, zid; 1076 int i, zid;
1071 struct zone *zone; 1077 struct zone *zone;
1072 pg_data_t *pgdat = NODE_DATA(nid);
1073 unsigned long first_init_pfn = pgdat->first_deferred_pfn; 1078 unsigned long first_init_pfn = pgdat->first_deferred_pfn;
1079 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
1074 1080
1075 if (first_init_pfn == ULONG_MAX) 1081 if (first_init_pfn == ULONG_MAX) {
1076 return; 1082 up_read(&pgdat_init_rwsem);
1083 return 0;
1084 }
1085
1086 /* Bind memory initialisation thread to a local node if possible */
1087 if (!cpumask_empty(cpumask))
1088 set_cpus_allowed_ptr(current, cpumask);
1077 1089
1078 /* Sanity check boundaries */ 1090 /* Sanity check boundaries */
1079 BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn); 1091 BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
@@ -1165,8 +1177,24 @@ free_range:
1165 /* Sanity check that the next zone really is unpopulated */ 1177 /* Sanity check that the next zone really is unpopulated */
1166 WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone)); 1178 WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
1167 1179
1168 pr_info("kswapd %d initialised %lu pages in %ums\n", nid, nr_pages, 1180 pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
1169 jiffies_to_msecs(jiffies - start)); 1181 jiffies_to_msecs(jiffies - start));
1182 up_read(&pgdat_init_rwsem);
1183 return 0;
1184}
1185
1186void __init page_alloc_init_late(void)
1187{
1188 int nid;
1189
1190 for_each_node_state(nid, N_MEMORY) {
1191 down_read(&pgdat_init_rwsem);
1192 kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
1193 }
1194
1195 /* Block until all are initialised */
1196 down_write(&pgdat_init_rwsem);
1197 up_write(&pgdat_init_rwsem);
1170} 1198}
1171#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 1199#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
1172 1200