Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--   mm/page_alloc.c   76
1 file changed, 52 insertions(+), 24 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 506eac8b38af..5b5240b7f642 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,7 +18,6 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/interrupt.h>
-#include <linux/rwsem.h>
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/bootmem.h>
@@ -246,9 +245,7 @@ static inline void reset_deferred_meminit(pg_data_t *pgdat)
 /* Returns true if the struct page for the pfn is uninitialised */
 static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 {
-        int nid = early_pfn_to_nid(pfn);
-
-        if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+        if (pfn >= NODE_DATA(early_pfn_to_nid(pfn))->first_deferred_pfn)
                 return true;
 
         return false;
@@ -983,21 +980,21 @@ static void __init __free_pages_boot_core(struct page *page,
 
 #if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
         defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
-/* Only safe to use early in boot when initialisation is single-threaded */
+
 static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
 
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
+        static DEFINE_SPINLOCK(early_pfn_lock);
         int nid;
 
-        /* The system will behave unpredictably otherwise */
-        BUG_ON(system_state != SYSTEM_BOOTING);
-
+        spin_lock(&early_pfn_lock);
         nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
-        if (nid >= 0)
-                return nid;
-        /* just returns 0 */
-        return 0;
+        if (nid < 0)
+                nid = 0;
+        spin_unlock(&early_pfn_lock);
+
+        return nid;
 }
 #endif
 
@@ -1062,7 +1059,15 @@ static void __init deferred_free_range(struct page *page,
                 __free_pages_boot_core(page, pfn, 0);
 }
 
-static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
+/* Completion tracking for deferred_init_memmap() threads */
+static atomic_t pgdat_init_n_undone __initdata;
+static __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp);
+
+static inline void __init pgdat_init_report_one_done(void)
+{
+        if (atomic_dec_and_test(&pgdat_init_n_undone))
+                complete(&pgdat_init_all_done_comp);
+}
 
 /* Initialise remaining memory on a node */
 static int __init deferred_init_memmap(void *data)
@@ -1079,7 +1084,7 @@ static int __init deferred_init_memmap(void *data)
         const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
         if (first_init_pfn == ULONG_MAX) {
-                up_read(&pgdat_init_rwsem);
+                pgdat_init_report_one_done();
                 return 0;
         }
 
@@ -1179,7 +1184,8 @@ free_range:
 
         pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
                                         jiffies_to_msecs(jiffies - start));
-        up_read(&pgdat_init_rwsem);
+
+        pgdat_init_report_one_done();
         return 0;
 }
 
@@ -1187,14 +1193,17 @@ void __init page_alloc_init_late(void)
 {
         int nid;
 
+        /* There will be num_node_state(N_MEMORY) threads */
+        atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
         for_each_node_state(nid, N_MEMORY) {
-                down_read(&pgdat_init_rwsem);
                 kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
         }
 
         /* Block until all are initialised */
-        down_write(&pgdat_init_rwsem);
-        up_write(&pgdat_init_rwsem);
+        wait_for_completion(&pgdat_init_all_done_comp);
+
+        /* Reinit limits that are based on free pages after the kernel is up */
+        files_maxfiles_init();
 }
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
@@ -1287,6 +1296,10 @@ static inline int check_new_page(struct page *page)
                 bad_reason = "non-NULL mapping";
         if (unlikely(atomic_read(&page->_count) != 0))
                 bad_reason = "nonzero _count";
+        if (unlikely(page->flags & __PG_HWPOISON)) {
+                bad_reason = "HWPoisoned (hardware-corrupted)";
+                bad_flags = __PG_HWPOISON;
+        }
         if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
                 bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
                 bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
@@ -1330,12 +1343,15 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
         set_page_owner(page, order, gfp_flags);
 
         /*
-         * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to
+         * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
          * allocate the page. The expectation is that the caller is taking
          * steps that will free more memory. The caller should avoid the page
          * being used for !PFMEMALLOC purposes.
          */
-        page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+        if (alloc_flags & ALLOC_NO_WATERMARKS)
+                set_page_pfmemalloc(page);
+        else
+                clear_page_pfmemalloc(page);
 
         return 0;
 }
@@ -1950,6 +1966,7 @@ void free_hot_cold_page_list(struct list_head *list, bool cold)
 void split_page(struct page *page, unsigned int order)
 {
         int i;
+        gfp_t gfp_mask;
 
         VM_BUG_ON_PAGE(PageCompound(page), page);
         VM_BUG_ON_PAGE(!page_count(page), page);
@@ -1963,10 +1980,11 @@ void split_page(struct page *page, unsigned int order)
                 split_page(virt_to_page(page[0].shadow), order);
 #endif
 
-        set_page_owner(page, 0, 0);
+        gfp_mask = get_page_owner_gfp(page);
+        set_page_owner(page, 0, gfp_mask);
         for (i = 1; i < (1 << order); i++) {
                 set_page_refcounted(page + i);
-                set_page_owner(page + i, 0, 0);
+                set_page_owner(page + i, 0, gfp_mask);
         }
 }
 EXPORT_SYMBOL_GPL(split_page);
@@ -1996,6 +2014,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
         zone->free_area[order].nr_free--;
         rmv_page_order(page);
 
+        set_page_owner(page, order, __GFP_MOVABLE);
+
         /* Set the pageblock if the isolated page is at least a pageblock */
         if (order >= pageblock_order - 1) {
                 struct page *endpage = page + (1 << order) - 1;
@@ -2007,7 +2027,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                 }
         }
 
-        set_page_owner(page, order, 0);
+
         return 1UL << order;
 }
 
@@ -3328,7 +3348,7 @@ refill:
                 atomic_add(size - 1, &page->_count);
 
                 /* reset page count bias and offset to start of new frag */
-                nc->pfmemalloc = page->pfmemalloc;
+                nc->pfmemalloc = page_is_pfmemalloc(page);
                 nc->pagecnt_bias = size;
                 nc->offset = size;
         }
@@ -5043,6 +5063,10 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
 {
         unsigned long zone_start_pfn, zone_end_pfn;
 
+        /* When hotadd a new node, the node should be empty */
+        if (!node_start_pfn && !node_end_pfn)
+                return 0;
+
         /* Get the start and end of the zone */
         zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
         zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
@@ -5106,6 +5130,10 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
         unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
         unsigned long zone_start_pfn, zone_end_pfn;
 
+        /* When hotadd a new node, the node should be empty */
+        if (!node_start_pfn && !node_end_pfn)
+                return 0;
+
         zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
         zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
 
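
Editor's note (not part of the patch): the main synchronisation change above replaces the down_read()/down_write() trick on pgdat_init_rwsem with a counted completion, where each "pgdatinit" kthread decrements a shared counter and the last one to finish wakes the single waiter. A minimal kernel-style sketch of that pattern is shown below; the names (nr_undone, all_done, worker_fn, run_and_wait, nr_workers) are illustrative and do not appear in the patch.

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/kthread.h>

/* One counter for all outstanding workers, one completion for the waiter. */
static atomic_t nr_undone;
static DECLARE_COMPLETION(all_done);

static void report_one_done(void)
{
        /* The worker that drops the counter to zero wakes the waiter. */
        if (atomic_dec_and_test(&nr_undone))
                complete(&all_done);
}

static int worker_fn(void *data)
{
        /* ... do the per-node work here ... */
        report_one_done();
        return 0;
}

static void run_and_wait(int nr_workers)
{
        int i;

        /* Set the count before any worker can finish. */
        atomic_set(&nr_undone, nr_workers);
        for (i = 0; i < nr_workers; i++)
                kthread_run(worker_fn, NULL, "worker/%d", i);

        /* Block until every worker has called report_one_done(). */
        wait_for_completion(&all_done);
}

The ordering matters: the counter must be set to the full worker count before any worker can call report_one_done(), otherwise an early finisher could drop it to zero and signal the completion prematurely. That is why the patch performs the atomic_set() before the kthread_run() loop in page_alloc_init_late().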