Diffstat (limited to 'mm/page_alloc.c')

 mm/page_alloc.c | 76 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 52 insertions(+), 24 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 506eac8b38af..5b5240b7f642 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,7 +18,6 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/interrupt.h>
-#include <linux/rwsem.h>
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/bootmem.h>
@@ -246,9 +245,7 @@ static inline void reset_deferred_meminit(pg_data_t *pgdat)
 /* Returns true if the struct page for the pfn is uninitialised */
 static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 {
-	int nid = early_pfn_to_nid(pfn);
-
-	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+	if (pfn >= NODE_DATA(early_pfn_to_nid(pfn))->first_deferred_pfn)
 		return true;
 
 	return false;
@@ -983,21 +980,21 @@ static void __init __free_pages_boot_core(struct page *page,
 
 #if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
 	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
-/* Only safe to use early in boot when initialisation is single-threaded */
+
 static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
 
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
+	static DEFINE_SPINLOCK(early_pfn_lock);
 	int nid;
 
-	/* The system will behave unpredictably otherwise */
-	BUG_ON(system_state != SYSTEM_BOOTING);
-
+	spin_lock(&early_pfn_lock);
 	nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
-	if (nid >= 0)
-		return nid;
-	/* just returns 0 */
-	return 0;
+	if (nid < 0)
+		nid = 0;
+	spin_unlock(&early_pfn_lock);
+
+	return nid;
 }
 #endif
 
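The hunk above drops the boot-time-only BUG_ON() and instead serializes early_pfn_to_nid() with a function-local spinlock, so the shared early_pfnnid_cache stays consistent when several deferred-initialisation threads call it at once. As a rough userspace analogue of that pattern (a one-entry lookup cache behind a function-scoped lock), with all names invented for illustration:

/* Userspace sketch (pthreads) of the pattern introduced above: a shared
 * single-entry cache serialized by a function-local lock so concurrent
 * callers cannot observe a torn update. Names are illustrative only. */
#include <pthread.h>

struct range_cache {
        unsigned long start, end;       /* cached range, [start, end) */
        int node;                       /* result for that range */
};

static struct range_cache cache = { .start = 1, .end = 0, .node = -1 };

/* Stand-in for the slow __early_pfn_to_nid() walk: pretend pfns below
 * 0x100000 belong to node 0 and everything above to node 1. */
static int slow_lookup(unsigned long pfn, struct range_cache *c)
{
        c->start = (pfn < 0x100000) ? 0 : 0x100000;
        c->end   = (pfn < 0x100000) ? 0x100000 : ~0UL;
        c->node  = (pfn < 0x100000) ? 0 : 1;
        return c->node;
}

int cached_lookup(unsigned long pfn)
{
        static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        int node;

        pthread_mutex_lock(&lock);
        if (pfn >= cache.start && pfn < cache.end)
                node = cache.node;      /* fast path: cache hit */
        else
                node = slow_lookup(pfn, &cache);
        if (node < 0)
                node = 0;               /* mirror the kernel's fallback to 0 */
        pthread_mutex_unlock(&lock);

        return node;
}

The function-local lock only guards the cache itself; callers that do not race pay one uncontended lock/unlock per lookup.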
@@ -1062,7 +1059,15 @@ static void __init deferred_free_range(struct page *page,
 		__free_pages_boot_core(page, pfn, 0);
 }
 
-static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
+/* Completion tracking for deferred_init_memmap() threads */
+static atomic_t pgdat_init_n_undone __initdata;
+static __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp);
+
+static inline void __init pgdat_init_report_one_done(void)
+{
+	if (atomic_dec_and_test(&pgdat_init_n_undone))
+		complete(&pgdat_init_all_done_comp);
+}
 
 /* Initialise remaining memory on a node */
 static int __init deferred_init_memmap(void *data)
@@ -1079,7 +1084,7 @@ static int __init deferred_init_memmap(void *data)
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
 	if (first_init_pfn == ULONG_MAX) {
-		up_read(&pgdat_init_rwsem);
+		pgdat_init_report_one_done();
 		return 0;
 	}
 
@@ -1179,7 +1184,8 @@ free_range:
 
 	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
 					jiffies_to_msecs(jiffies - start));
-	up_read(&pgdat_init_rwsem);
+
+	pgdat_init_report_one_done();
 	return 0;
 }
 
@@ -1187,14 +1193,17 @@ void __init page_alloc_init_late(void)
 {
 	int nid;
 
+	/* There will be num_node_state(N_MEMORY) threads */
+	atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
 	for_each_node_state(nid, N_MEMORY) {
-		down_read(&pgdat_init_rwsem);
 		kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
 	}
 
 	/* Block until all are initialised */
-	down_write(&pgdat_init_rwsem);
-	up_write(&pgdat_init_rwsem);
+	wait_for_completion(&pgdat_init_all_done_comp);
+
+	/* Reinit limits that are based on free pages after the kernel is up */
+	files_maxfiles_init();
 }
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
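The rwsem in the old code was doing double duty: each pgdatinit thread held it for read, and page_alloc_init_late() taking it for write meant "all readers are done". The new code states that directly with an atomic count of outstanding threads (pgdat_init_n_undone) plus a completion that the last thread fires. A self-contained userspace sketch of the same countdown pattern, using pthreads rather than the kernel primitives:

/* Sketch of the countdown-plus-completion pattern that replaces the
 * rwsem above: N workers decrement a shared counter and the last one
 * wakes the waiter. Illustrative code, not the kernel API. */
#include <pthread.h>
#include <stdio.h>

#define NWORKERS 4

static int n_undone = NWORKERS;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER;

static void report_one_done(void)      /* cf. pgdat_init_report_one_done() */
{
        pthread_mutex_lock(&lock);
        if (--n_undone == 0)
                pthread_cond_signal(&all_done);
        pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
        /* ... per-node initialisation would happen here ... */
        report_one_done();
        return NULL;
}

int main(void)
{
        pthread_t tid[NWORKERS];

        for (int i = 0; i < NWORKERS; i++)
                pthread_create(&tid[i], NULL, worker, NULL);

        /* cf. wait_for_completion(&pgdat_init_all_done_comp) */
        pthread_mutex_lock(&lock);
        while (n_undone > 0)
                pthread_cond_wait(&all_done, &lock);
        pthread_mutex_unlock(&lock);

        puts("all workers initialised");
        for (int i = 0; i < NWORKERS; i++)
                pthread_join(tid[i], NULL);
        return 0;
}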
@@ -1287,6 +1296,10 @@ static inline int check_new_page(struct page *page)
 		bad_reason = "non-NULL mapping";
 	if (unlikely(atomic_read(&page->_count) != 0))
 		bad_reason = "nonzero _count";
+	if (unlikely(page->flags & __PG_HWPOISON)) {
+		bad_reason = "HWPoisoned (hardware-corrupted)";
+		bad_flags = __PG_HWPOISON;
+	}
 	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
 		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
 		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
@@ -1330,12 +1343,15 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
 	set_page_owner(page, order, gfp_flags);
 
 	/*
-	 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to
+	 * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
 	 * allocate the page. The expectation is that the caller is taking
 	 * steps that will free more memory. The caller should avoid the page
 	 * being used for !PFMEMALLOC purposes.
 	 */
-	page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+	if (alloc_flags & ALLOC_NO_WATERMARKS)
+		set_page_pfmemalloc(page);
+	else
+		clear_page_pfmemalloc(page);
 
 	return 0;
 }
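set_page_pfmemalloc(), clear_page_pfmemalloc() and page_is_pfmemalloc() (used further down in this diff) replace the dedicated page->pfmemalloc field with accessors. One plausible shape for such helpers is sketched below; it assumes the flag is folded into a struct member the allocator's caller is not using at that point, which is an assumption made for illustration, not a copy of this tree's definitions:

/* Sketch of pfmemalloc accessors: encode the "emergency reserve" flag
 * in an otherwise-unused field instead of a dedicated member. The
 * struct and helper names here are hypothetical. */
struct fake_page {
        unsigned long flags;
        unsigned long index;    /* unused while the page sits with its new owner */
};

static inline void set_page_pfmemalloc_sketch(struct fake_page *page)
{
        page->index = -1UL;     /* sentinel: page came from the reserves */
}

static inline void clear_page_pfmemalloc_sketch(struct fake_page *page)
{
        page->index = 0;
}

static inline int page_is_pfmemalloc_sketch(const struct fake_page *page)
{
        return page->index == -1UL;
}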
@@ -1950,6 +1966,7 @@ void free_hot_cold_page_list(struct list_head *list, bool cold)
 void split_page(struct page *page, unsigned int order)
 {
 	int i;
+	gfp_t gfp_mask;
 
 	VM_BUG_ON_PAGE(PageCompound(page), page);
 	VM_BUG_ON_PAGE(!page_count(page), page);
@@ -1963,10 +1980,11 @@ void split_page(struct page *page, unsigned int order)
 	split_page(virt_to_page(page[0].shadow), order);
 #endif
 
-	set_page_owner(page, 0, 0);
+	gfp_mask = get_page_owner_gfp(page);
+	set_page_owner(page, 0, gfp_mask);
 	for (i = 1; i < (1 << order); i++) {
 		set_page_refcounted(page + i);
-		set_page_owner(page + i, 0, 0);
+		set_page_owner(page + i, 0, gfp_mask);
 	}
 }
 EXPORT_SYMBOL_GPL(split_page);
@@ -1996,6 +2014,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
+	set_page_owner(page, order, __GFP_MOVABLE);
+
 	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
@@ -2007,7 +2027,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
 		}
 	}
 
-	set_page_owner(page, order, 0);
+
 	return 1UL << order;
 }
 
@@ -3328,7 +3348,7 @@ refill:
 		atomic_add(size - 1, &page->_count);
 
 		/* reset page count bias and offset to start of new frag */
-		nc->pfmemalloc = page->pfmemalloc;
+		nc->pfmemalloc = page_is_pfmemalloc(page);
 		nc->pagecnt_bias = size;
 		nc->offset = size;
 	}
@@ -5043,6 +5063,10 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
 {
 	unsigned long zone_start_pfn, zone_end_pfn;
 
+	/* When hotadd a new node, the node should be empty */
+	if (!node_start_pfn && !node_end_pfn)
+		return 0;
+
 	/* Get the start and end of the zone */
 	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
 	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
@@ -5106,6 +5130,10 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
 	unsigned long zone_start_pfn, zone_end_pfn;
 
+	/* When hotadd a new node, the node should be empty */
+	if (!node_start_pfn && !node_end_pfn)
+		return 0;
+
 	zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
 	zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
 
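Both helpers now short-circuit for a node whose pfn span is still [0, 0), which is what a hot-added but not yet populated node reports; without the guard, the subsequent min/max/clamp arithmetic can subtract a larger start from a smaller end and underflow the spanned-page count. A standalone sketch of the guarded range intersection, with hypothetical names:

/* Sketch of the guarded intersection both helpers perform: clamp the
 * node's pfn range into the zone's range and count the overlap.
 * Standalone illustrative version, not kernel code. */
static unsigned long clamp_ul(unsigned long v, unsigned long lo,
                              unsigned long hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

unsigned long zone_pages_in_node_sketch(unsigned long node_start_pfn,
                                        unsigned long node_end_pfn,
                                        unsigned long zone_low,
                                        unsigned long zone_high)
{
        unsigned long start, end;

        /* A freshly hot-added node spans nothing yet: report it empty
         * rather than clamping a zero-width [0, 0) range into the zone. */
        if (!node_start_pfn && !node_end_pfn)
                return 0;

        start = clamp_ul(node_start_pfn, zone_low, zone_high);
        end = clamp_ul(node_end_pfn, zone_low, zone_high);

        return end > start ? end - start : 0;
}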