Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	221
1 file changed, 160 insertions, 61 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5248fe070aa4..3bac76ae4b30 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -205,7 +205,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
 };
 
 int min_free_kbytes = 1024;
-int user_min_free_kbytes;
+int user_min_free_kbytes = -1;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
@@ -295,7 +295,7 @@ static inline int bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(struct page *page)
+static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
 {
 	static unsigned long resume;
 	static unsigned long nr_shown;
@@ -329,7 +329,7 @@ static void bad_page(struct page *page)
 
 	printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
 		current->comm, page_to_pfn(page));
-	dump_page(page);
+	dump_page_badflags(page, reason, bad_flags);
 
 	print_modules();
 	dump_stack();
@@ -369,9 +369,11 @@ void prep_compound_page(struct page *page, unsigned long order)
 	__SetPageHead(page);
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
-		__SetPageTail(p);
 		set_page_count(p, 0);
 		p->first_page = page;
+		/* Make sure p->first_page is always valid for PageTail() */
+		smp_wmb();
+		__SetPageTail(p);
 	}
 }
 
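The reordering above is the usual publish pattern: initialize the data a reader will dereference (p->first_page), issue a write barrier, and only then set the flag the reader tests (PageTail). Below is a minimal userspace sketch of the same ordering using C11 release/acquire atomics instead of the kernel's smp_wmb(); every name in it is illustrative, not kernel code.

	/* Userspace analogy of "init, barrier, publish"; not kernel code. */
	#include <stdatomic.h>
	#include <pthread.h>
	#include <stdio.h>

	static int first_page_analog;		/* plays the role of p->first_page */
	static atomic_int tail_flag = 0;	/* plays the role of PageTail()    */

	static void *writer(void *arg)
	{
		first_page_analog = 42;				/* set the data first   */
		atomic_store_explicit(&tail_flag, 1,		/* then publish it,     */
				      memory_order_release);	/* ~ smp_wmb() + store  */
		return NULL;
	}

	static void *reader(void *arg)
	{
		/* If the flag is observed set, the data store is visible too. */
		if (atomic_load_explicit(&tail_flag, memory_order_acquire))
			printf("first_page_analog = %d\n", first_page_analog);
		return NULL;
	}

	int main(void)
	{
		pthread_t w, r;
		pthread_create(&w, NULL, writer, NULL);
		pthread_create(&r, NULL, reader, NULL);
		pthread_join(w, NULL);
		pthread_join(r, NULL);
		return 0;
	}

The kernel side uses a plain store after smp_wmb() and relies on the read side pairing with a matching barrier or data dependency; the acquire/release pair above is just the portable userspace equivalent of that contract.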
@@ -383,7 +385,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
 	int bad = 0;
 
 	if (unlikely(compound_order(page) != order)) {
-		bad_page(page);
+		bad_page(page, "wrong compound order", 0);
 		bad++;
 	}
 
@@ -392,8 +394,11 @@ static int destroy_compound_page(struct page *page, unsigned long order)
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		if (unlikely(!PageTail(p) || (p->first_page != page))) {
-			bad_page(page);
+		if (unlikely(!PageTail(p))) {
+			bad_page(page, "PageTail not set", 0);
+			bad++;
+		} else if (unlikely(p->first_page != page)) {
+			bad_page(page, "first_page not consistent", 0);
 			bad++;
 		}
 		__ClearPageTail(p);
@@ -506,12 +511,12 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 		return 0;
 
 	if (page_is_guard(buddy) && page_order(buddy) == order) {
-		VM_BUG_ON(page_count(buddy) != 0);
+		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
 		return 1;
 	}
 
 	if (PageBuddy(buddy) && page_order(buddy) == order) {
-		VM_BUG_ON(page_count(buddy) != 0);
+		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
 		return 1;
 	}
 	return 0;
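The VM_BUG_ON to VM_BUG_ON_PAGE conversions throughout this patch all have the same goal: when the assertion trips, dump the state of the page that violated it rather than only the failed condition. The real macro is introduced elsewhere in this series; a rough userspace sketch of the same "assert with context" shape looks like the following (all names invented for illustration, not kernel code).

	/* Illustrative assert-with-context macro in the spirit of VM_BUG_ON_PAGE(). */
	#include <stdio.h>
	#include <stdlib.h>

	struct demo_page {
		unsigned long flags;
		int count;
	};

	static void demo_dump_page(const struct demo_page *page)
	{
		fprintf(stderr, "page:%p flags:%#lx count:%d\n",
			(const void *)page, page->flags, page->count);
	}

	#define DEMO_BUG_ON_PAGE(cond, page)				\
		do {							\
			if (cond) {					\
				demo_dump_page(page);			\
				fprintf(stderr, "BUG: %s\n", #cond);	\
				abort();				\
			}						\
		} while (0)

	int main(void)
	{
		struct demo_page page = { .flags = 0x1, .count = 1 };

		DEMO_BUG_ON_PAGE(page.count != 1, &page);	/* passes */
		DEMO_BUG_ON_PAGE(page.count != 0, &page);	/* trips and dumps the page */
		return 0;
	}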
@@ -561,8 +566,8 @@ static inline void __free_one_page(struct page *page,
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
-	VM_BUG_ON(page_idx & ((1 << order) - 1));
-	VM_BUG_ON(bad_range(zone, page));
+	VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
+	VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
 	while (order < MAX_ORDER-1) {
 		buddy_idx = __find_buddy_index(page_idx, order);
@@ -618,12 +623,23 @@ out:
 
 static inline int free_pages_check(struct page *page)
 {
-	if (unlikely(page_mapcount(page) |
-		(page->mapping != NULL) |
-		(atomic_read(&page->_count) != 0) |
-		(page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
-		(mem_cgroup_bad_page_check(page)))) {
-		bad_page(page);
+	char *bad_reason = NULL;
+	unsigned long bad_flags = 0;
+
+	if (unlikely(page_mapcount(page)))
+		bad_reason = "nonzero mapcount";
+	if (unlikely(page->mapping != NULL))
+		bad_reason = "non-NULL mapping";
+	if (unlikely(atomic_read(&page->_count) != 0))
+		bad_reason = "nonzero _count";
+	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
+		bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
+		bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
+	}
+	if (unlikely(mem_cgroup_bad_page_check(page)))
+		bad_reason = "cgroup check failed";
+	if (unlikely(bad_reason)) {
+		bad_page(page, bad_reason, bad_flags);
 		return 1;
 	}
 	page_cpupid_reset_last(page);
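This rewrite (and the matching one in check_new_page() below) trades a single OR'd predicate for a series of checks that record a human-readable reason, so bad_page() can report which invariant actually failed; note that if several checks fail, the last reason recorded is the one reported. A small userspace sketch of that shape, with illustrative names and checks only:

	/* "Record a reason, report once" validation shape; not kernel code. */
	#include <stdio.h>

	struct demo_page {
		int mapcount;
		int refcount;
		void *mapping;
	};

	static int demo_check(const struct demo_page *p)
	{
		const char *bad_reason = NULL;

		if (p->mapcount)
			bad_reason = "nonzero mapcount";
		if (p->mapping != NULL)
			bad_reason = "non-NULL mapping";
		if (p->refcount)
			bad_reason = "nonzero refcount";
		if (bad_reason) {
			/* if several checks fail, the last reason wins */
			fprintf(stderr, "bad page: %s\n", bad_reason);
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		struct demo_page ok = { 0, 0, NULL };
		struct demo_page bad = { 1, 2, NULL };

		printf("ok:  %d\n", demo_check(&ok));	/* 0 */
		printf("bad: %d\n", demo_check(&bad));	/* 1, reports "nonzero refcount" */
		return 0;
	}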
@@ -813,7 +829,7 @@ static inline void expand(struct zone *zone, struct page *page,
 		area--;
 		high--;
 		size >>= 1;
-		VM_BUG_ON(bad_range(zone, &page[size]));
+		VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 		if (high < debug_guardpage_minorder()) {
@@ -843,12 +859,23 @@ static inline void expand(struct zone *zone, struct page *page,
  */
 static inline int check_new_page(struct page *page)
 {
-	if (unlikely(page_mapcount(page) |
-		(page->mapping != NULL) |
-		(atomic_read(&page->_count) != 0) |
-		(page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
-		(mem_cgroup_bad_page_check(page)))) {
-		bad_page(page);
+	char *bad_reason = NULL;
+	unsigned long bad_flags = 0;
+
+	if (unlikely(page_mapcount(page)))
+		bad_reason = "nonzero mapcount";
+	if (unlikely(page->mapping != NULL))
+		bad_reason = "non-NULL mapping";
+	if (unlikely(atomic_read(&page->_count) != 0))
+		bad_reason = "nonzero _count";
+	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
+		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
+		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
+	}
+	if (unlikely(mem_cgroup_bad_page_check(page)))
+		bad_reason = "cgroup check failed";
+	if (unlikely(bad_reason)) {
+		bad_page(page, bad_reason, bad_flags);
 		return 1;
 	}
 	return 0;
@@ -955,7 +982,7 @@ int move_freepages(struct zone *zone,
 
 	for (page = start_page; page <= end_page;) {
 		/* Make sure we are not inadvertently changing nodes */
-		VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
+		VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
 
 		if (!pfn_valid_within(page_to_pfn(page))) {
 			page++;
@@ -1211,6 +1238,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	}
 	local_irq_restore(flags);
 }
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+	return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
+}
+#else
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+	return false;
+}
 #endif
 
 /*
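GFP_THISNODE is a combination of several GFP bits, so the helper compares the masked value against the whole constant instead of treating the & result as a boolean: a plain `if (gfp_mask & GFP_THISNODE)` would also fire when only some of the component bits are set. The #else stub preserves the old `IS_ENABLED(CONFIG_NUMA) &&` guard seen further down, where the test would otherwise match every mask on !NUMA builds. A small standalone illustration of the mask test (the flag values below are invented, not the kernel's):

	/* Why "(mask & FLAGS) == FLAGS" rather than "mask & FLAGS" for a
	 * multi-bit FLAGS; values are illustrative, not real GFP bits. */
	#include <stdio.h>
	#include <stdbool.h>

	#define DEMO_NOWARN	0x1u
	#define DEMO_NORETRY	0x2u
	#define DEMO_THISNODE	0x4u
	#define DEMO_THISNODE_ALLOC (DEMO_THISNODE | DEMO_NOWARN | DEMO_NORETRY)

	static bool is_thisnode_alloc(unsigned int mask)
	{
		return (mask & DEMO_THISNODE_ALLOC) == DEMO_THISNODE_ALLOC;
	}

	int main(void)
	{
		printf("%d\n", is_thisnode_alloc(DEMO_THISNODE_ALLOC));		/* 1 */
		printf("%d\n", is_thisnode_alloc(DEMO_NOWARN));			/* 0: only one bit set */
		printf("%d\n", is_thisnode_alloc(DEMO_THISNODE_ALLOC | 0x8u));	/* 1: extra bits are fine */
		return 0;
	}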
@@ -1404,8 +1440,8 @@ void split_page(struct page *page, unsigned int order)
 {
 	int i;
 
-	VM_BUG_ON(PageCompound(page));
-	VM_BUG_ON(!page_count(page));
+	VM_BUG_ON_PAGE(PageCompound(page), page);
+	VM_BUG_ON_PAGE(!page_count(page), page);
 
 #ifdef CONFIG_KMEMCHECK
 	/*
@@ -1547,12 +1583,18 @@ again:
 					  get_pageblock_migratetype(page));
 	}
 
-	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	/*
+	 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
+	 * aging protocol, so they can't be fair.
+	 */
+	if (!gfp_thisnode_allocation(gfp_flags))
+		__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
 	local_irq_restore(flags);
 
-	VM_BUG_ON(bad_range(zone, page));
+	VM_BUG_ON_PAGE(bad_range(zone, page), page);
 	if (prep_new_page(page, order, gfp_flags))
 		goto again;
 	return page;
@@ -1919,8 +1961,12 @@ zonelist_scan:
 			 * ultimately fall back to remote zones that do not
 			 * partake in the fairness round-robin cycle of this
 			 * zonelist.
+			 *
+			 * NOTE: GFP_THISNODE allocations do not partake in
+			 * the kswapd aging protocol, so they can't be fair.
 			 */
-			if (alloc_flags & ALLOC_WMARK_LOW) {
+			if ((alloc_flags & ALLOC_WMARK_LOW) &&
+			    !gfp_thisnode_allocation(gfp_mask)) {
 				if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
 					continue;
 				if (!zone_local(preferred_zone, zone))
@@ -2072,13 +2118,6 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 		return;
 
 	/*
-	 * Walking all memory to count page types is very expensive and should
-	 * be inhibited in non-blockable contexts.
-	 */
-	if (!(gfp_mask & __GFP_WAIT))
-		filter |= SHOW_MEM_FILTER_PAGE_COUNT;
-
-	/*
 	 * This documents exceptions given to allocations in certain
 	 * contexts that are allowed to allocate outside current's set
 	 * of allowed nodes.
@@ -2242,10 +2281,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				preferred_zone, migratetype);
 		if (page) {
 			preferred_zone->compact_blockskip_flush = false;
-			preferred_zone->compact_considered = 0;
-			preferred_zone->compact_defer_shift = 0;
-			if (order >= preferred_zone->compact_order_failed)
-				preferred_zone->compact_order_failed = order + 1;
+			compaction_defer_reset(preferred_zone, order, true);
 			count_vm_event(COMPACTSUCCESS);
 			return page;
 		}
@@ -2486,8 +2522,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * allowed per node queues are empty and that nodes are
 	 * over allocated.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) &&
-	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (gfp_thisnode_allocation(gfp_mask))
 		goto nopage;
 
 restart:
@@ -2535,8 +2570,15 @@ rebalance:
 	}
 
 	/* Atomic allocations - we can't balance anything */
-	if (!wait)
+	if (!wait) {
+		/*
+		 * All existing users of the deprecated __GFP_NOFAIL are
+		 * blockable, so warn of any new users that actually allow this
+		 * type of allocation to fail.
+		 */
+		WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL);
 		goto nopage;
+	}
 
 	/* Avoid recursion of direct reclaim */
 	if (current->flags & PF_MEMALLOC)
@@ -3901,6 +3943,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	struct page *page;
 	unsigned long block_migratetype;
 	int reserve;
+	int old_reserve;
 
 	/*
 	 * Get the start pfn, end pfn and the number of blocks to reserve
@@ -3922,6 +3965,12 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	 * future allocation of hugepages at runtime.
 	 */
 	reserve = min(2, reserve);
+	old_reserve = zone->nr_migrate_reserve_block;
+
+	/* When memory hot-add, we almost always need to do nothing */
+	if (reserve == old_reserve)
+		return;
+	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
@@ -3959,6 +4008,12 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 				reserve--;
 				continue;
 			}
+		} else if (!old_reserve) {
+			/*
+			 * At boot time we don't need to scan the whole zone
+			 * for turning off MIGRATE_RESERVE.
+			 */
+			break;
 		}
 
 		/*
@@ -4209,7 +4264,6 @@ static noinline __init_refok
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
 	int i;
-	struct pglist_data *pgdat = zone->zone_pgdat;
 	size_t alloc_size;
 
 	/*
@@ -4225,7 +4279,8 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 
 	if (!slab_is_available()) {
 		zone->wait_table = (wait_queue_head_t *)
-			alloc_bootmem_node_nopanic(pgdat, alloc_size);
+			memblock_virt_alloc_node_nopanic(
+				alloc_size, zone->zone_pgdat->node_id);
 	} else {
 		/*
 		 * This case means that a zone whose size was 0 gets new memory
@@ -4345,13 +4400,14 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
 #endif
 
 /**
- * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
+ * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
- * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
+ * @max_low_pfn: The highest PFN that will be passed to memblock_free_early_nid
  *
  * If an architecture guarantees that all ranges registered with
  * add_active_ranges() contain no holes and may be freed, this
- * this function may be used instead of calling free_bootmem() manually.
+ * this function may be used instead of calling memblock_free_early_nid()
+ * manually.
  */
 void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 {
@@ -4363,9 +4419,9 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 		end_pfn = min(end_pfn, max_low_pfn);
 
 		if (start_pfn < end_pfn)
-			free_bootmem_node(NODE_DATA(this_nid),
-					  PFN_PHYS(start_pfn),
-					  (end_pfn - start_pfn) << PAGE_SHIFT);
+			memblock_free_early_nid(PFN_PHYS(start_pfn),
+					(end_pfn - start_pfn) << PAGE_SHIFT,
+					this_nid);
 	}
 }
 
@@ -4636,8 +4692,9 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
-		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
-								   usemapsize);
+		zone->pageblock_flags =
+			memblock_virt_alloc_node_nopanic(usemapsize,
+							 pgdat->node_id);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
@@ -4831,7 +4888,8 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		size = (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
-			map = alloc_bootmem_node_nopanic(pgdat, size);
+			map = memblock_virt_alloc_node_nopanic(size,
+							       pgdat->node_id);
 		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 	}
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -5012,9 +5070,33 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	nodemask_t saved_node_state = node_states[N_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
 	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
+	struct memblock_type *type = &memblock.memory;
+
+	/* Need to find movable_zone earlier when movable_node is specified. */
+	find_usable_zone_for_movable();
 
 	/*
-	 * If movablecore was specified, calculate what size of
+	 * If movable_node is specified, ignore kernelcore and movablecore
+	 * options.
+	 */
+	if (movable_node_is_enabled()) {
+		for (i = 0; i < type->cnt; i++) {
+			if (!memblock_is_hotpluggable(&type->regions[i]))
+				continue;
+
+			nid = type->regions[i].nid;
+
+			usable_startpfn = PFN_DOWN(type->regions[i].base);
+			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+				min(usable_startpfn, zone_movable_pfn[nid]) :
+				usable_startpfn;
+		}
+
+		goto out2;
+	}
+
+	/*
+	 * If movablecore=nn[KMG] was specified, calculate what size of
 	 * kernelcore that corresponds so that memory usable for
 	 * any allocation type is evenly spread. If both kernelcore
 	 * and movablecore are specified, then the value of kernelcore
@@ -5040,7 +5122,6 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 		goto out;
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
-	find_usable_zone_for_movable();
 	usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -5131,6 +5212,7 @@ restart:
 	if (usable_nodes && required_kernelcore > usable_nodes)
 		goto restart;
 
+out2:
 	/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
@@ -5692,7 +5774,12 @@ module_init(init_per_zone_wmark_min)
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec(table, write, buffer, length, ppos);
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (rc)
+		return rc;
+
 	if (write) {
 		user_min_free_kbytes = min_free_kbytes;
 		setup_per_zone_wmarks();
@@ -5857,7 +5944,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	do {
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY)
-			table = alloc_bootmem_nopanic(size);
+			table = memblock_virt_alloc_nopanic(size, 0);
 		else if (hashdist)
 			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
 		else {
@@ -5959,7 +6046,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
-	VM_BUG_ON(!zone_spans_pfn(zone, pfn));
+	VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
 
 	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
 		if (flags & value)
@@ -6457,12 +6544,24 @@ static void dump_page_flags(unsigned long flags)
 	printk(")\n");
 }
 
-void dump_page(struct page *page)
+void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
 {
 	printk(KERN_ALERT
 	       "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
 		page, atomic_read(&page->_count), page_mapcount(page),
 		page->mapping, page->index);
 	dump_page_flags(page->flags);
+	if (reason)
+		pr_alert("page dumped because: %s\n", reason);
+	if (page->flags & badflags) {
+		pr_alert("bad because of flags:\n");
+		dump_page_flags(page->flags & badflags);
+	}
 	mem_cgroup_print_bad_page(page);
 }
+
+void dump_page(struct page *page, char *reason)
+{
+	dump_page_badflags(page, reason, 0);
+}
+EXPORT_SYMBOL_GPL(dump_page);
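With this split, dump_page() becomes a thin wrapper that passes badflags == 0, while bad_page() hands dump_page_badflags() the exact flag mask that failed its check so only the offending bits are highlighted (page->flags & badflags). A self-contained userspace analogy of that wrapper-plus-filter shape, with all names and flag values invented for illustration:

	/* Userspace analogy of the dump_page()/dump_page_badflags() split. */
	#include <stdio.h>

	struct demo_page {
		unsigned long flags;
	};

	static void demo_dump_badflags(const struct demo_page *page,
				       const char *reason, unsigned long badflags)
	{
		printf("page flags: %#lx\n", page->flags);
		if (reason)
			printf("page dumped because: %s\n", reason);
		if (page->flags & badflags)
			printf("bad because of flags: %#lx\n", page->flags & badflags);
	}

	static void demo_dump(const struct demo_page *page, const char *reason)
	{
		demo_dump_badflags(page, reason, 0);	/* wrapper: highlight nothing */
	}

	int main(void)
	{
		struct demo_page page = { .flags = 0x60 };

		demo_dump(&page, "plain dump");
		demo_dump_badflags(&page, "flags check failed", 0x20);	/* highlights 0x20 */
		return 0;
	}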