author     Dmitry Torokhov <dmitry.torokhov@gmail.com>  2015-02-10 14:35:36 -0500
committer  Dmitry Torokhov <dmitry.torokhov@gmail.com>  2015-02-10 14:35:36 -0500
commit     4ba24fef3eb3b142197135223b90ced2f319cd53 (patch)
tree       a20c125b27740ec7b4c761b11d801108e1b316b2 /mm/page_alloc.c
parent     47c1ffb2b6b630894e9a16442611c056ab21c057 (diff)
parent     98a4a59ee31a12105a2b84f5b8b515ac2cb208ef (diff)
Merge branch 'next' into for-linus
Prepare first round of input updates for 3.20.
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 676
1 file changed, 333 insertions(+), 343 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eee961958021..7633c503a116 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,19 +48,18 @@ | |||
48 | #include <linux/backing-dev.h> | 48 | #include <linux/backing-dev.h> |
49 | #include <linux/fault-inject.h> | 49 | #include <linux/fault-inject.h> |
50 | #include <linux/page-isolation.h> | 50 | #include <linux/page-isolation.h> |
51 | #include <linux/page_cgroup.h> | 51 | #include <linux/page_ext.h> |
52 | #include <linux/debugobjects.h> | 52 | #include <linux/debugobjects.h> |
53 | #include <linux/kmemleak.h> | 53 | #include <linux/kmemleak.h> |
54 | #include <linux/compaction.h> | 54 | #include <linux/compaction.h> |
55 | #include <trace/events/kmem.h> | 55 | #include <trace/events/kmem.h> |
56 | #include <linux/ftrace_event.h> | ||
57 | #include <linux/memcontrol.h> | ||
58 | #include <linux/prefetch.h> | 56 | #include <linux/prefetch.h> |
59 | #include <linux/mm_inline.h> | 57 | #include <linux/mm_inline.h> |
60 | #include <linux/migrate.h> | 58 | #include <linux/migrate.h> |
61 | #include <linux/page-debug-flags.h> | 59 | #include <linux/page_ext.h> |
62 | #include <linux/hugetlb.h> | 60 | #include <linux/hugetlb.h> |
63 | #include <linux/sched/rt.h> | 61 | #include <linux/sched/rt.h> |
62 | #include <linux/page_owner.h> | ||
64 | 63 | ||
65 | #include <asm/sections.h> | 64 | #include <asm/sections.h> |
66 | #include <asm/tlbflush.h> | 65 | #include <asm/tlbflush.h> |
@@ -85,6 +84,7 @@ EXPORT_PER_CPU_SYMBOL(numa_node); | |||
85 | */ | 84 | */ |
86 | DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */ | 85 | DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */ |
87 | EXPORT_PER_CPU_SYMBOL(_numa_mem_); | 86 | EXPORT_PER_CPU_SYMBOL(_numa_mem_); |
87 | int _node_numa_mem_[MAX_NUMNODES]; | ||
88 | #endif | 88 | #endif |
89 | 89 | ||
90 | /* | 90 | /* |
@@ -111,6 +111,7 @@ static DEFINE_SPINLOCK(managed_page_count_lock); | |||
111 | 111 | ||
112 | unsigned long totalram_pages __read_mostly; | 112 | unsigned long totalram_pages __read_mostly; |
113 | unsigned long totalreserve_pages __read_mostly; | 113 | unsigned long totalreserve_pages __read_mostly; |
114 | unsigned long totalcma_pages __read_mostly; | ||
114 | /* | 115 | /* |
115 | * When calculating the number of globally allowed dirty pages, there | 116 | * When calculating the number of globally allowed dirty pages, there |
116 | * is a certain number of per-zone reserves that should not be | 117 | * is a certain number of per-zone reserves that should not be |
@@ -426,6 +427,42 @@ static inline void prep_zero_page(struct page *page, unsigned int order, | |||
426 | 427 | ||
427 | #ifdef CONFIG_DEBUG_PAGEALLOC | 428 | #ifdef CONFIG_DEBUG_PAGEALLOC |
428 | unsigned int _debug_guardpage_minorder; | 429 | unsigned int _debug_guardpage_minorder; |
430 | bool _debug_pagealloc_enabled __read_mostly; | ||
431 | bool _debug_guardpage_enabled __read_mostly; | ||
432 | |||
433 | static int __init early_debug_pagealloc(char *buf) | ||
434 | { | ||
435 | if (!buf) | ||
436 | return -EINVAL; | ||
437 | |||
438 | if (strcmp(buf, "on") == 0) | ||
439 | _debug_pagealloc_enabled = true; | ||
440 | |||
441 | return 0; | ||
442 | } | ||
443 | early_param("debug_pagealloc", early_debug_pagealloc); | ||
444 | |||
445 | static bool need_debug_guardpage(void) | ||
446 | { | ||
447 | /* If we don't use debug_pagealloc, we don't need guard page */ | ||
448 | if (!debug_pagealloc_enabled()) | ||
449 | return false; | ||
450 | |||
451 | return true; | ||
452 | } | ||
453 | |||
454 | static void init_debug_guardpage(void) | ||
455 | { | ||
456 | if (!debug_pagealloc_enabled()) | ||
457 | return; | ||
458 | |||
459 | _debug_guardpage_enabled = true; | ||
460 | } | ||
461 | |||
462 | struct page_ext_operations debug_guardpage_ops = { | ||
463 | .need = need_debug_guardpage, | ||
464 | .init = init_debug_guardpage, | ||
465 | }; | ||
429 | 466 | ||
430 | static int __init debug_guardpage_minorder_setup(char *buf) | 467 | static int __init debug_guardpage_minorder_setup(char *buf) |
431 | { | 468 | { |
@@ -441,18 +478,44 @@ static int __init debug_guardpage_minorder_setup(char *buf) | |||
441 | } | 478 | } |
442 | __setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup); | 479 | __setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup); |
443 | 480 | ||
444 | static inline void set_page_guard_flag(struct page *page) | 481 | static inline void set_page_guard(struct zone *zone, struct page *page, |
482 | unsigned int order, int migratetype) | ||
445 | { | 483 | { |
446 | __set_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags); | 484 | struct page_ext *page_ext; |
485 | |||
486 | if (!debug_guardpage_enabled()) | ||
487 | return; | ||
488 | |||
489 | page_ext = lookup_page_ext(page); | ||
490 | __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); | ||
491 | |||
492 | INIT_LIST_HEAD(&page->lru); | ||
493 | set_page_private(page, order); | ||
494 | /* Guard pages are not available for any usage */ | ||
495 | __mod_zone_freepage_state(zone, -(1 << order), migratetype); | ||
447 | } | 496 | } |
448 | 497 | ||
449 | static inline void clear_page_guard_flag(struct page *page) | 498 | static inline void clear_page_guard(struct zone *zone, struct page *page, |
499 | unsigned int order, int migratetype) | ||
450 | { | 500 | { |
451 | __clear_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags); | 501 | struct page_ext *page_ext; |
502 | |||
503 | if (!debug_guardpage_enabled()) | ||
504 | return; | ||
505 | |||
506 | page_ext = lookup_page_ext(page); | ||
507 | __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); | ||
508 | |||
509 | set_page_private(page, 0); | ||
510 | if (!is_migrate_isolate(migratetype)) | ||
511 | __mod_zone_freepage_state(zone, (1 << order), migratetype); | ||
452 | } | 512 | } |
453 | #else | 513 | #else |
454 | static inline void set_page_guard_flag(struct page *page) { } | 514 | struct page_ext_operations debug_guardpage_ops = { NULL, }; |
455 | static inline void clear_page_guard_flag(struct page *page) { } | 515 | static inline void set_page_guard(struct zone *zone, struct page *page, |
516 | unsigned int order, int migratetype) {} | ||
517 | static inline void clear_page_guard(struct zone *zone, struct page *page, | ||
518 | unsigned int order, int migratetype) {} | ||
456 | #endif | 519 | #endif |
457 | 520 | ||
458 | static inline void set_page_order(struct page *page, unsigned int order) | 521 | static inline void set_page_order(struct page *page, unsigned int order) |
@@ -468,29 +531,6 @@ static inline void rmv_page_order(struct page *page) | |||
468 | } | 531 | } |
469 | 532 | ||
470 | /* | 533 | /* |
471 | * Locate the struct page for both the matching buddy in our | ||
472 | * pair (buddy1) and the combined O(n+1) page they form (page). | ||
473 | * | ||
474 | * 1) Any buddy B1 will have an order O twin B2 which satisfies | ||
475 | * the following equation: | ||
476 | * B2 = B1 ^ (1 << O) | ||
477 | * For example, if the starting buddy (buddy2) is #8 its order | ||
478 | * 1 buddy is #10: | ||
479 | * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 | ||
480 | * | ||
481 | * 2) Any buddy B will have an order O+1 parent P which | ||
482 | * satisfies the following equation: | ||
483 | * P = B & ~(1 << O) | ||
484 | * | ||
485 | * Assumption: *_mem_map is contiguous at least up to MAX_ORDER | ||
486 | */ | ||
487 | static inline unsigned long | ||
488 | __find_buddy_index(unsigned long page_idx, unsigned int order) | ||
489 | { | ||
490 | return page_idx ^ (1 << order); | ||
491 | } | ||
492 | |||
493 | /* | ||
494 | * This function checks whether a page is free && is the buddy | 534 | * This function checks whether a page is free && is the buddy |
495 | * we can do coalesce a page and its buddy if | 535 | * we can do coalesce a page and its buddy if |
496 | * (a) the buddy is not in a hole && | 536 | * (a) the buddy is not in a hole && |
@@ -570,6 +610,7 @@ static inline void __free_one_page(struct page *page, | |||
570 | unsigned long combined_idx; | 610 | unsigned long combined_idx; |
571 | unsigned long uninitialized_var(buddy_idx); | 611 | unsigned long uninitialized_var(buddy_idx); |
572 | struct page *buddy; | 612 | struct page *buddy; |
613 | int max_order = MAX_ORDER; | ||
573 | 614 | ||
574 | VM_BUG_ON(!zone_is_initialized(zone)); | 615 | VM_BUG_ON(!zone_is_initialized(zone)); |
575 | 616 | ||
@@ -578,13 +619,24 @@ static inline void __free_one_page(struct page *page, | |||
578 | return; | 619 | return; |
579 | 620 | ||
580 | VM_BUG_ON(migratetype == -1); | 621 | VM_BUG_ON(migratetype == -1); |
622 | if (is_migrate_isolate(migratetype)) { | ||
623 | /* | ||
624 | * We restrict max order of merging to prevent merge | ||
625 | * between freepages on isolate pageblock and normal | ||
626 | * pageblock. Without this, pageblock isolation | ||
627 | * could cause incorrect freepage accounting. | ||
628 | */ | ||
629 | max_order = min(MAX_ORDER, pageblock_order + 1); | ||
630 | } else { | ||
631 | __mod_zone_freepage_state(zone, 1 << order, migratetype); | ||
632 | } | ||
581 | 633 | ||
582 | page_idx = pfn & ((1 << MAX_ORDER) - 1); | 634 | page_idx = pfn & ((1 << max_order) - 1); |
583 | 635 | ||
584 | VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); | 636 | VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); |
585 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | 637 | VM_BUG_ON_PAGE(bad_range(zone, page), page); |
586 | 638 | ||
587 | while (order < MAX_ORDER-1) { | 639 | while (order < max_order - 1) { |
588 | buddy_idx = __find_buddy_index(page_idx, order); | 640 | buddy_idx = __find_buddy_index(page_idx, order); |
589 | buddy = page + (buddy_idx - page_idx); | 641 | buddy = page + (buddy_idx - page_idx); |
590 | if (!page_is_buddy(page, buddy, order)) | 642 | if (!page_is_buddy(page, buddy, order)) |
@@ -594,10 +646,7 @@ static inline void __free_one_page(struct page *page, | |||
594 | * merge with it and move up one order. | 646 | * merge with it and move up one order. |
595 | */ | 647 | */ |
596 | if (page_is_guard(buddy)) { | 648 | if (page_is_guard(buddy)) { |
597 | clear_page_guard_flag(buddy); | 649 | clear_page_guard(zone, buddy, order, migratetype); |
598 | set_page_private(page, 0); | ||
599 | __mod_zone_freepage_state(zone, 1 << order, | ||
600 | migratetype); | ||
601 | } else { | 650 | } else { |
602 | list_del(&buddy->lru); | 651 | list_del(&buddy->lru); |
603 | zone->free_area[order].nr_free--; | 652 | zone->free_area[order].nr_free--; |
@@ -651,8 +700,10 @@ static inline int free_pages_check(struct page *page) | |||
651 | bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set"; | 700 | bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set"; |
652 | bad_flags = PAGE_FLAGS_CHECK_AT_FREE; | 701 | bad_flags = PAGE_FLAGS_CHECK_AT_FREE; |
653 | } | 702 | } |
654 | if (unlikely(mem_cgroup_bad_page_check(page))) | 703 | #ifdef CONFIG_MEMCG |
655 | bad_reason = "cgroup check failed"; | 704 | if (unlikely(page->mem_cgroup)) |
705 | bad_reason = "page still charged to cgroup"; | ||
706 | #endif | ||
656 | if (unlikely(bad_reason)) { | 707 | if (unlikely(bad_reason)) { |
657 | bad_page(page, bad_reason, bad_flags); | 708 | bad_page(page, bad_reason, bad_flags); |
658 | return 1; | 709 | return 1; |
@@ -716,14 +767,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
716 | /* must delete as __free_one_page list manipulates */ | 767 | /* must delete as __free_one_page list manipulates */ |
717 | list_del(&page->lru); | 768 | list_del(&page->lru); |
718 | mt = get_freepage_migratetype(page); | 769 | mt = get_freepage_migratetype(page); |
770 | if (unlikely(has_isolate_pageblock(zone))) | ||
771 | mt = get_pageblock_migratetype(page); | ||
772 | |||
719 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ | 773 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ |
720 | __free_one_page(page, page_to_pfn(page), zone, 0, mt); | 774 | __free_one_page(page, page_to_pfn(page), zone, 0, mt); |
721 | trace_mm_page_pcpu_drain(page, 0, mt); | 775 | trace_mm_page_pcpu_drain(page, 0, mt); |
722 | if (likely(!is_migrate_isolate_page(page))) { | ||
723 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1); | ||
724 | if (is_migrate_cma(mt)) | ||
725 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); | ||
726 | } | ||
727 | } while (--to_free && --batch_free && !list_empty(list)); | 776 | } while (--to_free && --batch_free && !list_empty(list)); |
728 | } | 777 | } |
729 | spin_unlock(&zone->lock); | 778 | spin_unlock(&zone->lock); |
@@ -740,9 +789,11 @@ static void free_one_page(struct zone *zone, | |||
740 | if (nr_scanned) | 789 | if (nr_scanned) |
741 | __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); | 790 | __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); |
742 | 791 | ||
792 | if (unlikely(has_isolate_pageblock(zone) || | ||
793 | is_migrate_isolate(migratetype))) { | ||
794 | migratetype = get_pfnblock_migratetype(page, pfn); | ||
795 | } | ||
743 | __free_one_page(page, pfn, zone, order, migratetype); | 796 | __free_one_page(page, pfn, zone, order, migratetype); |
744 | if (unlikely(!is_migrate_isolate(migratetype))) | ||
745 | __mod_zone_freepage_state(zone, 1 << order, migratetype); | ||
746 | spin_unlock(&zone->lock); | 797 | spin_unlock(&zone->lock); |
747 | } | 798 | } |
748 | 799 | ||
@@ -751,6 +802,9 @@ static bool free_pages_prepare(struct page *page, unsigned int order) | |||
751 | int i; | 802 | int i; |
752 | int bad = 0; | 803 | int bad = 0; |
753 | 804 | ||
805 | VM_BUG_ON_PAGE(PageTail(page), page); | ||
806 | VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page); | ||
807 | |||
754 | trace_mm_page_free(page, order); | 808 | trace_mm_page_free(page, order); |
755 | kmemcheck_free_shadow(page, order); | 809 | kmemcheck_free_shadow(page, order); |
756 | 810 | ||
@@ -761,6 +815,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order) | |||
761 | if (bad) | 815 | if (bad) |
762 | return false; | 816 | return false; |
763 | 817 | ||
818 | reset_page_owner(page, order); | ||
819 | |||
764 | if (!PageHighMem(page)) { | 820 | if (!PageHighMem(page)) { |
765 | debug_check_no_locks_freed(page_address(page), | 821 | debug_check_no_locks_freed(page_address(page), |
766 | PAGE_SIZE << order); | 822 | PAGE_SIZE << order); |
@@ -867,23 +923,18 @@ static inline void expand(struct zone *zone, struct page *page, | |||
867 | size >>= 1; | 923 | size >>= 1; |
868 | VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]); | 924 | VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]); |
869 | 925 | ||
870 | #ifdef CONFIG_DEBUG_PAGEALLOC | 926 | if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && |
871 | if (high < debug_guardpage_minorder()) { | 927 | debug_guardpage_enabled() && |
928 | high < debug_guardpage_minorder()) { | ||
872 | /* | 929 | /* |
873 | * Mark as guard pages (or page), that will allow to | 930 | * Mark as guard pages (or page), that will allow to |
874 | * merge back to allocator when buddy will be freed. | 931 | * merge back to allocator when buddy will be freed. |
875 | * Corresponding page table entries will not be touched, | 932 | * Corresponding page table entries will not be touched, |
876 | * pages will stay not present in virtual address space | 933 | * pages will stay not present in virtual address space |
877 | */ | 934 | */ |
878 | INIT_LIST_HEAD(&page[size].lru); | 935 | set_page_guard(zone, &page[size], high, migratetype); |
879 | set_page_guard_flag(&page[size]); | ||
880 | set_page_private(&page[size], high); | ||
881 | /* Guard pages are not available for any usage */ | ||
882 | __mod_zone_freepage_state(zone, -(1 << high), | ||
883 | migratetype); | ||
884 | continue; | 936 | continue; |
885 | } | 937 | } |
886 | #endif | ||
887 | list_add(&page[size].lru, &area->free_list[migratetype]); | 938 | list_add(&page[size].lru, &area->free_list[migratetype]); |
888 | area->nr_free++; | 939 | area->nr_free++; |
889 | set_page_order(&page[size], high); | 940 | set_page_order(&page[size], high); |
@@ -908,8 +959,10 @@ static inline int check_new_page(struct page *page) | |||
908 | bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; | 959 | bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; |
909 | bad_flags = PAGE_FLAGS_CHECK_AT_PREP; | 960 | bad_flags = PAGE_FLAGS_CHECK_AT_PREP; |
910 | } | 961 | } |
911 | if (unlikely(mem_cgroup_bad_page_check(page))) | 962 | #ifdef CONFIG_MEMCG |
912 | bad_reason = "cgroup check failed"; | 963 | if (unlikely(page->mem_cgroup)) |
964 | bad_reason = "page still charged to cgroup"; | ||
965 | #endif | ||
913 | if (unlikely(bad_reason)) { | 966 | if (unlikely(bad_reason)) { |
914 | bad_page(page, bad_reason, bad_flags); | 967 | bad_page(page, bad_reason, bad_flags); |
915 | return 1; | 968 | return 1; |
@@ -939,6 +992,8 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags) | |||
939 | if (order && (gfp_flags & __GFP_COMP)) | 992 | if (order && (gfp_flags & __GFP_COMP)) |
940 | prep_compound_page(page, order); | 993 | prep_compound_page(page, order); |
941 | 994 | ||
995 | set_page_owner(page, order, gfp_flags); | ||
996 | |||
942 | return 0; | 997 | return 0; |
943 | } | 998 | } |
944 | 999 | ||
@@ -1014,7 +1069,7 @@ int move_freepages(struct zone *zone, | |||
1014 | * Remove at a later date when no bug reports exist related to | 1069 | * Remove at a later date when no bug reports exist related to |
1015 | * grouping pages by mobility | 1070 | * grouping pages by mobility |
1016 | */ | 1071 | */ |
1017 | BUG_ON(page_zone(start_page) != page_zone(end_page)); | 1072 | VM_BUG_ON(page_zone(start_page) != page_zone(end_page)); |
1018 | #endif | 1073 | #endif |
1019 | 1074 | ||
1020 | for (page = start_page; page <= end_page;) { | 1075 | for (page = start_page; page <= end_page;) { |
@@ -1277,55 +1332,75 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | |||
1277 | #endif | 1332 | #endif |
1278 | 1333 | ||
1279 | /* | 1334 | /* |
1280 | * Drain pages of the indicated processor. | 1335 | * Drain pcplists of the indicated processor and zone. |
1281 | * | 1336 | * |
1282 | * The processor must either be the current processor and the | 1337 | * The processor must either be the current processor and the |
1283 | * thread pinned to the current processor or a processor that | 1338 | * thread pinned to the current processor or a processor that |
1284 | * is not online. | 1339 | * is not online. |
1285 | */ | 1340 | */ |
1286 | static void drain_pages(unsigned int cpu) | 1341 | static void drain_pages_zone(unsigned int cpu, struct zone *zone) |
1287 | { | 1342 | { |
1288 | unsigned long flags; | 1343 | unsigned long flags; |
1289 | struct zone *zone; | 1344 | struct per_cpu_pageset *pset; |
1345 | struct per_cpu_pages *pcp; | ||
1290 | 1346 | ||
1291 | for_each_populated_zone(zone) { | 1347 | local_irq_save(flags); |
1292 | struct per_cpu_pageset *pset; | 1348 | pset = per_cpu_ptr(zone->pageset, cpu); |
1293 | struct per_cpu_pages *pcp; | ||
1294 | 1349 | ||
1295 | local_irq_save(flags); | 1350 | pcp = &pset->pcp; |
1296 | pset = per_cpu_ptr(zone->pageset, cpu); | 1351 | if (pcp->count) { |
1352 | free_pcppages_bulk(zone, pcp->count, pcp); | ||
1353 | pcp->count = 0; | ||
1354 | } | ||
1355 | local_irq_restore(flags); | ||
1356 | } | ||
1297 | 1357 | ||
1298 | pcp = &pset->pcp; | 1358 | /* |
1299 | if (pcp->count) { | 1359 | * Drain pcplists of all zones on the indicated processor. |
1300 | free_pcppages_bulk(zone, pcp->count, pcp); | 1360 | * |
1301 | pcp->count = 0; | 1361 | * The processor must either be the current processor and the |
1302 | } | 1362 | * thread pinned to the current processor or a processor that |
1303 | local_irq_restore(flags); | 1363 | * is not online. |
1364 | */ | ||
1365 | static void drain_pages(unsigned int cpu) | ||
1366 | { | ||
1367 | struct zone *zone; | ||
1368 | |||
1369 | for_each_populated_zone(zone) { | ||
1370 | drain_pages_zone(cpu, zone); | ||
1304 | } | 1371 | } |
1305 | } | 1372 | } |
1306 | 1373 | ||
1307 | /* | 1374 | /* |
1308 | * Spill all of this CPU's per-cpu pages back into the buddy allocator. | 1375 | * Spill all of this CPU's per-cpu pages back into the buddy allocator. |
1376 | * | ||
1377 | * The CPU has to be pinned. When zone parameter is non-NULL, spill just | ||
1378 | * the single zone's pages. | ||
1309 | */ | 1379 | */ |
1310 | void drain_local_pages(void *arg) | 1380 | void drain_local_pages(struct zone *zone) |
1311 | { | 1381 | { |
1312 | drain_pages(smp_processor_id()); | 1382 | int cpu = smp_processor_id(); |
1383 | |||
1384 | if (zone) | ||
1385 | drain_pages_zone(cpu, zone); | ||
1386 | else | ||
1387 | drain_pages(cpu); | ||
1313 | } | 1388 | } |
1314 | 1389 | ||
1315 | /* | 1390 | /* |
1316 | * Spill all the per-cpu pages from all CPUs back into the buddy allocator. | 1391 | * Spill all the per-cpu pages from all CPUs back into the buddy allocator. |
1317 | * | 1392 | * |
1393 | * When zone parameter is non-NULL, spill just the single zone's pages. | ||
1394 | * | ||
1318 | * Note that this code is protected against sending an IPI to an offline | 1395 | * Note that this code is protected against sending an IPI to an offline |
1319 | * CPU but does not guarantee sending an IPI to newly hotplugged CPUs: | 1396 | * CPU but does not guarantee sending an IPI to newly hotplugged CPUs: |
1320 | * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but | 1397 | * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but |
1321 | * nothing keeps CPUs from showing up after we populated the cpumask and | 1398 | * nothing keeps CPUs from showing up after we populated the cpumask and |
1322 | * before the call to on_each_cpu_mask(). | 1399 | * before the call to on_each_cpu_mask(). |
1323 | */ | 1400 | */ |
1324 | void drain_all_pages(void) | 1401 | void drain_all_pages(struct zone *zone) |
1325 | { | 1402 | { |
1326 | int cpu; | 1403 | int cpu; |
1327 | struct per_cpu_pageset *pcp; | ||
1328 | struct zone *zone; | ||
1329 | 1404 | ||
1330 | /* | 1405 | /* |
1331 | * Allocate in the BSS so we wont require allocation in | 1406 | * Allocate in the BSS so we wont require allocation in |
@@ -1340,20 +1415,31 @@ void drain_all_pages(void) | |||
1340 | * disables preemption as part of its processing | 1415 | * disables preemption as part of its processing |
1341 | */ | 1416 | */ |
1342 | for_each_online_cpu(cpu) { | 1417 | for_each_online_cpu(cpu) { |
1418 | struct per_cpu_pageset *pcp; | ||
1419 | struct zone *z; | ||
1343 | bool has_pcps = false; | 1420 | bool has_pcps = false; |
1344 | for_each_populated_zone(zone) { | 1421 | |
1422 | if (zone) { | ||
1345 | pcp = per_cpu_ptr(zone->pageset, cpu); | 1423 | pcp = per_cpu_ptr(zone->pageset, cpu); |
1346 | if (pcp->pcp.count) { | 1424 | if (pcp->pcp.count) |
1347 | has_pcps = true; | 1425 | has_pcps = true; |
1348 | break; | 1426 | } else { |
1427 | for_each_populated_zone(z) { | ||
1428 | pcp = per_cpu_ptr(z->pageset, cpu); | ||
1429 | if (pcp->pcp.count) { | ||
1430 | has_pcps = true; | ||
1431 | break; | ||
1432 | } | ||
1349 | } | 1433 | } |
1350 | } | 1434 | } |
1435 | |||
1351 | if (has_pcps) | 1436 | if (has_pcps) |
1352 | cpumask_set_cpu(cpu, &cpus_with_pcps); | 1437 | cpumask_set_cpu(cpu, &cpus_with_pcps); |
1353 | else | 1438 | else |
1354 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | 1439 | cpumask_clear_cpu(cpu, &cpus_with_pcps); |
1355 | } | 1440 | } |
1356 | on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1); | 1441 | on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, |
1442 | zone, 1); | ||
1357 | } | 1443 | } |
1358 | 1444 | ||
1359 | #ifdef CONFIG_HIBERNATION | 1445 | #ifdef CONFIG_HIBERNATION |
@@ -1480,12 +1566,15 @@ void split_page(struct page *page, unsigned int order) | |||
1480 | split_page(virt_to_page(page[0].shadow), order); | 1566 | split_page(virt_to_page(page[0].shadow), order); |
1481 | #endif | 1567 | #endif |
1482 | 1568 | ||
1483 | for (i = 1; i < (1 << order); i++) | 1569 | set_page_owner(page, 0, 0); |
1570 | for (i = 1; i < (1 << order); i++) { | ||
1484 | set_page_refcounted(page + i); | 1571 | set_page_refcounted(page + i); |
1572 | set_page_owner(page + i, 0, 0); | ||
1573 | } | ||
1485 | } | 1574 | } |
1486 | EXPORT_SYMBOL_GPL(split_page); | 1575 | EXPORT_SYMBOL_GPL(split_page); |
1487 | 1576 | ||
1488 | static int __isolate_free_page(struct page *page, unsigned int order) | 1577 | int __isolate_free_page(struct page *page, unsigned int order) |
1489 | { | 1578 | { |
1490 | unsigned long watermark; | 1579 | unsigned long watermark; |
1491 | struct zone *zone; | 1580 | struct zone *zone; |
@@ -1521,6 +1610,7 @@ static int __isolate_free_page(struct page *page, unsigned int order) | |||
1521 | } | 1610 | } |
1522 | } | 1611 | } |
1523 | 1612 | ||
1613 | set_page_owner(page, order, 0); | ||
1524 | return 1UL << order; | 1614 | return 1UL << order; |
1525 | } | 1615 | } |
1526 | 1616 | ||
@@ -1613,8 +1703,8 @@ again: | |||
1613 | 1703 | ||
1614 | __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); | 1704 | __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); |
1615 | if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 && | 1705 | if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 && |
1616 | !zone_is_fair_depleted(zone)) | 1706 | !test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) |
1617 | zone_set_flag(zone, ZONE_FAIR_DEPLETED); | 1707 | set_bit(ZONE_FAIR_DEPLETED, &zone->flags); |
1618 | 1708 | ||
1619 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | 1709 | __count_zone_vm_events(PGALLOC, zone, 1 << order); |
1620 | zone_statistics(preferred_zone, zone, gfp_flags); | 1710 | zone_statistics(preferred_zone, zone, gfp_flags); |
@@ -1715,7 +1805,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order, | |||
1715 | unsigned long mark, int classzone_idx, int alloc_flags, | 1805 | unsigned long mark, int classzone_idx, int alloc_flags, |
1716 | long free_pages) | 1806 | long free_pages) |
1717 | { | 1807 | { |
1718 | /* free_pages my go negative - that's OK */ | 1808 | /* free_pages may go negative - that's OK */ |
1719 | long min = mark; | 1809 | long min = mark; |
1720 | int o; | 1810 | int o; |
1721 | long free_cma = 0; | 1811 | long free_cma = 0; |
@@ -1934,7 +2024,7 @@ static void reset_alloc_batches(struct zone *preferred_zone) | |||
1934 | mod_zone_page_state(zone, NR_ALLOC_BATCH, | 2024 | mod_zone_page_state(zone, NR_ALLOC_BATCH, |
1935 | high_wmark_pages(zone) - low_wmark_pages(zone) - | 2025 | high_wmark_pages(zone) - low_wmark_pages(zone) - |
1936 | atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); | 2026 | atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); |
1937 | zone_clear_flag(zone, ZONE_FAIR_DEPLETED); | 2027 | clear_bit(ZONE_FAIR_DEPLETED, &zone->flags); |
1938 | } while (zone++ != preferred_zone); | 2028 | } while (zone++ != preferred_zone); |
1939 | } | 2029 | } |
1940 | 2030 | ||
@@ -1963,7 +2053,7 @@ zonelist_scan: | |||
1963 | 2053 | ||
1964 | /* | 2054 | /* |
1965 | * Scan zonelist, looking for a zone with enough free. | 2055 | * Scan zonelist, looking for a zone with enough free. |
1966 | * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. | 2056 | * See also __cpuset_node_allowed() comment in kernel/cpuset.c. |
1967 | */ | 2057 | */ |
1968 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 2058 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
1969 | high_zoneidx, nodemask) { | 2059 | high_zoneidx, nodemask) { |
@@ -1974,7 +2064,7 @@ zonelist_scan: | |||
1974 | continue; | 2064 | continue; |
1975 | if (cpusets_enabled() && | 2065 | if (cpusets_enabled() && |
1976 | (alloc_flags & ALLOC_CPUSET) && | 2066 | (alloc_flags & ALLOC_CPUSET) && |
1977 | !cpuset_zone_allowed_softwall(zone, gfp_mask)) | 2067 | !cpuset_zone_allowed(zone, gfp_mask)) |
1978 | continue; | 2068 | continue; |
1979 | /* | 2069 | /* |
1980 | * Distribute pages in proportion to the individual | 2070 | * Distribute pages in proportion to the individual |
@@ -1985,7 +2075,7 @@ zonelist_scan: | |||
1985 | if (alloc_flags & ALLOC_FAIR) { | 2075 | if (alloc_flags & ALLOC_FAIR) { |
1986 | if (!zone_local(preferred_zone, zone)) | 2076 | if (!zone_local(preferred_zone, zone)) |
1987 | break; | 2077 | break; |
1988 | if (zone_is_fair_depleted(zone)) { | 2078 | if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) { |
1989 | nr_fair_skipped++; | 2079 | nr_fair_skipped++; |
1990 | continue; | 2080 | continue; |
1991 | } | 2081 | } |
@@ -2253,6 +2343,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, | |||
2253 | } | 2343 | } |
2254 | 2344 | ||
2255 | /* | 2345 | /* |
2346 | * PM-freezer should be notified that there might be an OOM killer on | ||
2347 | * its way to kill and wake somebody up. This is too early and we might | ||
2348 | * end up not killing anything but false positives are acceptable. | ||
2349 | * See freeze_processes. | ||
2350 | */ | ||
2351 | note_oom_kill(); | ||
2352 | |||
2353 | /* | ||
2256 | * Go through the zonelist yet one more time, keep very high watermark | 2354 | * Go through the zonelist yet one more time, keep very high watermark |
2257 | * here, this is only to catch a parallel oom killing, we must fail if | 2355 | * here, this is only to catch a parallel oom killing, we must fail if |
2258 | * we're still under heavy pressure. | 2356 | * we're still under heavy pressure. |
@@ -2296,58 +2394,59 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2296 | struct zonelist *zonelist, enum zone_type high_zoneidx, | 2394 | struct zonelist *zonelist, enum zone_type high_zoneidx, |
2297 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | 2395 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, |
2298 | int classzone_idx, int migratetype, enum migrate_mode mode, | 2396 | int classzone_idx, int migratetype, enum migrate_mode mode, |
2299 | bool *contended_compaction, bool *deferred_compaction, | 2397 | int *contended_compaction, bool *deferred_compaction) |
2300 | unsigned long *did_some_progress) | ||
2301 | { | 2398 | { |
2302 | if (!order) | 2399 | unsigned long compact_result; |
2303 | return NULL; | 2400 | struct page *page; |
2304 | 2401 | ||
2305 | if (compaction_deferred(preferred_zone, order)) { | 2402 | if (!order) |
2306 | *deferred_compaction = true; | ||
2307 | return NULL; | 2403 | return NULL; |
2308 | } | ||
2309 | 2404 | ||
2310 | current->flags |= PF_MEMALLOC; | 2405 | current->flags |= PF_MEMALLOC; |
2311 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, | 2406 | compact_result = try_to_compact_pages(zonelist, order, gfp_mask, |
2312 | nodemask, mode, | 2407 | nodemask, mode, |
2313 | contended_compaction); | 2408 | contended_compaction, |
2409 | alloc_flags, classzone_idx); | ||
2314 | current->flags &= ~PF_MEMALLOC; | 2410 | current->flags &= ~PF_MEMALLOC; |
2315 | 2411 | ||
2316 | if (*did_some_progress != COMPACT_SKIPPED) { | 2412 | switch (compact_result) { |
2317 | struct page *page; | 2413 | case COMPACT_DEFERRED: |
2318 | 2414 | *deferred_compaction = true; | |
2319 | /* Page migration frees to the PCP lists but we want merging */ | 2415 | /* fall-through */ |
2320 | drain_pages(get_cpu()); | 2416 | case COMPACT_SKIPPED: |
2321 | put_cpu(); | 2417 | return NULL; |
2418 | default: | ||
2419 | break; | ||
2420 | } | ||
2322 | 2421 | ||
2323 | page = get_page_from_freelist(gfp_mask, nodemask, | 2422 | /* |
2324 | order, zonelist, high_zoneidx, | 2423 | * At least in one zone compaction wasn't deferred or skipped, so let's |
2325 | alloc_flags & ~ALLOC_NO_WATERMARKS, | 2424 | * count a compaction stall |
2326 | preferred_zone, classzone_idx, migratetype); | 2425 | */ |
2327 | if (page) { | 2426 | count_vm_event(COMPACTSTALL); |
2328 | preferred_zone->compact_blockskip_flush = false; | ||
2329 | compaction_defer_reset(preferred_zone, order, true); | ||
2330 | count_vm_event(COMPACTSUCCESS); | ||
2331 | return page; | ||
2332 | } | ||
2333 | 2427 | ||
2334 | /* | 2428 | page = get_page_from_freelist(gfp_mask, nodemask, |
2335 | * It's bad if compaction run occurs and fails. | 2429 | order, zonelist, high_zoneidx, |
2336 | * The most likely reason is that pages exist, | 2430 | alloc_flags & ~ALLOC_NO_WATERMARKS, |
2337 | * but not enough to satisfy watermarks. | 2431 | preferred_zone, classzone_idx, migratetype); |
2338 | */ | ||
2339 | count_vm_event(COMPACTFAIL); | ||
2340 | 2432 | ||
2341 | /* | 2433 | if (page) { |
2342 | * As async compaction considers a subset of pageblocks, only | 2434 | struct zone *zone = page_zone(page); |
2343 | * defer if the failure was a sync compaction failure. | ||
2344 | */ | ||
2345 | if (mode != MIGRATE_ASYNC) | ||
2346 | defer_compaction(preferred_zone, order); | ||
2347 | 2435 | ||
2348 | cond_resched(); | 2436 | zone->compact_blockskip_flush = false; |
2437 | compaction_defer_reset(zone, order, true); | ||
2438 | count_vm_event(COMPACTSUCCESS); | ||
2439 | return page; | ||
2349 | } | 2440 | } |
2350 | 2441 | ||
2442 | /* | ||
2443 | * It's bad if compaction run occurs and fails. The most likely reason | ||
2444 | * is that pages exist, but not enough to satisfy watermarks. | ||
2445 | */ | ||
2446 | count_vm_event(COMPACTFAIL); | ||
2447 | |||
2448 | cond_resched(); | ||
2449 | |||
2351 | return NULL; | 2450 | return NULL; |
2352 | } | 2451 | } |
2353 | #else | 2452 | #else |
@@ -2355,9 +2454,8 @@ static inline struct page * | |||
2355 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | 2454 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, |
2356 | struct zonelist *zonelist, enum zone_type high_zoneidx, | 2455 | struct zonelist *zonelist, enum zone_type high_zoneidx, |
2357 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | 2456 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, |
2358 | int classzone_idx, int migratetype, | 2457 | int classzone_idx, int migratetype, enum migrate_mode mode, |
2359 | enum migrate_mode mode, bool *contended_compaction, | 2458 | int *contended_compaction, bool *deferred_compaction) |
2360 | bool *deferred_compaction, unsigned long *did_some_progress) | ||
2361 | { | 2459 | { |
2362 | return NULL; | 2460 | return NULL; |
2363 | } | 2461 | } |
@@ -2422,7 +2520,7 @@ retry: | |||
2422 | * pages are pinned on the per-cpu lists. Drain them and try again | 2520 | * pages are pinned on the per-cpu lists. Drain them and try again |
2423 | */ | 2521 | */ |
2424 | if (!page && !drained) { | 2522 | if (!page && !drained) { |
2425 | drain_all_pages(); | 2523 | drain_all_pages(NULL); |
2426 | drained = true; | 2524 | drained = true; |
2427 | goto retry; | 2525 | goto retry; |
2428 | } | 2526 | } |
@@ -2457,12 +2555,14 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, | |||
2457 | static void wake_all_kswapds(unsigned int order, | 2555 | static void wake_all_kswapds(unsigned int order, |
2458 | struct zonelist *zonelist, | 2556 | struct zonelist *zonelist, |
2459 | enum zone_type high_zoneidx, | 2557 | enum zone_type high_zoneidx, |
2460 | struct zone *preferred_zone) | 2558 | struct zone *preferred_zone, |
2559 | nodemask_t *nodemask) | ||
2461 | { | 2560 | { |
2462 | struct zoneref *z; | 2561 | struct zoneref *z; |
2463 | struct zone *zone; | 2562 | struct zone *zone; |
2464 | 2563 | ||
2465 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) | 2564 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
2565 | high_zoneidx, nodemask) | ||
2466 | wakeup_kswapd(zone, order, zone_idx(preferred_zone)); | 2566 | wakeup_kswapd(zone, order, zone_idx(preferred_zone)); |
2467 | } | 2567 | } |
2468 | 2568 | ||
@@ -2492,7 +2592,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask) | |||
2492 | alloc_flags |= ALLOC_HARDER; | 2592 | alloc_flags |= ALLOC_HARDER; |
2493 | /* | 2593 | /* |
2494 | * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the | 2594 | * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the |
2495 | * comment for __cpuset_node_allowed_softwall(). | 2595 | * comment for __cpuset_node_allowed(). |
2496 | */ | 2596 | */ |
2497 | alloc_flags &= ~ALLOC_CPUSET; | 2597 | alloc_flags &= ~ALLOC_CPUSET; |
2498 | } else if (unlikely(rt_task(current)) && !in_interrupt()) | 2598 | } else if (unlikely(rt_task(current)) && !in_interrupt()) |
@@ -2509,7 +2609,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask) | |||
2509 | alloc_flags |= ALLOC_NO_WATERMARKS; | 2609 | alloc_flags |= ALLOC_NO_WATERMARKS; |
2510 | } | 2610 | } |
2511 | #ifdef CONFIG_CMA | 2611 | #ifdef CONFIG_CMA |
2512 | if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) | 2612 | if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) |
2513 | alloc_flags |= ALLOC_CMA; | 2613 | alloc_flags |= ALLOC_CMA; |
2514 | #endif | 2614 | #endif |
2515 | return alloc_flags; | 2615 | return alloc_flags; |
@@ -2533,7 +2633,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2533 | unsigned long did_some_progress; | 2633 | unsigned long did_some_progress; |
2534 | enum migrate_mode migration_mode = MIGRATE_ASYNC; | 2634 | enum migrate_mode migration_mode = MIGRATE_ASYNC; |
2535 | bool deferred_compaction = false; | 2635 | bool deferred_compaction = false; |
2536 | bool contended_compaction = false; | 2636 | int contended_compaction = COMPACT_CONTENDED_NONE; |
2537 | 2637 | ||
2538 | /* | 2638 | /* |
2539 | * In the slowpath, we sanity check order to avoid ever trying to | 2639 | * In the slowpath, we sanity check order to avoid ever trying to |
@@ -2560,7 +2660,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2560 | 2660 | ||
2561 | restart: | 2661 | restart: |
2562 | if (!(gfp_mask & __GFP_NO_KSWAPD)) | 2662 | if (!(gfp_mask & __GFP_NO_KSWAPD)) |
2563 | wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone); | 2663 | wake_all_kswapds(order, zonelist, high_zoneidx, |
2664 | preferred_zone, nodemask); | ||
2564 | 2665 | ||
2565 | /* | 2666 | /* |
2566 | * OK, we're below the kswapd watermark and have kicked background | 2667 | * OK, we're below the kswapd watermark and have kicked background |
@@ -2633,20 +2734,40 @@ rebalance: | |||
2633 | preferred_zone, | 2734 | preferred_zone, |
2634 | classzone_idx, migratetype, | 2735 | classzone_idx, migratetype, |
2635 | migration_mode, &contended_compaction, | 2736 | migration_mode, &contended_compaction, |
2636 | &deferred_compaction, | 2737 | &deferred_compaction); |
2637 | &did_some_progress); | ||
2638 | if (page) | 2738 | if (page) |
2639 | goto got_pg; | 2739 | goto got_pg; |
2640 | 2740 | ||
2641 | /* | 2741 | /* Checks for THP-specific high-order allocations */ |
2642 | * If compaction is deferred for high-order allocations, it is because | 2742 | if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) { |
2643 | * sync compaction recently failed. In this is the case and the caller | 2743 | /* |
2644 | * requested a movable allocation that does not heavily disrupt the | 2744 | * If compaction is deferred for high-order allocations, it is |
2645 | * system then fail the allocation instead of entering direct reclaim. | 2745 | * because sync compaction recently failed. If this is the case |
2646 | */ | 2746 | * and the caller requested a THP allocation, we do not want |
2647 | if ((deferred_compaction || contended_compaction) && | 2747 | * to heavily disrupt the system, so we fail the allocation |
2648 | (gfp_mask & __GFP_NO_KSWAPD)) | 2748 | * instead of entering direct reclaim. |
2649 | goto nopage; | 2749 | */ |
2750 | if (deferred_compaction) | ||
2751 | goto nopage; | ||
2752 | |||
2753 | /* | ||
2754 | * In all zones where compaction was attempted (and not | ||
2755 | * deferred or skipped), lock contention has been detected. | ||
2756 | * For THP allocation we do not want to disrupt the others | ||
2757 | * so we fallback to base pages instead. | ||
2758 | */ | ||
2759 | if (contended_compaction == COMPACT_CONTENDED_LOCK) | ||
2760 | goto nopage; | ||
2761 | |||
2762 | /* | ||
2763 | * If compaction was aborted due to need_resched(), we do not | ||
2764 | * want to further increase allocation latency, unless it is | ||
2765 | * khugepaged trying to collapse. | ||
2766 | */ | ||
2767 | if (contended_compaction == COMPACT_CONTENDED_SCHED | ||
2768 | && !(current->flags & PF_KTHREAD)) | ||
2769 | goto nopage; | ||
2770 | } | ||
2650 | 2771 | ||
2651 | /* | 2772 | /* |
2652 | * It can become very expensive to allocate transparent hugepages at | 2773 | * It can become very expensive to allocate transparent hugepages at |
@@ -2726,8 +2847,7 @@ rebalance: | |||
2726 | preferred_zone, | 2847 | preferred_zone, |
2727 | classzone_idx, migratetype, | 2848 | classzone_idx, migratetype, |
2728 | migration_mode, &contended_compaction, | 2849 | migration_mode, &contended_compaction, |
2729 | &deferred_compaction, | 2850 | &deferred_compaction); |
2730 | &did_some_progress); | ||
2731 | if (page) | 2851 | if (page) |
2732 | goto got_pg; | 2852 | goto got_pg; |
2733 | } | 2853 | } |
@@ -2753,7 +2873,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2753 | struct zone *preferred_zone; | 2873 | struct zone *preferred_zone; |
2754 | struct zoneref *preferred_zoneref; | 2874 | struct zoneref *preferred_zoneref; |
2755 | struct page *page = NULL; | 2875 | struct page *page = NULL; |
2756 | int migratetype = allocflags_to_migratetype(gfp_mask); | 2876 | int migratetype = gfpflags_to_migratetype(gfp_mask); |
2757 | unsigned int cpuset_mems_cookie; | 2877 | unsigned int cpuset_mems_cookie; |
2758 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; | 2878 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; |
2759 | int classzone_idx; | 2879 | int classzone_idx; |
@@ -2775,6 +2895,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2775 | if (unlikely(!zonelist->_zonerefs->zone)) | 2895 | if (unlikely(!zonelist->_zonerefs->zone)) |
2776 | return NULL; | 2896 | return NULL; |
2777 | 2897 | ||
2898 | if (IS_ENABLED(CONFIG_CMA) && migratetype == MIGRATE_MOVABLE) | ||
2899 | alloc_flags |= ALLOC_CMA; | ||
2900 | |||
2778 | retry_cpuset: | 2901 | retry_cpuset: |
2779 | cpuset_mems_cookie = read_mems_allowed_begin(); | 2902 | cpuset_mems_cookie = read_mems_allowed_begin(); |
2780 | 2903 | ||
@@ -2786,10 +2909,6 @@ retry_cpuset: | |||
2786 | goto out; | 2909 | goto out; |
2787 | classzone_idx = zonelist_zone_idx(preferred_zoneref); | 2910 | classzone_idx = zonelist_zone_idx(preferred_zoneref); |
2788 | 2911 | ||
2789 | #ifdef CONFIG_CMA | ||
2790 | if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) | ||
2791 | alloc_flags |= ALLOC_CMA; | ||
2792 | #endif | ||
2793 | /* First allocation attempt */ | 2912 | /* First allocation attempt */ |
2794 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, | 2913 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, |
2795 | zonelist, high_zoneidx, alloc_flags, | 2914 | zonelist, high_zoneidx, alloc_flags, |
@@ -3579,68 +3698,30 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) | |||
3579 | zonelist->_zonerefs[pos].zone_idx = 0; | 3698 | zonelist->_zonerefs[pos].zone_idx = 0; |
3580 | } | 3699 | } |
3581 | 3700 | ||
3701 | #if defined(CONFIG_64BIT) | ||
3702 | /* | ||
3703 | * Devices that require DMA32/DMA are relatively rare and do not justify a | ||
3704 | * penalty to every machine in case the specialised case applies. Default | ||
3705 | * to Node-ordering on 64-bit NUMA machines | ||
3706 | */ | ||
3707 | static int default_zonelist_order(void) | ||
3708 | { | ||
3709 | return ZONELIST_ORDER_NODE; | ||
3710 | } | ||
3711 | #else | ||
3712 | /* | ||
3713 | * On 32-bit, the Normal zone needs to be preserved for allocations accessible | ||
3714 | * by the kernel. If processes running on node 0 deplete the low memory zone | ||
3715 | * then reclaim will occur more frequency increasing stalls and potentially | ||
3716 | * be easier to OOM if a large percentage of the zone is under writeback or | ||
3717 | * dirty. The problem is significantly worse if CONFIG_HIGHPTE is not set. | ||
3718 | * Hence, default to zone ordering on 32-bit. | ||
3719 | */ | ||
3582 | static int default_zonelist_order(void) | 3720 | static int default_zonelist_order(void) |
3583 | { | 3721 | { |
3584 | int nid, zone_type; | ||
3585 | unsigned long low_kmem_size, total_size; | ||
3586 | struct zone *z; | ||
3587 | int average_size; | ||
3588 | /* | ||
3589 | * ZONE_DMA and ZONE_DMA32 can be very small area in the system. | ||
3590 | * If they are really small and used heavily, the system can fall | ||
3591 | * into OOM very easily. | ||
3592 | * This function detect ZONE_DMA/DMA32 size and configures zone order. | ||
3593 | */ | ||
3594 | /* Is there ZONE_NORMAL ? (ex. ppc has only DMA zone..) */ | ||
3595 | low_kmem_size = 0; | ||
3596 | total_size = 0; | ||
3597 | for_each_online_node(nid) { | ||
3598 | for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { | ||
3599 | z = &NODE_DATA(nid)->node_zones[zone_type]; | ||
3600 | if (populated_zone(z)) { | ||
3601 | if (zone_type < ZONE_NORMAL) | ||
3602 | low_kmem_size += z->managed_pages; | ||
3603 | total_size += z->managed_pages; | ||
3604 | } else if (zone_type == ZONE_NORMAL) { | ||
3605 | /* | ||
3606 | * If any node has only lowmem, then node order | ||
3607 | * is preferred to allow kernel allocations | ||
3608 | * locally; otherwise, they can easily infringe | ||
3609 | * on other nodes when there is an abundance of | ||
3610 | * lowmem available to allocate from. | ||
3611 | */ | ||
3612 | return ZONELIST_ORDER_NODE; | ||
3613 | } | ||
3614 | } | ||
3615 | } | ||
3616 | if (!low_kmem_size || /* there are no DMA area. */ | ||
3617 | low_kmem_size > total_size/2) /* DMA/DMA32 is big. */ | ||
3618 | return ZONELIST_ORDER_NODE; | ||
3619 | /* | ||
3620 | * look into each node's config. | ||
3621 | * If there is a node whose DMA/DMA32 memory is very big area on | ||
3622 | * local memory, NODE_ORDER may be suitable. | ||
3623 | */ | ||
3624 | average_size = total_size / | ||
3625 | (nodes_weight(node_states[N_MEMORY]) + 1); | ||
3626 | for_each_online_node(nid) { | ||
3627 | low_kmem_size = 0; | ||
3628 | total_size = 0; | ||
3629 | for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { | ||
3630 | z = &NODE_DATA(nid)->node_zones[zone_type]; | ||
3631 | if (populated_zone(z)) { | ||
3632 | if (zone_type < ZONE_NORMAL) | ||
3633 | low_kmem_size += z->present_pages; | ||
3634 | total_size += z->present_pages; | ||
3635 | } | ||
3636 | } | ||
3637 | if (low_kmem_size && | ||
3638 | total_size > average_size && /* ignore small node */ | ||
3639 | low_kmem_size > total_size * 70/100) | ||
3640 | return ZONELIST_ORDER_NODE; | ||
3641 | } | ||
3642 | return ZONELIST_ORDER_ZONE; | 3722 | return ZONELIST_ORDER_ZONE; |
3643 | } | 3723 | } |
3724 | #endif /* CONFIG_64BIT */ | ||
3644 | 3725 | ||
3645 | static void set_zonelist_order(void) | 3726 | static void set_zonelist_order(void) |
3646 | { | 3727 | { |
@@ -3899,14 +3980,14 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) | |||
3899 | else | 3980 | else |
3900 | page_group_by_mobility_disabled = 0; | 3981 | page_group_by_mobility_disabled = 0; |
3901 | 3982 | ||
3902 | printk("Built %i zonelists in %s order, mobility grouping %s. " | 3983 | pr_info("Built %i zonelists in %s order, mobility grouping %s. " |
3903 | "Total pages: %ld\n", | 3984 | "Total pages: %ld\n", |
3904 | nr_online_nodes, | 3985 | nr_online_nodes, |
3905 | zonelist_order_name[current_zonelist_order], | 3986 | zonelist_order_name[current_zonelist_order], |
3906 | page_group_by_mobility_disabled ? "off" : "on", | 3987 | page_group_by_mobility_disabled ? "off" : "on", |
3907 | vm_total_pages); | 3988 | vm_total_pages); |
3908 | #ifdef CONFIG_NUMA | 3989 | #ifdef CONFIG_NUMA |
3909 | printk("Policy zone: %s\n", zone_names[policy_zone]); | 3990 | pr_info("Policy zone: %s\n", zone_names[policy_zone]); |
3910 | #endif | 3991 | #endif |
3911 | } | 3992 | } |
3912 | 3993 | ||
@@ -4838,7 +4919,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4838 | #endif | 4919 | #endif |
4839 | init_waitqueue_head(&pgdat->kswapd_wait); | 4920 | init_waitqueue_head(&pgdat->kswapd_wait); |
4840 | init_waitqueue_head(&pgdat->pfmemalloc_wait); | 4921 | init_waitqueue_head(&pgdat->pfmemalloc_wait); |
4841 | pgdat_page_cgroup_init(pgdat); | 4922 | pgdat_page_ext_init(pgdat); |
4842 | 4923 | ||
4843 | for (j = 0; j < MAX_NR_ZONES; j++) { | 4924 | for (j = 0; j < MAX_NR_ZONES; j++) { |
4844 | struct zone *zone = pgdat->node_zones + j; | 4925 | struct zone *zone = pgdat->node_zones + j; |
@@ -4857,16 +4938,18 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4857 | * and per-cpu initialisations | 4938 | * and per-cpu initialisations |
4858 | */ | 4939 | */ |
4859 | memmap_pages = calc_memmap_size(size, realsize); | 4940 | memmap_pages = calc_memmap_size(size, realsize); |
4860 | if (freesize >= memmap_pages) { | 4941 | if (!is_highmem_idx(j)) { |
4861 | freesize -= memmap_pages; | 4942 | if (freesize >= memmap_pages) { |
4862 | if (memmap_pages) | 4943 | freesize -= memmap_pages; |
4863 | printk(KERN_DEBUG | 4944 | if (memmap_pages) |
4864 | " %s zone: %lu pages used for memmap\n", | 4945 | printk(KERN_DEBUG |
4865 | zone_names[j], memmap_pages); | 4946 | " %s zone: %lu pages used for memmap\n", |
4866 | } else | 4947 | zone_names[j], memmap_pages); |
4867 | printk(KERN_WARNING | 4948 | } else |
4868 | " %s zone: %lu pages exceeds freesize %lu\n", | 4949 | printk(KERN_WARNING |
4869 | zone_names[j], memmap_pages, freesize); | 4950 | " %s zone: %lu pages exceeds freesize %lu\n", |
4951 | zone_names[j], memmap_pages, freesize); | ||
4952 | } | ||
4870 | 4953 | ||
4871 | /* Account for reserved pages */ | 4954 | /* Account for reserved pages */ |
4872 | if (j == 0 && freesize > dma_reserve) { | 4955 | if (j == 0 && freesize > dma_reserve) { |
@@ -4976,6 +5059,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
4976 | pgdat->node_start_pfn = node_start_pfn; | 5059 | pgdat->node_start_pfn = node_start_pfn; |
4977 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 5060 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
4978 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); | 5061 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); |
5062 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, | ||
5063 | (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1); | ||
4979 | #endif | 5064 | #endif |
4980 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, | 5065 | calculate_node_totalpages(pgdat, start_pfn, end_pfn, |
4981 | zones_size, zholes_size); | 5066 | zones_size, zholes_size); |
@@ -5338,33 +5423,33 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
5338 | find_zone_movable_pfns_for_nodes(); | 5423 | find_zone_movable_pfns_for_nodes(); |
5339 | 5424 | ||
5340 | /* Print out the zone ranges */ | 5425 | /* Print out the zone ranges */ |
5341 | printk("Zone ranges:\n"); | 5426 | pr_info("Zone ranges:\n"); |
5342 | for (i = 0; i < MAX_NR_ZONES; i++) { | 5427 | for (i = 0; i < MAX_NR_ZONES; i++) { |
5343 | if (i == ZONE_MOVABLE) | 5428 | if (i == ZONE_MOVABLE) |
5344 | continue; | 5429 | continue; |
5345 | printk(KERN_CONT " %-8s ", zone_names[i]); | 5430 | pr_info(" %-8s ", zone_names[i]); |
5346 | if (arch_zone_lowest_possible_pfn[i] == | 5431 | if (arch_zone_lowest_possible_pfn[i] == |
5347 | arch_zone_highest_possible_pfn[i]) | 5432 | arch_zone_highest_possible_pfn[i]) |
5348 | printk(KERN_CONT "empty\n"); | 5433 | pr_cont("empty\n"); |
5349 | else | 5434 | else |
5350 | printk(KERN_CONT "[mem %0#10lx-%0#10lx]\n", | 5435 | pr_cont("[mem %0#10lx-%0#10lx]\n", |
5351 | arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT, | 5436 | arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT, |
5352 | (arch_zone_highest_possible_pfn[i] | 5437 | (arch_zone_highest_possible_pfn[i] |
5353 | << PAGE_SHIFT) - 1); | 5438 | << PAGE_SHIFT) - 1); |
5354 | } | 5439 | } |
5355 | 5440 | ||
5356 | /* Print out the PFNs ZONE_MOVABLE begins at in each node */ | 5441 | /* Print out the PFNs ZONE_MOVABLE begins at in each node */ |
5357 | printk("Movable zone start for each node\n"); | 5442 | pr_info("Movable zone start for each node\n"); |
5358 | for (i = 0; i < MAX_NUMNODES; i++) { | 5443 | for (i = 0; i < MAX_NUMNODES; i++) { |
5359 | if (zone_movable_pfn[i]) | 5444 | if (zone_movable_pfn[i]) |
5360 | printk(" Node %d: %#010lx\n", i, | 5445 | pr_info(" Node %d: %#010lx\n", i, |
5361 | zone_movable_pfn[i] << PAGE_SHIFT); | 5446 | zone_movable_pfn[i] << PAGE_SHIFT); |
5362 | } | 5447 | } |
5363 | 5448 | ||
5364 | /* Print out the early node map */ | 5449 | /* Print out the early node map */ |
5365 | printk("Early memory node ranges\n"); | 5450 | pr_info("Early memory node ranges\n"); |
5366 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) | 5451 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) |
5367 | printk(" node %3d: [mem %#010lx-%#010lx]\n", nid, | 5452 | pr_info(" node %3d: [mem %#010lx-%#010lx]\n", nid, |
5368 | start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); | 5453 | start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); |
5369 | 5454 | ||
5370 | /* Initialise every node */ | 5455 | /* Initialise every node */ |
@@ -5500,9 +5585,9 @@ void __init mem_init_print_info(const char *str) | |||
5500 | 5585 | ||
5501 | #undef adj_init_size | 5586 | #undef adj_init_size |
5502 | 5587 | ||
5503 | printk("Memory: %luK/%luK available " | 5588 | pr_info("Memory: %luK/%luK available " |
5504 | "(%luK kernel code, %luK rwdata, %luK rodata, " | 5589 | "(%luK kernel code, %luK rwdata, %luK rodata, " |
5505 | "%luK init, %luK bss, %luK reserved" | 5590 | "%luK init, %luK bss, %luK reserved, %luK cma-reserved" |
5506 | #ifdef CONFIG_HIGHMEM | 5591 | #ifdef CONFIG_HIGHMEM |
5507 | ", %luK highmem" | 5592 | ", %luK highmem" |
5508 | #endif | 5593 | #endif |
@@ -5510,7 +5595,8 @@ void __init mem_init_print_info(const char *str) | |||
5510 | nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10), | 5595 | nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10), |
5511 | codesize >> 10, datasize >> 10, rosize >> 10, | 5596 | codesize >> 10, datasize >> 10, rosize >> 10, |
5512 | (init_data_size + init_code_size) >> 10, bss_size >> 10, | 5597 | (init_data_size + init_code_size) >> 10, bss_size >> 10, |
5513 | (physpages - totalram_pages) << (PAGE_SHIFT-10), | 5598 | (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10), |
5599 | totalcma_pages << (PAGE_SHIFT-10), | ||
5514 | #ifdef CONFIG_HIGHMEM | 5600 | #ifdef CONFIG_HIGHMEM |
5515 | totalhigh_pages << (PAGE_SHIFT-10), | 5601 | totalhigh_pages << (PAGE_SHIFT-10), |
5516 | #endif | 5602 | #endif |
@@ -6202,9 +6288,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
6202 | if (!PageLRU(page)) | 6288 | if (!PageLRU(page)) |
6203 | found++; | 6289 | found++; |
6204 | /* | 6290 | /* |
6205 | * If there are RECLAIMABLE pages, we need to check it. | 6291 | * If there are RECLAIMABLE pages, we need to check |
6206 | * But now, memory offline itself doesn't call shrink_slab() | 6292 | * it. But now, memory offline itself doesn't call |
6207 | * and it still to be fixed. | 6293 | * shrink_node_slabs() and it still to be fixed. |
6208 | */ | 6294 | */ |
6209 | /* | 6295 | /* |
6210 | * If the page is not RAM, page_count()should be 0. | 6296 | * If the page is not RAM, page_count()should be 0. |
@@ -6277,8 +6363,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
6277 | 6363 | ||
6278 | if (list_empty(&cc->migratepages)) { | 6364 | if (list_empty(&cc->migratepages)) { |
6279 | cc->nr_migratepages = 0; | 6365 | cc->nr_migratepages = 0; |
6280 | pfn = isolate_migratepages_range(cc->zone, cc, | 6366 | pfn = isolate_migratepages_range(cc, pfn, end); |
6281 | pfn, end, true); | ||
6282 | if (!pfn) { | 6367 | if (!pfn) { |
6283 | ret = -EINTR; | 6368 | ret = -EINTR; |
6284 | break; | 6369 | break; |
@@ -6390,7 +6475,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
6390 | */ | 6475 | */ |
6391 | 6476 | ||
6392 | lru_add_drain_all(); | 6477 | lru_add_drain_all(); |
6393 | drain_all_pages(); | 6478 | drain_all_pages(cc.zone); |
6394 | 6479 | ||
6395 | order = 0; | 6480 | order = 0; |
6396 | outer_start = start; | 6481 | outer_start = start; |
@@ -6404,13 +6489,12 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
6404 | 6489 | ||
6405 | /* Make sure the range is really isolated. */ | 6490 | /* Make sure the range is really isolated. */ |
6406 | if (test_pages_isolated(outer_start, end, false)) { | 6491 | if (test_pages_isolated(outer_start, end, false)) { |
6407 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", | 6492 | pr_info("%s: [%lx, %lx) PFNs busy\n", |
6408 | outer_start, end); | 6493 | __func__, outer_start, end); |
6409 | ret = -EBUSY; | 6494 | ret = -EBUSY; |
6410 | goto done; | 6495 | goto done; |
6411 | } | 6496 | } |
6412 | 6497 | ||
6413 | |||
6414 | /* Grab isolated pages from freelists. */ | 6498 | /* Grab isolated pages from freelists. */ |
6415 | outer_end = isolate_freepages_range(&cc, outer_start, end); | 6499 | outer_end = isolate_freepages_range(&cc, outer_start, end); |
6416 | if (!outer_end) { | 6500 | if (!outer_end) { |
@@ -6554,97 +6638,3 @@ bool is_free_buddy_page(struct page *page) | |||
6554 | return order < MAX_ORDER; | 6638 | return order < MAX_ORDER; |
6555 | } | 6639 | } |
6556 | #endif | 6640 | #endif |
6557 | |||
6558 | static const struct trace_print_flags pageflag_names[] = { | ||
6559 | {1UL << PG_locked, "locked" }, | ||
6560 | {1UL << PG_error, "error" }, | ||
6561 | {1UL << PG_referenced, "referenced" }, | ||
6562 | {1UL << PG_uptodate, "uptodate" }, | ||
6563 | {1UL << PG_dirty, "dirty" }, | ||
6564 | {1UL << PG_lru, "lru" }, | ||
6565 | {1UL << PG_active, "active" }, | ||
6566 | {1UL << PG_slab, "slab" }, | ||
6567 | {1UL << PG_owner_priv_1, "owner_priv_1" }, | ||
6568 | {1UL << PG_arch_1, "arch_1" }, | ||
6569 | {1UL << PG_reserved, "reserved" }, | ||
6570 | {1UL << PG_private, "private" }, | ||
6571 | {1UL << PG_private_2, "private_2" }, | ||
6572 | {1UL << PG_writeback, "writeback" }, | ||
6573 | #ifdef CONFIG_PAGEFLAGS_EXTENDED | ||
6574 | {1UL << PG_head, "head" }, | ||
6575 | {1UL << PG_tail, "tail" }, | ||
6576 | #else | ||
6577 | {1UL << PG_compound, "compound" }, | ||
6578 | #endif | ||
6579 | {1UL << PG_swapcache, "swapcache" }, | ||
6580 | {1UL << PG_mappedtodisk, "mappedtodisk" }, | ||
6581 | {1UL << PG_reclaim, "reclaim" }, | ||
6582 | {1UL << PG_swapbacked, "swapbacked" }, | ||
6583 | {1UL << PG_unevictable, "unevictable" }, | ||
6584 | #ifdef CONFIG_MMU | ||
6585 | {1UL << PG_mlocked, "mlocked" }, | ||
6586 | #endif | ||
6587 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED | ||
6588 | {1UL << PG_uncached, "uncached" }, | ||
6589 | #endif | ||
6590 | #ifdef CONFIG_MEMORY_FAILURE | ||
6591 | {1UL << PG_hwpoison, "hwpoison" }, | ||
6592 | #endif | ||
6593 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
6594 | {1UL << PG_compound_lock, "compound_lock" }, | ||
6595 | #endif | ||
6596 | }; | ||
6597 | |||
6598 | static void dump_page_flags(unsigned long flags) | ||
6599 | { | ||
6600 | const char *delim = ""; | ||
6601 | unsigned long mask; | ||
6602 | int i; | ||
6603 | |||
6604 | BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS); | ||
6605 | |||
6606 | printk(KERN_ALERT "page flags: %#lx(", flags); | ||
6607 | |||
6608 | /* remove zone id */ | ||
6609 | flags &= (1UL << NR_PAGEFLAGS) - 1; | ||
6610 | |||
6611 | for (i = 0; i < ARRAY_SIZE(pageflag_names) && flags; i++) { | ||
6612 | |||
6613 | mask = pageflag_names[i].mask; | ||
6614 | if ((flags & mask) != mask) | ||
6615 | continue; | ||
6616 | |||
6617 | flags &= ~mask; | ||
6618 | printk("%s%s", delim, pageflag_names[i].name); | ||
6619 | delim = "|"; | ||
6620 | } | ||
6621 | |||
6622 | /* check for left over flags */ | ||
6623 | if (flags) | ||
6624 | printk("%s%#lx", delim, flags); | ||
6625 | |||
6626 | printk(")\n"); | ||
6627 | } | ||
6628 | |||
6629 | void dump_page_badflags(struct page *page, const char *reason, | ||
6630 | unsigned long badflags) | ||
6631 | { | ||
6632 | printk(KERN_ALERT | ||
6633 | "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n", | ||
6634 | page, atomic_read(&page->_count), page_mapcount(page), | ||
6635 | page->mapping, page->index); | ||
6636 | dump_page_flags(page->flags); | ||
6637 | if (reason) | ||
6638 | pr_alert("page dumped because: %s\n", reason); | ||
6639 | if (page->flags & badflags) { | ||
6640 | pr_alert("bad because of flags:\n"); | ||
6641 | dump_page_flags(page->flags & badflags); | ||
6642 | } | ||
6643 | mem_cgroup_print_bad_page(page); | ||
6644 | } | ||
6645 | |||
6646 | void dump_page(struct page *page, const char *reason) | ||
6647 | { | ||
6648 | dump_page_badflags(page, reason, 0); | ||
6649 | } | ||
6650 | EXPORT_SYMBOL(dump_page); | ||