author	Takashi Iwai <tiwai@suse.de>	2009-11-01 05:11:07 -0500
committer	Takashi Iwai <tiwai@suse.de>	2009-11-01 05:11:07 -0500
commit	e87a3dd33eab30b4db539500064a9584867e4f2c (patch)
tree	2f7ad16e46ae30518ff63bb5391b63f7f7cc74dd /mm/page_alloc.c
parent	b14f5de731ae657d498d18d713c6431bfbeefb4b (diff)
parent	3d00941371a765779c4e3509214c7e5793cce1fe (diff)
Merge branch 'fix/misc' into topic/misc
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	328
1 file changed, 231 insertions(+), 97 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0de15f46987..bf720550b44d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -123,8 +123,8 @@ static char * const zone_names[MAX_NR_ZONES] = {
 
 int min_free_kbytes = 1024;
 
-unsigned long __meminitdata nr_kernel_pages;
-unsigned long __meminitdata nr_all_pages;
+static unsigned long __meminitdata nr_kernel_pages;
+static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
 	static unsigned long nr_shown;
 	static unsigned long nr_unshown;
 
+	/* Don't complain about poisoned pages */
+	if (PageHWPoison(page)) {
+		__ClearPageBuddy(page);
+		return;
+	}
+
 	/*
 	 * Allow a burst of 60 reports, then keep quiet for that minute;
 	 * or allow a steady drip of one report per second.
@@ -510,7 +516,7 @@ static inline int free_pages_check(struct page *page)
 }
 
 /*
- * Frees a list of pages.
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -520,22 +526,42 @@ static inline int free_pages_check(struct page *page)
 * And clear the zone's pages_scanned counter, to hold off the "all pages are
 * pinned" detection logic.
 */
-static void free_pages_bulk(struct zone *zone, int count,
-					struct list_head *list, int order)
+static void free_pcppages_bulk(struct zone *zone, int count,
+					struct per_cpu_pages *pcp)
 {
+	int migratetype = 0;
+	int batch_free = 0;
+
 	spin_lock(&zone->lock);
 	zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, count << order);
-	while (count--) {
+	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
+	while (count) {
 		struct page *page;
+		struct list_head *list;
 
-		VM_BUG_ON(list_empty(list));
-		page = list_entry(list->prev, struct page, lru);
-		/* have to delete it as __free_one_page list manipulates */
-		list_del(&page->lru);
-		__free_one_page(page, zone, order, page_private(page));
+		/*
+		 * Remove pages from lists in a round-robin fashion. A
+		 * batch_free count is maintained that is incremented when an
+		 * empty list is encountered. This is so more pages are freed
+		 * off fuller lists instead of spinning excessively around empty
+		 * lists
+		 */
+		do {
+			batch_free++;
+			if (++migratetype == MIGRATE_PCPTYPES)
+				migratetype = 0;
+			list = &pcp->lists[migratetype];
+		} while (list_empty(list));
+
+		do {
+			page = list_entry(list->prev, struct page, lru);
+			/* must delete as __free_one_page list manipulates */
+			list_del(&page->lru);
+			__free_one_page(page, zone, 0, migratetype);
+			trace_mm_page_pcpu_drain(page, 0, migratetype);
+		} while (--count && --batch_free && !list_empty(list));
 	}
 	spin_unlock(&zone->lock);
 }
@@ -557,7 +583,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	unsigned long flags;
 	int i;
 	int bad = 0;
-	int wasMlocked = TestClearPageMlocked(page);
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, order);
 
@@ -646,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
 * This page is about to be returned from the page allocator
 */
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
@@ -655,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 		bad_page(page);
 		return 1;
 	}
+	return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+	int i;
+
+	for (i = 0; i < (1 << order); i++) {
+		struct page *p = page + i;
+		if (unlikely(check_new_page(p)))
+			return 1;
+	}
 
 	set_page_private(page, 0);
 	set_page_refcounted(page);
@@ -783,6 +821,17 @@ static int move_freepages_block(struct zone *zone, struct page *page,
 	return move_freepages(zone, start_page, end_page, migratetype);
 }
 
+static void change_pageblock_range(struct page *pageblock_page,
+					int start_order, int migratetype)
+{
+	int nr_pageblocks = 1 << (start_order - pageblock_order);
+
+	while (nr_pageblocks--) {
+		set_pageblock_migratetype(pageblock_page, migratetype);
+		pageblock_page += pageblock_nr_pages;
+	}
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
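change_pageblock_range() does one piece of arithmetic: how many pageblocks the stolen allocation spans. A tiny userspace check of that shift, using sample orders rather than anything taken from this patch:

#include <stdio.h>

int main(void)
{
	int pageblock_order = 9;	/* sample: 2MB pageblocks on 4KB pages */
	int start_order = 10;		/* sample: a 4MB fallback allocation */

	/* change_pageblock_range() retags this many consecutive pageblocks */
	printf("%d pageblocks retagged\n", 1 << (start_order - pageblock_order));
	return 0;
}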
@@ -836,11 +885,16 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			list_del(&page->lru);
 			rmv_page_order(page);
 
-			if (current_order == pageblock_order)
-				set_pageblock_migratetype(page,
+			/* Take ownership for orders >= pageblock_order */
+			if (current_order >= pageblock_order)
+				change_pageblock_range(page, current_order,
 							start_migratetype);
 
 			expand(zone, page, order, current_order, area, migratetype);
+
+			trace_mm_page_alloc_extfrag(page, order, current_order,
+				start_migratetype, migratetype);
+
 			return page;
 		}
 	}
@@ -874,6 +928,7 @@ retry_reserve:
 		}
 	}
 
+	trace_mm_page_alloc_zone_locked(page, order, migratetype);
 	return page;
 }
 
@@ -934,7 +989,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 		to_drain = pcp->batch;
 	else
 		to_drain = pcp->count;
-	free_pages_bulk(zone, to_drain, &pcp->list, 0);
+	free_pcppages_bulk(zone, to_drain, pcp);
 	pcp->count -= to_drain;
 	local_irq_restore(flags);
 }
@@ -960,7 +1015,7 @@ static void drain_pages(unsigned int cpu)
 
 		pcp = &pset->pcp;
 		local_irq_save(flags);
-		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
 	}
@@ -1026,7 +1081,8 @@ static void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
-	int wasMlocked = TestClearPageMlocked(page);
+	int migratetype;
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, 0);
 
@@ -1043,35 +1099,49 @@ static void free_hot_cold_page(struct page *page, int cold)
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp;
-	set_page_private(page, get_pageblock_migratetype(page));
+	migratetype = get_pageblock_migratetype(page);
+	set_page_private(page, migratetype);
 	local_irq_save(flags);
 	if (unlikely(wasMlocked))
 		free_page_mlock(page);
 	__count_vm_event(PGFREE);
 
+	/*
+	 * We only track unmovable, reclaimable and movable on pcp lists.
+	 * Free ISOLATE pages back to the allocator because they are being
+	 * offlined but treat RESERVE as movable pages so we can get those
+	 * areas back if necessary. Otherwise, we may have to free
+	 * excessively into the page allocator
+	 */
+	if (migratetype >= MIGRATE_PCPTYPES) {
+		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+			free_one_page(zone, page, 0, migratetype);
+			goto out;
+		}
+		migratetype = MIGRATE_MOVABLE;
+	}
+
 	if (cold)
-		list_add_tail(&page->lru, &pcp->list);
+		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
-		list_add(&page->lru, &pcp->list);
+		list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
-		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->batch, pcp);
 		pcp->count -= pcp->batch;
 	}
+
+out:
 	local_irq_restore(flags);
 	put_cpu();
 }
 
 void free_hot_page(struct page *page)
 {
+	trace_mm_page_free_direct(page, 0);
 	free_hot_cold_page(page, 0);
 }
 
-void free_cold_page(struct page *page)
-{
-	free_hot_cold_page(page, 1);
-}
-
 /*
 * split_page takes a non-compound higher-order page, and splits it into
 * n (1<<order) sub-pages: page[0..n]
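For reference, the routing decision free_hot_cold_page() now makes can be summarised as a small standalone helper. This is an illustration only: the enum mirrors the MIGRATE_* layout this patch appears to assume (ISOLATE above RESERVE, RESERVE sharing the value of MIGRATE_PCPTYPES), so treat the exact values as an assumption rather than a reference.

/*
 * Sketch of the per-page routing in free_hot_cold_page().
 * Values are illustrative assumptions, not copied from mmzone.h.
 */
enum {
	MIGRATE_UNMOVABLE,
	MIGRATE_RECLAIMABLE,
	MIGRATE_MOVABLE,
	MIGRATE_PCPTYPES,			/* number of types kept on pcp lists */
	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
	MIGRATE_ISOLATE,
};

/*
 * Returns the pcp list index the freed page should be queued on, or -1
 * when it must bypass the pcp lists and go straight to the buddy
 * allocator (the MIGRATE_ISOLATE case handled via free_one_page()).
 */
int pcp_list_for(int migratetype)
{
	if (migratetype >= MIGRATE_PCPTYPES) {
		if (migratetype == MIGRATE_ISOLATE)
			return -1;		/* free directly to the buddy */
		return MIGRATE_MOVABLE;		/* RESERVE is treated as movable */
	}
	return migratetype;			/* unmovable/reclaimable/movable */
}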
@@ -1119,35 +1189,23 @@ again:
 	cpu = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
+		struct list_head *list;
 
 		pcp = &zone_pcp(zone, cpu)->pcp;
+		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
-		if (!pcp->count) {
-			pcp->count = rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
+		if (list_empty(list)) {
+			pcp->count += rmqueue_bulk(zone, 0,
+					pcp->batch, list,
 					migratetype, cold);
-			if (unlikely(!pcp->count))
+			if (unlikely(list_empty(list)))
 				goto failed;
 		}
 
-		/* Find a page of the appropriate migrate type */
-		if (cold) {
-			list_for_each_entry_reverse(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		} else {
-			list_for_each_entry(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		}
-
-		/* Allocate more to the pcp list if necessary */
-		if (unlikely(&page->lru == &pcp->list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
-					migratetype, cold);
-			page = list_entry(pcp->list.next, struct page, lru);
-		}
+		if (cold)
+			page = list_entry(list->prev, struct page, lru);
+		else
+			page = list_entry(list->next, struct page, lru);
 
 		list_del(&page->lru);
 		pcp->count--;
@@ -1627,10 +1685,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-
-	/*
-	 * The task's cpuset might have expanded its set of allowable nodes
-	 */
 	p->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
@@ -1765,6 +1819,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 	wake_all_kswapd(order, zonelist, high_zoneidx);
 
+restart:
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
 	 * reclaim. Now things get more complex, so set up alloc_flags according
@@ -1772,7 +1827,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
-restart:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -1907,6 +1961,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			zonelist, high_zoneidx, nodemask,
 			preferred_zone, migratetype);
 
+	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -1916,44 +1971,41 @@ EXPORT_SYMBOL(__alloc_pages_nodemask);
 */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
-	struct page * page;
+	struct page *page;
+
+	/*
+	 * __get_free_pages() returns a 32-bit address, which cannot represent
+	 * a highmem page
+	 */
+	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+
 	page = alloc_pages(gfp_mask, order);
 	if (!page)
 		return 0;
 	return (unsigned long) page_address(page);
 }
-
 EXPORT_SYMBOL(__get_free_pages);
 
 unsigned long get_zeroed_page(gfp_t gfp_mask)
 {
-	struct page * page;
-
-	/*
-	 * get_zeroed_page() returns a 32-bit address, which cannot represent
-	 * a highmem page
-	 */
-	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
-	if (page)
-		return (unsigned long) page_address(page);
-	return 0;
+	return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
 }
-
 EXPORT_SYMBOL(get_zeroed_page);
 
 void __pagevec_free(struct pagevec *pvec)
 {
 	int i = pagevec_count(pvec);
 
-	while (--i >= 0)
+	while (--i >= 0) {
+		trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
 		free_hot_cold_page(pvec->pages[i], pvec->cold);
+	}
 }
 
 void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
+		trace_mm_page_free_direct(page, order);
 		if (order == 0)
 			free_hot_page(page);
 		else
@@ -2128,23 +2180,28 @@ void show_free_areas(void)
 		}
 	}
 
-	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
-		" inactive_file:%lu"
+	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
 		" unevictable:%lu"
-		" dirty:%lu writeback:%lu unstable:%lu\n"
-		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+		" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
+		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
-		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
+		global_page_state(NR_ISOLATED_ANON),
+		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_FILE),
+		global_page_state(NR_ISOLATED_FILE),
 		global_page_state(NR_UNEVICTABLE),
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
+		nr_blockdev_pages(),
 		global_page_state(NR_FREE_PAGES),
-		global_page_state(NR_SLAB_RECLAIMABLE) +
+		global_page_state(NR_SLAB_RECLAIMABLE),
 		global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
+		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE));
 
@@ -2162,7 +2219,21 @@ void show_free_areas(void)
 			" active_file:%lukB"
 			" inactive_file:%lukB"
 			" unevictable:%lukB"
+			" isolated(anon):%lukB"
+			" isolated(file):%lukB"
 			" present:%lukB"
+			" mlocked:%lukB"
+			" dirty:%lukB"
+			" writeback:%lukB"
+			" mapped:%lukB"
+			" shmem:%lukB"
+			" slab_reclaimable:%lukB"
+			" slab_unreclaimable:%lukB"
+			" kernel_stack:%lukB"
+			" pagetables:%lukB"
+			" unstable:%lukB"
+			" bounce:%lukB"
+			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
 			"\n",
@@ -2176,7 +2247,22 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
 			K(zone_page_state(zone, NR_INACTIVE_FILE)),
 			K(zone_page_state(zone, NR_UNEVICTABLE)),
+			K(zone_page_state(zone, NR_ISOLATED_ANON)),
+			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
+			K(zone_page_state(zone, NR_MLOCK)),
+			K(zone_page_state(zone, NR_FILE_DIRTY)),
+			K(zone_page_state(zone, NR_WRITEBACK)),
+			K(zone_page_state(zone, NR_FILE_MAPPED)),
+			K(zone_page_state(zone, NR_SHMEM)),
+			K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
+			K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
+			zone_page_state(zone, NR_KERNEL_STACK) *
+				THREAD_SIZE / 1024,
+			K(zone_page_state(zone, NR_PAGETABLE)),
+			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
+			K(zone_page_state(zone, NR_BOUNCE)),
+			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
 			);
@@ -2305,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
 * sysctl handler for numa_zonelist_order
 */
 int numa_zonelist_order_handler(ctl_table *table, int write,
-		struct file *file, void __user *buffer, size_t *length,
+		void __user *buffer, size_t *length,
 		loff_t *ppos)
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2314,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 	if (write)
 		strncpy(saved_string, (char*)table->data,
 			NUMA_ZONELIST_ORDER_LEN);
-	ret = proc_dostring(table, write, file, buffer, length, ppos);
+	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
 		return ret;
 	if (write) {
@@ -2783,7 +2869,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 {
 	unsigned long start_pfn, pfn, end_pfn;
 	struct page *page;
-	unsigned long reserve, block_migratetype;
+	unsigned long block_migratetype;
+	int reserve;
 
 	/* Get the start pfn, end pfn and the number of blocks to reserve */
 	start_pfn = zone->zone_start_pfn;
@@ -2791,6 +2878,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
 
+	/*
+	 * Reserve blocks are generally in place to help high-order atomic
+	 * allocations that are short-lived. A min_free_kbytes value that
+	 * would result in more than 2 reserve blocks for atomic allocations
+	 * is assumed to be in place to help anti-fragmentation for the
+	 * future allocation of hugepages at runtime.
+	 */
+	reserve = min(2, reserve);
+
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
 			continue;
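The clamp added here is easiest to see with numbers. A minimal userspace calculation follows, assuming a sample zone watermark and 2MB pageblocks on 4KB pages; both figures are invented for illustration, not taken from the patch.

#include <stdio.h>

#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))
#define min(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long min_wmark_pages = 11 * 1024;	/* sample watermark, in pages */
	unsigned long pageblock_nr_pages = 512;		/* 2MB pageblocks / 4KB pages */
	int pageblock_order = 9;			/* log2(512) */
	int reserve;

	reserve = roundup(min_wmark_pages, pageblock_nr_pages) >> pageblock_order;
	printf("before clamp: %d reserve pageblocks\n", reserve);	/* 22 */

	/* Anything beyond two MIGRATE_RESERVE blocks is assumed to serve
	 * anti-fragmentation rather than short-lived atomic allocations. */
	reserve = min(2, reserve);
	printf("after clamp:  %d reserve pageblocks\n", reserve);	/* 2 */
	return 0;
}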
@@ -2961,6 +3057,7 @@ static int zone_batchsize(struct zone *zone)
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
+	int migratetype;
 
 	memset(p, 0, sizeof(*p));
 
@@ -2968,7 +3065,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 	pcp->count = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
+		INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
 
 /*
@@ -3146,6 +3244,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 	return 0;
 }
 
+static int __zone_pcp_update(void *data)
+{
+	struct zone *zone = data;
+	int cpu;
+	unsigned long batch = zone_batchsize(zone), flags;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;
+
+		pset = zone_pcp(zone, cpu);
+		pcp = &pset->pcp;
+
+		local_irq_save(flags);
+		free_pcppages_bulk(zone, pcp->count, pcp);
+		setup_pageset(pset, batch);
+		local_irq_restore(flags);
+	}
+	return 0;
+}
+
+void zone_pcp_update(struct zone *zone)
+{
+	stop_machine(__zone_pcp_update, zone, NULL);
+}
+
 static __meminit void zone_pcp_init(struct zone *zone)
 {
 	int cpu;
@@ -3720,7 +3844,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone_pcp_init(zone);
 		for_each_lru(l) {
 			INIT_LIST_HEAD(&zone->lru[l].list);
-			zone->lru[l].nr_saved_scan = 0;
+			zone->reclaim_stat.nr_saved_scan[l] = 0;
 		}
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
@@ -4509,7 +4633,7 @@ void setup_per_zone_wmarks(void)
 	calculate_totalreserve_pages();
 }
 
-/**
+/*
 * The inactive anon list should be small enough that the VM never has to
 * do too much work, but large enough that each inactive page has a chance
 * to be referenced again before it is swapped out.
@@ -4600,9 +4724,9 @@ module_init(init_per_zone_wmark_min)
 * changes.
 */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec(table, write, file, buffer, length, ppos);
+	proc_dointvec(table, write, buffer, length, ppos);
 	if (write)
 		setup_per_zone_wmarks();
 	return 0;
@@ -4610,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 
 #ifdef CONFIG_NUMA
 int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	int rc;
 
-	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
@@ -4626,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	int rc;
 
-	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
@@ -4652,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
 * if in function of the boot time zone sizes.
 */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	proc_dointvec_minmax(table, write, buffer, length, ppos);
 	setup_per_zone_lowmem_reserve();
 	return 0;
 }
@@ -4666,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
 */
 
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	unsigned int cpu;
 	int ret;
 
-	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (!write || (ret == -EINVAL))
 		return ret;
 	for_each_populated_zone(zone) {
@@ -4732,7 +4856,14 @@ void *__init alloc_large_system_hash(const char *tablename,
 		numentries <<= (PAGE_SHIFT - scale);
 
 		/* Make sure we've got at least a 0-order allocation.. */
-		if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+		if (unlikely(flags & HASH_SMALL)) {
+			/* Makes no sense without HASH_EARLY */
+			WARN_ON(!(flags & HASH_EARLY));
+			if (!(numentries >> *_hash_shift)) {
+				numentries = 1UL << *_hash_shift;
+				BUG_ON(!numentries);
+			}
+		} else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
 			numentries = PAGE_SIZE / bucketsize;
 	}
 	numentries = roundup_pow_of_two(numentries);
@@ -4874,13 +5005,16 @@ int set_migratetype_isolate(struct page *page)
 	struct zone *zone;
 	unsigned long flags;
 	int ret = -EBUSY;
+	int zone_idx;
 
 	zone = page_zone(page);
+	zone_idx = zone_idx(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	/*
 	 * In future, more migrate types will be able to be isolation target.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
+	    zone_idx != ZONE_MOVABLE)
 		goto out;
 	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 	move_freepages_block(zone, page, MIGRATE_ISOLATE);