Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  327
1 file changed, 230 insertions(+), 97 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0de15f46987..2bc2ac63f41e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -123,8 +123,8 @@ static char * const zone_names[MAX_NR_ZONES] = {
 
 int min_free_kbytes = 1024;
 
-unsigned long __meminitdata nr_kernel_pages;
-unsigned long __meminitdata nr_all_pages;
+static unsigned long __meminitdata nr_kernel_pages;
+static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
         static unsigned long nr_shown;
         static unsigned long nr_unshown;
 
+        /* Don't complain about poisoned pages */
+        if (PageHWPoison(page)) {
+                __ClearPageBuddy(page);
+                return;
+        }
+
         /*
          * Allow a burst of 60 reports, then keep quiet for that minute;
          * or allow a steady drip of one report per second.
@@ -510,7 +516,7 @@ static inline int free_pages_check(struct page *page)
 }
 
 /*
- * Frees a list of pages.
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -520,22 +526,42 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pages_bulk(struct zone *zone, int count,
-                                        struct list_head *list, int order)
+static void free_pcppages_bulk(struct zone *zone, int count,
+                                        struct per_cpu_pages *pcp)
 {
+        int migratetype = 0;
+        int batch_free = 0;
+
         spin_lock(&zone->lock);
         zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
         zone->pages_scanned = 0;
 
-        __mod_zone_page_state(zone, NR_FREE_PAGES, count << order);
-        while (count--) {
+        __mod_zone_page_state(zone, NR_FREE_PAGES, count);
+        while (count) {
                 struct page *page;
+                struct list_head *list;
 
-                VM_BUG_ON(list_empty(list));
-                page = list_entry(list->prev, struct page, lru);
-                /* have to delete it as __free_one_page list manipulates */
-                list_del(&page->lru);
-                __free_one_page(page, zone, order, page_private(page));
+                /*
+                 * Remove pages from lists in a round-robin fashion. A
+                 * batch_free count is maintained that is incremented when an
+                 * empty list is encountered. This is so more pages are freed
+                 * off fuller lists instead of spinning excessively around empty
+                 * lists
+                 */
+                do {
+                        batch_free++;
+                        if (++migratetype == MIGRATE_PCPTYPES)
+                                migratetype = 0;
+                        list = &pcp->lists[migratetype];
+                } while (list_empty(list));
+
+                do {
+                        page = list_entry(list->prev, struct page, lru);
+                        /* must delete as __free_one_page list manipulates */
+                        list_del(&page->lru);
+                        __free_one_page(page, zone, 0, migratetype);
+                        trace_mm_page_pcpu_drain(page, 0, migratetype);
+                } while (--count && --batch_free && !list_empty(list));
         }
         spin_unlock(&zone->lock);
 }
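
The hunk above is the heart of the series: the per-cpu cache now keeps one list per migratetype, and bulk freeing walks those lists round-robin, letting batch_free grow past empty lists so that fuller lists give up more pages per pass. A minimal userspace sketch of that selection loop, with plain counters standing in for the kernel's list_head lists (names and values here are illustrative, not kernel API):

    #include <stdio.h>

    #define MIGRATE_PCPTYPES 3      /* unmovable, reclaimable, movable */

    /*
     * Drain 'count' pages from per-type counters, mimicking the round-robin
     * selection in free_pcppages_bulk(). Assumes count never exceeds the
     * total number of cached pages, as the kernel guarantees.
     */
    static void drain(int lists[MIGRATE_PCPTYPES], int count)
    {
        int migratetype = 0;

        while (count) {
            int batch_free = 0;

            /* Skip empty lists; each skip grows the batch taken next. */
            do {
                batch_free++;
                if (++migratetype == MIGRATE_PCPTYPES)
                    migratetype = 0;
            } while (lists[migratetype] == 0);

            do {
                lists[migratetype]--;
                printf("freed one page from list %d\n", migratetype);
            } while (--count && --batch_free && lists[migratetype]);
        }
    }

    int main(void)
    {
        int lists[MIGRATE_PCPTYPES] = { 1, 0, 6 };  /* mostly movable cached */

        drain(lists, 5);
        return 0;
    }

With the counts above, the drain takes two pages from the movable list, one from the unmovable list, then two more from the movable list, which is the intended bias toward fuller lists.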
@@ -557,7 +583,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
         unsigned long flags;
         int i;
         int bad = 0;
-        int wasMlocked = TestClearPageMlocked(page);
+        int wasMlocked = __TestClearPageMlocked(page);
 
         kmemcheck_free_shadow(page, order);
 
@@ -646,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
  * This page is about to be returned from the page allocator
  */
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
 {
         if (unlikely(page_mapcount(page) |
                 (page->mapping != NULL) |
@@ -655,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
                 bad_page(page);
                 return 1;
         }
+        return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+        int i;
+
+        for (i = 0; i < (1 << order); i++) {
+                struct page *p = page + i;
+                if (unlikely(check_new_page(p)))
+                        return 1;
+        }
 
         set_page_private(page, 0);
         set_page_refcounted(page);
@@ -783,6 +821,17 @@ static int move_freepages_block(struct zone *zone, struct page *page,
         return move_freepages(zone, start_page, end_page, migratetype);
 }
 
+static void change_pageblock_range(struct page *pageblock_page,
+                                        int start_order, int migratetype)
+{
+        int nr_pageblocks = 1 << (start_order - pageblock_order);
+
+        while (nr_pageblocks--) {
+                set_pageblock_migratetype(pageblock_page, migratetype);
+                pageblock_page += pageblock_nr_pages;
+        }
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
@@ -836,11 +885,16 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                         list_del(&page->lru);
                         rmv_page_order(page);
 
-                        if (current_order == pageblock_order)
-                                set_pageblock_migratetype(page,
+                        /* Take ownership for orders >= pageblock_order */
+                        if (current_order >= pageblock_order)
+                                change_pageblock_range(page, current_order,
                                                         start_migratetype);
 
                         expand(zone, page, order, current_order, area, migratetype);
+
+                        trace_mm_page_alloc_extfrag(page, order, current_order,
+                                        start_migratetype, migratetype);
+
                         return page;
                 }
         }
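
For completeness, a tiny calculation showing why change_pageblock_range() is needed once fallback steals a block larger than a single pageblock: each extra order doubles the number of pageblocks whose migratetype must be rewritten. The pageblock_order of 9 below is only an illustrative value (2 MiB pageblocks with 4 KiB pages):

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative: 4 KiB pages, pageblock_order 9 (2 MiB pageblocks). */
        const int pageblock_order = 9;

        /*
         * change_pageblock_range() retags every pageblock covered by the
         * stolen buddy block once current_order >= pageblock_order.
         */
        for (int order = pageblock_order; order <= pageblock_order + 2; order++)
            printf("stolen order-%d block -> %d pageblock(s) change migratetype\n",
                   order, 1 << (order - pageblock_order));
        return 0;
    }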
@@ -874,6 +928,7 @@ retry_reserve:
                 }
         }
 
+        trace_mm_page_alloc_zone_locked(page, order, migratetype);
         return page;
 }
 
@@ -934,7 +989,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
                 to_drain = pcp->batch;
         else
                 to_drain = pcp->count;
-        free_pages_bulk(zone, to_drain, &pcp->list, 0);
+        free_pcppages_bulk(zone, to_drain, pcp);
         pcp->count -= to_drain;
         local_irq_restore(flags);
 }
@@ -960,7 +1015,7 @@ static void drain_pages(unsigned int cpu)
 
                 pcp = &pset->pcp;
                 local_irq_save(flags);
-                free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+                free_pcppages_bulk(zone, pcp->count, pcp);
                 pcp->count = 0;
                 local_irq_restore(flags);
         }
@@ -1026,7 +1081,8 @@ static void free_hot_cold_page(struct page *page, int cold)
         struct zone *zone = page_zone(page);
         struct per_cpu_pages *pcp;
         unsigned long flags;
-        int wasMlocked = TestClearPageMlocked(page);
+        int migratetype;
+        int wasMlocked = __TestClearPageMlocked(page);
 
         kmemcheck_free_shadow(page, 0);
 
@@ -1043,35 +1099,49 @@ static void free_hot_cold_page(struct page *page, int cold)
         kernel_map_pages(page, 1, 0);
 
         pcp = &zone_pcp(zone, get_cpu())->pcp;
-        set_page_private(page, get_pageblock_migratetype(page));
+        migratetype = get_pageblock_migratetype(page);
+        set_page_private(page, migratetype);
         local_irq_save(flags);
         if (unlikely(wasMlocked))
                 free_page_mlock(page);
         __count_vm_event(PGFREE);
 
+        /*
+         * We only track unmovable, reclaimable and movable on pcp lists.
+         * Free ISOLATE pages back to the allocator because they are being
+         * offlined but treat RESERVE as movable pages so we can get those
+         * areas back if necessary. Otherwise, we may have to free
+         * excessively into the page allocator
+         */
+        if (migratetype >= MIGRATE_PCPTYPES) {
+                if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+                        free_one_page(zone, page, 0, migratetype);
+                        goto out;
+                }
+                migratetype = MIGRATE_MOVABLE;
+        }
+
         if (cold)
-                list_add_tail(&page->lru, &pcp->list);
+                list_add_tail(&page->lru, &pcp->lists[migratetype]);
         else
-                list_add(&page->lru, &pcp->list);
+                list_add(&page->lru, &pcp->lists[migratetype]);
         pcp->count++;
         if (pcp->count >= pcp->high) {
-                free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+                free_pcppages_bulk(zone, pcp->batch, pcp);
                 pcp->count -= pcp->batch;
         }
+
+out:
         local_irq_restore(flags);
         put_cpu();
 }
 
 void free_hot_page(struct page *page)
 {
+        trace_mm_page_free_direct(page, 0);
         free_hot_cold_page(page, 0);
 }
 
-void free_cold_page(struct page *page)
-{
-        free_hot_cold_page(page, 1);
-}
-
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
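
Because the per-cpu cache only has lists for the MIGRATE_PCPTYPES types, the free path above has to route everything else: MIGRATE_ISOLATE pages bypass the cache and go straight back to the buddy lists, while MIGRATE_RESERVE pages are cached as movable. A small userspace sketch of that routing decision (the enum is written out here for illustration, not pulled from the kernel headers):

    #include <stdio.h>

    /* The three pcp types come first, then the remaining migratetypes. */
    enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
           MIGRATE_PCPTYPES, MIGRATE_RESERVE = MIGRATE_PCPTYPES, MIGRATE_ISOLATE };

    /* Which pcp list a freed page lands on, or -1 for "free to buddy now". */
    static int pcp_list_for(int migratetype)
    {
        if (migratetype >= MIGRATE_PCPTYPES) {
            if (migratetype == MIGRATE_ISOLATE)
                return -1;                      /* bypass the per-cpu cache */
            migratetype = MIGRATE_MOVABLE;      /* cache RESERVE pages as movable */
        }
        return migratetype;
    }

    int main(void)
    {
        const int types[] = { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE,
                              MIGRATE_MOVABLE, MIGRATE_RESERVE, MIGRATE_ISOLATE };

        for (unsigned int i = 0; i < sizeof(types) / sizeof(types[0]); i++)
            printf("migratetype %d -> pcp list %d\n",
                   types[i], pcp_list_for(types[i]));
        return 0;
    }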
@@ -1119,35 +1189,23 @@ again:
         cpu = get_cpu();
         if (likely(order == 0)) {
                 struct per_cpu_pages *pcp;
+                struct list_head *list;
 
                 pcp = &zone_pcp(zone, cpu)->pcp;
+                list = &pcp->lists[migratetype];
                 local_irq_save(flags);
-                if (!pcp->count) {
-                        pcp->count = rmqueue_bulk(zone, 0,
-                                        pcp->batch, &pcp->list,
+                if (list_empty(list)) {
+                        pcp->count += rmqueue_bulk(zone, 0,
+                                        pcp->batch, list,
                                         migratetype, cold);
-                        if (unlikely(!pcp->count))
+                        if (unlikely(list_empty(list)))
                                 goto failed;
                 }
 
-                /* Find a page of the appropriate migrate type */
-                if (cold) {
-                        list_for_each_entry_reverse(page, &pcp->list, lru)
-                                if (page_private(page) == migratetype)
-                                        break;
-                } else {
-                        list_for_each_entry(page, &pcp->list, lru)
-                                if (page_private(page) == migratetype)
-                                        break;
-                }
-
-                /* Allocate more to the pcp list if necessary */
-                if (unlikely(&page->lru == &pcp->list)) {
-                        pcp->count += rmqueue_bulk(zone, 0,
-                                        pcp->batch, &pcp->list,
-                                        migratetype, cold);
-                        page = list_entry(pcp->list.next, struct page, lru);
-                }
+                if (cold)
+                        page = list_entry(list->prev, struct page, lru);
+                else
+                        page = list_entry(list->next, struct page, lru);
 
                 list_del(&page->lru);
                 pcp->count--;
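
On the allocation side the same per-migratetype lists remove the old linear search for a matching page: the right list is refilled when empty, then a hot request takes from the head (most recently freed, likely cache-warm) and a cold request from the tail. A toy circular list in userspace makes the head/tail choice concrete (struct and helper are illustrative, not the kernel's list_head API):

    #include <stdio.h>

    struct page { int id; struct page *prev, *next; };

    /* Add to the front of a circular list with a dummy head. */
    static void push_front(struct page *head, struct page *p)
    {
        p->next = head->next;
        p->prev = head;
        head->next->prev = p;
        head->next = p;
    }

    int main(void)
    {
        struct page head = { .id = -1, .prev = &head, .next = &head };
        struct page pages[4];

        /* Recently freed pages land at the front, so the front is cache-hot. */
        for (int i = 0; i < 4; i++) {
            pages[i].id = i;
            push_front(&head, &pages[i]);
        }

        printf("hot request takes page %d (head), cold request takes page %d (tail)\n",
               head.next->id, head.prev->id);
        return 0;
    }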
@@ -1627,10 +1685,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
         /* We now go into synchronous reclaim */
         cpuset_memory_pressure_bump();
-
-        /*
-         * The task's cpuset might have expanded its set of allowable nodes
-         */
         p->flags |= PF_MEMALLOC;
         lockdep_set_current_reclaim_state(gfp_mask);
         reclaim_state.reclaimed_slab = 0;
@@ -1715,7 +1769,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
                  * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
                  */
                 alloc_flags &= ~ALLOC_CPUSET;
-        } else if (unlikely(rt_task(p)))
+        } else if (unlikely(rt_task(p)) && !in_interrupt())
                 alloc_flags |= ALLOC_HARDER;
 
         if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
@@ -1763,6 +1817,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
                 goto nopage;
 
+restart:
         wake_all_kswapd(order, zonelist, high_zoneidx);
 
         /*
@@ -1772,7 +1827,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
          */
         alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
-restart:
         /* This is the last chance, in general, before the goto nopage. */
         page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                         high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -1907,6 +1961,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                                 zonelist, high_zoneidx, nodemask,
                                 preferred_zone, migratetype);
 
+        trace_mm_page_alloc(page, order, gfp_mask, migratetype);
         return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -1916,44 +1971,41 @@ EXPORT_SYMBOL(__alloc_pages_nodemask);
  */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
-        struct page * page;
+        struct page *page;
+
+        /*
+         * __get_free_pages() returns a 32-bit address, which cannot represent
+         * a highmem page
+         */
+        VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+
         page = alloc_pages(gfp_mask, order);
         if (!page)
                 return 0;
         return (unsigned long) page_address(page);
 }
-
 EXPORT_SYMBOL(__get_free_pages);
 
 unsigned long get_zeroed_page(gfp_t gfp_mask)
 {
-        struct page * page;
-
-        /*
-         * get_zeroed_page() returns a 32-bit address, which cannot represent
-         * a highmem page
-         */
-        VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-        page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
-        if (page)
-                return (unsigned long) page_address(page);
-        return 0;
+        return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
 }
-
 EXPORT_SYMBOL(get_zeroed_page);
 
 void __pagevec_free(struct pagevec *pvec)
 {
         int i = pagevec_count(pvec);
 
-        while (--i >= 0)
+        while (--i >= 0) {
+                trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
                 free_hot_cold_page(pvec->pages[i], pvec->cold);
+        }
 }
 
 void __free_pages(struct page *page, unsigned int order)
 {
         if (put_page_testzero(page)) {
+                trace_mm_page_free_direct(page, order);
                 if (order == 0)
                         free_hot_page(page);
                 else
@@ -2128,23 +2180,27 @@ void show_free_areas(void)
                 }
         }
 
-        printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
-                " inactive_file:%lu"
+        printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+                " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
                 " unevictable:%lu"
                 " dirty:%lu writeback:%lu unstable:%lu\n"
-                " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+                " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+                " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
                 global_page_state(NR_ACTIVE_ANON),
-                global_page_state(NR_ACTIVE_FILE),
                 global_page_state(NR_INACTIVE_ANON),
+                global_page_state(NR_ISOLATED_ANON),
+                global_page_state(NR_ACTIVE_FILE),
                 global_page_state(NR_INACTIVE_FILE),
+                global_page_state(NR_ISOLATED_FILE),
                 global_page_state(NR_UNEVICTABLE),
                 global_page_state(NR_FILE_DIRTY),
                 global_page_state(NR_WRITEBACK),
                 global_page_state(NR_UNSTABLE_NFS),
                 global_page_state(NR_FREE_PAGES),
-                global_page_state(NR_SLAB_RECLAIMABLE) +
-                        global_page_state(NR_SLAB_UNRECLAIMABLE),
+                global_page_state(NR_SLAB_RECLAIMABLE),
+                global_page_state(NR_SLAB_UNRECLAIMABLE),
                 global_page_state(NR_FILE_MAPPED),
+                global_page_state(NR_SHMEM),
                 global_page_state(NR_PAGETABLE),
                 global_page_state(NR_BOUNCE));
 
@@ -2162,7 +2218,21 @@ void show_free_areas(void)
                         " active_file:%lukB"
                         " inactive_file:%lukB"
                         " unevictable:%lukB"
+                        " isolated(anon):%lukB"
+                        " isolated(file):%lukB"
                         " present:%lukB"
+                        " mlocked:%lukB"
+                        " dirty:%lukB"
+                        " writeback:%lukB"
+                        " mapped:%lukB"
+                        " shmem:%lukB"
+                        " slab_reclaimable:%lukB"
+                        " slab_unreclaimable:%lukB"
+                        " kernel_stack:%lukB"
+                        " pagetables:%lukB"
+                        " unstable:%lukB"
+                        " bounce:%lukB"
+                        " writeback_tmp:%lukB"
                         " pages_scanned:%lu"
                         " all_unreclaimable? %s"
                         "\n",
@@ -2176,7 +2246,22 @@ void show_free_areas(void)
                         K(zone_page_state(zone, NR_ACTIVE_FILE)),
                         K(zone_page_state(zone, NR_INACTIVE_FILE)),
                         K(zone_page_state(zone, NR_UNEVICTABLE)),
+                        K(zone_page_state(zone, NR_ISOLATED_ANON)),
+                        K(zone_page_state(zone, NR_ISOLATED_FILE)),
                         K(zone->present_pages),
+                        K(zone_page_state(zone, NR_MLOCK)),
+                        K(zone_page_state(zone, NR_FILE_DIRTY)),
+                        K(zone_page_state(zone, NR_WRITEBACK)),
+                        K(zone_page_state(zone, NR_FILE_MAPPED)),
+                        K(zone_page_state(zone, NR_SHMEM)),
+                        K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
+                        K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
+                        zone_page_state(zone, NR_KERNEL_STACK) *
+                                        THREAD_SIZE / 1024,
+                        K(zone_page_state(zone, NR_PAGETABLE)),
+                        K(zone_page_state(zone, NR_UNSTABLE_NFS)),
+                        K(zone_page_state(zone, NR_BOUNCE)),
+                        K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
                         zone->pages_scanned,
                         (zone_is_all_unreclaimable(zone) ? "yes" : "no")
                         );
@@ -2305,7 +2390,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
  * sysctl handler for numa_zonelist_order
  */
 int numa_zonelist_order_handler(ctl_table *table, int write,
-                struct file *file, void __user *buffer, size_t *length,
+                void __user *buffer, size_t *length,
                 loff_t *ppos)
 {
         char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2314,7 +2399,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
         if (write)
                 strncpy(saved_string, (char*)table->data,
                         NUMA_ZONELIST_ORDER_LEN);
-        ret = proc_dostring(table, write, file, buffer, length, ppos);
+        ret = proc_dostring(table, write, buffer, length, ppos);
         if (ret)
                 return ret;
         if (write) {
@@ -2783,7 +2868,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 {
         unsigned long start_pfn, pfn, end_pfn;
         struct page *page;
-        unsigned long reserve, block_migratetype;
+        unsigned long block_migratetype;
+        int reserve;
 
         /* Get the start pfn, end pfn and the number of blocks to reserve */
         start_pfn = zone->zone_start_pfn;
@@ -2791,6 +2877,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
         reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                         pageblock_order;
 
+        /*
+         * Reserve blocks are generally in place to help high-order atomic
+         * allocations that are short-lived. A min_free_kbytes value that
+         * would result in more than 2 reserve blocks for atomic allocations
+         * is assumed to be in place to help anti-fragmentation for the
+         * future allocation of hugepages at runtime.
+         */
+        reserve = min(2, reserve);
+
         for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
                 if (!pfn_valid(pfn))
                         continue;
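
The new clamp above caps MIGRATE_RESERVE at two pageblocks per zone; anything more implied by min_free_kbytes is assumed to exist for anti-fragmentation rather than for atomic allocations. A worked example of the arithmetic, using made-up watermarks and a 2 MiB pageblock:

    #include <stdio.h>

    /* Round x up to the next multiple of 'to' (what the kernel's roundup() does). */
    static unsigned long roundup_to(unsigned long x, unsigned long to)
    {
        return ((x + to - 1) / to) * to;
    }

    int main(void)
    {
        /* Illustrative zone: 4 KiB pages, pageblock_order 9 -> 2 MiB pageblocks. */
        const unsigned long pageblock_order = 9;
        const unsigned long pageblock_nr_pages = 1UL << pageblock_order;
        const unsigned long min_wmark[] = { 256, 1024, 5000 };     /* in pages */

        for (int i = 0; i < 3; i++) {
            int reserve = roundup_to(min_wmark[i], pageblock_nr_pages)
                                >> pageblock_order;

            if (reserve > 2)        /* reserve = min(2, reserve) in the patch */
                reserve = 2;
            printf("min watermark %4lu pages -> %d reserve pageblock(s)\n",
                   min_wmark[i], reserve);
        }
        return 0;
    }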
@@ -2961,6 +3056,7 @@ static int zone_batchsize(struct zone *zone)
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
         struct per_cpu_pages *pcp;
+        int migratetype;
 
         memset(p, 0, sizeof(*p));
 
@@ -2968,7 +3064,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
         pcp->count = 0;
         pcp->high = 6 * batch;
         pcp->batch = max(1UL, 1 * batch);
-        INIT_LIST_HEAD(&pcp->list);
+        for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
+                INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
 
 /*
@@ -3146,6 +3243,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
         return 0;
 }
 
+static int __zone_pcp_update(void *data)
+{
+        struct zone *zone = data;
+        int cpu;
+        unsigned long batch = zone_batchsize(zone), flags;
+
+        for (cpu = 0; cpu < NR_CPUS; cpu++) {
+                struct per_cpu_pageset *pset;
+                struct per_cpu_pages *pcp;
+
+                pset = zone_pcp(zone, cpu);
+                pcp = &pset->pcp;
+
+                local_irq_save(flags);
+                free_pcppages_bulk(zone, pcp->count, pcp);
+                setup_pageset(pset, batch);
+                local_irq_restore(flags);
+        }
+        return 0;
+}
+
+void zone_pcp_update(struct zone *zone)
+{
+        stop_machine(__zone_pcp_update, zone, NULL);
+}
+
 static __meminit void zone_pcp_init(struct zone *zone)
 {
         int cpu;
@@ -3720,7 +3843,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                 zone_pcp_init(zone);
                 for_each_lru(l) {
                         INIT_LIST_HEAD(&zone->lru[l].list);
-                        zone->lru[l].nr_saved_scan = 0;
+                        zone->reclaim_stat.nr_saved_scan[l] = 0;
                 }
                 zone->reclaim_stat.recent_rotated[0] = 0;
                 zone->reclaim_stat.recent_rotated[1] = 0;
@@ -4509,7 +4632,7 @@ void setup_per_zone_wmarks(void)
         calculate_totalreserve_pages();
 }
 
-/**
+/*
  * The inactive anon list should be small enough that the VM never has to
  * do too much work, but large enough that each inactive page has a chance
  * to be referenced again before it is swapped out.
@@ -4600,9 +4723,9 @@ module_init(init_per_zone_wmark_min)
  * changes.
  */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
-        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+        void __user *buffer, size_t *length, loff_t *ppos)
 {
-        proc_dointvec(table, write, file, buffer, length, ppos);
+        proc_dointvec(table, write, buffer, length, ppos);
         if (write)
                 setup_per_zone_wmarks();
         return 0;
@@ -4610,12 +4733,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 
 #ifdef CONFIG_NUMA
 int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
-        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+        void __user *buffer, size_t *length, loff_t *ppos)
 {
         struct zone *zone;
         int rc;
 
-        rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+        rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
         if (rc)
                 return rc;
 
@@ -4626,12 +4749,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
-        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+        void __user *buffer, size_t *length, loff_t *ppos)
 {
         struct zone *zone;
         int rc;
 
-        rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+        rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
         if (rc)
                 return rc;
 
@@ -4652,9 +4775,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+        void __user *buffer, size_t *length, loff_t *ppos)
 {
-        proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+        proc_dointvec_minmax(table, write, buffer, length, ppos);
         setup_per_zone_lowmem_reserve();
         return 0;
 }
@@ -4666,13 +4789,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
  */
 
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
-        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+        void __user *buffer, size_t *length, loff_t *ppos)
 {
         struct zone *zone;
         unsigned int cpu;
         int ret;
 
-        ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+        ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
         if (!write || (ret == -EINVAL))
                 return ret;
         for_each_populated_zone(zone) {
@@ -4732,7 +4855,14 @@ void *__init alloc_large_system_hash(const char *tablename,
                 numentries <<= (PAGE_SHIFT - scale);
 
                 /* Make sure we've got at least a 0-order allocation.. */
-                if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+                if (unlikely(flags & HASH_SMALL)) {
+                        /* Makes no sense without HASH_EARLY */
+                        WARN_ON(!(flags & HASH_EARLY));
+                        if (!(numentries >> *_hash_shift)) {
+                                numentries = 1UL << *_hash_shift;
+                                BUG_ON(!numentries);
+                        }
+                } else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
                         numentries = PAGE_SIZE / bucketsize;
         }
         numentries = roundup_pow_of_two(numentries);
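
Without HASH_SMALL, a hash table whose computed size is smaller than a page gets rounded up to fill one page; with HASH_SMALL the caller's *_hash_shift acts as a floor instead, so tiny boot-time tables stay tiny. A userspace sketch of the two branches (the constants and the helper are illustrative):

    #include <stdio.h>

    #define PAGE_SIZE  4096UL
    #define HASH_EARLY 0x1u
    #define HASH_SMALL 0x2u

    /* Model of the sizing branch added above; returns the bucket count used. */
    static unsigned long size_hash(unsigned long numentries, unsigned long bucketsize,
                                   unsigned int flags, unsigned int hash_shift)
    {
        if (flags & HASH_SMALL) {
            /* Only meaningful for HASH_EARLY boot-time tables. */
            if (!(numentries >> hash_shift))
                numentries = 1UL << hash_shift;     /* floor at 2^hash_shift */
        } else if (numentries * bucketsize < PAGE_SIZE) {
            numentries = PAGE_SIZE / bucketsize;    /* round up to one full page */
        }
        return numentries;
    }

    int main(void)
    {
        /* 8-byte buckets, minimum shift of 4 (a floor of 16 buckets). */
        printf("default, 16 entries:    %lu buckets\n",
               size_hash(16, 8, HASH_EARLY, 4));
        printf("HASH_SMALL, 16 entries: %lu buckets\n",
               size_hash(16, 8, HASH_EARLY | HASH_SMALL, 4));
        printf("HASH_SMALL, 4 entries:  %lu buckets\n",
               size_hash(4, 8, HASH_EARLY | HASH_SMALL, 4));
        return 0;
    }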
@@ -4874,13 +5004,16 @@ int set_migratetype_isolate(struct page *page)
         struct zone *zone;
         unsigned long flags;
         int ret = -EBUSY;
+        int zone_idx;
 
         zone = page_zone(page);
+        zone_idx = zone_idx(zone);
         spin_lock_irqsave(&zone->lock, flags);
         /*
          * In future, more migrate types will be able to be isolation target.
          */
-        if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+        if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
+            zone_idx != ZONE_MOVABLE)
                 goto out;
         set_pageblock_migratetype(page, MIGRATE_ISOLATE);
         move_freepages_block(zone, page, MIGRATE_ISOLATE);