path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 308
1 file changed, 212 insertions(+), 96 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9faa7ad95ac..bf720550b44 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -123,8 +123,8 @@ static char * const zone_names[MAX_NR_ZONES] = {
 
 int min_free_kbytes = 1024;
 
-unsigned long __meminitdata nr_kernel_pages;
-unsigned long __meminitdata nr_all_pages;
+static unsigned long __meminitdata nr_kernel_pages;
+static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
@@ -516,7 +516,7 @@ static inline int free_pages_check(struct page *page)
 }
 
 /*
- * Frees a list of pages.
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -526,22 +526,42 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pages_bulk(struct zone *zone, int count,
-					struct list_head *list, int order)
+static void free_pcppages_bulk(struct zone *zone, int count,
+					struct per_cpu_pages *pcp)
 {
+	int migratetype = 0;
+	int batch_free = 0;
+
 	spin_lock(&zone->lock);
 	zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, count << order);
-	while (count--) {
+	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
+	while (count) {
 		struct page *page;
+		struct list_head *list;
 
-		VM_BUG_ON(list_empty(list));
-		page = list_entry(list->prev, struct page, lru);
-		/* have to delete it as __free_one_page list manipulates */
-		list_del(&page->lru);
-		__free_one_page(page, zone, order, page_private(page));
+		/*
+		 * Remove pages from lists in a round-robin fashion. A
+		 * batch_free count is maintained that is incremented when an
+		 * empty list is encountered. This is so more pages are freed
+		 * off fuller lists instead of spinning excessively around empty
+		 * lists
+		 */
+		do {
+			batch_free++;
+			if (++migratetype == MIGRATE_PCPTYPES)
+				migratetype = 0;
+			list = &pcp->lists[migratetype];
+		} while (list_empty(list));
+
+		do {
+			page = list_entry(list->prev, struct page, lru);
+			/* must delete as __free_one_page list manipulates */
+			list_del(&page->lru);
+			__free_one_page(page, zone, 0, migratetype);
+			trace_mm_page_pcpu_drain(page, 0, migratetype);
+		} while (--count && --batch_free && !list_empty(list));
 	}
 	spin_unlock(&zone->lock);
 }
@@ -563,7 +583,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	unsigned long flags;
 	int i;
 	int bad = 0;
-	int wasMlocked = TestClearPageMlocked(page);
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, order);
 
@@ -801,6 +821,17 @@ static int move_freepages_block(struct zone *zone, struct page *page,
 	return move_freepages(zone, start_page, end_page, migratetype);
 }
 
+static void change_pageblock_range(struct page *pageblock_page,
+					int start_order, int migratetype)
+{
+	int nr_pageblocks = 1 << (start_order - pageblock_order);
+
+	while (nr_pageblocks--) {
+		set_pageblock_migratetype(pageblock_page, migratetype);
+		pageblock_page += pageblock_nr_pages;
+	}
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
@@ -854,11 +885,16 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			list_del(&page->lru);
 			rmv_page_order(page);
 
-			if (current_order == pageblock_order)
-				set_pageblock_migratetype(page,
+			/* Take ownership for orders >= pageblock_order */
+			if (current_order >= pageblock_order)
+				change_pageblock_range(page, current_order,
 							start_migratetype);
 
 			expand(zone, page, order, current_order, area, migratetype);
+
+			trace_mm_page_alloc_extfrag(page, order, current_order,
+					start_migratetype, migratetype);
+
 			return page;
 		}
 	}
@@ -892,6 +928,7 @@ retry_reserve:
 		}
 	}
 
+	trace_mm_page_alloc_zone_locked(page, order, migratetype);
 	return page;
 }
 
@@ -952,7 +989,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 		to_drain = pcp->batch;
 	else
 		to_drain = pcp->count;
-	free_pages_bulk(zone, to_drain, &pcp->list, 0);
+	free_pcppages_bulk(zone, to_drain, pcp);
 	pcp->count -= to_drain;
 	local_irq_restore(flags);
 }
@@ -978,7 +1015,7 @@ static void drain_pages(unsigned int cpu)
 
 		pcp = &pset->pcp;
 		local_irq_save(flags);
-		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
 	}
@@ -1044,7 +1081,8 @@ static void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
-	int wasMlocked = TestClearPageMlocked(page);
+	int migratetype;
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, 0);
 
@@ -1061,35 +1099,49 @@ static void free_hot_cold_page(struct page *page, int cold)
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp;
-	set_page_private(page, get_pageblock_migratetype(page));
+	migratetype = get_pageblock_migratetype(page);
+	set_page_private(page, migratetype);
 	local_irq_save(flags);
 	if (unlikely(wasMlocked))
 		free_page_mlock(page);
 	__count_vm_event(PGFREE);
 
+	/*
+	 * We only track unmovable, reclaimable and movable on pcp lists.
+	 * Free ISOLATE pages back to the allocator because they are being
+	 * offlined but treat RESERVE as movable pages so we can get those
+	 * areas back if necessary. Otherwise, we may have to free
+	 * excessively into the page allocator
+	 */
+	if (migratetype >= MIGRATE_PCPTYPES) {
+		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+			free_one_page(zone, page, 0, migratetype);
+			goto out;
+		}
+		migratetype = MIGRATE_MOVABLE;
+	}
+
 	if (cold)
-		list_add_tail(&page->lru, &pcp->list);
+		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
-		list_add(&page->lru, &pcp->list);
+		list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
-		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->batch, pcp);
 		pcp->count -= pcp->batch;
 	}
+
+out:
 	local_irq_restore(flags);
 	put_cpu();
 }
 
 void free_hot_page(struct page *page)
 {
+	trace_mm_page_free_direct(page, 0);
 	free_hot_cold_page(page, 0);
 }
 
-void free_cold_page(struct page *page)
-{
-	free_hot_cold_page(page, 1);
-}
-
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
@@ -1137,35 +1189,23 @@ again:
 	cpu  = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
+		struct list_head *list;
 
 		pcp = &zone_pcp(zone, cpu)->pcp;
+		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
-		if (!pcp->count) {
-			pcp->count = rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
+		if (list_empty(list)) {
+			pcp->count += rmqueue_bulk(zone, 0,
+					pcp->batch, list,
 					migratetype, cold);
-			if (unlikely(!pcp->count))
+			if (unlikely(list_empty(list)))
 				goto failed;
 		}
 
-		/* Find a page of the appropriate migrate type */
-		if (cold) {
-			list_for_each_entry_reverse(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		} else {
-			list_for_each_entry(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		}
-
-		/* Allocate more to the pcp list if necessary */
-		if (unlikely(&page->lru == &pcp->list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
-					migratetype, cold);
-			page = list_entry(pcp->list.next, struct page, lru);
-		}
+		if (cold)
+			page = list_entry(list->prev, struct page, lru);
+		else
+			page = list_entry(list->next, struct page, lru);
 
 		list_del(&page->lru);
 		pcp->count--;
@@ -1645,10 +1685,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-
-	/*
-	 * The task's cpuset might have expanded its set of allowable nodes
-	 */
 	p->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
@@ -1783,6 +1819,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 	wake_all_kswapd(order, zonelist, high_zoneidx);
 
+restart:
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
 	 * reclaim. Now things get more complex, so set up alloc_flags according
@@ -1790,7 +1827,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
-restart:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -1925,6 +1961,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			zonelist, high_zoneidx, nodemask,
 			preferred_zone, migratetype);
 
+	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -1934,44 +1971,41 @@ EXPORT_SYMBOL(__alloc_pages_nodemask);
  */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
-	struct page * page;
+	struct page *page;
+
+	/*
+	 * __get_free_pages() returns a 32-bit address, which cannot represent
+	 * a highmem page
+	 */
+	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+
 	page = alloc_pages(gfp_mask, order);
 	if (!page)
 		return 0;
 	return (unsigned long) page_address(page);
 }
-
 EXPORT_SYMBOL(__get_free_pages);
 
 unsigned long get_zeroed_page(gfp_t gfp_mask)
 {
-	struct page * page;
-
-	/*
-	 * get_zeroed_page() returns a 32-bit address, which cannot represent
-	 * a highmem page
-	 */
-	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
-	if (page)
-		return (unsigned long) page_address(page);
-	return 0;
+	return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
 }
-
 EXPORT_SYMBOL(get_zeroed_page);
 
 void __pagevec_free(struct pagevec *pvec)
 {
 	int i = pagevec_count(pvec);
 
-	while (--i >= 0)
+	while (--i >= 0) {
+		trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
 		free_hot_cold_page(pvec->pages[i], pvec->cold);
+	}
 }
 
 void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
+		trace_mm_page_free_direct(page, order);
 		if (order == 0)
 			free_hot_page(page);
 		else
@@ -2146,23 +2180,28 @@ void show_free_areas(void)
 		}
 	}
 
-	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
-		" inactive_file:%lu"
+	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
 		" unevictable:%lu"
-		" dirty:%lu writeback:%lu unstable:%lu\n"
-		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+		" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
+		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
-		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
+		global_page_state(NR_ISOLATED_ANON),
+		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_FILE),
+		global_page_state(NR_ISOLATED_FILE),
 		global_page_state(NR_UNEVICTABLE),
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
+		nr_blockdev_pages(),
 		global_page_state(NR_FREE_PAGES),
-		global_page_state(NR_SLAB_RECLAIMABLE) +
-			global_page_state(NR_SLAB_UNRECLAIMABLE),
+		global_page_state(NR_SLAB_RECLAIMABLE),
+		global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
+		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE));
 
@@ -2180,7 +2219,21 @@ void show_free_areas(void)
 			" active_file:%lukB"
 			" inactive_file:%lukB"
 			" unevictable:%lukB"
+			" isolated(anon):%lukB"
+			" isolated(file):%lukB"
 			" present:%lukB"
+			" mlocked:%lukB"
+			" dirty:%lukB"
+			" writeback:%lukB"
+			" mapped:%lukB"
+			" shmem:%lukB"
+			" slab_reclaimable:%lukB"
+			" slab_unreclaimable:%lukB"
+			" kernel_stack:%lukB"
+			" pagetables:%lukB"
+			" unstable:%lukB"
+			" bounce:%lukB"
+			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
 			"\n",
@@ -2194,7 +2247,22 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
 			K(zone_page_state(zone, NR_INACTIVE_FILE)),
 			K(zone_page_state(zone, NR_UNEVICTABLE)),
+			K(zone_page_state(zone, NR_ISOLATED_ANON)),
+			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
+			K(zone_page_state(zone, NR_MLOCK)),
+			K(zone_page_state(zone, NR_FILE_DIRTY)),
+			K(zone_page_state(zone, NR_WRITEBACK)),
+			K(zone_page_state(zone, NR_FILE_MAPPED)),
+			K(zone_page_state(zone, NR_SHMEM)),
+			K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
+			K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
+			zone_page_state(zone, NR_KERNEL_STACK) *
+					THREAD_SIZE / 1024,
+			K(zone_page_state(zone, NR_PAGETABLE)),
+			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
+			K(zone_page_state(zone, NR_BOUNCE)),
+			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
 			);
@@ -2323,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
  * sysctl handler for numa_zonelist_order
  */
 int numa_zonelist_order_handler(ctl_table *table, int write,
-		struct file *file, void __user *buffer, size_t *length,
+		void __user *buffer, size_t *length,
 		loff_t *ppos)
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2332,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 	if (write)
 		strncpy(saved_string, (char*)table->data,
 			NUMA_ZONELIST_ORDER_LEN);
-	ret = proc_dostring(table, write, file, buffer, length, ppos);
+	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
 		return ret;
 	if (write) {
@@ -2801,7 +2869,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 {
 	unsigned long start_pfn, pfn, end_pfn;
 	struct page *page;
-	unsigned long reserve, block_migratetype;
+	unsigned long block_migratetype;
+	int reserve;
 
 	/* Get the start pfn, end pfn and the number of blocks to reserve */
 	start_pfn = zone->zone_start_pfn;
@@ -2809,6 +2878,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
 
+	/*
+	 * Reserve blocks are generally in place to help high-order atomic
+	 * allocations that are short-lived. A min_free_kbytes value that
+	 * would result in more than 2 reserve blocks for atomic allocations
+	 * is assumed to be in place to help anti-fragmentation for the
+	 * future allocation of hugepages at runtime.
+	 */
+	reserve = min(2, reserve);
+
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
 			continue;
@@ -2979,6 +3057,7 @@ static int zone_batchsize(struct zone *zone)
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
+	int migratetype;
 
 	memset(p, 0, sizeof(*p));
 
@@ -2986,7 +3065,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 	pcp->count = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
+		INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
 
 /*
@@ -3164,6 +3244,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 	return 0;
 }
 
+static int __zone_pcp_update(void *data)
+{
+	struct zone *zone = data;
+	int cpu;
+	unsigned long batch = zone_batchsize(zone), flags;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;
+
+		pset = zone_pcp(zone, cpu);
+		pcp = &pset->pcp;
+
+		local_irq_save(flags);
+		free_pcppages_bulk(zone, pcp->count, pcp);
+		setup_pageset(pset, batch);
+		local_irq_restore(flags);
+	}
+	return 0;
+}
+
+void zone_pcp_update(struct zone *zone)
+{
+	stop_machine(__zone_pcp_update, zone, NULL);
+}
+
 static __meminit void zone_pcp_init(struct zone *zone)
 {
 	int cpu;
@@ -3738,7 +3844,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone_pcp_init(zone);
 		for_each_lru(l) {
 			INIT_LIST_HEAD(&zone->lru[l].list);
-			zone->lru[l].nr_saved_scan = 0;
+			zone->reclaim_stat.nr_saved_scan[l] = 0;
 		}
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
@@ -4527,7 +4633,7 @@ void setup_per_zone_wmarks(void)
 	calculate_totalreserve_pages();
 }
 
-/**
+/*
  * The inactive anon list should be small enough that the VM never has to
  * do too much work, but large enough that each inactive page has a chance
  * to be referenced again before it is swapped out.
@@ -4618,9 +4724,9 @@ module_init(init_per_zone_wmark_min)
  * changes.
  */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec(table, write, file, buffer, length, ppos);
+	proc_dointvec(table, write, buffer, length, ppos);
 	if (write)
 		setup_per_zone_wmarks();
 	return 0;
@@ -4628,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 
 #ifdef CONFIG_NUMA
 int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	int rc;
 
-	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
@@ -4644,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	int rc;
 
-	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
@@ -4670,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	proc_dointvec_minmax(table, write, buffer, length, ppos);
 	setup_per_zone_lowmem_reserve();
 	return 0;
 }
@@ -4684,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
  */
 
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	unsigned int cpu;
 	int ret;
 
-	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (!write || (ret == -EINVAL))
 		return ret;
 	for_each_populated_zone(zone) {
@@ -4750,7 +4856,14 @@ void *__init alloc_large_system_hash(const char *tablename,
 		numentries <<= (PAGE_SHIFT - scale);
 
 		/* Make sure we've got at least a 0-order allocation.. */
-		if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+		if (unlikely(flags & HASH_SMALL)) {
+			/* Makes no sense without HASH_EARLY */
+			WARN_ON(!(flags & HASH_EARLY));
+			if (!(numentries >> *_hash_shift)) {
+				numentries = 1UL << *_hash_shift;
+				BUG_ON(!numentries);
+			}
+		} else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
 			numentries = PAGE_SIZE / bucketsize;
 	}
 	numentries = roundup_pow_of_two(numentries);
@@ -4892,13 +5005,16 @@ int set_migratetype_isolate(struct page *page)
 	struct zone *zone;
 	unsigned long flags;
 	int ret = -EBUSY;
+	int zone_idx;
 
 	zone = page_zone(page);
+	zone_idx = zone_idx(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	/*
 	 * In future, more migrate types will be able to be isolation target.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
+	    zone_idx != ZONE_MOVABLE)
 		goto out;
 	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 	move_freepages_block(zone, page, MIGRATE_ISOLATE);
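
As an aside for readers of the free_pcppages_bulk() hunk above: the following is a small, standalone userspace sketch (an illustration only, not kernel code -- the real function walks struct page lists under zone->lock) of how the migratetype/batch_free rotation drains fuller per-cpu lists harder than nearly empty ones. The three-entry array stands in for pcp->lists[MIGRATE_PCPTYPES].

/* Mock of the round-robin pcp drain; build with: cc -o drain drain.c */
#include <stdio.h>

#define MIGRATE_PCPTYPES 3	/* unmovable, reclaimable, movable */

int main(void)
{
	int lists[MIGRATE_PCPTYPES] = { 1, 0, 10 };	/* pages per mock list */
	int count = 8;		/* pages to free; must not exceed total held */
	int migratetype = 0;
	int batch_free = 0;

	while (count) {
		/* Round-robin to the next non-empty list, growing batch_free
		 * for every empty list skipped, as in the kernel loop. */
		do {
			batch_free++;
			if (++migratetype == MIGRATE_PCPTYPES)
				migratetype = 0;
		} while (lists[migratetype] == 0);

		/* Free up to batch_free pages from the list just chosen. */
		do {
			lists[migratetype]--;
			printf("freed a page from list %d\n", migratetype);
		} while (--count && --batch_free && lists[migratetype]);
	}
	return 0;
}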