path: root/mm/page_alloc.c
author	Takashi Iwai <tiwai@suse.de>	2009-11-01 05:11:07 -0500
committer	Takashi Iwai <tiwai@suse.de>	2009-11-01 05:11:07 -0500
commit	e87a3dd33eab30b4db539500064a9584867e4f2c (patch)
tree	2f7ad16e46ae30518ff63bb5391b63f7f7cc74dd /mm/page_alloc.c
parent	b14f5de731ae657d498d18d713c6431bfbeefb4b (diff)
parent	3d00941371a765779c4e3509214c7e5793cce1fe (diff)
Merge branch 'fix/misc' into topic/misc
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	328
1 file changed, 231 insertions(+), 97 deletions(-)
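
Note: the page_alloc.c changes pulled in by this merge convert the per-CPU pagelist from a single list into one list per migratetype (see free_pcppages_bulk(), free_hot_cold_page() and setup_pageset() in the hunks below). The matching struct per_cpu_pages change lives in include/linux/mmzone.h and is not part of this diff; the sketch below is only an illustration, under that assumption, of the structure shape the code here relies on when it indexes pcp->lists[migratetype].

/* Illustrative sketch only -- not part of this diff; the real definition
 * is in include/linux/mmzone.h. MIGRATE_PCPTYPES covers the unmovable,
 * reclaimable and movable types kept on the per-CPU lists; ISOLATE and
 * RESERVE pages are handled separately in free_hot_cold_page() below. */
struct per_cpu_pages {
	int count;		/* number of pages across all lists */
	int high;		/* high watermark, emptying needed */
	int batch;		/* chunk size for buddy add/remove */

	/* One list per migrate type stored on the pcp lists */
	struct list_head lists[MIGRATE_PCPTYPES];
};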
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0de15f46987..bf720550b44d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -123,8 +123,8 @@ static char * const zone_names[MAX_NR_ZONES] = {
 
 int min_free_kbytes = 1024;
 
-unsigned long __meminitdata nr_kernel_pages;
-unsigned long __meminitdata nr_all_pages;
+static unsigned long __meminitdata nr_kernel_pages;
+static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
 	static unsigned long nr_shown;
 	static unsigned long nr_unshown;
 
+	/* Don't complain about poisoned pages */
+	if (PageHWPoison(page)) {
+		__ClearPageBuddy(page);
+		return;
+	}
+
 	/*
 	 * Allow a burst of 60 reports, then keep quiet for that minute;
 	 * or allow a steady drip of one report per second.
@@ -510,7 +516,7 @@ static inline int free_pages_check(struct page *page)
 }
 
 /*
- * Frees a list of pages.
+ * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
  * count is the number of pages to free.
  *
@@ -520,22 +526,42 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pages_bulk(struct zone *zone, int count,
-					struct list_head *list, int order)
+static void free_pcppages_bulk(struct zone *zone, int count,
+					struct per_cpu_pages *pcp)
 {
+	int migratetype = 0;
+	int batch_free = 0;
+
 	spin_lock(&zone->lock);
 	zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, count << order);
-	while (count--) {
+	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
+	while (count) {
 		struct page *page;
+		struct list_head *list;
 
-		VM_BUG_ON(list_empty(list));
-		page = list_entry(list->prev, struct page, lru);
-		/* have to delete it as __free_one_page list manipulates */
-		list_del(&page->lru);
-		__free_one_page(page, zone, order, page_private(page));
+		/*
+		 * Remove pages from lists in a round-robin fashion. A
+		 * batch_free count is maintained that is incremented when an
+		 * empty list is encountered. This is so more pages are freed
+		 * off fuller lists instead of spinning excessively around empty
+		 * lists
+		 */
+		do {
+			batch_free++;
+			if (++migratetype == MIGRATE_PCPTYPES)
+				migratetype = 0;
+			list = &pcp->lists[migratetype];
+		} while (list_empty(list));
+
+		do {
+			page = list_entry(list->prev, struct page, lru);
+			/* must delete as __free_one_page list manipulates */
+			list_del(&page->lru);
+			__free_one_page(page, zone, 0, migratetype);
+			trace_mm_page_pcpu_drain(page, 0, migratetype);
+		} while (--count && --batch_free && !list_empty(list));
 	}
 	spin_unlock(&zone->lock);
 }
@@ -557,7 +583,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	unsigned long flags;
 	int i;
 	int bad = 0;
-	int wasMlocked = TestClearPageMlocked(page);
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, order);
 
@@ -646,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
  * This page is about to be returned from the page allocator
  */
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL) |
@@ -655,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 		bad_page(page);
 		return 1;
 	}
+	return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+	int i;
+
+	for (i = 0; i < (1 << order); i++) {
+		struct page *p = page + i;
+		if (unlikely(check_new_page(p)))
+			return 1;
+	}
 
 	set_page_private(page, 0);
 	set_page_refcounted(page);
@@ -783,6 +821,17 @@ static int move_freepages_block(struct zone *zone, struct page *page,
 	return move_freepages(zone, start_page, end_page, migratetype);
 }
 
+static void change_pageblock_range(struct page *pageblock_page,
+					int start_order, int migratetype)
+{
+	int nr_pageblocks = 1 << (start_order - pageblock_order);
+
+	while (nr_pageblocks--) {
+		set_pageblock_migratetype(pageblock_page, migratetype);
+		pageblock_page += pageblock_nr_pages;
+	}
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
@@ -836,11 +885,16 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			list_del(&page->lru);
 			rmv_page_order(page);
 
-			if (current_order == pageblock_order)
-				set_pageblock_migratetype(page,
+			/* Take ownership for orders >= pageblock_order */
+			if (current_order >= pageblock_order)
+				change_pageblock_range(page, current_order,
 							start_migratetype);
 
 			expand(zone, page, order, current_order, area, migratetype);
+
+			trace_mm_page_alloc_extfrag(page, order, current_order,
+				start_migratetype, migratetype);
+
 			return page;
 		}
 	}
@@ -874,6 +928,7 @@ retry_reserve:
 		}
 	}
 
+	trace_mm_page_alloc_zone_locked(page, order, migratetype);
 	return page;
 }
 
@@ -934,7 +989,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 		to_drain = pcp->batch;
 	else
 		to_drain = pcp->count;
-	free_pages_bulk(zone, to_drain, &pcp->list, 0);
+	free_pcppages_bulk(zone, to_drain, pcp);
 	pcp->count -= to_drain;
 	local_irq_restore(flags);
 }
@@ -960,7 +1015,7 @@ static void drain_pages(unsigned int cpu)
 
 		pcp = &pset->pcp;
 		local_irq_save(flags);
-		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
 	}
@@ -1026,7 +1081,8 @@ static void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
-	int wasMlocked = TestClearPageMlocked(page);
+	int migratetype;
+	int wasMlocked = __TestClearPageMlocked(page);
 
 	kmemcheck_free_shadow(page, 0);
 
@@ -1043,35 +1099,49 @@ static void free_hot_cold_page(struct page *page, int cold)
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp;
-	set_page_private(page, get_pageblock_migratetype(page));
+	migratetype = get_pageblock_migratetype(page);
+	set_page_private(page, migratetype);
 	local_irq_save(flags);
 	if (unlikely(wasMlocked))
 		free_page_mlock(page);
 	__count_vm_event(PGFREE);
 
+	/*
+	 * We only track unmovable, reclaimable and movable on pcp lists.
+	 * Free ISOLATE pages back to the allocator because they are being
+	 * offlined but treat RESERVE as movable pages so we can get those
+	 * areas back if necessary. Otherwise, we may have to free
+	 * excessively into the page allocator
+	 */
+	if (migratetype >= MIGRATE_PCPTYPES) {
+		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+			free_one_page(zone, page, 0, migratetype);
+			goto out;
+		}
+		migratetype = MIGRATE_MOVABLE;
+	}
+
 	if (cold)
-		list_add_tail(&page->lru, &pcp->list);
+		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
-		list_add(&page->lru, &pcp->list);
+		list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
-		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+		free_pcppages_bulk(zone, pcp->batch, pcp);
 		pcp->count -= pcp->batch;
 	}
+
+out:
 	local_irq_restore(flags);
 	put_cpu();
 }
 
 void free_hot_page(struct page *page)
 {
+	trace_mm_page_free_direct(page, 0);
 	free_hot_cold_page(page, 0);
 }
 
-void free_cold_page(struct page *page)
-{
-	free_hot_cold_page(page, 1);
-}
-
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
@@ -1119,35 +1189,23 @@ again:
 	cpu = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
+		struct list_head *list;
 
 		pcp = &zone_pcp(zone, cpu)->pcp;
+		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
-		if (!pcp->count) {
-			pcp->count = rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
+		if (list_empty(list)) {
+			pcp->count += rmqueue_bulk(zone, 0,
+					pcp->batch, list,
 					migratetype, cold);
-			if (unlikely(!pcp->count))
+			if (unlikely(list_empty(list)))
 				goto failed;
 		}
 
-		/* Find a page of the appropriate migrate type */
-		if (cold) {
-			list_for_each_entry_reverse(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		} else {
-			list_for_each_entry(page, &pcp->list, lru)
-				if (page_private(page) == migratetype)
-					break;
-		}
-
-		/* Allocate more to the pcp list if necessary */
-		if (unlikely(&page->lru == &pcp->list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
-					pcp->batch, &pcp->list,
-					migratetype, cold);
-			page = list_entry(pcp->list.next, struct page, lru);
-		}
+		if (cold)
+			page = list_entry(list->prev, struct page, lru);
+		else
+			page = list_entry(list->next, struct page, lru);
 
 		list_del(&page->lru);
 		pcp->count--;
@@ -1627,10 +1685,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-
-	/*
-	 * The task's cpuset might have expanded its set of allowable nodes
-	 */
 	p->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
@@ -1765,6 +1819,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 	wake_all_kswapd(order, zonelist, high_zoneidx);
 
+restart:
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
 	 * reclaim. Now things get more complex, so set up alloc_flags according
@@ -1772,7 +1827,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
-restart:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -1907,6 +1961,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			zonelist, high_zoneidx, nodemask,
 			preferred_zone, migratetype);
 
+	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -1916,44 +1971,41 @@ EXPORT_SYMBOL(__alloc_pages_nodemask);
  */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
-	struct page * page;
+	struct page *page;
+
+	/*
+	 * __get_free_pages() returns a 32-bit address, which cannot represent
+	 * a highmem page
+	 */
+	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+
 	page = alloc_pages(gfp_mask, order);
 	if (!page)
 		return 0;
 	return (unsigned long) page_address(page);
 }
-
 EXPORT_SYMBOL(__get_free_pages);
 
 unsigned long get_zeroed_page(gfp_t gfp_mask)
 {
-	struct page * page;
-
-	/*
-	 * get_zeroed_page() returns a 32-bit address, which cannot represent
-	 * a highmem page
-	 */
-	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-	page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
-	if (page)
-		return (unsigned long) page_address(page);
-	return 0;
+	return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
 }
-
 EXPORT_SYMBOL(get_zeroed_page);
 
 void __pagevec_free(struct pagevec *pvec)
 {
 	int i = pagevec_count(pvec);
 
-	while (--i >= 0)
+	while (--i >= 0) {
+		trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
 		free_hot_cold_page(pvec->pages[i], pvec->cold);
+	}
 }
 
 void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
+		trace_mm_page_free_direct(page, order);
 		if (order == 0)
 			free_hot_page(page);
 		else
@@ -2128,23 +2180,28 @@ void show_free_areas(void)
 	}
 	}
 
-	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
-		" inactive_file:%lu"
+	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
 		" unevictable:%lu"
-		" dirty:%lu writeback:%lu unstable:%lu\n"
-		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+		" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
+		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
-		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
+		global_page_state(NR_ISOLATED_ANON),
+		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_FILE),
+		global_page_state(NR_ISOLATED_FILE),
 		global_page_state(NR_UNEVICTABLE),
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
+		nr_blockdev_pages(),
 		global_page_state(NR_FREE_PAGES),
-		global_page_state(NR_SLAB_RECLAIMABLE) +
-			global_page_state(NR_SLAB_UNRECLAIMABLE),
+		global_page_state(NR_SLAB_RECLAIMABLE),
+		global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
+		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE));
 
@@ -2162,7 +2219,21 @@ void show_free_areas(void)
 			" active_file:%lukB"
 			" inactive_file:%lukB"
 			" unevictable:%lukB"
+			" isolated(anon):%lukB"
+			" isolated(file):%lukB"
 			" present:%lukB"
+			" mlocked:%lukB"
+			" dirty:%lukB"
+			" writeback:%lukB"
+			" mapped:%lukB"
+			" shmem:%lukB"
+			" slab_reclaimable:%lukB"
+			" slab_unreclaimable:%lukB"
+			" kernel_stack:%lukB"
+			" pagetables:%lukB"
+			" unstable:%lukB"
+			" bounce:%lukB"
+			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
 			"\n",
@@ -2176,7 +2247,22 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
 			K(zone_page_state(zone, NR_INACTIVE_FILE)),
 			K(zone_page_state(zone, NR_UNEVICTABLE)),
+			K(zone_page_state(zone, NR_ISOLATED_ANON)),
+			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
+			K(zone_page_state(zone, NR_MLOCK)),
+			K(zone_page_state(zone, NR_FILE_DIRTY)),
+			K(zone_page_state(zone, NR_WRITEBACK)),
+			K(zone_page_state(zone, NR_FILE_MAPPED)),
+			K(zone_page_state(zone, NR_SHMEM)),
+			K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
+			K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
+			zone_page_state(zone, NR_KERNEL_STACK) *
+				THREAD_SIZE / 1024,
+			K(zone_page_state(zone, NR_PAGETABLE)),
+			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
+			K(zone_page_state(zone, NR_BOUNCE)),
+			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
 			);
@@ -2305,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
  * sysctl handler for numa_zonelist_order
  */
 int numa_zonelist_order_handler(ctl_table *table, int write,
-		struct file *file, void __user *buffer, size_t *length,
+		void __user *buffer, size_t *length,
 		loff_t *ppos)
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2314,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 	if (write)
 		strncpy(saved_string, (char*)table->data,
 			NUMA_ZONELIST_ORDER_LEN);
-	ret = proc_dostring(table, write, file, buffer, length, ppos);
+	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
 		return ret;
 	if (write) {
@@ -2783,7 +2869,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 {
 	unsigned long start_pfn, pfn, end_pfn;
 	struct page *page;
-	unsigned long reserve, block_migratetype;
+	unsigned long block_migratetype;
+	int reserve;
 
 	/* Get the start pfn, end pfn and the number of blocks to reserve */
 	start_pfn = zone->zone_start_pfn;
@@ -2791,6 +2878,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
 
+	/*
+	 * Reserve blocks are generally in place to help high-order atomic
+	 * allocations that are short-lived. A min_free_kbytes value that
+	 * would result in more than 2 reserve blocks for atomic allocations
+	 * is assumed to be in place to help anti-fragmentation for the
+	 * future allocation of hugepages at runtime.
+	 */
+	reserve = min(2, reserve);
+
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
 			continue;
@@ -2961,6 +3057,7 @@ static int zone_batchsize(struct zone *zone)
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
 	struct per_cpu_pages *pcp;
+	int migratetype;
 
 	memset(p, 0, sizeof(*p));
 
@@ -2968,7 +3065,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 	pcp->count = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
+		INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
 
 /*
@@ -3146,6 +3244,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 	return 0;
 }
 
+static int __zone_pcp_update(void *data)
+{
+	struct zone *zone = data;
+	int cpu;
+	unsigned long batch = zone_batchsize(zone), flags;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;
+
+		pset = zone_pcp(zone, cpu);
+		pcp = &pset->pcp;
+
+		local_irq_save(flags);
+		free_pcppages_bulk(zone, pcp->count, pcp);
+		setup_pageset(pset, batch);
+		local_irq_restore(flags);
+	}
+	return 0;
+}
+
+void zone_pcp_update(struct zone *zone)
+{
+	stop_machine(__zone_pcp_update, zone, NULL);
+}
+
 static __meminit void zone_pcp_init(struct zone *zone)
 {
 	int cpu;
@@ -3720,7 +3844,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone_pcp_init(zone);
 		for_each_lru(l) {
 			INIT_LIST_HEAD(&zone->lru[l].list);
-			zone->lru[l].nr_saved_scan = 0;
+			zone->reclaim_stat.nr_saved_scan[l] = 0;
 		}
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
@@ -4509,7 +4633,7 @@ void setup_per_zone_wmarks(void)
 	calculate_totalreserve_pages();
 }
 
-/**
+/*
  * The inactive anon list should be small enough that the VM never has to
  * do too much work, but large enough that each inactive page has a chance
  * to be referenced again before it is swapped out.
@@ -4600,9 +4724,9 @@ module_init(init_per_zone_wmark_min)
  * changes.
  */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec(table, write, file, buffer, length, ppos);
+	proc_dointvec(table, write, buffer, length, ppos);
 	if (write)
 		setup_per_zone_wmarks();
 	return 0;
@@ -4610,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 
 #ifdef CONFIG_NUMA
 int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	int rc;
 
-	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
@@ -4626,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	int rc;
 
-	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
@@ -4652,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	proc_dointvec_minmax(table, write, buffer, length, ppos);
 	setup_per_zone_lowmem_reserve();
 	return 0;
 }
@@ -4666,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
  */
 
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	unsigned int cpu;
 	int ret;
 
-	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (!write || (ret == -EINVAL))
 		return ret;
 	for_each_populated_zone(zone) {
@@ -4732,7 +4856,14 @@ void *__init alloc_large_system_hash(const char *tablename,
 		numentries <<= (PAGE_SHIFT - scale);
 
 		/* Make sure we've got at least a 0-order allocation.. */
-		if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+		if (unlikely(flags & HASH_SMALL)) {
+			/* Makes no sense without HASH_EARLY */
+			WARN_ON(!(flags & HASH_EARLY));
+			if (!(numentries >> *_hash_shift)) {
+				numentries = 1UL << *_hash_shift;
+				BUG_ON(!numentries);
+			}
+		} else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
 			numentries = PAGE_SIZE / bucketsize;
 	}
 	numentries = roundup_pow_of_two(numentries);
@@ -4874,13 +5005,16 @@ int set_migratetype_isolate(struct page *page)
 	struct zone *zone;
 	unsigned long flags;
 	int ret = -EBUSY;
+	int zone_idx;
 
 	zone = page_zone(page);
+	zone_idx = zone_idx(zone);
 	spin_lock_irqsave(&zone->lock, flags);
 	/*
 	 * In future, more migrate types will be able to be isolation target.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
+	    zone_idx != ZONE_MOVABLE)
 		goto out;
 	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 	move_freepages_block(zone, page, MIGRATE_ISOLATE);