Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 308 |
1 file changed, 212 insertions(+), 96 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9faa7ad95ac..bf720550b44 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@ | |||
48 | #include <linux/page_cgroup.h> | 48 | #include <linux/page_cgroup.h> |
49 | #include <linux/debugobjects.h> | 49 | #include <linux/debugobjects.h> |
50 | #include <linux/kmemleak.h> | 50 | #include <linux/kmemleak.h> |
51 | #include <trace/events/kmem.h> | ||
51 | 52 | ||
52 | #include <asm/tlbflush.h> | 53 | #include <asm/tlbflush.h> |
53 | #include <asm/div64.h> | 54 | #include <asm/div64.h> |
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(node_states); | |||
71 | 72 | ||
72 | unsigned long totalram_pages __read_mostly; | 73 | unsigned long totalram_pages __read_mostly; |
73 | unsigned long totalreserve_pages __read_mostly; | 74 | unsigned long totalreserve_pages __read_mostly; |
74 | unsigned long highest_memmap_pfn __read_mostly; | ||
75 | int percpu_pagelist_fraction; | 75 | int percpu_pagelist_fraction; |
76 | gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; | 76 | gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; |
77 | 77 | ||
@@ -123,8 +123,8 @@ static char * const zone_names[MAX_NR_ZONES] = { | |||
123 | 123 | ||
124 | int min_free_kbytes = 1024; | 124 | int min_free_kbytes = 1024; |
125 | 125 | ||
126 | unsigned long __meminitdata nr_kernel_pages; | 126 | static unsigned long __meminitdata nr_kernel_pages; |
127 | unsigned long __meminitdata nr_all_pages; | 127 | static unsigned long __meminitdata nr_all_pages; |
128 | static unsigned long __meminitdata dma_reserve; | 128 | static unsigned long __meminitdata dma_reserve; |
129 | 129 | ||
130 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 130 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP |
@@ -516,7 +516,7 @@ static inline int free_pages_check(struct page *page) | |||
516 | } | 516 | } |
517 | 517 | ||
518 | /* | 518 | /* |
519 | * Frees a list of pages. | 519 | * Frees a number of pages from the PCP lists |
520 | * Assumes all pages on list are in same zone, and of same order. | 520 | * Assumes all pages on list are in same zone, and of same order. |
521 | * count is the number of pages to free. | 521 | * count is the number of pages to free. |
522 | * | 522 | * |
@@ -526,22 +526,42 @@ static inline int free_pages_check(struct page *page) | |||
526 | * And clear the zone's pages_scanned counter, to hold off the "all pages are | 526 | * And clear the zone's pages_scanned counter, to hold off the "all pages are |
527 | * pinned" detection logic. | 527 | * pinned" detection logic. |
528 | */ | 528 | */ |
529 | static void free_pages_bulk(struct zone *zone, int count, | 529 | static void free_pcppages_bulk(struct zone *zone, int count, |
530 | struct list_head *list, int order) | 530 | struct per_cpu_pages *pcp) |
531 | { | 531 | { |
532 | int migratetype = 0; | ||
533 | int batch_free = 0; | ||
534 | |||
532 | spin_lock(&zone->lock); | 535 | spin_lock(&zone->lock); |
533 | zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); | 536 | zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); |
534 | zone->pages_scanned = 0; | 537 | zone->pages_scanned = 0; |
535 | 538 | ||
536 | __mod_zone_page_state(zone, NR_FREE_PAGES, count << order); | 539 | __mod_zone_page_state(zone, NR_FREE_PAGES, count); |
537 | while (count--) { | 540 | while (count) { |
538 | struct page *page; | 541 | struct page *page; |
542 | struct list_head *list; | ||
539 | 543 | ||
540 | VM_BUG_ON(list_empty(list)); | 544 | /* |
541 | page = list_entry(list->prev, struct page, lru); | 545 | * Remove pages from lists in a round-robin fashion. A |
542 | /* have to delete it as __free_one_page list manipulates */ | 546 | * batch_free count is maintained that is incremented when an |
543 | list_del(&page->lru); | 547 | * empty list is encountered. This is so more pages are freed |
544 | __free_one_page(page, zone, order, page_private(page)); | 548 | * off fuller lists instead of spinning excessively around empty |
549 | * lists | ||
550 | */ | ||
551 | do { | ||
552 | batch_free++; | ||
553 | if (++migratetype == MIGRATE_PCPTYPES) | ||
554 | migratetype = 0; | ||
555 | list = &pcp->lists[migratetype]; | ||
556 | } while (list_empty(list)); | ||
557 | |||
558 | do { | ||
559 | page = list_entry(list->prev, struct page, lru); | ||
560 | /* must delete as __free_one_page list manipulates */ | ||
561 | list_del(&page->lru); | ||
562 | __free_one_page(page, zone, 0, migratetype); | ||
563 | trace_mm_page_pcpu_drain(page, 0, migratetype); | ||
564 | } while (--count && --batch_free && !list_empty(list)); | ||
545 | } | 565 | } |
546 | spin_unlock(&zone->lock); | 566 | spin_unlock(&zone->lock); |
547 | } | 567 | } |
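The round-robin drain added above is easier to follow in isolation. Below is a minimal userspace sketch (an illustration only, not kernel code; plain integer queues stand in for the real pcp lists) that reproduces the batch_free logic: each time an empty list is skipped, the batch taken from the next non-empty list grows, so fuller lists are drained harder instead of the loop spinning on empty ones.

    /* Standalone sketch of the free_pcppages_bulk() round-robin drain.
     * Queues are plain counters here; lists[i] is how many pages of
     * migratetype i are sitting on the (hypothetical) pcp list. */
    #include <stdio.h>

    #define MIGRATE_PCPTYPES 3      /* unmovable, reclaimable, movable */

    int main(void)
    {
            int lists[MIGRATE_PCPTYPES] = { 1, 0, 12 };
            int count = 8;                  /* pages to free, <= total queued */
            int migratetype = 0, batch_free = 0;

            while (count) {
                    /* skip empty lists, growing the next batch as we go */
                    do {
                            batch_free++;
                            if (++migratetype == MIGRATE_PCPTYPES)
                                    migratetype = 0;
                    } while (!lists[migratetype]);

                    /* take up to batch_free pages from this list */
                    do {
                            lists[migratetype]--;
                            printf("freed a type-%d page\n", migratetype);
                    } while (--count && --batch_free && lists[migratetype]);
            }
            return 0;
    }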
@@ -563,7 +583,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
563 | unsigned long flags; | 583 | unsigned long flags; |
564 | int i; | 584 | int i; |
565 | int bad = 0; | 585 | int bad = 0; |
566 | int wasMlocked = TestClearPageMlocked(page); | 586 | int wasMlocked = __TestClearPageMlocked(page); |
567 | 587 | ||
568 | kmemcheck_free_shadow(page, order); | 588 | kmemcheck_free_shadow(page, order); |
569 | 589 | ||
@@ -801,6 +821,17 @@ static int move_freepages_block(struct zone *zone, struct page *page, | |||
801 | return move_freepages(zone, start_page, end_page, migratetype); | 821 | return move_freepages(zone, start_page, end_page, migratetype); |
802 | } | 822 | } |
803 | 823 | ||
824 | static void change_pageblock_range(struct page *pageblock_page, | ||
825 | int start_order, int migratetype) | ||
826 | { | ||
827 | int nr_pageblocks = 1 << (start_order - pageblock_order); | ||
828 | |||
829 | while (nr_pageblocks--) { | ||
830 | set_pageblock_migratetype(pageblock_page, migratetype); | ||
831 | pageblock_page += pageblock_nr_pages; | ||
832 | } | ||
833 | } | ||
834 | |||
804 | /* Remove an element from the buddy allocator from the fallback list */ | 835 | /* Remove an element from the buddy allocator from the fallback list */ |
805 | static inline struct page * | 836 | static inline struct page * |
806 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | 837 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) |
@@ -854,11 +885,16 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
854 | list_del(&page->lru); | 885 | list_del(&page->lru); |
855 | rmv_page_order(page); | 886 | rmv_page_order(page); |
856 | 887 | ||
857 | if (current_order == pageblock_order) | 888 | /* Take ownership for orders >= pageblock_order */ |
858 | set_pageblock_migratetype(page, | 889 | if (current_order >= pageblock_order) |
890 | change_pageblock_range(page, current_order, | ||
859 | start_migratetype); | 891 | start_migratetype); |
860 | 892 | ||
861 | expand(zone, page, order, current_order, area, migratetype); | 893 | expand(zone, page, order, current_order, area, migratetype); |
894 | |||
895 | trace_mm_page_alloc_extfrag(page, order, current_order, | ||
896 | start_migratetype, migratetype); | ||
897 | |||
862 | return page; | 898 | return page; |
863 | } | 899 | } |
864 | } | 900 | } |
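A quick worked example for change_pageblock_range(), assuming the common x86-64 values of pageblock_order = 9 and pageblock_nr_pages = 512 with 4K pages: a fallback at current_order = 10 spans 1 << (10 - 9) = 2 pageblocks, so the loop calls set_pageblock_migratetype() twice, advancing the page pointer by 512 pages between calls. Previously only a single pageblock changed ownership, and only when current_order was exactly pageblock_order.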
@@ -892,6 +928,7 @@ retry_reserve: | |||
892 | } | 928 | } |
893 | } | 929 | } |
894 | 930 | ||
931 | trace_mm_page_alloc_zone_locked(page, order, migratetype); | ||
895 | return page; | 932 | return page; |
896 | } | 933 | } |
897 | 934 | ||
@@ -952,7 +989,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | |||
952 | to_drain = pcp->batch; | 989 | to_drain = pcp->batch; |
953 | else | 990 | else |
954 | to_drain = pcp->count; | 991 | to_drain = pcp->count; |
955 | free_pages_bulk(zone, to_drain, &pcp->list, 0); | 992 | free_pcppages_bulk(zone, to_drain, pcp); |
956 | pcp->count -= to_drain; | 993 | pcp->count -= to_drain; |
957 | local_irq_restore(flags); | 994 | local_irq_restore(flags); |
958 | } | 995 | } |
@@ -978,7 +1015,7 @@ static void drain_pages(unsigned int cpu) | |||
978 | 1015 | ||
979 | pcp = &pset->pcp; | 1016 | pcp = &pset->pcp; |
980 | local_irq_save(flags); | 1017 | local_irq_save(flags); |
981 | free_pages_bulk(zone, pcp->count, &pcp->list, 0); | 1018 | free_pcppages_bulk(zone, pcp->count, pcp); |
982 | pcp->count = 0; | 1019 | pcp->count = 0; |
983 | local_irq_restore(flags); | 1020 | local_irq_restore(flags); |
984 | } | 1021 | } |
@@ -1044,7 +1081,8 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1044 | struct zone *zone = page_zone(page); | 1081 | struct zone *zone = page_zone(page); |
1045 | struct per_cpu_pages *pcp; | 1082 | struct per_cpu_pages *pcp; |
1046 | unsigned long flags; | 1083 | unsigned long flags; |
1047 | int wasMlocked = TestClearPageMlocked(page); | 1084 | int migratetype; |
1085 | int wasMlocked = __TestClearPageMlocked(page); | ||
1048 | 1086 | ||
1049 | kmemcheck_free_shadow(page, 0); | 1087 | kmemcheck_free_shadow(page, 0); |
1050 | 1088 | ||
@@ -1061,35 +1099,49 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1061 | kernel_map_pages(page, 1, 0); | 1099 | kernel_map_pages(page, 1, 0); |
1062 | 1100 | ||
1063 | pcp = &zone_pcp(zone, get_cpu())->pcp; | 1101 | pcp = &zone_pcp(zone, get_cpu())->pcp; |
1064 | set_page_private(page, get_pageblock_migratetype(page)); | 1102 | migratetype = get_pageblock_migratetype(page); |
1103 | set_page_private(page, migratetype); | ||
1065 | local_irq_save(flags); | 1104 | local_irq_save(flags); |
1066 | if (unlikely(wasMlocked)) | 1105 | if (unlikely(wasMlocked)) |
1067 | free_page_mlock(page); | 1106 | free_page_mlock(page); |
1068 | __count_vm_event(PGFREE); | 1107 | __count_vm_event(PGFREE); |
1069 | 1108 | ||
1109 | /* | ||
1110 | * We only track unmovable, reclaimable and movable on pcp lists. | ||
1111 | * Free ISOLATE pages back to the allocator because they are being | ||
1112 | * offlined but treat RESERVE as movable pages so we can get those | ||
1113 | * areas back if necessary. Otherwise, we may have to free | ||
1114 | * excessively into the page allocator | ||
1115 | */ | ||
1116 | if (migratetype >= MIGRATE_PCPTYPES) { | ||
1117 | if (unlikely(migratetype == MIGRATE_ISOLATE)) { | ||
1118 | free_one_page(zone, page, 0, migratetype); | ||
1119 | goto out; | ||
1120 | } | ||
1121 | migratetype = MIGRATE_MOVABLE; | ||
1122 | } | ||
1123 | |||
1070 | if (cold) | 1124 | if (cold) |
1071 | list_add_tail(&page->lru, &pcp->list); | 1125 | list_add_tail(&page->lru, &pcp->lists[migratetype]); |
1072 | else | 1126 | else |
1073 | list_add(&page->lru, &pcp->list); | 1127 | list_add(&page->lru, &pcp->lists[migratetype]); |
1074 | pcp->count++; | 1128 | pcp->count++; |
1075 | if (pcp->count >= pcp->high) { | 1129 | if (pcp->count >= pcp->high) { |
1076 | free_pages_bulk(zone, pcp->batch, &pcp->list, 0); | 1130 | free_pcppages_bulk(zone, pcp->batch, pcp); |
1077 | pcp->count -= pcp->batch; | 1131 | pcp->count -= pcp->batch; |
1078 | } | 1132 | } |
1133 | |||
1134 | out: | ||
1079 | local_irq_restore(flags); | 1135 | local_irq_restore(flags); |
1080 | put_cpu(); | 1136 | put_cpu(); |
1081 | } | 1137 | } |
1082 | 1138 | ||
1083 | void free_hot_page(struct page *page) | 1139 | void free_hot_page(struct page *page) |
1084 | { | 1140 | { |
1141 | trace_mm_page_free_direct(page, 0); | ||
1085 | free_hot_cold_page(page, 0); | 1142 | free_hot_cold_page(page, 0); |
1086 | } | 1143 | } |
1087 | 1144 | ||
1088 | void free_cold_page(struct page *page) | ||
1089 | { | ||
1090 | free_hot_cold_page(page, 1); | ||
1091 | } | ||
1092 | |||
1093 | /* | 1145 | /* |
1094 | * split_page takes a non-compound higher-order page, and splits it into | 1146 | * split_page takes a non-compound higher-order page, and splits it into |
1095 | * n (1<<order) sub-pages: page[0..n] | 1147 | * n (1<<order) sub-pages: page[0..n] |
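free_hot_cold_page() above now queues pages on pcp->lists[migratetype] rather than a single pcp->list, and only for the migratetypes the pcp layer tracks. The hunks in this file rely on the matching change to struct per_cpu_pages; a rough sketch of the assumed shape (the authoritative definition lives in include/linux/mmzone.h, and the field comments here are paraphrased):

    #include <linux/list.h>

    #define MIGRATE_PCPTYPES 3      /* unmovable, reclaimable, movable */

    struct per_cpu_pages {
            int count;              /* pages across all lists */
            int high;               /* high watermark, drain above this */
            int batch;              /* chunk size for buddy add/remove */

            /* was: struct list_head list;  -- one shared queue */
            struct list_head lists[MIGRATE_PCPTYPES];
    };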
@@ -1137,35 +1189,23 @@ again: | |||
1137 | cpu = get_cpu(); | 1189 | cpu = get_cpu(); |
1138 | if (likely(order == 0)) { | 1190 | if (likely(order == 0)) { |
1139 | struct per_cpu_pages *pcp; | 1191 | struct per_cpu_pages *pcp; |
1192 | struct list_head *list; | ||
1140 | 1193 | ||
1141 | pcp = &zone_pcp(zone, cpu)->pcp; | 1194 | pcp = &zone_pcp(zone, cpu)->pcp; |
1195 | list = &pcp->lists[migratetype]; | ||
1142 | local_irq_save(flags); | 1196 | local_irq_save(flags); |
1143 | if (!pcp->count) { | 1197 | if (list_empty(list)) { |
1144 | pcp->count = rmqueue_bulk(zone, 0, | 1198 | pcp->count += rmqueue_bulk(zone, 0, |
1145 | pcp->batch, &pcp->list, | 1199 | pcp->batch, list, |
1146 | migratetype, cold); | 1200 | migratetype, cold); |
1147 | if (unlikely(!pcp->count)) | 1201 | if (unlikely(list_empty(list))) |
1148 | goto failed; | 1202 | goto failed; |
1149 | } | 1203 | } |
1150 | 1204 | ||
1151 | /* Find a page of the appropriate migrate type */ | 1205 | if (cold) |
1152 | if (cold) { | 1206 | page = list_entry(list->prev, struct page, lru); |
1153 | list_for_each_entry_reverse(page, &pcp->list, lru) | 1207 | else |
1154 | if (page_private(page) == migratetype) | 1208 | page = list_entry(list->next, struct page, lru); |
1155 | break; | ||
1156 | } else { | ||
1157 | list_for_each_entry(page, &pcp->list, lru) | ||
1158 | if (page_private(page) == migratetype) | ||
1159 | break; | ||
1160 | } | ||
1161 | |||
1162 | /* Allocate more to the pcp list if necessary */ | ||
1163 | if (unlikely(&page->lru == &pcp->list)) { | ||
1164 | pcp->count += rmqueue_bulk(zone, 0, | ||
1165 | pcp->batch, &pcp->list, | ||
1166 | migratetype, cold); | ||
1167 | page = list_entry(pcp->list.next, struct page, lru); | ||
1168 | } | ||
1169 | 1209 | ||
1170 | list_del(&page->lru); | 1210 | list_del(&page->lru); |
1171 | pcp->count--; | 1211 | pcp->count--; |
@@ -1645,10 +1685,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | |||
1645 | 1685 | ||
1646 | /* We now go into synchronous reclaim */ | 1686 | /* We now go into synchronous reclaim */ |
1647 | cpuset_memory_pressure_bump(); | 1687 | cpuset_memory_pressure_bump(); |
1648 | |||
1649 | /* | ||
1650 | * The task's cpuset might have expanded its set of allowable nodes | ||
1651 | */ | ||
1652 | p->flags |= PF_MEMALLOC; | 1688 | p->flags |= PF_MEMALLOC; |
1653 | lockdep_set_current_reclaim_state(gfp_mask); | 1689 | lockdep_set_current_reclaim_state(gfp_mask); |
1654 | reclaim_state.reclaimed_slab = 0; | 1690 | reclaim_state.reclaimed_slab = 0; |
@@ -1783,6 +1819,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
1783 | 1819 | ||
1784 | wake_all_kswapd(order, zonelist, high_zoneidx); | 1820 | wake_all_kswapd(order, zonelist, high_zoneidx); |
1785 | 1821 | ||
1822 | restart: | ||
1786 | /* | 1823 | /* |
1787 | * OK, we're below the kswapd watermark and have kicked background | 1824 | * OK, we're below the kswapd watermark and have kicked background |
1788 | * reclaim. Now things get more complex, so set up alloc_flags according | 1825 | * reclaim. Now things get more complex, so set up alloc_flags according |
@@ -1790,7 +1827,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
1790 | */ | 1827 | */ |
1791 | alloc_flags = gfp_to_alloc_flags(gfp_mask); | 1828 | alloc_flags = gfp_to_alloc_flags(gfp_mask); |
1792 | 1829 | ||
1793 | restart: | ||
1794 | /* This is the last chance, in general, before the goto nopage. */ | 1830 | /* This is the last chance, in general, before the goto nopage. */ |
1795 | page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, | 1831 | page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, |
1796 | high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, | 1832 | high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, |
@@ -1925,6 +1961,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
1925 | zonelist, high_zoneidx, nodemask, | 1961 | zonelist, high_zoneidx, nodemask, |
1926 | preferred_zone, migratetype); | 1962 | preferred_zone, migratetype); |
1927 | 1963 | ||
1964 | trace_mm_page_alloc(page, order, gfp_mask, migratetype); | ||
1928 | return page; | 1965 | return page; |
1929 | } | 1966 | } |
1930 | EXPORT_SYMBOL(__alloc_pages_nodemask); | 1967 | EXPORT_SYMBOL(__alloc_pages_nodemask); |
@@ -1934,44 +1971,41 @@ EXPORT_SYMBOL(__alloc_pages_nodemask); | |||
1934 | */ | 1971 | */ |
1935 | unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) | 1972 | unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) |
1936 | { | 1973 | { |
1937 | struct page * page; | 1974 | struct page *page; |
1975 | |||
1976 | /* | ||
1977 | * __get_free_pages() returns a 32-bit address, which cannot represent | ||
1978 | * a highmem page | ||
1979 | */ | ||
1980 | VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); | ||
1981 | |||
1938 | page = alloc_pages(gfp_mask, order); | 1982 | page = alloc_pages(gfp_mask, order); |
1939 | if (!page) | 1983 | if (!page) |
1940 | return 0; | 1984 | return 0; |
1941 | return (unsigned long) page_address(page); | 1985 | return (unsigned long) page_address(page); |
1942 | } | 1986 | } |
1943 | |||
1944 | EXPORT_SYMBOL(__get_free_pages); | 1987 | EXPORT_SYMBOL(__get_free_pages); |
1945 | 1988 | ||
1946 | unsigned long get_zeroed_page(gfp_t gfp_mask) | 1989 | unsigned long get_zeroed_page(gfp_t gfp_mask) |
1947 | { | 1990 | { |
1948 | struct page * page; | 1991 | return __get_free_pages(gfp_mask | __GFP_ZERO, 0); |
1949 | |||
1950 | /* | ||
1951 | * get_zeroed_page() returns a 32-bit address, which cannot represent | ||
1952 | * a highmem page | ||
1953 | */ | ||
1954 | VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); | ||
1955 | |||
1956 | page = alloc_pages(gfp_mask | __GFP_ZERO, 0); | ||
1957 | if (page) | ||
1958 | return (unsigned long) page_address(page); | ||
1959 | return 0; | ||
1960 | } | 1992 | } |
1961 | |||
1962 | EXPORT_SYMBOL(get_zeroed_page); | 1993 | EXPORT_SYMBOL(get_zeroed_page); |
1963 | 1994 | ||
1964 | void __pagevec_free(struct pagevec *pvec) | 1995 | void __pagevec_free(struct pagevec *pvec) |
1965 | { | 1996 | { |
1966 | int i = pagevec_count(pvec); | 1997 | int i = pagevec_count(pvec); |
1967 | 1998 | ||
1968 | while (--i >= 0) | 1999 | while (--i >= 0) { |
2000 | trace_mm_pagevec_free(pvec->pages[i], pvec->cold); | ||
1969 | free_hot_cold_page(pvec->pages[i], pvec->cold); | 2001 | free_hot_cold_page(pvec->pages[i], pvec->cold); |
2002 | } | ||
1970 | } | 2003 | } |
1971 | 2004 | ||
1972 | void __free_pages(struct page *page, unsigned int order) | 2005 | void __free_pages(struct page *page, unsigned int order) |
1973 | { | 2006 | { |
1974 | if (put_page_testzero(page)) { | 2007 | if (put_page_testzero(page)) { |
2008 | trace_mm_page_free_direct(page, order); | ||
1975 | if (order == 0) | 2009 | if (order == 0) |
1976 | free_hot_page(page); | 2010 | free_hot_page(page); |
1977 | else | 2011 | else |
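The VM_BUG_ON moved into __get_free_pages() spells out an old rule: the function returns a kernel virtual address, and a highmem page may have no permanent kernel mapping, so __GFP_HIGHMEM callers must use the struct page interface instead. A hypothetical illustration of the two patterns (caller code assumed, not part of this diff):

    /* Would now trip the VM_BUG_ON: GFP_HIGHUSER includes __GFP_HIGHMEM,
     * and a highmem page has no usable page_address(). */
    unsigned long addr = __get_free_pages(GFP_HIGHUSER, 0);

    /* When highmem is acceptable, take the struct page and map it. */
    struct page *page = alloc_pages(GFP_HIGHUSER, 0);
    if (page) {
            void *va = kmap(page);
            /* ... use va ... */
            kunmap(page);
            __free_pages(page, 0);
    }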
@@ -2146,23 +2180,28 @@ void show_free_areas(void) | |||
2146 | } | 2180 | } |
2147 | } | 2181 | } |
2148 | 2182 | ||
2149 | printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n" | 2183 | printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" |
2150 | " inactive_file:%lu" | 2184 | " active_file:%lu inactive_file:%lu isolated_file:%lu\n" |
2151 | " unevictable:%lu" | 2185 | " unevictable:%lu" |
2152 | " dirty:%lu writeback:%lu unstable:%lu\n" | 2186 | " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" |
2153 | " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", | 2187 | " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" |
2188 | " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", | ||
2154 | global_page_state(NR_ACTIVE_ANON), | 2189 | global_page_state(NR_ACTIVE_ANON), |
2155 | global_page_state(NR_ACTIVE_FILE), | ||
2156 | global_page_state(NR_INACTIVE_ANON), | 2190 | global_page_state(NR_INACTIVE_ANON), |
2191 | global_page_state(NR_ISOLATED_ANON), | ||
2192 | global_page_state(NR_ACTIVE_FILE), | ||
2157 | global_page_state(NR_INACTIVE_FILE), | 2193 | global_page_state(NR_INACTIVE_FILE), |
2194 | global_page_state(NR_ISOLATED_FILE), | ||
2158 | global_page_state(NR_UNEVICTABLE), | 2195 | global_page_state(NR_UNEVICTABLE), |
2159 | global_page_state(NR_FILE_DIRTY), | 2196 | global_page_state(NR_FILE_DIRTY), |
2160 | global_page_state(NR_WRITEBACK), | 2197 | global_page_state(NR_WRITEBACK), |
2161 | global_page_state(NR_UNSTABLE_NFS), | 2198 | global_page_state(NR_UNSTABLE_NFS), |
2199 | nr_blockdev_pages(), | ||
2162 | global_page_state(NR_FREE_PAGES), | 2200 | global_page_state(NR_FREE_PAGES), |
2163 | global_page_state(NR_SLAB_RECLAIMABLE) + | 2201 | global_page_state(NR_SLAB_RECLAIMABLE), |
2164 | global_page_state(NR_SLAB_UNRECLAIMABLE), | 2202 | global_page_state(NR_SLAB_UNRECLAIMABLE), |
2165 | global_page_state(NR_FILE_MAPPED), | 2203 | global_page_state(NR_FILE_MAPPED), |
2204 | global_page_state(NR_SHMEM), | ||
2166 | global_page_state(NR_PAGETABLE), | 2205 | global_page_state(NR_PAGETABLE), |
2167 | global_page_state(NR_BOUNCE)); | 2206 | global_page_state(NR_BOUNCE)); |
2168 | 2207 | ||
@@ -2180,7 +2219,21 @@ void show_free_areas(void) | |||
2180 | " active_file:%lukB" | 2219 | " active_file:%lukB" |
2181 | " inactive_file:%lukB" | 2220 | " inactive_file:%lukB" |
2182 | " unevictable:%lukB" | 2221 | " unevictable:%lukB" |
2222 | " isolated(anon):%lukB" | ||
2223 | " isolated(file):%lukB" | ||
2183 | " present:%lukB" | 2224 | " present:%lukB" |
2225 | " mlocked:%lukB" | ||
2226 | " dirty:%lukB" | ||
2227 | " writeback:%lukB" | ||
2228 | " mapped:%lukB" | ||
2229 | " shmem:%lukB" | ||
2230 | " slab_reclaimable:%lukB" | ||
2231 | " slab_unreclaimable:%lukB" | ||
2232 | " kernel_stack:%lukB" | ||
2233 | " pagetables:%lukB" | ||
2234 | " unstable:%lukB" | ||
2235 | " bounce:%lukB" | ||
2236 | " writeback_tmp:%lukB" | ||
2184 | " pages_scanned:%lu" | 2237 | " pages_scanned:%lu" |
2185 | " all_unreclaimable? %s" | 2238 | " all_unreclaimable? %s" |
2186 | "\n", | 2239 | "\n", |
@@ -2194,7 +2247,22 @@ void show_free_areas(void) | |||
2194 | K(zone_page_state(zone, NR_ACTIVE_FILE)), | 2247 | K(zone_page_state(zone, NR_ACTIVE_FILE)), |
2195 | K(zone_page_state(zone, NR_INACTIVE_FILE)), | 2248 | K(zone_page_state(zone, NR_INACTIVE_FILE)), |
2196 | K(zone_page_state(zone, NR_UNEVICTABLE)), | 2249 | K(zone_page_state(zone, NR_UNEVICTABLE)), |
2250 | K(zone_page_state(zone, NR_ISOLATED_ANON)), | ||
2251 | K(zone_page_state(zone, NR_ISOLATED_FILE)), | ||
2197 | K(zone->present_pages), | 2252 | K(zone->present_pages), |
2253 | K(zone_page_state(zone, NR_MLOCK)), | ||
2254 | K(zone_page_state(zone, NR_FILE_DIRTY)), | ||
2255 | K(zone_page_state(zone, NR_WRITEBACK)), | ||
2256 | K(zone_page_state(zone, NR_FILE_MAPPED)), | ||
2257 | K(zone_page_state(zone, NR_SHMEM)), | ||
2258 | K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)), | ||
2259 | K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)), | ||
2260 | zone_page_state(zone, NR_KERNEL_STACK) * | ||
2261 | THREAD_SIZE / 1024, | ||
2262 | K(zone_page_state(zone, NR_PAGETABLE)), | ||
2263 | K(zone_page_state(zone, NR_UNSTABLE_NFS)), | ||
2264 | K(zone_page_state(zone, NR_BOUNCE)), | ||
2265 | K(zone_page_state(zone, NR_WRITEBACK_TEMP)), | ||
2198 | zone->pages_scanned, | 2266 | zone->pages_scanned, |
2199 | (zone_is_all_unreclaimable(zone) ? "yes" : "no") | 2267 | (zone_is_all_unreclaimable(zone) ? "yes" : "no") |
2200 | ); | 2268 | ); |
@@ -2323,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order); | |||
2323 | * sysctl handler for numa_zonelist_order | 2391 | * sysctl handler for numa_zonelist_order |
2324 | */ | 2392 | */ |
2325 | int numa_zonelist_order_handler(ctl_table *table, int write, | 2393 | int numa_zonelist_order_handler(ctl_table *table, int write, |
2326 | struct file *file, void __user *buffer, size_t *length, | 2394 | void __user *buffer, size_t *length, |
2327 | loff_t *ppos) | 2395 | loff_t *ppos) |
2328 | { | 2396 | { |
2329 | char saved_string[NUMA_ZONELIST_ORDER_LEN]; | 2397 | char saved_string[NUMA_ZONELIST_ORDER_LEN]; |
@@ -2332,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write, | |||
2332 | if (write) | 2400 | if (write) |
2333 | strncpy(saved_string, (char*)table->data, | 2401 | strncpy(saved_string, (char*)table->data, |
2334 | NUMA_ZONELIST_ORDER_LEN); | 2402 | NUMA_ZONELIST_ORDER_LEN); |
2335 | ret = proc_dostring(table, write, file, buffer, length, ppos); | 2403 | ret = proc_dostring(table, write, buffer, length, ppos); |
2336 | if (ret) | 2404 | if (ret) |
2337 | return ret; | 2405 | return ret; |
2338 | if (write) { | 2406 | if (write) { |
@@ -2801,7 +2869,8 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
2801 | { | 2869 | { |
2802 | unsigned long start_pfn, pfn, end_pfn; | 2870 | unsigned long start_pfn, pfn, end_pfn; |
2803 | struct page *page; | 2871 | struct page *page; |
2804 | unsigned long reserve, block_migratetype; | 2872 | unsigned long block_migratetype; |
2873 | int reserve; | ||
2805 | 2874 | ||
2806 | /* Get the start pfn, end pfn and the number of blocks to reserve */ | 2875 | /* Get the start pfn, end pfn and the number of blocks to reserve */ |
2807 | start_pfn = zone->zone_start_pfn; | 2876 | start_pfn = zone->zone_start_pfn; |
@@ -2809,6 +2878,15 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
2809 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> | 2878 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> |
2810 | pageblock_order; | 2879 | pageblock_order; |
2811 | 2880 | ||
2881 | /* | ||
2882 | * Reserve blocks are generally in place to help high-order atomic | ||
2883 | * allocations that are short-lived. A min_free_kbytes value that | ||
2884 | * would result in more than 2 reserve blocks for atomic allocations | ||
2885 | * is assumed to be in place to help anti-fragmentation for the | ||
2886 | * future allocation of hugepages at runtime. | ||
2887 | */ | ||
2888 | reserve = min(2, reserve); | ||
2889 | |||
2812 | for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { | 2890 | for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { |
2813 | if (!pfn_valid(pfn)) | 2891 | if (!pfn_valid(pfn)) |
2814 | continue; | 2892 | continue; |
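As a rough worked example of the new clamp: for a zone whose min watermark comes to 16384 pages (assuming 4K pages, pageblock_nr_pages = 512 and pageblock_order = 9), roundup(16384, 512) >> 9 = 32 reserve pageblocks, i.e. 64MB of MIGRATE_RESERVE. With reserve = min(2, reserve) that drops to 2 blocks (4MB), on the assumption that any min_free_kbytes beyond two blocks' worth was raised to help anti-fragmentation rather than short-lived atomic allocations, so those pageblocks are better left to the normal grouping logic.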
@@ -2979,6 +3057,7 @@ static int zone_batchsize(struct zone *zone) | |||
2979 | static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) | 3057 | static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) |
2980 | { | 3058 | { |
2981 | struct per_cpu_pages *pcp; | 3059 | struct per_cpu_pages *pcp; |
3060 | int migratetype; | ||
2982 | 3061 | ||
2983 | memset(p, 0, sizeof(*p)); | 3062 | memset(p, 0, sizeof(*p)); |
2984 | 3063 | ||
@@ -2986,7 +3065,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) | |||
2986 | pcp->count = 0; | 3065 | pcp->count = 0; |
2987 | pcp->high = 6 * batch; | 3066 | pcp->high = 6 * batch; |
2988 | pcp->batch = max(1UL, 1 * batch); | 3067 | pcp->batch = max(1UL, 1 * batch); |
2989 | INIT_LIST_HEAD(&pcp->list); | 3068 | for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) |
3069 | INIT_LIST_HEAD(&pcp->lists[migratetype]); | ||
2990 | } | 3070 | } |
2991 | 3071 | ||
2992 | /* | 3072 | /* |
@@ -3164,6 +3244,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) | |||
3164 | return 0; | 3244 | return 0; |
3165 | } | 3245 | } |
3166 | 3246 | ||
3247 | static int __zone_pcp_update(void *data) | ||
3248 | { | ||
3249 | struct zone *zone = data; | ||
3250 | int cpu; | ||
3251 | unsigned long batch = zone_batchsize(zone), flags; | ||
3252 | |||
3253 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
3254 | struct per_cpu_pageset *pset; | ||
3255 | struct per_cpu_pages *pcp; | ||
3256 | |||
3257 | pset = zone_pcp(zone, cpu); | ||
3258 | pcp = &pset->pcp; | ||
3259 | |||
3260 | local_irq_save(flags); | ||
3261 | free_pcppages_bulk(zone, pcp->count, pcp); | ||
3262 | setup_pageset(pset, batch); | ||
3263 | local_irq_restore(flags); | ||
3264 | } | ||
3265 | return 0; | ||
3266 | } | ||
3267 | |||
3268 | void zone_pcp_update(struct zone *zone) | ||
3269 | { | ||
3270 | stop_machine(__zone_pcp_update, zone, NULL); | ||
3271 | } | ||
3272 | |||
3167 | static __meminit void zone_pcp_init(struct zone *zone) | 3273 | static __meminit void zone_pcp_init(struct zone *zone) |
3168 | { | 3274 | { |
3169 | int cpu; | 3275 | int cpu; |
@@ -3738,7 +3844,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
3738 | zone_pcp_init(zone); | 3844 | zone_pcp_init(zone); |
3739 | for_each_lru(l) { | 3845 | for_each_lru(l) { |
3740 | INIT_LIST_HEAD(&zone->lru[l].list); | 3846 | INIT_LIST_HEAD(&zone->lru[l].list); |
3741 | zone->lru[l].nr_saved_scan = 0; | 3847 | zone->reclaim_stat.nr_saved_scan[l] = 0; |
3742 | } | 3848 | } |
3743 | zone->reclaim_stat.recent_rotated[0] = 0; | 3849 | zone->reclaim_stat.recent_rotated[0] = 0; |
3744 | zone->reclaim_stat.recent_rotated[1] = 0; | 3850 | zone->reclaim_stat.recent_rotated[1] = 0; |
@@ -4527,7 +4633,7 @@ void setup_per_zone_wmarks(void) | |||
4527 | calculate_totalreserve_pages(); | 4633 | calculate_totalreserve_pages(); |
4528 | } | 4634 | } |
4529 | 4635 | ||
4530 | /** | 4636 | /* |
4531 | * The inactive anon list should be small enough that the VM never has to | 4637 | * The inactive anon list should be small enough that the VM never has to |
4532 | * do too much work, but large enough that each inactive page has a chance | 4638 | * do too much work, but large enough that each inactive page has a chance |
4533 | * to be referenced again before it is swapped out. | 4639 | * to be referenced again before it is swapped out. |
@@ -4618,9 +4724,9 @@ module_init(init_per_zone_wmark_min) | |||
4618 | * changes. | 4724 | * changes. |
4619 | */ | 4725 | */ |
4620 | int min_free_kbytes_sysctl_handler(ctl_table *table, int write, | 4726 | int min_free_kbytes_sysctl_handler(ctl_table *table, int write, |
4621 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4727 | void __user *buffer, size_t *length, loff_t *ppos) |
4622 | { | 4728 | { |
4623 | proc_dointvec(table, write, file, buffer, length, ppos); | 4729 | proc_dointvec(table, write, buffer, length, ppos); |
4624 | if (write) | 4730 | if (write) |
4625 | setup_per_zone_wmarks(); | 4731 | setup_per_zone_wmarks(); |
4626 | return 0; | 4732 | return 0; |
@@ -4628,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, | |||
4628 | 4734 | ||
4629 | #ifdef CONFIG_NUMA | 4735 | #ifdef CONFIG_NUMA |
4630 | int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | 4736 | int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, |
4631 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4737 | void __user *buffer, size_t *length, loff_t *ppos) |
4632 | { | 4738 | { |
4633 | struct zone *zone; | 4739 | struct zone *zone; |
4634 | int rc; | 4740 | int rc; |
4635 | 4741 | ||
4636 | rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4742 | rc = proc_dointvec_minmax(table, write, buffer, length, ppos); |
4637 | if (rc) | 4743 | if (rc) |
4638 | return rc; | 4744 | return rc; |
4639 | 4745 | ||
@@ -4644,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | |||
4644 | } | 4750 | } |
4645 | 4751 | ||
4646 | int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, | 4752 | int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, |
4647 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4753 | void __user *buffer, size_t *length, loff_t *ppos) |
4648 | { | 4754 | { |
4649 | struct zone *zone; | 4755 | struct zone *zone; |
4650 | int rc; | 4756 | int rc; |
4651 | 4757 | ||
4652 | rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4758 | rc = proc_dointvec_minmax(table, write, buffer, length, ppos); |
4653 | if (rc) | 4759 | if (rc) |
4654 | return rc; | 4760 | return rc; |
4655 | 4761 | ||
@@ -4670,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, | |||
4670 | * if in function of the boot time zone sizes. | 4776 | * if in function of the boot time zone sizes. |
4671 | */ | 4777 | */ |
4672 | int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, | 4778 | int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, |
4673 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4779 | void __user *buffer, size_t *length, loff_t *ppos) |
4674 | { | 4780 | { |
4675 | proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4781 | proc_dointvec_minmax(table, write, buffer, length, ppos); |
4676 | setup_per_zone_lowmem_reserve(); | 4782 | setup_per_zone_lowmem_reserve(); |
4677 | return 0; | 4783 | return 0; |
4678 | } | 4784 | } |
@@ -4684,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, | |||
4684 | */ | 4790 | */ |
4685 | 4791 | ||
4686 | int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, | 4792 | int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, |
4687 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4793 | void __user *buffer, size_t *length, loff_t *ppos) |
4688 | { | 4794 | { |
4689 | struct zone *zone; | 4795 | struct zone *zone; |
4690 | unsigned int cpu; | 4796 | unsigned int cpu; |
4691 | int ret; | 4797 | int ret; |
4692 | 4798 | ||
4693 | ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4799 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); |
4694 | if (!write || (ret == -EINVAL)) | 4800 | if (!write || (ret == -EINVAL)) |
4695 | return ret; | 4801 | return ret; |
4696 | for_each_populated_zone(zone) { | 4802 | for_each_populated_zone(zone) { |
@@ -4750,7 +4856,14 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
4750 | numentries <<= (PAGE_SHIFT - scale); | 4856 | numentries <<= (PAGE_SHIFT - scale); |
4751 | 4857 | ||
4752 | /* Make sure we've got at least a 0-order allocation.. */ | 4858 | /* Make sure we've got at least a 0-order allocation.. */ |
4753 | if (unlikely((numentries * bucketsize) < PAGE_SIZE)) | 4859 | if (unlikely(flags & HASH_SMALL)) { |
4860 | /* Makes no sense without HASH_EARLY */ | ||
4861 | WARN_ON(!(flags & HASH_EARLY)); | ||
4862 | if (!(numentries >> *_hash_shift)) { | ||
4863 | numentries = 1UL << *_hash_shift; | ||
4864 | BUG_ON(!numentries); | ||
4865 | } | ||
4866 | } else if (unlikely((numentries * bucketsize) < PAGE_SIZE)) | ||
4754 | numentries = PAGE_SIZE / bucketsize; | 4867 | numentries = PAGE_SIZE / bucketsize; |
4755 | } | 4868 | } |
4756 | numentries = roundup_pow_of_two(numentries); | 4869 | numentries = roundup_pow_of_two(numentries); |
@@ -4892,13 +5005,16 @@ int set_migratetype_isolate(struct page *page) | |||
4892 | struct zone *zone; | 5005 | struct zone *zone; |
4893 | unsigned long flags; | 5006 | unsigned long flags; |
4894 | int ret = -EBUSY; | 5007 | int ret = -EBUSY; |
5008 | int zone_idx; | ||
4895 | 5009 | ||
4896 | zone = page_zone(page); | 5010 | zone = page_zone(page); |
5011 | zone_idx = zone_idx(zone); | ||
4897 | spin_lock_irqsave(&zone->lock, flags); | 5012 | spin_lock_irqsave(&zone->lock, flags); |
4898 | /* | 5013 | /* |
4899 | * In future, more migrate types will be able to be isolation target. | 5014 | * In future, more migrate types will be able to be isolation target. |
4900 | */ | 5015 | */ |
4901 | if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE) | 5016 | if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE && |
5017 | zone_idx != ZONE_MOVABLE) | ||
4902 | goto out; | 5018 | goto out; |
4903 | set_pageblock_migratetype(page, MIGRATE_ISOLATE); | 5019 | set_pageblock_migratetype(page, MIGRATE_ISOLATE); |
4904 | move_freepages_block(zone, page, MIGRATE_ISOLATE); | 5020 | move_freepages_block(zone, page, MIGRATE_ISOLATE); |