Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 848
1 file changed, 339 insertions(+), 509 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..d2186ecb36f7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/ftrace_event.h> | 57 | #include <linux/ftrace_event.h> |
58 | #include <linux/memcontrol.h> | 58 | #include <linux/memcontrol.h> |
59 | #include <linux/prefetch.h> | 59 | #include <linux/prefetch.h> |
60 | #include <linux/page-debug-flags.h> | ||
60 | 61 | ||
61 | #include <asm/tlbflush.h> | 62 | #include <asm/tlbflush.h> |
62 | #include <asm/div64.h> | 63 | #include <asm/div64.h> |
@@ -96,6 +97,14 @@ EXPORT_SYMBOL(node_states); | |||
96 | 97 | ||
97 | unsigned long totalram_pages __read_mostly; | 98 | unsigned long totalram_pages __read_mostly; |
98 | unsigned long totalreserve_pages __read_mostly; | 99 | unsigned long totalreserve_pages __read_mostly; |
100 | /* | ||
101 | * When calculating the number of globally allowed dirty pages, there | ||
102 | * is a certain number of per-zone reserves that should not be | ||
103 | * considered dirtyable memory. This is the sum of those reserves | ||
104 | * over all existing zones that contribute dirtyable memory. | ||
105 | */ | ||
106 | unsigned long dirty_balance_reserve __read_mostly; | ||
107 | |||
99 | int percpu_pagelist_fraction; | 108 | int percpu_pagelist_fraction; |
100 | gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; | 109 | gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; |
101 | 110 | ||
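The consumer of dirty_balance_reserve lives on the writeback side (mm/page-writeback.c), not in this file. A minimal sketch of how such a consumer can exclude the reserve when sizing the dirtyable pool; the helper name is illustrative and highmem handling is deliberately omitted:

/* Illustrative sketch only; not the literal writeback-side function. */
static unsigned long sketch_dirtyable_memory(void)
{
	unsigned long x;

	/* free pages plus reclaimable page cache ... */
	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();

	/* ... minus the per-zone reserves accumulated in
	 * dirty_balance_reserve, which must never fill up with dirty pages */
	if (x > dirty_balance_reserve)
		x -= dirty_balance_reserve;
	else
		x = 0;

	return x + 1;	/* never report zero dirtyable pages */
}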
@@ -127,6 +136,13 @@ void pm_restrict_gfp_mask(void) | |||
127 | saved_gfp_mask = gfp_allowed_mask; | 136 | saved_gfp_mask = gfp_allowed_mask; |
128 | gfp_allowed_mask &= ~GFP_IOFS; | 137 | gfp_allowed_mask &= ~GFP_IOFS; |
129 | } | 138 | } |
139 | |||
140 | bool pm_suspended_storage(void) | ||
141 | { | ||
142 | if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS) | ||
143 | return false; | ||
144 | return true; | ||
145 | } | ||
130 | #endif /* CONFIG_PM_SLEEP */ | 146 | #endif /* CONFIG_PM_SLEEP */ |
131 | 147 | ||
132 | #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE | 148 | #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE |
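pm_suspended_storage() simply reports whether pm_restrict_gfp_mask() above has masked out GFP_IOFS, i.e. whether block devices may already be frozen for suspend. The allocator retry path later in this diff consumes it exactly as in this sketch, and any other reclaim-retry loop would bail out the same way:

	/* Give up retrying while suspend has storage disabled: neither
	 * direct reclaim nor kswapd can write anything back. */
	if (!did_some_progress && pm_suspended_storage())
		return 0;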
@@ -181,39 +197,17 @@ static unsigned long __meminitdata nr_kernel_pages; | |||
181 | static unsigned long __meminitdata nr_all_pages; | 197 | static unsigned long __meminitdata nr_all_pages; |
182 | static unsigned long __meminitdata dma_reserve; | 198 | static unsigned long __meminitdata dma_reserve; |
183 | 199 | ||
184 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 200 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
185 | /* | 201 | static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; |
186 | * MAX_ACTIVE_REGIONS determines the maximum number of distinct | 202 | static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; |
187 | * ranges of memory (RAM) that may be registered with add_active_range(). | 203 | static unsigned long __initdata required_kernelcore; |
188 | * Ranges passed to add_active_range() will be merged if possible | 204 | static unsigned long __initdata required_movablecore; |
189 | * so the number of times add_active_range() can be called is | 205 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; |
190 | * related to the number of nodes and the number of holes | 206 | |
191 | */ | 207 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ |
192 | #ifdef CONFIG_MAX_ACTIVE_REGIONS | 208 | int movable_zone; |
193 | /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ | 209 | EXPORT_SYMBOL(movable_zone); |
194 | #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS | 210 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
195 | #else | ||
196 | #if MAX_NUMNODES >= 32 | ||
197 | /* If there can be many nodes, allow up to 50 holes per node */ | ||
198 | #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50) | ||
199 | #else | ||
200 | /* By default, allow up to 256 distinct regions */ | ||
201 | #define MAX_ACTIVE_REGIONS 256 | ||
202 | #endif | ||
203 | #endif | ||
204 | |||
205 | static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS]; | ||
206 | static int __meminitdata nr_nodemap_entries; | ||
207 | static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; | ||
208 | static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; | ||
209 | static unsigned long __initdata required_kernelcore; | ||
210 | static unsigned long __initdata required_movablecore; | ||
211 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; | ||
212 | |||
213 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ | ||
214 | int movable_zone; | ||
215 | EXPORT_SYMBOL(movable_zone); | ||
216 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | ||
217 | 211 | ||
218 | #if MAX_NUMNODES > 1 | 212 | #if MAX_NUMNODES > 1 |
219 | int nr_node_ids __read_mostly = MAX_NUMNODES; | 213 | int nr_node_ids __read_mostly = MAX_NUMNODES; |
@@ -333,8 +327,8 @@ out: | |||
333 | * | 327 | * |
334 | * The remaining PAGE_SIZE pages are called "tail pages". | 328 | * The remaining PAGE_SIZE pages are called "tail pages". |
335 | * | 329 | * |
336 | * All pages have PG_compound set. All pages have their ->private pointing at | 330 | * All pages have PG_compound set. All tail pages have their ->first_page |
337 | * the head page (even the head page has this). | 331 | * pointing at the head page. |
338 | * | 332 | * |
339 | * The first tail page's ->lru.next holds the address of the compound page's | 333 | * The first tail page's ->lru.next holds the address of the compound page's |
340 | * put_page() function. Its ->lru.prev holds the order of allocation. | 334 | * put_page() function. Its ->lru.prev holds the order of allocation. |
@@ -356,8 +350,8 @@ void prep_compound_page(struct page *page, unsigned long order) | |||
356 | __SetPageHead(page); | 350 | __SetPageHead(page); |
357 | for (i = 1; i < nr_pages; i++) { | 351 | for (i = 1; i < nr_pages; i++) { |
358 | struct page *p = page + i; | 352 | struct page *p = page + i; |
359 | |||
360 | __SetPageTail(p); | 353 | __SetPageTail(p); |
354 | set_page_count(p, 0); | ||
361 | p->first_page = page; | 355 | p->first_page = page; |
362 | } | 356 | } |
363 | } | 357 | } |
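For context, the consumer of ->first_page is compound_head(); in this kernel generation it looks roughly like the sketch below, which is why every tail page must carry the back-pointer set up in prep_compound_page():

/* Roughly as in include/linux/mm.h of this era (shown for reference). */
static inline struct page *compound_head(struct page *page)
{
	if (unlikely(PageTail(page)))
		return page->first_page;	/* assigned in prep_compound_page() */
	return page;
}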
@@ -403,6 +397,37 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) | |||
403 | clear_highpage(page + i); | 397 | clear_highpage(page + i); |
404 | } | 398 | } |
405 | 399 | ||
400 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
401 | unsigned int _debug_guardpage_minorder; | ||
402 | |||
403 | static int __init debug_guardpage_minorder_setup(char *buf) | ||
404 | { | ||
405 | unsigned long res; | ||
406 | |||
407 | if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) { | ||
408 | printk(KERN_ERR "Bad debug_guardpage_minorder value\n"); | ||
409 | return 0; | ||
410 | } | ||
411 | _debug_guardpage_minorder = res; | ||
412 | printk(KERN_INFO "Setting debug_guardpage_minorder to %lu\n", res); | ||
413 | return 0; | ||
414 | } | ||
415 | __setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup); | ||
416 | |||
417 | static inline void set_page_guard_flag(struct page *page) | ||
418 | { | ||
419 | __set_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags); | ||
420 | } | ||
421 | |||
422 | static inline void clear_page_guard_flag(struct page *page) | ||
423 | { | ||
424 | __clear_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags); | ||
425 | } | ||
426 | #else | ||
427 | static inline void set_page_guard_flag(struct page *page) { } | ||
428 | static inline void clear_page_guard_flag(struct page *page) { } | ||
429 | #endif | ||
430 | |||
406 | static inline void set_page_order(struct page *page, int order) | 431 | static inline void set_page_order(struct page *page, int order) |
407 | { | 432 | { |
408 | set_page_private(page, order); | 433 | set_page_private(page, order); |
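The test used by the buddy code below is page_is_guard(), defined outside this file by the same series; a sketch of its presumed shape is given here. Note that the setup handler accepts values only up to MAX_ORDER / 2, so with the default MAX_ORDER of 11 anything above debug_guardpage_minorder=5 on the kernel command line is rejected and the feature stays off.

/* Sketch of the counterpart test (assumed to match the helper added
 * alongside this patch in include/linux/mm.h). */
static inline bool page_is_guard(struct page *page)
{
	return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
}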
@@ -460,6 +485,11 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
460 | if (page_zone_id(page) != page_zone_id(buddy)) | 485 | if (page_zone_id(page) != page_zone_id(buddy)) |
461 | return 0; | 486 | return 0; |
462 | 487 | ||
488 | if (page_is_guard(buddy) && page_order(buddy) == order) { | ||
489 | VM_BUG_ON(page_count(buddy) != 0); | ||
490 | return 1; | ||
491 | } | ||
492 | |||
463 | if (PageBuddy(buddy) && page_order(buddy) == order) { | 493 | if (PageBuddy(buddy) && page_order(buddy) == order) { |
464 | VM_BUG_ON(page_count(buddy) != 0); | 494 | VM_BUG_ON(page_count(buddy) != 0); |
465 | return 1; | 495 | return 1; |
@@ -516,11 +546,19 @@ static inline void __free_one_page(struct page *page, | |||
516 | buddy = page + (buddy_idx - page_idx); | 546 | buddy = page + (buddy_idx - page_idx); |
517 | if (!page_is_buddy(page, buddy, order)) | 547 | if (!page_is_buddy(page, buddy, order)) |
518 | break; | 548 | break; |
519 | 549 | /* | |
520 | /* Our buddy is free, merge with it and move up one order. */ | 550 | * Our buddy is free or it is a CONFIG_DEBUG_PAGEALLOC guard page, |
521 | list_del(&buddy->lru); | 551 | * merge with it and move up one order. |
522 | zone->free_area[order].nr_free--; | 552 | */ |
523 | rmv_page_order(buddy); | 553 | if (page_is_guard(buddy)) { |
554 | clear_page_guard_flag(buddy); | ||
555 | set_page_private(page, 0); | ||
556 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); | ||
557 | } else { | ||
558 | list_del(&buddy->lru); | ||
559 | zone->free_area[order].nr_free--; | ||
560 | rmv_page_order(buddy); | ||
561 | } | ||
524 | combined_idx = buddy_idx & page_idx; | 562 | combined_idx = buddy_idx & page_idx; |
525 | page = page + (combined_idx - page_idx); | 563 | page = page + (combined_idx - page_idx); |
526 | page_idx = combined_idx; | 564 | page_idx = combined_idx; |
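The index arithmetic in this merge loop is compact enough to deserve a worked example. buddy_idx is computed just above this hunk as page_idx ^ (1 << order), and the AND yields the start of the merged block:

/*
 * Worked example for order = 2 and page_idx = 12 (binary 1100):
 *   buddy_idx    = 12 ^ (1 << 2) = 8   ->  buddy = page + (8 - 12) = page - 4
 *   combined_idx = 8 & 12        = 8   ->  the merged order-3 block starts
 *                                          four pages below the freed page
 */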
@@ -654,7 +692,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order) | |||
654 | int i; | 692 | int i; |
655 | int bad = 0; | 693 | int bad = 0; |
656 | 694 | ||
657 | trace_mm_page_free_direct(page, order); | 695 | trace_mm_page_free(page, order); |
658 | kmemcheck_free_shadow(page, order); | 696 | kmemcheck_free_shadow(page, order); |
659 | 697 | ||
660 | if (PageAnon(page)) | 698 | if (PageAnon(page)) |
@@ -692,32 +730,23 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
692 | local_irq_restore(flags); | 730 | local_irq_restore(flags); |
693 | } | 731 | } |
694 | 732 | ||
695 | /* | ||
696 | * permit the bootmem allocator to evade page validation on high-order frees | ||
697 | */ | ||
698 | void __meminit __free_pages_bootmem(struct page *page, unsigned int order) | 733 | void __meminit __free_pages_bootmem(struct page *page, unsigned int order) |
699 | { | 734 | { |
700 | if (order == 0) { | 735 | unsigned int nr_pages = 1 << order; |
701 | __ClearPageReserved(page); | 736 | unsigned int loop; |
702 | set_page_count(page, 0); | ||
703 | set_page_refcounted(page); | ||
704 | __free_page(page); | ||
705 | } else { | ||
706 | int loop; | ||
707 | 737 | ||
708 | prefetchw(page); | 738 | prefetchw(page); |
709 | for (loop = 0; loop < BITS_PER_LONG; loop++) { | 739 | for (loop = 0; loop < nr_pages; loop++) { |
710 | struct page *p = &page[loop]; | 740 | struct page *p = &page[loop]; |
711 | 741 | ||
712 | if (loop + 1 < BITS_PER_LONG) | 742 | if (loop + 1 < nr_pages) |
713 | prefetchw(p + 1); | 743 | prefetchw(p + 1); |
714 | __ClearPageReserved(p); | 744 | __ClearPageReserved(p); |
715 | set_page_count(p, 0); | 745 | set_page_count(p, 0); |
716 | } | ||
717 | |||
718 | set_page_refcounted(page); | ||
719 | __free_pages(page, order); | ||
720 | } | 746 | } |
747 | |||
748 | set_page_refcounted(page); | ||
749 | __free_pages(page, order); | ||
721 | } | 750 | } |
722 | 751 | ||
723 | 752 | ||
@@ -746,6 +775,23 @@ static inline void expand(struct zone *zone, struct page *page, | |||
746 | high--; | 775 | high--; |
747 | size >>= 1; | 776 | size >>= 1; |
748 | VM_BUG_ON(bad_range(zone, &page[size])); | 777 | VM_BUG_ON(bad_range(zone, &page[size])); |
778 | |||
779 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
780 | if (high < debug_guardpage_minorder()) { | ||
781 | /* | ||
782 | * Mark as guard pages (or page); this lets them be merged | ||
783 | * back into the allocator when the buddy is freed. | ||
784 | * The corresponding page table entries are not touched, so | ||
785 | * the pages stay not-present in the virtual address space. | ||
786 | */ | ||
787 | INIT_LIST_HEAD(&page[size].lru); | ||
788 | set_page_guard_flag(&page[size]); | ||
789 | set_page_private(&page[size], high); | ||
790 | /* Guard pages are not available for any usage */ | ||
791 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high)); | ||
792 | continue; | ||
793 | } | ||
794 | #endif | ||
749 | list_add(&page[size].lru, &area->free_list[migratetype]); | 795 | list_add(&page[size].lru, &area->free_list[migratetype]); |
750 | area->nr_free++; | 796 | area->nr_free++; |
751 | set_page_order(&page[size], high); | 797 | set_page_order(&page[size], high); |
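A concrete example of the accounting introduced here, derived from the loop above: with debug_guardpage_minorder=2, an order-0 allocation split out of an order-3 block leaves the order-2 half on the free list but turns the order-1 and order-0 halves into guard pages.

/*
 * debug_guardpage_minorder = 2, request order = 0, splitting order 3:
 *   high = 2  ->  order-2 half goes on the free list (4 pages remain free)
 *   high = 1  ->  guard pages, NR_FREE_PAGES -= 2
 *   high = 0  ->  guard page,  NR_FREE_PAGES -= 1
 * The matching "+= 1 << order" credit happens in __free_one_page() when a
 * guard buddy is absorbed during a merge.
 */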
@@ -1211,6 +1257,19 @@ out: | |||
1211 | } | 1257 | } |
1212 | 1258 | ||
1213 | /* | 1259 | /* |
1260 | * Free a list of 0-order pages | ||
1261 | */ | ||
1262 | void free_hot_cold_page_list(struct list_head *list, int cold) | ||
1263 | { | ||
1264 | struct page *page, *next; | ||
1265 | |||
1266 | list_for_each_entry_safe(page, next, list, lru) { | ||
1267 | trace_mm_page_free_batched(page, cold); | ||
1268 | free_hot_cold_page(page, cold); | ||
1269 | } | ||
1270 | } | ||
1271 | |||
1272 | /* | ||
1214 | * split_page takes a non-compound higher-order page, and splits it into | 1273 | * split_page takes a non-compound higher-order page, and splits it into |
1215 | * n (1<<order) sub-pages: page[0..n] | 1274 | * n (1<<order) sub-pages: page[0..n] |
1216 | * Each sub-page must be freed individually. | 1275 | * Each sub-page must be freed individually. |
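This helper lets callers hand over a whole batch of order-0 pages in one call rather than the per-page loop of __pagevec_free(), which is removed further down in this diff. The intended caller pattern is roughly the following sketch (variable names are illustrative):

	LIST_HEAD(pages_to_free);

	/* ... gather pages whose refcount has dropped to zero ... */
	list_add(&page->lru, &pages_to_free);

	/* one batched call at the end; 0 selects the "hot" free path */
	free_hot_cold_page_list(&pages_to_free, 0);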
@@ -1408,7 +1467,7 @@ static int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) | |||
1408 | 1467 | ||
1409 | static int __init fail_page_alloc_debugfs(void) | 1468 | static int __init fail_page_alloc_debugfs(void) |
1410 | { | 1469 | { |
1411 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; | 1470 | umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; |
1412 | struct dentry *dir; | 1471 | struct dentry *dir; |
1413 | 1472 | ||
1414 | dir = fault_create_debugfs_attr("fail_page_alloc", NULL, | 1473 | dir = fault_create_debugfs_attr("fail_page_alloc", NULL, |
@@ -1457,7 +1516,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, | |||
1457 | long min = mark; | 1516 | long min = mark; |
1458 | int o; | 1517 | int o; |
1459 | 1518 | ||
1460 | free_pages -= (1 << order) + 1; | 1519 | free_pages -= (1 << order) - 1; |
1461 | if (alloc_flags & ALLOC_HIGH) | 1520 | if (alloc_flags & ALLOC_HIGH) |
1462 | min -= min / 2; | 1521 | min -= min / 2; |
1463 | if (alloc_flags & ALLOC_HARDER) | 1522 | if (alloc_flags & ALLOC_HARDER) |
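The switch from "+ 1" to "- 1" relaxes the base watermark test by exactly two pages. Ignoring the lowmem reserve term, the check right below this subtraction behaves as follows:

/*
 * new:  free_pages - ((1 << order) - 1) > min  <=>  free_pages >= min + (1 << order)
 * old:  free_pages - ((1 << order) + 1) > min  <=>  free_pages >= min + (1 << order) + 2
 *
 * e.g. min = 128, order = 2: 132 free pages now pass where 134 were required.
 */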
@@ -1667,6 +1726,35 @@ zonelist_scan: | |||
1667 | if ((alloc_flags & ALLOC_CPUSET) && | 1726 | if ((alloc_flags & ALLOC_CPUSET) && |
1668 | !cpuset_zone_allowed_softwall(zone, gfp_mask)) | 1727 | !cpuset_zone_allowed_softwall(zone, gfp_mask)) |
1669 | continue; | 1728 | continue; |
1729 | /* | ||
1730 | * When allocating a page cache page for writing, we | ||
1731 | * want to get it from a zone that is within its dirty | ||
1732 | * limit, such that no single zone holds more than its | ||
1733 | * proportional share of globally allowed dirty pages. | ||
1734 | * The dirty limits take into account the zone's | ||
1735 | * lowmem reserves and high watermark so that kswapd | ||
1736 | * should be able to balance it without having to | ||
1737 | * write pages from its LRU list. | ||
1738 | * | ||
1739 | * This may look like it could increase pressure on | ||
1740 | * lower zones by failing allocations in higher zones | ||
1741 | * before they are full. But the pages that do spill | ||
1742 | * over are limited as the lower zones are protected | ||
1743 | * by this very same mechanism. It should not become | ||
1744 | * a practical burden to them. | ||
1745 | * | ||
1746 | * XXX: For now, allow allocations to potentially | ||
1747 | * exceed the per-zone dirty limit in the slowpath | ||
1748 | * (ALLOC_WMARK_LOW unset) before going into reclaim, | ||
1749 | * which is important when on a NUMA setup the allowed | ||
1750 | * zones are together not big enough to reach the | ||
1751 | * global limit. The proper fix for these situations | ||
1752 | * will require awareness of zones in the | ||
1753 | * dirty-throttling and the flusher threads. | ||
1754 | */ | ||
1755 | if ((alloc_flags & ALLOC_WMARK_LOW) && | ||
1756 | (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone)) | ||
1757 | goto this_zone_full; | ||
1670 | 1758 | ||
1671 | BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); | 1759 | BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); |
1672 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { | 1760 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { |
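zone_dirty_ok() comes from the writeback side of this series (mm/page-writeback.c). Roughly, it compares the zone's dirty, unstable-NFS and writeback page counts against that zone's share of the global dirty limit; the sketch below shows only the shape, and zone_dirty_limit() is an assumed helper, not something defined in this file:

/* Sketch of the per-zone dirty check consulted above. */
static bool sketch_zone_dirty_ok(struct zone *zone)
{
	unsigned long limit = zone_dirty_limit(zone);	/* assumed helper */

	return zone_page_state(zone, NR_FILE_DIRTY) +
	       zone_page_state(zone, NR_UNSTABLE_NFS) +
	       zone_page_state(zone, NR_WRITEBACK) <= limit;
}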
@@ -1756,7 +1844,8 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) | |||
1756 | { | 1844 | { |
1757 | unsigned int filter = SHOW_MEM_FILTER_NODES; | 1845 | unsigned int filter = SHOW_MEM_FILTER_NODES; |
1758 | 1846 | ||
1759 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) | 1847 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || |
1848 | debug_guardpage_minorder() > 0) | ||
1760 | return; | 1849 | return; |
1761 | 1850 | ||
1762 | /* | 1851 | /* |
@@ -1795,12 +1884,25 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) | |||
1795 | 1884 | ||
1796 | static inline int | 1885 | static inline int |
1797 | should_alloc_retry(gfp_t gfp_mask, unsigned int order, | 1886 | should_alloc_retry(gfp_t gfp_mask, unsigned int order, |
1887 | unsigned long did_some_progress, | ||
1798 | unsigned long pages_reclaimed) | 1888 | unsigned long pages_reclaimed) |
1799 | { | 1889 | { |
1800 | /* Do not loop if specifically requested */ | 1890 | /* Do not loop if specifically requested */ |
1801 | if (gfp_mask & __GFP_NORETRY) | 1891 | if (gfp_mask & __GFP_NORETRY) |
1802 | return 0; | 1892 | return 0; |
1803 | 1893 | ||
1894 | /* Always retry if specifically requested */ | ||
1895 | if (gfp_mask & __GFP_NOFAIL) | ||
1896 | return 1; | ||
1897 | |||
1898 | /* | ||
1899 | * Suspend converts GFP_KERNEL to __GFP_WAIT which can prevent reclaim | ||
1900 | * making forward progress without invoking OOM. Suspend also disables | ||
1901 | * storage devices so kswapd will not help. Bail if we are suspending. | ||
1902 | */ | ||
1903 | if (!did_some_progress && pm_suspended_storage()) | ||
1904 | return 0; | ||
1905 | |||
1804 | /* | 1906 | /* |
1805 | * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER | 1907 | * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER |
1806 | * means __GFP_NOFAIL, but that may not be true in other | 1908 | * means __GFP_NOFAIL, but that may not be true in other |
@@ -1819,13 +1921,6 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order, | |||
1819 | if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order)) | 1921 | if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order)) |
1820 | return 1; | 1922 | return 1; |
1821 | 1923 | ||
1822 | /* | ||
1823 | * Don't let big-order allocations loop unless the caller | ||
1824 | * explicitly requests that. | ||
1825 | */ | ||
1826 | if (gfp_mask & __GFP_NOFAIL) | ||
1827 | return 1; | ||
1828 | |||
1829 | return 0; | 1924 | return 0; |
1830 | } | 1925 | } |
1831 | 1926 | ||
@@ -1886,14 +1981,20 @@ static struct page * | |||
1886 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | 1981 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, |
1887 | struct zonelist *zonelist, enum zone_type high_zoneidx, | 1982 | struct zonelist *zonelist, enum zone_type high_zoneidx, |
1888 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | 1983 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, |
1889 | int migratetype, unsigned long *did_some_progress, | 1984 | int migratetype, bool sync_migration, |
1890 | bool sync_migration) | 1985 | bool *deferred_compaction, |
1986 | unsigned long *did_some_progress) | ||
1891 | { | 1987 | { |
1892 | struct page *page; | 1988 | struct page *page; |
1893 | 1989 | ||
1894 | if (!order || compaction_deferred(preferred_zone)) | 1990 | if (!order) |
1895 | return NULL; | 1991 | return NULL; |
1896 | 1992 | ||
1993 | if (compaction_deferred(preferred_zone)) { | ||
1994 | *deferred_compaction = true; | ||
1995 | return NULL; | ||
1996 | } | ||
1997 | |||
1897 | current->flags |= PF_MEMALLOC; | 1998 | current->flags |= PF_MEMALLOC; |
1898 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, | 1999 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, |
1899 | nodemask, sync_migration); | 2000 | nodemask, sync_migration); |
@@ -1921,7 +2022,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
1921 | * but not enough to satisfy watermarks. | 2022 | * but not enough to satisfy watermarks. |
1922 | */ | 2023 | */ |
1923 | count_vm_event(COMPACTFAIL); | 2024 | count_vm_event(COMPACTFAIL); |
1924 | defer_compaction(preferred_zone); | 2025 | |
2026 | /* | ||
2027 | * As async compaction considers a subset of pageblocks, only | ||
2028 | * defer if the failure was a sync compaction failure. | ||
2029 | */ | ||
2030 | if (sync_migration) | ||
2031 | defer_compaction(preferred_zone); | ||
1925 | 2032 | ||
1926 | cond_resched(); | 2033 | cond_resched(); |
1927 | } | 2034 | } |
@@ -1933,8 +2040,9 @@ static inline struct page * | |||
1933 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | 2040 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, |
1934 | struct zonelist *zonelist, enum zone_type high_zoneidx, | 2041 | struct zonelist *zonelist, enum zone_type high_zoneidx, |
1935 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | 2042 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, |
1936 | int migratetype, unsigned long *did_some_progress, | 2043 | int migratetype, bool sync_migration, |
1937 | bool sync_migration) | 2044 | bool *deferred_compaction, |
2045 | unsigned long *did_some_progress) | ||
1938 | { | 2046 | { |
1939 | return NULL; | 2047 | return NULL; |
1940 | } | 2048 | } |
@@ -2084,6 +2192,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2084 | unsigned long pages_reclaimed = 0; | 2192 | unsigned long pages_reclaimed = 0; |
2085 | unsigned long did_some_progress; | 2193 | unsigned long did_some_progress; |
2086 | bool sync_migration = false; | 2194 | bool sync_migration = false; |
2195 | bool deferred_compaction = false; | ||
2087 | 2196 | ||
2088 | /* | 2197 | /* |
2089 | * In the slowpath, we sanity check order to avoid ever trying to | 2198 | * In the slowpath, we sanity check order to avoid ever trying to |
@@ -2164,12 +2273,22 @@ rebalance: | |||
2164 | zonelist, high_zoneidx, | 2273 | zonelist, high_zoneidx, |
2165 | nodemask, | 2274 | nodemask, |
2166 | alloc_flags, preferred_zone, | 2275 | alloc_flags, preferred_zone, |
2167 | migratetype, &did_some_progress, | 2276 | migratetype, sync_migration, |
2168 | sync_migration); | 2277 | &deferred_compaction, |
2278 | &did_some_progress); | ||
2169 | if (page) | 2279 | if (page) |
2170 | goto got_pg; | 2280 | goto got_pg; |
2171 | sync_migration = true; | 2281 | sync_migration = true; |
2172 | 2282 | ||
2283 | /* | ||
2284 | * If compaction is deferred for high-order allocations, it is because | ||
2285 | * sync compaction recently failed. If this is the case and the caller | ||
2286 | * has requested the system not be heavily disrupted, fail the | ||
2287 | * allocation now instead of entering direct reclaim | ||
2288 | */ | ||
2289 | if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) | ||
2290 | goto nopage; | ||
2291 | |||
2173 | /* Try direct reclaim and then allocating */ | 2292 | /* Try direct reclaim and then allocating */ |
2174 | page = __alloc_pages_direct_reclaim(gfp_mask, order, | 2293 | page = __alloc_pages_direct_reclaim(gfp_mask, order, |
2175 | zonelist, high_zoneidx, | 2294 | zonelist, high_zoneidx, |
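Taken together with the earlier compaction hunk, the slowpath ordering for a high-order allocation now looks roughly like this (sketch; __GFP_NO_KSWAPD marks THP-style callers that prefer a fast failure over heavy disruption):

/*
 *  1. async compaction                        -> got a page? return it
 *  2. deferred_compaction && __GFP_NO_KSWAPD  -> goto nopage, skip direct reclaim
 *  3. direct reclaim, then allocation         -> got a page? return it
 *  4. retry with sync compaction (sync_migration = true); only a sync
 *     failure calls defer_compaction() on the preferred zone
 */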
@@ -2218,7 +2337,8 @@ rebalance: | |||
2218 | 2337 | ||
2219 | /* Check if we should retry the allocation */ | 2338 | /* Check if we should retry the allocation */ |
2220 | pages_reclaimed += did_some_progress; | 2339 | pages_reclaimed += did_some_progress; |
2221 | if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { | 2340 | if (should_alloc_retry(gfp_mask, order, did_some_progress, |
2341 | pages_reclaimed)) { | ||
2222 | /* Wait for some write requests to complete then retry */ | 2342 | /* Wait for some write requests to complete then retry */ |
2223 | wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); | 2343 | wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); |
2224 | goto rebalance; | 2344 | goto rebalance; |
@@ -2232,8 +2352,9 @@ rebalance: | |||
2232 | zonelist, high_zoneidx, | 2352 | zonelist, high_zoneidx, |
2233 | nodemask, | 2353 | nodemask, |
2234 | alloc_flags, preferred_zone, | 2354 | alloc_flags, preferred_zone, |
2235 | migratetype, &did_some_progress, | 2355 | migratetype, sync_migration, |
2236 | sync_migration); | 2356 | &deferred_compaction, |
2357 | &did_some_progress); | ||
2237 | if (page) | 2358 | if (page) |
2238 | goto got_pg; | 2359 | goto got_pg; |
2239 | } | 2360 | } |
@@ -2328,16 +2449,6 @@ unsigned long get_zeroed_page(gfp_t gfp_mask) | |||
2328 | } | 2449 | } |
2329 | EXPORT_SYMBOL(get_zeroed_page); | 2450 | EXPORT_SYMBOL(get_zeroed_page); |
2330 | 2451 | ||
2331 | void __pagevec_free(struct pagevec *pvec) | ||
2332 | { | ||
2333 | int i = pagevec_count(pvec); | ||
2334 | |||
2335 | while (--i >= 0) { | ||
2336 | trace_mm_pagevec_free(pvec->pages[i], pvec->cold); | ||
2337 | free_hot_cold_page(pvec->pages[i], pvec->cold); | ||
2338 | } | ||
2339 | } | ||
2340 | |||
2341 | void __free_pages(struct page *page, unsigned int order) | 2452 | void __free_pages(struct page *page, unsigned int order) |
2342 | { | 2453 | { |
2343 | if (put_page_testzero(page)) { | 2454 | if (put_page_testzero(page)) { |
@@ -3377,9 +3488,15 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
3377 | unsigned long block_migratetype; | 3488 | unsigned long block_migratetype; |
3378 | int reserve; | 3489 | int reserve; |
3379 | 3490 | ||
3380 | /* Get the start pfn, end pfn and the number of blocks to reserve */ | 3491 | /* |
3492 | * Get the start pfn, end pfn and the number of blocks to reserve | ||
3493 | * We have to be careful to be aligned to pageblock_nr_pages to | ||
3494 | * make sure that we always check pfn_valid for the first page in | ||
3495 | * the block. | ||
3496 | */ | ||
3381 | start_pfn = zone->zone_start_pfn; | 3497 | start_pfn = zone->zone_start_pfn; |
3382 | end_pfn = start_pfn + zone->spanned_pages; | 3498 | end_pfn = start_pfn + zone->spanned_pages; |
3499 | start_pfn = roundup(start_pfn, pageblock_nr_pages); | ||
3383 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> | 3500 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> |
3384 | pageblock_order; | 3501 | pageblock_order; |
3385 | 3502 | ||
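A quick example of why the extra roundup matters for the scan below, assuming the common pageblock_order of 9:

/*
 * pageblock_order = 9        ->  pageblock_nr_pages = 512 (0x200)
 * zone->zone_start_pfn       =   0x3e00a (not pageblock aligned)
 * roundup(0x3e00a, 0x200)    =   0x3e200
 * so every pfn handed to pfn_valid()/pfn_to_page() in the loop is the
 * first page of its pageblock, which is what the rest of the loop assumes.
 */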
@@ -3401,25 +3518,33 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
3401 | if (page_to_nid(page) != zone_to_nid(zone)) | 3518 | if (page_to_nid(page) != zone_to_nid(zone)) |
3402 | continue; | 3519 | continue; |
3403 | 3520 | ||
3404 | /* Blocks with reserved pages will never free, skip them. */ | ||
3405 | block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); | ||
3406 | if (pageblock_is_reserved(pfn, block_end_pfn)) | ||
3407 | continue; | ||
3408 | |||
3409 | block_migratetype = get_pageblock_migratetype(page); | 3521 | block_migratetype = get_pageblock_migratetype(page); |
3410 | 3522 | ||
3411 | /* If this block is reserved, account for it */ | 3523 | /* Only test what is necessary when the reserves are not met */ |
3412 | if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) { | 3524 | if (reserve > 0) { |
3413 | reserve--; | 3525 | /* |
3414 | continue; | 3526 | * Blocks with reserved pages will never free, skip |
3415 | } | 3527 | * them. |
3528 | */ | ||
3529 | block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); | ||
3530 | if (pageblock_is_reserved(pfn, block_end_pfn)) | ||
3531 | continue; | ||
3416 | 3532 | ||
3417 | /* Suitable for reserving if this block is movable */ | 3533 | /* If this block is reserved, account for it */ |
3418 | if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) { | 3534 | if (block_migratetype == MIGRATE_RESERVE) { |
3419 | set_pageblock_migratetype(page, MIGRATE_RESERVE); | 3535 | reserve--; |
3420 | move_freepages_block(zone, page, MIGRATE_RESERVE); | 3536 | continue; |
3421 | reserve--; | 3537 | } |
3422 | continue; | 3538 | |
3539 | /* Suitable for reserving if this block is movable */ | ||
3540 | if (block_migratetype == MIGRATE_MOVABLE) { | ||
3541 | set_pageblock_migratetype(page, | ||
3542 | MIGRATE_RESERVE); | ||
3543 | move_freepages_block(zone, page, | ||
3544 | MIGRATE_RESERVE); | ||
3545 | reserve--; | ||
3546 | continue; | ||
3547 | } | ||
3423 | } | 3548 | } |
3424 | 3549 | ||
3425 | /* | 3550 | /* |
@@ -3731,35 +3856,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, | |||
3731 | return 0; | 3856 | return 0; |
3732 | } | 3857 | } |
3733 | 3858 | ||
3734 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 3859 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
3735 | /* | ||
3736 | * Basic iterator support. Return the first range of PFNs for a node | ||
3737 | * Note: nid == MAX_NUMNODES returns first region regardless of node | ||
3738 | */ | ||
3739 | static int __meminit first_active_region_index_in_nid(int nid) | ||
3740 | { | ||
3741 | int i; | ||
3742 | |||
3743 | for (i = 0; i < nr_nodemap_entries; i++) | ||
3744 | if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) | ||
3745 | return i; | ||
3746 | |||
3747 | return -1; | ||
3748 | } | ||
3749 | |||
3750 | /* | ||
3751 | * Basic iterator support. Return the next active range of PFNs for a node | ||
3752 | * Note: nid == MAX_NUMNODES returns next region regardless of node | ||
3753 | */ | ||
3754 | static int __meminit next_active_region_index_in_nid(int index, int nid) | ||
3755 | { | ||
3756 | for (index = index + 1; index < nr_nodemap_entries; index++) | ||
3757 | if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) | ||
3758 | return index; | ||
3759 | |||
3760 | return -1; | ||
3761 | } | ||
3762 | |||
3763 | #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID | 3860 | #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID |
3764 | /* | 3861 | /* |
3765 | * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. | 3862 | * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. |
@@ -3769,15 +3866,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid) | |||
3769 | */ | 3866 | */ |
3770 | int __meminit __early_pfn_to_nid(unsigned long pfn) | 3867 | int __meminit __early_pfn_to_nid(unsigned long pfn) |
3771 | { | 3868 | { |
3772 | int i; | 3869 | unsigned long start_pfn, end_pfn; |
3773 | 3870 | int i, nid; | |
3774 | for (i = 0; i < nr_nodemap_entries; i++) { | ||
3775 | unsigned long start_pfn = early_node_map[i].start_pfn; | ||
3776 | unsigned long end_pfn = early_node_map[i].end_pfn; | ||
3777 | 3871 | ||
3872 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) | ||
3778 | if (start_pfn <= pfn && pfn < end_pfn) | 3873 | if (start_pfn <= pfn && pfn < end_pfn) |
3779 | return early_node_map[i].nid; | 3874 | return nid; |
3780 | } | ||
3781 | /* This is a memory hole */ | 3875 | /* This is a memory hole */ |
3782 | return -1; | 3876 | return -1; |
3783 | } | 3877 | } |
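The early_node_map[] walks that the rest of this patch deletes are all replaced by this memblock iterator. A minimal, self-contained illustration of its calling convention (the loop body here is arbitrary):

	unsigned long start_pfn, end_pfn;
	int i, nid;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
		pr_info("node %d: pfns [%#lx-%#lx)\n", nid, start_pfn, end_pfn);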
@@ -3806,11 +3900,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node) | |||
3806 | } | 3900 | } |
3807 | #endif | 3901 | #endif |
3808 | 3902 | ||
3809 | /* Basic iterator support to walk early_node_map[] */ | ||
3810 | #define for_each_active_range_index_in_nid(i, nid) \ | ||
3811 | for (i = first_active_region_index_in_nid(nid); i != -1; \ | ||
3812 | i = next_active_region_index_in_nid(i, nid)) | ||
3813 | |||
3814 | /** | 3903 | /** |
3815 | * free_bootmem_with_active_regions - Call free_bootmem_node for each active range | 3904 | * free_bootmem_with_active_regions - Call free_bootmem_node for each active range |
3816 | * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. | 3905 | * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. |
@@ -3820,122 +3909,34 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node) | |||
3820 | * add_active_ranges() contain no holes and may be freed, | 3909 | * add_active_ranges() contain no holes and may be freed, |
3821 | * this function may be used instead of calling free_bootmem() manually. | 3910 | * this function may be used instead of calling free_bootmem() manually. |
3822 | */ | 3911 | */ |
3823 | void __init free_bootmem_with_active_regions(int nid, | 3912 | void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn) |
3824 | unsigned long max_low_pfn) | ||
3825 | { | ||
3826 | int i; | ||
3827 | |||
3828 | for_each_active_range_index_in_nid(i, nid) { | ||
3829 | unsigned long size_pages = 0; | ||
3830 | unsigned long end_pfn = early_node_map[i].end_pfn; | ||
3831 | |||
3832 | if (early_node_map[i].start_pfn >= max_low_pfn) | ||
3833 | continue; | ||
3834 | |||
3835 | if (end_pfn > max_low_pfn) | ||
3836 | end_pfn = max_low_pfn; | ||
3837 | |||
3838 | size_pages = end_pfn - early_node_map[i].start_pfn; | ||
3839 | free_bootmem_node(NODE_DATA(early_node_map[i].nid), | ||
3840 | PFN_PHYS(early_node_map[i].start_pfn), | ||
3841 | size_pages << PAGE_SHIFT); | ||
3842 | } | ||
3843 | } | ||
3844 | |||
3845 | #ifdef CONFIG_HAVE_MEMBLOCK | ||
3846 | /* | ||
3847 | * Basic iterator support. Return the last range of PFNs for a node | ||
3848 | * Note: nid == MAX_NUMNODES returns last region regardless of node | ||
3849 | */ | ||
3850 | static int __meminit last_active_region_index_in_nid(int nid) | ||
3851 | { | ||
3852 | int i; | ||
3853 | |||
3854 | for (i = nr_nodemap_entries - 1; i >= 0; i--) | ||
3855 | if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) | ||
3856 | return i; | ||
3857 | |||
3858 | return -1; | ||
3859 | } | ||
3860 | |||
3861 | /* | ||
3862 | * Basic iterator support. Return the previous active range of PFNs for a node | ||
3863 | * Note: nid == MAX_NUMNODES returns next region regardless of node | ||
3864 | */ | ||
3865 | static int __meminit previous_active_region_index_in_nid(int index, int nid) | ||
3866 | { | ||
3867 | for (index = index - 1; index >= 0; index--) | ||
3868 | if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) | ||
3869 | return index; | ||
3870 | |||
3871 | return -1; | ||
3872 | } | ||
3873 | |||
3874 | #define for_each_active_range_index_in_nid_reverse(i, nid) \ | ||
3875 | for (i = last_active_region_index_in_nid(nid); i != -1; \ | ||
3876 | i = previous_active_region_index_in_nid(i, nid)) | ||
3877 | |||
3878 | u64 __init find_memory_core_early(int nid, u64 size, u64 align, | ||
3879 | u64 goal, u64 limit) | ||
3880 | { | 3913 | { |
3881 | int i; | 3914 | unsigned long start_pfn, end_pfn; |
3882 | 3915 | int i, this_nid; | |
3883 | /* Need to go over early_node_map to find out good range for node */ | ||
3884 | for_each_active_range_index_in_nid_reverse(i, nid) { | ||
3885 | u64 addr; | ||
3886 | u64 ei_start, ei_last; | ||
3887 | u64 final_start, final_end; | ||
3888 | |||
3889 | ei_last = early_node_map[i].end_pfn; | ||
3890 | ei_last <<= PAGE_SHIFT; | ||
3891 | ei_start = early_node_map[i].start_pfn; | ||
3892 | ei_start <<= PAGE_SHIFT; | ||
3893 | |||
3894 | final_start = max(ei_start, goal); | ||
3895 | final_end = min(ei_last, limit); | ||
3896 | |||
3897 | if (final_start >= final_end) | ||
3898 | continue; | ||
3899 | |||
3900 | addr = memblock_find_in_range(final_start, final_end, size, align); | ||
3901 | 3916 | ||
3902 | if (addr == MEMBLOCK_ERROR) | 3917 | for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) { |
3903 | continue; | 3918 | start_pfn = min(start_pfn, max_low_pfn); |
3919 | end_pfn = min(end_pfn, max_low_pfn); | ||
3904 | 3920 | ||
3905 | return addr; | 3921 | if (start_pfn < end_pfn) |
3922 | free_bootmem_node(NODE_DATA(this_nid), | ||
3923 | PFN_PHYS(start_pfn), | ||
3924 | (end_pfn - start_pfn) << PAGE_SHIFT); | ||
3906 | } | 3925 | } |
3907 | |||
3908 | return MEMBLOCK_ERROR; | ||
3909 | } | 3926 | } |
3910 | #endif | ||
3911 | 3927 | ||
3912 | int __init add_from_early_node_map(struct range *range, int az, | 3928 | int __init add_from_early_node_map(struct range *range, int az, |
3913 | int nr_range, int nid) | 3929 | int nr_range, int nid) |
3914 | { | 3930 | { |
3931 | unsigned long start_pfn, end_pfn; | ||
3915 | int i; | 3932 | int i; |
3916 | u64 start, end; | ||
3917 | 3933 | ||
3918 | /* need to go over early_node_map to find out good range for node */ | 3934 | /* need to go over early_node_map to find out good range for node */ |
3919 | for_each_active_range_index_in_nid(i, nid) { | 3935 | for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) |
3920 | start = early_node_map[i].start_pfn; | 3936 | nr_range = add_range(range, az, nr_range, start_pfn, end_pfn); |
3921 | end = early_node_map[i].end_pfn; | ||
3922 | nr_range = add_range(range, az, nr_range, start, end); | ||
3923 | } | ||
3924 | return nr_range; | 3937 | return nr_range; |
3925 | } | 3938 | } |
3926 | 3939 | ||
3927 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) | ||
3928 | { | ||
3929 | int i; | ||
3930 | int ret; | ||
3931 | |||
3932 | for_each_active_range_index_in_nid(i, nid) { | ||
3933 | ret = work_fn(early_node_map[i].start_pfn, | ||
3934 | early_node_map[i].end_pfn, data); | ||
3935 | if (ret) | ||
3936 | break; | ||
3937 | } | ||
3938 | } | ||
3939 | /** | 3940 | /** |
3940 | * sparse_memory_present_with_active_regions - Call memory_present for each active range | 3941 | * sparse_memory_present_with_active_regions - Call memory_present for each active range |
3941 | * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. | 3942 | * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. |
@@ -3946,12 +3947,11 @@ void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) | |||
3946 | */ | 3947 | */ |
3947 | void __init sparse_memory_present_with_active_regions(int nid) | 3948 | void __init sparse_memory_present_with_active_regions(int nid) |
3948 | { | 3949 | { |
3949 | int i; | 3950 | unsigned long start_pfn, end_pfn; |
3951 | int i, this_nid; | ||
3950 | 3952 | ||
3951 | for_each_active_range_index_in_nid(i, nid) | 3953 | for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) |
3952 | memory_present(early_node_map[i].nid, | 3954 | memory_present(this_nid, start_pfn, end_pfn); |
3953 | early_node_map[i].start_pfn, | ||
3954 | early_node_map[i].end_pfn); | ||
3955 | } | 3955 | } |
3956 | 3956 | ||
3957 | /** | 3957 | /** |
@@ -3968,13 +3968,15 @@ void __init sparse_memory_present_with_active_regions(int nid) | |||
3968 | void __meminit get_pfn_range_for_nid(unsigned int nid, | 3968 | void __meminit get_pfn_range_for_nid(unsigned int nid, |
3969 | unsigned long *start_pfn, unsigned long *end_pfn) | 3969 | unsigned long *start_pfn, unsigned long *end_pfn) |
3970 | { | 3970 | { |
3971 | unsigned long this_start_pfn, this_end_pfn; | ||
3971 | int i; | 3972 | int i; |
3973 | |||
3972 | *start_pfn = -1UL; | 3974 | *start_pfn = -1UL; |
3973 | *end_pfn = 0; | 3975 | *end_pfn = 0; |
3974 | 3976 | ||
3975 | for_each_active_range_index_in_nid(i, nid) { | 3977 | for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { |
3976 | *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); | 3978 | *start_pfn = min(*start_pfn, this_start_pfn); |
3977 | *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); | 3979 | *end_pfn = max(*end_pfn, this_end_pfn); |
3978 | } | 3980 | } |
3979 | 3981 | ||
3980 | if (*start_pfn == -1UL) | 3982 | if (*start_pfn == -1UL) |
@@ -4077,46 +4079,16 @@ unsigned long __meminit __absent_pages_in_range(int nid, | |||
4077 | unsigned long range_start_pfn, | 4079 | unsigned long range_start_pfn, |
4078 | unsigned long range_end_pfn) | 4080 | unsigned long range_end_pfn) |
4079 | { | 4081 | { |
4080 | int i = 0; | 4082 | unsigned long nr_absent = range_end_pfn - range_start_pfn; |
4081 | unsigned long prev_end_pfn = 0, hole_pages = 0; | 4083 | unsigned long start_pfn, end_pfn; |
4082 | unsigned long start_pfn; | 4084 | int i; |
4083 | |||
4084 | /* Find the end_pfn of the first active range of pfns in the node */ | ||
4085 | i = first_active_region_index_in_nid(nid); | ||
4086 | if (i == -1) | ||
4087 | return 0; | ||
4088 | |||
4089 | prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn); | ||
4090 | |||
4091 | /* Account for ranges before physical memory on this node */ | ||
4092 | if (early_node_map[i].start_pfn > range_start_pfn) | ||
4093 | hole_pages = prev_end_pfn - range_start_pfn; | ||
4094 | |||
4095 | /* Find all holes for the zone within the node */ | ||
4096 | for (; i != -1; i = next_active_region_index_in_nid(i, nid)) { | ||
4097 | |||
4098 | /* No need to continue if prev_end_pfn is outside the zone */ | ||
4099 | if (prev_end_pfn >= range_end_pfn) | ||
4100 | break; | ||
4101 | |||
4102 | /* Make sure the end of the zone is not within the hole */ | ||
4103 | start_pfn = min(early_node_map[i].start_pfn, range_end_pfn); | ||
4104 | prev_end_pfn = max(prev_end_pfn, range_start_pfn); | ||
4105 | 4085 | ||
4106 | /* Update the hole size count and move on */ | 4086 | for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { |
4107 | if (start_pfn > range_start_pfn) { | 4087 | start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn); |
4108 | BUG_ON(prev_end_pfn > start_pfn); | 4088 | end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn); |
4109 | hole_pages += start_pfn - prev_end_pfn; | 4089 | nr_absent -= end_pfn - start_pfn; |
4110 | } | ||
4111 | prev_end_pfn = early_node_map[i].end_pfn; | ||
4112 | } | 4090 | } |
4113 | 4091 | return nr_absent; | |
4114 | /* Account for ranges past physical memory on this node */ | ||
4115 | if (range_end_pfn > prev_end_pfn) | ||
4116 | hole_pages += range_end_pfn - | ||
4117 | max(range_start_pfn, prev_end_pfn); | ||
4118 | |||
4119 | return hole_pages; | ||
4120 | } | 4092 | } |
4121 | 4093 | ||
4122 | /** | 4094 | /** |
@@ -4137,14 +4109,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
4137 | unsigned long zone_type, | 4109 | unsigned long zone_type, |
4138 | unsigned long *ignored) | 4110 | unsigned long *ignored) |
4139 | { | 4111 | { |
4112 | unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; | ||
4113 | unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; | ||
4140 | unsigned long node_start_pfn, node_end_pfn; | 4114 | unsigned long node_start_pfn, node_end_pfn; |
4141 | unsigned long zone_start_pfn, zone_end_pfn; | 4115 | unsigned long zone_start_pfn, zone_end_pfn; |
4142 | 4116 | ||
4143 | get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); | 4117 | get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); |
4144 | zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], | 4118 | zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); |
4145 | node_start_pfn); | 4119 | zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high); |
4146 | zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type], | ||
4147 | node_end_pfn); | ||
4148 | 4120 | ||
4149 | adjust_zone_range_for_zone_movable(nid, zone_type, | 4121 | adjust_zone_range_for_zone_movable(nid, zone_type, |
4150 | node_start_pfn, node_end_pfn, | 4122 | node_start_pfn, node_end_pfn, |
@@ -4152,7 +4124,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
4152 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); | 4124 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); |
4153 | } | 4125 | } |
4154 | 4126 | ||
4155 | #else | 4127 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
4156 | static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, | 4128 | static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, |
4157 | unsigned long zone_type, | 4129 | unsigned long zone_type, |
4158 | unsigned long *zones_size) | 4130 | unsigned long *zones_size) |
@@ -4170,7 +4142,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
4170 | return zholes_size[zone_type]; | 4142 | return zholes_size[zone_type]; |
4171 | } | 4143 | } |
4172 | 4144 | ||
4173 | #endif | 4145 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
4174 | 4146 | ||
4175 | static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, | 4147 | static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, |
4176 | unsigned long *zones_size, unsigned long *zholes_size) | 4148 | unsigned long *zones_size, unsigned long *zholes_size) |
@@ -4290,7 +4262,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4290 | for (j = 0; j < MAX_NR_ZONES; j++) { | 4262 | for (j = 0; j < MAX_NR_ZONES; j++) { |
4291 | struct zone *zone = pgdat->node_zones + j; | 4263 | struct zone *zone = pgdat->node_zones + j; |
4292 | unsigned long size, realsize, memmap_pages; | 4264 | unsigned long size, realsize, memmap_pages; |
4293 | enum lru_list l; | 4265 | enum lru_list lru; |
4294 | 4266 | ||
4295 | size = zone_spanned_pages_in_node(nid, j, zones_size); | 4267 | size = zone_spanned_pages_in_node(nid, j, zones_size); |
4296 | realsize = size - zone_absent_pages_in_node(nid, j, | 4268 | realsize = size - zone_absent_pages_in_node(nid, j, |
@@ -4340,8 +4312,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4340 | zone->zone_pgdat = pgdat; | 4312 | zone->zone_pgdat = pgdat; |
4341 | 4313 | ||
4342 | zone_pcp_init(zone); | 4314 | zone_pcp_init(zone); |
4343 | for_each_lru(l) | 4315 | for_each_lru(lru) |
4344 | INIT_LIST_HEAD(&zone->lru[l].list); | 4316 | INIT_LIST_HEAD(&zone->lruvec.lists[lru]); |
4345 | zone->reclaim_stat.recent_rotated[0] = 0; | 4317 | zone->reclaim_stat.recent_rotated[0] = 0; |
4346 | zone->reclaim_stat.recent_rotated[1] = 0; | 4318 | zone->reclaim_stat.recent_rotated[1] = 0; |
4347 | zone->reclaim_stat.recent_scanned[0] = 0; | 4319 | zone->reclaim_stat.recent_scanned[0] = 0; |
@@ -4393,10 +4365,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) | |||
4393 | */ | 4365 | */ |
4394 | if (pgdat == NODE_DATA(0)) { | 4366 | if (pgdat == NODE_DATA(0)) { |
4395 | mem_map = NODE_DATA(0)->node_mem_map; | 4367 | mem_map = NODE_DATA(0)->node_mem_map; |
4396 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 4368 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
4397 | if (page_to_pfn(mem_map) != pgdat->node_start_pfn) | 4369 | if (page_to_pfn(mem_map) != pgdat->node_start_pfn) |
4398 | mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); | 4370 | mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); |
4399 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | 4371 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
4400 | } | 4372 | } |
4401 | #endif | 4373 | #endif |
4402 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ | 4374 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ |
@@ -4421,7 +4393,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
4421 | free_area_init_core(pgdat, zones_size, zholes_size); | 4393 | free_area_init_core(pgdat, zones_size, zholes_size); |
4422 | } | 4394 | } |
4423 | 4395 | ||
4424 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | 4396 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
4425 | 4397 | ||
4426 | #if MAX_NUMNODES > 1 | 4398 | #if MAX_NUMNODES > 1 |
4427 | /* | 4399 | /* |
@@ -4443,170 +4415,6 @@ static inline void setup_nr_node_ids(void) | |||
4443 | #endif | 4415 | #endif |
4444 | 4416 | ||
4445 | /** | 4417 | /** |
4446 | * add_active_range - Register a range of PFNs backed by physical memory | ||
4447 | * @nid: The node ID the range resides on | ||
4448 | * @start_pfn: The start PFN of the available physical memory | ||
4449 | * @end_pfn: The end PFN of the available physical memory | ||
4450 | * | ||
4451 | * These ranges are stored in an early_node_map[] and later used by | ||
4452 | * free_area_init_nodes() to calculate zone sizes and holes. If the | ||
4453 | * range spans a memory hole, it is up to the architecture to ensure | ||
4454 | * the memory is not freed by the bootmem allocator. If possible | ||
4455 | * the range being registered will be merged with existing ranges. | ||
4456 | */ | ||
4457 | void __init add_active_range(unsigned int nid, unsigned long start_pfn, | ||
4458 | unsigned long end_pfn) | ||
4459 | { | ||
4460 | int i; | ||
4461 | |||
4462 | mminit_dprintk(MMINIT_TRACE, "memory_register", | ||
4463 | "Entering add_active_range(%d, %#lx, %#lx) " | ||
4464 | "%d entries of %d used\n", | ||
4465 | nid, start_pfn, end_pfn, | ||
4466 | nr_nodemap_entries, MAX_ACTIVE_REGIONS); | ||
4467 | |||
4468 | mminit_validate_memmodel_limits(&start_pfn, &end_pfn); | ||
4469 | |||
4470 | /* Merge with existing active regions if possible */ | ||
4471 | for (i = 0; i < nr_nodemap_entries; i++) { | ||
4472 | if (early_node_map[i].nid != nid) | ||
4473 | continue; | ||
4474 | |||
4475 | /* Skip if an existing region covers this new one */ | ||
4476 | if (start_pfn >= early_node_map[i].start_pfn && | ||
4477 | end_pfn <= early_node_map[i].end_pfn) | ||
4478 | return; | ||
4479 | |||
4480 | /* Merge forward if suitable */ | ||
4481 | if (start_pfn <= early_node_map[i].end_pfn && | ||
4482 | end_pfn > early_node_map[i].end_pfn) { | ||
4483 | early_node_map[i].end_pfn = end_pfn; | ||
4484 | return; | ||
4485 | } | ||
4486 | |||
4487 | /* Merge backward if suitable */ | ||
4488 | if (start_pfn < early_node_map[i].start_pfn && | ||
4489 | end_pfn >= early_node_map[i].start_pfn) { | ||
4490 | early_node_map[i].start_pfn = start_pfn; | ||
4491 | return; | ||
4492 | } | ||
4493 | } | ||
4494 | |||
4495 | /* Check that early_node_map is large enough */ | ||
4496 | if (i >= MAX_ACTIVE_REGIONS) { | ||
4497 | printk(KERN_CRIT "More than %d memory regions, truncating\n", | ||
4498 | MAX_ACTIVE_REGIONS); | ||
4499 | return; | ||
4500 | } | ||
4501 | |||
4502 | early_node_map[i].nid = nid; | ||
4503 | early_node_map[i].start_pfn = start_pfn; | ||
4504 | early_node_map[i].end_pfn = end_pfn; | ||
4505 | nr_nodemap_entries = i + 1; | ||
4506 | } | ||
4507 | |||
4508 | /** | ||
4509 | * remove_active_range - Shrink an existing registered range of PFNs | ||
4510 | * @nid: The node id the range is on that should be shrunk | ||
4511 | * @start_pfn: The new PFN of the range | ||
4512 | * @end_pfn: The new PFN of the range | ||
4513 | * | ||
4514 | * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node. | ||
4515 | * The map is kept near the end physical page range that has already been | ||
4516 | * registered. This function allows an arch to shrink an existing registered | ||
4517 | * range. | ||
4518 | */ | ||
4519 | void __init remove_active_range(unsigned int nid, unsigned long start_pfn, | ||
4520 | unsigned long end_pfn) | ||
4521 | { | ||
4522 | int i, j; | ||
4523 | int removed = 0; | ||
4524 | |||
4525 | printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n", | ||
4526 | nid, start_pfn, end_pfn); | ||
4527 | |||
4528 | /* Find the old active region end and shrink */ | ||
4529 | for_each_active_range_index_in_nid(i, nid) { | ||
4530 | if (early_node_map[i].start_pfn >= start_pfn && | ||
4531 | early_node_map[i].end_pfn <= end_pfn) { | ||
4532 | /* clear it */ | ||
4533 | early_node_map[i].start_pfn = 0; | ||
4534 | early_node_map[i].end_pfn = 0; | ||
4535 | removed = 1; | ||
4536 | continue; | ||
4537 | } | ||
4538 | if (early_node_map[i].start_pfn < start_pfn && | ||
4539 | early_node_map[i].end_pfn > start_pfn) { | ||
4540 | unsigned long temp_end_pfn = early_node_map[i].end_pfn; | ||
4541 | early_node_map[i].end_pfn = start_pfn; | ||
4542 | if (temp_end_pfn > end_pfn) | ||
4543 | add_active_range(nid, end_pfn, temp_end_pfn); | ||
4544 | continue; | ||
4545 | } | ||
4546 | if (early_node_map[i].start_pfn >= start_pfn && | ||
4547 | early_node_map[i].end_pfn > end_pfn && | ||
4548 | early_node_map[i].start_pfn < end_pfn) { | ||
4549 | early_node_map[i].start_pfn = end_pfn; | ||
4550 | continue; | ||
4551 | } | ||
4552 | } | ||
4553 | |||
4554 | if (!removed) | ||
4555 | return; | ||
4556 | |||
4557 | /* remove the blank ones */ | ||
4558 | for (i = nr_nodemap_entries - 1; i > 0; i--) { | ||
4559 | if (early_node_map[i].nid != nid) | ||
4560 | continue; | ||
4561 | if (early_node_map[i].end_pfn) | ||
4562 | continue; | ||
4563 | /* we found it, get rid of it */ | ||
4564 | for (j = i; j < nr_nodemap_entries - 1; j++) | ||
4565 | memcpy(&early_node_map[j], &early_node_map[j+1], | ||
4566 | sizeof(early_node_map[j])); | ||
4567 | j = nr_nodemap_entries - 1; | ||
4568 | memset(&early_node_map[j], 0, sizeof(early_node_map[j])); | ||
4569 | nr_nodemap_entries--; | ||
4570 | } | ||
4571 | } | ||
4572 | |||
4573 | /** | ||
4574 | * remove_all_active_ranges - Remove all currently registered regions | ||
4575 | * | ||
4576 | * During discovery, it may be found that a table like SRAT is invalid | ||
4577 | * and an alternative discovery method must be used. This function removes | ||
4578 | * all currently registered regions. | ||
4579 | */ | ||
4580 | void __init remove_all_active_ranges(void) | ||
4581 | { | ||
4582 | memset(early_node_map, 0, sizeof(early_node_map)); | ||
4583 | nr_nodemap_entries = 0; | ||
4584 | } | ||
4585 | |||
4586 | /* Compare two active node_active_regions */ | ||
4587 | static int __init cmp_node_active_region(const void *a, const void *b) | ||
4588 | { | ||
4589 | struct node_active_region *arange = (struct node_active_region *)a; | ||
4590 | struct node_active_region *brange = (struct node_active_region *)b; | ||
4591 | |||
4592 | /* Done this way to avoid overflows */ | ||
4593 | if (arange->start_pfn > brange->start_pfn) | ||
4594 | return 1; | ||
4595 | if (arange->start_pfn < brange->start_pfn) | ||
4596 | return -1; | ||
4597 | |||
4598 | return 0; | ||
4599 | } | ||
4600 | |||
4601 | /* sort the node_map by start_pfn */ | ||
4602 | void __init sort_node_map(void) | ||
4603 | { | ||
4604 | sort(early_node_map, (size_t)nr_nodemap_entries, | ||
4605 | sizeof(struct node_active_region), | ||
4606 | cmp_node_active_region, NULL); | ||
4607 | } | ||
4608 | |||
4609 | /** | ||
4610 | * node_map_pfn_alignment - determine the maximum internode alignment | 4418 | * node_map_pfn_alignment - determine the maximum internode alignment |
4611 | * | 4419 | * |
4612 | * This function should be called after node map is populated and sorted. | 4420 | * This function should be called after node map is populated and sorted. |
@@ -4628,15 +4436,11 @@ void __init sort_node_map(void) | |||
4628 | unsigned long __init node_map_pfn_alignment(void) | 4436 | unsigned long __init node_map_pfn_alignment(void) |
4629 | { | 4437 | { |
4630 | unsigned long accl_mask = 0, last_end = 0; | 4438 | unsigned long accl_mask = 0, last_end = 0; |
4439 | unsigned long start, end, mask; | ||
4631 | int last_nid = -1; | 4440 | int last_nid = -1; |
4632 | int i; | 4441 | int i, nid; |
4633 | |||
4634 | for_each_active_range_index_in_nid(i, MAX_NUMNODES) { | ||
4635 | int nid = early_node_map[i].nid; | ||
4636 | unsigned long start = early_node_map[i].start_pfn; | ||
4637 | unsigned long end = early_node_map[i].end_pfn; | ||
4638 | unsigned long mask; | ||
4639 | 4442 | ||
4443 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { | ||
4640 | if (!start || last_nid < 0 || last_nid == nid) { | 4444 | if (!start || last_nid < 0 || last_nid == nid) { |
4641 | last_nid = nid; | 4445 | last_nid = nid; |
4642 | last_end = end; | 4446 | last_end = end; |
@@ -4663,12 +4467,12 @@ unsigned long __init node_map_pfn_alignment(void) | |||
4663 | /* Find the lowest pfn for a node */ | 4467 | /* Find the lowest pfn for a node */ |
4664 | static unsigned long __init find_min_pfn_for_node(int nid) | 4468 | static unsigned long __init find_min_pfn_for_node(int nid) |
4665 | { | 4469 | { |
4666 | int i; | ||
4667 | unsigned long min_pfn = ULONG_MAX; | 4470 | unsigned long min_pfn = ULONG_MAX; |
4471 | unsigned long start_pfn; | ||
4472 | int i; | ||
4668 | 4473 | ||
4669 | /* Assuming a sorted map, the first range found has the starting pfn */ | 4474 | for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL) |
4670 | for_each_active_range_index_in_nid(i, nid) | 4475 | min_pfn = min(min_pfn, start_pfn); |
4671 | min_pfn = min(min_pfn, early_node_map[i].start_pfn); | ||
4672 | 4476 | ||
4673 | if (min_pfn == ULONG_MAX) { | 4477 | if (min_pfn == ULONG_MAX) { |
4674 | printk(KERN_WARNING | 4478 | printk(KERN_WARNING |
@@ -4697,15 +4501,16 @@ unsigned long __init find_min_pfn_with_active_regions(void) | |||
4697 | */ | 4501 | */ |
4698 | static unsigned long __init early_calculate_totalpages(void) | 4502 | static unsigned long __init early_calculate_totalpages(void) |
4699 | { | 4503 | { |
4700 | int i; | ||
4701 | unsigned long totalpages = 0; | 4504 | unsigned long totalpages = 0; |
4505 | unsigned long start_pfn, end_pfn; | ||
4506 | int i, nid; | ||
4507 | |||
4508 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { | ||
4509 | unsigned long pages = end_pfn - start_pfn; | ||
4702 | 4510 | ||
4703 | for (i = 0; i < nr_nodemap_entries; i++) { | ||
4704 | unsigned long pages = early_node_map[i].end_pfn - | ||
4705 | early_node_map[i].start_pfn; | ||
4706 | totalpages += pages; | 4511 | totalpages += pages; |
4707 | if (pages) | 4512 | if (pages) |
4708 | node_set_state(early_node_map[i].nid, N_HIGH_MEMORY); | 4513 | node_set_state(nid, N_HIGH_MEMORY); |
4709 | } | 4514 | } |
4710 | return totalpages; | 4515 | return totalpages; |
4711 | } | 4516 | } |
@@ -4760,6 +4565,8 @@ restart: | |||
4760 | /* Spread kernelcore memory as evenly as possible throughout nodes */ | 4565 | /* Spread kernelcore memory as evenly as possible throughout nodes */ |
4761 | kernelcore_node = required_kernelcore / usable_nodes; | 4566 | kernelcore_node = required_kernelcore / usable_nodes; |
4762 | for_each_node_state(nid, N_HIGH_MEMORY) { | 4567 | for_each_node_state(nid, N_HIGH_MEMORY) { |
4568 | unsigned long start_pfn, end_pfn; | ||
4569 | |||
4763 | /* | 4570 | /* |
4764 | * Recalculate kernelcore_node if the division per node | 4571 | * Recalculate kernelcore_node if the division per node |
4765 | * now exceeds what is necessary to satisfy the requested | 4572 | * now exceeds what is necessary to satisfy the requested |
@@ -4776,13 +4583,10 @@ restart: | |||
4776 | kernelcore_remaining = kernelcore_node; | 4583 | kernelcore_remaining = kernelcore_node; |
4777 | 4584 | ||
4778 | /* Go through each range of PFNs within this node */ | 4585 | /* Go through each range of PFNs within this node */ |
4779 | for_each_active_range_index_in_nid(i, nid) { | 4586 | for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { |
4780 | unsigned long start_pfn, end_pfn; | ||
4781 | unsigned long size_pages; | 4587 | unsigned long size_pages; |
4782 | 4588 | ||
4783 | start_pfn = max(early_node_map[i].start_pfn, | 4589 | start_pfn = max(start_pfn, zone_movable_pfn[nid]); |
4784 | zone_movable_pfn[nid]); | ||
4785 | end_pfn = early_node_map[i].end_pfn; | ||
4786 | if (start_pfn >= end_pfn) | 4590 | if (start_pfn >= end_pfn) |
4787 | continue; | 4591 | continue; |
4788 | 4592 | ||
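For a sense of scale in the two kernelcore hunks above (illustrative numbers, not from this diff): requesting kernelcore=512M with 4 KiB pages makes required_kernelcore 131072 pages, so on a machine with four nodes holding usable memory the initial kernelcore_node share is 32768 pages per node. Each range returned by for_each_mem_pfn_range() is then clipped to start no lower than zone_movable_pfn[nid], and ranges that fall entirely below that boundary are skipped.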
@@ -4863,8 +4667,10 @@ static void check_for_regular_memory(pg_data_t *pgdat) | |||
4863 | 4667 | ||
4864 | for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { | 4668 | for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { |
4865 | struct zone *zone = &pgdat->node_zones[zone_type]; | 4669 | struct zone *zone = &pgdat->node_zones[zone_type]; |
4866 | if (zone->present_pages) | 4670 | if (zone->present_pages) { |
4867 | node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); | 4671 | node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); |
4672 | break; | ||
4673 | } | ||
4868 | } | 4674 | } |
4869 | #endif | 4675 | #endif |
4870 | } | 4676 | } |
@@ -4884,11 +4690,8 @@ static void check_for_regular_memory(pg_data_t *pgdat) | |||
4884 | */ | 4690 | */ |
4885 | void __init free_area_init_nodes(unsigned long *max_zone_pfn) | 4691 | void __init free_area_init_nodes(unsigned long *max_zone_pfn) |
4886 | { | 4692 | { |
4887 | unsigned long nid; | 4693 | unsigned long start_pfn, end_pfn; |
4888 | int i; | 4694 | int i, nid; |
4889 | |||
4890 | /* Sort early_node_map as initialisation assumes it is sorted */ | ||
4891 | sort_node_map(); | ||
4892 | 4695 | ||
4893 | /* Record where the zone boundaries are */ | 4696 | /* Record where the zone boundaries are */ |
4894 | memset(arch_zone_lowest_possible_pfn, 0, | 4697 | memset(arch_zone_lowest_possible_pfn, 0, |
@@ -4935,11 +4738,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
4935 | } | 4738 | } |
4936 | 4739 | ||
4937 | /* Print out the early_node_map[] */ | 4740 | /* Print out the early_node_map[] */ |
4938 | printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); | 4741 | printk("Early memory PFN ranges\n"); |
4939 | for (i = 0; i < nr_nodemap_entries; i++) | 4742 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) |
4940 | printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, | 4743 | printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn); |
4941 | early_node_map[i].start_pfn, | ||
4942 | early_node_map[i].end_pfn); | ||
4943 | 4744 | ||
4944 | /* Initialise every node */ | 4745 | /* Initialise every node */ |
4945 | mminit_verify_pageflags_layout(); | 4746 | mminit_verify_pageflags_layout(); |
@@ -4992,7 +4793,7 @@ static int __init cmdline_parse_movablecore(char *p) | |||
4992 | early_param("kernelcore", cmdline_parse_kernelcore); | 4793 | early_param("kernelcore", cmdline_parse_kernelcore); |
4993 | early_param("movablecore", cmdline_parse_movablecore); | 4794 | early_param("movablecore", cmdline_parse_movablecore); |
4994 | 4795 | ||
4995 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | 4796 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
4996 | 4797 | ||
4997 | /** | 4798 | /** |
4998 | * set_dma_reserve - set the specified number of pages reserved in the first zone | 4799 | * set_dma_reserve - set the specified number of pages reserved in the first zone |
@@ -5076,8 +4877,19 @@ static void calculate_totalreserve_pages(void) | |||
5076 | if (max > zone->present_pages) | 4877 | if (max > zone->present_pages) |
5077 | max = zone->present_pages; | 4878 | max = zone->present_pages; |
5078 | reserve_pages += max; | 4879 | reserve_pages += max; |
4880 | /* | ||
4881 | * Lowmem reserves are not available to | ||
4882 | * GFP_HIGHUSER page cache allocations and | ||
4883 | * kswapd tries to balance zones to their high | ||
4884 | * watermark. As a result, neither should be | ||
4885 | * regarded as dirtyable memory, to prevent a | ||
4886 | * situation where reclaim has to clean pages | ||
4887 | * in order to balance the zones. | ||
4888 | */ | ||
4889 | zone->dirty_balance_reserve = max; | ||
5079 | } | 4890 | } |
5080 | } | 4891 | } |
4892 | dirty_balance_reserve = reserve_pages; | ||
5081 | totalreserve_pages = reserve_pages; | 4893 | totalreserve_pages = reserve_pages; |
5082 | } | 4894 | } |
5083 | 4895 | ||
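The per-zone dirty_balance_reserve set in this hunk, and the global total assigned just above, are meant to be subtracted when the writeback code sizes the pool of dirtyable memory. A hedged sketch of how a consumer might use the global value; the helper name and the exact choice of counters are assumptions for illustration, not part of this diff:

	/*
	 * Hypothetical consumer: when estimating how much memory may hold
	 * dirty page cache, drop the reserves accumulated above so that
	 * reaching the dirty limits never depends on pages that reclaim
	 * must keep free in order to balance the zones.
	 */
	static unsigned long sketch_global_dirtyable_memory(void)
	{
		unsigned long x;

		x = global_page_state(NR_FREE_PAGES) +
		    global_page_state(NR_FILE_PAGES);
		x -= min(x, dirty_balance_reserve);

		return max(x, 1UL);	/* keep nonzero for later ratio math */
	}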
@@ -5601,7 +5413,25 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count) | |||
5601 | 5413 | ||
5602 | bool is_pageblock_removable_nolock(struct page *page) | 5414 | bool is_pageblock_removable_nolock(struct page *page) |
5603 | { | 5415 | { |
5604 | struct zone *zone = page_zone(page); | 5416 | struct zone *zone; |
5417 | unsigned long pfn; | ||
5418 | |||
5419 | /* | ||
5420 | * We have to be careful here because we are iterating over memory | ||
5421 | * sections, which are not zone aware, so we might end up outside of | ||
5422 | * the zone but still within the section. | ||
5423 | * We also have to be careful about the node: if the node is offline, | ||
5424 | * its NODE_DATA will be NULL - see page_zone(). | ||
5425 | */ | ||
5426 | if (!node_online(page_to_nid(page))) | ||
5427 | return false; | ||
5428 | |||
5429 | zone = page_zone(page); | ||
5430 | pfn = page_to_pfn(page); | ||
5431 | if (zone->zone_start_pfn > pfn || | ||
5432 | zone->zone_start_pfn + zone->spanned_pages <= pfn) | ||
5433 | return false; | ||
5434 | |||
5605 | return __count_immobile_pages(zone, page, 0); | 5435 | return __count_immobile_pages(zone, page, 0); |
5606 | } | 5436 | } |
5607 | 5437 | ||
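The checks added to is_pageblock_removable_nolock() amount to two guards before page_zone()-derived data can be trusted: the page's node must be online, and the pfn must fall inside the zone's spanned range. A small restatement of the span test as a helper (the name is hypothetical, for illustration only):

	/*
	 * A pfn is covered by a zone only if it lies in
	 * [zone_start_pfn, zone_start_pfn + spanned_pages).
	 */
	static bool sketch_pfn_in_zone_span(const struct zone *zone,
					    unsigned long pfn)
	{
		return pfn >= zone->zone_start_pfn &&
		       pfn < zone->zone_start_pfn + zone->spanned_pages;
	}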