Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  974
1 file changed, 822 insertions, 152 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3b5358a0561f..4f59d90b81e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -37,6 +37,8 @@ | |||
37 | #include <linux/vmalloc.h> | 37 | #include <linux/vmalloc.h> |
38 | #include <linux/mempolicy.h> | 38 | #include <linux/mempolicy.h> |
39 | #include <linux/stop_machine.h> | 39 | #include <linux/stop_machine.h> |
40 | #include <linux/sort.h> | ||
41 | #include <linux/pfn.h> | ||
40 | 42 | ||
41 | #include <asm/tlbflush.h> | 43 | #include <asm/tlbflush.h> |
42 | #include <asm/div64.h> | 44 | #include <asm/div64.h> |
@@ -51,7 +53,6 @@ EXPORT_SYMBOL(node_online_map); | |||
51 | nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; | 53 | nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL; |
52 | EXPORT_SYMBOL(node_possible_map); | 54 | EXPORT_SYMBOL(node_possible_map); |
53 | unsigned long totalram_pages __read_mostly; | 55 | unsigned long totalram_pages __read_mostly; |
54 | unsigned long totalhigh_pages __read_mostly; | ||
55 | unsigned long totalreserve_pages __read_mostly; | 56 | unsigned long totalreserve_pages __read_mostly; |
56 | long nr_swap_pages; | 57 | long nr_swap_pages; |
57 | int percpu_pagelist_fraction; | 58 | int percpu_pagelist_fraction; |
@@ -69,7 +70,15 @@ static void __free_pages_ok(struct page *page, unsigned int order); | |||
69 | * TBD: should special case ZONE_DMA32 machines here - in those we normally | 70 | * TBD: should special case ZONE_DMA32 machines here - in those we normally |
70 | * don't need any ZONE_NORMAL reservation | 71 | * don't need any ZONE_NORMAL reservation |
71 | */ | 72 | */ |
72 | int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; | 73 | int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { |
74 | 256, | ||
75 | #ifdef CONFIG_ZONE_DMA32 | ||
76 | 256, | ||
77 | #endif | ||
78 | #ifdef CONFIG_HIGHMEM | ||
79 | 32 | ||
80 | #endif | ||
81 | }; | ||
73 | 82 | ||
74 | EXPORT_SYMBOL(totalram_pages); | 83 | EXPORT_SYMBOL(totalram_pages); |
75 | 84 | ||
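The initializer above leans on the same CONFIG_ZONE_DMA32/CONFIG_HIGHMEM conditionals that determine MAX_NR_ZONES, so the ratio table always carries one entry fewer than the number of compiled-in zones. A minimal userspace sketch of that pattern, with an illustrative zone enum and made-up ratio values rather than the kernel's definitions:

```c
/*
 * Userspace sketch (not kernel code): per-zone #ifdef entries keep the
 * initializer list in step with MAX_NR_ZONES. The enum and the ratio
 * values below are illustrative only.
 */
#include <stdio.h>

#define CONFIG_ZONE_DMA32	/* comment out to drop the DMA32 slot */
/* #define CONFIG_HIGHMEM */

enum zone_type {
	ZONE_DMA,
#ifdef CONFIG_ZONE_DMA32
	ZONE_DMA32,
#endif
	ZONE_NORMAL,
#ifdef CONFIG_HIGHMEM
	ZONE_HIGHMEM,
#endif
	MAX_NR_ZONES
};

static int lowmem_reserve_ratio[MAX_NR_ZONES - 1] = {
	256,
#ifdef CONFIG_ZONE_DMA32
	256,
#endif
#ifdef CONFIG_HIGHMEM
	32
#endif
};

int main(void)
{
	printf("%d zones, %zu reserve ratios\n", MAX_NR_ZONES,
	       sizeof(lowmem_reserve_ratio) / sizeof(lowmem_reserve_ratio[0]));
	return 0;
}
```

Dropping either CONFIG_* symbol removes both the zone and its ratio slot, so no initializer entry is left dangling.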
@@ -80,11 +89,53 @@ EXPORT_SYMBOL(totalram_pages); | |||
80 | struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; | 89 | struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; |
81 | EXPORT_SYMBOL(zone_table); | 90 | EXPORT_SYMBOL(zone_table); |
82 | 91 | ||
83 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; | 92 | static char *zone_names[MAX_NR_ZONES] = { |
93 | "DMA", | ||
94 | #ifdef CONFIG_ZONE_DMA32 | ||
95 | "DMA32", | ||
96 | #endif | ||
97 | "Normal", | ||
98 | #ifdef CONFIG_HIGHMEM | ||
99 | "HighMem" | ||
100 | #endif | ||
101 | }; | ||
102 | |||
84 | int min_free_kbytes = 1024; | 103 | int min_free_kbytes = 1024; |
85 | 104 | ||
86 | unsigned long __meminitdata nr_kernel_pages; | 105 | unsigned long __meminitdata nr_kernel_pages; |
87 | unsigned long __meminitdata nr_all_pages; | 106 | unsigned long __meminitdata nr_all_pages; |
107 | static unsigned long __initdata dma_reserve; | ||
108 | |||
109 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | ||
110 | /* | ||
2011 | * MAX_ACTIVE_REGIONS determines the maximum number of distinct | ||
112 | * ranges of memory (RAM) that may be registered with add_active_range(). | ||
113 | * Ranges passed to add_active_range() will be merged if possible | ||
114 | * so the number of times add_active_range() can be called is | ||
115 | * related to the number of nodes and the number of holes | ||
116 | */ | ||
117 | #ifdef CONFIG_MAX_ACTIVE_REGIONS | ||
118 | /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ | ||
119 | #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS | ||
120 | #else | ||
121 | #if MAX_NUMNODES >= 32 | ||
122 | /* If there can be many nodes, allow up to 50 holes per node */ | ||
123 | #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50) | ||
124 | #else | ||
125 | /* By default, allow up to 256 distinct regions */ | ||
126 | #define MAX_ACTIVE_REGIONS 256 | ||
127 | #endif | ||
128 | #endif | ||
129 | |||
130 | struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS]; | ||
131 | int __initdata nr_nodemap_entries; | ||
132 | unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; | ||
133 | unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; | ||
134 | #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE | ||
135 | unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES]; | ||
136 | unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES]; | ||
137 | #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ | ||
138 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | ||
88 | 139 | ||
89 | #ifdef CONFIG_DEBUG_VM | 140 | #ifdef CONFIG_DEBUG_VM |
90 | static int page_outside_zone_boundaries(struct zone *zone, struct page *page) | 141 | static int page_outside_zone_boundaries(struct zone *zone, struct page *page) |
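A standalone illustration of the MAX_ACTIVE_REGIONS sizing and of what early_node_map[] costs; the CONFIG_* macros and node count below are hand-set stand-ins, not real Kconfig symbols, and the struct layout is only a model:

```c
/*
 * Illustration of the MAX_ACTIVE_REGIONS selection: an arch override,
 * a per-node allowance on large NUMA builds, or a default cap.
 */
#include <stdio.h>

#define MAX_NUMNODES 64			/* pretend this is a large NUMA build */
/* #define CONFIG_MAX_ACTIVE_REGIONS 128 */

#ifdef CONFIG_MAX_ACTIVE_REGIONS
#define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
#elif MAX_NUMNODES >= 32
#define MAX_ACTIVE_REGIONS (MAX_NUMNODES * 50)	/* up to 50 holes per node */
#else
#define MAX_ACTIVE_REGIONS 256			/* default cap */
#endif

struct node_active_region { unsigned long start_pfn, end_pfn; int nid; };
static struct node_active_region early_node_map[MAX_ACTIVE_REGIONS];

int main(void)
{
	printf("early_node_map[]: %d entries, %zu bytes\n",
	       MAX_ACTIVE_REGIONS, sizeof(early_node_map));
	return 0;
}
```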
@@ -127,7 +178,6 @@ static int bad_range(struct zone *zone, struct page *page) | |||
127 | 178 | ||
128 | return 0; | 179 | return 0; |
129 | } | 180 | } |
130 | |||
131 | #else | 181 | #else |
132 | static inline int bad_range(struct zone *zone, struct page *page) | 182 | static inline int bad_range(struct zone *zone, struct page *page) |
133 | { | 183 | { |
@@ -218,12 +268,12 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) | |||
218 | { | 268 | { |
219 | int i; | 269 | int i; |
220 | 270 | ||
221 | BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); | 271 | VM_BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); |
222 | /* | 272 | /* |
223 | * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO | 273 | * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO |
224 | * and __GFP_HIGHMEM from hard or soft interrupt context. | 274 | * and __GFP_HIGHMEM from hard or soft interrupt context. |
225 | */ | 275 | */ |
226 | BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); | 276 | VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); |
227 | for (i = 0; i < (1 << order); i++) | 277 | for (i = 0; i < (1 << order); i++) |
228 | clear_highpage(page + i); | 278 | clear_highpage(page + i); |
229 | } | 279 | } |
@@ -347,8 +397,8 @@ static inline void __free_one_page(struct page *page, | |||
347 | 397 | ||
348 | page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); | 398 | page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); |
349 | 399 | ||
350 | BUG_ON(page_idx & (order_size - 1)); | 400 | VM_BUG_ON(page_idx & (order_size - 1)); |
351 | BUG_ON(bad_range(zone, page)); | 401 | VM_BUG_ON(bad_range(zone, page)); |
352 | 402 | ||
353 | zone->free_pages += order_size; | 403 | zone->free_pages += order_size; |
354 | while (order < MAX_ORDER-1) { | 404 | while (order < MAX_ORDER-1) { |
@@ -421,7 +471,7 @@ static void free_pages_bulk(struct zone *zone, int count, | |||
421 | while (count--) { | 471 | while (count--) { |
422 | struct page *page; | 472 | struct page *page; |
423 | 473 | ||
424 | BUG_ON(list_empty(list)); | 474 | VM_BUG_ON(list_empty(list)); |
425 | page = list_entry(list->prev, struct page, lru); | 475 | page = list_entry(list->prev, struct page, lru); |
426 | /* have to delete it as __free_one_page list manipulates */ | 476 | /* have to delete it as __free_one_page list manipulates */ |
427 | list_del(&page->lru); | 477 | list_del(&page->lru); |
@@ -432,9 +482,11 @@ static void free_pages_bulk(struct zone *zone, int count, | |||
432 | 482 | ||
433 | static void free_one_page(struct zone *zone, struct page *page, int order) | 483 | static void free_one_page(struct zone *zone, struct page *page, int order) |
434 | { | 484 | { |
435 | LIST_HEAD(list); | 485 | spin_lock(&zone->lock); |
436 | list_add(&page->lru, &list); | 486 | zone->all_unreclaimable = 0; |
437 | free_pages_bulk(zone, 1, &list, order); | 487 | zone->pages_scanned = 0; |
488 | __free_one_page(page, zone ,order); | ||
489 | spin_unlock(&zone->lock); | ||
438 | } | 490 | } |
439 | 491 | ||
440 | static void __free_pages_ok(struct page *page, unsigned int order) | 492 | static void __free_pages_ok(struct page *page, unsigned int order) |
@@ -512,7 +564,7 @@ static inline void expand(struct zone *zone, struct page *page, | |||
512 | area--; | 564 | area--; |
513 | high--; | 565 | high--; |
514 | size >>= 1; | 566 | size >>= 1; |
515 | BUG_ON(bad_range(zone, &page[size])); | 567 | VM_BUG_ON(bad_range(zone, &page[size])); |
516 | list_add(&page[size].lru, &area->free_list); | 568 | list_add(&page[size].lru, &area->free_list); |
517 | area->nr_free++; | 569 | area->nr_free++; |
518 | set_page_order(&page[size], high); | 570 | set_page_order(&page[size], high); |
@@ -615,19 +667,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
615 | #ifdef CONFIG_NUMA | 667 | #ifdef CONFIG_NUMA |
616 | /* | 668 | /* |
617 | * Called from the slab reaper to drain pagesets on a particular node that | 669 | * Called from the slab reaper to drain pagesets on a particular node that |
618 | * belong to the currently executing processor. | 670 | * belongs to the currently executing processor. |
619 | * Note that this function must be called with the thread pinned to | 671 | * Note that this function must be called with the thread pinned to |
620 | * a single processor. | 672 | * a single processor. |
621 | */ | 673 | */ |
622 | void drain_node_pages(int nodeid) | 674 | void drain_node_pages(int nodeid) |
623 | { | 675 | { |
624 | int i, z; | 676 | int i; |
677 | enum zone_type z; | ||
625 | unsigned long flags; | 678 | unsigned long flags; |
626 | 679 | ||
627 | for (z = 0; z < MAX_NR_ZONES; z++) { | 680 | for (z = 0; z < MAX_NR_ZONES; z++) { |
628 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; | 681 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; |
629 | struct per_cpu_pageset *pset; | 682 | struct per_cpu_pageset *pset; |
630 | 683 | ||
684 | if (!populated_zone(zone)) | ||
685 | continue; | ||
686 | |||
631 | pset = zone_pcp(zone, smp_processor_id()); | 687 | pset = zone_pcp(zone, smp_processor_id()); |
632 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { | 688 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { |
633 | struct per_cpu_pages *pcp; | 689 | struct per_cpu_pages *pcp; |
@@ -672,7 +728,8 @@ static void __drain_pages(unsigned int cpu) | |||
672 | 728 | ||
673 | void mark_free_pages(struct zone *zone) | 729 | void mark_free_pages(struct zone *zone) |
674 | { | 730 | { |
675 | unsigned long zone_pfn, flags; | 731 | unsigned long pfn, max_zone_pfn; |
732 | unsigned long flags; | ||
676 | int order; | 733 | int order; |
677 | struct list_head *curr; | 734 | struct list_head *curr; |
678 | 735 | ||
@@ -680,18 +737,25 @@ void mark_free_pages(struct zone *zone) | |||
680 | return; | 737 | return; |
681 | 738 | ||
682 | spin_lock_irqsave(&zone->lock, flags); | 739 | spin_lock_irqsave(&zone->lock, flags); |
683 | for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) | 740 | |
684 | ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); | 741 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
742 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | ||
743 | if (pfn_valid(pfn)) { | ||
744 | struct page *page = pfn_to_page(pfn); | ||
745 | |||
746 | if (!PageNosave(page)) | ||
747 | ClearPageNosaveFree(page); | ||
748 | } | ||
685 | 749 | ||
686 | for (order = MAX_ORDER - 1; order >= 0; --order) | 750 | for (order = MAX_ORDER - 1; order >= 0; --order) |
687 | list_for_each(curr, &zone->free_area[order].free_list) { | 751 | list_for_each(curr, &zone->free_area[order].free_list) { |
688 | unsigned long start_pfn, i; | 752 | unsigned long i; |
689 | 753 | ||
690 | start_pfn = page_to_pfn(list_entry(curr, struct page, lru)); | 754 | pfn = page_to_pfn(list_entry(curr, struct page, lru)); |
755 | for (i = 0; i < (1UL << order); i++) | ||
756 | SetPageNosaveFree(pfn_to_page(pfn + i)); | ||
757 | } | ||
691 | 758 | ||
692 | for (i=0; i < (1<<order); i++) | ||
693 | SetPageNosaveFree(pfn_to_page(start_pfn+i)); | ||
694 | } | ||
695 | spin_unlock_irqrestore(&zone->lock, flags); | 759 | spin_unlock_irqrestore(&zone->lock, flags); |
696 | } | 760 | } |
697 | 761 | ||
@@ -761,8 +825,8 @@ void split_page(struct page *page, unsigned int order) | |||
761 | { | 825 | { |
762 | int i; | 826 | int i; |
763 | 827 | ||
764 | BUG_ON(PageCompound(page)); | 828 | VM_BUG_ON(PageCompound(page)); |
765 | BUG_ON(!page_count(page)); | 829 | VM_BUG_ON(!page_count(page)); |
766 | for (i = 1; i < (1 << order); i++) | 830 | for (i = 1; i < (1 << order); i++) |
767 | set_page_refcounted(page + i); | 831 | set_page_refcounted(page + i); |
768 | } | 832 | } |
@@ -809,7 +873,7 @@ again: | |||
809 | local_irq_restore(flags); | 873 | local_irq_restore(flags); |
810 | put_cpu(); | 874 | put_cpu(); |
811 | 875 | ||
812 | BUG_ON(bad_range(zone, page)); | 876 | VM_BUG_ON(bad_range(zone, page)); |
813 | if (prep_new_page(page, order, gfp_flags)) | 877 | if (prep_new_page(page, order, gfp_flags)) |
814 | goto again; | 878 | goto again; |
815 | return page; | 879 | return page; |
@@ -870,32 +934,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, | |||
870 | struct zone **z = zonelist->zones; | 934 | struct zone **z = zonelist->zones; |
871 | struct page *page = NULL; | 935 | struct page *page = NULL; |
872 | int classzone_idx = zone_idx(*z); | 936 | int classzone_idx = zone_idx(*z); |
937 | struct zone *zone; | ||
873 | 938 | ||
874 | /* | 939 | /* |
875 | * Go through the zonelist once, looking for a zone with enough free. | 940 | * Go through the zonelist once, looking for a zone with enough free. |
876 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 941 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. |
877 | */ | 942 | */ |
878 | do { | 943 | do { |
944 | zone = *z; | ||
945 | if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) && | ||
946 | zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) | ||
947 | break; | ||
879 | if ((alloc_flags & ALLOC_CPUSET) && | 948 | if ((alloc_flags & ALLOC_CPUSET) && |
880 | !cpuset_zone_allowed(*z, gfp_mask)) | 949 | !cpuset_zone_allowed(zone, gfp_mask)) |
881 | continue; | 950 | continue; |
882 | 951 | ||
883 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { | 952 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { |
884 | unsigned long mark; | 953 | unsigned long mark; |
885 | if (alloc_flags & ALLOC_WMARK_MIN) | 954 | if (alloc_flags & ALLOC_WMARK_MIN) |
886 | mark = (*z)->pages_min; | 955 | mark = zone->pages_min; |
887 | else if (alloc_flags & ALLOC_WMARK_LOW) | 956 | else if (alloc_flags & ALLOC_WMARK_LOW) |
888 | mark = (*z)->pages_low; | 957 | mark = zone->pages_low; |
889 | else | 958 | else |
890 | mark = (*z)->pages_high; | 959 | mark = zone->pages_high; |
891 | if (!zone_watermark_ok(*z, order, mark, | 960 | if (!zone_watermark_ok(zone , order, mark, |
892 | classzone_idx, alloc_flags)) | 961 | classzone_idx, alloc_flags)) |
893 | if (!zone_reclaim_mode || | 962 | if (!zone_reclaim_mode || |
894 | !zone_reclaim(*z, gfp_mask, order)) | 963 | !zone_reclaim(zone, gfp_mask, order)) |
895 | continue; | 964 | continue; |
896 | } | 965 | } |
897 | 966 | ||
898 | page = buffered_rmqueue(zonelist, *z, order, gfp_mask); | 967 | page = buffered_rmqueue(zonelist, zone, order, gfp_mask); |
899 | if (page) { | 968 | if (page) { |
900 | break; | 969 | break; |
901 | } | 970 | } |
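The new zone = *z caching and the __GFP_THISNODE test short-circuit the zonelist walk once a zone from another node's pgdat is reached. A userspace model of that walk with fabricated zone and node data; the kernel compares zone->zone_pgdat pointers, for which a node id stands in here:

```c
/*
 * Userspace model of the __GFP_THISNODE short-circuit: walk a
 * NULL-terminated zonelist and stop as soon as a zone belongs to a
 * different node than the first (preferred) entry.
 */
#include <stdio.h>
#include <stddef.h>

struct fake_zone { int nid; const char *name; };

static void walk_thisnode(struct fake_zone **zl)
{
	for (struct fake_zone **z = zl; *z != NULL; z++) {
		if ((*z)->nid != zl[0]->nid) {
			printf("stop before %s (node %d)\n", (*z)->name, (*z)->nid);
			break;
		}
		printf("try %s (node %d)\n", (*z)->name, (*z)->nid);
	}
}

int main(void)
{
	struct fake_zone n0_normal = { 0, "Node0 Normal" };
	struct fake_zone n0_dma    = { 0, "Node0 DMA" };
	struct fake_zone n1_normal = { 1, "Node1 Normal" };
	struct fake_zone *zonelist[] = { &n0_normal, &n0_dma, &n1_normal, NULL };

	walk_thisnode(zonelist);
	return 0;
}
```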
@@ -1083,7 +1152,7 @@ fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) | |||
1083 | * get_zeroed_page() returns a 32-bit address, which cannot represent | 1152 | * get_zeroed_page() returns a 32-bit address, which cannot represent |
1084 | * a highmem page | 1153 | * a highmem page |
1085 | */ | 1154 | */ |
1086 | BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); | 1155 | VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); |
1087 | 1156 | ||
1088 | page = alloc_pages(gfp_mask | __GFP_ZERO, 0); | 1157 | page = alloc_pages(gfp_mask | __GFP_ZERO, 0); |
1089 | if (page) | 1158 | if (page) |
@@ -1116,7 +1185,7 @@ EXPORT_SYMBOL(__free_pages); | |||
1116 | fastcall void free_pages(unsigned long addr, unsigned int order) | 1185 | fastcall void free_pages(unsigned long addr, unsigned int order) |
1117 | { | 1186 | { |
1118 | if (addr != 0) { | 1187 | if (addr != 0) { |
1119 | BUG_ON(!virt_addr_valid((void *)addr)); | 1188 | VM_BUG_ON(!virt_addr_valid((void *)addr)); |
1120 | __free_pages(virt_to_page((void *)addr), order); | 1189 | __free_pages(virt_to_page((void *)addr), order); |
1121 | } | 1190 | } |
1122 | } | 1191 | } |
@@ -1142,7 +1211,8 @@ EXPORT_SYMBOL(nr_free_pages); | |||
1142 | #ifdef CONFIG_NUMA | 1211 | #ifdef CONFIG_NUMA |
1143 | unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) | 1212 | unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) |
1144 | { | 1213 | { |
1145 | unsigned int i, sum = 0; | 1214 | unsigned int sum = 0; |
1215 | enum zone_type i; | ||
1146 | 1216 | ||
1147 | for (i = 0; i < MAX_NR_ZONES; i++) | 1217 | for (i = 0; i < MAX_NR_ZONES; i++) |
1148 | sum += pgdat->node_zones[i].free_pages; | 1218 | sum += pgdat->node_zones[i].free_pages; |
@@ -1187,27 +1257,11 @@ unsigned int nr_free_pagecache_pages(void) | |||
1187 | return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); | 1257 | return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); |
1188 | } | 1258 | } |
1189 | 1259 | ||
1190 | #ifdef CONFIG_HIGHMEM | 1260 | static inline void show_node(struct zone *zone) |
1191 | unsigned int nr_free_highpages (void) | ||
1192 | { | 1261 | { |
1193 | pg_data_t *pgdat; | 1262 | if (NUMA_BUILD) |
1194 | unsigned int pages = 0; | 1263 | printk("Node %ld ", zone_to_nid(zone)); |
1195 | |||
1196 | for_each_online_pgdat(pgdat) | ||
1197 | pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; | ||
1198 | |||
1199 | return pages; | ||
1200 | } | 1264 | } |
1201 | #endif | ||
1202 | |||
1203 | #ifdef CONFIG_NUMA | ||
1204 | static void show_node(struct zone *zone) | ||
1205 | { | ||
1206 | printk("Node %d ", zone->zone_pgdat->node_id); | ||
1207 | } | ||
1208 | #else | ||
1209 | #define show_node(zone) do { } while (0) | ||
1210 | #endif | ||
1211 | 1265 | ||
1212 | void si_meminfo(struct sysinfo *val) | 1266 | void si_meminfo(struct sysinfo *val) |
1213 | { | 1267 | { |
@@ -1215,13 +1269,8 @@ void si_meminfo(struct sysinfo *val) | |||
1215 | val->sharedram = 0; | 1269 | val->sharedram = 0; |
1216 | val->freeram = nr_free_pages(); | 1270 | val->freeram = nr_free_pages(); |
1217 | val->bufferram = nr_blockdev_pages(); | 1271 | val->bufferram = nr_blockdev_pages(); |
1218 | #ifdef CONFIG_HIGHMEM | ||
1219 | val->totalhigh = totalhigh_pages; | 1272 | val->totalhigh = totalhigh_pages; |
1220 | val->freehigh = nr_free_highpages(); | 1273 | val->freehigh = nr_free_highpages(); |
1221 | #else | ||
1222 | val->totalhigh = 0; | ||
1223 | val->freehigh = 0; | ||
1224 | #endif | ||
1225 | val->mem_unit = PAGE_SIZE; | 1274 | val->mem_unit = PAGE_SIZE; |
1226 | } | 1275 | } |
1227 | 1276 | ||
@@ -1234,8 +1283,13 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
1234 | 1283 | ||
1235 | val->totalram = pgdat->node_present_pages; | 1284 | val->totalram = pgdat->node_present_pages; |
1236 | val->freeram = nr_free_pages_pgdat(pgdat); | 1285 | val->freeram = nr_free_pages_pgdat(pgdat); |
1286 | #ifdef CONFIG_HIGHMEM | ||
1237 | val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; | 1287 | val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; |
1238 | val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; | 1288 | val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; |
1289 | #else | ||
1290 | val->totalhigh = 0; | ||
1291 | val->freehigh = 0; | ||
1292 | #endif | ||
1239 | val->mem_unit = PAGE_SIZE; | 1293 | val->mem_unit = PAGE_SIZE; |
1240 | } | 1294 | } |
1241 | #endif | 1295 | #endif |
@@ -1249,43 +1303,35 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
1249 | */ | 1303 | */ |
1250 | void show_free_areas(void) | 1304 | void show_free_areas(void) |
1251 | { | 1305 | { |
1252 | int cpu, temperature; | 1306 | int cpu; |
1253 | unsigned long active; | 1307 | unsigned long active; |
1254 | unsigned long inactive; | 1308 | unsigned long inactive; |
1255 | unsigned long free; | 1309 | unsigned long free; |
1256 | struct zone *zone; | 1310 | struct zone *zone; |
1257 | 1311 | ||
1258 | for_each_zone(zone) { | 1312 | for_each_zone(zone) { |
1259 | show_node(zone); | 1313 | if (!populated_zone(zone)) |
1260 | printk("%s per-cpu:", zone->name); | ||
1261 | |||
1262 | if (!populated_zone(zone)) { | ||
1263 | printk(" empty\n"); | ||
1264 | continue; | 1314 | continue; |
1265 | } else | 1315 | |
1266 | printk("\n"); | 1316 | show_node(zone); |
1317 | printk("%s per-cpu:\n", zone->name); | ||
1267 | 1318 | ||
1268 | for_each_online_cpu(cpu) { | 1319 | for_each_online_cpu(cpu) { |
1269 | struct per_cpu_pageset *pageset; | 1320 | struct per_cpu_pageset *pageset; |
1270 | 1321 | ||
1271 | pageset = zone_pcp(zone, cpu); | 1322 | pageset = zone_pcp(zone, cpu); |
1272 | 1323 | ||
1273 | for (temperature = 0; temperature < 2; temperature++) | 1324 | printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d " |
1274 | printk("cpu %d %s: high %d, batch %d used:%d\n", | 1325 | "Cold: hi:%5d, btch:%4d usd:%4d\n", |
1275 | cpu, | 1326 | cpu, pageset->pcp[0].high, |
1276 | temperature ? "cold" : "hot", | 1327 | pageset->pcp[0].batch, pageset->pcp[0].count, |
1277 | pageset->pcp[temperature].high, | 1328 | pageset->pcp[1].high, pageset->pcp[1].batch, |
1278 | pageset->pcp[temperature].batch, | 1329 | pageset->pcp[1].count); |
1279 | pageset->pcp[temperature].count); | ||
1280 | } | 1330 | } |
1281 | } | 1331 | } |
1282 | 1332 | ||
1283 | get_zone_counts(&active, &inactive, &free); | 1333 | get_zone_counts(&active, &inactive, &free); |
1284 | 1334 | ||
1285 | printk("Free pages: %11ukB (%ukB HighMem)\n", | ||
1286 | K(nr_free_pages()), | ||
1287 | K(nr_free_highpages())); | ||
1288 | |||
1289 | printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " | 1335 | printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " |
1290 | "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", | 1336 | "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", |
1291 | active, | 1337 | active, |
@@ -1294,13 +1340,17 @@ void show_free_areas(void) | |||
1294 | global_page_state(NR_WRITEBACK), | 1340 | global_page_state(NR_WRITEBACK), |
1295 | global_page_state(NR_UNSTABLE_NFS), | 1341 | global_page_state(NR_UNSTABLE_NFS), |
1296 | nr_free_pages(), | 1342 | nr_free_pages(), |
1297 | global_page_state(NR_SLAB), | 1343 | global_page_state(NR_SLAB_RECLAIMABLE) + |
1344 | global_page_state(NR_SLAB_UNRECLAIMABLE), | ||
1298 | global_page_state(NR_FILE_MAPPED), | 1345 | global_page_state(NR_FILE_MAPPED), |
1299 | global_page_state(NR_PAGETABLE)); | 1346 | global_page_state(NR_PAGETABLE)); |
1300 | 1347 | ||
1301 | for_each_zone(zone) { | 1348 | for_each_zone(zone) { |
1302 | int i; | 1349 | int i; |
1303 | 1350 | ||
1351 | if (!populated_zone(zone)) | ||
1352 | continue; | ||
1353 | |||
1304 | show_node(zone); | 1354 | show_node(zone); |
1305 | printk("%s" | 1355 | printk("%s" |
1306 | " free:%lukB" | 1356 | " free:%lukB" |
@@ -1333,12 +1383,11 @@ void show_free_areas(void) | |||
1333 | for_each_zone(zone) { | 1383 | for_each_zone(zone) { |
1334 | unsigned long nr[MAX_ORDER], flags, order, total = 0; | 1384 | unsigned long nr[MAX_ORDER], flags, order, total = 0; |
1335 | 1385 | ||
1386 | if (!populated_zone(zone)) | ||
1387 | continue; | ||
1388 | |||
1336 | show_node(zone); | 1389 | show_node(zone); |
1337 | printk("%s: ", zone->name); | 1390 | printk("%s: ", zone->name); |
1338 | if (!populated_zone(zone)) { | ||
1339 | printk("empty\n"); | ||
1340 | continue; | ||
1341 | } | ||
1342 | 1391 | ||
1343 | spin_lock_irqsave(&zone->lock, flags); | 1392 | spin_lock_irqsave(&zone->lock, flags); |
1344 | for (order = 0; order < MAX_ORDER; order++) { | 1393 | for (order = 0; order < MAX_ORDER; order++) { |
@@ -1360,39 +1409,25 @@ void show_free_areas(void) | |||
1360 | * Add all populated zones of a node to the zonelist. | 1409 | * Add all populated zones of a node to the zonelist. |
1361 | */ | 1410 | */ |
1362 | static int __meminit build_zonelists_node(pg_data_t *pgdat, | 1411 | static int __meminit build_zonelists_node(pg_data_t *pgdat, |
1363 | struct zonelist *zonelist, int nr_zones, int zone_type) | 1412 | struct zonelist *zonelist, int nr_zones, enum zone_type zone_type) |
1364 | { | 1413 | { |
1365 | struct zone *zone; | 1414 | struct zone *zone; |
1366 | 1415 | ||
1367 | BUG_ON(zone_type > ZONE_HIGHMEM); | 1416 | BUG_ON(zone_type >= MAX_NR_ZONES); |
1417 | zone_type++; | ||
1368 | 1418 | ||
1369 | do { | 1419 | do { |
1420 | zone_type--; | ||
1370 | zone = pgdat->node_zones + zone_type; | 1421 | zone = pgdat->node_zones + zone_type; |
1371 | if (populated_zone(zone)) { | 1422 | if (populated_zone(zone)) { |
1372 | #ifndef CONFIG_HIGHMEM | ||
1373 | BUG_ON(zone_type > ZONE_NORMAL); | ||
1374 | #endif | ||
1375 | zonelist->zones[nr_zones++] = zone; | 1423 | zonelist->zones[nr_zones++] = zone; |
1376 | check_highest_zone(zone_type); | 1424 | check_highest_zone(zone_type); |
1377 | } | 1425 | } |
1378 | zone_type--; | ||
1379 | 1426 | ||
1380 | } while (zone_type >= 0); | 1427 | } while (zone_type); |
1381 | return nr_zones; | 1428 | return nr_zones; |
1382 | } | 1429 | } |
1383 | 1430 | ||
1384 | static inline int highest_zone(int zone_bits) | ||
1385 | { | ||
1386 | int res = ZONE_NORMAL; | ||
1387 | if (zone_bits & (__force int)__GFP_HIGHMEM) | ||
1388 | res = ZONE_HIGHMEM; | ||
1389 | if (zone_bits & (__force int)__GFP_DMA32) | ||
1390 | res = ZONE_DMA32; | ||
1391 | if (zone_bits & (__force int)__GFP_DMA) | ||
1392 | res = ZONE_DMA; | ||
1393 | return res; | ||
1394 | } | ||
1395 | |||
1396 | #ifdef CONFIG_NUMA | 1431 | #ifdef CONFIG_NUMA |
1397 | #define MAX_NODE_LOAD (num_online_nodes()) | 1432 | #define MAX_NODE_LOAD (num_online_nodes()) |
1398 | static int __meminitdata node_load[MAX_NUMNODES]; | 1433 | static int __meminitdata node_load[MAX_NUMNODES]; |
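build_zonelists_node() now takes an enum zone_type and walks it downwards, so a request for the highest allowed zone falls back through every populated lower zone and the removed highest_zone() translation is no longer needed. A toy model of that descending walk, with an invented zone set and populated[] map:

```c
/*
 * Toy model of the descending walk in build_zonelists_node(): start at
 * the requested zone index and count down, appending every populated
 * zone to the fallback list.
 */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_HIGHMEM, MAX_NR_ZONES };

static const char *names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
static const int populated[MAX_NR_ZONES] = { 1, 0, 1, 1 };	/* DMA32 empty here */

static int build_node(enum zone_type high, const char *out[], int n)
{
	enum zone_type zt = high + 1;

	do {
		zt--;
		if (populated[zt])
			out[n++] = names[zt];
	} while (zt);

	return n;
}

int main(void)
{
	const char *zl[MAX_NR_ZONES];
	int n = build_node(ZONE_HIGHMEM, zl, 0);

	for (int i = 0; i < n; i++)
		printf("%s%s", zl[i], i + 1 < n ? " -> " : "\n");
	return 0;
}
```

With the data above the request for HighMem yields "HighMem -> Normal -> DMA", skipping the unpopulated DMA32 zone.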
@@ -1458,13 +1493,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) | |||
1458 | 1493 | ||
1459 | static void __meminit build_zonelists(pg_data_t *pgdat) | 1494 | static void __meminit build_zonelists(pg_data_t *pgdat) |
1460 | { | 1495 | { |
1461 | int i, j, k, node, local_node; | 1496 | int j, node, local_node; |
1497 | enum zone_type i; | ||
1462 | int prev_node, load; | 1498 | int prev_node, load; |
1463 | struct zonelist *zonelist; | 1499 | struct zonelist *zonelist; |
1464 | nodemask_t used_mask; | 1500 | nodemask_t used_mask; |
1465 | 1501 | ||
1466 | /* initialize zonelists */ | 1502 | /* initialize zonelists */ |
1467 | for (i = 0; i < GFP_ZONETYPES; i++) { | 1503 | for (i = 0; i < MAX_NR_ZONES; i++) { |
1468 | zonelist = pgdat->node_zonelists + i; | 1504 | zonelist = pgdat->node_zonelists + i; |
1469 | zonelist->zones[0] = NULL; | 1505 | zonelist->zones[0] = NULL; |
1470 | } | 1506 | } |
@@ -1494,13 +1530,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat) | |||
1494 | node_load[node] += load; | 1530 | node_load[node] += load; |
1495 | prev_node = node; | 1531 | prev_node = node; |
1496 | load--; | 1532 | load--; |
1497 | for (i = 0; i < GFP_ZONETYPES; i++) { | 1533 | for (i = 0; i < MAX_NR_ZONES; i++) { |
1498 | zonelist = pgdat->node_zonelists + i; | 1534 | zonelist = pgdat->node_zonelists + i; |
1499 | for (j = 0; zonelist->zones[j] != NULL; j++); | 1535 | for (j = 0; zonelist->zones[j] != NULL; j++); |
1500 | 1536 | ||
1501 | k = highest_zone(i); | 1537 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1502 | |||
1503 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); | ||
1504 | zonelist->zones[j] = NULL; | 1538 | zonelist->zones[j] = NULL; |
1505 | } | 1539 | } |
1506 | } | 1540 | } |
@@ -1510,17 +1544,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat) | |||
1510 | 1544 | ||
1511 | static void __meminit build_zonelists(pg_data_t *pgdat) | 1545 | static void __meminit build_zonelists(pg_data_t *pgdat) |
1512 | { | 1546 | { |
1513 | int i, j, k, node, local_node; | 1547 | int node, local_node; |
1548 | enum zone_type i,j; | ||
1514 | 1549 | ||
1515 | local_node = pgdat->node_id; | 1550 | local_node = pgdat->node_id; |
1516 | for (i = 0; i < GFP_ZONETYPES; i++) { | 1551 | for (i = 0; i < MAX_NR_ZONES; i++) { |
1517 | struct zonelist *zonelist; | 1552 | struct zonelist *zonelist; |
1518 | 1553 | ||
1519 | zonelist = pgdat->node_zonelists + i; | 1554 | zonelist = pgdat->node_zonelists + i; |
1520 | 1555 | ||
1521 | j = 0; | 1556 | j = build_zonelists_node(pgdat, zonelist, 0, i); |
1522 | k = highest_zone(i); | ||
1523 | j = build_zonelists_node(pgdat, zonelist, j, k); | ||
1524 | /* | 1557 | /* |
1525 | * Now we build the zonelist so that it contains the zones | 1558 | * Now we build the zonelist so that it contains the zones |
1526 | * of all the other nodes. | 1559 | * of all the other nodes. |
@@ -1532,12 +1565,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat) | |||
1532 | for (node = local_node + 1; node < MAX_NUMNODES; node++) { | 1565 | for (node = local_node + 1; node < MAX_NUMNODES; node++) { |
1533 | if (!node_online(node)) | 1566 | if (!node_online(node)) |
1534 | continue; | 1567 | continue; |
1535 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); | 1568 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1536 | } | 1569 | } |
1537 | for (node = 0; node < local_node; node++) { | 1570 | for (node = 0; node < local_node; node++) { |
1538 | if (!node_online(node)) | 1571 | if (!node_online(node)) |
1539 | continue; | 1572 | continue; |
1540 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); | 1573 | j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); |
1541 | } | 1574 | } |
1542 | 1575 | ||
1543 | zonelist->zones[j] = NULL; | 1576 | zonelist->zones[j] = NULL; |
@@ -1558,7 +1591,7 @@ static int __meminit __build_all_zonelists(void *dummy) | |||
1558 | void __meminit build_all_zonelists(void) | 1591 | void __meminit build_all_zonelists(void) |
1559 | { | 1592 | { |
1560 | if (system_state == SYSTEM_BOOTING) { | 1593 | if (system_state == SYSTEM_BOOTING) { |
1561 | __build_all_zonelists(0); | 1594 | __build_all_zonelists(NULL); |
1562 | cpuset_init_current_mems_allowed(); | 1595 | cpuset_init_current_mems_allowed(); |
1563 | } else { | 1596 | } else { |
1564 | /* we have to stop all cpus to guarantee there is no user | 1597 | /* we have to stop all cpus to guarantee there is no user |
@@ -1639,25 +1672,6 @@ static inline unsigned long wait_table_bits(unsigned long size) | |||
1639 | 1672 | ||
1640 | #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) | 1673 | #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) |
1641 | 1674 | ||
1642 | static void __init calculate_zone_totalpages(struct pglist_data *pgdat, | ||
1643 | unsigned long *zones_size, unsigned long *zholes_size) | ||
1644 | { | ||
1645 | unsigned long realtotalpages, totalpages = 0; | ||
1646 | int i; | ||
1647 | |||
1648 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
1649 | totalpages += zones_size[i]; | ||
1650 | pgdat->node_spanned_pages = totalpages; | ||
1651 | |||
1652 | realtotalpages = totalpages; | ||
1653 | if (zholes_size) | ||
1654 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
1655 | realtotalpages -= zholes_size[i]; | ||
1656 | pgdat->node_present_pages = realtotalpages; | ||
1657 | printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); | ||
1658 | } | ||
1659 | |||
1660 | |||
1661 | /* | 1675 | /* |
1662 | * Initially all pages are reserved - free ones are freed | 1676 | * Initially all pages are reserved - free ones are freed |
1663 | * up by free_all_bootmem() once the early boot process is | 1677 | * up by free_all_bootmem() once the early boot process is |
@@ -1698,8 +1712,8 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, | |||
1698 | } | 1712 | } |
1699 | 1713 | ||
1700 | #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) | 1714 | #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) |
1701 | void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, | 1715 | void zonetable_add(struct zone *zone, int nid, enum zone_type zid, |
1702 | unsigned long size) | 1716 | unsigned long pfn, unsigned long size) |
1703 | { | 1717 | { |
1704 | unsigned long snum = pfn_to_section_nr(pfn); | 1718 | unsigned long snum = pfn_to_section_nr(pfn); |
1705 | unsigned long end = pfn_to_section_nr(pfn + size); | 1719 | unsigned long end = pfn_to_section_nr(pfn + size); |
@@ -1815,6 +1829,9 @@ static int __cpuinit process_zones(int cpu) | |||
1815 | 1829 | ||
1816 | for_each_zone(zone) { | 1830 | for_each_zone(zone) { |
1817 | 1831 | ||
1832 | if (!populated_zone(zone)) | ||
1833 | continue; | ||
1834 | |||
1818 | zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), | 1835 | zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), |
1819 | GFP_KERNEL, cpu_to_node(cpu)); | 1836 | GFP_KERNEL, cpu_to_node(cpu)); |
1820 | if (!zone_pcp(zone, cpu)) | 1837 | if (!zone_pcp(zone, cpu)) |
@@ -1845,8 +1862,10 @@ static inline void free_zone_pagesets(int cpu) | |||
1845 | for_each_zone(zone) { | 1862 | for_each_zone(zone) { |
1846 | struct per_cpu_pageset *pset = zone_pcp(zone, cpu); | 1863 | struct per_cpu_pageset *pset = zone_pcp(zone, cpu); |
1847 | 1864 | ||
1865 | /* Free per_cpu_pageset if it is slab allocated */ | ||
1866 | if (pset != &boot_pageset[cpu]) | ||
1867 | kfree(pset); | ||
1848 | zone_pcp(zone, cpu) = NULL; | 1868 | zone_pcp(zone, cpu) = NULL; |
1849 | kfree(pset); | ||
1850 | } | 1869 | } |
1851 | } | 1870 | } |
1852 | 1871 | ||
@@ -1972,6 +1991,366 @@ __meminit int init_currently_empty_zone(struct zone *zone, | |||
1972 | return 0; | 1991 | return 0; |
1973 | } | 1992 | } |
1974 | 1993 | ||
1994 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | ||
1995 | /* | ||
1996 | * Basic iterator support. Return the first range of PFNs for a node | ||
1997 | * Note: nid == MAX_NUMNODES returns first region regardless of node | ||
1998 | */ | ||
1999 | static int __init first_active_region_index_in_nid(int nid) | ||
2000 | { | ||
2001 | int i; | ||
2002 | |||
2003 | for (i = 0; i < nr_nodemap_entries; i++) | ||
2004 | if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) | ||
2005 | return i; | ||
2006 | |||
2007 | return -1; | ||
2008 | } | ||
2009 | |||
2010 | /* | ||
2011 | * Basic iterator support. Return the next active range of PFNs for a node | ||
2012 | * Note: nid == MAX_NUMNODES returns next region regardless of node | ||
2013 | */ | ||
2014 | static int __init next_active_region_index_in_nid(int index, int nid) | ||
2015 | { | ||
2016 | for (index = index + 1; index < nr_nodemap_entries; index++) | ||
2017 | if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) | ||
2018 | return index; | ||
2019 | |||
2020 | return -1; | ||
2021 | } | ||
2022 | |||
2023 | #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID | ||
2024 | /* | ||
2025 | * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. | ||
2026 | * Architectures may implement their own version but if add_active_range() | ||
2027 | * was used and there are no special requirements, this is a convenient | ||
2028 | * alternative | ||
2029 | */ | ||
2030 | int __init early_pfn_to_nid(unsigned long pfn) | ||
2031 | { | ||
2032 | int i; | ||
2033 | |||
2034 | for (i = 0; i < nr_nodemap_entries; i++) { | ||
2035 | unsigned long start_pfn = early_node_map[i].start_pfn; | ||
2036 | unsigned long end_pfn = early_node_map[i].end_pfn; | ||
2037 | |||
2038 | if (start_pfn <= pfn && pfn < end_pfn) | ||
2039 | return early_node_map[i].nid; | ||
2040 | } | ||
2041 | |||
2042 | return 0; | ||
2043 | } | ||
2044 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | ||
2045 | |||
2046 | /* Basic iterator support to walk early_node_map[] */ | ||
2047 | #define for_each_active_range_index_in_nid(i, nid) \ | ||
2048 | for (i = first_active_region_index_in_nid(nid); i != -1; \ | ||
2049 | i = next_active_region_index_in_nid(i, nid)) | ||
2050 | |||
2051 | /** | ||
2052 | * free_bootmem_with_active_regions - Call free_bootmem_node for each active range | ||
2053 | * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed | ||
2054 | * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node | ||
2055 | * | ||
2056 | * If an architecture guarantees that all ranges registered with | ||
2057 | * add_active_ranges() contain no holes and may be freed, | ||
2058 | * this function may be used instead of calling free_bootmem() manually. | ||
2059 | */ | ||
2060 | void __init free_bootmem_with_active_regions(int nid, | ||
2061 | unsigned long max_low_pfn) | ||
2062 | { | ||
2063 | int i; | ||
2064 | |||
2065 | for_each_active_range_index_in_nid(i, nid) { | ||
2066 | unsigned long size_pages = 0; | ||
2067 | unsigned long end_pfn = early_node_map[i].end_pfn; | ||
2068 | |||
2069 | if (early_node_map[i].start_pfn >= max_low_pfn) | ||
2070 | continue; | ||
2071 | |||
2072 | if (end_pfn > max_low_pfn) | ||
2073 | end_pfn = max_low_pfn; | ||
2074 | |||
2075 | size_pages = end_pfn - early_node_map[i].start_pfn; | ||
2076 | free_bootmem_node(NODE_DATA(early_node_map[i].nid), | ||
2077 | PFN_PHYS(early_node_map[i].start_pfn), | ||
2078 | size_pages << PAGE_SHIFT); | ||
2079 | } | ||
2080 | } | ||
2081 | |||
2082 | /** | ||
2083 | * sparse_memory_present_with_active_regions - Call memory_present for each active range | ||
2084 | * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used | ||
2085 | * | ||
2086 | * If an architecture guarantees that all ranges registered with | ||
2087 | * add_active_ranges() contain no holes and may be freed, | ||
2088 | * this function may be used instead of calling memory_present() manually. | ||
2089 | */ | ||
2090 | void __init sparse_memory_present_with_active_regions(int nid) | ||
2091 | { | ||
2092 | int i; | ||
2093 | |||
2094 | for_each_active_range_index_in_nid(i, nid) | ||
2095 | memory_present(early_node_map[i].nid, | ||
2096 | early_node_map[i].start_pfn, | ||
2097 | early_node_map[i].end_pfn); | ||
2098 | } | ||
2099 | |||
2100 | /** | ||
2101 | * push_node_boundaries - Push node boundaries to at least the requested boundary | ||
2102 | * @nid: The nid of the node to push the boundary for | ||
2103 | * @start_pfn: The start pfn of the node | ||
2104 | * @end_pfn: The end pfn of the node | ||
2105 | * | ||
2106 | * In reserve-based hot-add, mem_map is allocated that is unused until hotadd | ||
2107 | * time. Specifically, on x86_64, SRAT will report ranges that can potentially | ||
2108 | * be hotplugged even though no physical memory exists. This function allows | ||
2109 | * an arch to push out the node boundaries so mem_map is allocated that can | ||
2110 | * be used later. | ||
2111 | */ | ||
2112 | #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE | ||
2113 | void __init push_node_boundaries(unsigned int nid, | ||
2114 | unsigned long start_pfn, unsigned long end_pfn) | ||
2115 | { | ||
2116 | printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n", | ||
2117 | nid, start_pfn, end_pfn); | ||
2118 | |||
2119 | /* Initialise the boundary for this node if necessary */ | ||
2120 | if (node_boundary_end_pfn[nid] == 0) | ||
2121 | node_boundary_start_pfn[nid] = -1UL; | ||
2122 | |||
2123 | /* Update the boundaries */ | ||
2124 | if (node_boundary_start_pfn[nid] > start_pfn) | ||
2125 | node_boundary_start_pfn[nid] = start_pfn; | ||
2126 | if (node_boundary_end_pfn[nid] < end_pfn) | ||
2127 | node_boundary_end_pfn[nid] = end_pfn; | ||
2128 | } | ||
2129 | |||
2130 | /* If necessary, push the node boundary out for reserve hotadd */ | ||
2131 | static void __init account_node_boundary(unsigned int nid, | ||
2132 | unsigned long *start_pfn, unsigned long *end_pfn) | ||
2133 | { | ||
2134 | printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n", | ||
2135 | nid, *start_pfn, *end_pfn); | ||
2136 | |||
2137 | /* Return if boundary information has not been provided */ | ||
2138 | if (node_boundary_end_pfn[nid] == 0) | ||
2139 | return; | ||
2140 | |||
2141 | /* Check the boundaries and update if necessary */ | ||
2142 | if (node_boundary_start_pfn[nid] < *start_pfn) | ||
2143 | *start_pfn = node_boundary_start_pfn[nid]; | ||
2144 | if (node_boundary_end_pfn[nid] > *end_pfn) | ||
2145 | *end_pfn = node_boundary_end_pfn[nid]; | ||
2146 | } | ||
2147 | #else | ||
2148 | void __init push_node_boundaries(unsigned int nid, | ||
2149 | unsigned long start_pfn, unsigned long end_pfn) {} | ||
2150 | |||
2151 | static void __init account_node_boundary(unsigned int nid, | ||
2152 | unsigned long *start_pfn, unsigned long *end_pfn) {} | ||
2153 | #endif | ||
2154 | |||
2155 | |||
2156 | /** | ||
2157 | * get_pfn_range_for_nid - Return the start and end page frames for a node | ||
2158 | * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned | ||
2159 | * @start_pfn: Passed by reference. On return, it will have the node start_pfn | ||
2160 | * @end_pfn: Passed by reference. On return, it will have the node end_pfn | ||
2161 | * | ||
2162 | * It returns the start and end page frame of a node based on information | ||
2163 | * provided by an arch calling add_active_range(). If called for a node | ||
2164 | * with no available memory, a warning is printed and the start and end | ||
2165 | * PFNs will be 0 | ||
2166 | */ | ||
2167 | void __init get_pfn_range_for_nid(unsigned int nid, | ||
2168 | unsigned long *start_pfn, unsigned long *end_pfn) | ||
2169 | { | ||
2170 | int i; | ||
2171 | *start_pfn = -1UL; | ||
2172 | *end_pfn = 0; | ||
2173 | |||
2174 | for_each_active_range_index_in_nid(i, nid) { | ||
2175 | *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); | ||
2176 | *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); | ||
2177 | } | ||
2178 | |||
2179 | if (*start_pfn == -1UL) { | ||
2180 | printk(KERN_WARNING "Node %u active with no memory\n", nid); | ||
2181 | *start_pfn = 0; | ||
2182 | } | ||
2183 | |||
2184 | /* Push the node boundaries out if requested */ | ||
2185 | account_node_boundary(nid, start_pfn, end_pfn); | ||
2186 | } | ||
2187 | |||
2188 | /* | ||
2189 | * Return the number of pages a zone spans in a node, including holes | ||
2190 | * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node() | ||
2191 | */ | ||
2192 | unsigned long __init zone_spanned_pages_in_node(int nid, | ||
2193 | unsigned long zone_type, | ||
2194 | unsigned long *ignored) | ||
2195 | { | ||
2196 | unsigned long node_start_pfn, node_end_pfn; | ||
2197 | unsigned long zone_start_pfn, zone_end_pfn; | ||
2198 | |||
2199 | /* Get the start and end of the node and zone */ | ||
2200 | get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); | ||
2201 | zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; | ||
2202 | zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; | ||
2203 | |||
2204 | /* Check that this node has pages within the zone's required range */ | ||
2205 | if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn) | ||
2206 | return 0; | ||
2207 | |||
2208 | /* Move the zone boundaries inside the node if necessary */ | ||
2209 | zone_end_pfn = min(zone_end_pfn, node_end_pfn); | ||
2210 | zone_start_pfn = max(zone_start_pfn, node_start_pfn); | ||
2211 | |||
2212 | /* Return the spanned pages */ | ||
2213 | return zone_end_pfn - zone_start_pfn; | ||
2214 | } | ||
2215 | |||
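zone_spanned_pages_in_node() is an interval intersection between the node's PFN span and the arch-supplied zone limits. The same clamp in standalone form, with made-up PFN values:

```c
/*
 * Worked sketch of the interval clamp in zone_spanned_pages_in_node().
 * All PFN values are invented for illustration.
 */
#include <stdio.h>

static unsigned long spanned(unsigned long node_start, unsigned long node_end,
			     unsigned long zone_start, unsigned long zone_end)
{
	/* No overlap between the zone's limits and this node */
	if (zone_end < node_start || zone_start > node_end)
		return 0;

	/* Clamp the zone boundaries to the node boundaries */
	if (zone_end > node_end)
		zone_end = node_end;
	if (zone_start < node_start)
		zone_start = node_start;

	return zone_end - zone_start;
}

int main(void)
{
	/* Node spans PFNs [0x1000, 0x9000); zone limits are [0x4000, 0x100000) */
	printf("zone spans %lu pages on this node\n",
	       spanned(0x1000, 0x9000, 0x4000, 0x100000));
	return 0;
}
```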
2216 | /* | ||
2217 | * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, | ||
2218 | * then all holes in the requested range will be accounted for | ||
2219 | */ | ||
2220 | unsigned long __init __absent_pages_in_range(int nid, | ||
2221 | unsigned long range_start_pfn, | ||
2222 | unsigned long range_end_pfn) | ||
2223 | { | ||
2224 | int i = 0; | ||
2225 | unsigned long prev_end_pfn = 0, hole_pages = 0; | ||
2226 | unsigned long start_pfn; | ||
2227 | |||
2228 | /* Find the end_pfn of the first active range of pfns in the node */ | ||
2229 | i = first_active_region_index_in_nid(nid); | ||
2230 | if (i == -1) | ||
2231 | return 0; | ||
2232 | |||
2233 | /* Account for ranges before physical memory on this node */ | ||
2234 | if (early_node_map[i].start_pfn > range_start_pfn) | ||
2235 | hole_pages = early_node_map[i].start_pfn - range_start_pfn; | ||
2236 | |||
2237 | prev_end_pfn = early_node_map[i].start_pfn; | ||
2238 | |||
2239 | /* Find all holes for the zone within the node */ | ||
2240 | for (; i != -1; i = next_active_region_index_in_nid(i, nid)) { | ||
2241 | |||
2242 | /* No need to continue if prev_end_pfn is outside the zone */ | ||
2243 | if (prev_end_pfn >= range_end_pfn) | ||
2244 | break; | ||
2245 | |||
2246 | /* Make sure the end of the zone is not within the hole */ | ||
2247 | start_pfn = min(early_node_map[i].start_pfn, range_end_pfn); | ||
2248 | prev_end_pfn = max(prev_end_pfn, range_start_pfn); | ||
2249 | |||
2250 | /* Update the hole size count and move on */ | ||
2251 | if (start_pfn > range_start_pfn) { | ||
2252 | BUG_ON(prev_end_pfn > start_pfn); | ||
2253 | hole_pages += start_pfn - prev_end_pfn; | ||
2254 | } | ||
2255 | prev_end_pfn = early_node_map[i].end_pfn; | ||
2256 | } | ||
2257 | |||
2258 | /* Account for ranges past physical memory on this node */ | ||
2259 | if (range_end_pfn > prev_end_pfn) | ||
2260 | hole_pages = range_end_pfn - | ||
2261 | max(range_start_pfn, prev_end_pfn); | ||
2262 | |||
2263 | return hole_pages; | ||
2264 | } | ||
2265 | |||
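The hole accounting walks a node's active ranges in ascending PFN order and sums every gap that falls inside the requested window. A simplified standalone version of that gap-summing, not a line-for-line copy of the kernel routine, with invented ranges:

```c
/*
 * Standalone version of the gap-summing idea in __absent_pages_in_range():
 * given active ranges sorted by start, count PFNs in [range_start,
 * range_end) that no range covers.
 */
#include <stdio.h>

struct range { unsigned long start, end; };

static unsigned long absent_pages(const struct range *map, int n,
				  unsigned long range_start, unsigned long range_end)
{
	unsigned long prev_end = range_start, holes = 0;

	for (int i = 0; i < n; i++) {
		if (map[i].start >= range_end)
			break;
		if (map[i].start > prev_end)
			holes += map[i].start - prev_end;	/* gap before this range */
		if (map[i].end > prev_end)
			prev_end = map[i].end;
	}
	if (range_end > prev_end)
		holes += range_end - prev_end;			/* gap after the last range */

	return holes;
}

int main(void)
{
	struct range map[] = { { 0x000, 0x100 }, { 0x200, 0x300 } };

	/* 0x100 pages missing between the ranges, 0x100 more up to 0x400 */
	printf("absent: %lu pages\n", absent_pages(map, 2, 0x000, 0x400));
	return 0;
}
```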
2266 | /** | ||
2267 | * absent_pages_in_range - Return number of page frames in holes within a range | ||
2268 | * @start_pfn: The start PFN to start searching for holes | ||
2269 | * @end_pfn: The end PFN to stop searching for holes | ||
2270 | * | ||
2271 | * It returns the number of page frames in memory holes within a range | ||
2272 | */ | ||
2273 | unsigned long __init absent_pages_in_range(unsigned long start_pfn, | ||
2274 | unsigned long end_pfn) | ||
2275 | { | ||
2276 | return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn); | ||
2277 | } | ||
2278 | |||
2279 | /* Return the number of page frames in holes in a zone on a node */ | ||
2280 | unsigned long __init zone_absent_pages_in_node(int nid, | ||
2281 | unsigned long zone_type, | ||
2282 | unsigned long *ignored) | ||
2283 | { | ||
2284 | unsigned long node_start_pfn, node_end_pfn; | ||
2285 | unsigned long zone_start_pfn, zone_end_pfn; | ||
2286 | |||
2287 | get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); | ||
2288 | zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], | ||
2289 | node_start_pfn); | ||
2290 | zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type], | ||
2291 | node_end_pfn); | ||
2292 | |||
2293 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); | ||
2294 | } | ||
2295 | |||
2296 | /* Return the zone index a PFN is in */ | ||
2297 | int memmap_zone_idx(struct page *lmem_map) | ||
2298 | { | ||
2299 | int i; | ||
2300 | unsigned long phys_addr = virt_to_phys(lmem_map); | ||
2301 | unsigned long pfn = phys_addr >> PAGE_SHIFT; | ||
2302 | |||
2303 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
2304 | if (pfn < arch_zone_highest_possible_pfn[i]) | ||
2305 | break; | ||
2306 | |||
2307 | return i; | ||
2308 | } | ||
2309 | #else | ||
2310 | static inline unsigned long zone_spanned_pages_in_node(int nid, | ||
2311 | unsigned long zone_type, | ||
2312 | unsigned long *zones_size) | ||
2313 | { | ||
2314 | return zones_size[zone_type]; | ||
2315 | } | ||
2316 | |||
2317 | static inline unsigned long zone_absent_pages_in_node(int nid, | ||
2318 | unsigned long zone_type, | ||
2319 | unsigned long *zholes_size) | ||
2320 | { | ||
2321 | if (!zholes_size) | ||
2322 | return 0; | ||
2323 | |||
2324 | return zholes_size[zone_type]; | ||
2325 | } | ||
2326 | |||
2327 | static inline int memmap_zone_idx(struct page *lmem_map) | ||
2328 | { | ||
2329 | return MAX_NR_ZONES; | ||
2330 | } | ||
2331 | #endif | ||
2332 | |||
2333 | static void __init calculate_node_totalpages(struct pglist_data *pgdat, | ||
2334 | unsigned long *zones_size, unsigned long *zholes_size) | ||
2335 | { | ||
2336 | unsigned long realtotalpages, totalpages = 0; | ||
2337 | enum zone_type i; | ||
2338 | |||
2339 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
2340 | totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, | ||
2341 | zones_size); | ||
2342 | pgdat->node_spanned_pages = totalpages; | ||
2343 | |||
2344 | realtotalpages = totalpages; | ||
2345 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
2346 | realtotalpages -= | ||
2347 | zone_absent_pages_in_node(pgdat->node_id, i, | ||
2348 | zholes_size); | ||
2349 | pgdat->node_present_pages = realtotalpages; | ||
2350 | printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, | ||
2351 | realtotalpages); | ||
2352 | } | ||
2353 | |||
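calculate_node_totalpages() then reduces to "sum of spanned pages minus sum of holes" across the zones of one node; schematically, with invented per-zone counts:

```c
/*
 * Schematic of calculate_node_totalpages(): spanned minus absent,
 * summed over zones. The per-zone numbers are invented.
 */
#include <stdio.h>

int main(void)
{
	unsigned long spanned[] = { 4096, 225280, 0 };	/* e.g. DMA, Normal, HighMem */
	unsigned long absent[]  = {  128,   8192, 0 };	/* holes per zone */
	unsigned long total = 0, real;

	for (int i = 0; i < 3; i++)
		total += spanned[i];
	real = total;
	for (int i = 0; i < 3; i++)
		real -= absent[i];

	printf("node_spanned_pages=%lu node_present_pages=%lu\n", total, real);
	return 0;
}
```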
1975 | /* | 2354 | /* |
1976 | * Set up the zone data structures: | 2355 | * Set up the zone data structures: |
1977 | * - mark all pages reserved | 2356 | * - mark all pages reserved |
@@ -1981,7 +2360,7 @@ __meminit int init_currently_empty_zone(struct zone *zone, | |||
1981 | static void __meminit free_area_init_core(struct pglist_data *pgdat, | 2360 | static void __meminit free_area_init_core(struct pglist_data *pgdat, |
1982 | unsigned long *zones_size, unsigned long *zholes_size) | 2361 | unsigned long *zones_size, unsigned long *zholes_size) |
1983 | { | 2362 | { |
1984 | unsigned long j; | 2363 | enum zone_type j; |
1985 | int nid = pgdat->node_id; | 2364 | int nid = pgdat->node_id; |
1986 | unsigned long zone_start_pfn = pgdat->node_start_pfn; | 2365 | unsigned long zone_start_pfn = pgdat->node_start_pfn; |
1987 | int ret; | 2366 | int ret; |
@@ -1993,21 +2372,46 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, | |||
1993 | 2372 | ||
1994 | for (j = 0; j < MAX_NR_ZONES; j++) { | 2373 | for (j = 0; j < MAX_NR_ZONES; j++) { |
1995 | struct zone *zone = pgdat->node_zones + j; | 2374 | struct zone *zone = pgdat->node_zones + j; |
1996 | unsigned long size, realsize; | 2375 | unsigned long size, realsize, memmap_pages; |
1997 | 2376 | ||
1998 | realsize = size = zones_size[j]; | 2377 | size = zone_spanned_pages_in_node(nid, j, zones_size); |
1999 | if (zholes_size) | 2378 | realsize = size - zone_absent_pages_in_node(nid, j, |
2000 | realsize -= zholes_size[j]; | 2379 | zholes_size); |
2001 | 2380 | ||
2002 | if (j < ZONE_HIGHMEM) | 2381 | /* |
2382 | * Adjust realsize so that it accounts for how much memory | ||
2383 | * is used by this zone for memmap. This affects the watermark | ||
2384 | * and per-cpu initialisations | ||
2385 | */ | ||
2386 | memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT; | ||
2387 | if (realsize >= memmap_pages) { | ||
2388 | realsize -= memmap_pages; | ||
2389 | printk(KERN_DEBUG | ||
2390 | " %s zone: %lu pages used for memmap\n", | ||
2391 | zone_names[j], memmap_pages); | ||
2392 | } else | ||
2393 | printk(KERN_WARNING | ||
2394 | " %s zone: %lu pages exceeds realsize %lu\n", | ||
2395 | zone_names[j], memmap_pages, realsize); | ||
2396 | |||
2397 | /* Account for reserved DMA pages */ | ||
2398 | if (j == ZONE_DMA && realsize > dma_reserve) { | ||
2399 | realsize -= dma_reserve; | ||
2400 | printk(KERN_DEBUG " DMA zone: %lu pages reserved\n", | ||
2401 | dma_reserve); | ||
2402 | } | ||
2403 | |||
2404 | if (!is_highmem_idx(j)) | ||
2003 | nr_kernel_pages += realsize; | 2405 | nr_kernel_pages += realsize; |
2004 | nr_all_pages += realsize; | 2406 | nr_all_pages += realsize; |
2005 | 2407 | ||
2006 | zone->spanned_pages = size; | 2408 | zone->spanned_pages = size; |
2007 | zone->present_pages = realsize; | 2409 | zone->present_pages = realsize; |
2008 | #ifdef CONFIG_NUMA | 2410 | #ifdef CONFIG_NUMA |
2009 | zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) | 2411 | zone->node = nid; |
2412 | zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) | ||
2010 | / 100; | 2413 | / 100; |
2414 | zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; | ||
2011 | #endif | 2415 | #endif |
2012 | zone->name = zone_names[j]; | 2416 | zone->name = zone_names[j]; |
2013 | spin_lock_init(&zone->lock); | 2417 | spin_lock_init(&zone->lock); |
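The memmap adjustment above is plain arithmetic: the zone's struct page array occupies size * sizeof(struct page) bytes, which converts to pages by shifting right by PAGE_SHIFT, and that amount is deducted from realsize. A worked example with an assumed 56-byte struct page and 4 KiB pages (both values are illustrative, not fixed by the kernel):

```c
/*
 * Worked example of the memmap_pages adjustment, with assumed sizes:
 * 4 KiB pages and a 56-byte struct page.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define STRUCT_PAGE_SIZE 56UL

int main(void)
{
	unsigned long size = 262144;	/* zone spans 1 GiB of 4 KiB pages */
	unsigned long realsize = size;	/* assume no holes */
	unsigned long memmap_pages = (size * STRUCT_PAGE_SIZE) >> PAGE_SHIFT;

	realsize -= memmap_pages;
	printf("memmap uses %lu pages, %lu pages left for the zone\n",
	       memmap_pages, realsize);
	return 0;
}
```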
@@ -2067,8 +2471,13 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) | |||
2067 | /* | 2471 | /* |
2068 | * With no DISCONTIG, the global mem_map is just set as node 0's | 2472 | * With no DISCONTIG, the global mem_map is just set as node 0's |
2069 | */ | 2473 | */ |
2070 | if (pgdat == NODE_DATA(0)) | 2474 | if (pgdat == NODE_DATA(0)) { |
2071 | mem_map = NODE_DATA(0)->node_mem_map; | 2475 | mem_map = NODE_DATA(0)->node_mem_map; |
2476 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | ||
2477 | if (page_to_pfn(mem_map) != pgdat->node_start_pfn) | ||
2478 | mem_map -= pgdat->node_start_pfn; | ||
2479 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | ||
2480 | } | ||
2072 | #endif | 2481 | #endif |
2073 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ | 2482 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ |
2074 | } | 2483 | } |
@@ -2079,13 +2488,255 @@ void __meminit free_area_init_node(int nid, struct pglist_data *pgdat, | |||
2079 | { | 2488 | { |
2080 | pgdat->node_id = nid; | 2489 | pgdat->node_id = nid; |
2081 | pgdat->node_start_pfn = node_start_pfn; | 2490 | pgdat->node_start_pfn = node_start_pfn; |
2082 | calculate_zone_totalpages(pgdat, zones_size, zholes_size); | 2491 | calculate_node_totalpages(pgdat, zones_size, zholes_size); |
2083 | 2492 | ||
2084 | alloc_node_mem_map(pgdat); | 2493 | alloc_node_mem_map(pgdat); |
2085 | 2494 | ||
2086 | free_area_init_core(pgdat, zones_size, zholes_size); | 2495 | free_area_init_core(pgdat, zones_size, zholes_size); |
2087 | } | 2496 | } |
2088 | 2497 | ||
2498 | #ifdef CONFIG_ARCH_POPULATES_NODE_MAP | ||
2499 | /** | ||
2500 | * add_active_range - Register a range of PFNs backed by physical memory | ||
2501 | * @nid: The node ID the range resides on | ||
2502 | * @start_pfn: The start PFN of the available physical memory | ||
2503 | * @end_pfn: The end PFN of the available physical memory | ||
2504 | * | ||
2505 | * These ranges are stored in an early_node_map[] and later used by | ||
2506 | * free_area_init_nodes() to calculate zone sizes and holes. If the | ||
2507 | * range spans a memory hole, it is up to the architecture to ensure | ||
2508 | * the memory is not freed by the bootmem allocator. If possible | ||
2509 | * the range being registered will be merged with existing ranges. | ||
2510 | */ | ||
2511 | void __init add_active_range(unsigned int nid, unsigned long start_pfn, | ||
2512 | unsigned long end_pfn) | ||
2513 | { | ||
2514 | int i; | ||
2515 | |||
2516 | printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) " | ||
2517 | "%d entries of %d used\n", | ||
2518 | nid, start_pfn, end_pfn, | ||
2519 | nr_nodemap_entries, MAX_ACTIVE_REGIONS); | ||
2520 | |||
2521 | /* Merge with existing active regions if possible */ | ||
2522 | for (i = 0; i < nr_nodemap_entries; i++) { | ||
2523 | if (early_node_map[i].nid != nid) | ||
2524 | continue; | ||
2525 | |||
2526 | /* Skip if an existing region covers this new one */ | ||
2527 | if (start_pfn >= early_node_map[i].start_pfn && | ||
2528 | end_pfn <= early_node_map[i].end_pfn) | ||
2529 | return; | ||
2530 | |||
2531 | /* Merge forward if suitable */ | ||
2532 | if (start_pfn <= early_node_map[i].end_pfn && | ||
2533 | end_pfn > early_node_map[i].end_pfn) { | ||
2534 | early_node_map[i].end_pfn = end_pfn; | ||
2535 | return; | ||
2536 | } | ||
2537 | |||
2538 | /* Merge backward if suitable */ | ||
2539 | if (start_pfn < early_node_map[i].end_pfn && | ||
2540 | end_pfn >= early_node_map[i].start_pfn) { | ||
2541 | early_node_map[i].start_pfn = start_pfn; | ||
2542 | return; | ||
2543 | } | ||
2544 | } | ||
2545 | |||
2546 | /* Check that early_node_map is large enough */ | ||
2547 | if (i >= MAX_ACTIVE_REGIONS) { | ||
2548 | printk(KERN_CRIT "More than %d memory regions, truncating\n", | ||
2549 | MAX_ACTIVE_REGIONS); | ||
2550 | return; | ||
2551 | } | ||
2552 | |||
2553 | early_node_map[i].nid = nid; | ||
2554 | early_node_map[i].start_pfn = start_pfn; | ||
2555 | early_node_map[i].end_pfn = end_pfn; | ||
2556 | nr_nodemap_entries = i + 1; | ||
2557 | } | ||
2558 | |||
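The merge rules above — skip a range that is already covered, extend an existing range forward or backward on overlap, otherwise append — can be exercised with a cut-down userspace copy; the table size and PFNs are invented:

```c
/*
 * Cut-down userspace copy of the add_active_range() merge logic:
 * skip covered ranges, merge forward or backward, else append.
 */
#include <stdio.h>

#define MAX_REGIONS 8

struct region { int nid; unsigned long start, end; };
static struct region map[MAX_REGIONS];
static int nr;

static void add_range(int nid, unsigned long start, unsigned long end)
{
	int i;

	for (i = 0; i < nr; i++) {
		if (map[i].nid != nid)
			continue;
		if (start >= map[i].start && end <= map[i].end)
			return;				/* fully covered already */
		if (start <= map[i].end && end > map[i].end) {
			map[i].end = end;		/* merge forward */
			return;
		}
		if (start < map[i].start && end >= map[i].start) {
			map[i].start = start;		/* merge backward */
			return;
		}
	}
	if (i >= MAX_REGIONS)
		return;					/* table full: drop */
	map[i] = (struct region){ nid, start, end };
	nr = i + 1;
}

int main(void)
{
	add_range(0, 0x000, 0x100);
	add_range(0, 0x100, 0x200);	/* merges forward with the first range */
	add_range(0, 0x400, 0x500);	/* new region after a hole */

	for (int i = 0; i < nr; i++)
		printf("node %d: 0x%lx-0x%lx\n", map[i].nid, map[i].start, map[i].end);
	return 0;
}
```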
2559 | /** | ||
2560 | * shrink_active_range - Shrink an existing registered range of PFNs | ||
2561 | * @nid: The node id the range is on that should be shrunk | ||
2562 | * @old_end_pfn: The old end PFN of the range | ||
2563 | * @new_end_pfn: The new PFN of the range | ||
2564 | * | ||
2565 | * i386 with NUMA uses alloc_remap() to store a node_mem_map on a local node. | ||
2566 | * The map is kept at the end of the physical page range that has already been | ||
2567 | * registered with add_active_range(). This function allows an arch to shrink | ||
2568 | * an existing registered range. | ||
2569 | */ | ||
2570 | void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, | ||
2571 | unsigned long new_end_pfn) | ||
2572 | { | ||
2573 | int i; | ||
2574 | |||
2575 | /* Find the old active region end and shrink */ | ||
2576 | for_each_active_range_index_in_nid(i, nid) | ||
2577 | if (early_node_map[i].end_pfn == old_end_pfn) { | ||
2578 | early_node_map[i].end_pfn = new_end_pfn; | ||
2579 | break; | ||
2580 | } | ||
2581 | } | ||
2582 | |||
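As an illustration of the i386-style use mentioned above (node_end_pfn[] and node_remap_size[] are assumed names for this sketch, not symbols introduced by the patch), the tail of a node's registered range can be handed back once the remapped mem_map has claimed it:

	/* Hypothetical: reserve the tail of the node for its remapped node_mem_map. */
	unsigned long new_end_pfn = node_end_pfn[nid] - node_remap_size[nid];

	shrink_active_range(nid, node_end_pfn[nid], new_end_pfn);
	node_end_pfn[nid] = new_end_pfn;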
2583 | /** | ||
2584 | * remove_all_active_ranges - Remove all currently registered regions | ||
2585 | * During discovery, it may be found that a table like SRAT is invalid | ||
2586 | * and an alternative discovery method must be used. This function removes | ||
2587 | * all currently registered regions. | ||
2588 | */ | ||
2589 | void __init remove_all_active_ranges(void) | ||
2590 | { | ||
2591 | memset(early_node_map, 0, sizeof(early_node_map)); | ||
2592 | nr_nodemap_entries = 0; | ||
2593 | #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE | ||
2594 | memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); | ||
2595 | memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); | ||
2596 | #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ | ||
2597 | } | ||
2598 | |||
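A hedged sketch of the fallback described in the comment; the srat_parse_failed flag and the flat 0..max_low_pfn range are illustrative assumptions only:

	/* Hypothetical NUMA setup: SRAT turned out to be bogus, fall back to one node. */
	if (srat_parse_failed) {
		remove_all_active_ranges();
		add_active_range(0, 0, max_low_pfn);
	}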
2599 | /* Compare two node_active_regions by start_pfn */ | ||
2600 | static int __init cmp_node_active_region(const void *a, const void *b) | ||
2601 | { | ||
2602 | struct node_active_region *arange = (struct node_active_region *)a; | ||
2603 | struct node_active_region *brange = (struct node_active_region *)b; | ||
2604 | |||
2605 | /* Done this way to avoid overflows */ | ||
2606 | if (arange->start_pfn > brange->start_pfn) | ||
2607 | return 1; | ||
2608 | if (arange->start_pfn < brange->start_pfn) | ||
2609 | return -1; | ||
2610 | |||
2611 | return 0; | ||
2612 | } | ||
2613 | |||
2614 | /* sort the node_map by start_pfn */ | ||
2615 | static void __init sort_node_map(void) | ||
2616 | { | ||
2617 | sort(early_node_map, (size_t)nr_nodemap_entries, | ||
2618 | sizeof(struct node_active_region), | ||
2619 | cmp_node_active_region, NULL); | ||
2620 | } | ||
2621 | |||
2622 | /* Find the lowest pfn for a node. This depends on a sorted early_node_map */ | ||
2623 | unsigned long __init find_min_pfn_for_node(unsigned long nid) | ||
2624 | { | ||
2625 | int i; | ||
2626 | |||
2627 | /* Assuming a sorted map, the first range found has the starting pfn */ | ||
2628 | for_each_active_range_index_in_nid(i, nid) | ||
2629 | return early_node_map[i].start_pfn; | ||
2630 | |||
2631 | printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); | ||
2632 | return 0; | ||
2633 | } | ||
2634 | |||
2635 | /** | ||
2636 | * find_min_pfn_with_active_regions - Find the minimum PFN registered | ||
2637 | * | ||
2638 | * It returns the minimum PFN based on information provided via | ||
2639 | * add_active_range() | ||
2640 | */ | ||
2641 | unsigned long __init find_min_pfn_with_active_regions(void) | ||
2642 | { | ||
2643 | return find_min_pfn_for_node(MAX_NUMNODES); | ||
2644 | } | ||
2645 | |||
2646 | /** | ||
2647 | * find_max_pfn_with_active_regions - Find the maximum PFN registered | ||
2648 | * | ||
2649 | * It returns the maximum PFN based on information provided via | ||
2650 | * add_active_range() | ||
2651 | */ | ||
2652 | unsigned long __init find_max_pfn_with_active_regions(void) | ||
2653 | { | ||
2654 | int i; | ||
2655 | unsigned long max_pfn = 0; | ||
2656 | |||
2657 | for (i = 0; i < nr_nodemap_entries; i++) | ||
2658 | max_pfn = max(max_pfn, early_node_map[i].end_pfn); | ||
2659 | |||
2660 | return max_pfn; | ||
2661 | } | ||
2662 | |||
2663 | /** | ||
2664 | * free_area_init_nodes - Initialise all pg_data_t and zone data | ||
2665 | * @max_zone_pfn: an array of max PFNs for each zone, indexed by zone type, | ||
2666 | *                e.g. max_zone_pfn[ZONE_DMA] is the maximum PFN usable for | ||
2667 | *                ZONE_DMA, max_zone_pfn[ZONE_NORMAL] the maximum PFN usable | ||
2668 | *                for ZONE_NORMAL, and so on | ||
2669 | * | ||
2670 | * This will call free_area_init_node() for each active node in the system. | ||
2671 | * Using the page ranges provided by add_active_range(), the size of each | ||
2672 | * zone in each node and their holes is calculated. If the maximum PFNs | ||
2673 | * of two adjacent zones match, it is assumed that the higher zone is empty. | ||
2674 | * For example, if max_zone_pfn[ZONE_DMA] == max_zone_pfn[ZONE_DMA32], it is | ||
2675 | * assumed that ZONE_DMA32 has no pages. It is also assumed that a zone | ||
2676 | * starts where the previous one ended. For example, ZONE_DMA32 starts | ||
2677 | * at max_zone_pfn[ZONE_DMA]. | ||
2678 | */ | ||
2679 | void __init free_area_init_nodes(unsigned long *max_zone_pfn) | ||
2680 | { | ||
2681 | unsigned long nid; | ||
2682 | enum zone_type i; | ||
2683 | |||
2684 | /* Record where the zone boundaries are */ | ||
2685 | memset(arch_zone_lowest_possible_pfn, 0, | ||
2686 | sizeof(arch_zone_lowest_possible_pfn)); | ||
2687 | memset(arch_zone_highest_possible_pfn, 0, | ||
2688 | sizeof(arch_zone_highest_possible_pfn)); | ||
2689 | arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions(); | ||
2690 | arch_zone_highest_possible_pfn[0] = max_zone_pfn[0]; | ||
2691 | for (i = 1; i < MAX_NR_ZONES; i++) { | ||
2692 | arch_zone_lowest_possible_pfn[i] = | ||
2693 | arch_zone_highest_possible_pfn[i-1]; | ||
2694 | arch_zone_highest_possible_pfn[i] = | ||
2695 | max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]); | ||
2696 | } | ||
2697 | |||
2698 | /* Regions in the early_node_map can be in any order */ | ||
2699 | sort_node_map(); | ||
2700 | |||
2701 | /* Print out the zone ranges */ | ||
2702 | printk("Zone PFN ranges:\n"); | ||
2703 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
2704 | printk(" %-8s %8lu -> %8lu\n", | ||
2705 | zone_names[i], | ||
2706 | arch_zone_lowest_possible_pfn[i], | ||
2707 | arch_zone_highest_possible_pfn[i]); | ||
2708 | |||
2709 | /* Print out the early_node_map[] */ | ||
2710 | printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); | ||
2711 | for (i = 0; i < nr_nodemap_entries; i++) | ||
2712 | printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid, | ||
2713 | early_node_map[i].start_pfn, | ||
2714 | early_node_map[i].end_pfn); | ||
2715 | |||
2716 | /* Initialise every node */ | ||
2717 | for_each_online_node(nid) { | ||
2718 | pg_data_t *pgdat = NODE_DATA(nid); | ||
2719 | free_area_init_node(nid, pgdat, NULL, | ||
2720 | find_min_pfn_for_node(nid), NULL); | ||
2721 | } | ||
2722 | } | ||
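Putting the pieces together, a typical arch-side caller would look roughly like the sketch below (on a configuration with ZONE_DMA32); MAX_DMA_PFN, MAX_DMA32_PFN and end_pfn stand in for whatever limits the architecture actually computes:

	/* Hypothetical paging_init(): describe per-zone limits, let the core MM do the rest. */
	void __init hypothetical_paging_init(void)
	{
		unsigned long max_zone_pfns[MAX_NR_ZONES];

		memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
		max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;		/* e.g. 16MB in PFNs */
		max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;	/* e.g. 4GB in PFNs */
		max_zone_pfns[ZONE_NORMAL] = end_pfn;		/* assumed arch variable */

		free_area_init_nodes(max_zone_pfns);
	}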
2723 | #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ | ||
2724 | |||
2725 | /** | ||
2726 | * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA | ||
2727 | * @new_dma_reserve: The number of pages to mark reserved | ||
2728 | * | ||
2729 | * The per-cpu batchsize and zone watermarks are determined by present_pages. | ||
2730 | * In the DMA zone, a significant percentage may be consumed by kernel image | ||
2731 | * and other unfreeable allocations which can skew the watermarks badly. This | ||
2732 | * function may optionally be used to account for unfreeable pages in | ||
2733 | * ZONE_DMA. The effect will be lower watermarks and a smaller per-cpu batchsize. | ||
2734 | */ | ||
2735 | void __init set_dma_reserve(unsigned long new_dma_reserve) | ||
2736 | { | ||
2737 | dma_reserve = new_dma_reserve; | ||
2738 | } | ||
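For illustration only, assuming the architecture tracks how many DMA-zone pages were pinned by early reservations (dma_zone_reserved_pages is a made-up name):

	/* Hypothetical: exclude boot-time reservations below 16MB from the watermark math. */
	set_dma_reserve(dma_zone_reserved_pages);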
2739 | |||
2089 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 2740 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
2090 | static bootmem_data_t contig_bootmem_data; | 2741 | static bootmem_data_t contig_bootmem_data; |
2091 | struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; | 2742 | struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; |
@@ -2129,7 +2780,7 @@ static void calculate_totalreserve_pages(void) | |||
2129 | { | 2780 | { |
2130 | struct pglist_data *pgdat; | 2781 | struct pglist_data *pgdat; |
2131 | unsigned long reserve_pages = 0; | 2782 | unsigned long reserve_pages = 0; |
2132 | int i, j; | 2783 | enum zone_type i, j; |
2133 | 2784 | ||
2134 | for_each_online_pgdat(pgdat) { | 2785 | for_each_online_pgdat(pgdat) { |
2135 | for (i = 0; i < MAX_NR_ZONES; i++) { | 2786 | for (i = 0; i < MAX_NR_ZONES; i++) { |
@@ -2162,7 +2813,7 @@ static void calculate_totalreserve_pages(void) | |||
2162 | static void setup_per_zone_lowmem_reserve(void) | 2813 | static void setup_per_zone_lowmem_reserve(void) |
2163 | { | 2814 | { |
2164 | struct pglist_data *pgdat; | 2815 | struct pglist_data *pgdat; |
2165 | int j, idx; | 2816 | enum zone_type j, idx; |
2166 | 2817 | ||
2167 | for_each_online_pgdat(pgdat) { | 2818 | for_each_online_pgdat(pgdat) { |
2168 | for (j = 0; j < MAX_NR_ZONES; j++) { | 2819 | for (j = 0; j < MAX_NR_ZONES; j++) { |
@@ -2171,9 +2822,12 @@ static void setup_per_zone_lowmem_reserve(void) | |||
2171 | 2822 | ||
2172 | zone->lowmem_reserve[j] = 0; | 2823 | zone->lowmem_reserve[j] = 0; |
2173 | 2824 | ||
2174 | for (idx = j-1; idx >= 0; idx--) { | 2825 | idx = j; |
2826 | while (idx) { | ||
2175 | struct zone *lower_zone; | 2827 | struct zone *lower_zone; |
2176 | 2828 | ||
2829 | idx--; | ||
2830 | |||
2177 | if (sysctl_lowmem_reserve_ratio[idx] < 1) | 2831 | if (sysctl_lowmem_reserve_ratio[idx] < 1) |
2178 | sysctl_lowmem_reserve_ratio[idx] = 1; | 2832 | sysctl_lowmem_reserve_ratio[idx] = 1; |
2179 | 2833 | ||
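A note on the restructured loop above (the rationale is not spelled out in the hunk itself): idx is now an enum zone_type, which the compiler may treat as unsigned, so the old for (idx = j-1; idx >= 0; idx--) form risks an always-true idx >= 0 test; the idx = j; while (idx) { idx--; ... } shape visits the same indices j-1 down to 0 and terminates regardless of signedness.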
@@ -2314,10 +2968,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | |||
2314 | return rc; | 2968 | return rc; |
2315 | 2969 | ||
2316 | for_each_zone(zone) | 2970 | for_each_zone(zone) |
2317 | zone->min_unmapped_ratio = (zone->present_pages * | 2971 | zone->min_unmapped_pages = (zone->present_pages * |
2318 | sysctl_min_unmapped_ratio) / 100; | 2972 | sysctl_min_unmapped_ratio) / 100; |
2319 | return 0; | 2973 | return 0; |
2320 | } | 2974 | } |
2975 | |||
2976 | int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, | ||
2977 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | ||
2978 | { | ||
2979 | struct zone *zone; | ||
2980 | int rc; | ||
2981 | |||
2982 | rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | ||
2983 | if (rc) | ||
2984 | return rc; | ||
2985 | |||
2986 | for_each_zone(zone) | ||
2987 | zone->min_slab_pages = (zone->present_pages * | ||
2988 | sysctl_min_slab_ratio) / 100; | ||
2989 | return 0; | ||
2990 | } | ||
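To make the arithmetic in both handlers concrete (numbers are illustrative, not from the patch): with vm.min_slab_ratio set to 5, a zone holding 262144 present pages (1GB of 4KB pages) gets min_slab_pages = 262144 * 5 / 100 = 13107 pages, roughly 51MB; min_unmapped_pages is derived the same way from vm.min_unmapped_ratio.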
2321 | #endif | 2991 | #endif |
2322 | 2992 | ||
2323 | /* | 2993 | /* |