Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	537
1 file changed, 101 insertions(+), 436 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e8ecb6e021c..f24bc1c40080 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,39 +181,17 @@ static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-  /*
-   * MAX_ACTIVE_REGIONS determines the maximum number of distinct
-   * ranges of memory (RAM) that may be registered with add_active_range().
-   * Ranges passed to add_active_range() will be merged if possible
-   * so the number of times add_active_range() can be called is
-   * related to the number of nodes and the number of holes
-   */
-  #ifdef CONFIG_MAX_ACTIVE_REGIONS
-    /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
-    #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
-  #else
-    #if MAX_NUMNODES >= 32
-      /* If there can be many nodes, allow up to 50 holes per node */
-      #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
-    #else
-      /* By default, allow up to 256 distinct regions */
-      #define MAX_ACTIVE_REGIONS 256
-    #endif
-  #endif
-
-  static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
-  static int __meminitdata nr_nodemap_entries;
-  static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
-  static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-  static unsigned long __initdata required_kernelcore;
-  static unsigned long __initdata required_movablecore;
-  static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
-
-  /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
-  int movable_zone;
-  EXPORT_SYMBOL(movable_zone);
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+static unsigned long __initdata required_kernelcore;
+static unsigned long __initdata required_movablecore;
+static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+
+/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
+int movable_zone;
+EXPORT_SYMBOL(movable_zone);
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if MAX_NUMNODES > 1
 int nr_node_ids __read_mostly = MAX_NUMNODES;
@@ -318,6 +296,7 @@ static void bad_page(struct page *page)
 			current->comm, page_to_pfn(page));
 	dump_page(page);
 
+	print_modules();
 	dump_stack();
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */
@@ -355,8 +334,8 @@ void prep_compound_page(struct page *page, unsigned long order)
 	__SetPageHead(page);
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
-
 		__SetPageTail(p);
+		set_page_count(p, 0);
 		p->first_page = page;
 	}
 }
@@ -705,10 +684,10 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 	int loop;
 
 	prefetchw(page);
-	for (loop = 0; loop < BITS_PER_LONG; loop++) {
+	for (loop = 0; loop < (1 << order); loop++) {
 		struct page *p = &page[loop];
 
-		if (loop + 1 < BITS_PER_LONG)
+		if (loop + 1 < (1 << order))
 			prefetchw(p + 1);
 		__ClearPageReserved(p);
 		set_page_count(p, 0);
@@ -1407,7 +1386,7 @@ static int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 
 static int __init fail_page_alloc_debugfs(void)
 {
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 	struct dentry *dir;
 
 	dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
@@ -1753,7 +1732,6 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
 
 void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 {
-	va_list args;
 	unsigned int filter = SHOW_MEM_FILTER_NODES;
 
 	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
@@ -1772,14 +1750,21 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
 	if (fmt) {
-		printk(KERN_WARNING);
+		struct va_format vaf;
+		va_list args;
+
 		va_start(args, fmt);
-		vprintk(fmt, args);
+
+		vaf.fmt = fmt;
+		vaf.va = &args;
+
+		pr_warn("%pV", &vaf);
+
 		va_end(args);
 	}
 
-	pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
+	pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
 		current->comm, order, gfp_mask);
 
 	dump_stack();
 	if (!should_suppress_show_mem())
@@ -3370,9 +3355,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	unsigned long block_migratetype;
 	int reserve;
 
-	/* Get the start pfn, end pfn and the number of blocks to reserve */
+	/*
+	 * Get the start pfn, end pfn and the number of blocks to reserve
+	 * We have to be careful to be aligned to pageblock_nr_pages to
+	 * make sure that we always check pfn_valid for the first page in
+	 * the block.
+	 */
 	start_pfn = zone->zone_start_pfn;
 	end_pfn = start_pfn + zone->spanned_pages;
+	start_pfn = roundup(start_pfn, pageblock_nr_pages);
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
 
@@ -3724,35 +3715,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-/*
- * Basic iterator support. Return the first range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns first region regardless of node
- */
-static int __meminit first_active_region_index_in_nid(int nid)
-{
-	int i;
-
-	for (i = 0; i < nr_nodemap_entries; i++)
-		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
-			return i;
-
-	return -1;
-}
-
-/*
- * Basic iterator support. Return the next active range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns next region regardless of node
- */
-static int __meminit next_active_region_index_in_nid(int index, int nid)
-{
-	for (index = index + 1; index < nr_nodemap_entries; index++)
-		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
-			return index;
-
-	return -1;
-}
-
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3762,15 +3725,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
  */
 int __meminit __early_pfn_to_nid(unsigned long pfn)
 {
-	int i;
-
-	for (i = 0; i < nr_nodemap_entries; i++) {
-		unsigned long start_pfn = early_node_map[i].start_pfn;
-		unsigned long end_pfn = early_node_map[i].end_pfn;
+	unsigned long start_pfn, end_pfn;
+	int i, nid;
 
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
 		if (start_pfn <= pfn && pfn < end_pfn)
-			return early_node_map[i].nid;
-	}
+			return nid;
 	/* This is a memory hole */
 	return -1;
 }
@@ -3799,11 +3759,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
 }
 #endif
 
-/* Basic iterator support to walk early_node_map[] */
-#define for_each_active_range_index_in_nid(i, nid) \
-	for (i = first_active_region_index_in_nid(nid); i != -1; \
-				i = next_active_region_index_in_nid(i, nid))
-
 /**
  * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3813,122 +3768,34 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
  * add_active_ranges() contain no holes and may be freed, this
  * this function may be used instead of calling free_bootmem() manually.
  */
-void __init free_bootmem_with_active_regions(int nid,
-						unsigned long max_low_pfn)
+void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 {
-	int i;
-
-	for_each_active_range_index_in_nid(i, nid) {
-		unsigned long size_pages = 0;
-		unsigned long end_pfn = early_node_map[i].end_pfn;
-
-		if (early_node_map[i].start_pfn >= max_low_pfn)
-			continue;
-
-		if (end_pfn > max_low_pfn)
-			end_pfn = max_low_pfn;
-
-		size_pages = end_pfn - early_node_map[i].start_pfn;
-		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
-				PFN_PHYS(early_node_map[i].start_pfn),
-				size_pages << PAGE_SHIFT);
-	}
-}
-
-#ifdef CONFIG_HAVE_MEMBLOCK
-/*
- * Basic iterator support. Return the last range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns last region regardless of node
- */
-static int __meminit last_active_region_index_in_nid(int nid)
-{
-	int i;
-
-	for (i = nr_nodemap_entries - 1; i >= 0; i--)
-		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
-			return i;
-
-	return -1;
-}
-
-/*
- * Basic iterator support. Return the previous active range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns next region regardless of node
- */
-static int __meminit previous_active_region_index_in_nid(int index, int nid)
-{
-	for (index = index - 1; index >= 0; index--)
-		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
-			return index;
-
-	return -1;
-}
-
-#define for_each_active_range_index_in_nid_reverse(i, nid) \
-	for (i = last_active_region_index_in_nid(nid); i != -1; \
-		i = previous_active_region_index_in_nid(i, nid))
-
-u64 __init find_memory_core_early(int nid, u64 size, u64 align,
-					u64 goal, u64 limit)
-{
-	int i;
-
-	/* Need to go over early_node_map to find out good range for node */
-	for_each_active_range_index_in_nid_reverse(i, nid) {
-		u64 addr;
-		u64 ei_start, ei_last;
-		u64 final_start, final_end;
-
-		ei_last = early_node_map[i].end_pfn;
-		ei_last <<= PAGE_SHIFT;
-		ei_start = early_node_map[i].start_pfn;
-		ei_start <<= PAGE_SHIFT;
-
-		final_start = max(ei_start, goal);
-		final_end = min(ei_last, limit);
-
-		if (final_start >= final_end)
-			continue;
-
-		addr = memblock_find_in_range(final_start, final_end, size, align);
+	unsigned long start_pfn, end_pfn;
+	int i, this_nid;
 
-		if (addr == MEMBLOCK_ERROR)
-			continue;
+	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
+		start_pfn = min(start_pfn, max_low_pfn);
+		end_pfn = min(end_pfn, max_low_pfn);
 
-		return addr;
+		if (start_pfn < end_pfn)
+			free_bootmem_node(NODE_DATA(this_nid),
+					  PFN_PHYS(start_pfn),
+					  (end_pfn - start_pfn) << PAGE_SHIFT);
 	}
-
-	return MEMBLOCK_ERROR;
 }
-#endif
 
 int __init add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid)
 {
+	unsigned long start_pfn, end_pfn;
 	int i;
-	u64 start, end;
 
 	/* need to go over early_node_map to find out good range for node */
-	for_each_active_range_index_in_nid(i, nid) {
-		start = early_node_map[i].start_pfn;
-		end = early_node_map[i].end_pfn;
-		nr_range = add_range(range, az, nr_range, start, end);
-	}
+	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
+		nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
 	return nr_range;
 }
 
-void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
-{
-	int i;
-	int ret;
-
-	for_each_active_range_index_in_nid(i, nid) {
-		ret = work_fn(early_node_map[i].start_pfn,
-			      early_node_map[i].end_pfn, data);
-		if (ret)
-			break;
-	}
-}
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -3939,12 +3806,11 @@ void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
  */
 void __init sparse_memory_present_with_active_regions(int nid)
 {
-	int i;
+	unsigned long start_pfn, end_pfn;
+	int i, this_nid;
 
-	for_each_active_range_index_in_nid(i, nid)
-		memory_present(early_node_map[i].nid,
-				early_node_map[i].start_pfn,
-				early_node_map[i].end_pfn);
+	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
+		memory_present(this_nid, start_pfn, end_pfn);
 }
 
 /**
@@ -3961,13 +3827,15 @@ void __init sparse_memory_present_with_active_regions(int nid)
 void __meminit get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn)
 {
+	unsigned long this_start_pfn, this_end_pfn;
 	int i;
+
 	*start_pfn = -1UL;
 	*end_pfn = 0;
 
-	for_each_active_range_index_in_nid(i, nid) {
-		*start_pfn = min(*start_pfn, early_node_map[i].start_pfn);
-		*end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
+	for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
+		*start_pfn = min(*start_pfn, this_start_pfn);
+		*end_pfn = max(*end_pfn, this_end_pfn);
 	}
 
 	if (*start_pfn == -1UL)
@@ -4070,46 +3938,16 @@ unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
-	int i = 0;
-	unsigned long prev_end_pfn = 0, hole_pages = 0;
-	unsigned long start_pfn;
-
-	/* Find the end_pfn of the first active range of pfns in the node */
-	i = first_active_region_index_in_nid(nid);
-	if (i == -1)
-		return 0;
-
-	prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
-
-	/* Account for ranges before physical memory on this node */
-	if (early_node_map[i].start_pfn > range_start_pfn)
-		hole_pages = prev_end_pfn - range_start_pfn;
-
-	/* Find all holes for the zone within the node */
-	for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
-
-		/* No need to continue if prev_end_pfn is outside the zone */
-		if (prev_end_pfn >= range_end_pfn)
-			break;
-
-		/* Make sure the end of the zone is not within the hole */
-		start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
-		prev_end_pfn = max(prev_end_pfn, range_start_pfn);
+	unsigned long nr_absent = range_end_pfn - range_start_pfn;
+	unsigned long start_pfn, end_pfn;
+	int i;
 
-		/* Update the hole size cound and move on */
-		if (start_pfn > range_start_pfn) {
-			BUG_ON(prev_end_pfn > start_pfn);
-			hole_pages += start_pfn - prev_end_pfn;
-		}
-		prev_end_pfn = early_node_map[i].end_pfn;
+	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+		start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
+		end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
+		nr_absent -= end_pfn - start_pfn;
 	}
-
-	/* Account for ranges past physical memory on this node */
-	if (range_end_pfn > prev_end_pfn)
-		hole_pages += range_end_pfn -
-				max(range_start_pfn, prev_end_pfn);
-
-	return hole_pages;
+	return nr_absent;
 }
 
 /**
@@ -4130,14 +3968,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 					unsigned long zone_type,
 					unsigned long *ignored)
 {
+	unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
+	unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
 	unsigned long node_start_pfn, node_end_pfn;
 	unsigned long zone_start_pfn, zone_end_pfn;
 
 	get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
-	zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type],
-			node_start_pfn);
-	zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
-			node_end_pfn);
+	zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
+	zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
 
 	adjust_zone_range_for_zone_movable(nid, zone_type,
 			node_start_pfn, node_end_pfn,
@@ -4145,7 +3983,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 }
 
-#else
+#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
 					unsigned long *zones_size)
@@ -4163,7 +4001,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 	return zholes_size[zone_type];
 }
 
-#endif
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
@@ -4386,10 +4224,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 	 */
 	if (pgdat == NODE_DATA(0)) {
 		mem_map = NODE_DATA(0)->node_mem_map;
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
 			mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 	}
 #endif
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@ -4414,7 +4252,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	free_area_init_core(pgdat, zones_size, zholes_size);
 }
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 
 #if MAX_NUMNODES > 1
 /*
@@ -4436,170 +4274,6 @@ static inline void setup_nr_node_ids(void)
 #endif
 
 /**
- * add_active_range - Register a range of PFNs backed by physical memory
- * @nid: The node ID the range resides on
- * @start_pfn: The start PFN of the available physical memory
- * @end_pfn: The end PFN of the available physical memory
- *
- * These ranges are stored in an early_node_map[] and later used by
- * free_area_init_nodes() to calculate zone sizes and holes. If the
- * range spans a memory hole, it is up to the architecture to ensure
- * the memory is not freed by the bootmem allocator. If possible
- * the range being registered will be merged with existing ranges.
- */
-void __init add_active_range(unsigned int nid, unsigned long start_pfn,
-						unsigned long end_pfn)
-{
-	int i;
-
-	mminit_dprintk(MMINIT_TRACE, "memory_register",
-			"Entering add_active_range(%d, %#lx, %#lx) "
-			"%d entries of %d used\n",
-			nid, start_pfn, end_pfn,
-			nr_nodemap_entries, MAX_ACTIVE_REGIONS);
-
-	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
-
-	/* Merge with existing active regions if possible */
-	for (i = 0; i < nr_nodemap_entries; i++) {
-		if (early_node_map[i].nid != nid)
-			continue;
-
-		/* Skip if an existing region covers this new one */
-		if (start_pfn >= early_node_map[i].start_pfn &&
-				end_pfn <= early_node_map[i].end_pfn)
-			return;
-
-		/* Merge forward if suitable */
-		if (start_pfn <= early_node_map[i].end_pfn &&
-				end_pfn > early_node_map[i].end_pfn) {
-			early_node_map[i].end_pfn = end_pfn;
-			return;
-		}
-
-		/* Merge backward if suitable */
-		if (start_pfn < early_node_map[i].start_pfn &&
-				end_pfn >= early_node_map[i].start_pfn) {
-			early_node_map[i].start_pfn = start_pfn;
-			return;
-		}
-	}
-
-	/* Check that early_node_map is large enough */
-	if (i >= MAX_ACTIVE_REGIONS) {
-		printk(KERN_CRIT "More than %d memory regions, truncating\n",
-							MAX_ACTIVE_REGIONS);
-		return;
-	}
-
-	early_node_map[i].nid = nid;
-	early_node_map[i].start_pfn = start_pfn;
-	early_node_map[i].end_pfn = end_pfn;
-	nr_nodemap_entries = i + 1;
-}
-
-/**
- * remove_active_range - Shrink an existing registered range of PFNs
- * @nid: The node id the range is on that should be shrunk
- * @start_pfn: The new PFN of the range
- * @end_pfn: The new PFN of the range
- *
- * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- * The map is kept near the end physical page range that has already been
- * registered. This function allows an arch to shrink an existing registered
- * range.
- */
-void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
-				unsigned long end_pfn)
-{
-	int i, j;
-	int removed = 0;
-
-	printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
-			  nid, start_pfn, end_pfn);
-
-	/* Find the old active region end and shrink */
-	for_each_active_range_index_in_nid(i, nid) {
-		if (early_node_map[i].start_pfn >= start_pfn &&
-		    early_node_map[i].end_pfn <= end_pfn) {
-			/* clear it */
-			early_node_map[i].start_pfn = 0;
-			early_node_map[i].end_pfn = 0;
-			removed = 1;
-			continue;
-		}
-		if (early_node_map[i].start_pfn < start_pfn &&
-		    early_node_map[i].end_pfn > start_pfn) {
-			unsigned long temp_end_pfn = early_node_map[i].end_pfn;
-			early_node_map[i].end_pfn = start_pfn;
-			if (temp_end_pfn > end_pfn)
-				add_active_range(nid, end_pfn, temp_end_pfn);
-			continue;
-		}
-		if (early_node_map[i].start_pfn >= start_pfn &&
-		    early_node_map[i].end_pfn > end_pfn &&
-		    early_node_map[i].start_pfn < end_pfn) {
-			early_node_map[i].start_pfn = end_pfn;
-			continue;
-		}
-	}
-
-	if (!removed)
-		return;
-
-	/* remove the blank ones */
-	for (i = nr_nodemap_entries - 1; i > 0; i--) {
-		if (early_node_map[i].nid != nid)
-			continue;
-		if (early_node_map[i].end_pfn)
-			continue;
-		/* we found it, get rid of it */
-		for (j = i; j < nr_nodemap_entries - 1; j++)
-			memcpy(&early_node_map[j], &early_node_map[j+1],
-				sizeof(early_node_map[j]));
-		j = nr_nodemap_entries - 1;
-		memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
-		nr_nodemap_entries--;
-	}
-}
-
-/**
- * remove_all_active_ranges - Remove all currently registered regions
- *
- * During discovery, it may be found that a table like SRAT is invalid
- * and an alternative discovery method must be used. This function removes
- * all currently registered regions.
- */
-void __init remove_all_active_ranges(void)
-{
-	memset(early_node_map, 0, sizeof(early_node_map));
-	nr_nodemap_entries = 0;
-}
-
-/* Compare two active node_active_regions */
-static int __init cmp_node_active_region(const void *a, const void *b)
-{
-	struct node_active_region *arange = (struct node_active_region *)a;
-	struct node_active_region *brange = (struct node_active_region *)b;
-
-	/* Done this way to avoid overflows */
-	if (arange->start_pfn > brange->start_pfn)
-		return 1;
-	if (arange->start_pfn < brange->start_pfn)
-		return -1;
-
-	return 0;
-}
-
-/* sort the node_map by start_pfn */
-void __init sort_node_map(void)
-{
-	sort(early_node_map, (size_t)nr_nodemap_entries,
-			sizeof(struct node_active_region),
-			cmp_node_active_region, NULL);
-}
-
-/**
  * node_map_pfn_alignment - determine the maximum internode alignment
  *
  * This function should be called after node map is populated and sorted.
@@ -4621,15 +4295,11 @@ void __init sort_node_map(void)
 unsigned long __init node_map_pfn_alignment(void)
 {
 	unsigned long accl_mask = 0, last_end = 0;
+	unsigned long start, end, mask;
 	int last_nid = -1;
-	int i;
-
-	for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
-		int nid = early_node_map[i].nid;
-		unsigned long start = early_node_map[i].start_pfn;
-		unsigned long end = early_node_map[i].end_pfn;
-		unsigned long mask;
+	int i, nid;
 
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
 		if (!start || last_nid < 0 || last_nid == nid) {
 			last_nid = nid;
 			last_end = end;
@@ -4656,12 +4326,12 @@ unsigned long __init node_map_pfn_alignment(void)
 /* Find the lowest pfn for a node */
 static unsigned long __init find_min_pfn_for_node(int nid)
 {
-	int i;
 	unsigned long min_pfn = ULONG_MAX;
+	unsigned long start_pfn;
+	int i;
 
-	/* Assuming a sorted map, the first range found has the starting pfn */
-	for_each_active_range_index_in_nid(i, nid)
-		min_pfn = min(min_pfn, early_node_map[i].start_pfn);
+	for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
+		min_pfn = min(min_pfn, start_pfn);
 
 	if (min_pfn == ULONG_MAX) {
 		printk(KERN_WARNING
@@ -4690,15 +4360,16 @@ unsigned long __init find_min_pfn_with_active_regions(void)
  */
 static unsigned long __init early_calculate_totalpages(void)
 {
-	int i;
 	unsigned long totalpages = 0;
+	unsigned long start_pfn, end_pfn;
+	int i, nid;
+
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+		unsigned long pages = end_pfn - start_pfn;
 
-	for (i = 0; i < nr_nodemap_entries; i++) {
-		unsigned long pages = early_node_map[i].end_pfn -
-						early_node_map[i].start_pfn;
 		totalpages += pages;
 		if (pages)
-			node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
+			node_set_state(nid, N_HIGH_MEMORY);
 	}
 	return totalpages;
 }
@@ -4753,6 +4424,8 @@ restart:
 	/* Spread kernelcore memory as evenly as possible throughout nodes */
 	kernelcore_node = required_kernelcore / usable_nodes;
 	for_each_node_state(nid, N_HIGH_MEMORY) {
+		unsigned long start_pfn, end_pfn;
+
 		/*
 		 * Recalculate kernelcore_node if the division per node
 		 * now exceeds what is necessary to satisfy the requested
@@ -4769,13 +4442,10 @@ restart:
 		kernelcore_remaining = kernelcore_node;
 
 		/* Go through each range of PFNs within this node */
-		for_each_active_range_index_in_nid(i, nid) {
-			unsigned long start_pfn, end_pfn;
+		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
 			unsigned long size_pages;
 
-			start_pfn = max(early_node_map[i].start_pfn,
-						zone_movable_pfn[nid]);
-			end_pfn = early_node_map[i].end_pfn;
+			start_pfn = max(start_pfn, zone_movable_pfn[nid]);
 			if (start_pfn >= end_pfn)
 				continue;
 
@@ -4877,11 +4547,8 @@ static void check_for_regular_memory(pg_data_t *pgdat)
  */
 void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 {
-	unsigned long nid;
-	int i;
-
-	/* Sort early_node_map as initialisation assumes it is sorted */
-	sort_node_map();
+	unsigned long start_pfn, end_pfn;
+	int i, nid;
 
 	/* Record where the zone boundaries are */
 	memset(arch_zone_lowest_possible_pfn, 0,
@@ -4928,11 +4595,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	}
 
 	/* Print out the early_node_map[] */
-	printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
-	for (i = 0; i < nr_nodemap_entries; i++)
-		printk("  %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
-						early_node_map[i].start_pfn,
-						early_node_map[i].end_pfn);
+	printk("Early memory PFN ranges\n");
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
+		printk("  %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);
 
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
@@ -4985,7 +4650,7 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
  * set_dma_reserve - set the specified number of pages reserved in the first zone