summary | refs | log | tree | commit | diff | stats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2011-12-20 06:14:26 -0500
committer Ingo Molnar <mingo@elte.hu> 2011-12-20 06:14:26 -0500
commit 45aa0663cc408617b79a2b53f0a5f50e94688a48 (patch)
tree 0a53931c317c3c72a3555bd2fbb70a881ee870f2 /mm/page_alloc.c
parent 511585a28e5b5fd1cac61e601e42efc4c5dd64b5 (diff)
parent 7bd0b0f0da3b1ec11cbcc798eb0ef747a1184077 (diff)
Merge branch 'memblock-kill-early_node_map' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/memblock
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- mm/page_alloc.c 508
1 files changed, 80 insertions, 428 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b8ba3aebf6e..bdc804c2d99c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,39 +181,17 @@ static unsigned long __meminitdata nr_kernel_pages;
181static unsigned long __meminitdata nr_all_pages; 181static unsigned long __meminitdata nr_all_pages;
182static unsigned long __meminitdata dma_reserve; 182static unsigned long __meminitdata dma_reserve;
183 183
184#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 184#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
185 /* 185static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
186 * MAX_ACTIVE_REGIONS determines the maximum number of distinct 186static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
187 * ranges of memory (RAM) that may be registered with add_active_range(). 187static unsigned long __initdata required_kernelcore;
188 * Ranges passed to add_active_range() will be merged if possible 188static unsigned long __initdata required_movablecore;
189 * so the number of times add_active_range() can be called is 189static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
190 * related to the number of nodes and the number of holes 190
191 */ 191/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
192 #ifdef CONFIG_MAX_ACTIVE_REGIONS 192int movable_zone;
193 /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ 193EXPORT_SYMBOL(movable_zone);
194 #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS 194#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
195 #else
196 #if MAX_NUMNODES >= 32
197 /* If there can be many nodes, allow up to 50 holes per node */
198 #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
199 #else
200 /* By default, allow up to 256 distinct regions */
201 #define MAX_ACTIVE_REGIONS 256
202 #endif
203 #endif
204
205 static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
206 static int __meminitdata nr_nodemap_entries;
207 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
208 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
209 static unsigned long __initdata required_kernelcore;
210 static unsigned long __initdata required_movablecore;
211 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
212
213 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
214 int movable_zone;
215 EXPORT_SYMBOL(movable_zone);
216#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
217 195
218#if MAX_NUMNODES > 1 196#if MAX_NUMNODES > 1
219int nr_node_ids __read_mostly = MAX_NUMNODES; 197int nr_node_ids __read_mostly = MAX_NUMNODES;
@@ -706,10 +684,10 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
706 int loop; 684 int loop;
707 685
708 prefetchw(page); 686 prefetchw(page);
709 for (loop = 0; loop < BITS_PER_LONG; loop++) { 687 for (loop = 0; loop < (1 << order); loop++) {
710 struct page *p = &page[loop]; 688 struct page *p = &page[loop];
711 689
712 if (loop + 1 < BITS_PER_LONG) 690 if (loop + 1 < (1 << order))
713 prefetchw(p + 1); 691 prefetchw(p + 1);
714 __ClearPageReserved(p); 692 __ClearPageReserved(p);
715 set_page_count(p, 0); 693 set_page_count(p, 0);
@@ -3737,35 +3715,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
3737 return 0; 3715 return 0;
3738} 3716}
3739 3717
3740#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 3718#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
3741/*
3742 * Basic iterator support. Return the first range of PFNs for a node
3743 * Note: nid == MAX_NUMNODES returns first region regardless of node
3744 */
3745static int __meminit first_active_region_index_in_nid(int nid)
3746{
3747 int i;
3748
3749 for (i = 0; i < nr_nodemap_entries; i++)
3750 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3751 return i;
3752
3753 return -1;
3754}
3755
3756/*
3757 * Basic iterator support. Return the next active range of PFNs for a node
3758 * Note: nid == MAX_NUMNODES returns next region regardless of node
3759 */
3760static int __meminit next_active_region_index_in_nid(int index, int nid)
3761{
3762 for (index = index + 1; index < nr_nodemap_entries; index++)
3763 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3764 return index;
3765
3766 return -1;
3767}
3768
3769#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID 3719#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
3770/* 3720/*
3771 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. 3721 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3775,15 +3725,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
3775 */ 3725 */
3776int __meminit __early_pfn_to_nid(unsigned long pfn) 3726int __meminit __early_pfn_to_nid(unsigned long pfn)
3777{ 3727{
3778 int i; 3728 unsigned long start_pfn, end_pfn;
3779 3729 int i, nid;
3780 for (i = 0; i < nr_nodemap_entries; i++) {
3781 unsigned long start_pfn = early_node_map[i].start_pfn;
3782 unsigned long end_pfn = early_node_map[i].end_pfn;
3783 3730
3731 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
3784 if (start_pfn <= pfn && pfn < end_pfn) 3732 if (start_pfn <= pfn && pfn < end_pfn)
3785 return early_node_map[i].nid; 3733 return nid;
3786 }
3787 /* This is a memory hole */ 3734 /* This is a memory hole */
3788 return -1; 3735 return -1;
3789} 3736}
@@ -3812,11 +3759,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3812} 3759}
3813#endif 3760#endif
3814 3761
3815/* Basic iterator support to walk early_node_map[] */
3816#define for_each_active_range_index_in_nid(i, nid) \
3817 for (i = first_active_region_index_in_nid(nid); i != -1; \
3818 i = next_active_region_index_in_nid(i, nid))
3819
3820/** 3762/**
3821 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range 3763 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
3822 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. 3764 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3826,122 +3768,34 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3826 * add_active_ranges() contain no holes and may be freed, this 3768 * add_active_ranges() contain no holes and may be freed, this
3827 * this function may be used instead of calling free_bootmem() manually. 3769 * this function may be used instead of calling free_bootmem() manually.
3828 */ 3770 */
3829void __init free_bootmem_with_active_regions(int nid, 3771void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
3830 unsigned long max_low_pfn)
3831{
3832 int i;
3833
3834 for_each_active_range_index_in_nid(i, nid) {
3835 unsigned long size_pages = 0;
3836 unsigned long end_pfn = early_node_map[i].end_pfn;
3837
3838 if (early_node_map[i].start_pfn >= max_low_pfn)
3839 continue;
3840
3841 if (end_pfn > max_low_pfn)
3842 end_pfn = max_low_pfn;
3843
3844 size_pages = end_pfn - early_node_map[i].start_pfn;
3845 free_bootmem_node(NODE_DATA(early_node_map[i].nid),
3846 PFN_PHYS(early_node_map[i].start_pfn),
3847 size_pages << PAGE_SHIFT);
3848 }
3849}
3850
3851#ifdef CONFIG_HAVE_MEMBLOCK
3852/*
3853 * Basic iterator support. Return the last range of PFNs for a node
3854 * Note: nid == MAX_NUMNODES returns last region regardless of node
3855 */
3856static int __meminit last_active_region_index_in_nid(int nid)
3857{ 3772{
3858 int i; 3773 unsigned long start_pfn, end_pfn;
3859 3774 int i, this_nid;
3860 for (i = nr_nodemap_entries - 1; i >= 0; i--)
3861 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3862 return i;
3863
3864 return -1;
3865}
3866
3867/*
3868 * Basic iterator support. Return the previous active range of PFNs for a node
3869 * Note: nid == MAX_NUMNODES returns next region regardless of node
3870 */
3871static int __meminit previous_active_region_index_in_nid(int index, int nid)
3872{
3873 for (index = index - 1; index >= 0; index--)
3874 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3875 return index;
3876
3877 return -1;
3878}
3879
3880#define for_each_active_range_index_in_nid_reverse(i, nid) \
3881 for (i = last_active_region_index_in_nid(nid); i != -1; \
3882 i = previous_active_region_index_in_nid(i, nid))
3883
3884u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3885 u64 goal, u64 limit)
3886{
3887 int i;
3888
3889 /* Need to go over early_node_map to find out good range for node */
3890 for_each_active_range_index_in_nid_reverse(i, nid) {
3891 u64 addr;
3892 u64 ei_start, ei_last;
3893 u64 final_start, final_end;
3894
3895 ei_last = early_node_map[i].end_pfn;
3896 ei_last <<= PAGE_SHIFT;
3897 ei_start = early_node_map[i].start_pfn;
3898 ei_start <<= PAGE_SHIFT;
3899
3900 final_start = max(ei_start, goal);
3901 final_end = min(ei_last, limit);
3902
3903 if (final_start >= final_end)
3904 continue;
3905
3906 addr = memblock_find_in_range(final_start, final_end, size, align);
3907 3775
3908 if (addr == MEMBLOCK_ERROR) 3776 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
3909 continue; 3777 start_pfn = min(start_pfn, max_low_pfn);
3778 end_pfn = min(end_pfn, max_low_pfn);
3910 3779
3911 return addr; 3780 if (start_pfn < end_pfn)
3781 free_bootmem_node(NODE_DATA(this_nid),
3782 PFN_PHYS(start_pfn),
3783 (end_pfn - start_pfn) << PAGE_SHIFT);
3912 } 3784 }
3913
3914 return MEMBLOCK_ERROR;
3915} 3785}
3916#endif
3917 3786
3918int __init add_from_early_node_map(struct range *range, int az, 3787int __init add_from_early_node_map(struct range *range, int az,
3919 int nr_range, int nid) 3788 int nr_range, int nid)
3920{ 3789{
3790 unsigned long start_pfn, end_pfn;
3921 int i; 3791 int i;
3922 u64 start, end;
3923 3792
3924 /* need to go over early_node_map to find out good range for node */ 3793 /* need to go over early_node_map to find out good range for node */
3925 for_each_active_range_index_in_nid(i, nid) { 3794 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
3926 start = early_node_map[i].start_pfn; 3795 nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
3927 end = early_node_map[i].end_pfn;
3928 nr_range = add_range(range, az, nr_range, start, end);
3929 }
3930 return nr_range; 3796 return nr_range;
3931} 3797}
3932 3798
3933void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3934{
3935 int i;
3936 int ret;
3937
3938 for_each_active_range_index_in_nid(i, nid) {
3939 ret = work_fn(early_node_map[i].start_pfn,
3940 early_node_map[i].end_pfn, data);
3941 if (ret)
3942 break;
3943 }
3944}
3945/** 3799/**
3946 * sparse_memory_present_with_active_regions - Call memory_present for each active range 3800 * sparse_memory_present_with_active_regions - Call memory_present for each active range
3947 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. 3801 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -3952,12 +3806,11 @@ void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3952 */ 3806 */
3953void __init sparse_memory_present_with_active_regions(int nid) 3807void __init sparse_memory_present_with_active_regions(int nid)
3954{ 3808{
3955 int i; 3809 unsigned long start_pfn, end_pfn;
3810 int i, this_nid;
3956 3811
3957 for_each_active_range_index_in_nid(i, nid) 3812 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
3958 memory_present(early_node_map[i].nid, 3813 memory_present(this_nid, start_pfn, end_pfn);
3959 early_node_map[i].start_pfn,
3960 early_node_map[i].end_pfn);
3961} 3814}
3962 3815
3963/** 3816/**
@@ -3974,13 +3827,15 @@ void __init sparse_memory_present_with_active_regions(int nid)
3974void __meminit get_pfn_range_for_nid(unsigned int nid, 3827void __meminit get_pfn_range_for_nid(unsigned int nid,
3975 unsigned long *start_pfn, unsigned long *end_pfn) 3828 unsigned long *start_pfn, unsigned long *end_pfn)
3976{ 3829{
3830 unsigned long this_start_pfn, this_end_pfn;
3977 int i; 3831 int i;
3832
3978 *start_pfn = -1UL; 3833 *start_pfn = -1UL;
3979 *end_pfn = 0; 3834 *end_pfn = 0;
3980 3835
3981 for_each_active_range_index_in_nid(i, nid) { 3836 for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
3982 *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); 3837 *start_pfn = min(*start_pfn, this_start_pfn);
3983 *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); 3838 *end_pfn = max(*end_pfn, this_end_pfn);
3984 } 3839 }
3985 3840
3986 if (*start_pfn == -1UL) 3841 if (*start_pfn == -1UL)
@@ -4083,46 +3938,16 @@ unsigned long __meminit __absent_pages_in_range(int nid,
4083 unsigned long range_start_pfn, 3938 unsigned long range_start_pfn,
4084 unsigned long range_end_pfn) 3939 unsigned long range_end_pfn)
4085{ 3940{
4086 int i = 0; 3941 unsigned long nr_absent = range_end_pfn - range_start_pfn;
4087 unsigned long prev_end_pfn = 0, hole_pages = 0; 3942 unsigned long start_pfn, end_pfn;
4088 unsigned long start_pfn; 3943 int i;
4089
4090 /* Find the end_pfn of the first active range of pfns in the node */
4091 i = first_active_region_index_in_nid(nid);
4092 if (i == -1)
4093 return 0;
4094
4095 prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4096
4097 /* Account for ranges before physical memory on this node */
4098 if (early_node_map[i].start_pfn > range_start_pfn)
4099 hole_pages = prev_end_pfn - range_start_pfn;
4100
4101 /* Find all holes for the zone within the node */
4102 for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
4103
4104 /* No need to continue if prev_end_pfn is outside the zone */
4105 if (prev_end_pfn >= range_end_pfn)
4106 break;
4107
4108 /* Make sure the end of the zone is not within the hole */
4109 start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4110 prev_end_pfn = max(prev_end_pfn, range_start_pfn);
4111 3944
4112 /* Update the hole size cound and move on */ 3945 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4113 if (start_pfn > range_start_pfn) { 3946 start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
4114 BUG_ON(prev_end_pfn > start_pfn); 3947 end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
4115 hole_pages += start_pfn - prev_end_pfn; 3948 nr_absent -= end_pfn - start_pfn;
4116 }
4117 prev_end_pfn = early_node_map[i].end_pfn;
4118 } 3949 }
4119 3950 return nr_absent;
4120 /* Account for ranges past physical memory on this node */
4121 if (range_end_pfn > prev_end_pfn)
4122 hole_pages += range_end_pfn -
4123 max(range_start_pfn, prev_end_pfn);
4124
4125 return hole_pages;
4126} 3951}
4127 3952
4128/** 3953/**
@@ -4143,14 +3968,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4143 unsigned long zone_type, 3968 unsigned long zone_type,
4144 unsigned long *ignored) 3969 unsigned long *ignored)
4145{ 3970{
3971 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
3972 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
4146 unsigned long node_start_pfn, node_end_pfn; 3973 unsigned long node_start_pfn, node_end_pfn;
4147 unsigned long zone_start_pfn, zone_end_pfn; 3974 unsigned long zone_start_pfn, zone_end_pfn;
4148 3975
4149 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); 3976 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
4150 zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], 3977 zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
4151 node_start_pfn); 3978 zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
4152 zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
4153 node_end_pfn);
4154 3979
4155 adjust_zone_range_for_zone_movable(nid, zone_type, 3980 adjust_zone_range_for_zone_movable(nid, zone_type,
4156 node_start_pfn, node_end_pfn, 3981 node_start_pfn, node_end_pfn,
@@ -4158,7 +3983,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4158 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); 3983 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
4159} 3984}
4160 3985
4161#else 3986#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4162static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, 3987static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
4163 unsigned long zone_type, 3988 unsigned long zone_type,
4164 unsigned long *zones_size) 3989 unsigned long *zones_size)
@@ -4176,7 +4001,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
4176 return zholes_size[zone_type]; 4001 return zholes_size[zone_type];
4177} 4002}
4178 4003
4179#endif 4004#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4180 4005
4181static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, 4006static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
4182 unsigned long *zones_size, unsigned long *zholes_size) 4007 unsigned long *zones_size, unsigned long *zholes_size)
@@ -4399,10 +4224,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
4399 */ 4224 */
4400 if (pgdat == NODE_DATA(0)) { 4225 if (pgdat == NODE_DATA(0)) {
4401 mem_map = NODE_DATA(0)->node_mem_map; 4226 mem_map = NODE_DATA(0)->node_mem_map;
4402#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4227#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4403 if (page_to_pfn(mem_map) != pgdat->node_start_pfn) 4228 if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
4404 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); 4229 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
4405#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4230#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4406 } 4231 }
4407#endif 4232#endif
4408#endif /* CONFIG_FLAT_NODE_MEM_MAP */ 4233#endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@ -4427,7 +4252,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
4427 free_area_init_core(pgdat, zones_size, zholes_size); 4252 free_area_init_core(pgdat, zones_size, zholes_size);
4428} 4253}
4429 4254
4430#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4255#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4431 4256
4432#if MAX_NUMNODES > 1 4257#if MAX_NUMNODES > 1
4433/* 4258/*
@@ -4449,170 +4274,6 @@ static inline void setup_nr_node_ids(void)
4449#endif 4274#endif
4450 4275
4451/** 4276/**
4452 * add_active_range - Register a range of PFNs backed by physical memory
4453 * @nid: The node ID the range resides on
4454 * @start_pfn: The start PFN of the available physical memory
4455 * @end_pfn: The end PFN of the available physical memory
4456 *
4457 * These ranges are stored in an early_node_map[] and later used by
4458 * free_area_init_nodes() to calculate zone sizes and holes. If the
4459 * range spans a memory hole, it is up to the architecture to ensure
4460 * the memory is not freed by the bootmem allocator. If possible
4461 * the range being registered will be merged with existing ranges.
4462 */
4463void __init add_active_range(unsigned int nid, unsigned long start_pfn,
4464 unsigned long end_pfn)
4465{
4466 int i;
4467
4468 mminit_dprintk(MMINIT_TRACE, "memory_register",
4469 "Entering add_active_range(%d, %#lx, %#lx) "
4470 "%d entries of %d used\n",
4471 nid, start_pfn, end_pfn,
4472 nr_nodemap_entries, MAX_ACTIVE_REGIONS);
4473
4474 mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
4475
4476 /* Merge with existing active regions if possible */
4477 for (i = 0; i < nr_nodemap_entries; i++) {
4478 if (early_node_map[i].nid != nid)
4479 continue;
4480
4481 /* Skip if an existing region covers this new one */
4482 if (start_pfn >= early_node_map[i].start_pfn &&
4483 end_pfn <= early_node_map[i].end_pfn)
4484 return;
4485
4486 /* Merge forward if suitable */
4487 if (start_pfn <= early_node_map[i].end_pfn &&
4488 end_pfn > early_node_map[i].end_pfn) {
4489 early_node_map[i].end_pfn = end_pfn;
4490 return;
4491 }
4492
4493 /* Merge backward if suitable */
4494 if (start_pfn < early_node_map[i].start_pfn &&
4495 end_pfn >= early_node_map[i].start_pfn) {
4496 early_node_map[i].start_pfn = start_pfn;
4497 return;
4498 }
4499 }
4500
4501 /* Check that early_node_map is large enough */
4502 if (i >= MAX_ACTIVE_REGIONS) {
4503 printk(KERN_CRIT "More than %d memory regions, truncating\n",
4504 MAX_ACTIVE_REGIONS);
4505 return;
4506 }
4507
4508 early_node_map[i].nid = nid;
4509 early_node_map[i].start_pfn = start_pfn;
4510 early_node_map[i].end_pfn = end_pfn;
4511 nr_nodemap_entries = i + 1;
4512}
4513
4514/**
4515 * remove_active_range - Shrink an existing registered range of PFNs
4516 * @nid: The node id the range is on that should be shrunk
4517 * @start_pfn: The new PFN of the range
4518 * @end_pfn: The new PFN of the range
4519 *
4520 * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
4521 * The map is kept near the end physical page range that has already been
4522 * registered. This function allows an arch to shrink an existing registered
4523 * range.
4524 */
4525void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
4526 unsigned long end_pfn)
4527{
4528 int i, j;
4529 int removed = 0;
4530
4531 printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
4532 nid, start_pfn, end_pfn);
4533
4534 /* Find the old active region end and shrink */
4535 for_each_active_range_index_in_nid(i, nid) {
4536 if (early_node_map[i].start_pfn >= start_pfn &&
4537 early_node_map[i].end_pfn <= end_pfn) {
4538 /* clear it */
4539 early_node_map[i].start_pfn = 0;
4540 early_node_map[i].end_pfn = 0;
4541 removed = 1;
4542 continue;
4543 }
4544 if (early_node_map[i].start_pfn < start_pfn &&
4545 early_node_map[i].end_pfn > start_pfn) {
4546 unsigned long temp_end_pfn = early_node_map[i].end_pfn;
4547 early_node_map[i].end_pfn = start_pfn;
4548 if (temp_end_pfn > end_pfn)
4549 add_active_range(nid, end_pfn, temp_end_pfn);
4550 continue;
4551 }
4552 if (early_node_map[i].start_pfn >= start_pfn &&
4553 early_node_map[i].end_pfn > end_pfn &&
4554 early_node_map[i].start_pfn < end_pfn) {
4555 early_node_map[i].start_pfn = end_pfn;
4556 continue;
4557 }
4558 }
4559
4560 if (!removed)
4561 return;
4562
4563 /* remove the blank ones */
4564 for (i = nr_nodemap_entries - 1; i > 0; i--) {
4565 if (early_node_map[i].nid != nid)
4566 continue;
4567 if (early_node_map[i].end_pfn)
4568 continue;
4569 /* we found it, get rid of it */
4570 for (j = i; j < nr_nodemap_entries - 1; j++)
4571 memcpy(&early_node_map[j], &early_node_map[j+1],
4572 sizeof(early_node_map[j]));
4573 j = nr_nodemap_entries - 1;
4574 memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
4575 nr_nodemap_entries--;
4576 }
4577}
4578
4579/**
4580 * remove_all_active_ranges - Remove all currently registered regions
4581 *
4582 * During discovery, it may be found that a table like SRAT is invalid
4583 * and an alternative discovery method must be used. This function removes
4584 * all currently registered regions.
4585 */
4586void __init remove_all_active_ranges(void)
4587{
4588 memset(early_node_map, 0, sizeof(early_node_map));
4589 nr_nodemap_entries = 0;
4590}
4591
4592/* Compare two active node_active_regions */
4593static int __init cmp_node_active_region(const void *a, const void *b)
4594{
4595 struct node_active_region *arange = (struct node_active_region *)a;
4596 struct node_active_region *brange = (struct node_active_region *)b;
4597
4598 /* Done this way to avoid overflows */
4599 if (arange->start_pfn > brange->start_pfn)
4600 return 1;
4601 if (arange->start_pfn < brange->start_pfn)
4602 return -1;
4603
4604 return 0;
4605}
4606
4607/* sort the node_map by start_pfn */
4608void __init sort_node_map(void)
4609{
4610 sort(early_node_map, (size_t)nr_nodemap_entries,
4611 sizeof(struct node_active_region),
4612 cmp_node_active_region, NULL);
4613}
4614
4615/**
4616 * node_map_pfn_alignment - determine the maximum internode alignment 4277 * node_map_pfn_alignment - determine the maximum internode alignment
4617 * 4278 *
4618 * This function should be called after node map is populated and sorted. 4279 * This function should be called after node map is populated and sorted.
@@ -4634,15 +4295,11 @@ void __init sort_node_map(void)
4634unsigned long __init node_map_pfn_alignment(void) 4295unsigned long __init node_map_pfn_alignment(void)
4635{ 4296{
4636 unsigned long accl_mask = 0, last_end = 0; 4297 unsigned long accl_mask = 0, last_end = 0;
4298 unsigned long start, end, mask;
4637 int last_nid = -1; 4299 int last_nid = -1;
4638 int i; 4300 int i, nid;
4639
4640 for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
4641 int nid = early_node_map[i].nid;
4642 unsigned long start = early_node_map[i].start_pfn;
4643 unsigned long end = early_node_map[i].end_pfn;
4644 unsigned long mask;
4645 4301
4302 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
4646 if (!start || last_nid < 0 || last_nid == nid) { 4303 if (!start || last_nid < 0 || last_nid == nid) {
4647 last_nid = nid; 4304 last_nid = nid;
4648 last_end = end; 4305 last_end = end;
@@ -4669,12 +4326,12 @@ unsigned long __init node_map_pfn_alignment(void)
4669/* Find the lowest pfn for a node */ 4326/* Find the lowest pfn for a node */
4670static unsigned long __init find_min_pfn_for_node(int nid) 4327static unsigned long __init find_min_pfn_for_node(int nid)
4671{ 4328{
4672 int i;
4673 unsigned long min_pfn = ULONG_MAX; 4329 unsigned long min_pfn = ULONG_MAX;
4330 unsigned long start_pfn;
4331 int i;
4674 4332
4675 /* Assuming a sorted map, the first range found has the starting pfn */ 4333 for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
4676 for_each_active_range_index_in_nid(i, nid) 4334 min_pfn = min(min_pfn, start_pfn);
4677 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
4678 4335
4679 if (min_pfn == ULONG_MAX) { 4336 if (min_pfn == ULONG_MAX) {
4680 printk(KERN_WARNING 4337 printk(KERN_WARNING
@@ -4703,15 +4360,16 @@ unsigned long __init find_min_pfn_with_active_regions(void)
4703 */ 4360 */
4704static unsigned long __init early_calculate_totalpages(void) 4361static unsigned long __init early_calculate_totalpages(void)
4705{ 4362{
4706 int i;
4707 unsigned long totalpages = 0; 4363 unsigned long totalpages = 0;
4364 unsigned long start_pfn, end_pfn;
4365 int i, nid;
4366
4367 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
4368 unsigned long pages = end_pfn - start_pfn;
4708 4369
4709 for (i = 0; i < nr_nodemap_entries; i++) {
4710 unsigned long pages = early_node_map[i].end_pfn -
4711 early_node_map[i].start_pfn;
4712 totalpages += pages; 4370 totalpages += pages;
4713 if (pages) 4371 if (pages)
4714 node_set_state(early_node_map[i].nid, N_HIGH_MEMORY); 4372 node_set_state(nid, N_HIGH_MEMORY);
4715 } 4373 }
4716 return totalpages; 4374 return totalpages;
4717} 4375}
@@ -4766,6 +4424,8 @@ restart:
4766 /* Spread kernelcore memory as evenly as possible throughout nodes */ 4424 /* Spread kernelcore memory as evenly as possible throughout nodes */
4767 kernelcore_node = required_kernelcore / usable_nodes; 4425 kernelcore_node = required_kernelcore / usable_nodes;
4768 for_each_node_state(nid, N_HIGH_MEMORY) { 4426 for_each_node_state(nid, N_HIGH_MEMORY) {
4427 unsigned long start_pfn, end_pfn;
4428
4769 /* 4429 /*
4770 * Recalculate kernelcore_node if the division per node 4430 * Recalculate kernelcore_node if the division per node
4771 * now exceeds what is necessary to satisfy the requested 4431 * now exceeds what is necessary to satisfy the requested
@@ -4782,13 +4442,10 @@ restart:
4782 kernelcore_remaining = kernelcore_node; 4442 kernelcore_remaining = kernelcore_node;
4783 4443
4784 /* Go through each range of PFNs within this node */ 4444 /* Go through each range of PFNs within this node */
4785 for_each_active_range_index_in_nid(i, nid) { 4445 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4786 unsigned long start_pfn, end_pfn;
4787 unsigned long size_pages; 4446 unsigned long size_pages;
4788 4447
4789 start_pfn = max(early_node_map[i].start_pfn, 4448 start_pfn = max(start_pfn, zone_movable_pfn[nid]);
4790 zone_movable_pfn[nid]);
4791 end_pfn = early_node_map[i].end_pfn;
4792 if (start_pfn >= end_pfn) 4449 if (start_pfn >= end_pfn)
4793 continue; 4450 continue;
4794 4451
@@ -4890,11 +4547,8 @@ static void check_for_regular_memory(pg_data_t *pgdat)
4890 */ 4547 */
4891void __init free_area_init_nodes(unsigned long *max_zone_pfn) 4548void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4892{ 4549{
4893 unsigned long nid; 4550 unsigned long start_pfn, end_pfn;
4894 int i; 4551 int i, nid;
4895
4896 /* Sort early_node_map as initialisation assumes it is sorted */
4897 sort_node_map();
4898 4552
4899 /* Record where the zone boundaries are */ 4553 /* Record where the zone boundaries are */
4900 memset(arch_zone_lowest_possible_pfn, 0, 4554 memset(arch_zone_lowest_possible_pfn, 0,
@@ -4941,11 +4595,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4941 } 4595 }
4942 4596
4943 /* Print out the early_node_map[] */ 4597 /* Print out the early_node_map[] */
4944 printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); 4598 printk("Early memory PFN ranges\n");
4945 for (i = 0; i < nr_nodemap_entries; i++) 4599 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4946 printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, 4600 printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);
4947 early_node_map[i].start_pfn,
4948 early_node_map[i].end_pfn);
4949 4601
4950 /* Initialise every node */ 4602 /* Initialise every node */
4951 mminit_verify_pageflags_layout(); 4603 mminit_verify_pageflags_layout();
@@ -4998,7 +4650,7 @@ static int __init cmdline_parse_movablecore(char *p)
4998early_param("kernelcore", cmdline_parse_kernelcore); 4650early_param("kernelcore", cmdline_parse_kernelcore);
4999early_param("movablecore", cmdline_parse_movablecore); 4651early_param("movablecore", cmdline_parse_movablecore);
5000 4652
5001#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4653#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
5002 4654
5003/** 4655/**
5004 * set_dma_reserve - set the specified number of pages reserved in the first zone 4656 * set_dma_reserve - set the specified number of pages reserved in the first zone