-rw-r--r--  Documentation/kernel-parameters.txt  |  36
-rw-r--r--  arch/x86/kernel/setup.c              |  13
-rw-r--r--  arch/x86/mm/numa.c                   |  11
-rw-r--r--  arch/x86/mm/srat.c                   | 125
-rw-r--r--  drivers/acpi/numa.c                  |  23
-rw-r--r--  include/linux/acpi.h                 |   8
-rw-r--r--  include/linux/memblock.h             |   2
-rw-r--r--  include/linux/mm.h                   |  18
-rw-r--r--  mm/memblock.c                        |  50
-rw-r--r--  mm/page_alloc.c                      | 285
10 files changed, 27 insertions(+), 544 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e567af39ee34..3a54fca730c0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1645,42 +1645,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         that the amount of memory usable for all allocations
                         is not too small.
 
-        movablemem_map=acpi
-                        [KNL,X86,IA-64,PPC] This parameter is similar to
-                        memmap except it specifies the memory map of
-                        ZONE_MOVABLE.
-                        This option inform the kernel to use Hot Pluggable bit
-                        in flags from SRAT from ACPI BIOS to determine which
-                        memory devices could be hotplugged. The corresponding
-                        memory ranges will be set as ZONE_MOVABLE.
-                        NOTE: Whatever node the kernel resides in will always
-                              be un-hotpluggable.
-
-        movablemem_map=nn[KMG]@ss[KMG]
-                        [KNL,X86,IA-64,PPC] This parameter is similar to
-                        memmap except it specifies the memory map of
-                        ZONE_MOVABLE.
-                        If user specifies memory ranges, the info in SRAT will
-                        be ingored. And it works like the following:
-                        - If more ranges are all within one node, then from
-                          lowest ss to the end of the node will be ZONE_MOVABLE.
-                        - If a range is within a node, then from ss to the end
-                          of the node will be ZONE_MOVABLE.
-                        - If a range covers two or more nodes, then from ss to
-                          the end of the 1st node will be ZONE_MOVABLE, and all
-                          the rest nodes will only have ZONE_MOVABLE.
-                        If memmap is specified at the same time, the
-                        movablemem_map will be limited within the memmap
-                        areas. If kernelcore or movablecore is also specified,
-                        movablemem_map will have higher priority to be
-                        satisfied. So the administrator should be careful that
-                        the amount of movablemem_map areas are not too large.
-                        Otherwise kernel won't have enough memory to start.
-                        NOTE: We don't stop users specifying the node the
-                              kernel resides in as hotpluggable so that this
-                              option can be used as a workaround of firmware
-                              bugs.
-
         MTD_Partition=  [MTD]
                         Format: <name>,<region-number>,<size>,<offset>
 
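For reference, the option whose documentation is deleted above was passed on the kernel command line in one of the two documented forms below; the size and address in the second line are made-up illustrative values, not taken from this patch:

        movablemem_map=acpi
        movablemem_map=4G@16G

Under the nn[KMG]@ss[KMG] rules quoted above, the second form asked for the 4G of memory starting at physical address 16G — and, by the per-node rules, the rest of the node containing that range — to be treated as ZONE_MOVABLE.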
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e89acdf6b77b..84d32855f65c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1056,15 +1056,6 @@ void __init setup_arch(char **cmdline_p)
         setup_bios_corruption_check();
 #endif
 
-        /*
-         * In the memory hotplug case, the kernel needs info from SRAT to
-         * determine which memory is hotpluggable before allocating memory
-         * using memblock.
-         */
-        acpi_boot_table_init();
-        early_acpi_boot_init();
-        early_parse_srat();
-
 #ifdef CONFIG_X86_32
         printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
                         (max_pfn_mapped<<PAGE_SHIFT) - 1);
@@ -1110,6 +1101,10 @@ void __init setup_arch(char **cmdline_p)
         /*
          * Parse the ACPI tables for possible boot-time SMP configuration.
          */
+        acpi_boot_table_init();
+
+        early_acpi_boot_init();
+
         initmem_init();
         memblock_find_dma_reserve();
 
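Read together with the earlier setup.c hunk, the change above simply moves ACPI table parsing back below the early memory setup. A rough sketch of the resulting order of the relevant calls in setup_arch() — an outline for orientation only, not the kernel source, with everything in between elided:

        setup_bios_corruption_check();
        /* ... early mappings and memblock allocations ... */
        acpi_boot_table_init();         /* ACPI tables parsed here again */
        early_acpi_boot_init();
        initmem_init();                 /* NUMA setup; on ACPI NUMA configs this reaches acpi_numa_init() */
        memblock_find_dma_reserve();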
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ff3633c794c6..72fe01e9e414 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -212,9 +212,10 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
          * Allocate node data. Try node-local memory and then any node.
          * Never allocate in DMA zone.
          */
-        nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+        nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
         if (!nd_pa) {
-                pr_err("Cannot find %zu bytes in any node\n", nd_size);
+                pr_err("Cannot find %zu bytes in node %d\n",
+                       nd_size, nid);
                 return;
         }
         nd = __va(nd_pa);
@@ -559,12 +560,10 @@ static int __init numa_init(int (*init_func)(void))
         for (i = 0; i < MAX_LOCAL_APIC; i++)
                 set_apicid_to_node(i, NUMA_NO_NODE);
 
-        /*
-         * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
-         * SRAT was parsed earlier in early_parse_srat().
-         */
+        nodes_clear(numa_nodes_parsed);
         nodes_clear(node_possible_map);
         nodes_clear(node_online_map);
+        memset(&numa_meminfo, 0, sizeof(numa_meminfo));
         WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
         numa_reset_distance();
 
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 79836d01f789..cdd0da9dd530 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -141,126 +141,11 @@ static inline int save_add_info(void) {return 1;}
 static inline int save_add_info(void) {return 0;}
 #endif
 
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-static void __init
-handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
-{
-        int overlap, i;
-        unsigned long start_pfn, end_pfn;
-
-        start_pfn = PFN_DOWN(start);
-        end_pfn = PFN_UP(end);
-
-        /*
-         * For movablemem_map=acpi:
-         *
-         * SRAT:            |_____| |_____| |_________| |_________| ......
-         * node id:         0       1       1           2
-         * hotpluggable:    n       y       y           n
-         * movablemem_map:          |_____| |_________|
-         *
-         * Using movablemem_map, we can prevent memblock from allocating memory
-         * on ZONE_MOVABLE at boot time.
-         *
-         * Before parsing SRAT, memblock has already reserve some memory ranges
-         * for other purposes, such as for kernel image. We cannot prevent
-         * kernel from using these memory, so we need to exclude these memory
-         * even if it is hotpluggable.
-         * Furthermore, to ensure the kernel has enough memory to boot, we make
-         * all the memory on the node which the kernel resides in
-         * un-hotpluggable.
-         */
-        if (hotpluggable && movablemem_map.acpi) {
-                /* Exclude ranges reserved by memblock. */
-                struct memblock_type *rgn = &memblock.reserved;
-
-                for (i = 0; i < rgn->cnt; i++) {
-                        if (end <= rgn->regions[i].base ||
-                            start >= rgn->regions[i].base +
-                            rgn->regions[i].size)
-                                continue;
-
-                        /*
-                         * If the memory range overlaps the memory reserved by
-                         * memblock, then the kernel resides in this node.
-                         */
-                        node_set(node, movablemem_map.numa_nodes_kernel);
-
-                        goto out;
-                }
-
-                /*
-                 * If the kernel resides in this node, then the whole node
-                 * should not be hotpluggable.
-                 */
-                if (node_isset(node, movablemem_map.numa_nodes_kernel))
-                        goto out;
-
-                insert_movablemem_map(start_pfn, end_pfn);
-
-                /*
-                 * numa_nodes_hotplug nodemask represents which nodes are put
-                 * into movablemem_map.map[].
-                 */
-                node_set(node, movablemem_map.numa_nodes_hotplug);
-                goto out;
-        }
-
-        /*
-         * For movablemem_map=nn[KMG]@ss[KMG]:
-         *
-         * SRAT:            |_____| |_____| |_________| |_________| ......
-         * node id:         0       1       1           2
-         * user specified:          |__|    |___|
-         * movablemem_map:          |___| |_________|   |______| ......
-         *
-         * Using movablemem_map, we can prevent memblock from allocating memory
-         * on ZONE_MOVABLE at boot time.
-         *
-         * NOTE: In this case, SRAT info will be ingored.
-         */
-        overlap = movablemem_map_overlap(start_pfn, end_pfn);
-        if (overlap >= 0) {
-                /*
-                 * If part of this range is in movablemem_map, we need to
-                 * add the range after it to extend the range to the end
-                 * of the node, because from the min address specified to
-                 * the end of the node will be ZONE_MOVABLE.
-                 */
-                start_pfn = max(start_pfn,
-                            movablemem_map.map[overlap].start_pfn);
-                insert_movablemem_map(start_pfn, end_pfn);
-
-                /*
-                 * Set the nodemask, so that if the address range on one node
-                 * is not continuse, we can add the subsequent ranges on the
-                 * same node into movablemem_map.
-                 */
-                node_set(node, movablemem_map.numa_nodes_hotplug);
-        } else {
-                if (node_isset(node, movablemem_map.numa_nodes_hotplug))
-                        /*
-                         * Insert the range if we already have movable ranges
-                         * on the same node.
-                         */
-                        insert_movablemem_map(start_pfn, end_pfn);
-        }
-out:
-        return;
-}
-#else           /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static inline void
-handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
-{
-}
-#endif          /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 int __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
         u64 start, end;
-        u32 hotpluggable;
         int node, pxm;
 
         if (srat_disabled())
@@ -269,8 +154,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
                 goto out_err_bad_srat;
         if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
                 goto out_err;
-        hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
-        if (hotpluggable && !save_add_info())
+        if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
                 goto out_err;
 
         start = ma->base_address;
@@ -290,12 +174,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
         node_set(node, numa_nodes_parsed);
 
-        printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
+        printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
                node, pxm,
-               (unsigned long long) start, (unsigned long long) end - 1,
-               hotpluggable ? "Hot Pluggable": "");
-
-        handle_movablemem(node, start, end, hotpluggable);
+               (unsigned long long) start, (unsigned long long) end - 1);
 
         return 0;
 out_err_bad_srat:
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 59844ee149be..33e609f63585 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id,
                              handler, max_entries);
 }
 
-static int srat_mem_cnt;
-
-void __init early_parse_srat(void)
+int __init acpi_numa_init(void)
 {
+        int cnt = 0;
+
         /*
          * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
          * SRAT cpu entries could have different order with that in MADT.
@@ -295,24 +295,21 @@ void __init early_parse_srat(void)
         /* SRAT: Static Resource Affinity Table */
         if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
                 acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
                                       acpi_parse_x2apic_affinity, 0);
                 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
                                       acpi_parse_processor_affinity, 0);
-                srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
+                cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
                                             acpi_parse_memory_affinity,
                                             NR_NODE_MEMBLKS);
         }
-}
 
-int __init acpi_numa_init(void)
-{
         /* SLIT: System Locality Information Table */
         acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
 
         acpi_numa_arch_fixup();
 
-        if (srat_mem_cnt < 0)
-                return srat_mem_cnt;
+        if (cnt < 0)
+                return cnt;
         else if (!parsed_numa_memblks)
                 return -ENOENT;
         return 0;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index f46cfd73a553..bcbdd7484e58 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -485,14 +485,6 @@ static inline bool acpi_driver_match_device(struct device *dev,
 
 #endif /* !CONFIG_ACPI */
 
-#ifdef CONFIG_ACPI_NUMA
-void __init early_parse_srat(void);
-#else
-static inline void early_parse_srat(void)
-{
-}
-#endif
-
 #ifdef CONFIG_ACPI
 void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
                                u32 pm1a_ctrl, u32 pm1b_ctrl));
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 3e5ecb2d790e..f388203db7e8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,7 +42,6 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
-extern struct movablemem_map movablemem_map;
 
 #define memblock_dbg(fmt, ...) \
         if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
@@ -61,7 +60,6 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
                           unsigned long *out_end_pfn, int *out_nid);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e7c3f9a0111a..1ede55f292c2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1333,24 +1333,6 @@ extern void free_bootmem_with_active_regions(int nid,
                                                 unsigned long max_low_pfn);
 extern void sparse_memory_present_with_active_regions(int nid);
 
-#define MOVABLEMEM_MAP_MAX MAX_NUMNODES
-struct movablemem_entry {
-        unsigned long start_pfn;    /* start pfn of memory segment */
-        unsigned long end_pfn;      /* end pfn of memory segment (exclusive) */
-};
-
-struct movablemem_map {
-        bool acpi;      /* true if using SRAT info */
-        int nr_map;
-        struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
-        nodemask_t numa_nodes_hotplug;  /* on which nodes we specify memory */
-        nodemask_t numa_nodes_kernel;   /* on which nodes kernel resides in */
-};
-
-extern void __init insert_movablemem_map(unsigned long start_pfn,
-                                         unsigned long end_pfn);
-extern int __init movablemem_map_overlap(unsigned long start_pfn,
-                                         unsigned long end_pfn);
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
diff --git a/mm/memblock.c b/mm/memblock.c
index 1bcd9b970564..b8d9147e5c08 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -92,58 +92,9 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
  *
  * Find @size free area aligned to @align in the specified range and node.
  *
- * If we have CONFIG_HAVE_MEMBLOCK_NODE_MAP defined, we need to check if the
- * memory we found if not in hotpluggable ranges.
- *
  * RETURNS:
  * Found address on success, %0 on failure.
  */
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
-                                        phys_addr_t end, phys_addr_t size,
-                                        phys_addr_t align, int nid)
-{
-        phys_addr_t this_start, this_end, cand;
-        u64 i;
-        int curr = movablemem_map.nr_map - 1;
-
-        /* pump up @end */
-        if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-                end = memblock.current_limit;
-
-        /* avoid allocating the first page */
-        start = max_t(phys_addr_t, start, PAGE_SIZE);
-        end = max(start, end);
-
-        for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
-                this_start = clamp(this_start, start, end);
-                this_end = clamp(this_end, start, end);
-
-restart:
-                if (this_end <= this_start || this_end < size)
-                        continue;
-
-                for (; curr >= 0; curr--) {
-                        if ((movablemem_map.map[curr].start_pfn << PAGE_SHIFT)
-                            < this_end)
-                                break;
-                }
-
-                cand = round_down(this_end - size, align);
-                if (curr >= 0 &&
-                    cand < movablemem_map.map[curr].end_pfn << PAGE_SHIFT) {
-                        this_end = movablemem_map.map[curr].start_pfn
-                                   << PAGE_SHIFT;
-                        goto restart;
-                }
-
-                if (cand >= this_start)
-                        return cand;
-        }
-
-        return 0;
-}
-#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
                                         phys_addr_t end, phys_addr_t size,
                                         phys_addr_t align, int nid)
@@ -172,7 +123,6 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
         }
         return 0;
 }
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
  * memblock_find_in_range - find free area in given range
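The #ifdef variant deleted above filtered allocation candidates against movablemem_map; what survives is the plain top-down search described by the comment block ("Find @size free area aligned to @align ...", returning the found address or 0). As a stand-alone toy illustration of that strategy only — this is not kernel code, every name in it is invented for the example, and align is assumed to be a power of two:

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t base, size; };          /* one free region: [base, base + size) */

/* Walk the free regions from highest to lowest and return the highest
 * align-rounded candidate that keeps `size` bytes inside [start, end),
 * or 0 on failure -- mirroring the RETURNS convention quoted above. */
static uint64_t find_in_range(const struct range *free, int nr,
                              uint64_t start, uint64_t end,
                              uint64_t size, uint64_t align)
{
        for (int i = nr - 1; i >= 0; i--) {
                uint64_t lo = free[i].base;
                uint64_t hi = free[i].base + free[i].size;

                if (lo < start)
                        lo = start;
                if (hi > end)
                        hi = end;
                if (hi <= lo || hi - lo < size)
                        continue;

                uint64_t cand = (hi - size) & ~(align - 1);     /* highest aligned fit */
                if (cand >= lo)
                        return cand;
        }
        return 0;
}

int main(void)
{
        const struct range free[] = {
                { 0x1000,   0x3000  },          /* [0x1000,   0x4000)   */
                { 0x100000, 0x80000 },          /* [0x100000, 0x180000) */
        };

        /* Expect 0x17e000: top of the highest region, rounded down to 4K. */
        printf("0x%llx\n", (unsigned long long)
               find_in_range(free, 2, 0x1000, UINT64_MAX, 0x2000, 0x1000));
        return 0;
}

Built as an ordinary user-space program this prints 0x17e000. The remaining kernel function performs the same reverse walk over memblock's free ranges, plus the MEMBLOCK_ALLOC_ACCESSIBLE and first-page handling visible in the deleted copy, just without the movablemem_map filtering.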
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0dade3f18f7d..8fcced7823fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -202,18 +202,11 @@ static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-/* Movable memory ranges, will also be used by memblock subsystem. */
-struct movablemem_map movablemem_map = {
-        .acpi = false,
-        .nr_map = 0,
-};
-
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
-static unsigned long __meminitdata zone_movable_limit[MAX_NUMNODES];
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
@@ -4412,77 +4405,6 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
         return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 }
 
-/**
- * sanitize_zone_movable_limit - Sanitize the zone_movable_limit array.
- *
- * zone_movable_limit is initialized as 0. This function will try to get
- * the first ZONE_MOVABLE pfn of each node from movablemem_map, and
- * assigne them to zone_movable_limit.
- * zone_movable_limit[nid] == 0 means no limit for the node.
- *
- * Note: Each range is represented as [start_pfn, end_pfn)
- */
-static void __meminit sanitize_zone_movable_limit(void)
-{
-        int map_pos = 0, i, nid;
-        unsigned long start_pfn, end_pfn;
-
-        if (!movablemem_map.nr_map)
-                return;
-
-        /* Iterate all ranges from minimum to maximum */
-        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
-                /*
-                 * If we have found lowest pfn of ZONE_MOVABLE of the node
-                 * specified by user, just go on to check next range.
-                 */
-                if (zone_movable_limit[nid])
-                        continue;
-
-#ifdef CONFIG_ZONE_DMA
-                /* Skip DMA memory. */
-                if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA])
-                        start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA];
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-                /* Skip DMA32 memory. */
-                if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA32])
-                        start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA32];
-#endif
-
-#ifdef CONFIG_HIGHMEM
-                /* Skip lowmem if ZONE_MOVABLE is highmem. */
-                if (zone_movable_is_highmem() &&
-                    start_pfn < arch_zone_lowest_possible_pfn[ZONE_HIGHMEM])
-                        start_pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
-#endif
-
-                if (start_pfn >= end_pfn)
-                        continue;
-
-                while (map_pos < movablemem_map.nr_map) {
-                        if (end_pfn <= movablemem_map.map[map_pos].start_pfn)
-                                break;
-
-                        if (start_pfn >= movablemem_map.map[map_pos].end_pfn) {
-                                map_pos++;
-                                continue;
-                        }
-
-                        /*
-                         * The start_pfn of ZONE_MOVABLE is either the minimum
-                         * pfn specified by movablemem_map, or 0, which means
-                         * the node has no ZONE_MOVABLE.
-                         */
-                        zone_movable_limit[nid] = max(start_pfn,
-                                        movablemem_map.map[map_pos].start_pfn);
-
-                        break;
-                }
-        }
-}
-
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                         unsigned long zone_type,
@@ -4500,6 +4422,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 
         return zholes_size[zone_type];
 }
+
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
@@ -4941,19 +4864,12 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                 required_kernelcore = max(required_kernelcore, corepages);
         }
 
-        /*
-         * If neither kernelcore/movablecore nor movablemem_map is specified,
-         * there is no ZONE_MOVABLE. But if movablemem_map is specified, the
-         * start pfn of ZONE_MOVABLE has been stored in zone_movable_limit[].
-         */
-        if (!required_kernelcore) {
-                if (movablemem_map.nr_map)
-                        memcpy(zone_movable_pfn, zone_movable_limit,
-                               sizeof(zone_movable_pfn));
+        /* If kernelcore was not specified, there is no ZONE_MOVABLE */
+        if (!required_kernelcore)
                 goto out;
-        }
 
         /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
+        find_usable_zone_for_movable();
         usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -4981,24 +4897,10 @@ restart:
                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
                         unsigned long size_pages;
 
-                        /*
-                         * Find more memory for kernelcore in
-                         * [zone_movable_pfn[nid], zone_movable_limit[nid]).
-                         */
                         start_pfn = max(start_pfn, zone_movable_pfn[nid]);
                         if (start_pfn >= end_pfn)
                                 continue;
 
-                        if (zone_movable_limit[nid]) {
-                                end_pfn = min(end_pfn, zone_movable_limit[nid]);
-                                /* No range left for kernelcore in this node */
-                                if (start_pfn >= end_pfn) {
-                                        zone_movable_pfn[nid] =
-                                                zone_movable_limit[nid];
-                                        break;
-                                }
-                        }
-
                         /* Account for what is only usable for kernelcore */
                         if (start_pfn < usable_startpfn) {
                                 unsigned long kernel_pages;
@@ -5058,12 +4960,12 @@ restart:
         if (usable_nodes && required_kernelcore > usable_nodes)
                 goto restart;
 
-out:
         /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
         for (nid = 0; nid < MAX_NUMNODES; nid++)
                 zone_movable_pfn[nid] =
                         roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
 
+out:
         /* restore the node_state */
         node_states[N_MEMORY] = saved_node_state;
 }
@@ -5126,8 +5028,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
         /* Find the PFNs that ZONE_MOVABLE begins at in each node */
         memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
-        find_usable_zone_for_movable();
-        sanitize_zone_movable_limit();
         find_zone_movable_pfns_for_nodes();
 
         /* Print out the zone ranges */
@@ -5211,181 +5111,6 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
-/**
- * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[].
- * @start_pfn: start pfn of the range to be checked
- * @end_pfn: end pfn of the range to be checked (exclusive)
- *
- * This function checks if a given memory range [start_pfn, end_pfn) overlaps
- * the movablemem_map.map[] array.
- *
- * Return: index of the first overlapped element in movablemem_map.map[]
- *         or -1 if they don't overlap each other.
- */
-int __init movablemem_map_overlap(unsigned long start_pfn,
-                                  unsigned long end_pfn)
-{
-        int overlap;
-
-        if (!movablemem_map.nr_map)
-                return -1;
-
-        for (overlap = 0; overlap < movablemem_map.nr_map; overlap++)
-                if (start_pfn < movablemem_map.map[overlap].end_pfn)
-                        break;
-
-        if (overlap == movablemem_map.nr_map ||
-            end_pfn <= movablemem_map.map[overlap].start_pfn)
-                return -1;
-
-        return overlap;
-}
-
-/**
- * insert_movablemem_map - Insert a memory range in to movablemem_map.map.
- * @start_pfn: start pfn of the range
- * @end_pfn: end pfn of the range
- *
- * This function will also merge the overlapped ranges, and sort the array
- * by start_pfn in monotonic increasing order.
- */
-void __init insert_movablemem_map(unsigned long start_pfn,
-                                  unsigned long end_pfn)
-{
-        int pos, overlap;
-
-        /*
-         * pos will be at the 1st overlapped range, or the position
-         * where the element should be inserted.
-         */
-        for (pos = 0; pos < movablemem_map.nr_map; pos++)
-                if (start_pfn <= movablemem_map.map[pos].end_pfn)
-                        break;
-
-        /* If there is no overlapped range, just insert the element. */
-        if (pos == movablemem_map.nr_map ||
-            end_pfn < movablemem_map.map[pos].start_pfn) {
-                /*
-                 * If pos is not the end of array, we need to move all
-                 * the rest elements backward.
-                 */
-                if (pos < movablemem_map.nr_map)
-                        memmove(&movablemem_map.map[pos+1],
-                                &movablemem_map.map[pos],
-                                sizeof(struct movablemem_entry) *
-                                (movablemem_map.nr_map - pos));
-                movablemem_map.map[pos].start_pfn = start_pfn;
-                movablemem_map.map[pos].end_pfn = end_pfn;
-                movablemem_map.nr_map++;
-                return;
-        }
-
-        /* overlap will be at the last overlapped range */
-        for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++)
-                if (end_pfn < movablemem_map.map[overlap].start_pfn)
-                        break;
-
-        /*
-         * If there are more ranges overlapped, we need to merge them,
-         * and move the rest elements forward.
-         */
-        overlap--;
-        movablemem_map.map[pos].start_pfn = min(start_pfn,
-                                        movablemem_map.map[pos].start_pfn);
-        movablemem_map.map[pos].end_pfn = max(end_pfn,
-                                        movablemem_map.map[overlap].end_pfn);
-
-        if (pos != overlap && overlap + 1 != movablemem_map.nr_map)
-                memmove(&movablemem_map.map[pos+1],
-                        &movablemem_map.map[overlap+1],
-                        sizeof(struct movablemem_entry) *
-                        (movablemem_map.nr_map - overlap - 1));
-
-        movablemem_map.nr_map -= overlap - pos;
-}
-
-/**
- * movablemem_map_add_region - Add a memory range into movablemem_map.
- * @start: physical start address of range
- * @end: physical end address of range
- *
- * This function transform the physical address into pfn, and then add the
- * range into movablemem_map by calling insert_movablemem_map().
- */
-static void __init movablemem_map_add_region(u64 start, u64 size)
-{
-        unsigned long start_pfn, end_pfn;
-
-        /* In case size == 0 or start + size overflows */
-        if (start + size <= start)
-                return;
-
-        if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) {
-                pr_err("movablemem_map: too many entries;"
-                        " ignoring [mem %#010llx-%#010llx]\n",
-                        (unsigned long long) start,
-                        (unsigned long long) (start + size - 1));
-                return;
-        }
-
-        start_pfn = PFN_DOWN(start);
-        end_pfn = PFN_UP(start + size);
-        insert_movablemem_map(start_pfn, end_pfn);
-}
-
-/*
- * cmdline_parse_movablemem_map - Parse boot option movablemem_map.
- * @p: The boot option of the following format:
- *     movablemem_map=nn[KMG]@ss[KMG]
- *
- * This option sets the memory range [ss, ss+nn) to be used as movable memory.
- *
- * Return: 0 on success or -EINVAL on failure.
- */
-static int __init cmdline_parse_movablemem_map(char *p)
-{
-        char *oldp;
-        u64 start_at, mem_size;
-
-        if (!p)
-                goto err;
-
-        if (!strcmp(p, "acpi"))
-                movablemem_map.acpi = true;
-
-        /*
-         * If user decide to use info from BIOS, all the other user specified
-         * ranges will be ingored.
-         */
-        if (movablemem_map.acpi) {
-                if (movablemem_map.nr_map) {
-                        memset(movablemem_map.map, 0,
-                                sizeof(struct movablemem_entry)
-                                * movablemem_map.nr_map);
-                        movablemem_map.nr_map = 0;
-                }
-                return 0;
-        }
-
-        oldp = p;
-        mem_size = memparse(p, &p);
-        if (p == oldp)
-                goto err;
-
-        if (*p == '@') {
-                oldp = ++p;
-                start_at = memparse(p, &p);
-                if (p == oldp || *p != '\0')
-                        goto err;
-
-                movablemem_map_add_region(start_at, mem_size);
-                return 0;
-        }
-err:
-        return -EINVAL;
-}
-early_param("movablemem_map", cmdline_parse_movablemem_map);
-
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**