diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 29 | ||||
-rw-r--r-- | arch/x86/mm/srat.c | 71 | ||||
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 22 |
4 files changed, 113 insertions, 11 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 722a74161246..766087781ecd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1640,15 +1640,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1640 | that the amount of memory usable for all allocations | 1640 | that the amount of memory usable for all allocations |
1641 | is not too small. | 1641 | is not too small. |
1642 | 1642 | ||
1643 | movablemem_map=acpi | ||
1644 | [KNL,X86,IA-64,PPC] This parameter is similar to | ||
1645 | memmap except it specifies the memory map of | ||
1646 | ZONE_MOVABLE. | ||
1647 | This option informs the kernel to use the Hot Pluggable | ||
1648 | bit in flags from SRAT from the ACPI BIOS to determine which | ||
1649 | memory devices could be hotplugged. The corresponding | ||
1650 | memory ranges will be set as ZONE_MOVABLE. | ||
1651 | NOTE: Whatever node the kernel resides in will always | ||
1652 | be un-hotpluggable. | ||
1653 | |||
1643 | movablemem_map=nn[KMG]@ss[KMG] | 1654 | movablemem_map=nn[KMG]@ss[KMG] |
1644 | [KNL,X86,IA-64,PPC] This parameter is similar to | 1655 | [KNL,X86,IA-64,PPC] This parameter is similar to |
1645 | memmap except it specifies the memory map of | 1656 | memmap except it specifies the memory map of |
1646 | ZONE_MOVABLE. | 1657 | ZONE_MOVABLE. |
1647 | If more areas are all within one node, then from | 1658 | If user specifies memory ranges, the info in SRAT will |
1648 | lowest ss to the end of the node will be ZONE_MOVABLE. | 1659 | be ignored. And it works as follows: |
1649 | If an area covers two or more nodes, the area from | 1660 | - If more ranges are all within one node, then from |
1650 | ss to the end of the 1st node will be ZONE_MOVABLE, | 1661 | lowest ss to the end of the node will be ZONE_MOVABLE. |
1651 | and all the rest nodes will only have ZONE_MOVABLE. | 1662 | - If a range is within a node, then from ss to the end |
1663 | of the node will be ZONE_MOVABLE. | ||
1664 | - If a range covers two or more nodes, then from ss to | ||
1665 | the end of the 1st node will be ZONE_MOVABLE, and all | ||
1666 | the rest nodes will only have ZONE_MOVABLE. | ||
1652 | If memmap is specified at the same time, the | 1667 | If memmap is specified at the same time, the |
1653 | movablemem_map will be limited within the memmap | 1668 | movablemem_map will be limited within the memmap |
1654 | areas. If kernelcore or movablecore is also specified, | 1669 | areas. If kernelcore or movablecore is also specified, |
@@ -1656,6 +1671,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1656 | satisfied. So the administrator should be careful that | 1671 | satisfied. So the administrator should be careful that |
1657 | the amount of movablemem_map areas are not too large. | 1672 | the amount of movablemem_map areas are not too large. |
1658 | Otherwise kernel won't have enough memory to start. | 1673 | Otherwise kernel won't have enough memory to start. |
1674 | NOTE: We don't stop users from specifying the node | ||
1675 | the kernel resides in as hotpluggable so that this | ||
1676 | option can be used as a workaround for firmware | ||
1677 | bugs. | ||
1659 | 1678 | ||
1660 | MTD_Partition= [MTD] | 1679 | MTD_Partition= [MTD] |
1661 | Format: <name>,<region-number>,<size>,<offset> | 1680 | Format: <name>,<region-number>,<size>,<offset> |
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 3e90039e52e0..79836d01f789 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -142,16 +142,72 @@ static inline int save_add_info(void) {return 0;} | |||
142 | #endif | 142 | #endif |
143 | 143 | ||
144 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 144 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
145 | static void __init handle_movablemem(int node, u64 start, u64 end) | 145 | static void __init |
146 | handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) | ||
146 | { | 147 | { |
147 | int overlap; | 148 | int overlap, i; |
148 | unsigned long start_pfn, end_pfn; | 149 | unsigned long start_pfn, end_pfn; |
149 | 150 | ||
150 | start_pfn = PFN_DOWN(start); | 151 | start_pfn = PFN_DOWN(start); |
151 | end_pfn = PFN_UP(end); | 152 | end_pfn = PFN_UP(end); |
152 | 153 | ||
153 | /* | 154 | /* |
154 | * For movablecore_map=nn[KMG]@ss[KMG]: | 155 | * For movablemem_map=acpi: |
156 | * | ||
157 | * SRAT: |_____| |_____| |_________| |_________| ...... | ||
158 | * node id: 0 1 1 2 | ||
159 | * hotpluggable: n y y n | ||
160 | * movablemem_map: |_____| |_________| | ||
161 | * | ||
162 | * Using movablemem_map, we can prevent memblock from allocating memory | ||
163 | * on ZONE_MOVABLE at boot time. | ||
164 | * | ||
165 | * Before parsing SRAT, memblock has already reserved some memory ranges | ||
166 | * for other purposes, such as for the kernel image. We cannot prevent | ||
167 | * the kernel from using this memory, so we need to exclude this memory | ||
168 | * even if it is hotpluggable. | ||
169 | * Furthermore, to ensure the kernel has enough memory to boot, we make | ||
170 | * all the memory on the node which the kernel resides in | ||
171 | * un-hotpluggable. | ||
172 | */ | ||
173 | if (hotpluggable && movablemem_map.acpi) { | ||
174 | /* Exclude ranges reserved by memblock. */ | ||
175 | struct memblock_type *rgn = &memblock.reserved; | ||
176 | |||
177 | for (i = 0; i < rgn->cnt; i++) { | ||
178 | if (end <= rgn->regions[i].base || | ||
179 | start >= rgn->regions[i].base + | ||
180 | rgn->regions[i].size) | ||
181 | continue; | ||
182 | |||
183 | /* | ||
184 | * If the memory range overlaps the memory reserved by | ||
185 | * memblock, then the kernel resides in this node. | ||
186 | */ | ||
187 | node_set(node, movablemem_map.numa_nodes_kernel); | ||
188 | |||
189 | goto out; | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * If the kernel resides in this node, then the whole node | ||
194 | * should not be hotpluggable. | ||
195 | */ | ||
196 | if (node_isset(node, movablemem_map.numa_nodes_kernel)) | ||
197 | goto out; | ||
198 | |||
199 | insert_movablemem_map(start_pfn, end_pfn); | ||
200 | |||
201 | /* | ||
202 | * numa_nodes_hotplug nodemask represents which nodes are put | ||
203 | * into movablemem_map.map[]. | ||
204 | */ | ||
205 | node_set(node, movablemem_map.numa_nodes_hotplug); | ||
206 | goto out; | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * For movablemem_map=nn[KMG]@ss[KMG]: | ||
155 | * | 211 | * |
156 | * SRAT: |_____| |_____| |_________| |_________| ...... | 212 | * SRAT: |_____| |_____| |_________| |_________| ...... |
157 | * node id: 0 1 1 2 | 213 | * node id: 0 1 1 2 |
@@ -160,6 +216,8 @@ static void __init handle_movablemem(int node, u64 start, u64 end) | |||
160 | * | 216 | * |
161 | * Using movablemem_map, we can prevent memblock from allocating memory | 217 | * Using movablemem_map, we can prevent memblock from allocating memory |
162 | * on ZONE_MOVABLE at boot time. | 218 | * on ZONE_MOVABLE at boot time. |
219 | * | ||
220 | * NOTE: In this case, SRAT info will be ignored. | ||
163 | */ | 221 | */ |
164 | overlap = movablemem_map_overlap(start_pfn, end_pfn); | 222 | overlap = movablemem_map_overlap(start_pfn, end_pfn); |
165 | if (overlap >= 0) { | 223 | if (overlap >= 0) { |
@@ -187,9 +245,12 @@ static void __init handle_movablemem(int node, u64 start, u64 end) | |||
187 | */ | 245 | */ |
188 | insert_movablemem_map(start_pfn, end_pfn); | 246 | insert_movablemem_map(start_pfn, end_pfn); |
189 | } | 247 | } |
248 | out: | ||
249 | return; | ||
190 | } | 250 | } |
191 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 251 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
192 | static inline void handle_movablemem(int node, u64 start, u64 end) | 252 | static inline void |
253 | handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) | ||
193 | { | 254 | { |
194 | } | 255 | } |
195 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 256 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
@@ -234,7 +295,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
234 | (unsigned long long) start, (unsigned long long) end - 1, | 295 | (unsigned long long) start, (unsigned long long) end - 1, |
235 | hotpluggable ? "Hot Pluggable": ""); | 296 | hotpluggable ? "Hot Pluggable": ""); |
236 | 297 | ||
237 | handle_movablemem(node, start, end); | 298 | handle_movablemem(node, start, end, hotpluggable); |
238 | 299 | ||
239 | return 0; | 300 | return 0; |
240 | out_err_bad_srat: | 301 | out_err_bad_srat: |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 4d7377a1d084..72a42c0fa633 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1366,9 +1366,11 @@ struct movablemem_entry { | |||
1366 | }; | 1366 | }; |
1367 | 1367 | ||
1368 | struct movablemem_map { | 1368 | struct movablemem_map { |
1369 | bool acpi; /* true if using SRAT info */ | ||
1369 | int nr_map; | 1370 | int nr_map; |
1370 | struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; | 1371 | struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; |
1371 | nodemask_t numa_nodes_hotplug; /* on which nodes we specify memory */ | 1372 | nodemask_t numa_nodes_hotplug; /* on which nodes we specify memory */ |
1373 | nodemask_t numa_nodes_kernel; /* on which nodes kernel resides in */ | ||
1372 | }; | 1374 | }; |
1373 | 1375 | ||
1374 | extern void __init insert_movablemem_map(unsigned long start_pfn, | 1376 | extern void __init insert_movablemem_map(unsigned long start_pfn, |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7ea9a003ad57..a7381be21320 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -203,7 +203,10 @@ static unsigned long __meminitdata dma_reserve; | |||
203 | 203 | ||
204 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 204 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
205 | /* Movable memory ranges, will also be used by memblock subsystem. */ | 205 | /* Movable memory ranges, will also be used by memblock subsystem. */ |
206 | struct movablemem_map movablemem_map; | 206 | struct movablemem_map movablemem_map = { |
207 | .acpi = false, | ||
208 | .nr_map = 0, | ||
209 | }; | ||
207 | 210 | ||
208 | static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; | 211 | static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; |
209 | static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; | 212 | static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; |
@@ -5314,6 +5317,23 @@ static int __init cmdline_parse_movablemem_map(char *p) | |||
5314 | if (!p) | 5317 | if (!p) |
5315 | goto err; | 5318 | goto err; |
5316 | 5319 | ||
5320 | if (!strcmp(p, "acpi")) | ||
5321 | movablemem_map.acpi = true; | ||
5322 | |||
5323 | /* | ||
5324 | * If the user decides to use info from the BIOS, all the other | ||
5325 | * user-specified ranges will be ignored. | ||
5326 | */ | ||
5327 | if (movablemem_map.acpi) { | ||
5328 | if (movablemem_map.nr_map) { | ||
5329 | memset(movablemem_map.map, 0, | ||
5330 | sizeof(struct movablemem_entry) | ||
5331 | * movablemem_map.nr_map); | ||
5332 | movablemem_map.nr_map = 0; | ||
5333 | } | ||
5334 | return 0; | ||
5335 | } | ||
5336 | |||
5317 | oldp = p; | 5337 | oldp = p; |
5318 | mem_size = memparse(p, &p); | 5338 | mem_size = memparse(p, &p); |
5319 | if (p == oldp) | 5339 | if (p == oldp) |