diff options
author | Yinghai Lu <yhlu.kernel@gmail.com> | 2008-06-06 21:53:33 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-06-10 05:31:52 -0400 |
commit | 9043f007963f4039befa3c31f47173f74a0b1c70 (patch) | |
tree | b0ff264c6b4b690400c944b3991467d083752ce7 | |
parent | cc1a9d86ce989083703c4bdc11b75a87e1cc404a (diff) |
x86, numa, 32-bit: use find_e820_area() to find KVA RAM on node
Don't assume we can use RAM near the end of every node.
This matters especially on systems with little memory, where both the
KVA address range and the KVA RAM could lie entirely below max_low_pfn.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/mm/discontig_32.c | 59 |
1 files changed, 33 insertions, 26 deletions
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 489605bab85a..accc7c6c57fc 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -228,17 +228,21 @@ static unsigned long calculate_numa_remap_pages(void) | |||
228 | { | 228 | { |
229 | int nid; | 229 | int nid; |
230 | unsigned long size, reserve_pages = 0; | 230 | unsigned long size, reserve_pages = 0; |
231 | unsigned long pfn; | ||
232 | 231 | ||
233 | for_each_online_node(nid) { | 232 | for_each_online_node(nid) { |
234 | unsigned old_end_pfn = node_end_pfn[nid]; | 233 | u64 node_end_target; |
234 | u64 node_end_final; | ||
235 | 235 | ||
236 | /* | 236 | /* |
237 | * The acpi/srat node info can show hot-add memroy zones | 237 | * The acpi/srat node info can show hot-add memroy zones |
238 | * where memory could be added but not currently present. | 238 | * where memory could be added but not currently present. |
239 | */ | 239 | */ |
240 | printk("node %d pfn: [%lx - %lx]\n", | ||
241 | nid, node_start_pfn[nid], node_end_pfn[nid]); | ||
240 | if (node_start_pfn[nid] > max_pfn) | 242 | if (node_start_pfn[nid] > max_pfn) |
241 | continue; | 243 | continue; |
244 | if (!node_end_pfn[nid]) | ||
245 | continue; | ||
242 | if (node_end_pfn[nid] > max_pfn) | 246 | if (node_end_pfn[nid] > max_pfn) |
243 | node_end_pfn[nid] = max_pfn; | 247 | node_end_pfn[nid] = max_pfn; |
244 | 248 | ||
@@ -250,37 +254,40 @@ static unsigned long calculate_numa_remap_pages(void) | |||
250 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | 254 | /* now the roundup is correct, convert to PAGE_SIZE pages */ |
251 | size = size * PTRS_PER_PTE; | 255 | size = size * PTRS_PER_PTE; |
252 | 256 | ||
253 | /* | 257 | node_end_target = round_down(node_end_pfn[nid] - size, |
254 | * Validate the region we are allocating only contains valid | 258 | PTRS_PER_PTE); |
255 | * pages. | 259 | node_end_target <<= PAGE_SHIFT; |
256 | */ | 260 | do { |
257 | for (pfn = node_end_pfn[nid] - size; | 261 | node_end_final = find_e820_area(node_end_target, |
258 | pfn < node_end_pfn[nid]; pfn++) | 262 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, |
259 | if (!page_is_ram(pfn)) | 263 | ((u64)size)<<PAGE_SHIFT, |
260 | break; | 264 | LARGE_PAGE_BYTES); |
261 | 265 | node_end_target -= LARGE_PAGE_BYTES; | |
262 | if (pfn != node_end_pfn[nid]) | 266 | } while (node_end_final == -1ULL && |
263 | size = 0; | 267 | (node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid])); |
268 | |||
269 | if (node_end_final == -1ULL) | ||
270 | panic("Can not get kva ram\n"); | ||
264 | 271 | ||
265 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", | 272 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", |
266 | size, nid); | 273 | size, nid); |
267 | node_remap_size[nid] = size; | 274 | node_remap_size[nid] = size; |
268 | node_remap_offset[nid] = reserve_pages; | 275 | node_remap_offset[nid] = reserve_pages; |
269 | reserve_pages += size; | 276 | reserve_pages += size; |
270 | printk("Shrinking node %d from %ld pages to %ld pages\n", | 277 | printk("Shrinking node %d from %ld pages to %lld pages\n", |
271 | nid, node_end_pfn[nid], node_end_pfn[nid] - size); | 278 | nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT); |
272 | 279 | ||
273 | if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { | 280 | /* |
274 | /* | 281 | * prevent kva address below max_low_pfn want it on system |
275 | * Align node_end_pfn[] and node_remap_start_pfn[] to | 282 | * with less memory later. |
276 | * pmd boundary. remap_numa_kva will barf otherwise. | 283 | * layout will be: KVA address , KVA RAM |
277 | */ | 284 | */ |
278 | printk("Shrinking node %d further by %ld pages for proper alignment\n", | 285 | if ((node_end_final>>PAGE_SHIFT) < max_low_pfn) |
279 | nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); | 286 | reserve_early(node_end_final, |
280 | size += node_end_pfn[nid] & (PTRS_PER_PTE-1); | 287 | node_end_final+(((u64)size)<<PAGE_SHIFT), |
281 | } | 288 | "KVA RAM"); |
282 | 289 | ||
283 | node_end_pfn[nid] -= size; | 290 | node_end_pfn[nid] = node_end_final>>PAGE_SHIFT; |
284 | node_remap_start_pfn[nid] = node_end_pfn[nid]; | 291 | node_remap_start_pfn[nid] = node_end_pfn[nid]; |
285 | shrink_active_range(nid, node_end_pfn[nid]); | 292 | shrink_active_range(nid, node_end_pfn[nid]); |
286 | } | 293 | } |