diff options
author | Yinghai Lu <yhlu.kernel@gmail.com> | 2008-03-07 18:02:50 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-03-21 12:06:15 -0400 |
commit | f62f1fc9ef94f74fda2b456d935ba2da69fa0a40 (patch) | |
tree | 93a1b5d29e516d0c155199b3bab49d40cb1f9cbb | |
parent | fc115bf19b5b2f383614b264569fb48a43bfe56d (diff) |
x86: reserve dma32 early for gart
A system with 256 GB of RAM crashes in the following way when NUMA is
disabled:
Your BIOS doesn't leave a aperture memory hole
Please enable the IOMMU option in the BIOS setup
This costs you 64 MB of RAM
Cannot allocate aperture memory hole (ffff8101c0000000,65536K)
Kernel panic - not syncing: Not enough memory for aperture
Pid: 0, comm: swapper Not tainted 2.6.25-rc4-x86-latest.git #33
Call Trace:
[<ffffffff84037c62>] panic+0xb2/0x190
[<ffffffff840381fc>] ? release_console_sem+0x7c/0x250
[<ffffffff847b1628>] ? __alloc_bootmem_nopanic+0x48/0x90
[<ffffffff847b0ac9>] ? free_bootmem+0x29/0x50
[<ffffffff847ac1f7>] gart_iommu_hole_init+0x5e7/0x680
[<ffffffff847b255b>] ? alloc_large_system_hash+0x16b/0x310
[<ffffffff84506a2f>] ? _etext+0x0/0x1
[<ffffffff847a2e8c>] pci_iommu_alloc+0x1c/0x40
[<ffffffff847ac795>] mem_init+0x45/0x1a0
[<ffffffff8479ff35>] start_kernel+0x295/0x380
[<ffffffff8479f1c2>] _sinittext+0x1c2/0x230
The root cause is that the memmap (mapped with PMDs) is too big, e.g.
[ffffe200e0600000-ffffe200e07fffff] PMD ->ffff81383c000000 on node 0
In total it comes to almost 4G, and vmemmap_alloc_block will use up the RAM under 4G.
There are two possible solutions:
1. Make the memmap allocation get its memory from above 4G.
2. Reserve some DMA32 range early, before we try to set up the memmap for all
of memory, and release it before pci_iommu_alloc, so that gart or swiotlb is
guaranteed to get some range under the 4G limit.
This patch uses method 2, because method 1 may need more code to handle
SPARSEMEM and SPARSEMEM_VMEMMAP.
With the patch applied, the boot log shows:
Your BIOS doesn't leave a aperture memory hole
Please enable the IOMMU option in the BIOS setup
This costs you 64 MB of RAM
Mapping aperture over 65536 KB of RAM @ 4000000
Memory: 264245736k/268959744k available (8484k kernel code, 4187464k reserved, 4004k data, 724k init)
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | arch/x86/kernel/pci-dma_64.c | 49 | ||||
-rw-r--r-- | arch/x86/kernel/setup_64.c | 2 | ||||
-rw-r--r-- | include/asm-x86/pci_64.h | 1 |
3 files changed, 52 insertions, 0 deletions
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 375cb2bc45be..8bc1e185e557 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c | |||
@@ -8,6 +8,8 @@ | |||
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/dmar.h> | 10 | #include <linux/dmar.h> |
11 | #include <linux/bootmem.h> | ||
12 | #include <asm/proto.h> | ||
11 | #include <asm/io.h> | 13 | #include <asm/io.h> |
12 | #include <asm/gart.h> | 14 | #include <asm/gart.h> |
13 | #include <asm/calgary.h> | 15 | #include <asm/calgary.h> |
@@ -286,8 +288,55 @@ static __init int iommu_setup(char *p) | |||
286 | } | 288 | } |
287 | early_param("iommu", iommu_setup); | 289 | early_param("iommu", iommu_setup); |
288 | 290 | ||
291 | static __initdata void *dma32_bootmem_ptr; | ||
292 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); | ||
293 | |||
294 | static int __init parse_dma32_size_opt(char *p) | ||
295 | { | ||
296 | if (!p) | ||
297 | return -EINVAL; | ||
298 | dma32_bootmem_size = memparse(p, &p); | ||
299 | return 0; | ||
300 | } | ||
301 | early_param("dma32_size", parse_dma32_size_opt); | ||
302 | |||
303 | void __init dma32_reserve_bootmem(void) | ||
304 | { | ||
305 | unsigned long size, align; | ||
306 | if (end_pfn <= MAX_DMA32_PFN) | ||
307 | return; | ||
308 | |||
309 | align = 64ULL<<20; | ||
310 | size = round_up(dma32_bootmem_size, align); | ||
311 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | ||
312 | __pa(MAX_DMA_ADDRESS)); | ||
313 | if (dma32_bootmem_ptr) | ||
314 | dma32_bootmem_size = size; | ||
315 | else | ||
316 | dma32_bootmem_size = 0; | ||
317 | } | ||
318 | static void __init dma32_free_bootmem(void) | ||
319 | { | ||
320 | int node; | ||
321 | |||
322 | if (end_pfn <= MAX_DMA32_PFN) | ||
323 | return; | ||
324 | |||
325 | if (!dma32_bootmem_ptr) | ||
326 | return; | ||
327 | |||
328 | for_each_online_node(node) | ||
329 | free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), | ||
330 | dma32_bootmem_size); | ||
331 | |||
332 | dma32_bootmem_ptr = NULL; | ||
333 | dma32_bootmem_size = 0; | ||
334 | } | ||
335 | |||
289 | void __init pci_iommu_alloc(void) | 336 | void __init pci_iommu_alloc(void) |
290 | { | 337 | { |
338 | /* free the range so iommu could get some range less than 4G */ | ||
339 | dma32_free_bootmem(); | ||
291 | /* | 340 | /* |
292 | * The order of these functions is important for | 341 | * The order of these functions is important for |
293 | * fall-back/fail-over reasons | 342 | * fall-back/fail-over reasons |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 7637dc91c79b..a775fe3de955 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -389,6 +389,8 @@ void __init setup_arch(char **cmdline_p) | |||
389 | 389 | ||
390 | early_res_to_bootmem(); | 390 | early_res_to_bootmem(); |
391 | 391 | ||
392 | dma32_reserve_bootmem(); | ||
393 | |||
392 | #ifdef CONFIG_ACPI_SLEEP | 394 | #ifdef CONFIG_ACPI_SLEEP |
393 | /* | 395 | /* |
394 | * Reserve low memory region for sleep support. | 396 | * Reserve low memory region for sleep support. |
diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h index 374690314539..da8266a08005 100644 --- a/include/asm-x86/pci_64.h +++ b/include/asm-x86/pci_64.h | |||
@@ -25,6 +25,7 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int l | |||
25 | 25 | ||
26 | 26 | ||
27 | 27 | ||
28 | extern void dma32_reserve_bootmem(void); | ||
28 | extern void pci_iommu_alloc(void); | 29 | extern void pci_iommu_alloc(void); |
29 | 30 | ||
30 | /* The PCI address space does equal the physical memory | 31 | /* The PCI address space does equal the physical memory |