aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorFUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>2009-11-10 05:46:20 -0500
committerIngo Molnar <mingo@elte.hu>2009-11-10 06:32:07 -0500
commit75f1cdf1dda92cae037ec848ae63690d91913eac (patch)
tree9c12705002ebfa2d75333c20a19d0ac15f1db1d9 /arch
parentad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 (diff)
x86: Handle HW IOMMU initialization failure gracefully
If HW IOMMU initialization fails (Intel VT-d often does this, typically due to BIOS bugs), we fall back to nommu. It doesn't work for the majority since nowadays we have more than 4GB memory so we must use swiotlb instead of nommu. The problem is that it's too late to initialize swiotlb when HW IOMMU initialization fails. We need to allocate swiotlb memory earlier from the bootmem allocator. Chris explained the issue in detail: http://marc.info/?l=linux-kernel&m=125657444317079&w=2 The current x86 IOMMU initialization sequence is too complicated and handling the above issue makes it more hacky. This patch changes the x86 IOMMU initialization sequence to handle the above issue cleanly. The new x86 IOMMU initialization sequence is: 1. we initialize the swiotlb (and set swiotlb to 1) in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). dma_ops is set to swiotlb_dma_ops or nommu_dma_ops. If swiotlb usage is forced by the boot option, we finish here. 2. we call the detection functions of all the IOMMUs. 3. the detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly). 4. if the IOMMU initialization function doesn't need swiotlb, it sets swiotlb to zero (e.g. the initialization is successful). 5. if we find that swiotlb is set to zero, we free the swiotlb resources. Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-10-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/kernel/amd_iommu.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c2
-rw-r--r--arch/x86/kernel/aperture_64.c2
-rw-r--r--arch/x86/kernel/pci-calgary_64.c10
-rw-r--r--arch/x86/kernel/pci-dma.c21
-rw-r--r--arch/x86/kernel/pci-gart_64.c1
-rw-r--r--arch/x86/kernel/pci-nommu.c9
-rw-r--r--arch/x86/kernel/pci-swiotlb.c7
9 files changed, 21 insertions, 34 deletions
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 878b3071576..df42a712361 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,7 +2,6 @@
2#define _ASM_X86_IOMMU_H 2#define _ASM_X86_IOMMU_H
3 3
4static inline void iommu_shutdown_noop(void) {} 4static inline void iommu_shutdown_noop(void) {}
5extern void no_iommu_init(void);
6extern struct dma_map_ops nommu_dma_ops; 5extern struct dma_map_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 6extern int force_iommu, no_iommu;
8extern int iommu_detected; 7extern int iommu_detected;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0285521e0a9..66237fde758 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -2110,8 +2110,8 @@ int __init amd_iommu_init_dma_ops(void)
2110 prealloc_protection_domains(); 2110 prealloc_protection_domains();
2111 2111
2112 iommu_detected = 1; 2112 iommu_detected = 1;
2113 force_iommu = 1;
2114 bad_dma_address = 0; 2113 bad_dma_address = 0;
2114 swiotlb = 0;
2115#ifdef CONFIG_GART_IOMMU 2115#ifdef CONFIG_GART_IOMMU
2116 gart_iommu_aperture_disabled = 1; 2116 gart_iommu_aperture_disabled = 1;
2117 gart_iommu_aperture = 0; 2117 gart_iommu_aperture = 0;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c41aabddaa2..0d4581e602a 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1330,7 +1330,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1330 1330
1331void __init amd_iommu_detect(void) 1331void __init amd_iommu_detect(void)
1332{ 1332{
1333 if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) 1333 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1334 return; 1334 return;
1335 1335
1336 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1336 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 03933cf0b63..e0dfb6856aa 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -458,7 +458,7 @@ out:
458 458
459 if (aper_alloc) { 459 if (aper_alloc) {
460 /* Got the aperture from the AGP bridge */ 460 /* Got the aperture from the AGP bridge */
461 } else if (swiotlb && !valid_agp) { 461 } else if (!valid_agp) {
462 /* Do nothing */ 462 /* Do nothing */
463 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || 463 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
464 force_iommu || 464 force_iommu ||
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 47bd419ea4d..833f491440b 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1360,7 +1360,7 @@ void __init detect_calgary(void)
1360 * if the user specified iommu=off or iommu=soft or we found 1360 * if the user specified iommu=off or iommu=soft or we found
1361 * another HW IOMMU already, bail out. 1361 * another HW IOMMU already, bail out.
1362 */ 1362 */
1363 if (swiotlb || no_iommu || iommu_detected) 1363 if (no_iommu || iommu_detected)
1364 return; 1364 return;
1365 1365
1366 if (!use_calgary) 1366 if (!use_calgary)
@@ -1445,10 +1445,6 @@ void __init detect_calgary(void)
1445 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", 1445 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
1446 specified_table_size); 1446 specified_table_size);
1447 1447
1448 /* swiotlb for devices that aren't behind the Calgary. */
1449 if (max_pfn > MAX_DMA32_PFN)
1450 swiotlb = 1;
1451
1452 x86_init.iommu.iommu_init = calgary_iommu_init; 1448 x86_init.iommu.iommu_init = calgary_iommu_init;
1453 } 1449 }
1454 return; 1450 return;
@@ -1476,11 +1472,7 @@ int __init calgary_iommu_init(void)
1476 return ret; 1472 return ret;
1477 } 1473 }
1478 1474
1479 force_iommu = 1;
1480 bad_dma_address = 0x0; 1475 bad_dma_address = 0x0;
1481 /* dma_ops is set to swiotlb or nommu */
1482 if (!dma_ops)
1483 dma_ops = &nommu_dma_ops;
1484 1476
1485 return 0; 1477 return 0;
1486} 1478}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index bed05e2e589..a234e63c265 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -124,24 +124,24 @@ static void __init dma32_free_bootmem(void)
124 124
125void __init pci_iommu_alloc(void) 125void __init pci_iommu_alloc(void)
126{ 126{
127 /* swiotlb is forced by the boot option */
128 int use_swiotlb = swiotlb;
127#ifdef CONFIG_X86_64 129#ifdef CONFIG_X86_64
128 /* free the range so iommu could get some range less than 4G */ 130 /* free the range so iommu could get some range less than 4G */
129 dma32_free_bootmem(); 131 dma32_free_bootmem();
130#endif 132#endif
133 pci_swiotlb_init();
134 if (use_swiotlb)
135 return;
131 136
132 /*
133 * The order of these functions is important for
134 * fall-back/fail-over reasons
135 */
136 gart_iommu_hole_init(); 137 gart_iommu_hole_init();
137 138
138 detect_calgary(); 139 detect_calgary();
139 140
140 detect_intel_iommu(); 141 detect_intel_iommu();
141 142
143 /* needs to be called after gart_iommu_hole_init */
142 amd_iommu_detect(); 144 amd_iommu_detect();
143
144 pci_swiotlb_init();
145} 145}
146 146
147void *dma_generic_alloc_coherent(struct device *dev, size_t size, 147void *dma_generic_alloc_coherent(struct device *dev, size_t size,
@@ -291,10 +291,15 @@ static int __init pci_iommu_init(void)
291#ifdef CONFIG_PCI 291#ifdef CONFIG_PCI
292 dma_debug_add_bus(&pci_bus_type); 292 dma_debug_add_bus(&pci_bus_type);
293#endif 293#endif
294
295 x86_init.iommu.iommu_init(); 294 x86_init.iommu.iommu_init();
296 295
297 no_iommu_init(); 296 if (swiotlb) {
297 printk(KERN_INFO "PCI-DMA: "
298 "Using software bounce buffering for IO (SWIOTLB)\n");
299 swiotlb_print_info();
300 } else
301 swiotlb_free();
302
298 return 0; 303 return 0;
299} 304}
300/* Must execute after PCI subsystem */ 305/* Must execute after PCI subsystem */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 0410bd30060..919182e15d1 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -833,6 +833,7 @@ int __init gart_iommu_init(void)
833 flush_gart(); 833 flush_gart();
834 dma_ops = &gart_dma_ops; 834 dma_ops = &gart_dma_ops;
835 x86_platform.iommu_shutdown = gart_iommu_shutdown; 835 x86_platform.iommu_shutdown = gart_iommu_shutdown;
836 swiotlb = 0;
836 837
837 return 0; 838 return 0;
838} 839}
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a3933d4330c..875e3822ae6 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = {
103 .sync_sg_for_device = nommu_sync_sg_for_device, 103 .sync_sg_for_device = nommu_sync_sg_for_device,
104 .is_phys = 1, 104 .is_phys = 1,
105}; 105};
106
107void __init no_iommu_init(void)
108{
109 if (dma_ops)
110 return;
111
112 force_iommu = 0; /* no HW IOMMU */
113 dma_ops = &nommu_dma_ops;
114}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index ea20ef7ca52..17ce4221bd0 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -46,13 +46,12 @@ void __init pci_swiotlb_init(void)
46{ 46{
47 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 47 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
48#ifdef CONFIG_X86_64 48#ifdef CONFIG_X86_64
49 if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) 49 if (!no_iommu && max_pfn > MAX_DMA32_PFN)
50 swiotlb = 1; 50 swiotlb = 1;
51#endif 51#endif
52 if (swiotlb_force)
53 swiotlb = 1;
54 if (swiotlb) { 52 if (swiotlb) {
55 swiotlb_init(0); 53 swiotlb_init(0);
56 dma_ops = &swiotlb_dma_ops; 54 dma_ops = &swiotlb_dma_ops;
57 } 55 } else
56 dma_ops = &nommu_dma_ops;
58} 57}