aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> 2009-11-10 05:46:20 -0500
committer: Ingo Molnar <mingo@elte.hu> 2009-11-10 06:32:07 -0500
commit: 75f1cdf1dda92cae037ec848ae63690d91913eac (patch)
tree: 9c12705002ebfa2d75333c20a19d0ac15f1db1d9
parent: ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 (diff)
x86: Handle HW IOMMU initialization failure gracefully
If HW IOMMU initialization fails (Intel VT-d often does this, typically due to BIOS bugs), we fall back to nommu. This doesn't work for the majority of systems, since nowadays most machines have more than 4GB of memory, so we must use swiotlb instead of nommu.

The problem is that it's too late to initialize swiotlb when HW IOMMU initialization fails. We need to allocate swiotlb memory earlier, from the bootmem allocator. Chris explained the issue in detail: http://marc.info/?l=linux-kernel&m=125657444317079&w=2

The current x86 IOMMU initialization sequence is too complicated, and handling the above issue makes it more hacky. This patch changes the x86 IOMMU initialization sequence to handle the above issue cleanly.

The new x86 IOMMU initialization sequence is:

1. We initialize swiotlb (and set swiotlb to 1) in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). dma_ops is set to swiotlb_dma_ops or nommu_dma_ops. If swiotlb usage is forced by the boot option, we finish here.
2. We call the detection functions of all the IOMMUs.
3. The detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly).
4. If the IOMMU initialization function doesn't need swiotlb, it sets swiotlb to zero (e.g. when the initialization is successful).
5. If we find that swiotlb is set to zero, we free the swiotlb resources.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
LKML-Reference: <1257849980-22640-10-git-send-email-fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/x86/include/asm/iommu.h | 1
-rw-r--r-- arch/x86/kernel/amd_iommu.c | 2
-rw-r--r-- arch/x86/kernel/amd_iommu_init.c | 2
-rw-r--r-- arch/x86/kernel/aperture_64.c | 2
-rw-r--r-- arch/x86/kernel/pci-calgary_64.c | 10
-rw-r--r-- arch/x86/kernel/pci-dma.c | 21
-rw-r--r-- arch/x86/kernel/pci-gart_64.c | 1
-rw-r--r-- arch/x86/kernel/pci-nommu.c | 9
-rw-r--r-- arch/x86/kernel/pci-swiotlb.c | 7
-rw-r--r-- drivers/pci/dmar.c | 3
-rw-r--r-- drivers/pci/intel-iommu.c | 6
-rw-r--r-- lib/swiotlb.c | 4
12 files changed, 29 insertions, 39 deletions
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 878b30715766..df42a712361f 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,7 +2,6 @@
2#define _ASM_X86_IOMMU_H 2#define _ASM_X86_IOMMU_H
3 3
4static inline void iommu_shutdown_noop(void) {} 4static inline void iommu_shutdown_noop(void) {}
5extern void no_iommu_init(void);
6extern struct dma_map_ops nommu_dma_ops; 5extern struct dma_map_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 6extern int force_iommu, no_iommu;
8extern int iommu_detected; 7extern int iommu_detected;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0285521e0a99..66237fde758f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -2110,8 +2110,8 @@ int __init amd_iommu_init_dma_ops(void)
2110 prealloc_protection_domains(); 2110 prealloc_protection_domains();
2111 2111
2112 iommu_detected = 1; 2112 iommu_detected = 1;
2113 force_iommu = 1;
2114 bad_dma_address = 0; 2113 bad_dma_address = 0;
2114 swiotlb = 0;
2115#ifdef CONFIG_GART_IOMMU 2115#ifdef CONFIG_GART_IOMMU
2116 gart_iommu_aperture_disabled = 1; 2116 gart_iommu_aperture_disabled = 1;
2117 gart_iommu_aperture = 0; 2117 gart_iommu_aperture = 0;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c41aabddaa2a..0d4581e602a4 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1330,7 +1330,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1330 1330
1331void __init amd_iommu_detect(void) 1331void __init amd_iommu_detect(void)
1332{ 1332{
1333 if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) 1333 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1334 return; 1334 return;
1335 1335
1336 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1336 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 03933cf0b63c..e0dfb6856aa2 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -458,7 +458,7 @@ out:
458 458
459 if (aper_alloc) { 459 if (aper_alloc) {
460 /* Got the aperture from the AGP bridge */ 460 /* Got the aperture from the AGP bridge */
461 } else if (swiotlb && !valid_agp) { 461 } else if (!valid_agp) {
462 /* Do nothing */ 462 /* Do nothing */
463 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || 463 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
464 force_iommu || 464 force_iommu ||
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 47bd419ea4d2..833f491440b9 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1360,7 +1360,7 @@ void __init detect_calgary(void)
1360 * if the user specified iommu=off or iommu=soft or we found 1360 * if the user specified iommu=off or iommu=soft or we found
1361 * another HW IOMMU already, bail out. 1361 * another HW IOMMU already, bail out.
1362 */ 1362 */
1363 if (swiotlb || no_iommu || iommu_detected) 1363 if (no_iommu || iommu_detected)
1364 return; 1364 return;
1365 1365
1366 if (!use_calgary) 1366 if (!use_calgary)
@@ -1445,10 +1445,6 @@ void __init detect_calgary(void)
1445 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", 1445 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
1446 specified_table_size); 1446 specified_table_size);
1447 1447
1448 /* swiotlb for devices that aren't behind the Calgary. */
1449 if (max_pfn > MAX_DMA32_PFN)
1450 swiotlb = 1;
1451
1452 x86_init.iommu.iommu_init = calgary_iommu_init; 1448 x86_init.iommu.iommu_init = calgary_iommu_init;
1453 } 1449 }
1454 return; 1450 return;
@@ -1476,11 +1472,7 @@ int __init calgary_iommu_init(void)
1476 return ret; 1472 return ret;
1477 } 1473 }
1478 1474
1479 force_iommu = 1;
1480 bad_dma_address = 0x0; 1475 bad_dma_address = 0x0;
1481 /* dma_ops is set to swiotlb or nommu */
1482 if (!dma_ops)
1483 dma_ops = &nommu_dma_ops;
1484 1476
1485 return 0; 1477 return 0;
1486} 1478}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index bed05e2e5890..a234e63c2656 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -124,24 +124,24 @@ static void __init dma32_free_bootmem(void)
124 124
125void __init pci_iommu_alloc(void) 125void __init pci_iommu_alloc(void)
126{ 126{
127 /* swiotlb is forced by the boot option */
128 int use_swiotlb = swiotlb;
127#ifdef CONFIG_X86_64 129#ifdef CONFIG_X86_64
128 /* free the range so iommu could get some range less than 4G */ 130 /* free the range so iommu could get some range less than 4G */
129 dma32_free_bootmem(); 131 dma32_free_bootmem();
130#endif 132#endif
133 pci_swiotlb_init();
134 if (use_swiotlb)
135 return;
131 136
132 /*
133 * The order of these functions is important for
134 * fall-back/fail-over reasons
135 */
136 gart_iommu_hole_init(); 137 gart_iommu_hole_init();
137 138
138 detect_calgary(); 139 detect_calgary();
139 140
140 detect_intel_iommu(); 141 detect_intel_iommu();
141 142
143 /* needs to be called after gart_iommu_hole_init */
142 amd_iommu_detect(); 144 amd_iommu_detect();
143
144 pci_swiotlb_init();
145} 145}
146 146
147void *dma_generic_alloc_coherent(struct device *dev, size_t size, 147void *dma_generic_alloc_coherent(struct device *dev, size_t size,
@@ -291,10 +291,15 @@ static int __init pci_iommu_init(void)
291#ifdef CONFIG_PCI 291#ifdef CONFIG_PCI
292 dma_debug_add_bus(&pci_bus_type); 292 dma_debug_add_bus(&pci_bus_type);
293#endif 293#endif
294
295 x86_init.iommu.iommu_init(); 294 x86_init.iommu.iommu_init();
296 295
297 no_iommu_init(); 296 if (swiotlb) {
297 printk(KERN_INFO "PCI-DMA: "
298 "Using software bounce buffering for IO (SWIOTLB)\n");
299 swiotlb_print_info();
300 } else
301 swiotlb_free();
302
298 return 0; 303 return 0;
299} 304}
300/* Must execute after PCI subsystem */ 305/* Must execute after PCI subsystem */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 0410bd30060d..919182e15d1e 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -833,6 +833,7 @@ int __init gart_iommu_init(void)
833 flush_gart(); 833 flush_gart();
834 dma_ops = &gart_dma_ops; 834 dma_ops = &gart_dma_ops;
835 x86_platform.iommu_shutdown = gart_iommu_shutdown; 835 x86_platform.iommu_shutdown = gart_iommu_shutdown;
836 swiotlb = 0;
836 837
837 return 0; 838 return 0;
838} 839}
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a3933d4330cd..875e3822ae61 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = {
103 .sync_sg_for_device = nommu_sync_sg_for_device, 103 .sync_sg_for_device = nommu_sync_sg_for_device,
104 .is_phys = 1, 104 .is_phys = 1,
105}; 105};
106
107void __init no_iommu_init(void)
108{
109 if (dma_ops)
110 return;
111
112 force_iommu = 0; /* no HW IOMMU */
113 dma_ops = &nommu_dma_ops;
114}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index ea20ef7ca523..17ce4221bd03 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -46,13 +46,12 @@ void __init pci_swiotlb_init(void)
46{ 46{
47 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 47 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
48#ifdef CONFIG_X86_64 48#ifdef CONFIG_X86_64
49 if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) 49 if (!no_iommu && max_pfn > MAX_DMA32_PFN)
50 swiotlb = 1; 50 swiotlb = 1;
51#endif 51#endif
52 if (swiotlb_force)
53 swiotlb = 1;
54 if (swiotlb) { 52 if (swiotlb) {
55 swiotlb_init(0); 53 swiotlb_init(0);
56 dma_ops = &swiotlb_dma_ops; 54 dma_ops = &swiotlb_dma_ops;
57 } 55 } else
56 dma_ops = &nommu_dma_ops;
58} 57}
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index bce9cd7c755a..437399667e5a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -613,8 +613,7 @@ void __init detect_intel_iommu(void)
613 "x2apic and Intr-remapping.\n"); 613 "x2apic and Intr-remapping.\n");
614#endif 614#endif
615#ifdef CONFIG_DMAR 615#ifdef CONFIG_DMAR
616 if (ret && !no_iommu && !iommu_detected && !swiotlb && 616 if (ret && !no_iommu && !iommu_detected && !dmar_disabled)
617 !dmar_disabled)
618 iommu_detected = 1; 617 iommu_detected = 1;
619#endif 618#endif
620#ifdef CONFIG_X86 619#ifdef CONFIG_X86
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index b1e97e682500..43d755a2e14a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3231,7 +3231,7 @@ int __init intel_iommu_init(void)
3231 * Check the need for DMA-remapping initialization now. 3231 * Check the need for DMA-remapping initialization now.
3232 * Above initialization will also be used by Interrupt-remapping. 3232 * Above initialization will also be used by Interrupt-remapping.
3233 */ 3233 */
3234 if (no_iommu || swiotlb || dmar_disabled) 3234 if (no_iommu || dmar_disabled)
3235 return -ENODEV; 3235 return -ENODEV;
3236 3236
3237 iommu_init_mempool(); 3237 iommu_init_mempool();
@@ -3252,7 +3252,9 @@ int __init intel_iommu_init(void)
3252 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); 3252 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3253 3253
3254 init_timer(&unmap_timer); 3254 init_timer(&unmap_timer);
3255 force_iommu = 1; 3255#ifdef CONFIG_SWIOTLB
3256 swiotlb = 0;
3257#endif
3256 dma_ops = &intel_dma_ops; 3258 dma_ops = &intel_dma_ops;
3257 3259
3258 init_iommu_sysfs(); 3260 init_iommu_sysfs();
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 0c12d7cce300..e6755a0574fb 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -109,8 +109,10 @@ setup_io_tlb_npages(char *str)
109 } 109 }
110 if (*str == ',') 110 if (*str == ',')
111 ++str; 111 ++str;
112 if (!strcmp(str, "force")) 112 if (!strcmp(str, "force")) {
113 swiotlb_force = 1; 113 swiotlb_force = 1;
114 swiotlb = 1;
115 }
114 return 1; 116 return 1;
115} 117}
116__setup("swiotlb=", setup_io_tlb_npages); 118__setup("swiotlb=", setup_io_tlb_npages);