aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-02-10 19:32:38 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-02-11 00:07:37 -0500
commitcd15b048445d0a54f7147c35a86c5a16ef231554 (patch)
tree80c7e63624143adc5d453106ec13f445438c6135 /arch
parentea961a828fe7250e954f086d74d9323c3d44c3e4 (diff)
powerpc/powernv: Add iommu DMA bypass support for IODA2
This patch adds the support for to create a direct iommu "bypass" window on IODA2 bridges (such as Power8) allowing to bypass iommu page translation completely for 64-bit DMA capable devices, thus significantly improving DMA performances. Additionally, this adds a hook to the struct iommu_table so that the IOMMU API / VFIO can disable the bypass when external ownership is requested, since in that case, the device will be used by an environment such as userspace or a KVM guest which must not be allowed to bypass translations. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h1
-rw-r--r--arch/powerpc/include/asm/iommu.h1
-rw-r--r--arch/powerpc/kernel/dma.c10
-rw-r--r--arch/powerpc/kernel/iommu.c12
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c84
-rw-r--r--arch/powerpc/platforms/powernv/pci.c10
-rw-r--r--arch/powerpc/platforms/powernv/pci.h6
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h8
-rw-r--r--arch/powerpc/platforms/powernv/setup.c9
9 files changed, 137 insertions, 4 deletions
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index e27e9ad6818e..150866b2a3fe 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -134,6 +134,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
134} 134}
135 135
136extern int dma_set_mask(struct device *dev, u64 dma_mask); 136extern int dma_set_mask(struct device *dev, u64 dma_mask);
137extern int __dma_set_mask(struct device *dev, u64 dma_mask);
137 138
138#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) 139#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
139 140
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index f7a8036579b5..42632c7a2a4e 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -77,6 +77,7 @@ struct iommu_table {
77#ifdef CONFIG_IOMMU_API 77#ifdef CONFIG_IOMMU_API
78 struct iommu_group *it_group; 78 struct iommu_group *it_group;
79#endif 79#endif
80 void (*set_bypass)(struct iommu_table *tbl, bool enable);
80}; 81};
81 82
82/* Pure 2^n version of get_order */ 83/* Pure 2^n version of get_order */
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 8032b97ccdcb..ee78f6e49d64 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -191,12 +191,10 @@ EXPORT_SYMBOL(dma_direct_ops);
191 191
192#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) 192#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
193 193
194int dma_set_mask(struct device *dev, u64 dma_mask) 194int __dma_set_mask(struct device *dev, u64 dma_mask)
195{ 195{
196 struct dma_map_ops *dma_ops = get_dma_ops(dev); 196 struct dma_map_ops *dma_ops = get_dma_ops(dev);
197 197
198 if (ppc_md.dma_set_mask)
199 return ppc_md.dma_set_mask(dev, dma_mask);
200 if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) 198 if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
201 return dma_ops->set_dma_mask(dev, dma_mask); 199 return dma_ops->set_dma_mask(dev, dma_mask);
202 if (!dev->dma_mask || !dma_supported(dev, dma_mask)) 200 if (!dev->dma_mask || !dma_supported(dev, dma_mask))
@@ -204,6 +202,12 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
204 *dev->dma_mask = dma_mask; 202 *dev->dma_mask = dma_mask;
205 return 0; 203 return 0;
206} 204}
205int dma_set_mask(struct device *dev, u64 dma_mask)
206{
207 if (ppc_md.dma_set_mask)
208 return ppc_md.dma_set_mask(dev, dma_mask);
209 return __dma_set_mask(dev, dma_mask);
210}
207EXPORT_SYMBOL(dma_set_mask); 211EXPORT_SYMBOL(dma_set_mask);
208 212
209u64 dma_get_required_mask(struct device *dev) 213u64 dma_get_required_mask(struct device *dev)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index d773dd440a45..88e3ec6e1d96 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1088,6 +1088,14 @@ int iommu_take_ownership(struct iommu_table *tbl)
1088 memset(tbl->it_map, 0xff, sz); 1088 memset(tbl->it_map, 0xff, sz);
1089 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); 1089 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1090 1090
1091 /*
1092 * Disable iommu bypass, otherwise the user can DMA to all of
1093 * our physical memory via the bypass window instead of just
1094 * the pages that has been explicitly mapped into the iommu
1095 */
1096 if (tbl->set_bypass)
1097 tbl->set_bypass(tbl, false);
1098
1091 return 0; 1099 return 0;
1092} 1100}
1093EXPORT_SYMBOL_GPL(iommu_take_ownership); 1101EXPORT_SYMBOL_GPL(iommu_take_ownership);
@@ -1102,6 +1110,10 @@ void iommu_release_ownership(struct iommu_table *tbl)
1102 /* Restore bit#0 set by iommu_init_table() */ 1110 /* Restore bit#0 set by iommu_init_table() */
1103 if (tbl->it_offset == 0) 1111 if (tbl->it_offset == 0)
1104 set_bit(0, tbl->it_map); 1112 set_bit(0, tbl->it_map);
1113
1114 /* The kernel owns the device now, we can restore the iommu bypass */
1115 if (tbl->set_bypass)
1116 tbl->set_bypass(tbl, true);
1105} 1117}
1106EXPORT_SYMBOL_GPL(iommu_release_ownership); 1118EXPORT_SYMBOL_GPL(iommu_release_ownership);
1107 1119
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7d6dcc6d5fa9..3b2b4fb3585b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -21,6 +21,7 @@
21#include <linux/irq.h> 21#include <linux/irq.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/msi.h> 23#include <linux/msi.h>
24#include <linux/memblock.h>
24 25
25#include <asm/sections.h> 26#include <asm/sections.h>
26#include <asm/io.h> 27#include <asm/io.h>
@@ -460,9 +461,39 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
460 return; 461 return;
461 462
462 pe = &phb->ioda.pe_array[pdn->pe_number]; 463 pe = &phb->ioda.pe_array[pdn->pe_number];
464 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
463 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); 465 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
464} 466}
465 467
468static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
469 struct pci_dev *pdev, u64 dma_mask)
470{
471 struct pci_dn *pdn = pci_get_pdn(pdev);
472 struct pnv_ioda_pe *pe;
473 uint64_t top;
474 bool bypass = false;
475
476 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
477 return -ENODEV;;
478
479 pe = &phb->ioda.pe_array[pdn->pe_number];
480 if (pe->tce_bypass_enabled) {
481 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
482 bypass = (dma_mask >= top);
483 }
484
485 if (bypass) {
486 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
487 set_dma_ops(&pdev->dev, &dma_direct_ops);
488 set_dma_offset(&pdev->dev, pe->tce_bypass_base);
489 } else {
490 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
491 set_dma_ops(&pdev->dev, &dma_iommu_ops);
492 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
493 }
494 return 0;
495}
496
466static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) 497static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
467{ 498{
468 struct pci_dev *dev; 499 struct pci_dev *dev;
@@ -657,6 +688,56 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
657 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 688 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
658} 689}
659 690
691static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
692{
693 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
694 tce32_table);
695 uint16_t window_id = (pe->pe_number << 1 ) + 1;
696 int64_t rc;
697
698 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
699 if (enable) {
700 phys_addr_t top = memblock_end_of_DRAM();
701
702 top = roundup_pow_of_two(top);
703 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
704 pe->pe_number,
705 window_id,
706 pe->tce_bypass_base,
707 top);
708 } else {
709 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
710 pe->pe_number,
711 window_id,
712 pe->tce_bypass_base,
713 0);
714
715 /*
716 * We might want to reset the DMA ops of all devices on
717 * this PE. However in theory, that shouldn't be necessary
718 * as this is used for VFIO/KVM pass-through and the device
719 * hasn't yet been returned to its kernel driver
720 */
721 }
722 if (rc)
723 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
724 else
725 pe->tce_bypass_enabled = enable;
726}
727
728static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
729 struct pnv_ioda_pe *pe)
730{
731 /* TVE #1 is selected by PCI address bit 59 */
732 pe->tce_bypass_base = 1ull << 59;
733
734 /* Install set_bypass callback for VFIO */
735 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
736
737 /* Enable bypass by default */
738 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
739}
740
660static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 741static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
661 struct pnv_ioda_pe *pe) 742 struct pnv_ioda_pe *pe)
662{ 743{
@@ -727,6 +808,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
727 else 808 else
728 pnv_ioda_setup_bus_dma(pe, pe->pbus); 809 pnv_ioda_setup_bus_dma(pe, pe->pbus);
729 810
811 /* Also create a bypass window */
812 pnv_pci_ioda2_setup_bypass_pe(phb, pe);
730 return; 813 return;
731fail: 814fail:
732 if (pe->tce32_seg >= 0) 815 if (pe->tce32_seg >= 0)
@@ -1286,6 +1369,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
1286 1369
1287 /* Setup TCEs */ 1370 /* Setup TCEs */
1288 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1371 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
1372 phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
1289 1373
1290 /* Setup shutdown function for kexec */ 1374 /* Setup shutdown function for kexec */
1291 phb->shutdown = pnv_pci_ioda_shutdown; 1375 phb->shutdown = pnv_pci_ioda_shutdown;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b555ebc57ef5..95633d79ef5d 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -634,6 +634,16 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
634 pnv_pci_dma_fallback_setup(hose, pdev); 634 pnv_pci_dma_fallback_setup(hose, pdev);
635} 635}
636 636
637int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
638{
639 struct pci_controller *hose = pci_bus_to_host(pdev->bus);
640 struct pnv_phb *phb = hose->private_data;
641
642 if (phb && phb->dma_set_mask)
643 return phb->dma_set_mask(phb, pdev, dma_mask);
644 return __dma_set_mask(&pdev->dev, dma_mask);
645}
646
637void pnv_pci_shutdown(void) 647void pnv_pci_shutdown(void)
638{ 648{
639 struct pci_controller *hose; 649 struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 13f1942a9a5f..cde169442775 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -54,7 +54,9 @@ struct pnv_ioda_pe {
54 struct iommu_table tce32_table; 54 struct iommu_table tce32_table;
55 phys_addr_t tce_inval_reg_phys; 55 phys_addr_t tce_inval_reg_phys;
56 56
57 /* XXX TODO: Add support for additional 64-bit iommus */ 57 /* 64-bit TCE bypass region */
58 bool tce_bypass_enabled;
59 uint64_t tce_bypass_base;
58 60
59 /* MSIs. MVE index is identical for for 32 and 64 bit MSI 61 /* MSIs. MVE index is identical for for 32 and 64 bit MSI
60 * and -1 if not supported. (It's actually identical to the 62 * and -1 if not supported. (It's actually identical to the
@@ -113,6 +115,8 @@ struct pnv_phb {
113 unsigned int hwirq, unsigned int virq, 115 unsigned int hwirq, unsigned int virq,
114 unsigned int is_64, struct msi_msg *msg); 116 unsigned int is_64, struct msi_msg *msg);
115 void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); 117 void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
118 int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
119 u64 dma_mask);
116 void (*fixup_phb)(struct pci_controller *hose); 120 void (*fixup_phb)(struct pci_controller *hose);
117 u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); 121 u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
118 void (*shutdown)(struct pnv_phb *phb); 122 void (*shutdown)(struct pnv_phb *phb);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index de6819be1f95..0051e108ef0f 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,12 +7,20 @@ extern void pnv_smp_init(void);
7static inline void pnv_smp_init(void) { } 7static inline void pnv_smp_init(void) { }
8#endif 8#endif
9 9
10struct pci_dev;
11
10#ifdef CONFIG_PCI 12#ifdef CONFIG_PCI
11extern void pnv_pci_init(void); 13extern void pnv_pci_init(void);
12extern void pnv_pci_shutdown(void); 14extern void pnv_pci_shutdown(void);
15extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
13#else 16#else
14static inline void pnv_pci_init(void) { } 17static inline void pnv_pci_init(void) { }
15static inline void pnv_pci_shutdown(void) { } 18static inline void pnv_pci_shutdown(void) { }
19
20static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
21{
22 return -ENODEV;
23}
16#endif 24#endif
17 25
18extern void pnv_lpc_init(void); 26extern void pnv_lpc_init(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 21166f65c97c..110f4fbd319f 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -27,6 +27,7 @@
27#include <linux/interrupt.h> 27#include <linux/interrupt.h>
28#include <linux/bug.h> 28#include <linux/bug.h>
29#include <linux/cpuidle.h> 29#include <linux/cpuidle.h>
30#include <linux/pci.h>
30 31
31#include <asm/machdep.h> 32#include <asm/machdep.h>
32#include <asm/firmware.h> 33#include <asm/firmware.h>
@@ -141,6 +142,13 @@ static void pnv_progress(char *s, unsigned short hex)
141{ 142{
142} 143}
143 144
145static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
146{
147 if (dev_is_pci(dev))
148 return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
149 return __dma_set_mask(dev, dma_mask);
150}
151
144static void pnv_shutdown(void) 152static void pnv_shutdown(void)
145{ 153{
146 /* Let the PCI code clear up IODA tables */ 154 /* Let the PCI code clear up IODA tables */
@@ -238,6 +246,7 @@ define_machine(powernv) {
238 .machine_shutdown = pnv_shutdown, 246 .machine_shutdown = pnv_shutdown,
239 .power_save = powernv_idle, 247 .power_save = powernv_idle,
240 .calibrate_decr = generic_calibrate_decr, 248 .calibrate_decr = generic_calibrate_decr,
249 .dma_set_mask = pnv_dma_set_mask,
241#ifdef CONFIG_KEXEC 250#ifdef CONFIG_KEXEC
242 .kexec_cpu_down = pnv_kexec_cpu_down, 251 .kexec_cpu_down = pnv_kexec_cpu_down,
243#endif 252#endif