aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-02-10 19:32:38 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-02-11 00:07:37 -0500
commitcd15b048445d0a54f7147c35a86c5a16ef231554 (patch)
tree80c7e63624143adc5d453106ec13f445438c6135 /arch/powerpc/platforms
parentea961a828fe7250e954f086d74d9323c3d44c3e4 (diff)
powerpc/powernv: Add iommu DMA bypass support for IODA2
This patch adds support for creating a direct iommu "bypass" window on IODA2 bridges (such as Power8), allowing 64-bit DMA capable devices to bypass iommu page translation completely and thus significantly improving DMA performance. Additionally, this adds a hook to struct iommu_table so that the IOMMU API / VFIO can disable the bypass when external ownership is requested, since in that case the device will be used by an environment such as userspace or a KVM guest, which must not be allowed to bypass translations. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c84
-rw-r--r--arch/powerpc/platforms/powernv/pci.c10
-rw-r--r--arch/powerpc/platforms/powernv/pci.h6
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h8
-rw-r--r--arch/powerpc/platforms/powernv/setup.c9
5 files changed, 116 insertions, 1 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7d6dcc6d5fa9..3b2b4fb3585b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -21,6 +21,7 @@
21#include <linux/irq.h> 21#include <linux/irq.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/msi.h> 23#include <linux/msi.h>
24#include <linux/memblock.h>
24 25
25#include <asm/sections.h> 26#include <asm/sections.h>
26#include <asm/io.h> 27#include <asm/io.h>
@@ -460,9 +461,39 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
460 return; 461 return;
461 462
462 pe = &phb->ioda.pe_array[pdn->pe_number]; 463 pe = &phb->ioda.pe_array[pdn->pe_number];
464 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
463 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); 465 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
464} 466}
465 467
468static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
469 struct pci_dev *pdev, u64 dma_mask)
470{
471 struct pci_dn *pdn = pci_get_pdn(pdev);
472 struct pnv_ioda_pe *pe;
473 uint64_t top;
474 bool bypass = false;
475
476 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
477 return -ENODEV;;
478
479 pe = &phb->ioda.pe_array[pdn->pe_number];
480 if (pe->tce_bypass_enabled) {
481 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
482 bypass = (dma_mask >= top);
483 }
484
485 if (bypass) {
486 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
487 set_dma_ops(&pdev->dev, &dma_direct_ops);
488 set_dma_offset(&pdev->dev, pe->tce_bypass_base);
489 } else {
490 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
491 set_dma_ops(&pdev->dev, &dma_iommu_ops);
492 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
493 }
494 return 0;
495}
496
466static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) 497static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
467{ 498{
468 struct pci_dev *dev; 499 struct pci_dev *dev;
@@ -657,6 +688,56 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
657 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 688 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
658} 689}
659 690
691static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
692{
693 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
694 tce32_table);
695 uint16_t window_id = (pe->pe_number << 1 ) + 1;
696 int64_t rc;
697
698 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
699 if (enable) {
700 phys_addr_t top = memblock_end_of_DRAM();
701
702 top = roundup_pow_of_two(top);
703 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
704 pe->pe_number,
705 window_id,
706 pe->tce_bypass_base,
707 top);
708 } else {
709 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
710 pe->pe_number,
711 window_id,
712 pe->tce_bypass_base,
713 0);
714
715 /*
716 * We might want to reset the DMA ops of all devices on
717 * this PE. However in theory, that shouldn't be necessary
718 * as this is used for VFIO/KVM pass-through and the device
719 * hasn't yet been returned to its kernel driver
720 */
721 }
722 if (rc)
723 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
724 else
725 pe->tce_bypass_enabled = enable;
726}
727
728static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
729 struct pnv_ioda_pe *pe)
730{
731 /* TVE #1 is selected by PCI address bit 59 */
732 pe->tce_bypass_base = 1ull << 59;
733
734 /* Install set_bypass callback for VFIO */
735 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
736
737 /* Enable bypass by default */
738 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
739}
740
660static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 741static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
661 struct pnv_ioda_pe *pe) 742 struct pnv_ioda_pe *pe)
662{ 743{
@@ -727,6 +808,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
727 else 808 else
728 pnv_ioda_setup_bus_dma(pe, pe->pbus); 809 pnv_ioda_setup_bus_dma(pe, pe->pbus);
729 810
811 /* Also create a bypass window */
812 pnv_pci_ioda2_setup_bypass_pe(phb, pe);
730 return; 813 return;
731fail: 814fail:
732 if (pe->tce32_seg >= 0) 815 if (pe->tce32_seg >= 0)
@@ -1286,6 +1369,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
1286 1369
1287 /* Setup TCEs */ 1370 /* Setup TCEs */
1288 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1371 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
1372 phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
1289 1373
1290 /* Setup shutdown function for kexec */ 1374 /* Setup shutdown function for kexec */
1291 phb->shutdown = pnv_pci_ioda_shutdown; 1375 phb->shutdown = pnv_pci_ioda_shutdown;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b555ebc57ef5..95633d79ef5d 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -634,6 +634,16 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
634 pnv_pci_dma_fallback_setup(hose, pdev); 634 pnv_pci_dma_fallback_setup(hose, pdev);
635} 635}
636 636
637int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
638{
639 struct pci_controller *hose = pci_bus_to_host(pdev->bus);
640 struct pnv_phb *phb = hose->private_data;
641
642 if (phb && phb->dma_set_mask)
643 return phb->dma_set_mask(phb, pdev, dma_mask);
644 return __dma_set_mask(&pdev->dev, dma_mask);
645}
646
637void pnv_pci_shutdown(void) 647void pnv_pci_shutdown(void)
638{ 648{
639 struct pci_controller *hose; 649 struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 13f1942a9a5f..cde169442775 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -54,7 +54,9 @@ struct pnv_ioda_pe {
54 struct iommu_table tce32_table; 54 struct iommu_table tce32_table;
55 phys_addr_t tce_inval_reg_phys; 55 phys_addr_t tce_inval_reg_phys;
56 56
57 /* XXX TODO: Add support for additional 64-bit iommus */ 57 /* 64-bit TCE bypass region */
58 bool tce_bypass_enabled;
59 uint64_t tce_bypass_base;
58 60
59 /* MSIs. MVE index is identical for for 32 and 64 bit MSI 61 /* MSIs. MVE index is identical for for 32 and 64 bit MSI
60 * and -1 if not supported. (It's actually identical to the 62 * and -1 if not supported. (It's actually identical to the
@@ -113,6 +115,8 @@ struct pnv_phb {
113 unsigned int hwirq, unsigned int virq, 115 unsigned int hwirq, unsigned int virq,
114 unsigned int is_64, struct msi_msg *msg); 116 unsigned int is_64, struct msi_msg *msg);
115 void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); 117 void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
118 int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
119 u64 dma_mask);
116 void (*fixup_phb)(struct pci_controller *hose); 120 void (*fixup_phb)(struct pci_controller *hose);
117 u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); 121 u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
118 void (*shutdown)(struct pnv_phb *phb); 122 void (*shutdown)(struct pnv_phb *phb);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index de6819be1f95..0051e108ef0f 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,12 +7,20 @@ extern void pnv_smp_init(void);
7static inline void pnv_smp_init(void) { } 7static inline void pnv_smp_init(void) { }
8#endif 8#endif
9 9
10struct pci_dev;
11
10#ifdef CONFIG_PCI 12#ifdef CONFIG_PCI
11extern void pnv_pci_init(void); 13extern void pnv_pci_init(void);
12extern void pnv_pci_shutdown(void); 14extern void pnv_pci_shutdown(void);
15extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
13#else 16#else
14static inline void pnv_pci_init(void) { } 17static inline void pnv_pci_init(void) { }
15static inline void pnv_pci_shutdown(void) { } 18static inline void pnv_pci_shutdown(void) { }
19
20static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
21{
22 return -ENODEV;
23}
16#endif 24#endif
17 25
18extern void pnv_lpc_init(void); 26extern void pnv_lpc_init(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 21166f65c97c..110f4fbd319f 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -27,6 +27,7 @@
27#include <linux/interrupt.h> 27#include <linux/interrupt.h>
28#include <linux/bug.h> 28#include <linux/bug.h>
29#include <linux/cpuidle.h> 29#include <linux/cpuidle.h>
30#include <linux/pci.h>
30 31
31#include <asm/machdep.h> 32#include <asm/machdep.h>
32#include <asm/firmware.h> 33#include <asm/firmware.h>
@@ -141,6 +142,13 @@ static void pnv_progress(char *s, unsigned short hex)
141{ 142{
142} 143}
143 144
145static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
146{
147 if (dev_is_pci(dev))
148 return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
149 return __dma_set_mask(dev, dma_mask);
150}
151
144static void pnv_shutdown(void) 152static void pnv_shutdown(void)
145{ 153{
146 /* Let the PCI code clear up IODA tables */ 154 /* Let the PCI code clear up IODA tables */
@@ -238,6 +246,7 @@ define_machine(powernv) {
238 .machine_shutdown = pnv_shutdown, 246 .machine_shutdown = pnv_shutdown,
239 .power_save = powernv_idle, 247 .power_save = powernv_idle,
240 .calibrate_decr = generic_calibrate_decr, 248 .calibrate_decr = generic_calibrate_decr,
249 .dma_set_mask = pnv_dma_set_mask,
241#ifdef CONFIG_KEXEC 250#ifdef CONFIG_KEXEC
242 .kexec_cpu_down = pnv_kexec_cpu_down, 251 .kexec_cpu_down = pnv_kexec_cpu_down,
243#endif 252#endif