author    Alistair Popple <alistair@popple.id.au>  2015-12-16 21:43:13 -0500
committer Michael Ellerman <mpe@ellerman.id.au>    2015-12-17 06:41:00 -0500
commit    5d2aa710e697244f5504125e4aa6e2cfcf6c4791 (patch)
tree      92d41bd995ab538e0de8b57c0de8b66cea204cc0 /arch
parent    a84bf321401ab206baafbbfd3bfad485a1a2c3b4 (diff)
powerpc/powernv: Add support for Nvlink NPUs
NVLink is a high-speed interconnect that is used in conjunction with a PCI-E
connection to create an interface between CPU and GPU that provides very high
data bandwidth. A PCI-E connection to a GPU is used as the control path to
initiate and report status of large data transfers sent via the NVLink.

On IBM Power systems the NVLink processing unit (NPU) is similar to the
existing PHB3. This patch adds support for a new NPU PHB type. DMA operations
on the NPU are not supported as this patch sets the TCE translation tables to
be the same as the related GPU PCIe device for each NVLink. Therefore all DMA
operations are set up and controlled via the PCIe device.

EEH is not presently supported for the NPU devices, although it may be added
in future.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
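The patch exports two lookup helpers that tie the linked devices together via
the "ibm,gpu" and "ibm,npu" device-tree phandles. As a minimal usage sketch
(the wrapper function and its name are hypothetical, not part of the patch):

#include <linux/pci.h>
#include <asm/pci.h>

/* Hypothetical caller: resolve both directions of an NVLink pairing */
static void example_resolve_nvlink_peers(struct pci_dev *npdev)
{
        struct pci_dev *gpdev, *npdev0;

        /* NPU function -> GPU PCIe function, via the "ibm,gpu" phandle */
        gpdev = pnv_pci_get_gpu_dev(npdev);
        if (!gpdev)
                return;

        /* GPU -> one of its NVLink NPU functions, selected by index */
        npdev0 = pnv_pci_get_npu_dev(gpdev, 0);
        (void)npdev0;
}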
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/include/asm/pci.h            |   4
-rw-r--r--  arch/powerpc/platforms/powernv/Makefile   |   2
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c  | 348
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 138
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c      |   4
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h      |  19
6 files changed, 502 insertions, 13 deletions
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 3453bd8dc18f..6f8065a7d487 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -149,4 +149,8 @@ extern void pcibios_setup_phb_io_space(struct pci_controller *hose);
 extern void pcibios_scan_phb(struct pci_controller *hose);
 
 #endif /* __KERNEL__ */
+
+extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
+extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
+
 #endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 1c8cdb6250e7..ee774e8a4837 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -4,7 +4,7 @@ obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 
 obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
 obj-$(CONFIG_EEH) += eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
new file mode 100644
index 000000000000..e85aa900f5c0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -0,0 +1,348 @@
+/*
+ * This file implements the DMA operations for NVLink devices. The NPU
+ * devices all point to the same iommu table as the parent PCI device.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/memblock.h>
+
+#include <asm/iommu.h>
+#include <asm/pnv-pci.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/*
+ * Other types of TCE cache invalidation are not functional in the
+ * hardware.
+ */
+#define TCE_KILL_INVAL_ALL PPC_BIT(0)
+
+static struct pci_dev *get_pci_dev(struct device_node *dn)
+{
+        return PCI_DN(dn)->pcidev;
+}
+
+/* Given an NPU device get the associated PCI device. */
+struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
+{
+        struct device_node *dn;
+        struct pci_dev *gpdev;
+
+        /* Get associated PCI device */
+        dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
+        if (!dn)
+                return NULL;
+
+        gpdev = get_pci_dev(dn);
+        of_node_put(dn);
+
+        return gpdev;
+}
+EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
+
+/* Given the real PCI device get a linked NPU device. */
+struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
+{
+        struct device_node *dn;
+        struct pci_dev *npdev;
+
+        /* Get associated PCI device */
+        dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
+        if (!dn)
+                return NULL;
+
+        npdev = get_pci_dev(dn);
+        of_node_put(dn);
+
+        return npdev;
+}
+EXPORT_SYMBOL(pnv_pci_get_npu_dev);
+
+#define NPU_DMA_OP_UNSUPPORTED() \
+        dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
+                __func__)
+
+static void *dma_npu_alloc(struct device *dev, size_t size,
+                           dma_addr_t *dma_handle, gfp_t flag,
+                           struct dma_attrs *attrs)
+{
+        NPU_DMA_OP_UNSUPPORTED();
+        return NULL;
+}
+
+static void dma_npu_free(struct device *dev, size_t size,
+                         void *vaddr, dma_addr_t dma_handle,
+                         struct dma_attrs *attrs)
+{
+        NPU_DMA_OP_UNSUPPORTED();
+}
+
+static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
+                                   unsigned long offset, size_t size,
+                                   enum dma_data_direction direction,
+                                   struct dma_attrs *attrs)
+{
+        NPU_DMA_OP_UNSUPPORTED();
+        return 0;
+}
+
+static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
+                          int nelems, enum dma_data_direction direction,
+                          struct dma_attrs *attrs)
+{
+        NPU_DMA_OP_UNSUPPORTED();
+        return 0;
+}
+
+static int dma_npu_dma_supported(struct device *dev, u64 mask)
+{
+        NPU_DMA_OP_UNSUPPORTED();
+        return 0;
+}
+
+static u64 dma_npu_get_required_mask(struct device *dev)
+{
+        NPU_DMA_OP_UNSUPPORTED();
+        return 0;
+}
+
+struct dma_map_ops dma_npu_ops = {
+        .map_page = dma_npu_map_page,
+        .map_sg = dma_npu_map_sg,
+        .alloc = dma_npu_alloc,
+        .free = dma_npu_free,
+        .dma_supported = dma_npu_dma_supported,
+        .get_required_mask = dma_npu_get_required_mask,
+};
+
+/*
+ * Returns the PE associated with the PCI device of the given
+ * NPU. Returns the linked pci device if gpdev != NULL.
+ */
+static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
+                                                  struct pci_dev **gpdev)
+{
+        struct pnv_phb *phb;
+        struct pci_controller *hose;
+        struct pci_dev *pdev;
+        struct pnv_ioda_pe *pe;
+        struct pci_dn *pdn;
+
+        if (npe->flags & PNV_IODA_PE_PEER) {
+                pe = npe->peers[0];
+                pdev = pe->pdev;
+        } else {
+                pdev = pnv_pci_get_gpu_dev(npe->pdev);
+                if (!pdev)
+                        return NULL;
+
+                pdn = pci_get_pdn(pdev);
+                if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+                        return NULL;
+
+                hose = pci_bus_to_host(pdev->bus);
+                phb = hose->private_data;
+                pe = &phb->ioda.pe_array[pdn->pe_number];
+        }
+
+        if (gpdev)
+                *gpdev = pdev;
+
+        return pe;
+}
+
+void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
+{
+        struct pnv_phb *phb = npe->phb;
+
+        if (WARN_ON(phb->type != PNV_PHB_NPU ||
+                    !phb->ioda.tce_inval_reg ||
+                    !(npe->flags & PNV_IODA_PE_DEV)))
+                return;
+
+        mb(); /* Ensure previous TCE table stores are visible */
+        __raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
+                phb->ioda.tce_inval_reg);
+}
+
+void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
+                            struct iommu_table *tbl,
+                            unsigned long index,
+                            unsigned long npages,
+                            bool rm)
+{
+        struct pnv_phb *phb = npe->phb;
+
+        /* We can only invalidate the whole cache on NPU */
+        unsigned long val = TCE_KILL_INVAL_ALL;
+
+        if (WARN_ON(phb->type != PNV_PHB_NPU ||
+                    !phb->ioda.tce_inval_reg ||
+                    !(npe->flags & PNV_IODA_PE_DEV)))
+                return;
+
+        mb(); /* Ensure previous TCE table stores are visible */
+        if (rm)
+                __raw_rm_writeq(cpu_to_be64(val),
+                                (__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
+        else
+                __raw_writeq(cpu_to_be64(val),
+                        phb->ioda.tce_inval_reg);
+}
+
+void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
+{
+        struct pnv_ioda_pe *gpe;
+        struct pci_dev *gpdev;
+        int i, avail = -1;
+
+        if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
+                return;
+
+        gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+        if (!gpe)
+                return;
+
+        for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+                /* Nothing to do if the PE is already connected. */
+                if (gpe->peers[i] == npe)
+                        return;
+
+                if (!gpe->peers[i])
+                        avail = i;
+        }
+
+        if (WARN_ON(avail < 0))
+                return;
+
+        gpe->peers[avail] = npe;
+        gpe->flags |= PNV_IODA_PE_PEER;
+
+        /*
+         * We assume that the NPU devices only have a single peer PE
+         * (the GPU PCIe device PE).
+         */
+        npe->peers[0] = gpe;
+        npe->flags |= PNV_IODA_PE_PEER;
+}
+
+/*
+ * For the NPU we want to point the TCE table at the same table as the
+ * real PCI device.
+ */
+static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
+{
+        struct pnv_phb *phb = npe->phb;
+        struct pci_dev *gpdev;
+        struct pnv_ioda_pe *gpe;
+        void *addr;
+        unsigned int size;
+        int64_t rc;
+
+        /*
+         * Find the associated PCI devices and get the dma window
+         * information from there.
+         */
+        if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
+                return;
+
+        gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+        if (!gpe)
+                return;
+
+        addr = (void *)gpe->table_group.tables[0]->it_base;
+        size = gpe->table_group.tables[0]->it_size << 3;
+        rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+                                        npe->pe_number, 1, __pa(addr),
+                                        size, 0x1000);
+        if (rc != OPAL_SUCCESS)
+                pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
+                        __func__, rc, phb->hose->global_number, npe->pe_number);
+
+        /*
+         * We don't initialise npu_pe->tce32_table as we always use
+         * dma_npu_ops which are nops.
+         */
+        set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
+}
+
+/*
+ * Enable/disable bypass mode on the NPU. The NPU only supports one
+ * window per link, so bypass needs to be explicitly enabled or
+ * disabled. Unlike for a PHB3, bypass and non-bypass modes can't be
+ * active at the same time.
+ */
+int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
+{
+        struct pnv_phb *phb = npe->phb;
+        int64_t rc = 0;
+
+        if (phb->type != PNV_PHB_NPU || !npe->pdev)
+                return -EINVAL;
+
+        if (enable) {
+                /* Enable the bypass window */
+                phys_addr_t top = memblock_end_of_DRAM();
+
+                npe->tce_bypass_base = 0;
+                top = roundup_pow_of_two(top);
+                dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+                         npe->pe_number);
+                rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+                                        npe->pe_number, npe->pe_number,
+                                        npe->tce_bypass_base, top);
+        } else {
+                /*
+                 * Disable the bypass window by replacing it with the
+                 * TCE32 window.
+                 */
+                pnv_npu_disable_bypass(npe);
+        }
+
+        return rc;
+}
+
+int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+{
+        struct pci_controller *hose = pci_bus_to_host(npdev->bus);
+        struct pnv_phb *phb = hose->private_data;
+        struct pci_dn *pdn = pci_get_pdn(npdev);
+        struct pnv_ioda_pe *npe, *gpe;
+        struct pci_dev *gpdev;
+        uint64_t top;
+        bool bypass = false;
+
+        if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+                return -ENXIO;
+
+        /* We only do bypass if it's enabled on the linked device */
+        npe = &phb->ioda.pe_array[pdn->pe_number];
+        gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+        if (!gpe)
+                return -ENODEV;
+
+        if (gpe->tce_bypass_enabled) {
+                top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+                bypass = (dma_mask >= top);
+        }
+
+        if (bypass)
+                dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
+        else
+                dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+
+        pnv_npu_dma_set_bypass(npe, bypass);
+        *npdev->dev.dma_mask = dma_mask;
+
+        return 0;
+}
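
For reference, the bypass decision in pnv_npu_dma_set_mask() above reduces to
one comparison against the top of the linked GPU's bypass window. A minimal
standalone sketch of that check (the helper name and the example figures are
illustrative assumptions, not kernel code):

#include <linux/types.h>

/* Illustrative only: mirrors the bypass check in pnv_npu_dma_set_mask() */
static bool example_would_bypass(u64 dma_mask, u64 tce_bypass_base,
                                 u64 end_of_dram)
{
        /* Highest address the device must reach when bypassing the TCEs */
        u64 top = tce_bypass_base + end_of_dram - 1;

        return dma_mask >= top;
}

With tce_bypass_base of 0 and, say, 16GB of DRAM, a 64-bit DMA mask selects
bypass while a 32-bit mask falls back to the 32-bit iommu window shared with
the GPU.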
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7a3a30ee6468..323e1e58da93 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -771,8 +771,12 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
                 return -ENXIO;
         }
 
-        /* Configure PELTV */
-        pnv_ioda_set_peltv(phb, pe, true);
+        /*
+         * Configure PELTV. NPUs don't have a PELTV table so skip
+         * configuration on them.
+         */
+        if (phb->type != PNV_PHB_NPU)
+                pnv_ioda_set_peltv(phb, pe, true);
 
         /* Setup reverse map */
         for (rid = pe->rid; rid < rid_end; rid++)
@@ -915,7 +919,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 }
 #endif /* CONFIG_PCI_IOV */
 
-#if 0
 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
         struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -932,11 +935,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
         if (pdn->pe_number != IODA_INVALID_PE)
                 return NULL;
 
-        /* PE#0 has been pre-set */
-        if (dev->bus->number == 0)
-                pe_num = 0;
-        else
-                pe_num = pnv_ioda_alloc_pe(phb);
+        pe_num = pnv_ioda_alloc_pe(phb);
         if (pe_num == IODA_INVALID_PE) {
                 pr_warning("%s: Not enough PE# available, disabling device\n",
                            pci_name(dev));
@@ -954,6 +953,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
         pci_dev_get(dev);
         pdn->pcidev = dev;
         pdn->pe_number = pe_num;
+        pe->flags = PNV_IODA_PE_DEV;
         pe->pdev = dev;
         pe->pbus = NULL;
         pe->tce32_seg = -1;
@@ -984,7 +984,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 
         return pe;
 }
-#endif /* Useful for SRIOV case */
 
 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 {
@@ -1075,6 +1074,18 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
         pnv_ioda_link_pe_by_weight(phb, pe);
 }
 
+static void pnv_ioda_setup_dev_PEs(struct pci_bus *bus)
+{
+        struct pci_bus *child;
+        struct pci_dev *pdev;
+
+        list_for_each_entry(pdev, &bus->devices, bus_list)
+                pnv_ioda_setup_dev_PE(pdev);
+
+        list_for_each_entry(child, &bus->children, node)
+                pnv_ioda_setup_dev_PEs(child);
+}
+
 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
 {
         struct pci_dev *dev;
@@ -1111,7 +1122,15 @@ static void pnv_pci_ioda_setup_PEs(void)
                 if (phb->reserve_m64_pe)
                         phb->reserve_m64_pe(hose->bus, NULL, true);
 
-                pnv_ioda_setup_PEs(hose->bus);
+                /*
+                 * On NPU PHB, we expect separate PEs for individual PCI
+                 * functions. PCI bus dependent PEs are required for the
+                 * remaining types of PHBs.
+                 */
+                if (phb->type == PNV_PHB_NPU)
+                        pnv_ioda_setup_dev_PEs(hose->bus);
+                else
+                        pnv_ioda_setup_PEs(hose->bus);
         }
 }
 
@@ -1570,6 +1589,8 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
         struct pnv_ioda_pe *pe;
         uint64_t top;
         bool bypass = false;
+        struct pci_dev *linked_npu_dev;
+        int i;
 
         if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
                 return -ENODEV;;
@@ -1588,6 +1609,15 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
                 set_dma_ops(&pdev->dev, &dma_iommu_ops);
         }
         *pdev->dev.dma_mask = dma_mask;
+
+        /* Update peer npu devices */
+        if (pe->flags & PNV_IODA_PE_PEER)
+                for (i = 0; pe->peers[i]; i++) {
+                        linked_npu_dev = pe->peers[i]->pdev;
+                        if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
+                                dma_set_mask(&linked_npu_dev->dev, dma_mask);
+                }
+
         return 0;
 }
 
@@ -1732,12 +1762,23 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
         /* 01xb - invalidate TCEs that match the specified PE# */
         unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
         struct pnv_phb *phb = pe->phb;
+        struct pnv_ioda_pe *npe;
+        int i;
 
         if (!phb->ioda.tce_inval_reg)
                 return;
 
         mb(); /* Ensure above stores are visible */
         __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+
+        if (pe->flags & PNV_IODA_PE_PEER)
+                for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+                        npe = pe->peers[i];
+                        if (!npe || npe->phb->type != PNV_PHB_NPU)
+                                continue;
+
+                        pnv_npu_tce_invalidate_entire(npe);
+                }
 }
 
 static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
@@ -1772,15 +1813,28 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
         struct iommu_table_group_link *tgl;
 
         list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+                struct pnv_ioda_pe *npe;
                 struct pnv_ioda_pe *pe = container_of(tgl->table_group,
                                 struct pnv_ioda_pe, table_group);
                 __be64 __iomem *invalidate = rm ?
                         (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
                         pe->phb->ioda.tce_inval_reg;
+                int i;
 
                 pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
                         invalidate, tbl->it_page_shift,
                         index, npages);
+
+                if (pe->flags & PNV_IODA_PE_PEER)
+                        /* Invalidate PEs using the same TCE table */
+                        for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+                                npe = pe->peers[i];
+                                if (!npe || npe->phb->type != PNV_PHB_NPU)
+                                        continue;
+
+                                pnv_npu_tce_invalidate(npe, tbl, index,
+                                                npages, rm);
+                        }
         }
 }
 
@@ -2428,10 +2482,17 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
                         pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
                                 pe->dma_weight, segs);
                         pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
-                } else {
+                } else if (phb->type == PNV_PHB_IODA2) {
                         pe_info(pe, "Assign DMA32 space\n");
                         segs = 0;
                         pnv_pci_ioda2_setup_dma_pe(phb, pe);
+                } else if (phb->type == PNV_PHB_NPU) {
+                        /*
+                         * We initialise the DMA space for an NPU PHB
+                         * after setup of the PHB is complete as we
+                         * point the NPU TVT to the same location
+                         * as the PHB3 TVT.
+                         */
                 }
 
                 remaining -= segs;
@@ -2873,6 +2934,11 @@ static void pnv_pci_ioda_setup_seg(void)
 
         list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                 phb = hose->private_data;
+
+                /* NPU PHB does not support IO or MMIO segmentation */
+                if (phb->type == PNV_PHB_NPU)
+                        continue;
+
                 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
                         pnv_ioda_setup_pe_seg(hose, pe);
                 }
@@ -2912,6 +2978,27 @@ static void pnv_pci_ioda_create_dbgfs(void)
 #endif /* CONFIG_DEBUG_FS */
 }
 
+static void pnv_npu_ioda_fixup(void)
+{
+        bool enable_bypass;
+        struct pci_controller *hose, *tmp;
+        struct pnv_phb *phb;
+        struct pnv_ioda_pe *pe;
+
+        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+                phb = hose->private_data;
+                if (phb->type != PNV_PHB_NPU)
+                        continue;
+
+                list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
+                        enable_bypass = dma_get_mask(&pe->pdev->dev) ==
+                                DMA_BIT_MASK(64);
+                        pnv_npu_init_dma_pe(pe);
+                        pnv_npu_dma_set_bypass(pe, enable_bypass);
+                }
+        }
+}
+
 static void pnv_pci_ioda_fixup(void)
 {
         pnv_pci_ioda_setup_PEs();
@@ -2924,6 +3011,9 @@ static void pnv_pci_ioda_fixup(void)
         eeh_init();
         eeh_addr_cache_build();
 #endif
+
+        /* Link NPU IODA tables to their PCI devices. */
+        pnv_npu_ioda_fixup();
 }
 
 /*
@@ -3038,6 +3128,19 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
         .shutdown = pnv_pci_ioda_shutdown,
 };
 
+static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
+        .dma_dev_setup = pnv_pci_dma_dev_setup,
+#ifdef CONFIG_PCI_MSI
+        .setup_msi_irqs = pnv_setup_msi_irqs,
+        .teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+        .enable_device_hook = pnv_pci_enable_device_hook,
+        .window_alignment = pnv_pci_window_alignment,
+        .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+        .dma_set_mask = pnv_npu_dma_set_mask,
+        .shutdown = pnv_pci_ioda_shutdown,
+};
+
 static void __init pnv_pci_init_ioda_phb(struct device_node *np,
                                          u64 hub_id, int ioda_type)
 {
@@ -3093,6 +3196,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
                 phb->model = PNV_PHB_MODEL_P7IOC;
         else if (of_device_is_compatible(np, "ibm,power8-pciex"))
                 phb->model = PNV_PHB_MODEL_PHB3;
+        else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
+                phb->model = PNV_PHB_MODEL_NPU;
         else
                 phb->model = PNV_PHB_MODEL_UNKNOWN;
 
@@ -3193,7 +3298,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
          * the child P2P bridges) can form individual PE.
          */
         ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
-        hose->controller_ops = pnv_pci_ioda_controller_ops;
+
+        if (phb->type == PNV_PHB_NPU)
+                hose->controller_ops = pnv_npu_ioda_controller_ops;
+        else
+                hose->controller_ops = pnv_pci_ioda_controller_ops;
 
 #ifdef CONFIG_PCI_IOV
         ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
@@ -3228,6 +3337,11 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
         pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
 
+void __init pnv_pci_init_npu_phb(struct device_node *np)
+{
+        pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU);
+}
+
 void __init pnv_pci_init_ioda_hub(struct device_node *np)
 {
         struct device_node *phbn;
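
From a driver's point of view, the peer propagation added to
pnv_pci_ioda_dma_set_mask() above means only the GPU's PCIe function needs
its DMA mask set; the platform hook mirrors the mask onto every linked NPU
function. A hypothetical driver-side sketch (not part of this patch):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

/* Hypothetical: a 64-bit mask on the GPU is mirrored to its NPU peers */
static int example_enable_gpu_dma(struct pci_dev *gpdev)
{
        return dma_set_mask(&gpdev->dev, DMA_BIT_MASK(64));
}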
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index f2dd77234240..ff4e42d9d259 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -807,6 +807,10 @@ void __init pnv_pci_init(void)
         for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
                 pnv_pci_init_ioda2_phb(np);
 
+        /* Look for NPU PHBs */
+        for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
+                pnv_pci_init_npu_phb(np);
+
         /* Setup the linkage between OF nodes and PHBs */
         pci_devs_phb_init();
 
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c8ff50e90766..7f56313e8d72 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -7,6 +7,7 @@ enum pnv_phb_type {
         PNV_PHB_P5IOC2 = 0,
         PNV_PHB_IODA1 = 1,
         PNV_PHB_IODA2 = 2,
+        PNV_PHB_NPU = 3,
 };
 
 /* Precise PHB model for error management */
@@ -15,6 +16,7 @@ enum pnv_phb_model {
         PNV_PHB_MODEL_P5IOC2,
         PNV_PHB_MODEL_P7IOC,
         PNV_PHB_MODEL_PHB3,
+        PNV_PHB_MODEL_NPU,
 };
 
 #define PNV_PCI_DIAG_BUF_SIZE 8192
@@ -24,6 +26,7 @@ enum pnv_phb_model {
 #define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
 #define PNV_IODA_PE_SLAVE  (1 << 4) /* Slave PE in compound case */
 #define PNV_IODA_PE_VF     (1 << 5) /* PE for one VF */
+#define PNV_IODA_PE_PEER   (1 << 6) /* PE has peers */
 
 /* Data associated with a PE, including IOMMU tracking etc.. */
 struct pnv_phb;
@@ -31,6 +34,9 @@ struct pnv_ioda_pe {
         unsigned long flags;
         struct pnv_phb *phb;
 
+#define PNV_IODA_MAX_PEER_PES 8
+        struct pnv_ioda_pe *peers[PNV_IODA_MAX_PEER_PES];
+
         /* A PE can be associated with a single device or an
          * entire bus (& children). In the former case, pdev
          * is populated, in the later case, pbus is.
@@ -229,6 +235,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
+extern void pnv_pci_init_npu_phb(struct device_node *np);
 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
                                         __be64 *startp, __be64 *endp, bool rm);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
@@ -238,4 +245,16 @@ extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
 extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
 extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
 
+/* Nvlink functions */
+extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe);
+extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
+                                   struct iommu_table *tbl,
+                                   unsigned long index,
+                                   unsigned long npages,
+                                   bool rm);
+extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe);
+extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe);
+extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled);
+extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask);
+
 #endif /* __POWERNV_PCI_H */