aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-11-15 12:29:08 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-11-24 22:53:15 -0500
commit184cd4a3b962a4769889615430eaf40076b97969 (patch)
treedc1df367a9a3e5b641f480a2e9a729a337762492
parentca45cfe31ee5b59eb2e0f19baac575a4f5b68537 (diff)
powerpc/powernv: PCI support for p7IOC under OPAL v2
This adds support for p7IOC (and possibly other IODA v1 IO Hubs) using OPAL v2 interfaces. We completely take over resource assignment and assign them using an algorithm that hands out device BARs in a way that makes them fit in individual segments of the M32 window of the bridge, which enables us to assign individual PEs to devices and functions. The current implementation gives out a PE per function on PCIe, and a PE for the entire bridge for PCIe to PCI-X bridges. This can be adjusted / fine tuned later. We also set up DMA resources (32-bit only for now) and MSIs (both 32-bit and 64-bit MSI are supported). The DMA allocation tries to divide the available 256M segments of the 32-bit DMA address space "fairly" among PEs. This is done using a "weight" heuristic which assigns less value to things like OHCI USB controllers than, for example, SCSI RAID controllers. This algorithm will probably want some fine tuning for specific devices or device types. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h6
-rw-r--r--arch/powerpc/kernel/pci_dn.c3
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c1325
-rw-r--r--arch/powerpc/platforms/powernv/pci.c20
-rw-r--r--arch/powerpc/platforms/powernv/pci.h84
6 files changed, 1434 insertions, 6 deletions
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 56b879ab3a40..882b6aa6c857 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -153,8 +153,8 @@ struct pci_dn {
153 153
154 int pci_ext_config_space; /* for pci devices */ 154 int pci_ext_config_space; /* for pci devices */
155 155
156#ifdef CONFIG_EEH
157 struct pci_dev *pcidev; /* back-pointer to the pci device */ 156 struct pci_dev *pcidev; /* back-pointer to the pci device */
157#ifdef CONFIG_EEH
158 int class_code; /* pci device class */ 158 int class_code; /* pci device class */
159 int eeh_mode; /* See eeh.h for possible EEH_MODEs */ 159 int eeh_mode; /* See eeh.h for possible EEH_MODEs */
160 int eeh_config_addr; 160 int eeh_config_addr;
@@ -164,6 +164,10 @@ struct pci_dn {
164 int eeh_false_positives; /* # times this device reported #ff's */ 164 int eeh_false_positives; /* # times this device reported #ff's */
165 u32 config_space[16]; /* saved PCI config space */ 165 u32 config_space[16]; /* saved PCI config space */
166#endif 166#endif
167#define IODA_INVALID_PE (-1)
168#ifdef CONFIG_PPC_POWERNV
169 int pe_number;
170#endif
167}; 171};
168 172
169/* Get the pointer to a device_node's pci_dn */ 173/* Get the pointer to a device_node's pci_dn */
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 4e69deb89b37..dd9e4a04bf79 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -50,6 +50,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
50 dn->data = pdn; 50 dn->data = pdn;
51 pdn->node = dn; 51 pdn->node = dn;
52 pdn->phb = phb; 52 pdn->phb = phb;
53#ifdef CONFIG_PPC_POWERNV
54 pdn->pe_number = IODA_INVALID_PE;
55#endif
53 regs = of_get_property(dn, "reg", NULL); 56 regs = of_get_property(dn, "reg", NULL);
54 if (regs) { 57 if (regs) {
55 /* First register entry is addr (00BBSS00) */ 58 /* First register entry is addr (00BBSS00) */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 31853008b418..bcc3cb48a44e 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,4 +2,4 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o
2obj-y += opal-rtc.o opal-nvram.o 2obj-y += opal-rtc.o opal-nvram.o
3 3
4obj-$(CONFIG_SMP) += smp.o 4obj-$(CONFIG_SMP) += smp.o
5obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o 5obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
new file mode 100644
index 000000000000..cf89f305c8b1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -0,0 +1,1325 @@
1/*
2 * Support PCI/PCIe on PowerNV platforms
3 *
4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define DEBUG
13
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17#include <linux/string.h>
18#include <linux/init.h>
19#include <linux/bootmem.h>
20#include <linux/irq.h>
21#include <linux/io.h>
22#include <linux/msi.h>
23
24#include <asm/sections.h>
25#include <asm/io.h>
26#include <asm/prom.h>
27#include <asm/pci-bridge.h>
28#include <asm/machdep.h>
29#include <asm/ppc-pci.h>
30#include <asm/opal.h>
31#include <asm/iommu.h>
32#include <asm/tce.h>
33#include <asm/abs_addr.h>
34
35#include "powernv.h"
36#include "pci.h"
37
38struct resource_wrap {
39 struct list_head link;
40 resource_size_t size;
41 resource_size_t align;
42 struct pci_dev *dev; /* Set if it's a device */
43 struct pci_bus *bus; /* Set if it's a bridge */
44};
45
46static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
47 struct va_format *vaf)
48{
49 char pfix[32];
50
51 if (pe->pdev)
52 strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
53 else
54 sprintf(pfix, "%04x:%02x ",
55 pci_domain_nr(pe->pbus), pe->pbus->number);
56 return printk("pci %s%s: [PE# %.3d] %pV", level, pfix, pe->pe_number, vaf);
57}
58
59#define define_pe_printk_level(func, kern_level) \
60static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
61{ \
62 struct va_format vaf; \
63 va_list args; \
64 int r; \
65 \
66 va_start(args, fmt); \
67 \
68 vaf.fmt = fmt; \
69 vaf.va = &args; \
70 \
71 r = __pe_printk(kern_level, pe, &vaf); \
72 va_end(args); \
73 \
74 return r; \
75} \
76
77define_pe_printk_level(pe_err, KERN_ERR);
78define_pe_printk_level(pe_warn, KERN_WARNING);
79define_pe_printk_level(pe_info, KERN_INFO);
80
81
82/* Calculate resource usage & alignment requirement of a single
83 * device. This will also assign all resources within the device
84 * for a given type starting at 0 for the biggest one and then
85 * assigning in decreasing order of size.
86 */
87static void __devinit pnv_ioda_calc_dev(struct pci_dev *dev, unsigned int flags,
88 resource_size_t *size,
89 resource_size_t *align)
90{
91 resource_size_t start;
92 struct resource *r;
93 int i;
94
95 pr_devel(" -> CDR %s\n", pci_name(dev));
96
97 *size = *align = 0;
98
99 /* Clear the resources out and mark them all unset */
100 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
101 r = &dev->resource[i];
102 if (!(r->flags & flags))
103 continue;
104 if (r->start) {
105 r->end -= r->start;
106 r->start = 0;
107 }
108 r->flags |= IORESOURCE_UNSET;
109 }
110
111 /* We currently keep all memory resources together, we
112 * will handle prefetch & 64-bit separately in the future
113 * but for now we stick everybody in M32
114 */
115 start = 0;
116 for (;;) {
117 resource_size_t max_size = 0;
118 int max_no = -1;
119
120 /* Find next biggest resource */
121 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
122 r = &dev->resource[i];
123 if (!(r->flags & IORESOURCE_UNSET) ||
124 !(r->flags & flags))
125 continue;
126 if (resource_size(r) > max_size) {
127 max_size = resource_size(r);
128 max_no = i;
129 }
130 }
131 if (max_no < 0)
132 break;
133 r = &dev->resource[max_no];
134 if (max_size > *align)
135 *align = max_size;
136 *size += max_size;
137 r->start = start;
138 start += max_size;
139 r->end = r->start + max_size - 1;
140 r->flags &= ~IORESOURCE_UNSET;
141 pr_devel(" -> R%d %016llx..%016llx\n",
142 max_no, r->start, r->end);
143 }
144 pr_devel(" <- CDR %s size=%llx align=%llx\n",
145 pci_name(dev), *size, *align);
146}
147
148/* Allocate a resource "wrap" for a given device or bridge and
149 * insert it at the right position in the sorted list
150 */
151static void __devinit pnv_ioda_add_wrap(struct list_head *list,
152 struct pci_bus *bus,
153 struct pci_dev *dev,
154 resource_size_t size,
155 resource_size_t align)
156{
157 struct resource_wrap *w1, *w = kzalloc(sizeof(*w), GFP_KERNEL);
158
159 w->size = size;
160 w->align = align;
161 w->dev = dev;
162 w->bus = bus;
163
164 list_for_each_entry(w1, list, link) {
165 if (w1->align < align) {
166 list_add_tail(&w->link, &w1->link);
167 return;
168 }
169 }
170 list_add_tail(&w->link, list);
171}
172
173/* Offset device resources of a given type */
174static void __devinit pnv_ioda_offset_dev(struct pci_dev *dev,
175 unsigned int flags,
176 resource_size_t offset)
177{
178 struct resource *r;
179 int i;
180
181 pr_devel(" -> ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
182
183 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
184 r = &dev->resource[i];
185 if (r->flags & flags) {
186 dev->resource[i].start += offset;
187 dev->resource[i].end += offset;
188 }
189 }
190
191 pr_devel(" <- ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
192}
193
194/* Offset bus resources (& all children) of a given type */
195static void __devinit pnv_ioda_offset_bus(struct pci_bus *bus,
196 unsigned int flags,
197 resource_size_t offset)
198{
199 struct resource *r;
200 struct pci_dev *dev;
201 struct pci_bus *cbus;
202 int i;
203
204 pr_devel(" -> OBR %s [%x] +%016llx\n",
205 bus->self ? pci_name(bus->self) : "root", flags, offset);
206
207 for (i = 0; i < 2; i++) {
208 r = bus->resource[i];
209 if (r && (r->flags & flags)) {
210 bus->resource[i]->start += offset;
211 bus->resource[i]->end += offset;
212 }
213 }
214 list_for_each_entry(dev, &bus->devices, bus_list)
215 pnv_ioda_offset_dev(dev, flags, offset);
216 list_for_each_entry(cbus, &bus->children, node)
217 pnv_ioda_offset_bus(cbus, flags, offset);
218
219 pr_devel(" <- OBR %s [%x]\n",
220 bus->self ? pci_name(bus->self) : "root", flags);
221}
222
223/* This is the guts of our IODA resource allocation. This is called
224 * recursively for each bus in the system. It calculates all the
225 * necessary size and requirements for children and assign them
226 * resources such that:
227 *
228 * - Each function fits in it's own contiguous set of IO/M32
229 * segment
230 *
231 * - All segments behind a P2P bridge are contiguous and obey
232 * alignment constraints of those bridges
233 */
234static void __devinit pnv_ioda_calc_bus(struct pci_bus *bus, unsigned int flags,
235 resource_size_t *size,
236 resource_size_t *align)
237{
238 struct pci_controller *hose = pci_bus_to_host(bus);
239 struct pnv_phb *phb = hose->private_data;
240 resource_size_t dev_size, dev_align, start;
241 resource_size_t min_align, min_balign;
242 struct pci_dev *cdev;
243 struct pci_bus *cbus;
244 struct list_head head;
245 struct resource_wrap *w;
246 unsigned int bres;
247
248 *size = *align = 0;
249
250 pr_devel("-> CBR %s [%x]\n",
251 bus->self ? pci_name(bus->self) : "root", flags);
252
253 /* Calculate alignment requirements based on the type
254 * of resource we are working on
255 */
256 if (flags & IORESOURCE_IO) {
257 bres = 0;
258 min_align = phb->ioda.io_segsize;
259 min_balign = 0x1000;
260 } else {
261 bres = 1;
262 min_align = phb->ioda.m32_segsize;
263 min_balign = 0x100000;
264 }
265
266 /* Gather all our children resources ordered by alignment */
267 INIT_LIST_HEAD(&head);
268
269 /* - Busses */
270 list_for_each_entry(cbus, &bus->children, node) {
271 pnv_ioda_calc_bus(cbus, flags, &dev_size, &dev_align);
272 pnv_ioda_add_wrap(&head, cbus, NULL, dev_size, dev_align);
273 }
274
275 /* - Devices */
276 list_for_each_entry(cdev, &bus->devices, bus_list) {
277 pnv_ioda_calc_dev(cdev, flags, &dev_size, &dev_align);
278 /* Align them to segment size */
279 if (dev_align < min_align)
280 dev_align = min_align;
281 pnv_ioda_add_wrap(&head, NULL, cdev, dev_size, dev_align);
282 }
283 if (list_empty(&head))
284 goto empty;
285
286 /* Now we can do two things: assign offsets to them within that
287 * level and get our total alignment & size requirements. The
288 * assignment algorithm is going to be uber-trivial for now, we
289 * can try to be smarter later at filling out holes.
290 */
291 start = bus->self ? 0 : bus->resource[bres]->start;
292
293 /* Don't hand out IO 0 */
294 if ((flags & IORESOURCE_IO) && !bus->self)
295 start += 0x1000;
296
297 while(!list_empty(&head)) {
298 w = list_first_entry(&head, struct resource_wrap, link);
299 list_del(&w->link);
300 if (w->size) {
301 if (start) {
302 start = ALIGN(start, w->align);
303 if (w->dev)
304 pnv_ioda_offset_dev(w->dev,flags,start);
305 else if (w->bus)
306 pnv_ioda_offset_bus(w->bus,flags,start);
307 }
308 if (w->align > *align)
309 *align = w->align;
310 }
311 start += w->size;
312 kfree(w);
313 }
314 *size = start;
315
316 /* Align and setup bridge resources */
317 *align = max_t(resource_size_t, *align,
318 max_t(resource_size_t, min_align, min_balign));
319 *size = ALIGN(*size,
320 max_t(resource_size_t, min_align, min_balign));
321 empty:
322 /* Only setup P2P's, not the PHB itself */
323 if (bus->self) {
324 WARN_ON(bus->resource[bres] == NULL);
325 bus->resource[bres]->start = 0;
326 bus->resource[bres]->flags = (*size) ? flags : 0;
327 bus->resource[bres]->end = (*size) ? (*size - 1) : 0;
328
329 /* Clear prefetch bus resources for now */
330 bus->resource[2]->flags = 0;
331 }
332
333 pr_devel("<- CBR %s [%x] *size=%016llx *align=%016llx\n",
334 bus->self ? pci_name(bus->self) : "root", flags,*size,*align);
335}
336
337static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
338{
339 struct device_node *np;
340
341 np = pci_device_to_OF_node(dev);
342 if (!np)
343 return NULL;
344 return PCI_DN(np);
345}
346
347static void __devinit pnv_ioda_setup_pe_segments(struct pci_dev *dev)
348{
349 struct pci_controller *hose = pci_bus_to_host(dev->bus);
350 struct pnv_phb *phb = hose->private_data;
351 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
352 unsigned int pe, i;
353 resource_size_t pos;
354 struct resource io_res;
355 struct resource m32_res;
356 struct pci_bus_region region;
357 int rc;
358
359 /* Anything not referenced in the device-tree gets PE#0 */
360 pe = pdn ? pdn->pe_number : 0;
361
362 /* Calculate the device min/max */
363 io_res.start = m32_res.start = (resource_size_t)-1;
364 io_res.end = m32_res.end = 0;
365 io_res.flags = IORESOURCE_IO;
366 m32_res.flags = IORESOURCE_MEM;
367
368 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
369 struct resource *r = NULL;
370 if (dev->resource[i].flags & IORESOURCE_IO)
371 r = &io_res;
372 if (dev->resource[i].flags & IORESOURCE_MEM)
373 r = &m32_res;
374 if (!r)
375 continue;
376 if (dev->resource[i].start < r->start)
377 r->start = dev->resource[i].start;
378 if (dev->resource[i].end > r->end)
379 r->end = dev->resource[i].end;
380 }
381
382 /* Setup IO segments */
383 if (io_res.start < io_res.end) {
384 pcibios_resource_to_bus(dev, &region, &io_res);
385 pos = region.start;
386 i = pos / phb->ioda.io_segsize;
387 while(i < phb->ioda.total_pe && pos <= region.end) {
388 if (phb->ioda.io_segmap[i]) {
389 pr_err("%s: Trying to use IO seg #%d which is"
390 " already used by PE# %d\n",
391 pci_name(dev), i,
392 phb->ioda.io_segmap[i]);
393 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
394 break;
395 }
396 phb->ioda.io_segmap[i] = pe;
397 rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
398 OPAL_IO_WINDOW_TYPE,
399 0, i);
400 if (rc != OPAL_SUCCESS) {
401 pr_err("%s: OPAL error %d setting up mapping"
402 " for IO seg# %d\n",
403 pci_name(dev), rc, i);
404 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
405 break;
406 }
407 pos += phb->ioda.io_segsize;
408 i++;
409 };
410 }
411
412 /* Setup M32 segments */
413 if (m32_res.start < m32_res.end) {
414 pcibios_resource_to_bus(dev, &region, &m32_res);
415 pos = region.start;
416 i = pos / phb->ioda.m32_segsize;
417 while(i < phb->ioda.total_pe && pos <= region.end) {
418 if (phb->ioda.m32_segmap[i]) {
419 pr_err("%s: Trying to use M32 seg #%d which is"
420 " already used by PE# %d\n",
421 pci_name(dev), i,
422 phb->ioda.m32_segmap[i]);
423 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
424 break;
425 }
426 phb->ioda.m32_segmap[i] = pe;
427 rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
428 OPAL_M32_WINDOW_TYPE,
429 0, i);
430 if (rc != OPAL_SUCCESS) {
431 pr_err("%s: OPAL error %d setting up mapping"
432 " for M32 seg# %d\n",
433 pci_name(dev), rc, i);
434 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
435 break;
436 }
437 pos += phb->ioda.m32_segsize;
438 i++;
439 }
440 }
441}
442
443/* Check if a resource still fits in the total IO or M32 range
444 * for a given PHB
445 */
446static int __devinit pnv_ioda_resource_fit(struct pci_controller *hose,
447 struct resource *r)
448{
449 struct resource *bounds;
450
451 if (r->flags & IORESOURCE_IO)
452 bounds = &hose->io_resource;
453 else if (r->flags & IORESOURCE_MEM)
454 bounds = &hose->mem_resources[0];
455 else
456 return 1;
457
458 if (r->start >= bounds->start && r->end <= bounds->end)
459 return 1;
460 r->flags = 0;
461 return 0;
462}
463
464static void __devinit pnv_ioda_update_resources(struct pci_bus *bus)
465{
466 struct pci_controller *hose = pci_bus_to_host(bus);
467 struct pci_bus *cbus;
468 struct pci_dev *cdev;
469 unsigned int i;
470 u16 cmd;
471
472 /* Clear all device enables */
473 list_for_each_entry(cdev, &bus->devices, bus_list) {
474 pci_read_config_word(cdev, PCI_COMMAND, &cmd);
475 cmd &= ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY|PCI_COMMAND_MASTER);
476 pci_write_config_word(cdev, PCI_COMMAND, cmd);
477 }
478
479 /* Check if bus resources fit in our IO or M32 range */
480 for (i = 0; bus->self && (i < 2); i++) {
481 struct resource *r = bus->resource[i];
482 if (r && !pnv_ioda_resource_fit(hose, r))
483 pr_err("%s: Bus %d resource %d disabled, no room\n",
484 pci_name(bus->self), bus->number, i);
485 }
486
487 /* Update self if it's not a PHB */
488 if (bus->self)
489 pci_setup_bridge(bus);
490
491 /* Update child devices */
492 list_for_each_entry(cdev, &bus->devices, bus_list) {
493 /* Check if resource fits, if not, disabled it */
494 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
495 struct resource *r = &cdev->resource[i];
496 if (!pnv_ioda_resource_fit(hose, r))
497 pr_err("%s: Resource %d disabled, no room\n",
498 pci_name(cdev), i);
499 }
500
501 /* Assign segments */
502 pnv_ioda_setup_pe_segments(cdev);
503
504 /* Update HW BARs */
505 for (i = 0; i <= PCI_ROM_RESOURCE; i++)
506 pci_update_resource(cdev, i);
507 }
508
509 /* Update child busses */
510 list_for_each_entry(cbus, &bus->children, node)
511 pnv_ioda_update_resources(cbus);
512}
513
514static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb)
515{
516 unsigned long pe;
517
518 do {
519 pe = find_next_zero_bit(phb->ioda.pe_alloc,
520 phb->ioda.total_pe, 0);
521 if (pe >= phb->ioda.total_pe)
522 return IODA_INVALID_PE;
523 } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
524
525 phb->ioda.pe_array[pe].pe_number = pe;
526 return pe;
527}
528
529static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
530{
531 WARN_ON(phb->ioda.pe_array[pe].pdev);
532
533 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
534 clear_bit(pe, phb->ioda.pe_alloc);
535}
536
537/* Currently those 2 are only used when MSIs are enabled, this will change
538 * but in the meantime, we need to protect them to avoid warnings
539 */
540#ifdef CONFIG_PCI_MSI
541static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
542{
543 struct pci_controller *hose = pci_bus_to_host(dev->bus);
544 struct pnv_phb *phb = hose->private_data;
545 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
546
547 if (!pdn)
548 return NULL;
549 if (pdn->pe_number == IODA_INVALID_PE)
550 return NULL;
551 return &phb->ioda.pe_array[pdn->pe_number];
552}
553
554static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
555{
556 struct pnv_ioda_pe *pe = __pnv_ioda_get_one_pe(dev);
557
558 while (!pe && dev->bus->self) {
559 dev = dev->bus->self;
560 pe = __pnv_ioda_get_one_pe(dev);
561 if (pe)
562 pe = pe->bus_pe;
563 }
564 return pe;
565}
566#endif /* CONFIG_PCI_MSI */
567
568static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
569 struct pnv_ioda_pe *pe)
570{
571 struct pci_dev *parent;
572 uint8_t bcomp, dcomp, fcomp;
573 long rc, rid_end, rid;
574
575 /* Bus validation ? */
576 if (pe->pbus) {
577 int count;
578
579 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
580 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
581 parent = pe->pbus->self;
582 count = pe->pbus->subordinate - pe->pbus->secondary + 1;
583 switch(count) {
584 case 1: bcomp = OpalPciBusAll; break;
585 case 2: bcomp = OpalPciBus7Bits; break;
586 case 4: bcomp = OpalPciBus6Bits; break;
587 case 8: bcomp = OpalPciBus5Bits; break;
588 case 16: bcomp = OpalPciBus4Bits; break;
589 case 32: bcomp = OpalPciBus3Bits; break;
590 default:
591 pr_err("%s: Number of subordinate busses %d"
592 " unsupported\n",
593 pci_name(pe->pbus->self), count);
594 /* Do an exact match only */
595 bcomp = OpalPciBusAll;
596 }
597 rid_end = pe->rid + (count << 8);
598 } else {
599 parent = pe->pdev->bus->self;
600 bcomp = OpalPciBusAll;
601 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
602 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
603 rid_end = pe->rid + 1;
604 }
605
606 /* Associate PE in PELT */
607 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
608 bcomp, dcomp, fcomp, OPAL_MAP_PE);
609 if (rc) {
610 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
611 return -ENXIO;
612 }
613 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
614 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
615
616 /* Add to all parents PELT-V */
617 while (parent) {
618 struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
619 if (pdn && pdn->pe_number != IODA_INVALID_PE) {
620 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
621 pe->pe_number, 1);
622 /* XXX What to do in case of error ? */
623 }
624 parent = parent->bus->self;
625 }
626 /* Setup reverse map */
627 for (rid = pe->rid; rid < rid_end; rid++)
628 phb->ioda.pe_rmap[rid] = pe->pe_number;
629
630 /* Setup one MVTs on IODA1 */
631 if (phb->type == PNV_PHB_IODA1) {
632 pe->mve_number = pe->pe_number;
633 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
634 pe->pe_number);
635 if (rc) {
636 pe_err(pe, "OPAL error %ld setting up MVE %d\n",
637 rc, pe->mve_number);
638 pe->mve_number = -1;
639 } else {
640 rc = opal_pci_set_mve_enable(phb->opal_id,
641 pe->mve_number, 1);
642 if (rc) {
643 pe_err(pe, "OPAL error %ld enabling MVE %d\n",
644 rc, pe->mve_number);
645 pe->mve_number = -1;
646 }
647 }
648 } else if (phb->type == PNV_PHB_IODA2)
649 pe->mve_number = 0;
650
651 return 0;
652}
653
654static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
655 struct pnv_ioda_pe *pe)
656{
657 struct pnv_ioda_pe *lpe;
658
659 list_for_each_entry(lpe, &phb->ioda.pe_list, link) {
660 if (lpe->dma_weight < pe->dma_weight) {
661 list_add_tail(&pe->link, &lpe->link);
662 return;
663 }
664 }
665 list_add_tail(&pe->link, &phb->ioda.pe_list);
666}
667
668static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
669{
670 /* This is quite simplistic. The "base" weight of a device
671 * is 10. 0 means no DMA is to be accounted for it.
672 */
673
674 /* If it's a bridge, no DMA */
675 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
676 return 0;
677
678 /* Reduce the weight of slow USB controllers */
679 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
680 dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
681 dev->class == PCI_CLASS_SERIAL_USB_EHCI)
682 return 3;
683
684 /* Increase the weight of RAID (includes Obsidian) */
685 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
686 return 15;
687
688 /* Default */
689 return 10;
690}
691
692static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
693{
694 struct pci_controller *hose = pci_bus_to_host(dev->bus);
695 struct pnv_phb *phb = hose->private_data;
696 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
697 struct pnv_ioda_pe *pe;
698 int pe_num;
699
700 if (!pdn) {
701 pr_err("%s: Device tree node not associated properly\n",
702 pci_name(dev));
703 return NULL;
704 }
705 if (pdn->pe_number != IODA_INVALID_PE)
706 return NULL;
707
708 /* PE#0 has been pre-set */
709 if (dev->bus->number == 0)
710 pe_num = 0;
711 else
712 pe_num = pnv_ioda_alloc_pe(phb);
713 if (pe_num == IODA_INVALID_PE) {
714 pr_warning("%s: Not enough PE# available, disabling device\n",
715 pci_name(dev));
716 return NULL;
717 }
718
719 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
720 * pointer in the PE data structure, both should be destroyed at the
721 * same time. However, this needs to be looked at more closely again
722 * once we actually start removing things (Hotplug, SR-IOV, ...)
723 *
724 * At some point we want to remove the PDN completely anyways
725 */
726 pe = &phb->ioda.pe_array[pe_num];
727 pci_dev_get(dev);
728 pdn->pcidev = dev;
729 pdn->pe_number = pe_num;
730 pe->pdev = dev;
731 pe->pbus = NULL;
732 pe->tce32_seg = -1;
733 pe->mve_number = -1;
734 pe->rid = dev->bus->number << 8 | pdn->devfn;
735
736 pe_info(pe, "Associated device to PE\n");
737
738 if (pnv_ioda_configure_pe(phb, pe)) {
739 /* XXX What do we do here ? */
740 if (pe_num)
741 pnv_ioda_free_pe(phb, pe_num);
742 pdn->pe_number = IODA_INVALID_PE;
743 pe->pdev = NULL;
744 pci_dev_put(dev);
745 return NULL;
746 }
747
748 /* Assign a DMA weight to the device */
749 pe->dma_weight = pnv_ioda_dma_weight(dev);
750 if (pe->dma_weight != 0) {
751 phb->ioda.dma_weight += pe->dma_weight;
752 phb->ioda.dma_pe_count++;
753 }
754
755 /* Link the PE */
756 pnv_ioda_link_pe_by_weight(phb, pe);
757
758 return pe;
759}
760
761static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
762{
763 struct pci_dev *dev;
764
765 list_for_each_entry(dev, &bus->devices, bus_list) {
766 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
767
768 if (pdn == NULL) {
769 pr_warn("%s: No device node associated with device !\n",
770 pci_name(dev));
771 continue;
772 }
773 pci_dev_get(dev);
774 pdn->pcidev = dev;
775 pdn->pe_number = pe->pe_number;
776 pe->dma_weight += pnv_ioda_dma_weight(dev);
777 if (dev->subordinate)
778 pnv_ioda_setup_same_PE(dev->subordinate, pe);
779 }
780}
781
782static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
783 struct pnv_ioda_pe *ppe)
784{
785 struct pci_controller *hose = pci_bus_to_host(dev->bus);
786 struct pnv_phb *phb = hose->private_data;
787 struct pci_bus *bus = dev->subordinate;
788 struct pnv_ioda_pe *pe;
789 int pe_num;
790
791 if (!bus) {
792 pr_warning("%s: Bridge without a subordinate bus !\n",
793 pci_name(dev));
794 return;
795 }
796 pe_num = pnv_ioda_alloc_pe(phb);
797 if (pe_num == IODA_INVALID_PE) {
798 pr_warning("%s: Not enough PE# available, disabling bus\n",
799 pci_name(dev));
800 return;
801 }
802
803 pe = &phb->ioda.pe_array[pe_num];
804 ppe->bus_pe = pe;
805 pe->pbus = bus;
806 pe->pdev = NULL;
807 pe->tce32_seg = -1;
808 pe->mve_number = -1;
809 pe->rid = bus->secondary << 8;
810 pe->dma_weight = 0;
811
812 pe_info(pe, "Secondary busses %d..%d associated with PE\n",
813 bus->secondary, bus->subordinate);
814
815 if (pnv_ioda_configure_pe(phb, pe)) {
816 /* XXX What do we do here ? */
817 if (pe_num)
818 pnv_ioda_free_pe(phb, pe_num);
819 pe->pbus = NULL;
820 return;
821 }
822
823 /* Associate it with all child devices */
824 pnv_ioda_setup_same_PE(bus, pe);
825
826 /* Account for one DMA PE if at least one DMA capable device exist
827 * below the bridge
828 */
829 if (pe->dma_weight != 0) {
830 phb->ioda.dma_weight += pe->dma_weight;
831 phb->ioda.dma_pe_count++;
832 }
833
834 /* Link the PE */
835 pnv_ioda_link_pe_by_weight(phb, pe);
836}
837
838static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus)
839{
840 struct pci_dev *dev;
841 struct pnv_ioda_pe *pe;
842
843 list_for_each_entry(dev, &bus->devices, bus_list) {
844 pe = pnv_ioda_setup_dev_PE(dev);
845 if (pe == NULL)
846 continue;
847 /* Leaving the PCIe domain ... single PE# */
848 if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
849 pnv_ioda_setup_bus_PE(dev, pe);
850 else if (dev->subordinate)
851 pnv_ioda_setup_PEs(dev->subordinate);
852 }
853}
854
855static void __devinit pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb,
856 struct pci_dev *dev)
857{
858 /* We delay DMA setup after we have assigned all PE# */
859}
860
861static void __devinit pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
862 struct pci_bus *bus)
863{
864 struct pci_dev *dev;
865
866 list_for_each_entry(dev, &bus->devices, bus_list) {
867 set_iommu_table_base(&dev->dev, &pe->tce32_table);
868 if (dev->subordinate)
869 pnv_ioda_setup_bus_dma(pe, dev->subordinate);
870 }
871}
872
873static void __devinit pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
874 struct pnv_ioda_pe *pe,
875 unsigned int base,
876 unsigned int segs)
877{
878
879 struct page *tce_mem = NULL;
880 const __be64 *swinvp;
881 struct iommu_table *tbl;
882 unsigned int i;
883 int64_t rc;
884 void *addr;
885
886 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
887#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
888
889 /* XXX FIXME: Handle 64-bit only DMA devices */
890 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
891 /* XXX FIXME: Allocate multi-level tables on PHB3 */
892
893 /* We shouldn't already have a 32-bit DMA associated */
894 if (WARN_ON(pe->tce32_seg >= 0))
895 return;
896
897 /* Grab a 32-bit TCE table */
898 pe->tce32_seg = base;
899 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
900 (base << 28), ((base + segs) << 28) - 1);
901
902 /* XXX Currently, we allocate one big contiguous table for the
903 * TCEs. We only really need one chunk per 256M of TCE space
904 * (ie per segment) but that's an optimization for later, it
905 * requires some added smarts with our get/put_tce implementation
906 */
907 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
908 get_order(TCE32_TABLE_SIZE * segs));
909 if (!tce_mem) {
910 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
911 goto fail;
912 }
913 addr = page_address(tce_mem);
914 memset(addr, 0, TCE32_TABLE_SIZE * segs);
915
916 /* Configure HW */
917 for (i = 0; i < segs; i++) {
918 rc = opal_pci_map_pe_dma_window(phb->opal_id,
919 pe->pe_number,
920 base + i, 1,
921 __pa(addr) + TCE32_TABLE_SIZE * i,
922 TCE32_TABLE_SIZE, 0x1000);
923 if (rc) {
924 pe_err(pe, " Failed to configure 32-bit TCE table,"
925 " err %ld\n", rc);
926 goto fail;
927 }
928 }
929
930 /* Setup linux iommu table */
931 tbl = &pe->tce32_table;
932 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
933 base << 28);
934
935 /* OPAL variant of P7IOC SW invalidated TCEs */
936 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
937 if (swinvp) {
938 /* We need a couple more fields -- an address and a data
939 * to or. Since the bus is only printed out on table free
940 * errors, and on the first pass the data will be a relative
941 * bus number, print that out instead.
942 */
943 tbl->it_busno = 0;
944 tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
945 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
946 | TCE_PCI_SWINV_PAIR;
947 }
948 iommu_init_table(tbl, phb->hose->node);
949
950 if (pe->pdev)
951 set_iommu_table_base(&pe->pdev->dev, tbl);
952 else
953 pnv_ioda_setup_bus_dma(pe, pe->pbus);
954
955 return;
956 fail:
957 /* XXX Failure: Try to fallback to 64-bit only ? */
958 if (pe->tce32_seg >= 0)
959 pe->tce32_seg = -1;
960 if (tce_mem)
961 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
962}
963
964static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb)
965{
966 struct pci_controller *hose = phb->hose;
967 unsigned int residual, remaining, segs, tw, base;
968 struct pnv_ioda_pe *pe;
969
970 /* If we have more PE# than segments available, hand out one
971 * per PE until we run out and let the rest fail. If not,
972 * then we assign at least one segment per PE, plus more based
973 * on the amount of devices under that PE
974 */
975 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
976 residual = 0;
977 else
978 residual = phb->ioda.tce32_count -
979 phb->ioda.dma_pe_count;
980
981 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
982 hose->global_number, phb->ioda.tce32_count);
983 pr_info("PCI: %d PE# for a total weight of %d\n",
984 phb->ioda.dma_pe_count, phb->ioda.dma_weight);
985
986 /* Walk our PE list and configure their DMA segments, hand them
987 * out one base segment plus any residual segments based on
988 * weight
989 */
990 remaining = phb->ioda.tce32_count;
991 tw = phb->ioda.dma_weight;
992 base = 0;
993 list_for_each_entry(pe, &phb->ioda.pe_list, link) {
994 if (!pe->dma_weight)
995 continue;
996 if (!remaining) {
997 pe_warn(pe, "No DMA32 resources available\n");
998 continue;
999 }
1000 segs = 1;
1001 if (residual) {
1002 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
1003 if (segs > remaining)
1004 segs = remaining;
1005 }
1006 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
1007 pe->dma_weight, segs);
1008 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
1009 remaining -= segs;
1010 base += segs;
1011 }
1012}
1013
1014#ifdef CONFIG_PCI_MSI
1015static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
1016 unsigned int hwirq, unsigned int is_64,
1017 struct msi_msg *msg)
1018{
1019 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
1020 unsigned int xive_num = hwirq - phb->msi_base;
1021 uint64_t addr64;
1022 uint32_t addr32, data;
1023 int rc;
1024
1025 /* No PE assigned ? bail out ... no MSI for you ! */
1026 if (pe == NULL)
1027 return -ENXIO;
1028
1029 /* Check if we have an MVE */
1030 if (pe->mve_number < 0)
1031 return -ENXIO;
1032
1033 /* Assign XIVE to PE */
1034 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
1035 if (rc) {
1036 pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
1037 pci_name(dev), rc, xive_num);
1038 return -EIO;
1039 }
1040
1041 if (is_64) {
1042 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
1043 &addr64, &data);
1044 if (rc) {
1045 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
1046 pci_name(dev), rc);
1047 return -EIO;
1048 }
1049 msg->address_hi = addr64 >> 32;
1050 msg->address_lo = addr64 & 0xfffffffful;
1051 } else {
1052 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
1053 &addr32, &data);
1054 if (rc) {
1055 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
1056 pci_name(dev), rc);
1057 return -EIO;
1058 }
1059 msg->address_hi = 0;
1060 msg->address_lo = addr32;
1061 }
1062 msg->data = data;
1063
1064 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
1065 " address=%x_%08x data=%x PE# %d\n",
1066 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
1067 msg->address_hi, msg->address_lo, data, pe->pe_number);
1068
1069 return 0;
1070}
1071
1072static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
1073{
1074 unsigned int bmap_size;
1075 const __be32 *prop = of_get_property(phb->hose->dn,
1076 "ibm,opal-msi-ranges", NULL);
1077 if (!prop) {
1078 /* BML Fallback */
1079 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
1080 }
1081 if (!prop)
1082 return;
1083
1084 phb->msi_base = be32_to_cpup(prop);
1085 phb->msi_count = be32_to_cpup(prop + 1);
1086 bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long);
1087 phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL);
1088 if (!phb->msi_map) {
1089 pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
1090 phb->hose->global_number);
1091 return;
1092 }
1093 phb->msi_setup = pnv_pci_ioda_msi_setup;
1094 phb->msi32_support = 1;
1095 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
1096 phb->msi_count, phb->msi_base);
1097}
1098#else
/* MSI support is compiled out: there is nothing to set up for the PHB. */
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
}
1100#endif /* CONFIG_PCI_MSI */
1101
1102/* This is the starting point of our IODA specific resource
1103 * allocation process
1104 */
1105static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
1106{
1107 resource_size_t size, align;
1108 struct pci_bus *child;
1109
1110 /* Associate PEs per functions */
1111 pnv_ioda_setup_PEs(hose->bus);
1112
1113 /* Calculate all resources */
1114 pnv_ioda_calc_bus(hose->bus, IORESOURCE_IO, &size, &align);
1115 pnv_ioda_calc_bus(hose->bus, IORESOURCE_MEM, &size, &align);
1116
1117 /* Apply then to HW */
1118 pnv_ioda_update_resources(hose->bus);
1119
1120 /* Setup DMA */
1121 pnv_ioda_setup_dma(hose->private_data);
1122
1123 /* Configure PCI Express settings */
1124 list_for_each_entry(child, &hose->bus->children, node) {
1125 struct pci_dev *self = child->self;
1126 if (!self)
1127 continue;
1128 pcie_bus_configure_settings(child, self->pcie_mpss);
1129 }
1130}
1131
1132/* Prevent enabling devices for which we couldn't properly
1133 * assign a PE
1134 */
1135static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev)
1136{
1137 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
1138
1139 if (!pdn || pdn->pe_number == IODA_INVALID_PE)
1140 return -EINVAL;
1141 return 0;
1142}
1143
1144static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
1145 u32 devfn)
1146{
1147 return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
1148}
1149
1150void __init pnv_pci_init_ioda1_phb(struct device_node *np)
1151{
1152 struct pci_controller *hose;
1153 static int primary = 1;
1154 struct pnv_phb *phb;
1155 unsigned long size, m32map_off, iomap_off, pemap_off;
1156 const u64 *prop64;
1157 u64 phb_id;
1158 void *aux;
1159 long rc;
1160
1161 pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);
1162
1163 prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
1164 if (!prop64) {
1165 pr_err(" Missing \"ibm,opal-phbid\" property !\n");
1166 return;
1167 }
1168 phb_id = be64_to_cpup(prop64);
1169 pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
1170
1171 phb = alloc_bootmem(sizeof(struct pnv_phb));
1172 if (phb) {
1173 memset(phb, 0, sizeof(struct pnv_phb));
1174 phb->hose = hose = pcibios_alloc_controller(np);
1175 }
1176 if (!phb || !phb->hose) {
1177 pr_err("PCI: Failed to allocate PCI controller for %s\n",
1178 np->full_name);
1179 return;
1180 }
1181
1182 spin_lock_init(&phb->lock);
1183 /* XXX Use device-tree */
1184 hose->first_busno = 0;
1185 hose->last_busno = 0xff;
1186 hose->private_data = phb;
1187 phb->opal_id = phb_id;
1188 phb->type = PNV_PHB_IODA1;
1189
1190 /* We parse "ranges" now since we need to deduce the register base
1191 * from the IO base
1192 */
1193 pci_process_bridge_OF_ranges(phb->hose, np, primary);
1194 primary = 0;
1195
1196 /* Magic formula from Milton */
1197 phb->regs = of_iomap(np, 0);
1198 if (phb->regs == NULL)
1199 pr_err(" Failed to map registers !\n");
1200
1201
1202 /* XXX This is hack-a-thon. This needs to be changed so that:
1203 * - we obtain stuff like PE# etc... from device-tree
1204 * - we properly re-allocate M32 ourselves
1205 * (the OFW one isn't very good)
1206 */
1207
1208 /* Initialize more IODA stuff */
1209 phb->ioda.total_pe = 128;
1210
1211 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
1212 /* OFW Has already off top 64k of M32 space (MSI space) */
1213 phb->ioda.m32_size += 0x10000;
1214
1215 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
1216 phb->ioda.m32_pci_base = hose->mem_resources[0].start -
1217 hose->pci_mem_offset;
1218 phb->ioda.io_size = hose->pci_io_size;
1219 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
1220 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
1221
1222 /* Allocate aux data & arrays */
1223 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
1224 m32map_off = size;
1225 size += phb->ioda.total_pe;
1226 iomap_off = size;
1227 size += phb->ioda.total_pe;
1228 pemap_off = size;
1229 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
1230 aux = alloc_bootmem(size);
1231 memset(aux, 0, size);
1232 phb->ioda.pe_alloc = aux;
1233 phb->ioda.m32_segmap = aux + m32map_off;
1234 phb->ioda.io_segmap = aux + iomap_off;
1235 phb->ioda.pe_array = aux + pemap_off;
1236 set_bit(0, phb->ioda.pe_alloc);
1237
1238 INIT_LIST_HEAD(&phb->ioda.pe_list);
1239
1240 /* Calculate how many 32-bit TCE segments we have */
1241 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
1242
1243 /* Clear unusable m64 */
1244 hose->mem_resources[1].flags = 0;
1245 hose->mem_resources[1].start = 0;
1246 hose->mem_resources[1].end = 0;
1247 hose->mem_resources[2].flags = 0;
1248 hose->mem_resources[2].start = 0;
1249 hose->mem_resources[2].end = 0;
1250
1251#if 0
1252 rc = opal_pci_set_phb_mem_window(opal->phb_id,
1253 window_type,
1254 window_num,
1255 starting_real_address,
1256 starting_pci_address,
1257 segment_size);
1258#endif
1259
1260 pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
1261 phb->ioda.total_pe,
1262 phb->ioda.m32_size, phb->ioda.m32_segsize,
1263 phb->ioda.io_size, phb->ioda.io_segsize);
1264
1265 if (phb->regs) {
1266 pr_devel(" BUID = 0x%016llx\n", in_be64(phb->regs + 0x100));
1267 pr_devel(" PHB2_CR = 0x%016llx\n", in_be64(phb->regs + 0x160));
1268 pr_devel(" IO_BAR = 0x%016llx\n", in_be64(phb->regs + 0x170));
1269 pr_devel(" IO_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x178));
1270 pr_devel(" IO_SAR = 0x%016llx\n", in_be64(phb->regs + 0x180));
1271 pr_devel(" M32_BAR = 0x%016llx\n", in_be64(phb->regs + 0x190));
1272 pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
1273 pr_devel(" M32_SAR = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
1274 }
1275 phb->hose->ops = &pnv_pci_ops;
1276
1277 /* Setup RID -> PE mapping function */
1278 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
1279
1280 /* Setup TCEs */
1281 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
1282
1283 /* Setup MSI support */
1284 pnv_pci_init_ioda_msis(phb);
1285
1286 /* We set both probe_only and PCI_REASSIGN_ALL_RSRC. This is an
1287 * odd combination which essentially means that we skip all resource
1288 * fixups and assignments in the generic code, and do it all
1289 * ourselves here
1290 */
1291 pci_probe_only = 1;
1292 ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
1293 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
1294 pci_add_flags(PCI_REASSIGN_ALL_RSRC);
1295
1296 /* Reset IODA tables to a clean state */
1297 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_RESET, OPAL_ASSERT_RESET);
1298 if (rc)
1299 pr_warning(" OPAL Error %ld performing IODA reset !\n", rc);
1300 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
1301}
1302
1303void __init pnv_pci_init_ioda_hub(struct device_node *np)
1304{
1305 struct device_node *phbn;
1306 const u64 *prop64;
1307 u64 hub_id;
1308
1309 pr_info("Probing IODA IO-Hub %s\n", np->full_name);
1310
1311 prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
1312 if (!prop64) {
1313 pr_err(" Missing \"ibm,opal-hubid\" property !\n");
1314 return;
1315 }
1316 hub_id = be64_to_cpup(prop64);
1317 pr_devel(" HUB-ID : 0x%016llx\n", hub_id);
1318
1319 /* Count child PHBs */
1320 for_each_child_of_node(np, phbn) {
1321 /* Look for IODA1 PHBs */
1322 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1323 pnv_pci_init_ioda1_phb(phbn);
1324 }
1325}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index baef772d41f4..c0ed379498a0 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -467,12 +467,24 @@ void __init pnv_pci_init(void)
467 init_pci_config_tokens(); 467 init_pci_config_tokens();
468 find_and_init_phbs(); 468 find_and_init_phbs();
469#endif /* CONFIG_PPC_POWERNV_RTAS */ 469#endif /* CONFIG_PPC_POWERNV_RTAS */
470 } else { 470 }
471 /* OPAL is here, do our normal stuff */ 471 /* OPAL is here, do our normal stuff */
472 else {
473 int found_ioda = 0;
474
475 /* Look for IODA IO-Hubs. We don't support mixing IODA
476 * and p5ioc2 due to the need to change some global
477 * probing flags
478 */
479 for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
480 pnv_pci_init_ioda_hub(np);
481 found_ioda = 1;
482 }
472 483
473 /* Look for p5ioc2 IO-Hubs */ 484 /* Look for p5ioc2 IO-Hubs */
474 for_each_compatible_node(np, NULL, "ibm,p5ioc2") 485 if (!found_ioda)
475 pnv_pci_init_p5ioc2_hub(np); 486 for_each_compatible_node(np, NULL, "ibm,p5ioc2")
487 pnv_pci_init_p5ioc2_hub(np);
476 } 488 }
477 489
478 /* Setup the linkage between OF nodes and PHBs */ 490 /* Setup the linkage between OF nodes and PHBs */
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index d4dbc4950936..28ae4ca512c4 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -9,6 +9,50 @@ enum pnv_phb_type {
9 PNV_PHB_IODA2, 9 PNV_PHB_IODA2,
10}; 10};
11 11
12/* Data associated with a PE, including IOMMU tracking etc.. */
13struct pnv_ioda_pe {
14 /* A PE can be associated with a single device or an
15 * entire bus (& children). In the former case, pdev
16 * is populated, in the latter case, pbus is.
17 */
18 struct pci_dev *pdev;
19 struct pci_bus *pbus;
20
21 /* Effective RID (device RID for a device PE and base bus
22 * RID with devfn 0 for a bus PE)
23 */
24 unsigned int rid;
25
26 /* PE number */
27 unsigned int pe_number;
28
29 /* "Weight" assigned to the PE for the sake of DMA resource
30 * allocations
31 */
32 unsigned int dma_weight;
33
34 /* This is a PCI-E -> PCI-X bridge, this points to the
35 * corresponding bus PE
36 */
37 struct pnv_ioda_pe *bus_pe;
38
39 /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
40 int tce32_seg;
41 int tce32_segcount;
42 struct iommu_table tce32_table;
43
44 /* XXX TODO: Add support for additional 64-bit iommus */
45
46 /* MSIs. MVE index is identical for 32 and 64 bit MSI
47 * and -1 if not supported. (It's actually identical to the
48 * PE number)
49 */
50 int mve_number;
51
52 /* Link in list of PE#s */
53 struct list_head link;
54};
55
12struct pnv_phb { 56struct pnv_phb {
13 struct pci_controller *hose; 57 struct pci_controller *hose;
14 enum pnv_phb_type type; 58 enum pnv_phb_type type;
@@ -34,6 +78,45 @@ struct pnv_phb {
34 struct { 78 struct {
35 struct iommu_table iommu_table; 79 struct iommu_table iommu_table;
36 } p5ioc2; 80 } p5ioc2;
81
82 struct {
83 /* Global bridge info */
84 unsigned int total_pe;
85 unsigned int m32_size;
86 unsigned int m32_segsize;
87 unsigned int m32_pci_base;
88 unsigned int io_size;
89 unsigned int io_segsize;
90 unsigned int io_pci_base;
91
92 /* PE allocation bitmap */
93 unsigned long *pe_alloc;
94
95 /* M32 & IO segment maps */
96 unsigned int *m32_segmap;
97 unsigned int *io_segmap;
98 struct pnv_ioda_pe *pe_array;
99
100 /* Reverse map of PEs, will have to extend if
101 * we are to support more than 256 PEs, indexed
102 * by { bus, devfn }
103 */
104 unsigned char pe_rmap[0x10000];
105
106 /* 32-bit TCE tables allocation */
107 unsigned long tce32_count;
108
109 /* Total "weight" for the sake of DMA resources
110 * allocation
111 */
112 unsigned int dma_weight;
113 unsigned int dma_pe_count;
114
115 /* Sorted list of used PE's, sorted at
116 * boot for resource allocation purposes
117 */
118 struct list_head pe_list;
119 } ioda;
37 }; 120 };
38}; 121};
39 122
@@ -43,6 +126,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
43 void *tce_mem, u64 tce_size, 126 void *tce_mem, u64 tce_size,
44 u64 dma_offset); 127 u64 dma_offset);
45extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); 128extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
129extern void pnv_pci_init_ioda_hub(struct device_node *np);
46 130
47 131
48#endif /* __POWERNV_PCI_H */ 132#endif /* __POWERNV_PCI_H */