aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-06 20:58:22 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-06 20:58:22 -0500
commite4e88f31bcb5f05f24b9ae518d4ecb44e1a7774d (patch)
tree9eef6998f5bbd1a2c999011d9e0151f00c6e7297 /arch/powerpc/platforms/powernv
parent9753dfe19a85e7e45a34a56f4cb2048bb4f50e27 (diff)
parentef88e3911c0e0301e73fa3b3b2567aabdbe17cc4 (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (185 commits) powerpc: fix compile error with 85xx/p1010rdb.c powerpc: fix compile error with 85xx/p1023_rds.c powerpc/fsl: add MSI support for the Freescale hypervisor arch/powerpc/sysdev/fsl_rmu.c: introduce missing kfree powerpc/fsl: Add support for Integrated Flash Controller powerpc/fsl: update compatiable on fsl 16550 uart nodes powerpc/85xx: fix PCI and localbus properties in p1022ds.dts powerpc/85xx: re-enable ePAPR byte channel driver in corenet32_smp_defconfig powerpc/fsl: Update defconfigs to enable some standard FSL HW features powerpc: Add TBI PHY node to first MDIO bus sbc834x: put full compat string in board match check powerpc/fsl-pci: Allow 64-bit PCIe devices to DMA to any memory address powerpc: Fix unpaired probe_hcall_entry and probe_hcall_exit offb: Fix setting of the pseudo-palette for >8bpp offb: Add palette hack for qemu "standard vga" framebuffer offb: Fix bug in calculating requested vram size powerpc/boot: Change the WARN to INFO for boot wrapper overlap message powerpc/44x: Fix build error on currituck platform powerpc/boot: Change the load address for the wrapper to fit the kernel powerpc/44x: Enable CRASH_DUMP for 440x ... Fix up a trivial conflict in arch/powerpc/include/asm/cputime.h due to the additional sparse-checking code for cputime_t.
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S8
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c1330
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c1
-rw-r--r--arch/powerpc/platforms/powernv/pci.c228
-rw-r--r--arch/powerpc/platforms/powernv/pci.h100
-rw-r--r--arch/powerpc/platforms/powernv/smp.c2
7 files changed, 1644 insertions, 27 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 31853008b418..bcc3cb48a44e 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,4 +2,4 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o
2obj-y += opal-rtc.o opal-nvram.o 2obj-y += opal-rtc.o opal-nvram.o
3 3
4obj-$(CONFIG_SMP) += smp.o 4obj-$(CONFIG_SMP) += smp.o
5obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o 5obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 4a3f46d8533e..3bb07e5e43cd 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -99,3 +99,11 @@ OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
99OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW); 99OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
100OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL); 100OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
101OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET); 101OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
102OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
103OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
104OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
105OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
106OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
107OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
108OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
109OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
new file mode 100644
index 000000000000..f31162cfdaa9
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -0,0 +1,1330 @@
1/*
2 * Support PCI/PCIe on PowerNV platforms
3 *
4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#undef DEBUG
13
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17#include <linux/string.h>
18#include <linux/init.h>
19#include <linux/bootmem.h>
20#include <linux/irq.h>
21#include <linux/io.h>
22#include <linux/msi.h>
23
24#include <asm/sections.h>
25#include <asm/io.h>
26#include <asm/prom.h>
27#include <asm/pci-bridge.h>
28#include <asm/machdep.h>
29#include <asm/ppc-pci.h>
30#include <asm/opal.h>
31#include <asm/iommu.h>
32#include <asm/tce.h>
33#include <asm/abs_addr.h>
34
35#include "powernv.h"
36#include "pci.h"
37
38struct resource_wrap {
39 struct list_head link;
40 resource_size_t size;
41 resource_size_t align;
42 struct pci_dev *dev; /* Set if it's a device */
43 struct pci_bus *bus; /* Set if it's a bridge */
44};
45
46static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
47 struct va_format *vaf)
48{
49 char pfix[32];
50
51 if (pe->pdev)
52 strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
53 else
54 sprintf(pfix, "%04x:%02x ",
55 pci_domain_nr(pe->pbus), pe->pbus->number);
56 return printk("pci %s%s: [PE# %.3d] %pV", level, pfix, pe->pe_number, vaf);
57}
58
59#define define_pe_printk_level(func, kern_level) \
60static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
61{ \
62 struct va_format vaf; \
63 va_list args; \
64 int r; \
65 \
66 va_start(args, fmt); \
67 \
68 vaf.fmt = fmt; \
69 vaf.va = &args; \
70 \
71 r = __pe_printk(kern_level, pe, &vaf); \
72 va_end(args); \
73 \
74 return r; \
75} \
76
77define_pe_printk_level(pe_err, KERN_ERR);
78define_pe_printk_level(pe_warn, KERN_WARNING);
79define_pe_printk_level(pe_info, KERN_INFO);
80
81
82/* Calculate resource usage & alignment requirement of a single
83 * device. This will also assign all resources within the device
84 * for a given type starting at 0 for the biggest one and then
85 * assigning in decreasing order of size.
86 */
87static void __devinit pnv_ioda_calc_dev(struct pci_dev *dev, unsigned int flags,
88 resource_size_t *size,
89 resource_size_t *align)
90{
91 resource_size_t start;
92 struct resource *r;
93 int i;
94
95 pr_devel(" -> CDR %s\n", pci_name(dev));
96
97 *size = *align = 0;
98
99 /* Clear the resources out and mark them all unset */
100 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
101 r = &dev->resource[i];
102 if (!(r->flags & flags))
103 continue;
104 if (r->start) {
105 r->end -= r->start;
106 r->start = 0;
107 }
108 r->flags |= IORESOURCE_UNSET;
109 }
110
111 /* We currently keep all memory resources together, we
112 * will handle prefetch & 64-bit separately in the future
113 * but for now we stick everybody in M32
114 */
115 start = 0;
116 for (;;) {
117 resource_size_t max_size = 0;
118 int max_no = -1;
119
120 /* Find next biggest resource */
121 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
122 r = &dev->resource[i];
123 if (!(r->flags & IORESOURCE_UNSET) ||
124 !(r->flags & flags))
125 continue;
126 if (resource_size(r) > max_size) {
127 max_size = resource_size(r);
128 max_no = i;
129 }
130 }
131 if (max_no < 0)
132 break;
133 r = &dev->resource[max_no];
134 if (max_size > *align)
135 *align = max_size;
136 *size += max_size;
137 r->start = start;
138 start += max_size;
139 r->end = r->start + max_size - 1;
140 r->flags &= ~IORESOURCE_UNSET;
141 pr_devel(" -> R%d %016llx..%016llx\n",
142 max_no, r->start, r->end);
143 }
144 pr_devel(" <- CDR %s size=%llx align=%llx\n",
145 pci_name(dev), *size, *align);
146}
147
148/* Allocate a resource "wrap" for a given device or bridge and
149 * insert it at the right position in the sorted list
150 */
151static void __devinit pnv_ioda_add_wrap(struct list_head *list,
152 struct pci_bus *bus,
153 struct pci_dev *dev,
154 resource_size_t size,
155 resource_size_t align)
156{
157 struct resource_wrap *w1, *w = kzalloc(sizeof(*w), GFP_KERNEL);
158
159 w->size = size;
160 w->align = align;
161 w->dev = dev;
162 w->bus = bus;
163
164 list_for_each_entry(w1, list, link) {
165 if (w1->align < align) {
166 list_add_tail(&w->link, &w1->link);
167 return;
168 }
169 }
170 list_add_tail(&w->link, list);
171}
172
173/* Offset device resources of a given type */
174static void __devinit pnv_ioda_offset_dev(struct pci_dev *dev,
175 unsigned int flags,
176 resource_size_t offset)
177{
178 struct resource *r;
179 int i;
180
181 pr_devel(" -> ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
182
183 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
184 r = &dev->resource[i];
185 if (r->flags & flags) {
186 dev->resource[i].start += offset;
187 dev->resource[i].end += offset;
188 }
189 }
190
191 pr_devel(" <- ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
192}
193
194/* Offset bus resources (& all children) of a given type */
195static void __devinit pnv_ioda_offset_bus(struct pci_bus *bus,
196 unsigned int flags,
197 resource_size_t offset)
198{
199 struct resource *r;
200 struct pci_dev *dev;
201 struct pci_bus *cbus;
202 int i;
203
204 pr_devel(" -> OBR %s [%x] +%016llx\n",
205 bus->self ? pci_name(bus->self) : "root", flags, offset);
206
207 for (i = 0; i < 2; i++) {
208 r = bus->resource[i];
209 if (r && (r->flags & flags)) {
210 bus->resource[i]->start += offset;
211 bus->resource[i]->end += offset;
212 }
213 }
214 list_for_each_entry(dev, &bus->devices, bus_list)
215 pnv_ioda_offset_dev(dev, flags, offset);
216 list_for_each_entry(cbus, &bus->children, node)
217 pnv_ioda_offset_bus(cbus, flags, offset);
218
219 pr_devel(" <- OBR %s [%x]\n",
220 bus->self ? pci_name(bus->self) : "root", flags);
221}
222
223/* This is the guts of our IODA resource allocation. This is called
224 * recursively for each bus in the system. It calculates all the
225 * necessary size and requirements for children and assign them
226 * resources such that:
227 *
228 * - Each function fits in it's own contiguous set of IO/M32
229 * segment
230 *
231 * - All segments behind a P2P bridge are contiguous and obey
232 * alignment constraints of those bridges
233 */
234static void __devinit pnv_ioda_calc_bus(struct pci_bus *bus, unsigned int flags,
235 resource_size_t *size,
236 resource_size_t *align)
237{
238 struct pci_controller *hose = pci_bus_to_host(bus);
239 struct pnv_phb *phb = hose->private_data;
240 resource_size_t dev_size, dev_align, start;
241 resource_size_t min_align, min_balign;
242 struct pci_dev *cdev;
243 struct pci_bus *cbus;
244 struct list_head head;
245 struct resource_wrap *w;
246 unsigned int bres;
247
248 *size = *align = 0;
249
250 pr_devel("-> CBR %s [%x]\n",
251 bus->self ? pci_name(bus->self) : "root", flags);
252
253 /* Calculate alignment requirements based on the type
254 * of resource we are working on
255 */
256 if (flags & IORESOURCE_IO) {
257 bres = 0;
258 min_align = phb->ioda.io_segsize;
259 min_balign = 0x1000;
260 } else {
261 bres = 1;
262 min_align = phb->ioda.m32_segsize;
263 min_balign = 0x100000;
264 }
265
266 /* Gather all our children resources ordered by alignment */
267 INIT_LIST_HEAD(&head);
268
269 /* - Busses */
270 list_for_each_entry(cbus, &bus->children, node) {
271 pnv_ioda_calc_bus(cbus, flags, &dev_size, &dev_align);
272 pnv_ioda_add_wrap(&head, cbus, NULL, dev_size, dev_align);
273 }
274
275 /* - Devices */
276 list_for_each_entry(cdev, &bus->devices, bus_list) {
277 pnv_ioda_calc_dev(cdev, flags, &dev_size, &dev_align);
278 /* Align them to segment size */
279 if (dev_align < min_align)
280 dev_align = min_align;
281 pnv_ioda_add_wrap(&head, NULL, cdev, dev_size, dev_align);
282 }
283 if (list_empty(&head))
284 goto empty;
285
286 /* Now we can do two things: assign offsets to them within that
287 * level and get our total alignment & size requirements. The
288 * assignment algorithm is going to be uber-trivial for now, we
289 * can try to be smarter later at filling out holes.
290 */
291 start = bus->self ? 0 : bus->resource[bres]->start;
292
293 /* Don't hand out IO 0 */
294 if ((flags & IORESOURCE_IO) && !bus->self)
295 start += 0x1000;
296
297 while(!list_empty(&head)) {
298 w = list_first_entry(&head, struct resource_wrap, link);
299 list_del(&w->link);
300 if (w->size) {
301 if (start) {
302 start = ALIGN(start, w->align);
303 if (w->dev)
304 pnv_ioda_offset_dev(w->dev,flags,start);
305 else if (w->bus)
306 pnv_ioda_offset_bus(w->bus,flags,start);
307 }
308 if (w->align > *align)
309 *align = w->align;
310 }
311 start += w->size;
312 kfree(w);
313 }
314 *size = start;
315
316 /* Align and setup bridge resources */
317 *align = max_t(resource_size_t, *align,
318 max_t(resource_size_t, min_align, min_balign));
319 *size = ALIGN(*size,
320 max_t(resource_size_t, min_align, min_balign));
321 empty:
322 /* Only setup P2P's, not the PHB itself */
323 if (bus->self) {
324 WARN_ON(bus->resource[bres] == NULL);
325 bus->resource[bres]->start = 0;
326 bus->resource[bres]->flags = (*size) ? flags : 0;
327 bus->resource[bres]->end = (*size) ? (*size - 1) : 0;
328
329 /* Clear prefetch bus resources for now */
330 bus->resource[2]->flags = 0;
331 }
332
333 pr_devel("<- CBR %s [%x] *size=%016llx *align=%016llx\n",
334 bus->self ? pci_name(bus->self) : "root", flags,*size,*align);
335}
336
337static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
338{
339 struct device_node *np;
340
341 np = pci_device_to_OF_node(dev);
342 if (!np)
343 return NULL;
344 return PCI_DN(np);
345}
346
347static void __devinit pnv_ioda_setup_pe_segments(struct pci_dev *dev)
348{
349 struct pci_controller *hose = pci_bus_to_host(dev->bus);
350 struct pnv_phb *phb = hose->private_data;
351 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
352 unsigned int pe, i;
353 resource_size_t pos;
354 struct resource io_res;
355 struct resource m32_res;
356 struct pci_bus_region region;
357 int rc;
358
359 /* Anything not referenced in the device-tree gets PE#0 */
360 pe = pdn ? pdn->pe_number : 0;
361
362 /* Calculate the device min/max */
363 io_res.start = m32_res.start = (resource_size_t)-1;
364 io_res.end = m32_res.end = 0;
365 io_res.flags = IORESOURCE_IO;
366 m32_res.flags = IORESOURCE_MEM;
367
368 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
369 struct resource *r = NULL;
370 if (dev->resource[i].flags & IORESOURCE_IO)
371 r = &io_res;
372 if (dev->resource[i].flags & IORESOURCE_MEM)
373 r = &m32_res;
374 if (!r)
375 continue;
376 if (dev->resource[i].start < r->start)
377 r->start = dev->resource[i].start;
378 if (dev->resource[i].end > r->end)
379 r->end = dev->resource[i].end;
380 }
381
382 /* Setup IO segments */
383 if (io_res.start < io_res.end) {
384 pcibios_resource_to_bus(dev, &region, &io_res);
385 pos = region.start;
386 i = pos / phb->ioda.io_segsize;
387 while(i < phb->ioda.total_pe && pos <= region.end) {
388 if (phb->ioda.io_segmap[i]) {
389 pr_err("%s: Trying to use IO seg #%d which is"
390 " already used by PE# %d\n",
391 pci_name(dev), i,
392 phb->ioda.io_segmap[i]);
393 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
394 break;
395 }
396 phb->ioda.io_segmap[i] = pe;
397 rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
398 OPAL_IO_WINDOW_TYPE,
399 0, i);
400 if (rc != OPAL_SUCCESS) {
401 pr_err("%s: OPAL error %d setting up mapping"
402 " for IO seg# %d\n",
403 pci_name(dev), rc, i);
404 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
405 break;
406 }
407 pos += phb->ioda.io_segsize;
408 i++;
409 };
410 }
411
412 /* Setup M32 segments */
413 if (m32_res.start < m32_res.end) {
414 pcibios_resource_to_bus(dev, &region, &m32_res);
415 pos = region.start;
416 i = pos / phb->ioda.m32_segsize;
417 while(i < phb->ioda.total_pe && pos <= region.end) {
418 if (phb->ioda.m32_segmap[i]) {
419 pr_err("%s: Trying to use M32 seg #%d which is"
420 " already used by PE# %d\n",
421 pci_name(dev), i,
422 phb->ioda.m32_segmap[i]);
423 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
424 break;
425 }
426 phb->ioda.m32_segmap[i] = pe;
427 rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
428 OPAL_M32_WINDOW_TYPE,
429 0, i);
430 if (rc != OPAL_SUCCESS) {
431 pr_err("%s: OPAL error %d setting up mapping"
432 " for M32 seg# %d\n",
433 pci_name(dev), rc, i);
434 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
435 break;
436 }
437 pos += phb->ioda.m32_segsize;
438 i++;
439 }
440 }
441}
442
443/* Check if a resource still fits in the total IO or M32 range
444 * for a given PHB
445 */
446static int __devinit pnv_ioda_resource_fit(struct pci_controller *hose,
447 struct resource *r)
448{
449 struct resource *bounds;
450
451 if (r->flags & IORESOURCE_IO)
452 bounds = &hose->io_resource;
453 else if (r->flags & IORESOURCE_MEM)
454 bounds = &hose->mem_resources[0];
455 else
456 return 1;
457
458 if (r->start >= bounds->start && r->end <= bounds->end)
459 return 1;
460 r->flags = 0;
461 return 0;
462}
463
464static void __devinit pnv_ioda_update_resources(struct pci_bus *bus)
465{
466 struct pci_controller *hose = pci_bus_to_host(bus);
467 struct pci_bus *cbus;
468 struct pci_dev *cdev;
469 unsigned int i;
470
471 /* We used to clear all device enables here. However it looks like
472 * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers,
473 * and shoot fatal errors to the PHB which in turns fences itself
474 * and we can't recover from that ... yet. So for now, let's leave
475 * the enables as-is and hope for the best.
476 */
477
478 /* Check if bus resources fit in our IO or M32 range */
479 for (i = 0; bus->self && (i < 2); i++) {
480 struct resource *r = bus->resource[i];
481 if (r && !pnv_ioda_resource_fit(hose, r))
482 pr_err("%s: Bus %d resource %d disabled, no room\n",
483 pci_name(bus->self), bus->number, i);
484 }
485
486 /* Update self if it's not a PHB */
487 if (bus->self)
488 pci_setup_bridge(bus);
489
490 /* Update child devices */
491 list_for_each_entry(cdev, &bus->devices, bus_list) {
492 /* Check if resource fits, if not, disabled it */
493 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
494 struct resource *r = &cdev->resource[i];
495 if (!pnv_ioda_resource_fit(hose, r))
496 pr_err("%s: Resource %d disabled, no room\n",
497 pci_name(cdev), i);
498 }
499
500 /* Assign segments */
501 pnv_ioda_setup_pe_segments(cdev);
502
503 /* Update HW BARs */
504 for (i = 0; i <= PCI_ROM_RESOURCE; i++)
505 pci_update_resource(cdev, i);
506 }
507
508 /* Update child busses */
509 list_for_each_entry(cbus, &bus->children, node)
510 pnv_ioda_update_resources(cbus);
511}
512
513static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb)
514{
515 unsigned long pe;
516
517 do {
518 pe = find_next_zero_bit(phb->ioda.pe_alloc,
519 phb->ioda.total_pe, 0);
520 if (pe >= phb->ioda.total_pe)
521 return IODA_INVALID_PE;
522 } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
523
524 phb->ioda.pe_array[pe].pe_number = pe;
525 return pe;
526}
527
528static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
529{
530 WARN_ON(phb->ioda.pe_array[pe].pdev);
531
532 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
533 clear_bit(pe, phb->ioda.pe_alloc);
534}
535
536/* Currently those 2 are only used when MSIs are enabled, this will change
537 * but in the meantime, we need to protect them to avoid warnings
538 */
539#ifdef CONFIG_PCI_MSI
540static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
541{
542 struct pci_controller *hose = pci_bus_to_host(dev->bus);
543 struct pnv_phb *phb = hose->private_data;
544 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
545
546 if (!pdn)
547 return NULL;
548 if (pdn->pe_number == IODA_INVALID_PE)
549 return NULL;
550 return &phb->ioda.pe_array[pdn->pe_number];
551}
552
553static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
554{
555 struct pnv_ioda_pe *pe = __pnv_ioda_get_one_pe(dev);
556
557 while (!pe && dev->bus->self) {
558 dev = dev->bus->self;
559 pe = __pnv_ioda_get_one_pe(dev);
560 if (pe)
561 pe = pe->bus_pe;
562 }
563 return pe;
564}
565#endif /* CONFIG_PCI_MSI */
566
567static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
568 struct pnv_ioda_pe *pe)
569{
570 struct pci_dev *parent;
571 uint8_t bcomp, dcomp, fcomp;
572 long rc, rid_end, rid;
573
574 /* Bus validation ? */
575 if (pe->pbus) {
576 int count;
577
578 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
579 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
580 parent = pe->pbus->self;
581 count = pe->pbus->subordinate - pe->pbus->secondary + 1;
582 switch(count) {
583 case 1: bcomp = OpalPciBusAll; break;
584 case 2: bcomp = OpalPciBus7Bits; break;
585 case 4: bcomp = OpalPciBus6Bits; break;
586 case 8: bcomp = OpalPciBus5Bits; break;
587 case 16: bcomp = OpalPciBus4Bits; break;
588 case 32: bcomp = OpalPciBus3Bits; break;
589 default:
590 pr_err("%s: Number of subordinate busses %d"
591 " unsupported\n",
592 pci_name(pe->pbus->self), count);
593 /* Do an exact match only */
594 bcomp = OpalPciBusAll;
595 }
596 rid_end = pe->rid + (count << 8);
597 } else {
598 parent = pe->pdev->bus->self;
599 bcomp = OpalPciBusAll;
600 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
601 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
602 rid_end = pe->rid + 1;
603 }
604
605 /* Associate PE in PELT */
606 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
607 bcomp, dcomp, fcomp, OPAL_MAP_PE);
608 if (rc) {
609 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
610 return -ENXIO;
611 }
612 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
613 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
614
615 /* Add to all parents PELT-V */
616 while (parent) {
617 struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
618 if (pdn && pdn->pe_number != IODA_INVALID_PE) {
619 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
620 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
621 /* XXX What to do in case of error ? */
622 }
623 parent = parent->bus->self;
624 }
625 /* Setup reverse map */
626 for (rid = pe->rid; rid < rid_end; rid++)
627 phb->ioda.pe_rmap[rid] = pe->pe_number;
628
629 /* Setup one MVTs on IODA1 */
630 if (phb->type == PNV_PHB_IODA1) {
631 pe->mve_number = pe->pe_number;
632 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
633 pe->pe_number);
634 if (rc) {
635 pe_err(pe, "OPAL error %ld setting up MVE %d\n",
636 rc, pe->mve_number);
637 pe->mve_number = -1;
638 } else {
639 rc = opal_pci_set_mve_enable(phb->opal_id,
640 pe->mve_number, OPAL_ENABLE_MVE);
641 if (rc) {
642 pe_err(pe, "OPAL error %ld enabling MVE %d\n",
643 rc, pe->mve_number);
644 pe->mve_number = -1;
645 }
646 }
647 } else if (phb->type == PNV_PHB_IODA2)
648 pe->mve_number = 0;
649
650 return 0;
651}
652
653static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
654 struct pnv_ioda_pe *pe)
655{
656 struct pnv_ioda_pe *lpe;
657
658 list_for_each_entry(lpe, &phb->ioda.pe_list, link) {
659 if (lpe->dma_weight < pe->dma_weight) {
660 list_add_tail(&pe->link, &lpe->link);
661 return;
662 }
663 }
664 list_add_tail(&pe->link, &phb->ioda.pe_list);
665}
666
667static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
668{
669 /* This is quite simplistic. The "base" weight of a device
670 * is 10. 0 means no DMA is to be accounted for it.
671 */
672
673 /* If it's a bridge, no DMA */
674 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
675 return 0;
676
677 /* Reduce the weight of slow USB controllers */
678 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
679 dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
680 dev->class == PCI_CLASS_SERIAL_USB_EHCI)
681 return 3;
682
683 /* Increase the weight of RAID (includes Obsidian) */
684 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
685 return 15;
686
687 /* Default */
688 return 10;
689}
690
691static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
692{
693 struct pci_controller *hose = pci_bus_to_host(dev->bus);
694 struct pnv_phb *phb = hose->private_data;
695 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
696 struct pnv_ioda_pe *pe;
697 int pe_num;
698
699 if (!pdn) {
700 pr_err("%s: Device tree node not associated properly\n",
701 pci_name(dev));
702 return NULL;
703 }
704 if (pdn->pe_number != IODA_INVALID_PE)
705 return NULL;
706
707 /* PE#0 has been pre-set */
708 if (dev->bus->number == 0)
709 pe_num = 0;
710 else
711 pe_num = pnv_ioda_alloc_pe(phb);
712 if (pe_num == IODA_INVALID_PE) {
713 pr_warning("%s: Not enough PE# available, disabling device\n",
714 pci_name(dev));
715 return NULL;
716 }
717
718 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
719 * pointer in the PE data structure, both should be destroyed at the
720 * same time. However, this needs to be looked at more closely again
721 * once we actually start removing things (Hotplug, SR-IOV, ...)
722 *
723 * At some point we want to remove the PDN completely anyways
724 */
725 pe = &phb->ioda.pe_array[pe_num];
726 pci_dev_get(dev);
727 pdn->pcidev = dev;
728 pdn->pe_number = pe_num;
729 pe->pdev = dev;
730 pe->pbus = NULL;
731 pe->tce32_seg = -1;
732 pe->mve_number = -1;
733 pe->rid = dev->bus->number << 8 | pdn->devfn;
734
735 pe_info(pe, "Associated device to PE\n");
736
737 if (pnv_ioda_configure_pe(phb, pe)) {
738 /* XXX What do we do here ? */
739 if (pe_num)
740 pnv_ioda_free_pe(phb, pe_num);
741 pdn->pe_number = IODA_INVALID_PE;
742 pe->pdev = NULL;
743 pci_dev_put(dev);
744 return NULL;
745 }
746
747 /* Assign a DMA weight to the device */
748 pe->dma_weight = pnv_ioda_dma_weight(dev);
749 if (pe->dma_weight != 0) {
750 phb->ioda.dma_weight += pe->dma_weight;
751 phb->ioda.dma_pe_count++;
752 }
753
754 /* Link the PE */
755 pnv_ioda_link_pe_by_weight(phb, pe);
756
757 return pe;
758}
759
760static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
761{
762 struct pci_dev *dev;
763
764 list_for_each_entry(dev, &bus->devices, bus_list) {
765 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
766
767 if (pdn == NULL) {
768 pr_warn("%s: No device node associated with device !\n",
769 pci_name(dev));
770 continue;
771 }
772 pci_dev_get(dev);
773 pdn->pcidev = dev;
774 pdn->pe_number = pe->pe_number;
775 pe->dma_weight += pnv_ioda_dma_weight(dev);
776 if (dev->subordinate)
777 pnv_ioda_setup_same_PE(dev->subordinate, pe);
778 }
779}
780
781static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
782 struct pnv_ioda_pe *ppe)
783{
784 struct pci_controller *hose = pci_bus_to_host(dev->bus);
785 struct pnv_phb *phb = hose->private_data;
786 struct pci_bus *bus = dev->subordinate;
787 struct pnv_ioda_pe *pe;
788 int pe_num;
789
790 if (!bus) {
791 pr_warning("%s: Bridge without a subordinate bus !\n",
792 pci_name(dev));
793 return;
794 }
795 pe_num = pnv_ioda_alloc_pe(phb);
796 if (pe_num == IODA_INVALID_PE) {
797 pr_warning("%s: Not enough PE# available, disabling bus\n",
798 pci_name(dev));
799 return;
800 }
801
802 pe = &phb->ioda.pe_array[pe_num];
803 ppe->bus_pe = pe;
804 pe->pbus = bus;
805 pe->pdev = NULL;
806 pe->tce32_seg = -1;
807 pe->mve_number = -1;
808 pe->rid = bus->secondary << 8;
809 pe->dma_weight = 0;
810
811 pe_info(pe, "Secondary busses %d..%d associated with PE\n",
812 bus->secondary, bus->subordinate);
813
814 if (pnv_ioda_configure_pe(phb, pe)) {
815 /* XXX What do we do here ? */
816 if (pe_num)
817 pnv_ioda_free_pe(phb, pe_num);
818 pe->pbus = NULL;
819 return;
820 }
821
822 /* Associate it with all child devices */
823 pnv_ioda_setup_same_PE(bus, pe);
824
825 /* Account for one DMA PE if at least one DMA capable device exist
826 * below the bridge
827 */
828 if (pe->dma_weight != 0) {
829 phb->ioda.dma_weight += pe->dma_weight;
830 phb->ioda.dma_pe_count++;
831 }
832
833 /* Link the PE */
834 pnv_ioda_link_pe_by_weight(phb, pe);
835}
836
837static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus)
838{
839 struct pci_dev *dev;
840 struct pnv_ioda_pe *pe;
841
842 list_for_each_entry(dev, &bus->devices, bus_list) {
843 pe = pnv_ioda_setup_dev_PE(dev);
844 if (pe == NULL)
845 continue;
846 /* Leaving the PCIe domain ... single PE# */
847 if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
848 pnv_ioda_setup_bus_PE(dev, pe);
849 else if (dev->subordinate)
850 pnv_ioda_setup_PEs(dev->subordinate);
851 }
852}
853
854static void __devinit pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb,
855 struct pci_dev *dev)
856{
857 /* We delay DMA setup after we have assigned all PE# */
858}
859
860static void __devinit pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
861 struct pci_bus *bus)
862{
863 struct pci_dev *dev;
864
865 list_for_each_entry(dev, &bus->devices, bus_list) {
866 set_iommu_table_base(&dev->dev, &pe->tce32_table);
867 if (dev->subordinate)
868 pnv_ioda_setup_bus_dma(pe, dev->subordinate);
869 }
870}
871
872static void __devinit pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
873 struct pnv_ioda_pe *pe,
874 unsigned int base,
875 unsigned int segs)
876{
877
878 struct page *tce_mem = NULL;
879 const __be64 *swinvp;
880 struct iommu_table *tbl;
881 unsigned int i;
882 int64_t rc;
883 void *addr;
884
885 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
886#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
887
888 /* XXX FIXME: Handle 64-bit only DMA devices */
889 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
890 /* XXX FIXME: Allocate multi-level tables on PHB3 */
891
892 /* We shouldn't already have a 32-bit DMA associated */
893 if (WARN_ON(pe->tce32_seg >= 0))
894 return;
895
896 /* Grab a 32-bit TCE table */
897 pe->tce32_seg = base;
898 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
899 (base << 28), ((base + segs) << 28) - 1);
900
901 /* XXX Currently, we allocate one big contiguous table for the
902 * TCEs. We only really need one chunk per 256M of TCE space
903 * (ie per segment) but that's an optimization for later, it
904 * requires some added smarts with our get/put_tce implementation
905 */
906 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
907 get_order(TCE32_TABLE_SIZE * segs));
908 if (!tce_mem) {
909 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
910 goto fail;
911 }
912 addr = page_address(tce_mem);
913 memset(addr, 0, TCE32_TABLE_SIZE * segs);
914
915 /* Configure HW */
916 for (i = 0; i < segs; i++) {
917 rc = opal_pci_map_pe_dma_window(phb->opal_id,
918 pe->pe_number,
919 base + i, 1,
920 __pa(addr) + TCE32_TABLE_SIZE * i,
921 TCE32_TABLE_SIZE, 0x1000);
922 if (rc) {
923 pe_err(pe, " Failed to configure 32-bit TCE table,"
924 " err %ld\n", rc);
925 goto fail;
926 }
927 }
928
929 /* Setup linux iommu table */
930 tbl = &pe->tce32_table;
931 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
932 base << 28);
933
934 /* OPAL variant of P7IOC SW invalidated TCEs */
935 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
936 if (swinvp) {
937 /* We need a couple more fields -- an address and a data
938 * to or. Since the bus is only printed out on table free
939 * errors, and on the first pass the data will be a relative
940 * bus number, print that out instead.
941 */
942 tbl->it_busno = 0;
943 tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
944 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
945 | TCE_PCI_SWINV_PAIR;
946 }
947 iommu_init_table(tbl, phb->hose->node);
948
949 if (pe->pdev)
950 set_iommu_table_base(&pe->pdev->dev, tbl);
951 else
952 pnv_ioda_setup_bus_dma(pe, pe->pbus);
953
954 return;
955 fail:
956 /* XXX Failure: Try to fallback to 64-bit only ? */
957 if (pe->tce32_seg >= 0)
958 pe->tce32_seg = -1;
959 if (tce_mem)
960 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
961}
962
963static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb)
964{
965 struct pci_controller *hose = phb->hose;
966 unsigned int residual, remaining, segs, tw, base;
967 struct pnv_ioda_pe *pe;
968
969 /* If we have more PE# than segments available, hand out one
970 * per PE until we run out and let the rest fail. If not,
971 * then we assign at least one segment per PE, plus more based
972 * on the amount of devices under that PE
973 */
974 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
975 residual = 0;
976 else
977 residual = phb->ioda.tce32_count -
978 phb->ioda.dma_pe_count;
979
980 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
981 hose->global_number, phb->ioda.tce32_count);
982 pr_info("PCI: %d PE# for a total weight of %d\n",
983 phb->ioda.dma_pe_count, phb->ioda.dma_weight);
984
985 /* Walk our PE list and configure their DMA segments, hand them
986 * out one base segment plus any residual segments based on
987 * weight
988 */
989 remaining = phb->ioda.tce32_count;
990 tw = phb->ioda.dma_weight;
991 base = 0;
992 list_for_each_entry(pe, &phb->ioda.pe_list, link) {
993 if (!pe->dma_weight)
994 continue;
995 if (!remaining) {
996 pe_warn(pe, "No DMA32 resources available\n");
997 continue;
998 }
999 segs = 1;
1000 if (residual) {
1001 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
1002 if (segs > remaining)
1003 segs = remaining;
1004 }
1005 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
1006 pe->dma_weight, segs);
1007 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
1008 remaining -= segs;
1009 base += segs;
1010 }
1011}
1012
1013#ifdef CONFIG_PCI_MSI
1014static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
1015 unsigned int hwirq, unsigned int is_64,
1016 struct msi_msg *msg)
1017{
1018 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
1019 unsigned int xive_num = hwirq - phb->msi_base;
1020 uint64_t addr64;
1021 uint32_t addr32, data;
1022 int rc;
1023
1024 /* No PE assigned ? bail out ... no MSI for you ! */
1025 if (pe == NULL)
1026 return -ENXIO;
1027
1028 /* Check if we have an MVE */
1029 if (pe->mve_number < 0)
1030 return -ENXIO;
1031
1032 /* Assign XIVE to PE */
1033 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
1034 if (rc) {
1035 pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
1036 pci_name(dev), rc, xive_num);
1037 return -EIO;
1038 }
1039
1040 if (is_64) {
1041 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
1042 &addr64, &data);
1043 if (rc) {
1044 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
1045 pci_name(dev), rc);
1046 return -EIO;
1047 }
1048 msg->address_hi = addr64 >> 32;
1049 msg->address_lo = addr64 & 0xfffffffful;
1050 } else {
1051 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
1052 &addr32, &data);
1053 if (rc) {
1054 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
1055 pci_name(dev), rc);
1056 return -EIO;
1057 }
1058 msg->address_hi = 0;
1059 msg->address_lo = addr32;
1060 }
1061 msg->data = data;
1062
1063 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
1064 " address=%x_%08x data=%x PE# %d\n",
1065 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
1066 msg->address_hi, msg->address_lo, data, pe->pe_number);
1067
1068 return 0;
1069}
1070
1071static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
1072{
1073 unsigned int bmap_size;
1074 const __be32 *prop = of_get_property(phb->hose->dn,
1075 "ibm,opal-msi-ranges", NULL);
1076 if (!prop) {
1077 /* BML Fallback */
1078 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
1079 }
1080 if (!prop)
1081 return;
1082
1083 phb->msi_base = be32_to_cpup(prop);
1084 phb->msi_count = be32_to_cpup(prop + 1);
1085 bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long);
1086 phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL);
1087 if (!phb->msi_map) {
1088 pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
1089 phb->hose->global_number);
1090 return;
1091 }
1092 phb->msi_setup = pnv_pci_ioda_msi_setup;
1093 phb->msi32_support = 1;
1094 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
1095 phb->msi_count, phb->msi_base);
1096}
1097#else
/* CONFIG_PCI_MSI is off: there is nothing to set up for this PHB */
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
}
1099#endif /* CONFIG_PCI_MSI */
1100
1101/* This is the starting point of our IODA specific resource
1102 * allocation process
1103 */
1104static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
1105{
1106 resource_size_t size, align;
1107 struct pci_bus *child;
1108
1109 /* Associate PEs per functions */
1110 pnv_ioda_setup_PEs(hose->bus);
1111
1112 /* Calculate all resources */
1113 pnv_ioda_calc_bus(hose->bus, IORESOURCE_IO, &size, &align);
1114 pnv_ioda_calc_bus(hose->bus, IORESOURCE_MEM, &size, &align);
1115
1116 /* Apply then to HW */
1117 pnv_ioda_update_resources(hose->bus);
1118
1119 /* Setup DMA */
1120 pnv_ioda_setup_dma(hose->private_data);
1121
1122 /* Configure PCI Express settings */
1123 list_for_each_entry(child, &hose->bus->children, node) {
1124 struct pci_dev *self = child->self;
1125 if (!self)
1126 continue;
1127 pcie_bus_configure_settings(child, self->pcie_mpss);
1128 }
1129}
1130
1131/* Prevent enabling devices for which we couldn't properly
1132 * assign a PE
1133 */
1134static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev)
1135{
1136 struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
1137
1138 if (!pdn || pdn->pe_number == IODA_INVALID_PE)
1139 return -EINVAL;
1140 return 0;
1141}
1142
1143static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
1144 u32 devfn)
1145{
1146 return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
1147}
1148
1149void __init pnv_pci_init_ioda1_phb(struct device_node *np)
1150{
1151 struct pci_controller *hose;
1152 static int primary = 1;
1153 struct pnv_phb *phb;
1154 unsigned long size, m32map_off, iomap_off, pemap_off;
1155 const u64 *prop64;
1156 u64 phb_id;
1157 void *aux;
1158 long rc;
1159
1160 pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);
1161
1162 prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
1163 if (!prop64) {
1164 pr_err(" Missing \"ibm,opal-phbid\" property !\n");
1165 return;
1166 }
1167 phb_id = be64_to_cpup(prop64);
1168 pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
1169
1170 phb = alloc_bootmem(sizeof(struct pnv_phb));
1171 if (phb) {
1172 memset(phb, 0, sizeof(struct pnv_phb));
1173 phb->hose = hose = pcibios_alloc_controller(np);
1174 }
1175 if (!phb || !phb->hose) {
1176 pr_err("PCI: Failed to allocate PCI controller for %s\n",
1177 np->full_name);
1178 return;
1179 }
1180
1181 spin_lock_init(&phb->lock);
1182 /* XXX Use device-tree */
1183 hose->first_busno = 0;
1184 hose->last_busno = 0xff;
1185 hose->private_data = phb;
1186 phb->opal_id = phb_id;
1187 phb->type = PNV_PHB_IODA1;
1188
1189 /* Detect specific models for error handling */
1190 if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
1191 phb->model = PNV_PHB_MODEL_P7IOC;
1192 else
1193 phb->model = PNV_PHB_MODEL_UNKNOWN;
1194
1195 /* We parse "ranges" now since we need to deduce the register base
1196 * from the IO base
1197 */
1198 pci_process_bridge_OF_ranges(phb->hose, np, primary);
1199 primary = 0;
1200
1201 /* Magic formula from Milton */
1202 phb->regs = of_iomap(np, 0);
1203 if (phb->regs == NULL)
1204 pr_err(" Failed to map registers !\n");
1205
1206
1207 /* XXX This is hack-a-thon. This needs to be changed so that:
1208 * - we obtain stuff like PE# etc... from device-tree
1209 * - we properly re-allocate M32 ourselves
1210 * (the OFW one isn't very good)
1211 */
1212
1213 /* Initialize more IODA stuff */
1214 phb->ioda.total_pe = 128;
1215
1216 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
1217 /* OFW Has already off top 64k of M32 space (MSI space) */
1218 phb->ioda.m32_size += 0x10000;
1219
1220 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
1221 phb->ioda.m32_pci_base = hose->mem_resources[0].start -
1222 hose->pci_mem_offset;
1223 phb->ioda.io_size = hose->pci_io_size;
1224 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
1225 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
1226
1227 /* Allocate aux data & arrays */
1228 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
1229 m32map_off = size;
1230 size += phb->ioda.total_pe;
1231 iomap_off = size;
1232 size += phb->ioda.total_pe;
1233 pemap_off = size;
1234 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
1235 aux = alloc_bootmem(size);
1236 memset(aux, 0, size);
1237 phb->ioda.pe_alloc = aux;
1238 phb->ioda.m32_segmap = aux + m32map_off;
1239 phb->ioda.io_segmap = aux + iomap_off;
1240 phb->ioda.pe_array = aux + pemap_off;
1241 set_bit(0, phb->ioda.pe_alloc);
1242
1243 INIT_LIST_HEAD(&phb->ioda.pe_list);
1244
1245 /* Calculate how many 32-bit TCE segments we have */
1246 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
1247
1248 /* Clear unusable m64 */
1249 hose->mem_resources[1].flags = 0;
1250 hose->mem_resources[1].start = 0;
1251 hose->mem_resources[1].end = 0;
1252 hose->mem_resources[2].flags = 0;
1253 hose->mem_resources[2].start = 0;
1254 hose->mem_resources[2].end = 0;
1255
1256#if 0
1257 rc = opal_pci_set_phb_mem_window(opal->phb_id,
1258 window_type,
1259 window_num,
1260 starting_real_address,
1261 starting_pci_address,
1262 segment_size);
1263#endif
1264
1265 pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
1266 phb->ioda.total_pe,
1267 phb->ioda.m32_size, phb->ioda.m32_segsize,
1268 phb->ioda.io_size, phb->ioda.io_segsize);
1269
1270 if (phb->regs) {
1271 pr_devel(" BUID = 0x%016llx\n", in_be64(phb->regs + 0x100));
1272 pr_devel(" PHB2_CR = 0x%016llx\n", in_be64(phb->regs + 0x160));
1273 pr_devel(" IO_BAR = 0x%016llx\n", in_be64(phb->regs + 0x170));
1274 pr_devel(" IO_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x178));
1275 pr_devel(" IO_SAR = 0x%016llx\n", in_be64(phb->regs + 0x180));
1276 pr_devel(" M32_BAR = 0x%016llx\n", in_be64(phb->regs + 0x190));
1277 pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
1278 pr_devel(" M32_SAR = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
1279 }
1280 phb->hose->ops = &pnv_pci_ops;
1281
1282 /* Setup RID -> PE mapping function */
1283 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
1284
1285 /* Setup TCEs */
1286 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
1287
1288 /* Setup MSI support */
1289 pnv_pci_init_ioda_msis(phb);
1290
1291 /* We set both probe_only and PCI_REASSIGN_ALL_RSRC. This is an
1292 * odd combination which essentially means that we skip all resource
1293 * fixups and assignments in the generic code, and do it all
1294 * ourselves here
1295 */
1296 pci_probe_only = 1;
1297 ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
1298 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
1299 pci_add_flags(PCI_REASSIGN_ALL_RSRC);
1300
1301 /* Reset IODA tables to a clean state */
1302 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
1303 if (rc)
1304 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
1305 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
1306}
1307
1308void __init pnv_pci_init_ioda_hub(struct device_node *np)
1309{
1310 struct device_node *phbn;
1311 const u64 *prop64;
1312 u64 hub_id;
1313
1314 pr_info("Probing IODA IO-Hub %s\n", np->full_name);
1315
1316 prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
1317 if (!prop64) {
1318 pr_err(" Missing \"ibm,opal-hubid\" property !\n");
1319 return;
1320 }
1321 hub_id = be64_to_cpup(prop64);
1322 pr_devel(" HUB-ID : 0x%016llx\n", hub_id);
1323
1324 /* Count child PHBs */
1325 for_each_child_of_node(np, phbn) {
1326 /* Look for IODA1 PHBs */
1327 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1328 pnv_pci_init_ioda1_phb(phbn);
1329 }
1330}
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 4c80f7c77d56..264967770c3a 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -137,6 +137,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
137 phb->hose->private_data = phb; 137 phb->hose->private_data = phb;
138 phb->opal_id = phb_id; 138 phb->opal_id = phb_id;
139 phb->type = PNV_PHB_P5IOC2; 139 phb->type = PNV_PHB_P5IOC2;
140 phb->model = PNV_PHB_MODEL_P5IOC2;
140 141
141 phb->regs = of_iomap(np, 0); 142 phb->regs = of_iomap(np, 0);
142 143
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 85bb66d7f933..a70bc1e385eb 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -144,6 +144,112 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
144} 144}
145#endif /* CONFIG_PCI_MSI */ 145#endif /* CONFIG_PCI_MSI */
146 146
147static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
148{
149 struct OpalIoP7IOCPhbErrorData *data = &phb->diag.p7ioc;
150 int i;
151
152 pr_info("PHB %d diagnostic data:\n", phb->hose->global_number);
153
154 pr_info(" brdgCtl = 0x%08x\n", data->brdgCtl);
155
156 pr_info(" portStatusReg = 0x%08x\n", data->portStatusReg);
157 pr_info(" rootCmplxStatus = 0x%08x\n", data->rootCmplxStatus);
158 pr_info(" busAgentStatus = 0x%08x\n", data->busAgentStatus);
159
160 pr_info(" deviceStatus = 0x%08x\n", data->deviceStatus);
161 pr_info(" slotStatus = 0x%08x\n", data->slotStatus);
162 pr_info(" linkStatus = 0x%08x\n", data->linkStatus);
163 pr_info(" devCmdStatus = 0x%08x\n", data->devCmdStatus);
164 pr_info(" devSecStatus = 0x%08x\n", data->devSecStatus);
165
166 pr_info(" rootErrorStatus = 0x%08x\n", data->rootErrorStatus);
167 pr_info(" uncorrErrorStatus = 0x%08x\n", data->uncorrErrorStatus);
168 pr_info(" corrErrorStatus = 0x%08x\n", data->corrErrorStatus);
169 pr_info(" tlpHdr1 = 0x%08x\n", data->tlpHdr1);
170 pr_info(" tlpHdr2 = 0x%08x\n", data->tlpHdr2);
171 pr_info(" tlpHdr3 = 0x%08x\n", data->tlpHdr3);
172 pr_info(" tlpHdr4 = 0x%08x\n", data->tlpHdr4);
173 pr_info(" sourceId = 0x%08x\n", data->sourceId);
174
175 pr_info(" errorClass = 0x%016llx\n", data->errorClass);
176 pr_info(" correlator = 0x%016llx\n", data->correlator);
177
178 pr_info(" p7iocPlssr = 0x%016llx\n", data->p7iocPlssr);
179 pr_info(" p7iocCsr = 0x%016llx\n", data->p7iocCsr);
180 pr_info(" lemFir = 0x%016llx\n", data->lemFir);
181 pr_info(" lemErrorMask = 0x%016llx\n", data->lemErrorMask);
182 pr_info(" lemWOF = 0x%016llx\n", data->lemWOF);
183 pr_info(" phbErrorStatus = 0x%016llx\n", data->phbErrorStatus);
184 pr_info(" phbFirstErrorStatus = 0x%016llx\n", data->phbFirstErrorStatus);
185 pr_info(" phbErrorLog0 = 0x%016llx\n", data->phbErrorLog0);
186 pr_info(" phbErrorLog1 = 0x%016llx\n", data->phbErrorLog1);
187 pr_info(" mmioErrorStatus = 0x%016llx\n", data->mmioErrorStatus);
188 pr_info(" mmioFirstErrorStatus = 0x%016llx\n", data->mmioFirstErrorStatus);
189 pr_info(" mmioErrorLog0 = 0x%016llx\n", data->mmioErrorLog0);
190 pr_info(" mmioErrorLog1 = 0x%016llx\n", data->mmioErrorLog1);
191 pr_info(" dma0ErrorStatus = 0x%016llx\n", data->dma0ErrorStatus);
192 pr_info(" dma0FirstErrorStatus = 0x%016llx\n", data->dma0FirstErrorStatus);
193 pr_info(" dma0ErrorLog0 = 0x%016llx\n", data->dma0ErrorLog0);
194 pr_info(" dma0ErrorLog1 = 0x%016llx\n", data->dma0ErrorLog1);
195 pr_info(" dma1ErrorStatus = 0x%016llx\n", data->dma1ErrorStatus);
196 pr_info(" dma1FirstErrorStatus = 0x%016llx\n", data->dma1FirstErrorStatus);
197 pr_info(" dma1ErrorLog0 = 0x%016llx\n", data->dma1ErrorLog0);
198 pr_info(" dma1ErrorLog1 = 0x%016llx\n", data->dma1ErrorLog1);
199
200 for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
201 if ((data->pestA[i] >> 63) == 0 &&
202 (data->pestB[i] >> 63) == 0)
203 continue;
204 pr_info(" PE[%3d] PESTA = 0x%016llx\n", i, data->pestA[i]);
205 pr_info(" PESTB = 0x%016llx\n", data->pestB[i]);
206 }
207}
208
209static void pnv_pci_dump_phb_diag_data(struct pnv_phb *phb)
210{
211 switch(phb->model) {
212 case PNV_PHB_MODEL_P7IOC:
213 pnv_pci_dump_p7ioc_diag_data(phb);
214 break;
215 default:
216 pr_warning("PCI %d: Can't decode this PHB diag data\n",
217 phb->hose->global_number);
218 }
219}
220
221static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
222{
223 unsigned long flags, rc;
224 int has_diag;
225
226 spin_lock_irqsave(&phb->lock, flags);
227
228 rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
229 has_diag = (rc == OPAL_SUCCESS);
230
231 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
232 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
233 if (rc) {
234 pr_warning("PCI %d: Failed to clear EEH freeze state"
235 " for PE#%d, err %ld\n",
236 phb->hose->global_number, pe_no, rc);
237
238 /* For now, let's only display the diag buffer when we fail to clear
239 * the EEH status. We'll do more sensible things later when we have
240 * proper EEH support. We need to make sure we don't pollute ourselves
241 * with the normal errors generated when probing empty slots
242 */
243 if (has_diag)
244 pnv_pci_dump_phb_diag_data(phb);
245 else
246 pr_warning("PCI %d: No diag data available\n",
247 phb->hose->global_number);
248 }
249
250 spin_unlock_irqrestore(&phb->lock, flags);
251}
252
147static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, 253static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
148 u32 bdfn) 254 u32 bdfn)
149{ 255{
@@ -165,15 +271,8 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
165 } 271 }
166 cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", 272 cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n",
167 bdfn, pe_no, fstate); 273 bdfn, pe_no, fstate);
168 if (fstate != 0) { 274 if (fstate != 0)
169 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, 275 pnv_pci_handle_eeh_config(phb, pe_no);
170 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
171 if (rc) {
172 pr_warning("PCI %d: Failed to clear EEH freeze state"
173 " for PE#%d, err %lld\n",
174 phb->hose->global_number, pe_no, rc);
175 }
176 }
177} 276}
178 277
179static int pnv_pci_read_config(struct pci_bus *bus, 278static int pnv_pci_read_config(struct pci_bus *bus,
@@ -257,12 +356,54 @@ struct pci_ops pnv_pci_ops = {
257 .write = pnv_pci_write_config, 356 .write = pnv_pci_write_config,
258}; 357};
259 358
359
360static void pnv_tce_invalidate(struct iommu_table *tbl,
361 u64 *startp, u64 *endp)
362{
363 u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
364 unsigned long start, end, inc;
365
366 start = __pa(startp);
367 end = __pa(endp);
368
369
370 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
371 if (tbl->it_busno) {
372 start <<= 12;
373 end <<= 12;
374 inc = 128 << 12;
375 start |= tbl->it_busno;
376 end |= tbl->it_busno;
377 }
378 /* p7ioc-style invalidation, 2 TCEs per write */
379 else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
380 start |= (1ull << 63);
381 end |= (1ull << 63);
382 inc = 16;
383 }
384 /* Default (older HW) */
385 else
386 inc = 128;
387
388 end |= inc - 1; /* round up end to be different than start */
389
390 mb(); /* Ensure above stores are visible */
391 while (start <= end) {
392 __raw_writeq(start, invalidate);
393 start += inc;
394 }
395 /* The iommu layer will do another mb() for us on build() and
396 * we don't care on free()
397 */
398}
399
400
260static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, 401static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
261 unsigned long uaddr, enum dma_data_direction direction, 402 unsigned long uaddr, enum dma_data_direction direction,
262 struct dma_attrs *attrs) 403 struct dma_attrs *attrs)
263{ 404{
264 u64 proto_tce; 405 u64 proto_tce;
265 u64 *tcep; 406 u64 *tcep, *tces;
266 u64 rpn; 407 u64 rpn;
267 408
268 proto_tce = TCE_PCI_READ; // Read allowed 409 proto_tce = TCE_PCI_READ; // Read allowed
@@ -270,25 +411,33 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
270 if (direction != DMA_TO_DEVICE) 411 if (direction != DMA_TO_DEVICE)
271 proto_tce |= TCE_PCI_WRITE; 412 proto_tce |= TCE_PCI_WRITE;
272 413
273 tcep = ((u64 *)tbl->it_base) + index; 414 tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
415 rpn = __pa(uaddr) >> TCE_SHIFT;
274 416
275 while (npages--) { 417 while (npages--)
276 /* can't move this out since we might cross LMB boundary */ 418 *(tcep++) = proto_tce | (rpn++ << TCE_RPN_SHIFT);
277 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; 419
278 *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; 420 /* Some implementations won't cache invalid TCEs and thus may not
421 * need that flush. We'll probably turn it_type into a bit mask
422 * of flags if that becomes the case
423 */
424 if (tbl->it_type & TCE_PCI_SWINV_CREATE)
425 pnv_tce_invalidate(tbl, tces, tcep - 1);
279 426
280 uaddr += TCE_PAGE_SIZE;
281 tcep++;
282 }
283 return 0; 427 return 0;
284} 428}
285 429
286static void pnv_tce_free(struct iommu_table *tbl, long index, long npages) 430static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
287{ 431{
288 u64 *tcep = ((u64 *)tbl->it_base) + index; 432 u64 *tcep, *tces;
433
434 tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
289 435
290 while (npages--) 436 while (npages--)
291 *(tcep++) = 0; 437 *(tcep++) = 0;
438
439 if (tbl->it_type & TCE_PCI_SWINV_FREE)
440 pnv_tce_invalidate(tbl, tces, tcep - 1);
292} 441}
293 442
294void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 443void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
@@ -308,13 +457,14 @@ static struct iommu_table * __devinit
308pnv_pci_setup_bml_iommu(struct pci_controller *hose) 457pnv_pci_setup_bml_iommu(struct pci_controller *hose)
309{ 458{
310 struct iommu_table *tbl; 459 struct iommu_table *tbl;
311 const __be64 *basep; 460 const __be64 *basep, *swinvp;
312 const __be32 *sizep; 461 const __be32 *sizep;
313 462
314 basep = of_get_property(hose->dn, "linux,tce-base", NULL); 463 basep = of_get_property(hose->dn, "linux,tce-base", NULL);
315 sizep = of_get_property(hose->dn, "linux,tce-size", NULL); 464 sizep = of_get_property(hose->dn, "linux,tce-size", NULL);
316 if (basep == NULL || sizep == NULL) { 465 if (basep == NULL || sizep == NULL) {
317 pr_err("PCI: %s has missing tce entries !\n", hose->dn->full_name); 466 pr_err("PCI: %s has missing tce entries !\n",
467 hose->dn->full_name);
318 return NULL; 468 return NULL;
319 } 469 }
320 tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node); 470 tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
@@ -323,6 +473,15 @@ pnv_pci_setup_bml_iommu(struct pci_controller *hose)
323 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), 473 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
324 be32_to_cpup(sizep), 0); 474 be32_to_cpup(sizep), 0);
325 iommu_init_table(tbl, hose->node); 475 iommu_init_table(tbl, hose->node);
476
477 /* Deal with SW invalidated TCEs when needed (BML way) */
478 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
479 NULL);
480 if (swinvp) {
481 tbl->it_busno = swinvp[1];
482 tbl->it_index = (unsigned long)ioremap(swinvp[0], 8);
483 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
484 }
326 return tbl; 485 return tbl;
327} 486}
328 487
@@ -356,6 +515,13 @@ static void __devinit pnv_pci_dma_dev_setup(struct pci_dev *pdev)
356 pnv_pci_dma_fallback_setup(hose, pdev); 515 pnv_pci_dma_fallback_setup(hose, pdev);
357} 516}
358 517
518/* Fixup wrong class code in p7ioc root complex.
 *
 * Registered below as an early PCI fixup for IBM device ID 0x3b9: the
 * device's class is overwritten with PCI_CLASS_BRIDGE_PCI shifted into
 * the upper bits, leaving the low 8 bits zero.
 */
519static void __devinit pnv_p7ioc_rc_quirk(struct pci_dev *dev)
520{
521 dev->class = PCI_CLASS_BRIDGE_PCI << 8;
522}
523DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
524
359static int pnv_pci_probe_mode(struct pci_bus *bus) 525static int pnv_pci_probe_mode(struct pci_bus *bus)
360{ 526{
361 struct pci_controller *hose = pci_bus_to_host(bus); 527 struct pci_controller *hose = pci_bus_to_host(bus);
@@ -400,12 +566,24 @@ void __init pnv_pci_init(void)
400 init_pci_config_tokens(); 566 init_pci_config_tokens();
401 find_and_init_phbs(); 567 find_and_init_phbs();
402#endif /* CONFIG_PPC_POWERNV_RTAS */ 568#endif /* CONFIG_PPC_POWERNV_RTAS */
403 } else { 569 }
404 /* OPAL is here, do our normal stuff */ 570 /* OPAL is here, do our normal stuff */
571 else {
572 int found_ioda = 0;
573
574 /* Look for IODA IO-Hubs. We don't support mixing IODA
575 * and p5ioc2 due to the need to change some global
576 * probing flags
577 */
578 for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
579 pnv_pci_init_ioda_hub(np);
580 found_ioda = 1;
581 }
405 582
406 /* Look for p5ioc2 IO-Hubs */ 583 /* Look for p5ioc2 IO-Hubs */
407 for_each_compatible_node(np, NULL, "ibm,p5ioc2") 584 if (!found_ioda)
408 pnv_pci_init_p5ioc2_hub(np); 585 for_each_compatible_node(np, NULL, "ibm,p5ioc2")
586 pnv_pci_init_p5ioc2_hub(np);
409 } 587 }
410 588
411 /* Setup the linkage between OF nodes and PHBs */ 589 /* Setup the linkage between OF nodes and PHBs */
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index d4dbc4950936..8bc479634643 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -9,9 +9,63 @@ enum pnv_phb_type {
9 PNV_PHB_IODA2, 9 PNV_PHB_IODA2,
10}; 10};
11 11
12/* Precise PHB model for error management.
 *
 * Used by pnv_pci_dump_phb_diag_data() to pick a decoder for the PHB
 * diagnostic buffer; only PNV_PHB_MODEL_P7IOC has one there.
 */
13enum pnv_phb_model {
14 PNV_PHB_MODEL_UNKNOWN, /* IODA PHB not matching "ibm,p7ioc-pciex" */
15 PNV_PHB_MODEL_P5IOC2, /* set by the p5ioc2 PHB init code */
16 PNV_PHB_MODEL_P7IOC, /* IODA PHB compatible with "ibm,p7ioc-pciex" */
17};
18
19#define PNV_PCI_DIAG_BUF_SIZE 4096
20
21/* Data associated with a PE, including IOMMU tracking etc. */
22struct pnv_ioda_pe {
23 /* A PE can be associated with a single device or an
24 * entire bus (& children). In the former case, pdev
25 * is populated, in the latter case, pbus is.
26 */
27 struct pci_dev *pdev;
28 struct pci_bus *pbus;
29
30 /* Effective RID (device RID for a device PE and base bus
31 * RID with devfn 0 for a bus PE)
32 */
33 unsigned int rid;
34
35 /* PE number */
36 unsigned int pe_number;
37
38 /* "Weight" assigned to the PE for the sake of DMA resource
39 * allocations
40 */
41 unsigned int dma_weight;
42
43 /* This is a PCI-E -> PCI-X bridge, this points to the
44 * corresponding bus PE
45 */
46 struct pnv_ioda_pe *bus_pe;
47
48 /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
49 int tce32_seg;
50 int tce32_segcount;
51 struct iommu_table tce32_table;
52
53 /* XXX TODO: Add support for additional 64-bit iommus */
54
55 /* MSIs. MVE index is identical for 32 and 64 bit MSI
56 * and -1 if not supported. (It's actually identical to the
57 * PE number)
58 */
59 int mve_number;
60
61 /* Link in list of PE#s */
62 struct list_head link;
63};
64
12struct pnv_phb { 65struct pnv_phb {
13 struct pci_controller *hose; 66 struct pci_controller *hose;
14 enum pnv_phb_type type; 67 enum pnv_phb_type type;
68 enum pnv_phb_model model;
15 u64 opal_id; 69 u64 opal_id;
16 void __iomem *regs; 70 void __iomem *regs;
17 spinlock_t lock; 71 spinlock_t lock;
@@ -34,7 +88,52 @@ struct pnv_phb {
34 struct { 88 struct {
35 struct iommu_table iommu_table; 89 struct iommu_table iommu_table;
36 } p5ioc2; 90 } p5ioc2;
91
92 struct {
93 /* Global bridge info */
94 unsigned int total_pe;
95 unsigned int m32_size;
96 unsigned int m32_segsize;
97 unsigned int m32_pci_base;
98 unsigned int io_size;
99 unsigned int io_segsize;
100 unsigned int io_pci_base;
101
102 /* PE allocation bitmap */
103 unsigned long *pe_alloc;
104
105 /* M32 & IO segment maps */
106 unsigned int *m32_segmap;
107 unsigned int *io_segmap;
108 struct pnv_ioda_pe *pe_array;
109
110 /* Reverse map of PEs, will have to extend if
111 * we are to support more than 256 PEs, indexed
112 * bus { bus, devfn }
113 */
114 unsigned char pe_rmap[0x10000];
115
116 /* 32-bit TCE tables allocation */
117 unsigned long tce32_count;
118
119 /* Total "weight" for the sake of DMA resources
120 * allocation
121 */
122 unsigned int dma_weight;
123 unsigned int dma_pe_count;
124
125 /* Sorted list of used PE's, sorted at
126 * boot for resource allocation purposes
127 */
128 struct list_head pe_list;
129 } ioda;
37 }; 130 };
131
132 /* PHB status structure */
133 union {
134 unsigned char blob[PNV_PCI_DIAG_BUF_SIZE];
135 struct OpalIoP7IOCPhbErrorData p7ioc;
136 } diag;
38}; 137};
39 138
40extern struct pci_ops pnv_pci_ops; 139extern struct pci_ops pnv_pci_ops;
@@ -43,6 +142,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
43 void *tce_mem, u64 tce_size, 142 void *tce_mem, u64 tce_size,
44 u64 dma_offset); 143 u64 dma_offset);
45extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); 144extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
145extern void pnv_pci_init_ioda_hub(struct device_node *np);
46 146
47 147
48#endif /* __POWERNV_PCI_H */ 148#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index e87736685243..17210c526c52 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -75,7 +75,7 @@ int __devinit pnv_smp_kick_cpu(int nr)
75 /* On OPAL v2 the CPU are still spinning inside OPAL itself, 75 /* On OPAL v2 the CPU are still spinning inside OPAL itself,
76 * get them back now 76 * get them back now
77 */ 77 */
78 if (firmware_has_feature(FW_FEATURE_OPALv2)) { 78 if (!paca[nr].cpu_start && firmware_has_feature(FW_FEATURE_OPALv2)) {
79 pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu); 79 pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
80 rc = opal_start_cpu(pcpu, start_here); 80 rc = opal_start_cpu(pcpu, start_here);
81 if (rc != OPAL_SUCCESS) 81 if (rc != OPAL_SUCCESS)