author		Linus Torvalds <torvalds@linux-foundation.org>	2013-07-04 13:29:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-04 13:29:23 -0400
commit		65b97fb7303050fc826e518cf67fc283da23314f (patch)
tree		595e7f04d65d95a39d65bd2dcf2385b3b6ea7969 /arch/powerpc/platforms
parent		ddcf6600b133697adbafd96e080818bdc0dfd028 (diff)
parent		1d8b368ab4aacfc3f864655baad4d31a3028ec1a (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc updates from Ben Herrenschmidt:
 "These are the powerpc changes for the 3.11 merge window.  In addition
  to the usual bug fixes and small updates, the main highlights are:

   - Support for transparent huge pages by Aneesh Kumar for 64-bit
     server processors.  This allows the use of 16M pages as
     transparent huge pages on kernels compiled with a 64K base page
     size.

   - Base VFIO support for KVM on power by Alexey Kardashevskiy.

   - Wiring up of our nvram to the pstore infrastructure, including
     putting compressed oopses in there, by Aruna Balakrishnaiah.

   - Move, rework and improve our "EEH" (basically PCI error handling
     and recovery) infrastructure.  It is no longer specific to pseries
     but is now usable by the new "powernv" platform as well (no
     hypervisor), by Gavin Shan.

   - I fixed some bugs in our math-emu instruction decoding and made it
     usable to emulate some optional FP instructions on processors with
     hard FP that lack them (such as fsqrt on Freescale embedded
     processors).

   - Support for the Power8 "Event Based Branch" facility by Michael
     Ellerman.  This facility allows what is basically "userspace
     interrupts" for performance monitor events.

   - A bunch of Transactional Memory vs. Signals bug fixes and HW
     breakpoint/watchpoint fixes by Michael Neuling.

  And more ...  I apologize in advance if I've failed to highlight
  something that somebody deemed worth it."

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits)
  pstore: Add hsize argument in write_buf call of pstore_ftrace_call
  powerpc/fsl: add MPIC timer wakeup support
  powerpc/mpic: create mpic subsystem object
  powerpc/mpic: add global timer support
  powerpc/mpic: add irq_set_wake support
  powerpc/85xx: enable coreint for all the 64bit boards
  powerpc/8xx: Erroneous double irq_eoi() on CPM IRQ in MPC8xx
  powerpc/fsl: Enable CONFIG_E1000E in mpc85xx_smp_defconfig
  powerpc/mpic: Add get_version API both for internal and external use
  powerpc: Handle both new style and old style reserve maps
  powerpc/hw_brk: Fix off by one error when validating DAWR region end
  powerpc/pseries: Support compression of oops text via pstore
  powerpc/pseries: Re-organise the oops compression code
  pstore: Pass header size in the pstore write callback
  powerpc/powernv: Fix iommu initialization again
  powerpc/pseries: Inform the hypervisor we are using EBB regs
  powerpc/perf: Add power8 EBB support
  powerpc/perf: Core EBB support for 64-bit book3s
  powerpc/perf: Drop MMCRA from thread_struct
  powerpc/perf: Don't enable if we have zero events
  ...
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--  arch/powerpc/platforms/44x/currituck.c          |  43
-rw-r--r--  arch/powerpc/platforms/44x/iss4xx.c             |   4
-rw-r--r--  arch/powerpc/platforms/512x/mpc5121_ads.c       |   6
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x.h           |  12
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_generic.c   |   4
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_shared.c    |  31
-rw-r--r--  arch/powerpc/platforms/512x/pdm360ng.c          |   4
-rw-r--r--  arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c  |  12
-rw-r--r--  arch/powerpc/platforms/85xx/p5020_ds.c          |   5
-rw-r--r--  arch/powerpc/platforms/85xx/p5040_ds.c          |   5
-rw-r--r--  arch/powerpc/platforms/85xx/smp.c               |   6
-rw-r--r--  arch/powerpc/platforms/85xx/t4240_qds.c         |   5
-rw-r--r--  arch/powerpc/platforms/8xx/m8xx_setup.c         |  14
-rw-r--r--  arch/powerpc/platforms/Kconfig                  |  26
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype          |   1
-rw-r--r--  arch/powerpc/platforms/cell/beat_htab.c         |  16
-rw-r--r--  arch/powerpc/platforms/cell/smp.c               |   2
-rw-r--r--  arch/powerpc/platforms/powermac/smp.c           |   2
-rw-r--r--  arch/powerpc/platforms/powernv/Makefile         |   1
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-ioda.c       | 916
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c    | 379
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S  |   3
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c           |  69
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c       |  62
-rw-r--r--  arch/powerpc/platforms/powernv/pci-p5ioc2.c     |  11
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c            | 139
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h            |  35
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c          |   4
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c            |   4
-rw-r--r--  arch/powerpc/platforms/ps3/htab.c               |   5
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig          |   5
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile         |   4
-rw-r--r--  arch/powerpc/platforms/pseries/eeh.c            | 942
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_cache.c      | 319
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_dev.c        | 112
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_driver.c     | 552
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_event.c      | 142
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_pe.c         | 653
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_sysfs.c      |  75
-rw-r--r--  arch/powerpc/platforms/pseries/io_event_irq.c   |   2
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c          |   4
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c           | 142
-rw-r--r--  arch/powerpc/platforms/pseries/nvram.c          | 554
-rw-r--r--  arch/powerpc/platforms/pseries/pci_dlpar.c      |  85
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c            |   8
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c            |   2
46 files changed, 2300 insertions, 3127 deletions
diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c
index ecd3890c40d7..7f1b71a01c6a 100644
--- a/arch/powerpc/platforms/44x/currituck.c
+++ b/arch/powerpc/platforms/44x/currituck.c
@@ -91,12 +91,12 @@ static void __init ppc47x_init_irq(void)
 }
 
 #ifdef CONFIG_SMP
-static void __cpuinit smp_ppc47x_setup_cpu(int cpu)
+static void smp_ppc47x_setup_cpu(int cpu)
 {
         mpic_setup_this_cpu();
 }
 
-static int __cpuinit smp_ppc47x_kick_cpu(int cpu)
+static int smp_ppc47x_kick_cpu(int cpu)
 {
         struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
         const u64 *spin_table_addr_prop;
@@ -176,13 +176,48 @@ static int __init ppc47x_probe(void)
         return 1;
 }
 
+static int board_rev = -1;
+static int __init ppc47x_get_board_rev(void)
+{
+        u8 fpga_reg0;
+        void *fpga;
+        struct device_node *np;
+
+        np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+        if (!np)
+                goto fail;
+
+        fpga = of_iomap(np, 0);
+        of_node_put(np);
+        if (!fpga)
+                goto fail;
+
+        fpga_reg0 = ioread8(fpga);
+        board_rev = fpga_reg0 & 0x03;
+        pr_info("%s: Found board revision %d\n", __func__, board_rev);
+        iounmap(fpga);
+        return 0;
+
+fail:
+        pr_info("%s: Unable to find board revision\n", __func__);
+        return 0;
+}
+machine_arch_initcall(ppc47x, ppc47x_get_board_rev);
+
 /* Use USB controller should have been hardware swizzled but it wasn't :( */
 static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
 {
         if (dev->vendor == 0x1033 && (dev->device == 0x0035 ||
                                       dev->device == 0x00e0)) {
-                dev->irq = irq_create_mapping(NULL, 47);
-                pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq);
+                if (board_rev == 0) {
+                        dev->irq = irq_create_mapping(NULL, 47);
+                        pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+                } else if (board_rev == 2) {
+                        dev->irq = irq_create_mapping(NULL, 49);
+                        pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+                } else {
+                        pr_alert("%s: Unknown board revision\n", __func__);
+                }
         }
 }
 
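The board-revision lookup added above is registered with machine_arch_initcall(), a powerpc helper that runs the initcall only when the active machine matches. Roughly, the wrapper it generates looks like the following sketch (paraphrased from asm/machdep.h; illustrative, not part of this patch):

        /* Sketch: what machine_arch_initcall(ppc47x, fn) boils down to */
        static int __init __machine_initcall_ppc47x_fn(void)
        {
                if (machine_is(ppc47x))
                        return ppc47x_get_board_rev();
                return 0;
        }
        arch_initcall(__machine_initcall_ppc47x_fn);
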
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index a28a8629727e..4241bc825800 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -81,12 +81,12 @@ static void __init iss4xx_init_irq(void)
 }
 
 #ifdef CONFIG_SMP
-static void __cpuinit smp_iss4xx_setup_cpu(int cpu)
+static void smp_iss4xx_setup_cpu(int cpu)
 {
         mpic_setup_this_cpu();
 }
 
-static int __cpuinit smp_iss4xx_kick_cpu(int cpu)
+static int smp_iss4xx_kick_cpu(int cpu)
 {
         struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
         const u64 *spin_table_addr_prop;
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 0a134e0469ef..3e90ece10ae9 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void)
         mpc83xx_add_bridge(np);
 #endif
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-        mpc512x_setup_diu();
-#endif
+        mpc512x_setup_arch();
 }
 
 static void __init mpc5121_ads_init_IRQ(void)
@@ -69,7 +67,7 @@ define_machine(mpc5121_ads) {
         .probe = mpc5121_ads_probe,
         .setup_arch = mpc5121_ads_setup_arch,
         .init = mpc512x_init,
-        .init_early = mpc512x_init_diu,
+        .init_early = mpc512x_init_early,
         .init_IRQ = mpc5121_ads_init_IRQ,
         .get_irq = ipic_get_irq,
         .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 0a8e60023944..cc97f022d028 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -12,18 +12,12 @@
 #ifndef __MPC512X_H__
 #define __MPC512X_H__
 extern void __init mpc512x_init_IRQ(void);
+extern void __init mpc512x_init_early(void);
 extern void __init mpc512x_init(void);
+extern void __init mpc512x_setup_arch(void);
 extern int __init mpc5121_clk_init(void);
-void __init mpc512x_declare_of_platform_devices(void);
 extern const char *mpc512x_select_psc_compat(void);
+extern const char *mpc512x_select_reset_compat(void);
 extern void mpc512x_restart(char *cmd);
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-void mpc512x_init_diu(void);
-void mpc512x_setup_diu(void);
-#else
-#define mpc512x_init_diu NULL
-#define mpc512x_setup_diu NULL
-#endif
-
 #endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
index 5fb919b30924..ce71408781a0 100644
--- a/arch/powerpc/platforms/512x/mpc512x_generic.c
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -45,8 +45,8 @@ define_machine(mpc512x_generic) {
         .name = "MPC512x generic",
         .probe = mpc512x_generic_probe,
         .init = mpc512x_init,
-        .init_early = mpc512x_init_diu,
-        .setup_arch = mpc512x_setup_diu,
+        .init_early = mpc512x_init_early,
+        .setup_arch = mpc512x_setup_arch,
         .init_IRQ = mpc512x_init_IRQ,
         .get_irq = ipic_get_irq,
         .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 6eb94ab99d39..a82a41b4fd91 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -35,8 +35,10 @@ static struct mpc512x_reset_module __iomem *reset_module_base;
 static void __init mpc512x_restart_init(void)
 {
         struct device_node *np;
+        const char *reset_compat;
 
-        np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-reset");
+        reset_compat = mpc512x_select_reset_compat();
+        np = of_find_compatible_node(NULL, NULL, reset_compat);
         if (!np)
                 return;
 
@@ -58,7 +60,7 @@ void mpc512x_restart(char *cmd)
                 ;
 }
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+#if IS_ENABLED(CONFIG_FB_FSL_DIU)
 
 struct fsl_diu_shared_fb {
         u8 gamma[0x300];        /* 32-bit aligned! */
@@ -355,6 +357,17 @@ const char *mpc512x_select_psc_compat(void)
         return NULL;
 }
 
+const char *mpc512x_select_reset_compat(void)
+{
+        if (of_machine_is_compatible("fsl,mpc5121"))
+                return "fsl,mpc5121-reset";
+
+        if (of_machine_is_compatible("fsl,mpc5125"))
+                return "fsl,mpc5125-reset";
+
+        return NULL;
+}
+
 static unsigned int __init get_fifo_size(struct device_node *np,
                                          char *prop_name)
 {
@@ -436,14 +449,26 @@ void __init mpc512x_psc_fifo_init(void)
         }
 }
 
+void __init mpc512x_init_early(void)
+{
+        mpc512x_restart_init();
+        if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+                mpc512x_init_diu();
+}
+
 void __init mpc512x_init(void)
 {
         mpc5121_clk_init();
         mpc512x_declare_of_platform_devices();
-        mpc512x_restart_init();
         mpc512x_psc_fifo_init();
 }
 
+void __init mpc512x_setup_arch(void)
+{
+        if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+                mpc512x_setup_diu();
+}
+
 /**
  * mpc512x_cs_config - Setup chip select configuration
  * @cs: chip select number
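The conversion above uses IS_ENABLED() from <linux/kconfig.h>, which evaluates to 1 when the option is built in (=y) or modular (=m), so a single test replaces the defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE) pair. It also works as an ordinary C expression, so the disabled branch is still compile-checked and then discarded as dead code. A minimal sketch of the idiom (illustrative only, not from this patch):

        #include <linux/kconfig.h>

        static void init_video(void)
        {
                /* Covers both CONFIG_FB_FSL_DIU=y and =m; the branch is
                 * type-checked even when the option is off, then the
                 * compiler drops it. */
                if (IS_ENABLED(CONFIG_FB_FSL_DIU))
                        mpc512x_init_diu();
        }
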
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index 0575e858291c..24b314d7bd5f 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -119,9 +119,9 @@ static int __init pdm360ng_probe(void)
 define_machine(pdm360ng) {
         .name = "PDM360NG",
         .probe = pdm360ng_probe,
-        .setup_arch = mpc512x_setup_diu,
+        .setup_arch = mpc512x_setup_arch,
         .init = pdm360ng_init,
-        .init_early = mpc512x_init_diu,
+        .init_early = mpc512x_init_early,
         .init_IRQ = mpc512x_init_IRQ,
         .get_irq = ipic_get_irq,
         .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 624cb51d19c9..7bc315822935 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -231,17 +231,7 @@ static struct i2c_driver mcu_driver = {
         .id_table = mcu_ids,
 };
 
-static int __init mcu_init(void)
-{
-        return i2c_add_driver(&mcu_driver);
-}
-module_init(mcu_init);
-
-static void __exit mcu_exit(void)
-{
-        i2c_del_driver(&mcu_driver);
-}
-module_exit(mcu_exit);
+module_i2c_driver(mcu_driver);
 
 MODULE_DESCRIPTION("Power Management and GPIO expander driver for "
                    "MPC8349E-mITX-compatible MCU");
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c
index 753a42c29d4d..39cfa4044e6c 100644
--- a/arch/powerpc/platforms/85xx/p5020_ds.c
+++ b/arch/powerpc/platforms/85xx/p5020_ds.c
@@ -75,12 +75,7 @@ define_machine(p5020_ds) {
 #ifdef CONFIG_PCI
         .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
 #endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-        .get_irq = mpic_get_irq,
-#else
         .get_irq = mpic_get_coreint_irq,
-#endif
         .restart = fsl_rstcr_restart,
         .calibrate_decr = generic_calibrate_decr,
         .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c
index 11381851828e..f70e74cddf97 100644
--- a/arch/powerpc/platforms/85xx/p5040_ds.c
+++ b/arch/powerpc/platforms/85xx/p5040_ds.c
@@ -66,12 +66,7 @@ define_machine(p5040_ds) {
 #ifdef CONFIG_PCI
         .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
 #endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-        .get_irq = mpic_get_irq,
-#else
         .get_irq = mpic_get_coreint_irq,
-#endif
         .restart = fsl_rstcr_restart,
         .calibrate_decr = generic_calibrate_decr,
         .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 6a1759939c6b..5ced4f5bb2b2 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -99,7 +99,7 @@ static void mpc85xx_take_timebase(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void __cpuinit smp_85xx_mach_cpu_die(void)
+static void smp_85xx_mach_cpu_die(void)
 {
         unsigned int cpu = smp_processor_id();
         u32 tmp;
@@ -141,7 +141,7 @@ static inline u32 read_spin_table_addr_l(void *spin_table)
         return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
 }
 
-static int __cpuinit smp_85xx_kick_cpu(int nr)
+static int smp_85xx_kick_cpu(int nr)
 {
         unsigned long flags;
         const u64 *cpu_rel_addr;
@@ -362,7 +362,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
 }
 #endif /* CONFIG_KEXEC */
 
-static void __cpuinit smp_85xx_setup_cpu(int cpu_nr)
+static void smp_85xx_setup_cpu(int cpu_nr)
 {
         if (smp_85xx_ops.probe == smp_mpic_probe)
                 mpic_setup_this_cpu();
diff --git a/arch/powerpc/platforms/85xx/t4240_qds.c b/arch/powerpc/platforms/85xx/t4240_qds.c
index 5998e9f33304..91ead6b1b8af 100644
--- a/arch/powerpc/platforms/85xx/t4240_qds.c
+++ b/arch/powerpc/platforms/85xx/t4240_qds.c
@@ -75,12 +75,7 @@ define_machine(t4240_qds) {
 #ifdef CONFIG_PCI
         .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
 #endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-        .get_irq = mpic_get_irq,
-#else
         .get_irq = mpic_get_coreint_irq,
-#endif
         .restart = fsl_rstcr_restart,
         .calibrate_decr = generic_calibrate_decr,
         .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 1e121088826f..587a2828b06c 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
 
 static struct irqaction tbint_irqaction = {
         .handler = timebase_interrupt,
+        .flags = IRQF_NO_THREAD,
         .name = "tbint",
 };
 
@@ -218,19 +219,12 @@ void mpc8xx_restart(char *cmd)
 
 static void cpm_cascade(unsigned int irq, struct irq_desc *desc)
 {
-        struct irq_chip *chip;
-        int cascade_irq;
-
-        if ((cascade_irq = cpm_get_irq()) >= 0) {
-                struct irq_desc *cdesc = irq_to_desc(cascade_irq);
+        struct irq_chip *chip = irq_desc_get_chip(desc);
+        int cascade_irq = cpm_get_irq();
 
+        if (cascade_irq >= 0)
                 generic_handle_irq(cascade_irq);
 
-                chip = irq_desc_get_chip(cdesc);
-                chip->irq_eoi(&cdesc->irq_data);
-        }
-
-        chip = irq_desc_get_chip(desc);
         chip->irq_eoi(&desc->irq_data);
 }
 
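The IRQF_NO_THREAD flag added to tbint_irqaction above keeps the timebase interrupt in hard-IRQ context even when forced interrupt threading ("threadirqs") is active, which low-level timer and cascade handlers require. A minimal sketch of requesting an interrupt with that flag (illustrative, not from this patch):

        #include <linux/interrupt.h>

        static irqreturn_t tb_handler(int irq, void *dev_id)
        {
                /* Runs in hard-IRQ context even under "threadirqs" */
                return IRQ_HANDLED;
        }

        static int __init install_tb_irq(unsigned int virq)
        {
                /* IRQF_NO_THREAD: never move this handler to a thread */
                return request_irq(virq, tb_handler, IRQF_NO_THREAD,
                                   "tbint-demo", NULL);
        }
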
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e17cdfc5ba40..d703775bda30 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -86,6 +86,27 @@ config MPIC
         bool
         default n
 
+config MPIC_TIMER
+        bool "MPIC Global Timer"
+        depends on MPIC && FSL_SOC
+        default n
+        help
+          The MPIC global timer is a hardware timer inside the
+          Freescale PIC complying with the OpenPIC standard. When the
+          specified interval times out, the hardware timer generates
+          an interrupt. The driver is currently only tested on Freescale
+          chips, but it can potentially support other global timers
+          complying with the OpenPIC standard.
+
+config FSL_MPIC_TIMER_WAKEUP
+        tristate "Freescale MPIC global timer wakeup driver"
+        depends on FSL_SOC && MPIC_TIMER && PM
+        default n
+        help
+          The driver provides a way to wake up the system by the
+          MPIC timer, e.g.
+          "echo 5 > /sys/devices/system/mpic/timer_wakeup"
+
 config PPC_EPAPR_HV_PIC
         bool
         default n
@@ -164,6 +185,11 @@ config IBMEBUS
         help
           Bus device driver for GX bus based adapters.
 
+config EEH
+        bool
+        depends on (PPC_POWERNV || PPC_PSERIES) && PCI
+        default y
+
 config PPC_MPC106
         bool
         default n
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 7819c40a6bc3..47d9a03dd415 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
         select PPC_FPU
         select PPC_HAVE_PMU_SUPPORT
         select SYS_SUPPORTS_HUGETLBFS
+        select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
 
 config PPC_BOOK3E_64
         bool "Embedded processors"
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 246e1d8b3af3..c34ee4e60873 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -185,7 +185,8 @@ static void beat_lpar_hptab_clear(void)
 static long beat_lpar_hpte_updatepp(unsigned long slot,
                                     unsigned long newpp,
                                     unsigned long vpn,
-                                    int psize, int ssize, int local)
+                                    int psize, int apsize,
+                                    int ssize, int local)
 {
         unsigned long lpar_rc;
         u64 dummy0, dummy1;
@@ -274,7 +275,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 }
 
 static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
-                                      int psize, int ssize, int local)
+                                      int psize, int apsize,
+                                      int ssize, int local)
 {
         unsigned long want_v;
         unsigned long lpar_rc;
@@ -364,9 +366,10 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
  * already zero. For now I am paranoid.
  */
 static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
                                        unsigned long newpp,
                                        unsigned long vpn,
-                                       int psize, int ssize, int local)
+                                       int psize, int apsize,
+                                       int ssize, int local)
 {
         unsigned long lpar_rc;
         unsigned long want_v;
@@ -394,7 +397,8 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 }
 
 static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
-                                         int psize, int ssize, int local)
+                                         int psize, int apsize,
+                                         int ssize, int local)
 {
         unsigned long want_v;
         unsigned long lpar_rc;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index d35dbbc8ec79..f75f6fcac729 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -142,7 +142,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
          * during boot if the user requests it.  Odd-numbered
          * cpus are assumed to be secondary threads.
          */
-        if (system_state < SYSTEM_RUNNING &&
+        if (system_state == SYSTEM_BOOTING &&
             cpu_has_feature(CPU_FTR_SMT) &&
             !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
                 return 0;
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index bdb738a69e41..49c9f9501c21 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -885,7 +885,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self,
         return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata smp_core99_cpu_nb = {
+static struct notifier_block smp_core99_cpu_nb = {
         .notifier_call = smp_core99_cpu_notify,
 };
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index bcc3cb48a44e..7fe595152478 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,3 +3,4 @@ obj-y += opal-rtc.o opal-nvram.o
 
 obj-$(CONFIG_SMP)       += smp.o
 obj-$(CONFIG_PCI)       += pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_EEH)       += eeh-ioda.o eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
new file mode 100644
index 000000000000..0cd1c4a71755
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -0,0 +1,916 @@
+/*
+ * This file implements the functions needed by EEH on IODA-compliant
+ * chips. Most of the EEH functionality here is built on top of the
+ * OPAL APIs.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/tce.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/* Debugging option */
+#ifdef IODA_EEH_DBG_ON
+#define IODA_EEH_DBG(args...)   pr_info(args)
+#else
+#define IODA_EEH_DBG(args...)
+#endif
+
+static char *hub_diag = NULL;
+static int ioda_eeh_nb_init = 0;
+
+static int ioda_eeh_event(struct notifier_block *nb,
+                          unsigned long events, void *change)
+{
+        uint64_t changed_evts = (uint64_t)change;
+
+        /* We simply send special EEH event */
+        if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
+            (events & OPAL_EVENT_PCI_ERROR))
+                eeh_send_failure_event(NULL);
+
+        return 0;
+}
+
+static struct notifier_block ioda_eeh_nb = {
+        .notifier_call = ioda_eeh_event,
+        .next = NULL,
+        .priority = 0
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int ioda_eeh_dbgfs_set(void *data, u64 val)
+{
+        struct pci_controller *hose = data;
+        struct pnv_phb *phb = hose->private_data;
+
+        out_be64(phb->regs + 0xD10, val);
+        return 0;
+}
+
+static int ioda_eeh_dbgfs_get(void *data, u64 *val)
+{
+        struct pci_controller *hose = data;
+        struct pnv_phb *phb = hose->private_data;
+
+        *val = in_be64(phb->regs + 0xD10);
+        return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_dbgfs_ops, ioda_eeh_dbgfs_get,
+                        ioda_eeh_dbgfs_set, "0x%llx\n");
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * ioda_eeh_post_init - Chip dependent post initialization
+ * @hose: PCI controller
+ *
+ * The function will be called after eeh PEs and devices
+ * have been built. That means the EEH is ready to supply
+ * service with I/O cache.
+ */
+static int ioda_eeh_post_init(struct pci_controller *hose)
+{
+        struct pnv_phb *phb = hose->private_data;
+        int ret;
+
+        /* Register OPAL event notifier */
+        if (!ioda_eeh_nb_init) {
+                ret = opal_notifier_register(&ioda_eeh_nb);
+                if (ret) {
+                        pr_err("%s: Can't register OPAL event notifier (%d)\n",
+                               __func__, ret);
+                        return ret;
+                }
+
+                ioda_eeh_nb_init = 1;
+        }
+
+        /* FIXME: Enable it for PHB3 later */
+        if (phb->type == PNV_PHB_IODA1) {
+                if (!hub_diag) {
+                        hub_diag = (char *)__get_free_page(GFP_KERNEL |
+                                                           __GFP_ZERO);
+                        if (!hub_diag) {
+                                pr_err("%s: Out of memory !\n", __func__);
+                                return -ENOMEM;
+                        }
+                }
+
+#ifdef CONFIG_DEBUG_FS
+                if (phb->dbgfs)
+                        debugfs_create_file("err_injct", 0600,
+                                            phb->dbgfs, hose,
+                                            &ioda_eeh_dbgfs_ops);
+#endif
+
+                phb->eeh_state |= PNV_EEH_STATE_ENABLED;
+        }
+
+        return 0;
+}
+
+/**
+ * ioda_eeh_set_option - Set EEH operation or I/O setting
+ * @pe: EEH PE
+ * @option: options
+ *
+ * Enable or disable EEH option for the indicated PE. The
+ * function also can be used to enable I/O or DMA for the
+ * PE.
+ */
+static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
+{
+        s64 ret;
+        u32 pe_no;
+        struct pci_controller *hose = pe->phb;
+        struct pnv_phb *phb = hose->private_data;
+
+        /* Check on PE number */
+        if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+                pr_err("%s: PE address %x out of range [0, %x] "
+                       "on PHB#%x\n",
+                       __func__, pe->addr, phb->ioda.total_pe,
+                       hose->global_number);
+                return -EINVAL;
+        }
+
+        pe_no = pe->addr;
+        switch (option) {
+        case EEH_OPT_DISABLE:
+                ret = -EEXIST;
+                break;
+        case EEH_OPT_ENABLE:
+                ret = 0;
+                break;
+        case EEH_OPT_THAW_MMIO:
+                ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+                                OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
+                if (ret) {
+                        pr_warning("%s: Failed to enable MMIO for "
+                                   "PHB#%x-PE#%x, err=%lld\n",
+                                   __func__, hose->global_number, pe_no, ret);
+                        return -EIO;
+                }
+
+                break;
+        case EEH_OPT_THAW_DMA:
+                ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+                                OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
+                if (ret) {
+                        pr_warning("%s: Failed to enable DMA for "
+                                   "PHB#%x-PE#%x, err=%lld\n",
+                                   __func__, hose->global_number, pe_no, ret);
+                        return -EIO;
+                }
+
+                break;
+        default:
+                pr_warning("%s: Invalid option %d\n", __func__, option);
+                return -EINVAL;
+        }
+
+        return ret;
+}
+
+/**
+ * ioda_eeh_get_state - Retrieve the state of PE
+ * @pe: EEH PE
+ *
+ * The PE's state should be retrieved from the PEEV, PEST
+ * IODA tables. Since OPAL has exported a function to do
+ * that, we had better use it.
+ */
+static int ioda_eeh_get_state(struct eeh_pe *pe)
+{
+        s64 ret = 0;
+        u8 fstate;
+        u16 pcierr;
+        u32 pe_no;
+        int result;
+        struct pci_controller *hose = pe->phb;
+        struct pnv_phb *phb = hose->private_data;
+
+        /*
+         * Sanity check on PE address. The PHB PE address should
+         * be zero.
+         */
+        if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+                pr_err("%s: PE address %x out of range [0, %x] "
+                       "on PHB#%x\n",
+                       __func__, pe->addr, phb->ioda.total_pe,
+                       hose->global_number);
+                return EEH_STATE_NOT_SUPPORT;
+        }
+
+        /* Retrieve PE status through OPAL */
+        pe_no = pe->addr;
+        ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+                        &fstate, &pcierr, NULL);
+        if (ret) {
+                pr_err("%s: Failed to get EEH status on "
+                       "PHB#%x-PE#%x, err=%lld\n",
+                       __func__, hose->global_number, pe_no, ret);
+                return EEH_STATE_NOT_SUPPORT;
+        }
+
+        /* Check PHB status */
+        if (pe->type & EEH_PE_PHB) {
+                result = 0;
+                result &= ~EEH_STATE_RESET_ACTIVE;
+
+                if (pcierr != OPAL_EEH_PHB_ERROR) {
+                        result |= EEH_STATE_MMIO_ACTIVE;
+                        result |= EEH_STATE_DMA_ACTIVE;
+                        result |= EEH_STATE_MMIO_ENABLED;
+                        result |= EEH_STATE_DMA_ENABLED;
+                }
+
+                return result;
+        }
+
+        /* Parse result out */
+        result = 0;
+        switch (fstate) {
+        case OPAL_EEH_STOPPED_NOT_FROZEN:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                result |= EEH_STATE_MMIO_ACTIVE;
+                result |= EEH_STATE_DMA_ACTIVE;
+                result |= EEH_STATE_MMIO_ENABLED;
+                result |= EEH_STATE_DMA_ENABLED;
+                break;
+        case OPAL_EEH_STOPPED_MMIO_FREEZE:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                result |= EEH_STATE_DMA_ACTIVE;
+                result |= EEH_STATE_DMA_ENABLED;
+                break;
+        case OPAL_EEH_STOPPED_DMA_FREEZE:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                result |= EEH_STATE_MMIO_ACTIVE;
+                result |= EEH_STATE_MMIO_ENABLED;
+                break;
+        case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                break;
+        case OPAL_EEH_STOPPED_RESET:
+                result |= EEH_STATE_RESET_ACTIVE;
+                break;
+        case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+                result |= EEH_STATE_UNAVAILABLE;
+                break;
+        case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+                result |= EEH_STATE_NOT_SUPPORT;
+                break;
+        default:
+                pr_warning("%s: Unexpected EEH status 0x%x "
+                           "on PHB#%x-PE#%x\n",
+                           __func__, fstate, hose->global_number, pe_no);
+        }
+
+        return result;
+}
+
+static int ioda_eeh_pe_clear(struct eeh_pe *pe)
+{
+        struct pci_controller *hose;
+        struct pnv_phb *phb;
+        u32 pe_no;
+        u8 fstate;
+        u16 pcierr;
+        s64 ret;
+
+        pe_no = pe->addr;
+        hose = pe->phb;
+        phb = pe->phb->private_data;
+
+        /* Clear the EEH error on the PE */
+        ret = opal_pci_eeh_freeze_clear(phb->opal_id,
+                        pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+        if (ret) {
+                pr_err("%s: Failed to clear EEH error for "
+                       "PHB#%x-PE#%x, err=%lld\n",
+                       __func__, hose->global_number, pe_no, ret);
+                return -EIO;
+        }
+
+        /*
+         * Read the PE state back and verify that the frozen
+         * state has been removed.
+         */
+        ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+                        &fstate, &pcierr, NULL);
+        if (ret) {
+                pr_err("%s: Failed to get EEH status on "
+                       "PHB#%x-PE#%x, err=%lld\n",
+                       __func__, hose->global_number, pe_no, ret);
+                return -EIO;
+        }
+
+        if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) {
+                pr_err("%s: Frozen state not cleared on "
+                       "PHB#%x-PE#%x, sts=%x\n",
+                       __func__, hose->global_number, pe_no, fstate);
+                return -EIO;
+        }
+
+        return 0;
+}
+
+static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
+{
+        s64 rc = OPAL_HARDWARE;
+
+        while (1) {
+                rc = opal_pci_poll(phb->opal_id);
+                if (rc <= 0)
+                        break;
+
+                msleep(rc);
+        }
+
+        return rc;
+}
+
+static int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
+{
+        struct pnv_phb *phb = hose->private_data;
+        s64 rc = OPAL_HARDWARE;
+
+        pr_debug("%s: Reset PHB#%x, option=%d\n",
+                 __func__, hose->global_number, option);
+
+        /* Issue PHB complete reset request */
+        if (option == EEH_RESET_FUNDAMENTAL ||
+            option == EEH_RESET_HOT)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PHB_COMPLETE,
+                                OPAL_ASSERT_RESET);
+        else if (option == EEH_RESET_DEACTIVATE)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PHB_COMPLETE,
+                                OPAL_DEASSERT_RESET);
+        if (rc < 0)
+                goto out;
+
+        /*
+         * Poll the state of the PHB until the request is done
+         * successfully.
+         */
+        rc = ioda_eeh_phb_poll(phb);
+out:
+        if (rc != OPAL_SUCCESS)
+                return -EIO;
+
+        return 0;
+}
+
+static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
+{
+        struct pnv_phb *phb = hose->private_data;
+        s64 rc = OPAL_SUCCESS;
+
+        pr_debug("%s: Reset PHB#%x, option=%d\n",
+                 __func__, hose->global_number, option);
+
+        /*
+         * During the reset deassert time, we needn't care about
+         * the reset scope because the firmware does nothing for
+         * fundamental or hot reset during the deassert phase.
+         */
+        if (option == EEH_RESET_FUNDAMENTAL)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PCI_FUNDAMENTAL_RESET,
+                                OPAL_ASSERT_RESET);
+        else if (option == EEH_RESET_HOT)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PCI_HOT_RESET,
+                                OPAL_ASSERT_RESET);
+        else if (option == EEH_RESET_DEACTIVATE)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PCI_HOT_RESET,
+                                OPAL_DEASSERT_RESET);
+        if (rc < 0)
+                goto out;
+
+        /* Poll the state of the PHB until the request is done */
+        rc = ioda_eeh_phb_poll(phb);
+out:
+        if (rc != OPAL_SUCCESS)
+                return -EIO;
+
+        return 0;
+}
+
+static int ioda_eeh_bridge_reset(struct pci_controller *hose,
+                                 struct pci_dev *dev, int option)
+{
+        u16 ctrl;
+
+        pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n",
+                 __func__, hose->global_number, dev->bus->number,
+                 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option);
+
+        switch (option) {
+        case EEH_RESET_FUNDAMENTAL:
+        case EEH_RESET_HOT:
+                pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+                ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+                pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+                break;
+        case EEH_RESET_DEACTIVATE:
+                pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+                ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+                pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+                break;
+        }
+
+        return 0;
+}
+
+/**
+ * ioda_eeh_reset - Reset the indicated PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Do reset on the indicated PE. For a PCI bus sensitive PE,
+ * we need to reset the parent p2p bridge. The PHB has to
+ * be reinitialized if the p2p bridge is the root bridge. For
+ * a PCI device sensitive PE, we will try to reset the device
+ * through FLR. For now, we don't have OPAL APIs to do HARD
+ * reset yet, so all resets would be SOFT (HOT) resets.
+ */
+static int ioda_eeh_reset(struct eeh_pe *pe, int option)
+{
+        struct pci_controller *hose = pe->phb;
+        struct eeh_dev *edev;
+        struct pci_dev *dev;
+        int ret;
+
+        /*
+         * Anyway, we have to clear the problematic state for the
+         * corresponding PE. However, we needn't do it if the PE
+         * is PHB associated. That means the PHB is having fatal
+         * errors and it needs reset. Furthermore, the AIB interface
+         * isn't reliable any more.
+         */
+        if (!(pe->type & EEH_PE_PHB) &&
+            (option == EEH_RESET_HOT ||
+             option == EEH_RESET_FUNDAMENTAL)) {
+                ret = ioda_eeh_pe_clear(pe);
+                if (ret)
+                        return -EIO;
+        }
+
+        /*
+         * The rules applied to reset, either fundamental or hot reset:
+         *
+         * We always reset the direct upstream bridge of the PE. If the
+         * direct upstream bridge isn't the root bridge, we always take
+         * hot reset no matter what option (fundamental or hot) is.
+         * Otherwise, we should do the reset according to the required
+         * option.
+         */
+        if (pe->type & EEH_PE_PHB) {
+                ret = ioda_eeh_phb_reset(hose, option);
+        } else {
+                if (pe->type & EEH_PE_DEVICE) {
+                        /*
+                         * If it's a device PE, we didn't refer to the parent
+                         * PCI bus yet. So we have to figure it out indirectly.
+                         */
+                        edev = list_first_entry(&pe->edevs,
+                                        struct eeh_dev, list);
+                        dev = eeh_dev_to_pci_dev(edev);
+                        dev = dev->bus->self;
+                } else {
+                        /*
+                         * If it's a bus PE, the parent PCI bus is already
+                         * there, so just pick it up.
+                         */
+                        dev = pe->bus->self;
+                }
+
+                /*
+                 * Do the reset based on whether the direct upstream bridge
+                 * is the root bridge (port) or not.
+                 */
+                if (dev->bus->number == 0)
+                        ret = ioda_eeh_root_reset(hose, option);
+                else
+                        ret = ioda_eeh_bridge_reset(hose, dev, option);
+        }
+
+        return ret;
+}
+
+/**
+ * ioda_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: Severity level of the log
+ * @drv_log: buffer to store the log
+ * @len: space of the log buffer
+ *
+ * The function is used to retrieve the error log from P7IOC.
+ */
+static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
+                            char *drv_log, unsigned long len)
+{
+        s64 ret;
+        unsigned long flags;
+        struct pci_controller *hose = pe->phb;
+        struct pnv_phb *phb = hose->private_data;
+
+        spin_lock_irqsave(&phb->lock, flags);
+
+        ret = opal_pci_get_phb_diag_data2(phb->opal_id,
+                        phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+        if (ret) {
+                spin_unlock_irqrestore(&phb->lock, flags);
+                pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n",
+                           __func__, hose->global_number, pe->addr);
+                return -EIO;
+        }
+
+        /*
+         * FIXME: We probably need to log the error somewhere.
+         * Let's sort that out in future.
+         */
+        /* pr_info("%s", phb->diag.blob); */
+
+        spin_unlock_irqrestore(&phb->lock, flags);
+
+        return 0;
+}
+
+/**
+ * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
+ * @pe: EEH PE
+ *
+ * A particular PE might include PCI bridges. In order to make
+ * the PE work properly, those PCI bridges should be configured
+ * correctly. However, nothing needs to be done on P7IOC since
+ * the reset function already covers everything this function
+ * would have to do.
+ */
+static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
+{
+        return 0;
+}
+
+static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+        /* GEM */
+        pr_info("  GEM XFIR:        %016llx\n", data->gemXfir);
+        pr_info("  GEM RFIR:        %016llx\n", data->gemRfir);
+        pr_info("  GEM RIRQFIR:     %016llx\n", data->gemRirqfir);
+        pr_info("  GEM Mask:        %016llx\n", data->gemMask);
+        pr_info("  GEM RWOF:        %016llx\n", data->gemRwof);
+
+        /* LEM */
+        pr_info("  LEM FIR:         %016llx\n", data->lemFir);
+        pr_info("  LEM Error Mask:  %016llx\n", data->lemErrMask);
+        pr_info("  LEM Action 0:    %016llx\n", data->lemAction0);
+        pr_info("  LEM Action 1:    %016llx\n", data->lemAction1);
+        pr_info("  LEM WOF:         %016llx\n", data->lemWof);
+}
+
+static void ioda_eeh_hub_diag(struct pci_controller *hose)
+{
+        struct pnv_phb *phb = hose->private_data;
+        struct OpalIoP7IOCErrorData *data;
+        long rc;
+
+        data = (struct OpalIoP7IOCErrorData *)hub_diag;
+        rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE);
+        if (rc != OPAL_SUCCESS) {
+                pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+                           __func__, phb->hub_id, rc);
+                return;
+        }
+
+        switch (data->type) {
+        case OPAL_P7IOC_DIAG_TYPE_RGC:
+                pr_info("P7IOC diag-data for RGC\n\n");
+                ioda_eeh_hub_diag_common(data);
+                pr_info("  RGC Status:      %016llx\n", data->rgc.rgcStatus);
+                pr_info("  RGC LDCP:        %016llx\n", data->rgc.rgcLdcp);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_BI:
+                pr_info("P7IOC diag-data for BI %s\n\n",
+                        data->bi.biDownbound ? "Downbound" : "Upbound");
+                ioda_eeh_hub_diag_common(data);
+                pr_info("  BI LDCP 0:       %016llx\n", data->bi.biLdcp0);
+                pr_info("  BI LDCP 1:       %016llx\n", data->bi.biLdcp1);
+                pr_info("  BI LDCP 2:       %016llx\n", data->bi.biLdcp2);
+                pr_info("  BI Fence Status: %016llx\n", data->bi.biFenceStatus);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_CI:
+                pr_info("P7IOC diag-data for CI Port %d\n\n",
+                        data->ci.ciPort);
+                ioda_eeh_hub_diag_common(data);
+                pr_info("  CI Port Status:  %016llx\n", data->ci.ciPortStatus);
+                pr_info("  CI Port LDCP:    %016llx\n", data->ci.ciPortLdcp);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_MISC:
+                pr_info("P7IOC diag-data for MISC\n\n");
+                ioda_eeh_hub_diag_common(data);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_I2C:
+                pr_info("P7IOC diag-data for I2C\n\n");
+                ioda_eeh_hub_diag_common(data);
+                break;
+        default:
+                pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+                           __func__, phb->hub_id, data->type);
+        }
+}
+
+static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose,
+                                    struct OpalIoPhbErrorCommon *common)
+{
+        struct OpalIoP7IOCPhbErrorData *data;
+        int i;
+
+        data = (struct OpalIoP7IOCPhbErrorData *)common;
+
+        pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n",
+                hose->global_number, common->version);
+
+        pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
+
+        pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
+        pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
+        pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
+
+        pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
+        pr_info("  slotStatus:           %08x\n", data->slotStatus);
+        pr_info("  linkStatus:           %08x\n", data->linkStatus);
+        pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
+        pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
+
+        pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
+        pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
+        pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
+        pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
+        pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
+        pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
+        pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
+        pr_info("  sourceId:             %08x\n", data->sourceId);
+
+        pr_info("  errorClass:           %016llx\n", data->errorClass);
+        pr_info("  correlator:           %016llx\n", data->correlator);
+        pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
+        pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
+        pr_info("  lemFir:               %016llx\n", data->lemFir);
+        pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
+        pr_info("  lemWOF:               %016llx\n", data->lemWOF);
+        pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
+        pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
+        pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
+        pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
+        pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
+        pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
+        pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
+        pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
+        pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
+        pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
+        pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
+        pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
+        pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
+        pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
+        pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
+        pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+
+        for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
+                if ((data->pestA[i] >> 63) == 0 &&
+                    (data->pestB[i] >> 63) == 0)
+                        continue;
+
+                pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
+                pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+        }
+}
+
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+        struct pnv_phb *phb = hose->private_data;
+        struct OpalIoPhbErrorCommon *common;
+        long rc;
+
+        common = (struct OpalIoPhbErrorCommon *)phb->diag.blob;
+        rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE);
+        if (rc != OPAL_SUCCESS) {
+                pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+                           __func__, hose->global_number, rc);
+                return;
+        }
+
+        switch (common->ioType) {
+        case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
+                ioda_eeh_p7ioc_phb_diag(hose, common);
+                break;
+        default:
+                pr_warning("%s: Unrecognized I/O chip %d\n",
+                           __func__, common->ioType);
+        }
+}
+
+static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
+                               struct eeh_pe **pe)
+{
+        struct eeh_pe *phb_pe;
+
+        phb_pe = eeh_phb_pe_get(hose);
+        if (!phb_pe) {
+                pr_warning("%s: Can't find PE for PHB#%d\n",
+                           __func__, hose->global_number);
+                return -EEXIST;
+        }
+
+        *pe = phb_pe;
+        return 0;
+}
+
+static int ioda_eeh_get_pe(struct pci_controller *hose,
+                           u16 pe_no, struct eeh_pe **pe)
+{
+        struct eeh_pe *phb_pe, *dev_pe;
+        struct eeh_dev dev;
+
+        /* Find the PHB PE */
+        if (ioda_eeh_get_phb_pe(hose, &phb_pe))
+                return -EEXIST;
+
+        /* Find the PE according to PE# */
+        memset(&dev, 0, sizeof(struct eeh_dev));
+        dev.phb = hose;
+        dev.pe_config_addr = pe_no;
+        dev_pe = eeh_pe_get(&dev);
+        if (!dev_pe) {
+                pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n",
+                           __func__, hose->global_number, pe_no);
+                return -EEXIST;
+        }
+
+        *pe = dev_pe;
+        return 0;
+}
+
+/**
+ * ioda_eeh_next_error - Retrieve next error for EEH core to handle
+ * @pe: The affected PE
+ *
+ * The function is expected to be called by the EEH core while it gets
+ * a special EEH event (without a bound PE). The function calls into
+ * the OPAL APIs for the next error to handle. Informational errors are
+ * handled internally by the platform. However, the dead IOC, dead PHB,
+ * fenced PHB and frozen PE should be handled by the EEH core eventually.
+ */
+static int ioda_eeh_next_error(struct eeh_pe **pe)
+{
+        struct pci_controller *hose, *tmp;
+        struct pnv_phb *phb;
+        u64 frozen_pe_no;
+        u16 err_type, severity;
+        long rc;
+        int ret = 1;
+
+        /*
+         * While running here, it's safe to purge the event queue.
+         * And we should keep the cached OPAL notifier event
+         * synchronized between the kernel and firmware.
+         */
+        eeh_remove_event(NULL);
+        opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
+
+        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+                /*
+                 * If the subordinate PCI buses of the PHB have been
+                 * removed, we needn't take care of it any more.
+                 */
+                phb = hose->private_data;
+                if (phb->eeh_state & PNV_EEH_STATE_REMOVED)
+                        continue;
+
+                rc = opal_pci_next_error(phb->opal_id,
+                                &frozen_pe_no, &err_type, &severity);
+
+                /* If the OPAL API returns error, we needn't proceed */
+                if (rc != OPAL_SUCCESS) {
+                        IODA_EEH_DBG("%s: Invalid return value on "
+                                     "PHB#%x (0x%lx) from opal_pci_next_error\n",
+                                     __func__, hose->global_number, rc);
+                        continue;
+                }
+
+                /* If the PHB doesn't have an error, stop processing */
+                if (err_type == OPAL_EEH_NO_ERROR ||
+                    severity == OPAL_EEH_SEV_NO_ERROR) {
+                        IODA_EEH_DBG("%s: No error found on PHB#%x\n",
+                                     __func__, hose->global_number);
+                        continue;
+                }
+
+                /*
+                 * Process the error. We're expecting the error with
+                 * highest priority reported upon multiple errors on the
+                 * specific PHB.
+                 */
+                IODA_EEH_DBG("%s: Error (%d, %d, %llu) on PHB#%x\n",
+                             __func__, err_type, severity,
+                             frozen_pe_no, hose->global_number);
+                switch (err_type) {
+                case OPAL_EEH_IOC_ERROR:
+                        if (severity == OPAL_EEH_SEV_IOC_DEAD) {
+                                list_for_each_entry_safe(hose, tmp,
+                                                &hose_list, list_node) {
+                                        phb = hose->private_data;
+                                        phb->eeh_state |= PNV_EEH_STATE_REMOVED;
+                                }
+
+                                pr_err("EEH: dead IOC detected\n");
+                                ret = 4;
+                                goto out;
+                        } else if (severity == OPAL_EEH_SEV_INF) {
+                                pr_info("EEH: IOC informative error "
+                                        "detected\n");
+                                ioda_eeh_hub_diag(hose);
+                        }
+
+                        break;
+                case OPAL_EEH_PHB_ERROR:
+                        if (severity == OPAL_EEH_SEV_PHB_DEAD) {
+                                if (ioda_eeh_get_phb_pe(hose, pe))
+                                        break;
+
+                                pr_err("EEH: dead PHB#%x detected\n",
+                                       hose->global_number);
+                                phb->eeh_state |= PNV_EEH_STATE_REMOVED;
+                                ret = 3;
+                                goto out;
+                        } else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
+                                if (ioda_eeh_get_phb_pe(hose, pe))
+                                        break;
+
+                                pr_err("EEH: fenced PHB#%x detected\n",
+                                       hose->global_number);
+                                ret = 2;
+                                goto out;
+                        } else if (severity == OPAL_EEH_SEV_INF) {
+                                pr_info("EEH: PHB#%x informative error "
+                                        "detected\n",
+                                        hose->global_number);
+                                ioda_eeh_phb_diag(hose);
+                        }
+
+                        break;
+                case OPAL_EEH_PE_ERROR:
+                        if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
+                                break;
+
+                        pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
+                               (*pe)->addr, (*pe)->phb->global_number);
+                        ret = 1;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+struct pnv_eeh_ops ioda_eeh_ops = {
+        .post_init = ioda_eeh_post_init,
+        .set_option = ioda_eeh_set_option,
+        .get_state = ioda_eeh_get_state,
+        .reset = ioda_eeh_reset,
+        .get_log = ioda_eeh_get_log,
+        .configure_bridge = ioda_eeh_configure_bridge,
+        .next_error = ioda_eeh_next_error
+};
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 000000000000..969cce73055a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,379 @@
+/*
+ * This file implements the platform dependent EEH operations for the
+ * powernv platform, which runs on non-virtualized hardware without a
+ * hypervisor.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/**
+ * powernv_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on powernv
+ */
+static int powernv_eeh_init(void)
+{
+        /* We require OPALv3 */
+        if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+                pr_warning("%s: OPALv3 is required !\n", __func__);
+                return -EINVAL;
+        }
+
+        /* Set EEH probe mode */
+        eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+
+        return 0;
+}
+
+/**
+ * powernv_eeh_post_init - EEH platform dependent post initialization
+ *
+ * EEH platform dependent post initialization on powernv. When
+ * the function is called, the EEH PEs and devices should have
+ * been built. If the I/O cache stuff has been built, EEH is
+ * ready to supply service.
+ */
+static int powernv_eeh_post_init(void)
+{
+        struct pci_controller *hose;
+        struct pnv_phb *phb;
+        int ret = 0;
+
+        list_for_each_entry(hose, &hose_list, list_node) {
+                phb = hose->private_data;
+
+                if (phb->eeh_ops && phb->eeh_ops->post_init) {
+                        ret = phb->eeh_ops->post_init(hose);
+                        if (ret)
+                                break;
+                }
+        }
+
+        return ret;
+}
+
87/**
88 * powernv_eeh_dev_probe - Do probe on PCI device
89 * @dev: PCI device
90 * @flag: unused
91 *
92 * When the EEH module is installed during system boot, all PCI
93 * devices are checked one by one to see whether they support EEH.
94 * This function is introduced for that purpose. By default, EEH is
95 * enabled on all PCI devices, so we only need to do the necessary
96 * initialization on the corresponding eeh device and create the PE
97 * accordingly.
98 *
99 * Note that it's unsafe to retrieve the EEH device through the
100 * corresponding PCI device here. During a PCI hotplug, which was
101 * possibly triggered by the EEH core, the binding between the EEH
102 * device and the PCI device hasn't been built yet.
103 */
104static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
105{
106 struct pci_controller *hose = pci_bus_to_host(dev->bus);
107 struct pnv_phb *phb = hose->private_data;
108 struct device_node *dn = pci_device_to_OF_node(dev);
109 struct eeh_dev *edev = of_node_to_eeh_dev(dn);
110
111 /*
112 * The root bridge, which doesn't have any subordinate
113 * PCI devices, has no OF node. So it's not reasonable
114 * to continue probing when the OF node or EEH device
115 * is missing.
116 */
117 if (!dn || !edev)
118 return 0;
119
120 /* Skip for PCI-ISA bridge */
121 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
122 return 0;
123
124 /* Initialize eeh device */
125 edev->class_code = dev->class;
126 edev->mode = 0;
127 edev->config_addr = ((dev->bus->number << 8) | dev->devfn);
128 edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
129
130 /* Create PE */
131 eeh_add_to_parent_pe(edev);
132
133 /*
134 * Enable EEH explicitly so that we do the EEH check
135 * while accessing I/O resources.
136 *
137 * FIXME: Enable that for PHB3 later
138 */
139 if (phb->type == PNV_PHB_IODA1)
140 eeh_subsystem_enabled = 1;
141
142 /* Save memory bars */
143 eeh_save_bars(edev);
144
145 return 0;
146}
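
The config address packed above follows the usual BDF layout: bus number in
bits 15:8, device/function in bits 7:0. A small illustrative helper (not part
of this patch) decoding it back with PCI_SLOT()/PCI_FUNC() from linux/pci.h:

	/* Hypothetical decode of the (bus << 8) | devfn packing above */
	static void pnv_eeh_bdf_decode(u32 config_addr, u8 *bus, u8 *dev, u8 *fn)
	{
		*bus = (config_addr >> 8) & 0xff;
		*dev = PCI_SLOT(config_addr & 0xff);	/* device: bits 7:3 */
		*fn  = PCI_FUNC(config_addr & 0xff);	/* function: bits 2:0 */
	}
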
147
148/**
149 * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
150 * @pe: EEH PE
151 * @option: operation to be issued
152 *
153 * The function is used to control the EEH functionality globally.
154 * Currently, the following options are supported according to PAPR:
155 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
156 */
157static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
158{
159 struct pci_controller *hose = pe->phb;
160 struct pnv_phb *phb = hose->private_data;
161 int ret = -EEXIST;
162
163 /*
164 * All we need to do is pass it down to the hardware
165 * implementation to handle.
166 */
167 if (phb->eeh_ops && phb->eeh_ops->set_option)
168 ret = phb->eeh_ops->set_option(pe, option);
169
170 return ret;
171}
172
173/**
174 * powernv_eeh_get_pe_addr - Retrieve PE address
175 * @pe: EEH PE
176 *
177 * Retrieve the PE address according to the given traditional
178 * PCI BDF (Bus/Device/Function) address.
179 */
180static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
181{
182 return pe->addr;
183}
184
185/**
186 * powernv_eeh_get_state - Retrieve PE state
187 * @pe: EEH PE
188 * @delay: delay while PE state is temporarily unavailable
189 *
190 * Retrieve the state of the specified PE. On an IODA-compatible
191 * platform it should be retrieved from the IODA table, so we
192 * prefer passing the request down to the hardware implementation
193 * to handle it.
194 */
195static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay)
196{
197 struct pci_controller *hose = pe->phb;
198 struct pnv_phb *phb = hose->private_data;
199 int ret = EEH_STATE_NOT_SUPPORT;
200
201 if (phb->eeh_ops && phb->eeh_ops->get_state) {
202 ret = phb->eeh_ops->get_state(pe);
203
204 /*
205 * If the PE state is temporarily unavailable,
206 * tell the EEH core to delay for the default
207 * period (1 second).
208 */
209 if (delay) {
210 *delay = 0;
211 if (ret & EEH_STATE_UNAVAILABLE)
212 *delay = 1000;
213 }
214 }
215
216 return ret;
217}
218
219/**
220 * powernv_eeh_reset - Reset the specified PE
221 * @pe: EEH PE
222 * @option: reset option
223 *
224 * Reset the specified PE
225 */
226static int powernv_eeh_reset(struct eeh_pe *pe, int option)
227{
228 struct pci_controller *hose = pe->phb;
229 struct pnv_phb *phb = hose->private_data;
230 int ret = -EEXIST;
231
232 if (phb->eeh_ops && phb->eeh_ops->reset)
233 ret = phb->eeh_ops->reset(pe, option);
234
235 return ret;
236}
237
238/**
239 * powernv_eeh_wait_state - Wait for PE state
240 * @pe: EEH PE
241 * @max_wait: maximal wait period in milliseconds
242 *
243 * Wait for the state of the associated PE. It might take some time
244 * to retrieve the PE's state.
245 */
246static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
247{
248 int ret;
249 int mwait;
250
251 while (1) {
252 ret = powernv_eeh_get_state(pe, &mwait);
253
254 /*
255 * If the PE's state is temporarily unavailable,
256 * we have to wait for the specified time. Otherwise,
257 * the PE's state will be returned immediately.
258 */
259 if (ret != EEH_STATE_UNAVAILABLE)
260 return ret;
261
262 max_wait -= mwait;
263 if (max_wait <= 0) {
264 pr_warning("%s: Timeout getting PE#%x's state (%d)\n",
265 __func__, pe->addr, max_wait);
266 return EEH_STATE_NOT_SUPPORT;
267 }
268
269 msleep(mwait);
270 }
271
272 return EEH_STATE_NOT_SUPPORT;
273}
274
275/**
276 * powernv_eeh_get_log - Retrieve error log
277 * @pe: EEH PE
278 * @severity: temporary or permanent error log
279 * @drv_log: driver log to be combined with retrieved error log
280 * @len: length of driver log
281 *
282 * Retrieve the temporary or permanent error from the PE.
283 */
284static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
285 char *drv_log, unsigned long len)
286{
287 struct pci_controller *hose = pe->phb;
288 struct pnv_phb *phb = hose->private_data;
289 int ret = -EEXIST;
290
291 if (phb->eeh_ops && phb->eeh_ops->get_log)
292 ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
293
294 return ret;
295}
296
297/**
298 * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
299 * @pe: EEH PE
300 *
301 * The function will be called to reconfigure the bridges included
302 * in the specified PE so that the malfunctioning PE can be
303 * recovered.
304 */
305static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
306{
307 struct pci_controller *hose = pe->phb;
308 struct pnv_phb *phb = hose->private_data;
309 int ret = 0;
310
311 if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
312 ret = phb->eeh_ops->configure_bridge(pe);
313
314 return ret;
315}
316
317/**
318 * powernv_eeh_next_error - Retrieve next EEH error to handle
319 * @pe: Affected PE
320 *
321 * Use the OPAL API to retrieve the next EEH error for the EEH core to handle.
322 */
323static int powernv_eeh_next_error(struct eeh_pe **pe)
324{
325 struct pci_controller *hose;
326 struct pnv_phb *phb = NULL;
327
328 list_for_each_entry(hose, &hose_list, list_node) {
329 phb = hose->private_data;
330 break;
331 }
332
333 if (phb && phb->eeh_ops->next_error)
334 return phb->eeh_ops->next_error(pe);
335
336 return -EEXIST;
337}
338
339static struct eeh_ops powernv_eeh_ops = {
340 .name = "powernv",
341 .init = powernv_eeh_init,
342 .post_init = powernv_eeh_post_init,
343 .of_probe = NULL,
344 .dev_probe = powernv_eeh_dev_probe,
345 .set_option = powernv_eeh_set_option,
346 .get_pe_addr = powernv_eeh_get_pe_addr,
347 .get_state = powernv_eeh_get_state,
348 .reset = powernv_eeh_reset,
349 .wait_state = powernv_eeh_wait_state,
350 .get_log = powernv_eeh_get_log,
351 .configure_bridge = powernv_eeh_configure_bridge,
352 .read_config = pnv_pci_cfg_read,
353 .write_config = pnv_pci_cfg_write,
354 .next_error = powernv_eeh_next_error
355};
356
357/**
358 * eeh_powernv_init - Register platform dependent EEH operations
359 *
360 * EEH initialization on the powernv platform. This function should be
361 * called before any EEH related functions.
362 */
363static int __init eeh_powernv_init(void)
364{
365 int ret = -EINVAL;
366
367 if (!machine_is(powernv))
368 return ret;
369
370 ret = eeh_ops_register(&powernv_eeh_ops);
371 if (!ret)
372 pr_info("EEH: PowerNV platform initialized\n");
373 else
374 pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
375
376 return ret;
377}
378
379early_initcall(eeh_powernv_init);
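
Taken together with eeh-ioda.c above, an EEH request on powernv passes through
two layers: the generic eeh_ops registered here, then the IOC-specific
pnv_eeh_ops hanging off the PHB. A sketch of the call path for a reset, with
all names taken from this patch:

	/* A single call from the EEH core ... */
	rc = eeh_ops->reset(pe, EEH_RESET_HOT);
	/* ... lands in powernv_eeh_reset(), which forwards to the
	 * IOC-specific backend via phb->eeh_ops->reset(), i.e.
	 * ioda_eeh_reset() on IODA PHBs. */
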
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6fabe92eafb6..e88863ffb135 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -107,4 +107,7 @@ OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
107OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); 107OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
108OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); 108OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
109OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); 109OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
110OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
111OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
110OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); 112OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
113OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 628c564ceadb..106301fd2fa5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -15,6 +15,7 @@
15#include <linux/of.h> 15#include <linux/of.h>
16#include <linux/of_platform.h> 16#include <linux/of_platform.h>
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/notifier.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <asm/opal.h> 20#include <asm/opal.h>
20#include <asm/firmware.h> 21#include <asm/firmware.h>
@@ -31,6 +32,10 @@ static DEFINE_SPINLOCK(opal_write_lock);
31extern u64 opal_mc_secondary_handler[]; 32extern u64 opal_mc_secondary_handler[];
32static unsigned int *opal_irqs; 33static unsigned int *opal_irqs;
33static unsigned int opal_irq_count; 34static unsigned int opal_irq_count;
35static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
36static DEFINE_SPINLOCK(opal_notifier_lock);
37static uint64_t last_notified_mask = 0x0ul;
38static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
34 39
35int __init early_init_dt_scan_opal(unsigned long node, 40int __init early_init_dt_scan_opal(unsigned long node,
36 const char *uname, int depth, void *data) 41 const char *uname, int depth, void *data)
@@ -95,6 +100,68 @@ static int __init opal_register_exception_handlers(void)
95 100
96early_initcall(opal_register_exception_handlers); 101early_initcall(opal_register_exception_handlers);
97 102
103int opal_notifier_register(struct notifier_block *nb)
104{
105 if (!nb) {
106 pr_warning("%s: Invalid argument (%p)\n",
107 __func__, nb);
108 return -EINVAL;
109 }
110
111 atomic_notifier_chain_register(&opal_notifier_head, nb);
112 return 0;
113}
114
115static void opal_do_notifier(uint64_t events)
116{
117 unsigned long flags;
118 uint64_t changed_mask;
119
120 if (atomic_read(&opal_notifier_hold))
121 return;
122
123 spin_lock_irqsave(&opal_notifier_lock, flags);
124 changed_mask = last_notified_mask ^ events;
125 last_notified_mask = events;
126 spin_unlock_irqrestore(&opal_notifier_lock, flags);
127
128 /*
129 * We feed the callback both the event bits and the
130 * changed bits so it has enough information.
131 */
132 atomic_notifier_call_chain(&opal_notifier_head,
133 events, (void *)changed_mask);
134}
135
136void opal_notifier_update_evt(uint64_t evt_mask,
137 uint64_t evt_val)
138{
139 unsigned long flags;
140
141 spin_lock_irqsave(&opal_notifier_lock, flags);
142 last_notified_mask &= ~evt_mask;
143 last_notified_mask |= evt_val;
144 spin_unlock_irqrestore(&opal_notifier_lock, flags);
145}
146
147void opal_notifier_enable(void)
148{
149 int64_t rc;
150 uint64_t evt = 0;
151
152 atomic_set(&opal_notifier_hold, 0);
153
154 /* Process pending events */
155 rc = opal_poll_events(&evt);
156 if (rc == OPAL_SUCCESS && evt)
157 opal_do_notifier(evt);
158}
159
160void opal_notifier_disable(void)
161{
162 atomic_set(&opal_notifier_hold, 1);
163}
164
98int opal_get_chars(uint32_t vtermno, char *buf, int count) 165int opal_get_chars(uint32_t vtermno, char *buf, int count)
99{ 166{
100 s64 len, rc; 167 s64 len, rc;
@@ -297,7 +364,7 @@ static irqreturn_t opal_interrupt(int irq, void *data)
297 364
298 opal_handle_interrupt(virq_to_hw(irq), &events); 365 opal_handle_interrupt(virq_to_hw(irq), &events);
299 366
300 /* XXX TODO: Do something with the events */ 367 opal_do_notifier(events);
301 368
302 return IRQ_HANDLED; 369 return IRQ_HANDLED;
303} 370}
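
For reference, a minimal sketch of a consumer of the notifier interface added
above. The callback receives the current event word as the notifier action and
the bits that changed since the last notification as the data pointer;
OPAL_EVENT_EXAMPLE stands in for a real event bit from asm/opal.h:

	#include <linux/notifier.h>

	static int example_opal_event(struct notifier_block *nb,
				      unsigned long events, void *change)
	{
		uint64_t changed = (uint64_t)(unsigned long)change;

		/* React only when the bit we care about actually toggled */
		if (changed & OPAL_EVENT_EXAMPLE)
			pr_info("OPAL events now 0x%lx\n", events);

		return NOTIFY_OK;
	}

	static struct notifier_block example_opal_nb = {
		.notifier_call	= example_opal_event,
	};

	/* Somewhere in platform init code: */
	opal_notifier_register(&example_opal_nb);
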
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9c9d15e4cdf2..49b57b9f835d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/pci.h> 15#include <linux/pci.h>
16#include <linux/debugfs.h>
16#include <linux/delay.h> 17#include <linux/delay.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/init.h> 19#include <linux/init.h>
@@ -32,6 +33,7 @@
32#include <asm/iommu.h> 33#include <asm/iommu.h>
33#include <asm/tce.h> 34#include <asm/tce.h>
34#include <asm/xics.h> 35#include <asm/xics.h>
36#include <asm/debug.h>
35 37
36#include "powernv.h" 38#include "powernv.h"
37#include "pci.h" 39#include "pci.h"
@@ -441,6 +443,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
441 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 443 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
442} 444}
443 445
446static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
447{
448 struct pci_dev *dev;
449
450 list_for_each_entry(dev, &bus->devices, bus_list) {
451 set_iommu_table_base(&dev->dev, &pe->tce32_table);
452 if (dev->subordinate)
453 pnv_ioda_setup_bus_dma(pe, dev->subordinate);
454 }
455}
456
444static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, 457static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
445 u64 *startp, u64 *endp) 458 u64 *startp, u64 *endp)
446{ 459{
@@ -595,6 +608,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
595 TCE_PCI_SWINV_PAIR; 608 TCE_PCI_SWINV_PAIR;
596 } 609 }
597 iommu_init_table(tbl, phb->hose->node); 610 iommu_init_table(tbl, phb->hose->node);
611 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
612
613 if (pe->pdev)
614 set_iommu_table_base(&pe->pdev->dev, tbl);
615 else
616 pnv_ioda_setup_bus_dma(pe, pe->pbus);
598 617
599 return; 618 return;
600 fail: 619 fail:
@@ -667,6 +686,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
667 } 686 }
668 iommu_init_table(tbl, phb->hose->node); 687 iommu_init_table(tbl, phb->hose->node);
669 688
689 if (pe->pdev)
690 set_iommu_table_base(&pe->pdev->dev, tbl);
691 else
692 pnv_ioda_setup_bus_dma(pe, pe->pbus);
693
670 return; 694 return;
671fail: 695fail:
672 if (pe->tce32_seg >= 0) 696 if (pe->tce32_seg >= 0)
@@ -968,11 +992,38 @@ static void pnv_pci_ioda_setup_DMA(void)
968 } 992 }
969} 993}
970 994
995static void pnv_pci_ioda_create_dbgfs(void)
996{
997#ifdef CONFIG_DEBUG_FS
998 struct pci_controller *hose, *tmp;
999 struct pnv_phb *phb;
1000 char name[16];
1001
1002 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1003 phb = hose->private_data;
1004
1005 sprintf(name, "PCI%04x", hose->global_number);
1006 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
1007 if (!phb->dbgfs)
1008			pr_warning("%s: Failed to create debugfs for PHB#%x\n",
1009 __func__, hose->global_number);
1010 }
1011#endif /* CONFIG_DEBUG_FS */
1012}
1013
971static void pnv_pci_ioda_fixup(void) 1014static void pnv_pci_ioda_fixup(void)
972{ 1015{
973 pnv_pci_ioda_setup_PEs(); 1016 pnv_pci_ioda_setup_PEs();
974 pnv_pci_ioda_setup_seg(); 1017 pnv_pci_ioda_setup_seg();
975 pnv_pci_ioda_setup_DMA(); 1018 pnv_pci_ioda_setup_DMA();
1019
1020 pnv_pci_ioda_create_dbgfs();
1021
1022#ifdef CONFIG_EEH
1023 eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
1024 eeh_addr_cache_build();
1025 eeh_init();
1026#endif
976} 1027}
977 1028
978/* 1029/*
@@ -1049,7 +1100,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
1049 OPAL_ASSERT_RESET); 1100 OPAL_ASSERT_RESET);
1050} 1101}
1051 1102
1052void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) 1103void __init pnv_pci_init_ioda_phb(struct device_node *np,
1104 u64 hub_id, int ioda_type)
1053{ 1105{
1054 struct pci_controller *hose; 1106 struct pci_controller *hose;
1055 static int primary = 1; 1107 static int primary = 1;
@@ -1087,6 +1139,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1087 hose->first_busno = 0; 1139 hose->first_busno = 0;
1088 hose->last_busno = 0xff; 1140 hose->last_busno = 0xff;
1089 hose->private_data = phb; 1141 hose->private_data = phb;
1142 phb->hub_id = hub_id;
1090 phb->opal_id = phb_id; 1143 phb->opal_id = phb_id;
1091 phb->type = ioda_type; 1144 phb->type = ioda_type;
1092 1145
@@ -1172,6 +1225,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1172 phb->ioda.io_size, phb->ioda.io_segsize); 1225 phb->ioda.io_size, phb->ioda.io_segsize);
1173 1226
1174 phb->hose->ops = &pnv_pci_ops; 1227 phb->hose->ops = &pnv_pci_ops;
1228#ifdef CONFIG_EEH
1229 phb->eeh_ops = &ioda_eeh_ops;
1230#endif
1175 1231
1176 /* Setup RID -> PE mapping function */ 1232 /* Setup RID -> PE mapping function */
1177 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1233 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
@@ -1212,7 +1268,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1212 1268
1213void pnv_pci_init_ioda2_phb(struct device_node *np) 1269void pnv_pci_init_ioda2_phb(struct device_node *np)
1214{ 1270{
1215 pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2); 1271 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
1216} 1272}
1217 1273
1218void __init pnv_pci_init_ioda_hub(struct device_node *np) 1274void __init pnv_pci_init_ioda_hub(struct device_node *np)
@@ -1235,6 +1291,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
1235 for_each_child_of_node(np, phbn) { 1291 for_each_child_of_node(np, phbn) {
1236 /* Look for IODA1 PHBs */ 1292 /* Look for IODA1 PHBs */
1237 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1293 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1238 pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1); 1294 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
1239 } 1295 }
1240} 1296}
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 92b37a0186c9..b68db6325c1b 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -86,13 +86,16 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, 86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
87 struct pci_dev *pdev) 87 struct pci_dev *pdev)
88{ 88{
89 if (phb->p5ioc2.iommu_table.it_map == NULL) 89 if (phb->p5ioc2.iommu_table.it_map == NULL) {
90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); 90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
91 iommu_register_group(&phb->p5ioc2.iommu_table,
92 pci_domain_nr(phb->hose->bus), phb->opal_id);
93 }
91 94
92 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); 95 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
93} 96}
94 97
95static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, 98static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
96 void *tce_mem, u64 tce_size) 99 void *tce_mem, u64 tce_size)
97{ 100{
98 struct pnv_phb *phb; 101 struct pnv_phb *phb;
@@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
133 phb->hose->first_busno = 0; 136 phb->hose->first_busno = 0;
134 phb->hose->last_busno = 0xff; 137 phb->hose->last_busno = 0xff;
135 phb->hose->private_data = phb; 138 phb->hose->private_data = phb;
139 phb->hub_id = hub_id;
136 phb->opal_id = phb_id; 140 phb->opal_id = phb_id;
137 phb->type = PNV_PHB_P5IOC2; 141 phb->type = PNV_PHB_P5IOC2;
138 phb->model = PNV_PHB_MODEL_P5IOC2; 142 phb->model = PNV_PHB_MODEL_P5IOC2;
@@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
226 for_each_child_of_node(np, phbn) { 230 for_each_child_of_node(np, phbn) {
227 if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || 231 if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
228 of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { 232 of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
229 pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb); 233 pnv_pci_init_p5ioc2_phb(phbn, hub_id,
234 tce_mem, tce_per_phb);
230 tce_mem += tce_per_phb; 235 tce_mem += tce_per_phb;
231 } 236 }
232 } 237 }
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 277343cc6a3d..a28d3b5e6393 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,6 +20,7 @@
20#include <linux/irq.h> 20#include <linux/irq.h>
21#include <linux/io.h> 21#include <linux/io.h>
22#include <linux/msi.h> 22#include <linux/msi.h>
23#include <linux/iommu.h>
23 24
24#include <asm/sections.h> 25#include <asm/sections.h>
25#include <asm/io.h> 26#include <asm/io.h>
@@ -32,6 +33,8 @@
32#include <asm/iommu.h> 33#include <asm/iommu.h>
33#include <asm/tce.h> 34#include <asm/tce.h>
34#include <asm/firmware.h> 35#include <asm/firmware.h>
36#include <asm/eeh_event.h>
37#include <asm/eeh.h>
35 38
36#include "powernv.h" 39#include "powernv.h"
37#include "pci.h" 40#include "pci.h"
@@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
202 205
203 spin_lock_irqsave(&phb->lock, flags); 206 spin_lock_irqsave(&phb->lock, flags);
204 207
205 rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); 208 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
209 PNV_PCI_DIAG_BUF_SIZE);
206 has_diag = (rc == OPAL_SUCCESS); 210 has_diag = (rc == OPAL_SUCCESS);
207 211
208 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, 212 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
@@ -227,43 +231,50 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
227 spin_unlock_irqrestore(&phb->lock, flags); 231 spin_unlock_irqrestore(&phb->lock, flags);
228} 232}
229 233
230static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, 234static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
231 u32 bdfn) 235 struct device_node *dn)
232{ 236{
233 s64 rc; 237 s64 rc;
234 u8 fstate; 238 u8 fstate;
235 u16 pcierr; 239 u16 pcierr;
236 u32 pe_no; 240 u32 pe_no;
237 241
238 /* Get PE# if we support IODA */ 242 /*
239 pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0; 243 * Get the PE#. During the PCI probe stage, we might not
244 * setup that yet. So all ER errors should be mapped to
245 * PE#0
246 */
247 pe_no = PCI_DN(dn)->pe_number;
248 if (pe_no == IODA_INVALID_PE)
249 pe_no = 0;
240 250
241 /* Read freeze status */ 251 /* Read freeze status */
242 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, 252 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
243 NULL); 253 NULL);
244 if (rc) { 254 if (rc) {
245 pr_warning("PCI %d: Failed to read EEH status for PE#%d," 255 pr_warning("%s: Can't read EEH status (PE#%d) for "
246 " err %lld\n", phb->hose->global_number, pe_no, rc); 256 "%s, err %lld\n",
257 __func__, pe_no, dn->full_name, rc);
247 return; 258 return;
248 } 259 }
249 cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", 260 cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
250 bdfn, pe_no, fstate); 261 (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
262 pe_no, fstate);
251 if (fstate != 0) 263 if (fstate != 0)
252 pnv_pci_handle_eeh_config(phb, pe_no); 264 pnv_pci_handle_eeh_config(phb, pe_no);
253} 265}
254 266
255static int pnv_pci_read_config(struct pci_bus *bus, 267int pnv_pci_cfg_read(struct device_node *dn,
256 unsigned int devfn, 268 int where, int size, u32 *val)
257 int where, int size, u32 *val)
258{ 269{
259 struct pci_controller *hose = pci_bus_to_host(bus); 270 struct pci_dn *pdn = PCI_DN(dn);
260 struct pnv_phb *phb = hose->private_data; 271 struct pnv_phb *phb = pdn->phb->private_data;
261 u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; 272 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
273#ifdef CONFIG_EEH
274 struct eeh_pe *phb_pe = NULL;
275#endif
262 s64 rc; 276 s64 rc;
263 277
264 if (hose == NULL)
265 return PCIBIOS_DEVICE_NOT_FOUND;
266
267 switch (size) { 278 switch (size) {
268 case 1: { 279 case 1: {
269 u8 v8; 280 u8 v8;
@@ -287,28 +298,43 @@ static int pnv_pci_read_config(struct pci_bus *bus,
287 default: 298 default:
288 return PCIBIOS_FUNC_NOT_SUPPORTED; 299 return PCIBIOS_FUNC_NOT_SUPPORTED;
289 } 300 }
290 cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n", 301 cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
291 bus->number, devfn, where, size, *val); 302 __func__, pdn->busno, pdn->devfn, where, size, *val);
292 303
293 /* Check if the PHB got frozen due to an error (no response) */ 304 /*
294 pnv_pci_config_check_eeh(phb, bus, bdfn); 305 * Check if the specified PE has been put into frozen
306 * state. On the other hand, we needn't do that while
307 * the PHB has been put into frozen state because of
308 * PHB-fatal errors.
309 */
310#ifdef CONFIG_EEH
311 phb_pe = eeh_phb_pe_get(pdn->phb);
312 if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
313 return PCIBIOS_SUCCESSFUL;
314
315 if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
316 if (*val == EEH_IO_ERROR_VALUE(size) &&
317 eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
318 return PCIBIOS_DEVICE_NOT_FOUND;
319 } else {
320 pnv_pci_config_check_eeh(phb, dn);
321 }
322#else
323 pnv_pci_config_check_eeh(phb, dn);
324#endif
295 325
296 return PCIBIOS_SUCCESSFUL; 326 return PCIBIOS_SUCCESSFUL;
297} 327}
298 328
299static int pnv_pci_write_config(struct pci_bus *bus, 329int pnv_pci_cfg_write(struct device_node *dn,
300 unsigned int devfn, 330 int where, int size, u32 val)
301 int where, int size, u32 val)
302{ 331{
303 struct pci_controller *hose = pci_bus_to_host(bus); 332 struct pci_dn *pdn = PCI_DN(dn);
304 struct pnv_phb *phb = hose->private_data; 333 struct pnv_phb *phb = pdn->phb->private_data;
305 u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; 334 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
306
307 if (hose == NULL)
308 return PCIBIOS_DEVICE_NOT_FOUND;
309 335
310 cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n", 336 cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
311 bus->number, devfn, where, size, val); 337 pdn->busno, pdn->devfn, where, size, val);
312 switch (size) { 338 switch (size) {
313 case 1: 339 case 1:
314 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); 340 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
@@ -322,14 +348,54 @@ static int pnv_pci_write_config(struct pci_bus *bus,
322 default: 348 default:
323 return PCIBIOS_FUNC_NOT_SUPPORTED; 349 return PCIBIOS_FUNC_NOT_SUPPORTED;
324 } 350 }
351
325 /* Check if the PHB got frozen due to an error (no response) */ 352 /* Check if the PHB got frozen due to an error (no response) */
326 pnv_pci_config_check_eeh(phb, bus, bdfn); 353#ifdef CONFIG_EEH
354 if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
355 pnv_pci_config_check_eeh(phb, dn);
356#else
357 pnv_pci_config_check_eeh(phb, dn);
358#endif
327 359
328 return PCIBIOS_SUCCESSFUL; 360 return PCIBIOS_SUCCESSFUL;
329} 361}
330 362
363static int pnv_pci_read_config(struct pci_bus *bus,
364 unsigned int devfn,
365 int where, int size, u32 *val)
366{
367 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
368 struct pci_dn *pdn;
369
370 for (dn = busdn->child; dn; dn = dn->sibling) {
371 pdn = PCI_DN(dn);
372 if (pdn && pdn->devfn == devfn)
373 return pnv_pci_cfg_read(dn, where, size, val);
374 }
375
376 *val = 0xFFFFFFFF;
377 return PCIBIOS_DEVICE_NOT_FOUND;
378
379}
380
381static int pnv_pci_write_config(struct pci_bus *bus,
382 unsigned int devfn,
383 int where, int size, u32 val)
384{
385 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
386 struct pci_dn *pdn;
387
388 for (dn = busdn->child; dn; dn = dn->sibling) {
389 pdn = PCI_DN(dn);
390 if (pdn && pdn->devfn == devfn)
391 return pnv_pci_cfg_write(dn, where, size, val);
392 }
393
394 return PCIBIOS_DEVICE_NOT_FOUND;
395}
396
331struct pci_ops pnv_pci_ops = { 397struct pci_ops pnv_pci_ops = {
332 .read = pnv_pci_read_config, 398 .read = pnv_pci_read_config,
333 .write = pnv_pci_write_config, 399 .write = pnv_pci_write_config,
334}; 400};
335 401
@@ -412,6 +478,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
412 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), 478 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
413 be32_to_cpup(sizep), 0); 479 be32_to_cpup(sizep), 0);
414 iommu_init_table(tbl, hose->node); 480 iommu_init_table(tbl, hose->node);
481 iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
415 482
416 /* Deal with SW invalidated TCEs when needed (BML way) */ 483 /* Deal with SW invalidated TCEs when needed (BML way) */
417 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", 484 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
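
The frozen-PE check added to pnv_pci_cfg_read() above hinges on
EEH_IO_ERROR_VALUE(size): an all-ones pattern sized to the access, since a
frozen slot returns 0xff for every byte read. A sketch of an equivalent
definition (the real macro lives in asm/eeh.h):

	/* All-ones value for a 1/2/4 byte access:
	 * 0xff, 0xffff and 0xffffffff respectively. */
	#define EEH_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
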
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 25d76c4df50b..d633c64e05a1 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -66,15 +66,43 @@ struct pnv_ioda_pe {
66 struct list_head list; 66 struct list_head list;
67}; 67};
68 68
69/* IOC dependent EEH operations */
70#ifdef CONFIG_EEH
71struct pnv_eeh_ops {
72 int (*post_init)(struct pci_controller *hose);
73 int (*set_option)(struct eeh_pe *pe, int option);
74 int (*get_state)(struct eeh_pe *pe);
75 int (*reset)(struct eeh_pe *pe, int option);
76 int (*get_log)(struct eeh_pe *pe, int severity,
77 char *drv_log, unsigned long len);
78 int (*configure_bridge)(struct eeh_pe *pe);
79 int (*next_error)(struct eeh_pe **pe);
80};
81
82#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */
83#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */
84
85#endif /* CONFIG_EEH */
86
69struct pnv_phb { 87struct pnv_phb {
70 struct pci_controller *hose; 88 struct pci_controller *hose;
71 enum pnv_phb_type type; 89 enum pnv_phb_type type;
72 enum pnv_phb_model model; 90 enum pnv_phb_model model;
91 u64 hub_id;
73 u64 opal_id; 92 u64 opal_id;
74 void __iomem *regs; 93 void __iomem *regs;
75 int initialized; 94 int initialized;
76 spinlock_t lock; 95 spinlock_t lock;
77 96
97#ifdef CONFIG_EEH
98 struct pnv_eeh_ops *eeh_ops;
99 int eeh_state;
100#endif
101
102#ifdef CONFIG_DEBUG_FS
103 struct dentry *dbgfs;
104#endif
105
78#ifdef CONFIG_PCI_MSI 106#ifdef CONFIG_PCI_MSI
79 unsigned int msi_base; 107 unsigned int msi_base;
80 unsigned int msi32_support; 108 unsigned int msi32_support;
@@ -150,7 +178,14 @@ struct pnv_phb {
150}; 178};
151 179
152extern struct pci_ops pnv_pci_ops; 180extern struct pci_ops pnv_pci_ops;
181#ifdef CONFIG_EEH
182extern struct pnv_eeh_ops ioda_eeh_ops;
183#endif
153 184
185int pnv_pci_cfg_read(struct device_node *dn,
186 int where, int size, u32 *val);
187int pnv_pci_cfg_write(struct device_node *dn,
188 int where, int size, u32 val);
154extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 189extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
155 void *tce_mem, u64 tce_size, 190 void *tce_mem, u64 tce_size,
156 u64 dma_offset); 191 u64 dma_offset);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d4459bfc92f7..84438af96c05 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -93,6 +93,8 @@ static void __noreturn pnv_restart(char *cmd)
93{ 93{
94 long rc = OPAL_BUSY; 94 long rc = OPAL_BUSY;
95 95
96 opal_notifier_disable();
97
96 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 98 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
97 rc = opal_cec_reboot(); 99 rc = opal_cec_reboot();
98 if (rc == OPAL_BUSY_EVENT) 100 if (rc == OPAL_BUSY_EVENT)
@@ -108,6 +110,8 @@ static void __noreturn pnv_power_off(void)
108{ 110{
109 long rc = OPAL_BUSY; 111 long rc = OPAL_BUSY;
110 112
113 opal_notifier_disable();
114
111 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 115 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
112 rc = opal_cec_power_down(0); 116 rc = opal_cec_power_down(0);
113 if (rc == OPAL_BUSY_EVENT) 117 if (rc == OPAL_BUSY_EVENT)
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 88c9459c3e07..89e3857af4e0 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -40,7 +40,7 @@
40#define DBG(fmt...) 40#define DBG(fmt...)
41#endif 41#endif
42 42
43static void __cpuinit pnv_smp_setup_cpu(int cpu) 43static void pnv_smp_setup_cpu(int cpu)
44{ 44{
45 if (cpu != boot_cpuid) 45 if (cpu != boot_cpuid)
46 xics_setup_cpu(); 46 xics_setup_cpu();
@@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr)
51 /* Special case - we inhibit secondary thread startup 51 /* Special case - we inhibit secondary thread startup
52 * during boot if the user requests it. 52 * during boot if the user requests it.
53 */ 53 */
54 if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { 54 if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
55 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) 55 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
56 return 0; 56 return 0;
57 if (smt_enabled_at_boot 57 if (smt_enabled_at_boot
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 177a2f70700c..3e270e3412ae 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -109,7 +109,8 @@ static long ps3_hpte_remove(unsigned long hpte_group)
109} 109}
110 110
111static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, 111static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
112 unsigned long vpn, int psize, int ssize, int local) 112 unsigned long vpn, int psize, int apsize,
113 int ssize, int local)
113{ 114{
114 int result; 115 int result;
115 u64 hpte_v, want_v, hpte_rs; 116 u64 hpte_v, want_v, hpte_rs;
@@ -162,7 +163,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
162} 163}
163 164
164static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, 165static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
165 int psize, int ssize, int local) 166 int psize, int apsize, int ssize, int local)
166{ 167{
167 unsigned long flags; 168 unsigned long flags;
168 int result; 169 int result;
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 4459eff7a75a..1bd3399146ed 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -33,11 +33,6 @@ config PPC_SPLPAR
33 processors, that is, which share physical processors between 33 processors, that is, which share physical processors between
34 two or more partitions. 34 two or more partitions.
35 35
36config EEH
37 bool
38 depends on PPC_PSERIES && PCI
39 default y
40
41config PSERIES_MSI 36config PSERIES_MSI
42 bool 37 bool
43 depends on PCI_MSI && EEH 38 depends on PCI_MSI && EEH
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 53866e537a92..8ae010381316 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,9 +6,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
6 firmware.o power.o dlpar.o mobility.o 6 firmware.o power.o dlpar.o mobility.o
7obj-$(CONFIG_SMP) += smp.o 7obj-$(CONFIG_SMP) += smp.o
8obj-$(CONFIG_SCANLOG) += scanlog.o 8obj-$(CONFIG_SCANLOG) += scanlog.o
9obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ 9obj-$(CONFIG_EEH) += eeh_pseries.o
10 eeh_driver.o eeh_event.o eeh_sysfs.o \
11 eeh_pseries.o
12obj-$(CONFIG_KEXEC) += kexec.o 10obj-$(CONFIG_KEXEC) += kexec.o
13obj-$(CONFIG_PCI) += pci.o pci_dlpar.o 11obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
14obj-$(CONFIG_PSERIES_MSI) += msi.o 12obj-$(CONFIG_PSERIES_MSI) += msi.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
deleted file mode 100644
index 6b73d6c44f51..000000000000
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ /dev/null
@@ -1,942 +0,0 @@
1/*
2 * Copyright IBM Corporation 2001, 2005, 2006
3 * Copyright Dave Engebretsen & Todd Inglett 2001
4 * Copyright Linas Vepstas 2005, 2006
5 * Copyright 2001-2012 IBM Corporation.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
22 */
23
24#include <linux/delay.h>
25#include <linux/sched.h>
26#include <linux/init.h>
27#include <linux/list.h>
28#include <linux/pci.h>
29#include <linux/proc_fs.h>
30#include <linux/rbtree.h>
31#include <linux/seq_file.h>
32#include <linux/spinlock.h>
33#include <linux/export.h>
34#include <linux/of.h>
35
36#include <linux/atomic.h>
37#include <asm/eeh.h>
38#include <asm/eeh_event.h>
39#include <asm/io.h>
40#include <asm/machdep.h>
41#include <asm/ppc-pci.h>
42#include <asm/rtas.h>
43
44
45/** Overview:
46 * EEH, or "Extended Error Handling" is a PCI bridge technology for
47 * dealing with PCI bus errors that can't be dealt with within the
48 * usual PCI framework, except by check-stopping the CPU. Systems
49 * that are designed for high-availability/reliability cannot afford
50 * to crash due to a "mere" PCI error, thus the need for EEH.
51 * An EEH-capable bridge operates by converting a detected error
52 * into a "slot freeze", taking the PCI adapter off-line, making
53 * the slot behave, from the OS's point of view, as if the slot
54 * were "empty": all reads return 0xff's and all writes are silently
55 * ignored. EEH slot isolation events can be triggered by parity
56 * errors on the address or data busses (e.g. during posted writes),
57 * which in turn might be caused by low voltage on the bus, dust,
58 * vibration, humidity, radioactivity or plain-old failed hardware.
59 *
60 * Note, however, that one of the leading causes of EEH slot
61 * freeze events are buggy device drivers, buggy device microcode,
62 * or buggy device hardware. This is because any attempt by the
63 * device to bus-master data to a memory address that is not
64 * assigned to the device will trigger a slot freeze. (The idea
65 * is to prevent devices-gone-wild from corrupting system memory).
66 * Buggy hardware/drivers will have a miserable time co-existing
67 * with EEH.
68 *
69 * Ideally, a PCI device driver, when suspecting that an isolation
70 * event has occurred (e.g. by reading 0xff's), will then ask EEH
71 * whether this is the case, and then take appropriate steps to
72 * reset the PCI slot, the PCI device, and then resume operations.
73 * However, until that day, the checking is done here, with the
74 * eeh_check_failure() routine embedded in the MMIO macros. If
75 * the slot is found to be isolated, an "EEH Event" is synthesized
76 * and sent out for processing.
77 */
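
For illustration, the check described above written out as a hypothetical
wrapper; in practice the hook sits inside the powerpc MMIO accessors rather
than in individual drivers:

	static inline u32 checked_readl(const volatile void __iomem *addr)
	{
		u32 val = readl(addr);

		/* All-ones data may mean a frozen slot: ask the firmware */
		if (val == 0xffffffff)
			eeh_check_failure(addr, val);

		return val;
	}
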
78
79/* If a device driver keeps reading an MMIO register in an interrupt
80 * handler after a slot isolation event, it might be broken.
81 * This sets the threshold for how many read attempts we allow
82 * before printing an error message.
83 */
84#define EEH_MAX_FAILS 2100000
85
86/* Time to wait for a PCI slot to report status, in milliseconds */
87#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
88
89/* Platform dependent EEH operations */
90struct eeh_ops *eeh_ops = NULL;
91
92int eeh_subsystem_enabled;
93EXPORT_SYMBOL(eeh_subsystem_enabled);
94
95/*
96 * EEH probe mode support. The intention is to support multiple
97 * platforms for EEH. Some platforms like pSeries do PCI enumeration
98 * based on device tree. However, other platforms like powernv probe
99 * PCI devices from hardware. The flag is used to distinguish that.
100 * In addition, struct eeh_ops::probe is invoked for a particular
101 * OF node or PCI device so that the corresponding PE is created
102 * there.
103 */
104int eeh_probe_mode;
105
106/* Global EEH mutex */
107DEFINE_MUTEX(eeh_mutex);
108
109/* Lock to avoid races due to multiple reports of an error */
110static DEFINE_RAW_SPINLOCK(confirm_error_lock);
111
112/* Buffer for reporting pci register dumps. It's here in BSS, and
113 * not dynamically allocated, so that it ends up in RMO where RTAS
114 * can access it.
115 */
116#define EEH_PCI_REGS_LOG_LEN 4096
117static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
118
119/*
120 * The struct is used to maintain the global EEH statistic
121 * information. In addition, the EEH global statistics are
122 * exported to user space through procfs.
123 */
124struct eeh_stats {
125 u64 no_device; /* PCI device not found */
126 u64 no_dn; /* OF node not found */
127 u64 no_cfg_addr; /* Config address not found */
128 u64 ignored_check; /* EEH check skipped */
129 u64 total_mmio_ffs; /* Total EEH checks */
130 u64 false_positives; /* Unnecessary EEH checks */
131 u64 slot_resets; /* PE reset */
132};
133
134static struct eeh_stats eeh_stats;
135
136#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
137
138/**
139 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
140 * @edev: device to report data for
141 * @buf: point to buffer in which to log
142 * @len: amount of room in buffer
143 *
144 * This routine captures assorted PCI configuration space data,
145 * and puts them into a buffer for RTAS error logging.
146 */
147static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
148{
149 struct device_node *dn = eeh_dev_to_of_node(edev);
150 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
151 u32 cfg;
152 int cap, i;
153 int n = 0;
154
155 n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
156 printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
157
158 eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
159 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
160 printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
161
162 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
163 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
164 printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
165
166 if (!dev) {
167 printk(KERN_WARNING "EEH: no PCI device for this of node\n");
168 return n;
169 }
170
171 /* Gather bridge-specific registers */
172 if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
173 eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
174 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
175 printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
176
177 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
178 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
179 printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
180 }
181
182 /* Dump out the PCI-X command and status regs */
183 cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
184 if (cap) {
185 eeh_ops->read_config(dn, cap, 4, &cfg);
186 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
187 printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
188
189 eeh_ops->read_config(dn, cap+4, 4, &cfg);
190 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
191 printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
192 }
193
194 /* If PCI-E capable, dump PCI-E cap 10, and the AER */
195 cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
196 if (cap) {
197 n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
198 printk(KERN_WARNING
199 "EEH: PCI-E capabilities and status follow:\n");
200
201 for (i=0; i<=8; i++) {
202 eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
203 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
204 printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
205 }
206
207 cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
208 if (cap) {
209 n += scnprintf(buf+n, len-n, "pci-e AER:\n");
210 printk(KERN_WARNING
211 "EEH: PCI-E AER capability register set follows:\n");
212
213 for (i=0; i<14; i++) {
214 eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
215 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
216 printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
217 }
218 }
219 }
220
221 return n;
222}
223
224/**
225 * eeh_slot_error_detail - Generate combined log including driver log and error log
226 * @pe: EEH PE
227 * @severity: temporary or permanent error log
228 *
229 * This routine should be called to generate the combined log, which
230 * consists of the driver log and the error log. The driver log is
231 * extracted from the config space of the corresponding PCI device, while
232 * the error log is fetched through platform dependent function call.
233 */
234void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
235{
236 size_t loglen = 0;
237 struct eeh_dev *edev;
238
239 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
240 eeh_ops->configure_bridge(pe);
241 eeh_pe_restore_bars(pe);
242
243 pci_regs_buf[0] = 0;
244 eeh_pe_for_each_dev(pe, edev) {
245 loglen += eeh_gather_pci_data(edev, pci_regs_buf,
246 EEH_PCI_REGS_LOG_LEN);
247 }
248
249 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
250}
251
252/**
253 * eeh_token_to_phys - Convert EEH address token to phys address
254 * @token: I/O token, should be address in the form 0xA....
255 *
256 * This routine should be called to convert a virtual I/O address
257 * to a physical one.
258 */
259static inline unsigned long eeh_token_to_phys(unsigned long token)
260{
261 pte_t *ptep;
262 unsigned long pa;
263
264 ptep = find_linux_pte(init_mm.pgd, token);
265 if (!ptep)
266 return token;
267 pa = pte_pfn(*ptep) << PAGE_SHIFT;
268
269 return pa | (token & (PAGE_SIZE-1));
270}
271
272/**
273 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
274 * @edev: eeh device
275 *
276 * Check for an EEH failure for the given device node. Call this
277 * routine if the result of a read was all 0xff's and you want to
278 * find out if this is due to an EEH slot freeze. This routine
279 * will query firmware for the EEH status.
280 *
281 * Returns 0 if there has not been an EEH error; otherwise returns
282 * a non-zero value and queues up a slot isolation event notification.
283 *
284 * It is safe to call this routine in an interrupt context.
285 */
286int eeh_dev_check_failure(struct eeh_dev *edev)
287{
288 int ret;
289 unsigned long flags;
290 struct device_node *dn;
291 struct pci_dev *dev;
292 struct eeh_pe *pe;
293 int rc = 0;
294 const char *location;
295
296 eeh_stats.total_mmio_ffs++;
297
298 if (!eeh_subsystem_enabled)
299 return 0;
300
301 if (!edev) {
302 eeh_stats.no_dn++;
303 return 0;
304 }
305 dn = eeh_dev_to_of_node(edev);
306 dev = eeh_dev_to_pci_dev(edev);
307 pe = edev->pe;
308
309 /* Access to IO BARs might get this far and still not want checking. */
310 if (!pe) {
311 eeh_stats.ignored_check++;
312 pr_debug("EEH: Ignored check for %s %s\n",
313 eeh_pci_name(dev), dn->full_name);
314 return 0;
315 }
316
317 if (!pe->addr && !pe->config_addr) {
318 eeh_stats.no_cfg_addr++;
319 return 0;
320 }
321
322 /* If we already have a pending isolation event for this
323 * slot, we know it's bad already, we don't need to check.
324 * Do this checking under a lock; as multiple PCI devices
325 * in one slot might report errors simultaneously, and we
326 * only want one error recovery routine running.
327 */
328 raw_spin_lock_irqsave(&confirm_error_lock, flags);
329 rc = 1;
330 if (pe->state & EEH_PE_ISOLATED) {
331 pe->check_count++;
332 if (pe->check_count % EEH_MAX_FAILS == 0) {
333 location = of_get_property(dn, "ibm,loc-code", NULL);
334 printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
335 "location=%s driver=%s pci addr=%s\n",
336 pe->check_count, location,
337 eeh_driver_name(dev), eeh_pci_name(dev));
338 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
339 eeh_driver_name(dev));
340 dump_stack();
341 }
342 goto dn_unlock;
343 }
344
345 /*
346 * Now test for an EEH failure. This is VERY expensive.
347 * Note that the eeh_config_addr may be a parent device
348 * in the case of a device behind a bridge, or it may be
349 * function zero of a multi-function device.
350 * In any case they must share a common PHB.
351 */
352 ret = eeh_ops->get_state(pe, NULL);
353
354 /* Note that config-io to empty slots may fail;
355 * they are empty when they don't have children.
356 * We punt under the following conditions: failure to get the
357 * PE's state, EEH not supported or permanently unavailable,
358 * or the PE being in a good state.
359 */
360 if ((ret < 0) ||
361 (ret == EEH_STATE_NOT_SUPPORT) ||
362 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
363 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
364 eeh_stats.false_positives++;
365 pe->false_positives++;
366 rc = 0;
367 goto dn_unlock;
368 }
369
370 eeh_stats.slot_resets++;
371
372 /* Avoid repeated reports of this failure, including problems
373 * with other functions on this device, and functions under
374 * bridges.
375 */
376 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
377 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
378
379 eeh_send_failure_event(pe);
380
381 /* Most EEH events are due to device driver bugs. Having
382 * a stack trace will help the device-driver authors figure
383 * out what happened. So print that out.
384 */
385 WARN(1, "EEH: failure detected\n");
386 return 1;
387
388dn_unlock:
389 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
390 return rc;
391}
392
393EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
394
395/**
396 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
397 * @token: I/O token, should be address in the form 0xA....
398 * @val: value, should be all 1's (XXX why do we need this arg??)
399 *
400 * Check for an EEH failure at the given token address. Call this
401 * routine if the result of a read was all 0xff's and you want to
402 * find out if this is due to an EEH slot freeze event. This routine
403 * will query firmware for the EEH status.
404 *
405 * Note this routine is safe to call in an interrupt context.
406 */
407unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
408{
409 unsigned long addr;
410 struct eeh_dev *edev;
411
412 /* Finding the phys addr + pci device; this is pretty quick. */
413 addr = eeh_token_to_phys((unsigned long __force) token);
414 edev = eeh_addr_cache_get_dev(addr);
415 if (!edev) {
416 eeh_stats.no_device++;
417 return val;
418 }
419
420 eeh_dev_check_failure(edev);
421
422 pci_dev_put(eeh_dev_to_pci_dev(edev));
423 return val;
424}
425
426EXPORT_SYMBOL(eeh_check_failure);
427
428
429/**
430 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
431 * @pe: EEH PE
432 *
433 * This routine should be called to reenable frozen MMIO or DMA
434 * so that it would work correctly again. It's useful while doing
435 * recovery or log collection on the indicated device.
436 */
437int eeh_pci_enable(struct eeh_pe *pe, int function)
438{
439 int rc;
440
441 rc = eeh_ops->set_option(pe, function);
442 if (rc)
443 pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
444 __func__, function, pe->phb->global_number, pe->addr, rc);
445
446 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
447 if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
448 (function == EEH_OPT_THAW_MMIO))
449 return 0;
450
451 return rc;
452}
453
454/**
455 * pcibios_set_pcie_slot_reset - Set PCI-E reset state
456 * @dev: pci device struct
457 * @state: reset state to enter
458 *
459 * Return value:
460 * 0 if success
461 */
462int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
463{
464 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
465 struct eeh_pe *pe = edev->pe;
466
467 if (!pe) {
468 pr_err("%s: No PE found on PCI device %s\n",
469 __func__, pci_name(dev));
470 return -EINVAL;
471 }
472
473 switch (state) {
474 case pcie_deassert_reset:
475 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
476 break;
477 case pcie_hot_reset:
478 eeh_ops->reset(pe, EEH_RESET_HOT);
479 break;
480 case pcie_warm_reset:
481 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
482 break;
483 default:
484 return -EINVAL;
485 };
486
487 return 0;
488}
489
490/**
491 * eeh_set_dev_freset - Check the required reset for the indicated device
492 * @data: EEH device
493 * @flag: return value
494 *
495 * Each device might have its preferred reset type: fundamental or
496 * hot reset. The routine is used to collect that information for
497 * the indicated device and its children so that the whole group
498 * of devices can be reset properly.
499 */
500static void *eeh_set_dev_freset(void *data, void *flag)
501{
502 struct pci_dev *dev;
503 unsigned int *freset = (unsigned int *)flag;
504 struct eeh_dev *edev = (struct eeh_dev *)data;
505
506 dev = eeh_dev_to_pci_dev(edev);
507 if (dev)
508 *freset |= dev->needs_freset;
509
510 return NULL;
511}
512
513/**
514 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
515 * @pe: EEH PE
516 *
517 * Assert the PCI #RST line for 1/4 second.
518 */
519static void eeh_reset_pe_once(struct eeh_pe *pe)
520{
521 unsigned int freset = 0;
522
523 /* Determine type of EEH reset required for
524 * Partitionable Endpoint, a hot-reset (1)
525 * or a fundamental reset (3).
526 * A fundamental reset required by any device under
527 * Partitionable Endpoint trumps hot-reset.
528 */
529 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
530
531 if (freset)
532 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
533 else
534 eeh_ops->reset(pe, EEH_RESET_HOT);
535
536 /* The PCI bus requires that the reset be held high for at least
537 * 100 milliseconds. We wait a bit longer 'just in case'.
538 */
539#define PCI_BUS_RST_HOLD_TIME_MSEC 250
540 msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
541
542 /* We might get hit with another EEH freeze as soon as the
543 * pci slot reset line is dropped. Make sure we don't miss
544 * these, and clear the flag now.
545 */
546 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
547
548 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
549
550 /* After a PCI slot has been reset, the PCI Express spec requires
551 * a 1.5 second idle time for the bus to stabilize, before starting
552 * up traffic.
553 */
554#define PCI_BUS_SETTLE_TIME_MSEC 1800
555 msleep(PCI_BUS_SETTLE_TIME_MSEC);
556}
557
558/**
559 * eeh_reset_pe - Reset the indicated PE
560 * @pe: EEH PE
561 *
562 * This routine should be called to reset the indicated PE. A PE
563 * might include multiple PCI devices, and sometimes PCI bridges
564 * are involved as well.
565 */
566int eeh_reset_pe(struct eeh_pe *pe)
567{
568 int i, rc;
569
570 /* Take three shots at resetting the bus */
571 for (i=0; i<3; i++) {
572 eeh_reset_pe_once(pe);
573
574 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
575 if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
576 return 0;
577
578 if (rc < 0) {
579 pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
580 __func__, pe->phb->global_number, pe->addr);
581 return -1;
582 }
583 pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
584 i+1, pe->phb->global_number, pe->addr, rc);
585 }
586
587 return -1;
588}
589
590/**
591 * eeh_save_bars - Save device bars
592 * @edev: PCI device associated EEH device
593 *
594 * Save the values of the device bars. Unlike the restore
595 * routine, this routine is *not* recursive. This is because
596 * PCI devices are added individually; but, for the restore,
597 * an entire slot is reset at a time.
598 */
599void eeh_save_bars(struct eeh_dev *edev)
600{
601 int i;
602 struct device_node *dn;
603
604 if (!edev)
605 return;
606 dn = eeh_dev_to_of_node(edev);
607
608 for (i = 0; i < 16; i++)
609 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
610}
611
612/**
613 * eeh_ops_register - Register platform dependent EEH operations
614 * @ops: platform dependent EEH operations
615 *
616 * Register the platform dependent EEH operation callback
617 * functions. The platform should call this function before
618 * any other EEH operations.
619 */
620int __init eeh_ops_register(struct eeh_ops *ops)
621{
622 if (!ops->name) {
623 pr_warning("%s: Invalid EEH ops name for %p\n",
624 __func__, ops);
625 return -EINVAL;
626 }
627
628 if (eeh_ops && eeh_ops != ops) {
629 pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
630 __func__, eeh_ops->name, ops->name);
631 return -EEXIST;
632 }
633
634 eeh_ops = ops;
635
636 return 0;
637}
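For context, a platform wires itself up by passing its callback table to this function during early boot. A minimal sketch, assuming only the eeh_ops fields visible in this file; the demo_* names are invented for illustration:

	/* Hypothetical platform glue; only fields used in this file are assumed */
	static int demo_eeh_init(void)
	{
		return 0;	/* platform-specific setup would go here */
	}

	static struct eeh_ops demo_eeh_ops = {
		.name	= "demo",
		.init	= demo_eeh_init,
		/* .reset, .wait_state, .of_probe, .read_config, etc. as needed */
	};

	static int __init demo_eeh_probe(void)
	{
		/* Must run before eeh_init(), which bails out if eeh_ops is unset */
		return eeh_ops_register(&demo_eeh_ops);
	}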
638
639/**
640 * eeh_ops_unregister - Unregister platform dependent EEH operations
641 * @name: name of EEH platform operations
642 *
643 * Unregister the platform dependent EEH operation callback
644 * functions.
645 */
646int __exit eeh_ops_unregister(const char *name)
647{
648 if (!name || !strlen(name)) {
649 pr_warning("%s: Invalid EEH ops name\n",
650 __func__);
651 return -EINVAL;
652 }
653
654 if (eeh_ops && !strcmp(eeh_ops->name, name)) {
655 eeh_ops = NULL;
656 return 0;
657 }
658
659 return -EEXIST;
660}
661
662/**
663 * eeh_init - EEH initialization
664 *
665 * Initialize EEH by trying to enable it for all of the adapters in the system.
666 * As a side effect we can determine here if EEH is supported at all.
667 * Note that we leave EEH on so failed config cycles won't cause a machine
668 * check. If a user turns off EEH for a particular adapter they are really
669 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
670 * grant access to a slot if EEH isn't enabled, and so we always enable
671 * EEH for all slots/all devices.
672 *
673 * The eeh-force-off option disables EEH checking globally, for all slots.
674 * Even if force-off is set, the EEH hardware is still enabled, so that
675 * newer systems can boot.
676 */
677static int __init eeh_init(void)
678{
679 struct pci_controller *hose, *tmp;
680 struct device_node *phb;
681 int ret;
682
683 /* call platform initialization function */
684 if (!eeh_ops) {
685 pr_warning("%s: Platform EEH operation not found\n",
686 __func__);
687 return -EEXIST;
688 } else if ((ret = eeh_ops->init())) {
689 pr_warning("%s: Failed to call platform init function (%d)\n",
690 __func__, ret);
691 return ret;
692 }
693
694 raw_spin_lock_init(&confirm_error_lock);
695
696 /* Enable EEH for all adapters */
697 if (eeh_probe_mode_devtree()) {
698 list_for_each_entry_safe(hose, tmp,
699 &hose_list, list_node) {
700 phb = hose->dn;
701 traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
702 }
703 }
704
705 if (eeh_subsystem_enabled)
706 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
707 else
708 pr_warning("EEH: No capable adapters found\n");
709
710 return ret;
711}
712
713core_initcall_sync(eeh_init);
714
715/**
716 * eeh_add_device_early - Enable EEH for the indicated device_node
717 * @dn: device node for which to set up EEH
718 *
719 * This routine must be used to perform EEH initialization for PCI
720 * devices that were added after system boot (e.g. hotplug, dlpar).
721 * This routine must be called before any i/o is performed to the
722 * adapter (including any config-space i/o).
723 * Whether this actually enables EEH or not for this device depends
724 * on the CEC architecture, the type of the device, earlier boot
725 * command-line arguments, etc.
726 */
727static void eeh_add_device_early(struct device_node *dn)
728{
729 struct pci_controller *phb;
730
731 if (!of_node_to_eeh_dev(dn))
732 return;
733 phb = of_node_to_eeh_dev(dn)->phb;
734
735 /* USB Bus children of PCI devices will not have BUIDs */
736 if (NULL == phb || 0 == phb->buid)
737 return;
738
739 /* FIXME: hotplug support on POWERNV */
740 eeh_ops->of_probe(dn, NULL);
741}
742
743/**
744 * eeh_add_device_tree_early - Enable EEH for the indicated device
745 * @dn: device node
746 *
747 * This routine must be used to perform EEH initialization for the
748 * indicated PCI device that was added after system boot (e.g.
749 * hotplug, dlpar).
750 */
751void eeh_add_device_tree_early(struct device_node *dn)
752{
753 struct device_node *sib;
754
755 for_each_child_of_node(dn, sib)
756 eeh_add_device_tree_early(sib);
757 eeh_add_device_early(dn);
758}
759EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
760
761/**
762 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
763 * @dev: pci device for which to set up EEH
764 *
765 * This routine must be used to complete EEH initialization for PCI
766 * devices that were added after system boot (e.g. hotplug, dlpar).
767 */
768static void eeh_add_device_late(struct pci_dev *dev)
769{
770 struct device_node *dn;
771 struct eeh_dev *edev;
772
773 if (!dev || !eeh_subsystem_enabled)
774 return;
775
776 pr_debug("EEH: Adding device %s\n", pci_name(dev));
777
778 dn = pci_device_to_OF_node(dev);
779 edev = of_node_to_eeh_dev(dn);
780 if (edev->pdev == dev) {
781 pr_debug("EEH: Already referenced !\n");
782 return;
783 }
784 WARN_ON(edev->pdev);
785
786 pci_dev_get(dev);
787 edev->pdev = dev;
788 dev->dev.archdata.edev = edev;
789
790 eeh_addr_cache_insert_dev(dev);
791}
792
793/**
794 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
795 * @bus: PCI bus
796 *
797 * This routine must be used to perform EEH initialization for PCI
798 * devices which are attached to the indicated PCI bus. The PCI bus
799 * is added after system boot through hotplug or dlpar.
800 */
801void eeh_add_device_tree_late(struct pci_bus *bus)
802{
803 struct pci_dev *dev;
804
805 list_for_each_entry(dev, &bus->devices, bus_list) {
806 eeh_add_device_late(dev);
807 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
808 struct pci_bus *subbus = dev->subordinate;
809 if (subbus)
810 eeh_add_device_tree_late(subbus);
811 }
812 }
813}
814EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
815
816/**
817 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
818 * @bus: PCI bus
819 *
820 * This routine must be used to add EEH sysfs files for PCI
821 * devices which are attached to the indicated PCI bus. The PCI bus
822 * is added after system boot through hotplug or dlpar.
823 */
824void eeh_add_sysfs_files(struct pci_bus *bus)
825{
826 struct pci_dev *dev;
827
828 list_for_each_entry(dev, &bus->devices, bus_list) {
829 eeh_sysfs_add_device(dev);
830 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
831 struct pci_bus *subbus = dev->subordinate;
832 if (subbus)
833 eeh_add_sysfs_files(subbus);
834 }
835 }
836}
837EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
838
839/**
840 * eeh_remove_device - Undo EEH setup for the indicated pci device
841 * @dev: pci device to be removed
842 * @purge_pe: remove the PE or not
843 *
844 * This routine should be called when a device is removed from
845 * a running system (e.g. by hotplug or dlpar). It unregisters
846 * the PCI device from the EEH subsystem. I/O errors affecting
847 * this device will no longer be detected after this call; thus,
848 * i/o errors affecting this slot may leave this device unusable.
849 */
850static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
851{
852 struct eeh_dev *edev;
853
854 if (!dev || !eeh_subsystem_enabled)
855 return;
856 edev = pci_dev_to_eeh_dev(dev);
857
858 /* Unregister the device with the EEH/PCI address search system */
859 pr_debug("EEH: Removing device %s\n", pci_name(dev));
860
861 if (!edev || !edev->pdev) {
862 pr_debug("EEH: Not referenced !\n");
863 return;
864 }
865 edev->pdev = NULL;
866 dev->dev.archdata.edev = NULL;
867 pci_dev_put(dev);
868
869 eeh_rmv_from_parent_pe(edev, purge_pe);
870 eeh_addr_cache_rmv_dev(dev);
871 eeh_sysfs_remove_device(dev);
872}
873
874/**
875 * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
876 * @dev: PCI device
877 * @purge_pe: remove the corresponding PE or not
878 *
879 * This routine must be called when a device is removed from the
880 * running system through hotplug or dlpar. The corresponding
881 * PCI address cache will be removed.
882 */
883void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
884{
885 struct pci_bus *bus = dev->subordinate;
886 struct pci_dev *child, *tmp;
887
888 eeh_remove_device(dev, purge_pe);
889
890 if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
891 list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
892 eeh_remove_bus_device(child, purge_pe);
893 }
894}
895EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
896
897static int proc_eeh_show(struct seq_file *m, void *v)
898{
899 if (0 == eeh_subsystem_enabled) {
900 seq_printf(m, "EEH Subsystem is globally disabled\n");
901 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
902 } else {
903 seq_printf(m, "EEH Subsystem is enabled\n");
904 seq_printf(m,
905 "no device=%llu\n"
906 "no device node=%llu\n"
907 "no config address=%llu\n"
908 "check not wanted=%llu\n"
909 "eeh_total_mmio_ffs=%llu\n"
910 "eeh_false_positives=%llu\n"
911 "eeh_slot_resets=%llu\n",
912 eeh_stats.no_device,
913 eeh_stats.no_dn,
914 eeh_stats.no_cfg_addr,
915 eeh_stats.ignored_check,
916 eeh_stats.total_mmio_ffs,
917 eeh_stats.false_positives,
918 eeh_stats.slot_resets);
919 }
920
921 return 0;
922}
923
924static int proc_eeh_open(struct inode *inode, struct file *file)
925{
926 return single_open(file, proc_eeh_show, NULL);
927}
928
929static const struct file_operations proc_eeh_operations = {
930 .open = proc_eeh_open,
931 .read = seq_read,
932 .llseek = seq_lseek,
933 .release = single_release,
934};
935
936static int __init eeh_init_proc(void)
937{
938 if (machine_is(pseries))
939 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
940 return 0;
941}
942__initcall(eeh_init_proc);
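For illustration, reading the proc file back on a pseries machine would produce output in the format built by proc_eeh_show() above; the counter values here are made up:

	$ cat /proc/powerpc/eeh
	EEH Subsystem is enabled
	no device=0
	no device node=0
	no config address=0
	check not wanted=0
	eeh_total_mmio_ffs=12
	eeh_false_positives=0
	eeh_slot_resets=1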
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
deleted file mode 100644
index 5ce3ba7ad137..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ /dev/null
@@ -1,319 +0,0 @@
1/*
2 * PCI address cache; allows the lookup of PCI devices based on I/O address
3 *
4 * Copyright IBM Corporation 2004
5 * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/list.h>
23#include <linux/pci.h>
24#include <linux/rbtree.h>
25#include <linux/slab.h>
26#include <linux/spinlock.h>
27#include <linux/atomic.h>
28#include <asm/pci-bridge.h>
29#include <asm/ppc-pci.h>
30
31
32/**
33 * The pci address cache subsystem. This subsystem places
34 * PCI device address resources into a red-black tree, sorted
35 * according to the address range, so that given only an i/o
36 * address, the corresponding PCI device can be **quickly**
37 * found. It is safe to perform an address lookup in an interrupt
38 * context; this ability is an important feature.
39 *
40 * Currently, the only customer of this code is the EEH subsystem;
41 * thus, this code has been somewhat tailored to suit EEH better.
42 * In particular, the cache does *not* hold the addresses of devices
43 * for which EEH is not enabled.
44 *
45 * (Implementation Note: The RB tree seems to be better/faster
46 * than any hash algo I could think of for this problem, even
47 * with the penalty of slow pointer chases for d-cache misses).
48 */
49struct pci_io_addr_range {
50 struct rb_node rb_node;
51 unsigned long addr_lo;
52 unsigned long addr_hi;
53 struct eeh_dev *edev;
54 struct pci_dev *pcidev;
55 unsigned int flags;
56};
57
58static struct pci_io_addr_cache {
59 struct rb_root rb_root;
60 spinlock_t piar_lock;
61} pci_io_addr_cache_root;
62
63static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
64{
65 struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
66
67 while (n) {
68 struct pci_io_addr_range *piar;
69 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
70
71 if (addr < piar->addr_lo) {
72 n = n->rb_left;
73 } else {
74 if (addr > piar->addr_hi) {
75 n = n->rb_right;
76 } else {
77 pci_dev_get(piar->pcidev);
78 return piar->edev;
79 }
80 }
81 }
82
83 return NULL;
84}
85
86/**
87 * eeh_addr_cache_get_dev - Get device, given only address
88 * @addr: mmio (PIO) phys address or i/o port number
89 *
90 * Given an mmio phys address, or a port number, find a pci device
91 * that implements this address. Be sure to pci_dev_put the device
92 * when finished. I/O port numbers are assumed to be offset
93 * from zero (that is, they do *not* have pci_io_addr added in).
94 * It is safe to call this function within an interrupt.
95 */
96struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
97{
98 struct eeh_dev *edev;
99 unsigned long flags;
100
101 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
102 edev = __eeh_addr_cache_get_device(addr);
103 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
104 return edev;
105}
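Since the lookup takes a reference on the underlying PCI device, callers must drop it when done. A minimal usage sketch; the faulting address and surrounding context are hypothetical:

	/* Sketch: map a faulting MMIO address back to its device, safely
	 * from interrupt context, then drop the reference the lookup took.
	 */
	struct eeh_dev *edev = eeh_addr_cache_get_dev(addr);
	if (edev) {
		/* ... inspect edev / kick off EEH processing ... */
		pci_dev_put(eeh_dev_to_pci_dev(edev));
	}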
106
107#ifdef DEBUG
108/*
109 * Handy-dandy debug print routine, does nothing more
110 * than print out the contents of our addr cache.
111 */
112static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
113{
114 struct rb_node *n;
115 int cnt = 0;
116
117 n = rb_first(&cache->rb_root);
118 while (n) {
119 struct pci_io_addr_range *piar;
120 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
121 pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
122 (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
123 piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
124 cnt++;
125 n = rb_next(n);
126 }
127}
128#endif
129
130/* Insert address range into the rb tree. */
131static struct pci_io_addr_range *
132eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
133 unsigned long ahi, unsigned int flags)
134{
135 struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
136 struct rb_node *parent = NULL;
137 struct pci_io_addr_range *piar;
138
139 /* Walk tree, find a place to insert into tree */
140 while (*p) {
141 parent = *p;
142 piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
143 if (ahi < piar->addr_lo) {
144 p = &parent->rb_left;
145 } else if (alo > piar->addr_hi) {
146 p = &parent->rb_right;
147 } else {
148 if (dev != piar->pcidev ||
149 alo != piar->addr_lo || ahi != piar->addr_hi) {
150 pr_warning("PIAR: overlapping address range\n");
151 }
152 return piar;
153 }
154 }
155 piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
156 if (!piar)
157 return NULL;
158
159 pci_dev_get(dev);
160 piar->addr_lo = alo;
161 piar->addr_hi = ahi;
162 piar->edev = pci_dev_to_eeh_dev(dev);
163 piar->pcidev = dev;
164 piar->flags = flags;
165
166#ifdef DEBUG
167 pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
168 alo, ahi, pci_name(dev));
169#endif
170
171 rb_link_node(&piar->rb_node, parent, p);
172 rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
173
174 return piar;
175}
176
177static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
178{
179 struct device_node *dn;
180 struct eeh_dev *edev;
181 int i;
182
183 dn = pci_device_to_OF_node(dev);
184 if (!dn) {
185 pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
186 return;
187 }
188
189 edev = of_node_to_eeh_dev(dn);
190 if (!edev) {
191 pr_warning("PCI: no EEH dev found for dn=%s\n",
192 dn->full_name);
193 return;
194 }
195
196 /* Skip any devices for which EEH is not enabled. */
197 if (!edev->pe) {
198#ifdef DEBUG
199 pr_info("PCI: skip building address cache for=%s - %s\n",
200 pci_name(dev), dn->full_name);
201#endif
202 return;
203 }
204
205 /* Walk resources on this device, poke them into the tree */
206 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
207 unsigned long start = pci_resource_start(dev,i);
208 unsigned long end = pci_resource_end(dev,i);
209 unsigned int flags = pci_resource_flags(dev,i);
210
211 /* We are interested only in bus addresses, not DMA or other stuff */
212 if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
213 continue;
214 if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
215 continue;
216 eeh_addr_cache_insert(dev, start, end, flags);
217 }
218}
219
220/**
221 * eeh_addr_cache_insert_dev - Add a device to the address cache
222 * @dev: PCI device whose I/O addresses we are interested in.
223 *
224 * In order to support the fast lookup of devices based on addresses,
225 * we maintain a cache of devices that can be quickly searched.
226 * This routine adds a device to that cache.
227 */
228void eeh_addr_cache_insert_dev(struct pci_dev *dev)
229{
230 unsigned long flags;
231
232 /* Ignore PCI bridges */
233 if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
234 return;
235
236 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
237 __eeh_addr_cache_insert_dev(dev);
238 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
239}
240
241static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
242{
243 struct rb_node *n;
244
245restart:
246 n = rb_first(&pci_io_addr_cache_root.rb_root);
247 while (n) {
248 struct pci_io_addr_range *piar;
249 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
250
251 if (piar->pcidev == dev) {
252 rb_erase(n, &pci_io_addr_cache_root.rb_root);
253 pci_dev_put(piar->pcidev);
254 kfree(piar);
255 goto restart;
256 }
257 n = rb_next(n);
258 }
259}
260
261/**
262 * eeh_addr_cache_rmv_dev - remove pci device from addr cache
263 * @dev: device to remove
264 *
265 * Remove a device from the addr-cache tree.
266 * This is potentially expensive, since it will walk
267 * the tree multiple times (once per resource).
268 * But so what; device removal doesn't need to be that fast.
269 */
270void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
271{
272 unsigned long flags;
273
274 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
275 __eeh_addr_cache_rmv_dev(dev);
276 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
277}
278
279/**
280 * eeh_addr_cache_build - Build a cache of I/O addresses
281 *
282 * Build a cache of pci i/o addresses. This cache will be used to
283 * find the pci device that corresponds to a given address.
284 * This routine scans all pci busses to build the cache.
285 * Must be run late in boot process, after the pci controllers
286 * have been scanned for devices (after all device resources are known).
287 */
288void __init eeh_addr_cache_build(void)
289{
290 struct device_node *dn;
291 struct eeh_dev *edev;
292 struct pci_dev *dev = NULL;
293
294 spin_lock_init(&pci_io_addr_cache_root.piar_lock);
295
296 for_each_pci_dev(dev) {
297 dn = pci_device_to_OF_node(dev);
298 if (!dn)
299 continue;
300
301 edev = of_node_to_eeh_dev(dn);
302 if (!edev)
303 continue;
304
305 pci_dev_get(dev); /* matching put is in eeh_remove_device() */
306 dev->dev.archdata.edev = edev;
307 edev->pdev = dev;
308
309 eeh_addr_cache_insert_dev(dev);
310
311 eeh_sysfs_add_device(dev);
312 }
313
314#ifdef DEBUG
315 /* Verify tree built up above, echo back the list of addrs. */
316 eeh_addr_cache_print(&pci_io_addr_cache_root);
317#endif
318}
319
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
deleted file mode 100644
index 1efa28f5fc54..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * This file implements dynamic creation of EEH devices, which are
3 * bound to OF nodes and PCI devices simultaneously. The EEH devices
4 * provide fundamental information for the EEH core components to work
5 * properly. We have to support multiple situations where dynamic
6 * creation of an EEH device is required:
7 *
8 * 1) Before PCI enumeration starts, we need to create EEH devices
9 * according to the PCI-sensitive OF nodes.
10 * 2) When PCI enumeration is done, we need to do the binding between
11 * the PCI device and the associated EEH device.
12 * 3) DR (Dynamic Reconfiguration) can create a PCI-sensitive OF node.
13 * An EEH device will be created when such a node is detected from DR.
14 * 4) PCI hotplug needs to redo the binding between PCI device and EEH
15 * device. If a PHB is newly inserted, we also need to create EEH devices.
16 *
17 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 */
33
34#include <linux/export.h>
35#include <linux/gfp.h>
36#include <linux/init.h>
37#include <linux/kernel.h>
38#include <linux/pci.h>
39#include <linux/string.h>
40
41#include <asm/pci-bridge.h>
42#include <asm/ppc-pci.h>
43
44/**
45 * eeh_dev_init - Create EEH device according to OF node
46 * @dn: device node
47 * @data: PHB
48 *
49 * It will create an EEH device according to the given OF node. The function
50 * might be called by PCI enumeration, DR, or PHB hotplug.
51 */
52void *eeh_dev_init(struct device_node *dn, void *data)
53{
54 struct pci_controller *phb = data;
55 struct eeh_dev *edev;
56
57 /* Allocate EEH device */
58 edev = kzalloc(sizeof(*edev), GFP_KERNEL);
59 if (!edev) {
60 pr_warning("%s: out of memory\n", __func__);
61 return NULL;
62 }
63
64 /* Associate EEH device with OF node */
65 PCI_DN(dn)->edev = edev;
66 edev->dn = dn;
67 edev->phb = phb;
68 INIT_LIST_HEAD(&edev->list);
69
70 return NULL;
71}
72
73/**
74 * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
75 * @phb: PHB
76 *
77 * Scan the PHB OF node and its children, then create the
78 * EEH devices accordingly.
79 */
80void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
81{
82 struct device_node *dn = phb->dn;
83
84 /* EEH PE for PHB */
85 eeh_phb_pe_create(phb);
86
87 /* EEH device for PHB */
88 eeh_dev_init(dn, phb);
89
90 /* EEH devices for children OF nodes */
91 traverse_pci_devices(dn, eeh_dev_init, phb);
92}
93
94/**
95 * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
96 *
97 * Scan all the existing PHBs and create EEH devices for their OF
98 * nodes and their child OF nodes.
99 */
100static int __init eeh_dev_phb_init(void)
101{
102 struct pci_controller *phb, *tmp;
103
104 list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
105 eeh_dev_phb_init_dynamic(phb);
106
107 pr_info("EEH: devices created\n");
108
109 return 0;
110}
111
112core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
deleted file mode 100644
index a3fefb61097c..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ /dev/null
@@ -1,552 +0,0 @@
1/*
2 * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
3 * Copyright IBM Corp. 2004 2005
4 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
24 */
25#include <linux/delay.h>
26#include <linux/interrupt.h>
27#include <linux/irq.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <asm/eeh.h>
31#include <asm/eeh_event.h>
32#include <asm/ppc-pci.h>
33#include <asm/pci-bridge.h>
34#include <asm/prom.h>
35#include <asm/rtas.h>
36
37/**
38 * eeh_pcid_name - Retrieve name of PCI device driver
39 * @pdev: PCI device
40 *
41 * This routine is used to retrieve the name of the PCI device
42 * driver, if valid.
43 */
44static inline const char *eeh_pcid_name(struct pci_dev *pdev)
45{
46 if (pdev && pdev->dev.driver)
47 return pdev->dev.driver->name;
48 return "";
49}
50
51/**
52 * eeh_pcid_get - Get the PCI device driver
53 * @pdev: PCI device
54 *
55 * The function is used to retrieve the PCI device driver for
56 * the indicated PCI device. Besides, we increase the reference
57 * count of the PCI device driver to prevent it from being unloaded
58 * on the fly; otherwise, a kernel crash could result.
59 */
60static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
61{
62 if (!pdev || !pdev->driver)
63 return NULL;
64
65 if (!try_module_get(pdev->driver->driver.owner))
66 return NULL;
67
68 return pdev->driver;
69}
70
71/**
72 * eeh_pcid_put - Drop reference on the PCI device driver
73 * @pdev: PCI device
74 *
75 * The function is called to drop the reference on the PCI
76 * device driver of the indicated PCI device.
77 */
78static inline void eeh_pcid_put(struct pci_dev *pdev)
79{
80 if (!pdev || !pdev->driver)
81 return;
82
83 module_put(pdev->driver->driver.owner);
84}
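Every report callback in this file brackets driver access with this get/put pair; a condensed sketch of the pattern:

	/* Sketch: pin the driver while invoking its error handlers */
	struct pci_driver *driver = eeh_pcid_get(dev);
	if (driver) {
		/* ... call driver->err_handler callbacks ... */
		eeh_pcid_put(dev);
	}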
85
86#if 0
87static void print_device_node_tree(struct pci_dn *pdn, int dent)
88{
89 int i;
90 struct device_node *pc;
91
92 if (!pdn)
93 return;
94 for (i = 0; i < dent; i++)
95 printk(" ");
96 printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
97 pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
98 pdn->eeh_pe_config_addr, pdn->node->full_name);
99 dent += 3;
100 pc = pdn->node->child;
101 while (pc) {
102 print_device_node_tree(PCI_DN(pc), dent);
103 pc = pc->sibling;
104 }
105}
106#endif
107
108/**
109 * eeh_disable_irq - Disable interrupt for the recovering device
110 * @dev: PCI device
111 *
112 * This routine must be called when reporting a temporary or permanent
113 * error to a particular PCI device, to disable the interrupt of that
114 * device. If the device has MSI or MSI-X interrupts enabled, we needn't
115 * do any real work because EEH should freeze DMA transfers for PCI
116 * devices encountering EEH errors, which covers MSI and MSI-X.
117 */
118static void eeh_disable_irq(struct pci_dev *dev)
119{
120 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
121
122 /* Don't disable MSI and MSI-X interrupts. They are
123 * effectively disabled by the DMA Stopped state
124 * when an EEH error occurs.
125 */
126 if (dev->msi_enabled || dev->msix_enabled)
127 return;
128
129 if (!irq_has_action(dev->irq))
130 return;
131
132 edev->mode |= EEH_DEV_IRQ_DISABLED;
133 disable_irq_nosync(dev->irq);
134}
135
136/**
137 * eeh_enable_irq - Enable interrupt for the recovering device
138 * @dev: PCI device
139 *
140 * This routine must be called to re-enable the interrupt when the
141 * failed device is about to be resumed.
142 */
143static void eeh_enable_irq(struct pci_dev *dev)
144{
145 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
146
147 if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
148 edev->mode &= ~EEH_DEV_IRQ_DISABLED;
149 enable_irq(dev->irq);
150 }
151}
152
153/**
154 * eeh_report_error - Report pci error to each device driver
155 * @data: eeh device
156 * @userdata: return value
157 *
158 * Report an EEH error to each device driver, collect up and
159 * merge the device driver responses. Cumulative response
160 * passed back in "userdata".
161 */
162static void *eeh_report_error(void *data, void *userdata)
163{
164 struct eeh_dev *edev = (struct eeh_dev *)data;
165 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
166 enum pci_ers_result rc, *res = userdata;
167 struct pci_driver *driver;
168
169 /* We might not have the associated PCI device;
170 * if so, continue to the next one.
171 */
172 if (!dev) return NULL;
173 dev->error_state = pci_channel_io_frozen;
174
175 driver = eeh_pcid_get(dev);
176 if (!driver) return NULL;
177
178 eeh_disable_irq(dev);
179
180 if (!driver->err_handler ||
181 !driver->err_handler->error_detected) {
182 eeh_pcid_put(dev);
183 return NULL;
184 }
185
186 rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
187
188 /* A driver that needs a reset trumps all others */
189 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
190 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
191
192 eeh_pcid_put(dev);
193 return NULL;
194}
195
196/**
197 * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
198 * @data: eeh device
199 * @userdata: return value
200 *
201 * Tells each device driver that IO ports, MMIO and config space I/O
202 * are now enabled. Collects up and merges the device driver responses.
203 * Cumulative response passed back in "userdata".
204 */
205static void *eeh_report_mmio_enabled(void *data, void *userdata)
206{
207 struct eeh_dev *edev = (struct eeh_dev *)data;
208 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
209 enum pci_ers_result rc, *res = userdata;
210 struct pci_driver *driver;
211
212 driver = eeh_pcid_get(dev);
213 if (!driver) return NULL;
214
215 if (!driver->err_handler ||
216 !driver->err_handler->mmio_enabled) {
217 eeh_pcid_put(dev);
218 return NULL;
219 }
220
221 rc = driver->err_handler->mmio_enabled(dev);
222
223 /* A driver that needs a reset trumps all others */
224 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
225 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
226
227 eeh_pcid_put(dev);
228 return NULL;
229}
230
231/**
232 * eeh_report_reset - Tell device that slot has been reset
233 * @data: eeh device
234 * @userdata: return value
235 *
236 * This routine must be called while EEH tries to reset a particular
237 * PCI device so that the associated PCI device driver can take
238 * some action, usually saving the data the driver needs so that
239 * it can work again once the device is recovered.
240 */
241static void *eeh_report_reset(void *data, void *userdata)
242{
243 struct eeh_dev *edev = (struct eeh_dev *)data;
244 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
245 enum pci_ers_result rc, *res = userdata;
246 struct pci_driver *driver;
247
248 if (!dev) return NULL;
249 dev->error_state = pci_channel_io_normal;
250
251 driver = eeh_pcid_get(dev);
252 if (!driver) return NULL;
253
254 eeh_enable_irq(dev);
255
256 if (!driver->err_handler ||
257 !driver->err_handler->slot_reset) {
258 eeh_pcid_put(dev);
259 return NULL;
260 }
261
262 rc = driver->err_handler->slot_reset(dev);
263 if ((*res == PCI_ERS_RESULT_NONE) ||
264 (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
265 if (*res == PCI_ERS_RESULT_DISCONNECT &&
266 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
267
268 eeh_pcid_put(dev);
269 return NULL;
270}
271
272/**
273 * eeh_report_resume - Tell device to resume normal operations
274 * @data: eeh device
275 * @userdata: return value
276 *
277 * This routine must be called to notify the device driver that it
278 * could resume so that the device driver can do some initialization
279 * to make the recovered device work again.
280 */
281static void *eeh_report_resume(void *data, void *userdata)
282{
283 struct eeh_dev *edev = (struct eeh_dev *)data;
284 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
285 struct pci_driver *driver;
286
287 if (!dev) return NULL;
288 dev->error_state = pci_channel_io_normal;
289
290 driver = eeh_pcid_get(dev);
291 if (!driver) return NULL;
292
293 eeh_enable_irq(dev);
294
295 if (!driver->err_handler ||
296 !driver->err_handler->resume) {
297 eeh_pcid_put(dev);
298 return NULL;
299 }
300
301 driver->err_handler->resume(dev);
302
303 eeh_pcid_put(dev);
304 return NULL;
305}
306
307/**
308 * eeh_report_failure - Tell device driver that device is dead.
309 * @data: eeh device
310 * @userdata: return value
311 *
312 * This informs the device driver that the device is permanently
313 * dead, and that no further recovery attempts will be made on it.
314 */
315static void *eeh_report_failure(void *data, void *userdata)
316{
317 struct eeh_dev *edev = (struct eeh_dev *)data;
318 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
319 struct pci_driver *driver;
320
321 if (!dev) return NULL;
322 dev->error_state = pci_channel_io_perm_failure;
323
324 driver = eeh_pcid_get(dev);
325 if (!driver) return NULL;
326
327 eeh_disable_irq(dev);
328
329 if (!driver->err_handler ||
330 !driver->err_handler->error_detected) {
331 eeh_pcid_put(dev);
332 return NULL;
333 }
334
335 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
336
337 eeh_pcid_put(dev);
338 return NULL;
339}
340
341/**
342 * eeh_reset_device - Perform actual reset of a pci slot
343 * @pe: EEH PE
344 * @bus: PCI bus corresponding to the isolated slot
345 *
346 * This routine must be called to do reset on the indicated PE.
347 * During the reset, udev might be invoked because those affected
348 * PCI devices will be removed and then added.
349 */
350static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
351{
352 int cnt, rc;
353
354 /* pcibios will clear the counter; save the value */
355 cnt = pe->freeze_count;
356
357 /*
358 * We don't remove the corresponding PE instances because
359 * we need the information afterwards. The attached EEH
360 * devices are expected to be re-attached soon when calling
361 * into pcibios_add_pci_devices().
362 */
363 if (bus)
364 __pcibios_remove_pci_devices(bus, 0);
365
366 /* Reset the pci controller. (Asserts RST#; resets config space).
367 * Reconfigure bridges and devices. Don't try to bring the system
368 * up if the reset failed for some reason.
369 */
370 rc = eeh_reset_pe(pe);
371 if (rc)
372 return rc;
373
374 /* Restore PE */
375 eeh_ops->configure_bridge(pe);
376 eeh_pe_restore_bars(pe);
377
378 /* Give the system 5 seconds to finish running the user-space
379 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
380 * this is a hack, but if we don't do this, and try to bring
381 * the device up before the scripts have taken it down,
382 * potentially weird things happen.
383 */
384 if (bus) {
385 ssleep(5);
386 pcibios_add_pci_devices(bus);
387 }
388 pe->freeze_count = cnt;
389
390 return 0;
391}
392
393/* The longest amount of time to wait for a pci device
394 * to come back on line, in seconds.
395 */
396#define MAX_WAIT_FOR_RECOVERY 150
397
398/**
399 * eeh_handle_event - Reset a PCI device after hard lockup.
400 * @pe: EEH PE
401 *
402 * When the PHB detects address or data parity errors on a particular
403 * PCI slot, the associated PE will be frozen. Besides, DMAs occurring
404 * to wild addresses (which usually happen due to bugs in device
405 * drivers or in PCI adapter firmware) can cause EEH errors. #SERR,
406 * #PERR or other misc PCI-related errors can also trigger EEH errors.
407 *
408 * The recovery process consists of unplugging the device driver (which
409 * generates hotplug events to userspace), then issuing a PCI #RST to
410 * the device, then reconfiguring the PCI config space for all bridges
411 * & devices under this slot, and then finally restarting the device
412 * drivers (which causes a second set of hotplug events to go out to
413 * userspace).
414 */
415void eeh_handle_event(struct eeh_pe *pe)
416{
417 struct pci_bus *frozen_bus;
418 int rc = 0;
419 enum pci_ers_result result = PCI_ERS_RESULT_NONE;
420
421 frozen_bus = eeh_pe_bus_get(pe);
422 if (!frozen_bus) {
423 pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
424 __func__, pe->phb->global_number, pe->addr);
425 return;
426 }
427
428 pe->freeze_count++;
429 if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
430 goto excess_failures;
431 pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
432 pe->freeze_count);
433
434 /* Walk the various device drivers attached to this slot through
435 * a reset sequence, giving each an opportunity to do what it needs
436 * to accomplish the reset. Each child gets a report of the
437 * status ... if any child can't handle the reset, then the entire
438 * slot is dlpar removed and added.
439 */
440 eeh_pe_dev_traverse(pe, eeh_report_error, &result);
441
442 /* Get the current PCI slot state. This can take a long time,
443 * sometimes over 3 seconds for certain systems.
444 */
445 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
446 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
447 printk(KERN_WARNING "EEH: Permanent failure\n");
448 goto hard_fail;
449 }
450
451 /* Since rtas may enable MMIO when posting the error log,
452 * don't post the error log until after all dev drivers
453 * have been informed.
454 */
455 eeh_slot_error_detail(pe, EEH_LOG_TEMP);
456
457 /* If all device drivers were EEH-unaware, then shut
458 * down all of the device drivers, and hope they
459 * go down willingly, without panicking the system.
460 */
461 if (result == PCI_ERS_RESULT_NONE) {
462 rc = eeh_reset_device(pe, frozen_bus);
463 if (rc) {
464 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
465 goto hard_fail;
466 }
467 }
468
469 /* If all devices reported they can proceed, then re-enable MMIO */
470 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
471 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
472
473 if (rc < 0)
474 goto hard_fail;
475 if (rc) {
476 result = PCI_ERS_RESULT_NEED_RESET;
477 } else {
478 result = PCI_ERS_RESULT_NONE;
479 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
480 }
481 }
482
483 /* If all devices reported they can proceed, then re-enable DMA */
484 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
485 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
486
487 if (rc < 0)
488 goto hard_fail;
489 if (rc)
490 result = PCI_ERS_RESULT_NEED_RESET;
491 else
492 result = PCI_ERS_RESULT_RECOVERED;
493 }
494
495 /* If any device has a hard failure, then shut off everything. */
496 if (result == PCI_ERS_RESULT_DISCONNECT) {
497 printk(KERN_WARNING "EEH: Device driver gave up\n");
498 goto hard_fail;
499 }
500
501 /* If any device called out for a reset, then reset the slot */
502 if (result == PCI_ERS_RESULT_NEED_RESET) {
503 rc = eeh_reset_device(pe, NULL);
504 if (rc) {
505 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
506 goto hard_fail;
507 }
508 result = PCI_ERS_RESULT_NONE;
509 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
510 }
511
512 /* All devices should claim they have recovered by now. */
513 if ((result != PCI_ERS_RESULT_RECOVERED) &&
514 (result != PCI_ERS_RESULT_NONE)) {
515 printk(KERN_WARNING "EEH: Not recovered\n");
516 goto hard_fail;
517 }
518
519 /* Tell all device drivers that they can resume operations */
520 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
521
522 return;
523
524excess_failures:
525 /*
526 * About 90% of all real-life EEH failures in the field
527 * are due to poorly seated PCI cards. Only 10% or so are
528 * due to actual, failed cards.
529 */
530 pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
531 "last hour and has been permanently disabled.\n"
532 "Please try reseating or replacing it.\n",
533 pe->phb->global_number, pe->addr,
534 pe->freeze_count);
535 goto perm_error;
536
537hard_fail:
538 pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
539 "Please try reseating or replacing it\n",
540 pe->phb->global_number, pe->addr);
541
542perm_error:
543 eeh_slot_error_detail(pe, EEH_LOG_PERM);
544
545 /* Notify all devices that they're about to go down. */
546 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
547
548 /* Shut down the device drivers for good. */
549 if (frozen_bus)
550 pcibios_remove_pci_devices(frozen_bus);
551}
552
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
deleted file mode 100644
index 185bedd926df..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ /dev/null
@@ -1,142 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 *
16 * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
17 */
18
19#include <linux/delay.h>
20#include <linux/list.h>
21#include <linux/mutex.h>
22#include <linux/sched.h>
23#include <linux/pci.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include <linux/kthread.h>
27#include <asm/eeh_event.h>
28#include <asm/ppc-pci.h>
29
30/** Overview:
31 * EEH error states may be detected within exception handlers;
32 * however, the recovery processing needs to occur asynchronously
33 * in a normal kernel context and not an interrupt context.
34 * This pair of routines creates an event and queues it onto a
35 * work-queue, where a worker thread can drive recovery.
36 */
37
38/* EEH event workqueue setup. */
39static DEFINE_SPINLOCK(eeh_eventlist_lock);
40LIST_HEAD(eeh_eventlist);
41static void eeh_thread_launcher(struct work_struct *);
42DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
43
44/* Serialize reset sequences for a given pci device */
45DEFINE_MUTEX(eeh_event_mutex);
46
47/**
48 * eeh_event_handler - Dispatch EEH events.
49 * @dummy: unused
50 *
51 * The detection of a frozen slot can occur inside an interrupt,
52 * where it can be hard to do anything about it. The goal of this
53 * routine is to pull these detection events out of the context
54 * of the interrupt handler, and re-dispatch them for processing
55 * at a later time in a normal context.
56 */
57static int eeh_event_handler(void * dummy)
58{
59 unsigned long flags;
60 struct eeh_event *event;
61 struct eeh_pe *pe;
62
63 spin_lock_irqsave(&eeh_eventlist_lock, flags);
64 event = NULL;
65
66 /* Unqueue the event, get ready to process. */
67 if (!list_empty(&eeh_eventlist)) {
68 event = list_entry(eeh_eventlist.next, struct eeh_event, list);
69 list_del(&event->list);
70 }
71 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
72
73 if (event == NULL)
74 return 0;
75
76 /* Serialize processing of EEH events */
77 mutex_lock(&eeh_event_mutex);
78 pe = event->pe;
79 eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
80 pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
81 pe->phb->global_number, pe->addr);
82
83 set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */
84 eeh_handle_event(pe);
85 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
86
87 kfree(event);
88 mutex_unlock(&eeh_event_mutex);
89
90 /* If there are no new errors after an hour, clear the counter. */
91 if (pe && pe->freeze_count > 0) {
92 msleep_interruptible(3600*1000);
93 if (pe->freeze_count > 0)
94 pe->freeze_count--;
95
96 }
97
98 return 0;
99}
100
101/**
102 * eeh_thread_launcher - Start kernel thread to handle EEH events
103 * @dummy: unused
104 *
105 * This routine is called to start the kernel thread for processing
106 * EEH event.
107 */
108static void eeh_thread_launcher(struct work_struct *dummy)
109{
110 if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd")))
111 printk(KERN_ERR "Failed to start EEH daemon\n");
112}
113
114/**
115 * eeh_send_failure_event - Generate a PCI error event
116 * @pe: EEH PE
117 *
118 * This routine can be called within an interrupt context;
119 * the actual event will be delivered in a normal context
120 * (from a workqueue).
121 */
122int eeh_send_failure_event(struct eeh_pe *pe)
123{
124 unsigned long flags;
125 struct eeh_event *event;
126
127 event = kzalloc(sizeof(*event), GFP_ATOMIC);
128 if (!event) {
129 pr_err("EEH: out of memory, event not handled\n");
130 return -ENOMEM;
131 }
132 event->pe = pe;
133
134 /* We may or may not be called in an interrupt context */
135 spin_lock_irqsave(&eeh_eventlist_lock, flags);
136 list_add(&event->list, &eeh_eventlist);
137 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
138
139 schedule_work(&eeh_event_wq);
140
141 return 0;
142}
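A detection path, possibly running in interrupt context, hands a frozen PE to the worker with one call. A minimal sketch; the surrounding error handling is illustrative:

	/* Sketch: queue a frozen PE for asynchronous recovery; "pe" is
	 * assumed to have been identified by the caller's detection logic.
	 */
	if (eeh_send_failure_event(pe))
		pr_err("EEH: failed to queue event for PHB#%d-PE#%x\n",
		       pe->phb->global_number, pe->addr);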
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
deleted file mode 100644
index 9d4a9e8562b2..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ /dev/null
@@ -1,653 +0,0 @@
1/*
2 * This file implements PEs based on the information from the
3 * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
4 * All PEs are organized as a hierarchy tree. The first level
5 * of the tree is associated with the existing PHBs, since a particular
6 * PE is only meaningful in one PHB domain.
7 *
8 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/export.h>
26#include <linux/gfp.h>
27#include <linux/init.h>
28#include <linux/kernel.h>
29#include <linux/pci.h>
30#include <linux/string.h>
31
32#include <asm/pci-bridge.h>
33#include <asm/ppc-pci.h>
34
35static LIST_HEAD(eeh_phb_pe);
36
37/**
38 * eeh_pe_alloc - Allocate PE
39 * @phb: PCI controller
40 * @type: PE type
41 *
42 * Allocate PE instance dynamically.
43 */
44static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
45{
46 struct eeh_pe *pe;
47
48 /* Allocate PHB PE */
49 pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
50 if (!pe) return NULL;
51
52 /* Initialize PHB PE */
53 pe->type = type;
54 pe->phb = phb;
55 INIT_LIST_HEAD(&pe->child_list);
56 INIT_LIST_HEAD(&pe->child);
57 INIT_LIST_HEAD(&pe->edevs);
58
59 return pe;
60}
61
62/**
63 * eeh_phb_pe_create - Create PHB PE
64 * @phb: PCI controller
65 *
66 * The function should be called while the PHB is detected during
67 * system boot or PCI hotplug in order to create PHB PE.
68 */
69int eeh_phb_pe_create(struct pci_controller *phb)
70{
71 struct eeh_pe *pe;
72
73 /* Allocate PHB PE */
74 pe = eeh_pe_alloc(phb, EEH_PE_PHB);
75 if (!pe) {
76 pr_err("%s: out of memory!\n", __func__);
77 return -ENOMEM;
78 }
79
80 /* Put it into the list */
81 eeh_lock();
82 list_add_tail(&pe->child, &eeh_phb_pe);
83 eeh_unlock();
84
85 pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
86
87 return 0;
88}
89
90/**
91 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
92 * @phb: PCI controller
93 *
94 * The overall PEs form hierarchy tree. The first layer of the
95 * hierarchy tree is composed of PHB PEs. The function is used
96 * to retrieve the corresponding PHB PE according to the given PHB.
97 */
98static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
99{
100 struct eeh_pe *pe;
101
102 list_for_each_entry(pe, &eeh_phb_pe, child) {
103 /*
104 * Actually, we needn't check the type since
104 * the PE for the PHB was determined when it
106 * was created.
107 */
108 if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
109 return pe;
110 }
111
112 return NULL;
113}
114
115/**
116 * eeh_pe_next - Retrieve the next PE in the tree
117 * @pe: current PE
118 * @root: root PE
119 *
120 * The function is used to retrieve the next PE in the
121 * hierarchy PE tree.
122 */
123static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
124 struct eeh_pe *root)
125{
126 struct list_head *next = pe->child_list.next;
127
128 if (next == &pe->child_list) {
129 while (1) {
130 if (pe == root)
131 return NULL;
132 next = pe->child.next;
133 if (next != &pe->parent->child_list)
134 break;
135 pe = pe->parent;
136 }
137 }
138
139 return list_entry(next, struct eeh_pe, child);
140}
141
142/**
143 * eeh_pe_traverse - Traverse PEs in the specified PHB
144 * @root: root PE
145 * @fn: callback
146 * @flag: extra parameter to callback
147 *
148 * The function is used to traverse the specified PE and its
149 * child PEs. The traversal is terminated once the
150 * callback returns something other than NULL, or when there
151 * are no more PEs to traverse.
152 */
153static void *eeh_pe_traverse(struct eeh_pe *root,
154 eeh_traverse_func fn, void *flag)
155{
156 struct eeh_pe *pe;
157 void *ret;
158
159 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
160 ret = fn(pe, flag);
161 if (ret) return ret;
162 }
163
164 return NULL;
165}
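A traversal callback returns NULL to keep walking and non-NULL to stop early, as __eeh_pe_get below shows. A minimal sketch of a custom visitor; the demo_* name and its counting purpose are invented:

	/* Sketch: count the PEs under a root PE via eeh_pe_traverse() */
	static void *demo_count_pe(void *data, void *flag)
	{
		int *count = flag;

		(*count)++;
		return NULL;	/* returning non-NULL would stop the walk */
	}

	/* Usage: int n = 0; eeh_pe_traverse(root, demo_count_pe, &n); */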
166
167/**
168 * eeh_pe_dev_traverse - Traverse the devices from the PE
169 * @root: EEH PE
170 * @fn: function callback
171 * @flag: extra parameter to callback
172 *
173 * The function is used to traverse the devices of the specified
174 * PE and its child PEs.
175 */
176void *eeh_pe_dev_traverse(struct eeh_pe *root,
177 eeh_traverse_func fn, void *flag)
178{
179 struct eeh_pe *pe;
180 struct eeh_dev *edev;
181 void *ret;
182
183 if (!root) {
184 pr_warning("%s: Invalid PE %p\n", __func__, root);
185 return NULL;
186 }
187
188 eeh_lock();
189
190 /* Traverse root PE */
191 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
192 eeh_pe_for_each_dev(pe, edev) {
193 ret = fn(edev, flag);
194 if (ret) {
195 eeh_unlock();
196 return ret;
197 }
198 }
199 }
200
201 eeh_unlock();
202
203 return NULL;
204}
205
206/**
207 * __eeh_pe_get - Check the PE address
208 * @data: EEH PE
209 * @flag: EEH device
210 *
211 * One particular PE can be identified by PE address or by
212 * traditional BDF address. The BDF address is composed of
213 * Bus/Device/Function numbers. The extra data referred to by
214 * @flag indicates which type of address should be used.
215 */
216static void *__eeh_pe_get(void *data, void *flag)
217{
218 struct eeh_pe *pe = (struct eeh_pe *)data;
219 struct eeh_dev *edev = (struct eeh_dev *)flag;
220
221 /* Unexpected PHB PE */
222 if (pe->type & EEH_PE_PHB)
223 return NULL;
224
225 /* We prefer PE address */
226 if (edev->pe_config_addr &&
227 (edev->pe_config_addr == pe->addr))
228 return pe;
229
230 /* Try BDF address */
231 if (edev->config_addr &&
232 (edev->config_addr == pe->config_addr))
233 return pe;
234
235 return NULL;
236}
237
238/**
239 * eeh_pe_get - Search PE based on the given address
240 * @edev: EEH device
241 *
242 * Search for the corresponding PE based on the specified address, which
243 * is included in the EEH device. The function is used to check if
244 * the associated PE has already been created for the PE address. Note
245 * that the PE address has 2 formats: the traditional PE address,
246 * which is composed of PCI bus/device/function numbers, or the unified
247 * PE address.
248 */
249static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
250{
251 struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
252 struct eeh_pe *pe;
253
254 pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
255
256 return pe;
257}
258
259/**
260 * eeh_pe_get_parent - Retrieve the parent PE
261 * @edev: EEH device
262 *
263 * All the PEs existing in the system are organized as a hierarchy
264 * tree. The function is used to retrieve the parent PE according
265 * to the parent EEH device.
266 */
267static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
268{
269 struct device_node *dn;
270 struct eeh_dev *parent;
271
272 /*
273 * It might be the case that an indirect parent
274 * EEH device already has an associated PE, while
275 * the direct parent EEH device doesn't have one yet.
276 */
277 dn = edev->dn->parent;
278 while (dn) {
279 /* We're poking out of PCI territory */
280 if (!PCI_DN(dn)) return NULL;
281
282 parent = of_node_to_eeh_dev(dn);
283 /* We're poking out of PCI territory */
284 if (!parent) return NULL;
285
286 if (parent->pe)
287 return parent->pe;
288
289 dn = dn->parent;
290 }
291
292 return NULL;
293}
294
295/**
296 * eeh_add_to_parent_pe - Add EEH device to parent PE
297 * @edev: EEH device
298 *
299 * Add EEH device to the parent PE. If the parent PE already
300 * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
301 * we have to create new PE to hold the EEH device and the new
302 * PE will be linked to its parent PE as well.
303 */
304int eeh_add_to_parent_pe(struct eeh_dev *edev)
305{
306 struct eeh_pe *pe, *parent;
307
308 eeh_lock();
309
310 /*
311 * Search whether the PE already exists according
312 * to the PE address. If it already exists, the
313 * PE should be composed of a PCI bus and its subordinate
314 * components.
315 */
316 pe = eeh_pe_get(edev);
317 if (pe && !(pe->type & EEH_PE_INVALID)) {
318 if (!edev->pe_config_addr) {
319 eeh_unlock();
320 pr_err("%s: PE with addr 0x%x already exists\n",
321 __func__, edev->config_addr);
322 return -EEXIST;
323 }
324
325 /* Mark the PE as type of PCI bus */
326 pe->type = EEH_PE_BUS;
327 edev->pe = pe;
328
329 /* Put the edev to PE */
330 list_add_tail(&edev->list, &pe->edevs);
331 eeh_unlock();
332 pr_debug("EEH: Add %s to Bus PE#%x\n",
333 edev->dn->full_name, pe->addr);
334
335 return 0;
336 } else if (pe && (pe->type & EEH_PE_INVALID)) {
337 list_add_tail(&edev->list, &pe->edevs);
338 edev->pe = pe;
339 /*
340 * We get here because of the PCI hotplug caused by
341 * EEH recovery. We need to clear EEH_PE_INVALID all the way up.
342 */
343 parent = pe;
344 while (parent) {
345 if (!(parent->type & EEH_PE_INVALID))
346 break;
347 parent->type &= ~EEH_PE_INVALID;
348 parent = parent->parent;
349 }
350 eeh_unlock();
351 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
352 edev->dn->full_name, pe->addr, pe->parent->addr);
353
354 return 0;
355 }
356
357 /* Create a new EEH PE */
358 pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
359 if (!pe) {
360 eeh_unlock();
361 pr_err("%s: out of memory!\n", __func__);
362 return -ENOMEM;
363 }
364 pe->addr = edev->pe_config_addr;
365 pe->config_addr = edev->config_addr;
366
367 /*
368 * Put the new EEH PE into hierarchy tree. If the parent
369 * can't be found, the newly created PE will be attached
370 * to PHB directly. Otherwise, we have to associate the
371 * PE with its parent.
372 */
373 parent = eeh_pe_get_parent(edev);
374 if (!parent) {
375 parent = eeh_phb_pe_get(edev->phb);
376 if (!parent) {
377 eeh_unlock();
378 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
379 __func__, edev->phb->global_number);
380 edev->pe = NULL;
381 kfree(pe);
382 return -EEXIST;
383 }
384 }
385 pe->parent = parent;
386
387 /*
388 * Put the newly created PE into the child list and
389 * link the EEH device accordingly.
390 */
391 list_add_tail(&pe->child, &parent->child_list);
392 list_add_tail(&edev->list, &pe->edevs);
393 edev->pe = pe;
394 eeh_unlock();
395 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
396 edev->dn->full_name, pe->addr, pe->parent->addr);
397
398 return 0;
399}
400
401/**
402 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
403 * @edev: EEH device
404 * @purge_pe: remove PE or not
405 *
406 * The PE hierarchy tree might be changed when doing PCI hotplug.
407 * Also, the PCI devices or buses could be removed from the system
408 * during EEH recovery. So we have to call this function to remove
409 * the corresponding PE accordingly when necessary.
410 */
411int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
412{
413 struct eeh_pe *pe, *parent, *child;
414 int cnt;
415
416 if (!edev->pe) {
417 pr_warning("%s: No PE found for EEH device %s\n",
418 __func__, edev->dn->full_name);
419 return -EEXIST;
420 }
421
422 eeh_lock();
423
424 /* Remove the EEH device */
425 pe = edev->pe;
426 edev->pe = NULL;
427 list_del(&edev->list);
428
429 /*
430 * Check if the parent PE includes any EEH devices.
431	 * If not, we should delete it. Also, the parent PE
432	 * should be deleted if it has no associated child
433	 * PEs or EEH devices.
434 */
435 while (1) {
436 parent = pe->parent;
437 if (pe->type & EEH_PE_PHB)
438 break;
439
440 if (purge_pe) {
441 if (list_empty(&pe->edevs) &&
442 list_empty(&pe->child_list)) {
443 list_del(&pe->child);
444 kfree(pe);
445 } else {
446 break;
447 }
448 } else {
449 if (list_empty(&pe->edevs)) {
450 cnt = 0;
451 list_for_each_entry(child, &pe->child_list, child) {
452 if (!(child->type & EEH_PE_INVALID)) {
453 cnt++;
454 break;
455 }
456 }
457
458 if (!cnt)
459 pe->type |= EEH_PE_INVALID;
460 else
461 break;
462 }
463 }
464
465 pe = parent;
466 }
467
468 eeh_unlock();
469
470 return 0;
471}
472
473/**
474 * __eeh_pe_state_mark - Mark the state for the PE
475 * @data: EEH PE
476 * @flag: state
477 *
478 * The function is used to mark the indicated state for the given
 479 * PE. The associated PCI devices will be put into the I/O frozen
 480 * state as well.
481 */
482static void *__eeh_pe_state_mark(void *data, void *flag)
483{
484 struct eeh_pe *pe = (struct eeh_pe *)data;
485 int state = *((int *)flag);
486 struct eeh_dev *tmp;
487 struct pci_dev *pdev;
488
489 /*
490 * Mark the PE with the indicated state. Also,
491 * the associated PCI device will be put into
492 * I/O frozen state to avoid I/O accesses from
493 * the PCI device driver.
494 */
495 pe->state |= state;
496 eeh_pe_for_each_dev(pe, tmp) {
497 pdev = eeh_dev_to_pci_dev(tmp);
498 if (pdev)
499 pdev->error_state = pci_channel_io_frozen;
500 }
501
502 return NULL;
503}
504
505/**
506 * eeh_pe_state_mark - Mark specified state for PE and its associated device
507 * @pe: EEH PE
 508 * @state: state to be marked
509 * EEH error affects the current PE and its child PEs. The function
510 * is used to mark appropriate state for the affected PEs and the
511 * associated devices.
512 */
513void eeh_pe_state_mark(struct eeh_pe *pe, int state)
514{
515 eeh_lock();
516 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
517 eeh_unlock();
518}
519
520/**
521 * __eeh_pe_state_clear - Clear state for the PE
522 * @data: EEH PE
523 * @flag: state
524 *
525 * The function is used to clear the indicated state from the
 526 * given PE. The check count of the PE is cleared
 527 * as well.
528 */
529static void *__eeh_pe_state_clear(void *data, void *flag)
530{
531 struct eeh_pe *pe = (struct eeh_pe *)data;
532 int state = *((int *)flag);
533
534 pe->state &= ~state;
535 pe->check_count = 0;
536
537 return NULL;
538}
539
540/**
541 * eeh_pe_state_clear - Clear state for the PE and its children
542 * @pe: PE
543 * @state: state to be cleared
544 *
 545 * When the PE and its children have been recovered from an error,
 546 * we need to clear the error state. This function is used
 547 * for that purpose.
548 */
549void eeh_pe_state_clear(struct eeh_pe *pe, int state)
550{
551 eeh_lock();
552 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
553 eeh_unlock();
554}
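/*
 * For example, a recovery path would typically bracket a reset with
 * these two helpers; EEH_PE_ISOLATED is shown as a representative
 * state bit, the exact flags depend on the caller:
 *
 *	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 *	... reset and re-probe the PE ...
 *	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 */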
555
556/**
557 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
558 * @data: EEH device
559 * @flag: Unused
560 *
561 * Loads the PCI configuration space base address registers,
 562 * the expansion ROM base address, the latency timer, etc.
563 * from the saved values in the device node.
564 */
565static void *eeh_restore_one_device_bars(void *data, void *flag)
566{
567 int i;
568 u32 cmd;
569 struct eeh_dev *edev = (struct eeh_dev *)data;
570 struct device_node *dn = eeh_dev_to_of_node(edev);
571
572 for (i = 4; i < 10; i++)
573 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
574 /* 12 == Expansion ROM Address */
575 eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
576
577#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
578#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
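/*
 * BYTE_SWAP() maps a config-space byte offset OFF to byte (3 - OFF%4)
 * of word OFF/4 in the saved u32 array. For example, for
 * PCI_CACHE_LINE_SIZE (offset 0x0c): 8*(12/4) + 3 - 12 = 15, i.e. the
 * least significant byte of config_space[3] on this big-endian host.
 */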
579
580 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
581 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
582 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
583 SAVED_BYTE(PCI_LATENCY_TIMER));
584
585 /* max latency, min grant, interrupt pin and line */
586 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
587
588 /*
589 * Restore PERR & SERR bits, some devices require it,
590 * don't touch the other command bits
591 */
592 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
593 if (edev->config_space[1] & PCI_COMMAND_PARITY)
594 cmd |= PCI_COMMAND_PARITY;
595 else
596 cmd &= ~PCI_COMMAND_PARITY;
597 if (edev->config_space[1] & PCI_COMMAND_SERR)
598 cmd |= PCI_COMMAND_SERR;
599 else
600 cmd &= ~PCI_COMMAND_SERR;
601 eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
602
603 return NULL;
604}
605
606/**
607 * eeh_pe_restore_bars - Restore the PCI config space info
608 * @pe: EEH PE
609 *
610 * This routine performs a recursive walk to the children
611 * of this device as well.
612 */
613void eeh_pe_restore_bars(struct eeh_pe *pe)
614{
615 /*
616 * We needn't take the EEH lock since eeh_pe_dev_traverse()
617 * will take that.
618 */
619 eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
620}
621
622/**
623 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
624 * @pe: EEH PE
625 *
626 * Retrieve the PCI bus according to the given PE. Basically,
 627 * there are 3 types of PEs: PHB/Bus/Device. For a PHB PE, the
 628 * primary PCI bus will be retrieved. For BUS and DEVICE PEs,
 629 * the bus of the first associated EEH device is returned, if
 630 * there is one.
631 */
632struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
633{
634 struct pci_bus *bus = NULL;
635 struct eeh_dev *edev;
636 struct pci_dev *pdev;
637
638 eeh_lock();
639
640 if (pe->type & EEH_PE_PHB) {
641 bus = pe->phb->bus;
642 } else if (pe->type & EEH_PE_BUS ||
643 pe->type & EEH_PE_DEVICE) {
644 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
645 pdev = eeh_dev_to_pci_dev(edev);
646 if (pdev)
647 bus = pdev->bus;
648 }
649
650 eeh_unlock();
651
652 return bus;
653}
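/*
 * As a hypothetical usage sketch, a recovery caller could locate the
 * bus to tear down along these lines:
 *
 *	struct pci_bus *bus = eeh_pe_bus_get(pe);
 *	if (bus)
 *		pcibios_remove_pci_devices(bus);
 */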
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
deleted file mode 100644
index d37708360f2e..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ /dev/null
@@ -1,75 +0,0 @@
1/*
2 * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
3 * Copyright IBM Corporation 2007
4 * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
24 */
25#include <linux/pci.h>
26#include <linux/stat.h>
27#include <asm/ppc-pci.h>
28#include <asm/pci-bridge.h>
29
30/**
 31 * EEH_SHOW_ATTR -- Create sysfs entry for an EEH statistic
32 * @_name: name of file in sysfs directory
33 * @_memb: name of member in struct pci_dn to access
34 * @_format: printf format for display
35 *
36 * All of the attributes look very similar, so just
37 * auto-gen a cut-n-paste routine to display them.
38 */
39#define EEH_SHOW_ATTR(_name,_memb,_format) \
40static ssize_t eeh_show_##_name(struct device *dev, \
41 struct device_attribute *attr, char *buf) \
42{ \
43 struct pci_dev *pdev = to_pci_dev(dev); \
44 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \
45 \
46 if (!edev) \
47 return 0; \
48 \
49 return sprintf(buf, _format "\n", edev->_memb); \
50} \
51static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
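/*
 * For example, EEH_SHOW_ATTR(eeh_mode, mode, "0x%x") expands roughly to:
 *
 *	static ssize_t eeh_show_eeh_mode(struct device *dev,
 *			struct device_attribute *attr, char *buf)
 *	{
 *		struct pci_dev *pdev = to_pci_dev(dev);
 *		struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 *
 *		if (!edev)
 *			return 0;
 *
 *		return sprintf(buf, "0x%x\n", edev->mode);
 *	}
 *	static DEVICE_ATTR(eeh_mode, S_IRUGO, eeh_show_eeh_mode, NULL);
 */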
52
53EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
54EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
55EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
56
57void eeh_sysfs_add_device(struct pci_dev *pdev)
58{
59 int rc=0;
60
61 rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
62 rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
63 rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
64
65 if (rc)
66 printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
67}
68
69void eeh_sysfs_remove_device(struct pci_dev *pdev)
70{
71 device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
72 device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
73 device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
74}
75
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
index ef9d9d84c7d5..5ea88d1541f7 100644
--- a/arch/powerpc/platforms/pseries/io_event_irq.c
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -115,7 +115,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
115 * by scope or event type alone. For example, Torrent ISR route change 115 * by scope or event type alone. For example, Torrent ISR route change
 116 * event is reported with scope 0x00 (Not Applicable) rather than 116 * event is reported with scope 0x00 (Not Applicable) rather than
 117 * 0x3B (Torrent-hub). It is better to let the clients identify 117 * 0x3B (Torrent-hub). It is better to let the clients identify
118 * who owns the the event. 118 * who owns the event.
119 */ 119 */
120 120
121static irqreturn_t ioei_interrupt(int irq, void *dev_id) 121static irqreturn_t ioei_interrupt(int irq, void *dev_id)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 86ae364900d6..23fc1dcf4434 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -614,6 +614,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
614 614
615 iommu_table_setparms(pci->phb, dn, tbl); 615 iommu_table_setparms(pci->phb, dn, tbl);
616 pci->iommu_table = iommu_init_table(tbl, pci->phb->node); 616 pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
617 iommu_register_group(tbl, pci_domain_nr(bus), 0);
617 618
618 /* Divide the rest (1.75GB) among the children */ 619 /* Divide the rest (1.75GB) among the children */
619 pci->phb->dma_window_size = 0x80000000ul; 620 pci->phb->dma_window_size = 0x80000000ul;
@@ -658,6 +659,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
658 ppci->phb->node); 659 ppci->phb->node);
659 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); 660 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
660 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); 661 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
662 iommu_register_group(tbl, pci_domain_nr(bus), 0);
661 pr_debug(" created table: %p\n", ppci->iommu_table); 663 pr_debug(" created table: %p\n", ppci->iommu_table);
662 } 664 }
663} 665}
@@ -684,6 +686,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
684 phb->node); 686 phb->node);
685 iommu_table_setparms(phb, dn, tbl); 687 iommu_table_setparms(phb, dn, tbl);
686 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); 688 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
689 iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
687 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); 690 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
688 return; 691 return;
689 } 692 }
@@ -1184,6 +1187,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
1184 pci->phb->node); 1187 pci->phb->node);
1185 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); 1188 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
1186 pci->iommu_table = iommu_init_table(tbl, pci->phb->node); 1189 pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
1190 iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0);
1187 pr_debug(" created table: %p\n", pci->iommu_table); 1191 pr_debug(" created table: %p\n", pci->iommu_table);
1188 } else { 1192 } else {
1189 pr_debug(" found DMA window, table: %p\n", pci->iommu_table); 1193 pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6d62072a7d5a..02d6e21619bb 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -45,6 +45,13 @@
45#include "plpar_wrappers.h" 45#include "plpar_wrappers.h"
46#include "pseries.h" 46#include "pseries.h"
47 47
48/* Flag bits for H_BULK_REMOVE */
49#define HBR_REQUEST 0x4000000000000000UL
50#define HBR_RESPONSE 0x8000000000000000UL
51#define HBR_END 0xc000000000000000UL
52#define HBR_AVPN 0x0200000000000000UL
53#define HBR_ANDCOND 0x0100000000000000UL
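/*
 * For example, H_BULK_REMOVE takes (control, avpn) pairs, four pairs
 * per plpar_hcall9(), so a partially filled two-entry batch would be
 * packed roughly as (slot0/vpn0 etc. are placeholders):
 *
 *	param[0] = HBR_REQUEST | HBR_AVPN | slot0;
 *	param[1] = hpte_encode_avpn(vpn0, psize, ssize);
 *	param[2] = HBR_REQUEST | HBR_AVPN | slot1;
 *	param[3] = hpte_encode_avpn(vpn1, psize, ssize);
 *	param[4] = HBR_END;	(terminates the partial batch)
 */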
54
48 55
49/* in hvCall.S */ 56/* in hvCall.S */
50EXPORT_SYMBOL(plpar_hcall); 57EXPORT_SYMBOL(plpar_hcall);
@@ -64,6 +71,9 @@ void vpa_init(int cpu)
64 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 71 if (cpu_has_feature(CPU_FTR_ALTIVEC))
65 lppaca_of(cpu).vmxregs_in_use = 1; 72 lppaca_of(cpu).vmxregs_in_use = 1;
66 73
74 if (cpu_has_feature(CPU_FTR_ARCH_207S))
75 lppaca_of(cpu).ebb_regs_in_use = 1;
76
67 addr = __pa(&lppaca_of(cpu)); 77 addr = __pa(&lppaca_of(cpu));
68 ret = register_vpa(hwcpu, addr); 78 ret = register_vpa(hwcpu, addr);
69 79
@@ -240,7 +250,8 @@ static void pSeries_lpar_hptab_clear(void)
240static long pSeries_lpar_hpte_updatepp(unsigned long slot, 250static long pSeries_lpar_hpte_updatepp(unsigned long slot,
241 unsigned long newpp, 251 unsigned long newpp,
242 unsigned long vpn, 252 unsigned long vpn,
243 int psize, int ssize, int local) 253 int psize, int apsize,
254 int ssize, int local)
244{ 255{
245 unsigned long lpar_rc; 256 unsigned long lpar_rc;
246 unsigned long flags = (newpp & 7) | H_AVPN; 257 unsigned long flags = (newpp & 7) | H_AVPN;
@@ -328,7 +339,8 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
328} 339}
329 340
330static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, 341static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
331 int psize, int ssize, int local) 342 int psize, int apsize,
343 int ssize, int local)
332{ 344{
333 unsigned long want_v; 345 unsigned long want_v;
334 unsigned long lpar_rc; 346 unsigned long lpar_rc;
@@ -345,6 +357,113 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
345 BUG_ON(lpar_rc != H_SUCCESS); 357 BUG_ON(lpar_rc != H_SUCCESS);
346} 358}
347 359
360/*
361 * Limit iterations holding pSeries_lpar_tlbie_lock to 3 H_BULK_REMOVE
362 * hcalls (4 HPTEs each); this also avoids bouncing the hypervisor tlbie lock.
363 */
364#define PPC64_HUGE_HPTE_BATCH 12
365
366static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
367 unsigned long *vpn, int count,
368 int psize, int ssize)
369{
370 unsigned long param[8];
371 int i = 0, pix = 0, rc;
372 unsigned long flags = 0;
373 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
374
375 if (lock_tlbie)
376 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
377
378 for (i = 0; i < count; i++) {
379
380 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
381 pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
382 ssize, 0);
383 } else {
384 param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
385 param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
386 pix += 2;
387 if (pix == 8) {
388 rc = plpar_hcall9(H_BULK_REMOVE, param,
389 param[0], param[1], param[2],
390 param[3], param[4], param[5],
391 param[6], param[7]);
392 BUG_ON(rc != H_SUCCESS);
393 pix = 0;
394 }
395 }
396 }
397 if (pix) {
398 param[pix] = HBR_END;
399 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
400 param[2], param[3], param[4], param[5],
401 param[6], param[7]);
402 BUG_ON(rc != H_SUCCESS);
403 }
404
405 if (lock_tlbie)
406 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
407}
408
409static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
410 unsigned char *hpte_slot_array,
411 unsigned long addr, int psize)
412{
413 int ssize = 0, i, index = 0;
414 unsigned long s_addr = addr;
415 unsigned int max_hpte_count, valid;
416 unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
417 unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
418 unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
419
420 shift = mmu_psize_defs[psize].shift;
421 max_hpte_count = 1U << (PMD_SHIFT - shift);
422
423 for (i = 0; i < max_hpte_count; i++) {
424 valid = hpte_valid(hpte_slot_array, i);
425 if (!valid)
426 continue;
427 hidx = hpte_hash_index(hpte_slot_array, i);
428
429 /* get the vpn */
430 addr = s_addr + (i * (1ul << shift));
431 if (!is_kernel_addr(addr)) {
432 ssize = user_segment_size(addr);
433 vsid = get_vsid(mm->context.id, addr, ssize);
434 WARN_ON(vsid == 0);
435 } else {
436 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
437 ssize = mmu_kernel_ssize;
438 }
439
440 vpn = hpt_vpn(addr, vsid, ssize);
441 hash = hpt_hash(vpn, shift, ssize);
442 if (hidx & _PTEIDX_SECONDARY)
443 hash = ~hash;
444
445 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
446 slot += hidx & _PTEIDX_GROUP_IX;
447
448 slot_array[index] = slot;
449 vpn_array[index] = vpn;
450 if (index == PPC64_HUGE_HPTE_BATCH - 1) {
451 /*
452			 * Now do a bulk invalidate
453 */
454 __pSeries_lpar_hugepage_invalidate(slot_array,
455 vpn_array,
456 PPC64_HUGE_HPTE_BATCH,
457 psize, ssize);
458 index = 0;
459 } else
460 index++;
461 }
462 if (index)
463 __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
464 index, psize, ssize);
465}
466
348static void pSeries_lpar_hpte_removebolted(unsigned long ea, 467static void pSeries_lpar_hpte_removebolted(unsigned long ea,
349 int psize, int ssize) 468 int psize, int ssize)
350{ 469{
@@ -356,17 +475,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
356 475
357 slot = pSeries_lpar_hpte_find(vpn, psize, ssize); 476 slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
358 BUG_ON(slot == -1); 477 BUG_ON(slot == -1);
359 478 /*
360 pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); 479 * lpar doesn't use the passed actual page size
480 */
481 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
361} 482}
362 483
363/* Flag bits for H_BULK_REMOVE */
364#define HBR_REQUEST 0x4000000000000000UL
365#define HBR_RESPONSE 0x8000000000000000UL
366#define HBR_END 0xc000000000000000UL
367#define HBR_AVPN 0x0200000000000000UL
368#define HBR_ANDCOND 0x0100000000000000UL
369
370/* 484/*
371 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 485 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
372 * lock. 486 * lock.
@@ -400,8 +514,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
400 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 514 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
401 slot += hidx & _PTEIDX_GROUP_IX; 515 slot += hidx & _PTEIDX_GROUP_IX;
402 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 516 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
517 /*
518 * lpar doesn't use the passed actual page size
519 */
403 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 520 pSeries_lpar_hpte_invalidate(slot, vpn, psize,
404 ssize, local); 521 0, ssize, local);
405 } else { 522 } else {
406 param[pix] = HBR_REQUEST | HBR_AVPN | slot; 523 param[pix] = HBR_REQUEST | HBR_AVPN | slot;
407 param[pix+1] = hpte_encode_avpn(vpn, psize, 524 param[pix+1] = hpte_encode_avpn(vpn, psize,
@@ -452,6 +569,7 @@ void __init hpte_init_lpar(void)
452 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; 569 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
453 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; 570 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
454 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; 571 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
572 ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
455} 573}
456 574
457#ifdef CONFIG_PPC_SMLPAR 575#ifdef CONFIG_PPC_SMLPAR
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 8733a86ad52e..9f8671a44551 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -18,6 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/kmsg_dump.h> 20#include <linux/kmsg_dump.h>
21#include <linux/pstore.h>
21#include <linux/ctype.h> 22#include <linux/ctype.h>
22#include <linux/zlib.h> 23#include <linux/zlib.h>
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
@@ -29,6 +30,13 @@
29/* Max bytes to read/write in one go */ 30/* Max bytes to read/write in one go */
30#define NVRW_CNT 0x20 31#define NVRW_CNT 0x20
31 32
33/*
34 * Set the oops header version to distinguish between old and new format
35 * headers. The lnx,oops-log partition max size is 4000, so a header
36 * version > 4000 helps in identifying the new header.
37 */
38#define OOPS_HDR_VERSION 5000
39
32static unsigned int nvram_size; 40static unsigned int nvram_size;
33static int nvram_fetch, nvram_store; 41static int nvram_fetch, nvram_store;
34static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ 42static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
@@ -45,20 +53,23 @@ struct nvram_os_partition {
45 int min_size; /* minimum acceptable size (0 means req_size) */ 53 int min_size; /* minimum acceptable size (0 means req_size) */
46 long size; /* size of data portion (excluding err_log_info) */ 54 long size; /* size of data portion (excluding err_log_info) */
47 long index; /* offset of data portion of partition */ 55 long index; /* offset of data portion of partition */
56 bool os_partition; /* partition initialized by OS, not FW */
48}; 57};
49 58
50static struct nvram_os_partition rtas_log_partition = { 59static struct nvram_os_partition rtas_log_partition = {
51 .name = "ibm,rtas-log", 60 .name = "ibm,rtas-log",
52 .req_size = 2079, 61 .req_size = 2079,
53 .min_size = 1055, 62 .min_size = 1055,
54 .index = -1 63 .index = -1,
64 .os_partition = true
55}; 65};
56 66
57static struct nvram_os_partition oops_log_partition = { 67static struct nvram_os_partition oops_log_partition = {
58 .name = "lnx,oops-log", 68 .name = "lnx,oops-log",
59 .req_size = 4000, 69 .req_size = 4000,
60 .min_size = 2000, 70 .min_size = 2000,
61 .index = -1 71 .index = -1,
72 .os_partition = true
62}; 73};
63 74
64static const char *pseries_nvram_os_partitions[] = { 75static const char *pseries_nvram_os_partitions[] = {
@@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = {
67 NULL 78 NULL
68}; 79};
69 80
81struct oops_log_info {
82 u16 version;
83 u16 report_length;
84 u64 timestamp;
85} __attribute__((packed));
86
70static void oops_to_nvram(struct kmsg_dumper *dumper, 87static void oops_to_nvram(struct kmsg_dumper *dumper,
71 enum kmsg_dump_reason reason); 88 enum kmsg_dump_reason reason);
72 89
@@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */
83 100
84 * big_oops_buf[] holds the uncompressed text we're capturing. 101 * big_oops_buf[] holds the uncompressed text we're capturing.
85 * 102 *
 86 * oops_buf[] holds the compressed text, preceded by a prefix. 103 * oops_buf[] holds the compressed text, preceded by an oops header.
 87 * The prefix is just a u16 holding the length of the compressed* text. 104 * The oops header has a u16 holding the header version (to differentiate
 88 * (*Or uncompressed, if compression fails.) oops_buf[] gets written 105 * between old and new format headers), followed by a u16 holding the length
 89 * to NVRAM. 106 * of the compressed* text (*or uncompressed, if compression fails) and a u64
 107 * holding the timestamp. oops_buf[] gets written to NVRAM.
90 * 108 *
91 * oops_len points to the prefix. oops_data points to the compressed text. 109 * oops_log_info points to the header. oops_data points to the compressed text.
92 * 110 *
93 * +- oops_buf 111 * +- oops_buf
94 * | +- oops_data 112 * | +- oops_data
95 * v v 113 * v v
96 * +------------+-----------------------------------------------+ 114 * +-----------+-----------+-----------+------------------------+
97 * | length | text | 115 * | version | length | timestamp | text |
98 * | (2 bytes) | (oops_data_sz bytes) | 116 * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) |
99 * +------------+-----------------------------------------------+ 117 * +-----------+-----------+-----------+------------------------+
100 * ^ 118 * ^
101 * +- oops_len 119 * +- oops_log_info
102 * 120 *
103 * We preallocate these buffers during init to avoid kmalloc during oops/panic. 121 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
104 */ 122 */
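/*
 * As a worked example, assuming the lnx,oops-log partition comes up at
 * its requested 4000 bytes and the packed oops_log_info header is 12
 * bytes (2 + 2 + 8), oops_data_sz works out to 4000 - 12 = 3988 bytes.
 */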
105static size_t big_oops_buf_sz; 123static size_t big_oops_buf_sz;
106static char *big_oops_buf, *oops_buf; 124static char *big_oops_buf, *oops_buf;
107static u16 *oops_len;
108static char *oops_data; 125static char *oops_data;
109static size_t oops_data_sz; 126static size_t oops_data_sz;
110 127
@@ -114,6 +131,30 @@ static size_t oops_data_sz;
114#define MEM_LEVEL 4 131#define MEM_LEVEL 4
115static struct z_stream_s stream; 132static struct z_stream_s stream;
116 133
134#ifdef CONFIG_PSTORE
135static struct nvram_os_partition of_config_partition = {
136 .name = "of-config",
137 .index = -1,
138 .os_partition = false
139};
140
141static struct nvram_os_partition common_partition = {
142 .name = "common",
143 .index = -1,
144 .os_partition = false
145};
146
147static enum pstore_type_id nvram_type_ids[] = {
148 PSTORE_TYPE_DMESG,
149 PSTORE_TYPE_PPC_RTAS,
150 PSTORE_TYPE_PPC_OF,
151 PSTORE_TYPE_PPC_COMMON,
152 -1
153};
154static int read_type;
155static unsigned long last_rtas_event;
156#endif
157
117static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) 158static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
118{ 159{
119 unsigned int i; 160 unsigned int i;
@@ -275,48 +316,72 @@ int nvram_write_error_log(char * buff, int length,
275{ 316{
276 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, 317 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
277 err_type, error_log_cnt); 318 err_type, error_log_cnt);
278 if (!rc) 319 if (!rc) {
279 last_unread_rtas_event = get_seconds(); 320 last_unread_rtas_event = get_seconds();
321#ifdef CONFIG_PSTORE
322 last_rtas_event = get_seconds();
323#endif
324 }
325
280 return rc; 326 return rc;
281} 327}
282 328
283/* nvram_read_error_log 329/* nvram_read_partition
284 * 330 *
285 * Reads nvram for error log for at most 'length' 331 * Reads nvram partition for at most 'length'
286 */ 332 */
287int nvram_read_error_log(char * buff, int length, 333int nvram_read_partition(struct nvram_os_partition *part, char *buff,
288 unsigned int * err_type, unsigned int * error_log_cnt) 334 int length, unsigned int *err_type,
335 unsigned int *error_log_cnt)
289{ 336{
290 int rc; 337 int rc;
291 loff_t tmp_index; 338 loff_t tmp_index;
292 struct err_log_info info; 339 struct err_log_info info;
293 340
294 if (rtas_log_partition.index == -1) 341 if (part->index == -1)
295 return -1; 342 return -1;
296 343
297 if (length > rtas_log_partition.size) 344 if (length > part->size)
298 length = rtas_log_partition.size; 345 length = part->size;
299 346
300 tmp_index = rtas_log_partition.index; 347 tmp_index = part->index;
301 348
302 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); 349 if (part->os_partition) {
303 if (rc <= 0) { 350 rc = ppc_md.nvram_read((char *)&info,
304 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 351 sizeof(struct err_log_info),
305 return rc; 352 &tmp_index);
353 if (rc <= 0) {
354 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__,
355 rc);
356 return rc;
357 }
306 } 358 }
307 359
308 rc = ppc_md.nvram_read(buff, length, &tmp_index); 360 rc = ppc_md.nvram_read(buff, length, &tmp_index);
309 if (rc <= 0) { 361 if (rc <= 0) {
310 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 362 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc);
311 return rc; 363 return rc;
312 } 364 }
313 365
314 *error_log_cnt = info.seq_num; 366 if (part->os_partition) {
315 *err_type = info.error_type; 367 *error_log_cnt = info.seq_num;
368 *err_type = info.error_type;
369 }
316 370
317 return 0; 371 return 0;
318} 372}
319 373
374/* nvram_read_error_log
375 *
376 * Reads nvram for error log for at most 'length'
377 */
378int nvram_read_error_log(char *buff, int length,
379 unsigned int *err_type, unsigned int *error_log_cnt)
380{
381 return nvram_read_partition(&rtas_log_partition, buff, length,
382 err_type, error_log_cnt);
383}
384
320/* This doesn't actually zero anything, but it sets the event_logged 385/* This doesn't actually zero anything, but it sets the event_logged
321 * word to tell that this event is safely in syslog. 386 * word to tell that this event is safely in syslog.
322 */ 387 */
@@ -405,6 +470,349 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
405 return 0; 470 return 0;
406} 471}
407 472
473/*
474 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
475 * would logging this oops/panic overwrite an RTAS event that rtas_errd
476 * hasn't had a chance to read and process? Return 1 if so, else 0.
477 *
478 * We assume that if rtas_errd hasn't read the RTAS event in
479 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
480 */
481static int clobbering_unread_rtas_event(void)
482{
483 return (oops_log_partition.index == rtas_log_partition.index
484 && last_unread_rtas_event
485 && get_seconds() - last_unread_rtas_event <=
486 NVRAM_RTAS_READ_TIMEOUT);
487}
488
489/* Derived from logfs_compress() */
490static int nvram_compress(const void *in, void *out, size_t inlen,
491 size_t outlen)
492{
493 int err, ret;
494
495 ret = -EIO;
496 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
497 MEM_LEVEL, Z_DEFAULT_STRATEGY);
498 if (err != Z_OK)
499 goto error;
500
501 stream.next_in = in;
502 stream.avail_in = inlen;
503 stream.total_in = 0;
504 stream.next_out = out;
505 stream.avail_out = outlen;
506 stream.total_out = 0;
507
508 err = zlib_deflate(&stream, Z_FINISH);
509 if (err != Z_STREAM_END)
510 goto error;
511
512 err = zlib_deflateEnd(&stream);
513 if (err != Z_OK)
514 goto error;
515
516 if (stream.total_out >= stream.total_in)
517 goto error;
518
519 ret = stream.total_out;
520error:
521 return ret;
522}
523
524/* Compress the text from big_oops_buf into oops_buf. */
525static int zip_oops(size_t text_len)
526{
527 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
528 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
529 oops_data_sz);
530 if (zipped_len < 0) {
531 pr_err("nvram: compression failed; returned %d\n", zipped_len);
532 pr_err("nvram: logging uncompressed oops/panic report\n");
533 return -1;
534 }
535 oops_hdr->version = OOPS_HDR_VERSION;
536 oops_hdr->report_length = (u16) zipped_len;
537 oops_hdr->timestamp = get_seconds();
538 return 0;
539}
540
541#ifdef CONFIG_PSTORE
542/* Derived from logfs_uncompress */
543int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen)
544{
545 int err, ret;
546
547 ret = -EIO;
548 err = zlib_inflateInit(&stream);
549 if (err != Z_OK)
550 goto error;
551
552 stream.next_in = in;
553 stream.avail_in = inlen;
554 stream.total_in = 0;
555 stream.next_out = out;
556 stream.avail_out = outlen;
557 stream.total_out = 0;
558
559 err = zlib_inflate(&stream, Z_FINISH);
560 if (err != Z_STREAM_END)
561 goto error;
562
563 err = zlib_inflateEnd(&stream);
564 if (err != Z_OK)
565 goto error;
566
567 ret = stream.total_out;
568error:
569 return ret;
570}
571
572static int unzip_oops(char *oops_buf, char *big_buf)
573{
574 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
575 u64 timestamp = oops_hdr->timestamp;
576 char *big_oops_data = NULL;
577 char *oops_data_buf = NULL;
578 size_t big_oops_data_sz;
579 int unzipped_len;
580
581 big_oops_data = big_buf + sizeof(struct oops_log_info);
582 big_oops_data_sz = big_oops_buf_sz - sizeof(struct oops_log_info);
583 oops_data_buf = oops_buf + sizeof(struct oops_log_info);
584
585 unzipped_len = nvram_decompress(oops_data_buf, big_oops_data,
586 oops_hdr->report_length,
587 big_oops_data_sz);
588
589 if (unzipped_len < 0) {
590 pr_err("nvram: decompression failed; returned %d\n",
591 unzipped_len);
592 return -1;
593 }
594 oops_hdr = (struct oops_log_info *)big_buf;
595 oops_hdr->version = OOPS_HDR_VERSION;
596 oops_hdr->report_length = (u16) unzipped_len;
597 oops_hdr->timestamp = timestamp;
598 return 0;
599}
600
601static int nvram_pstore_open(struct pstore_info *psi)
602{
603 /* Reset the iterator to start reading partitions again */
604 read_type = -1;
605 return 0;
606}
607
608/**
609 * nvram_pstore_write - pstore write callback for nvram
610 * @type: Type of message logged
611 * @reason: reason behind dump (oops/panic)
612 * @id: identifier to indicate the write performed
613 * @part: pstore writes data to the registered buffer in parts;
614 * this is the part number.
615 * @count: Indicates oops count
616 * @hsize: Size of header added by pstore
617 * @size: number of bytes written to the registered buffer
618 * @psi: registered pstore_info structure
619 *
620 * Called by pstore_dump() when an oops or panic report is logged in the
621 * printk buffer.
622 * Returns 0 on successful write.
623 */
624static int nvram_pstore_write(enum pstore_type_id type,
625 enum kmsg_dump_reason reason,
626 u64 *id, unsigned int part, int count,
627 size_t hsize, size_t size,
628 struct pstore_info *psi)
629{
630 int rc;
631 unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
632 struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
633
634 /* part 1 has the recent messages from printk buffer */
635 if (part > 1 || type != PSTORE_TYPE_DMESG ||
636 clobbering_unread_rtas_event())
637 return -1;
638
639 oops_hdr->version = OOPS_HDR_VERSION;
640 oops_hdr->report_length = (u16) size;
641 oops_hdr->timestamp = get_seconds();
642
643 if (big_oops_buf) {
644 rc = zip_oops(size);
645 /*
646		 * If compression fails, copy recent log messages from
647 * big_oops_buf to oops_data.
648 */
649 if (rc != 0) {
650 size_t diff = size - oops_data_sz + hsize;
651
652 if (size > oops_data_sz) {
653 memcpy(oops_data, big_oops_buf, hsize);
654 memcpy(oops_data + hsize, big_oops_buf + diff,
655 oops_data_sz - hsize);
656
657 oops_hdr->report_length = (u16) oops_data_sz;
658 } else
659 memcpy(oops_data, big_oops_buf, size);
660 } else
661 err_type = ERR_TYPE_KERNEL_PANIC_GZ;
662 }
663
664 rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
665 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
666 count);
667
668 if (rc != 0)
669 return rc;
670
671 *id = part;
672 return 0;
673}
674
675/*
676 * Reads the oops/panic report, rtas, of-config and common partitions.
677 * Returns the length of the data we read from each partition.
678 * Returns 0 once all partitions have been read.
679 */
680static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
681 int *count, struct timespec *time, char **buf,
682 struct pstore_info *psi)
683{
684 struct oops_log_info *oops_hdr;
685 unsigned int err_type, id_no, size = 0;
686 struct nvram_os_partition *part = NULL;
687 char *buff = NULL, *big_buff = NULL;
688 int rc, sig = 0;
689 loff_t p;
690
691read_partition:
692 read_type++;
693
694 switch (nvram_type_ids[read_type]) {
695 case PSTORE_TYPE_DMESG:
696 part = &oops_log_partition;
697 *type = PSTORE_TYPE_DMESG;
698 break;
699 case PSTORE_TYPE_PPC_RTAS:
700 part = &rtas_log_partition;
701 *type = PSTORE_TYPE_PPC_RTAS;
702 time->tv_sec = last_rtas_event;
703 time->tv_nsec = 0;
704 break;
705 case PSTORE_TYPE_PPC_OF:
706 sig = NVRAM_SIG_OF;
707 part = &of_config_partition;
708 *type = PSTORE_TYPE_PPC_OF;
709 *id = PSTORE_TYPE_PPC_OF;
710 time->tv_sec = 0;
711 time->tv_nsec = 0;
712 break;
713 case PSTORE_TYPE_PPC_COMMON:
714 sig = NVRAM_SIG_SYS;
715 part = &common_partition;
716 *type = PSTORE_TYPE_PPC_COMMON;
717 *id = PSTORE_TYPE_PPC_COMMON;
718 time->tv_sec = 0;
719 time->tv_nsec = 0;
720 break;
721 default:
722 return 0;
723 }
724
725 if (!part->os_partition) {
726 p = nvram_find_partition(part->name, sig, &size);
727 if (p <= 0) {
728 pr_err("nvram: Failed to find partition %s, "
729 "err %d\n", part->name, (int)p);
730 return 0;
731 }
732 part->index = p;
733 part->size = size;
734 }
735
736 buff = kmalloc(part->size, GFP_KERNEL);
737
738 if (!buff)
739 return -ENOMEM;
740
741 if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
742 kfree(buff);
743 return 0;
744 }
745
746 *count = 0;
747
748 if (part->os_partition)
749 *id = id_no;
750
751 if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
752 oops_hdr = (struct oops_log_info *)buff;
753 *buf = buff + sizeof(*oops_hdr);
754
755 if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) {
756 big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL);
757 if (!big_buff)
758 return -ENOMEM;
759
760 rc = unzip_oops(buff, big_buff);
761
762 if (rc != 0) {
763 kfree(buff);
764 kfree(big_buff);
765 goto read_partition;
766 }
767
768 oops_hdr = (struct oops_log_info *)big_buff;
769 *buf = big_buff + sizeof(*oops_hdr);
770 kfree(buff);
771 }
772
773 time->tv_sec = oops_hdr->timestamp;
774 time->tv_nsec = 0;
775 return oops_hdr->report_length;
776 }
777
778 *buf = buff;
779 return part->size;
780}
781
782static struct pstore_info nvram_pstore_info = {
783 .owner = THIS_MODULE,
784 .name = "nvram",
785 .open = nvram_pstore_open,
786 .read = nvram_pstore_read,
787 .write = nvram_pstore_write,
788};
789
790static int nvram_pstore_init(void)
791{
792 int rc = 0;
793
794 if (big_oops_buf) {
795 nvram_pstore_info.buf = big_oops_buf;
796 nvram_pstore_info.bufsize = big_oops_buf_sz;
797 } else {
798 nvram_pstore_info.buf = oops_data;
799 nvram_pstore_info.bufsize = oops_data_sz;
800 }
801
802 rc = pstore_register(&nvram_pstore_info);
803 if (rc != 0)
804 pr_err("nvram: pstore_register() failed, defaults to "
805 "kmsg_dump; returned %d\n", rc);
806
807 return rc;
808}
809#else
810static int nvram_pstore_init(void)
811{
812 return -1;
813}
814#endif
815
408static void __init nvram_init_oops_partition(int rtas_partition_exists) 816static void __init nvram_init_oops_partition(int rtas_partition_exists)
409{ 817{
410 int rc; 818 int rc;
@@ -425,9 +833,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
425 oops_log_partition.name); 833 oops_log_partition.name);
426 return; 834 return;
427 } 835 }
428 oops_len = (u16*) oops_buf; 836 oops_data = oops_buf + sizeof(struct oops_log_info);
429 oops_data = oops_buf + sizeof(u16); 837 oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
430 oops_data_sz = oops_log_partition.size - sizeof(u16);
431 838
432 /* 839 /*
433 * Figure compression (preceded by elimination of each line's <n> 840 * Figure compression (preceded by elimination of each line's <n>
@@ -452,6 +859,11 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
452 stream.workspace = NULL; 859 stream.workspace = NULL;
453 } 860 }
454 861
862 rc = nvram_pstore_init();
863
864 if (!rc)
865 return;
866
455 rc = kmsg_dump_register(&nvram_kmsg_dumper); 867 rc = kmsg_dump_register(&nvram_kmsg_dumper);
456 if (rc != 0) { 868 if (rc != 0) {
457 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); 869 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
@@ -501,70 +913,6 @@ int __init pSeries_nvram_init(void)
501 return 0; 913 return 0;
502} 914}
503 915
504/*
505 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
506 * would logging this oops/panic overwrite an RTAS event that rtas_errd
507 * hasn't had a chance to read and process? Return 1 if so, else 0.
508 *
509 * We assume that if rtas_errd hasn't read the RTAS event in
510 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
511 */
512static int clobbering_unread_rtas_event(void)
513{
514 return (oops_log_partition.index == rtas_log_partition.index
515 && last_unread_rtas_event
516 && get_seconds() - last_unread_rtas_event <=
517 NVRAM_RTAS_READ_TIMEOUT);
518}
519
520/* Derived from logfs_compress() */
521static int nvram_compress(const void *in, void *out, size_t inlen,
522 size_t outlen)
523{
524 int err, ret;
525
526 ret = -EIO;
527 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
528 MEM_LEVEL, Z_DEFAULT_STRATEGY);
529 if (err != Z_OK)
530 goto error;
531
532 stream.next_in = in;
533 stream.avail_in = inlen;
534 stream.total_in = 0;
535 stream.next_out = out;
536 stream.avail_out = outlen;
537 stream.total_out = 0;
538
539 err = zlib_deflate(&stream, Z_FINISH);
540 if (err != Z_STREAM_END)
541 goto error;
542
543 err = zlib_deflateEnd(&stream);
544 if (err != Z_OK)
545 goto error;
546
547 if (stream.total_out >= stream.total_in)
548 goto error;
549
550 ret = stream.total_out;
551error:
552 return ret;
553}
554
555/* Compress the text from big_oops_buf into oops_buf. */
556static int zip_oops(size_t text_len)
557{
558 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
559 oops_data_sz);
560 if (zipped_len < 0) {
561 pr_err("nvram: compression failed; returned %d\n", zipped_len);
562 pr_err("nvram: logging uncompressed oops/panic report\n");
563 return -1;
564 }
565 *oops_len = (u16) zipped_len;
566 return 0;
567}
568 916
569/* 917/*
570 * This is our kmsg_dump callback, called after an oops or panic report 918 * This is our kmsg_dump callback, called after an oops or panic report
@@ -576,6 +924,7 @@ static int zip_oops(size_t text_len)
576static void oops_to_nvram(struct kmsg_dumper *dumper, 924static void oops_to_nvram(struct kmsg_dumper *dumper,
577 enum kmsg_dump_reason reason) 925 enum kmsg_dump_reason reason)
578{ 926{
927 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
579 static unsigned int oops_count = 0; 928 static unsigned int oops_count = 0;
580 static bool panicking = false; 929 static bool panicking = false;
581 static DEFINE_SPINLOCK(lock); 930 static DEFINE_SPINLOCK(lock);
@@ -619,14 +968,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
619 } 968 }
620 if (rc != 0) { 969 if (rc != 0) {
621 kmsg_dump_rewind(dumper); 970 kmsg_dump_rewind(dumper);
622 kmsg_dump_get_buffer(dumper, true, 971 kmsg_dump_get_buffer(dumper, false,
623 oops_data, oops_data_sz, &text_len); 972 oops_data, oops_data_sz, &text_len);
624 err_type = ERR_TYPE_KERNEL_PANIC; 973 err_type = ERR_TYPE_KERNEL_PANIC;
625 *oops_len = (u16) text_len; 974 oops_hdr->version = OOPS_HDR_VERSION;
975 oops_hdr->report_length = (u16) text_len;
976 oops_hdr->timestamp = get_seconds();
626 } 977 }
627 978
628 (void) nvram_write_os_partition(&oops_log_partition, oops_buf, 979 (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
629 (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); 980 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
981 ++oops_count);
630 982
631 spin_unlock_irqrestore(&lock, flags); 983 spin_unlock_irqrestore(&lock, flags);
632} 984}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index c91b22be9288..efe61374f6ea 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -64,91 +64,6 @@ pcibios_find_pci_bus(struct device_node *dn)
64} 64}
65EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); 65EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
66 66
67/**
68 * __pcibios_remove_pci_devices - remove all devices under this bus
69 * @bus: the indicated PCI bus
70 * @purge_pe: destroy the PE on removal of PCI devices
71 *
72 * Remove all of the PCI devices under this bus both from the
73 * linux pci device tree, and from the powerpc EEH address cache.
 74 * By default, the corresponding PE will be destroyed during the
 75 * normal PCI hotplug path. For PCI hotplug during EEH recovery,
 76 * the corresponding PE won't be destroyed and deallocated.
77 */
78void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
79{
80 struct pci_dev *dev, *tmp;
81 struct pci_bus *child_bus;
82
83 /* First go down child busses */
84 list_for_each_entry(child_bus, &bus->children, node)
85 __pcibios_remove_pci_devices(child_bus, purge_pe);
86
87 pr_debug("PCI: Removing devices on bus %04x:%02x\n",
88 pci_domain_nr(bus), bus->number);
89 list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
90 pr_debug(" * Removing %s...\n", pci_name(dev));
91 eeh_remove_bus_device(dev, purge_pe);
92 pci_stop_and_remove_bus_device(dev);
93 }
94}
95
96/**
97 * pcibios_remove_pci_devices - remove all devices under this bus
98 *
99 * Remove all of the PCI devices under this bus both from the
100 * linux pci device tree, and from the powerpc EEH address cache.
101 */
102void pcibios_remove_pci_devices(struct pci_bus *bus)
103{
104 __pcibios_remove_pci_devices(bus, 1);
105}
106EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
107
108/**
109 * pcibios_add_pci_devices - adds new pci devices to bus
110 *
111 * This routine will find and fixup new pci devices under
112 * the indicated bus. This routine presumes that there
113 * might already be some devices under this bridge, so
114 * it carefully tries to add only new devices. (And that
115 * is how this routine differs from other, similar pcibios
116 * routines.)
117 */
118void pcibios_add_pci_devices(struct pci_bus * bus)
119{
120 int slotno, num, mode, pass, max;
121 struct pci_dev *dev;
122 struct device_node *dn = pci_bus_to_OF_node(bus);
123
124 eeh_add_device_tree_early(dn);
125
126 mode = PCI_PROBE_NORMAL;
127 if (ppc_md.pci_probe_mode)
128 mode = ppc_md.pci_probe_mode(bus);
129
130 if (mode == PCI_PROBE_DEVTREE) {
131 /* use ofdt-based probe */
132 of_rescan_bus(dn, bus);
133 } else if (mode == PCI_PROBE_NORMAL) {
134 /* use legacy probe */
135 slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
136 num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
137 if (!num)
138 return;
139 pcibios_setup_bus_devices(bus);
140 max = bus->busn_res.start;
141 for (pass=0; pass < 2; pass++)
142 list_for_each_entry(dev, &bus->devices, bus_list) {
143 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
144 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
145 max = pci_scan_bridge(bus, dev, max, pass);
146 }
147 }
148 pcibios_finish_adding_to_bus(bus);
149}
150EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
151
152struct pci_controller *init_phb_dynamic(struct device_node *dn) 67struct pci_controller *init_phb_dynamic(struct device_node *dn)
153{ 68{
154 struct pci_controller *phb; 69 struct pci_controller *phb;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c4dfccd3a3d9..7b3cbde8c783 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier)
83 switch (event_modifier) { 83 switch (event_modifier) {
84 case EPOW_SHUTDOWN_NORMAL: 84 case EPOW_SHUTDOWN_NORMAL:
85 pr_emerg("Firmware initiated power off"); 85 pr_emerg("Firmware initiated power off");
86 orderly_poweroff(1); 86 orderly_poweroff(true);
87 break; 87 break;
88 88
89 case EPOW_SHUTDOWN_ON_UPS: 89 case EPOW_SHUTDOWN_ON_UPS:
@@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier)
95 pr_emerg("Loss of system critical functions reported by " 95 pr_emerg("Loss of system critical functions reported by "
96 "firmware"); 96 "firmware");
97 pr_emerg("Check RTAS error log for details"); 97 pr_emerg("Check RTAS error log for details");
98 orderly_poweroff(1); 98 orderly_poweroff(true);
99 break; 99 break;
100 100
101 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: 101 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
102 pr_emerg("Ambient temperature too high reported by firmware"); 102 pr_emerg("Ambient temperature too high reported by firmware");
103 pr_emerg("Check RTAS error log for details"); 103 pr_emerg("Check RTAS error log for details");
104 orderly_poweroff(1); 104 orderly_poweroff(true);
105 break; 105 break;
106 106
107 default: 107 default:
@@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log)
162 162
163 case EPOW_SYSTEM_HALT: 163 case EPOW_SYSTEM_HALT:
164 pr_emerg("Firmware initiated power off"); 164 pr_emerg("Firmware initiated power off");
165 orderly_poweroff(1); 165 orderly_poweroff(true);
166 break; 166 break;
167 167
168 case EPOW_MAIN_ENCLOSURE: 168 case EPOW_MAIN_ENCLOSURE:
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 12bc8c3663ad..306643cc9dbc 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -192,7 +192,7 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
192 /* Special case - we inhibit secondary thread startup 192 /* Special case - we inhibit secondary thread startup
193 * during boot if the user requests it. 193 * during boot if the user requests it.
194 */ 194 */
195 if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { 195 if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
196 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) 196 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
197 return 0; 197 return 0;
198 if (smt_enabled_at_boot 198 if (smt_enabled_at_boot