path: root/arch/powerpc/kernel
author    Sage Weil <sage@inktank.com>  2013-08-15 14:11:45 -0400
committer Sage Weil <sage@inktank.com>  2013-08-15 14:11:45 -0400
commit    ee3e542fec6e69bc9fb668698889a37d93950ddf (patch)
tree      e74ee766a4764769ef1d3d45d266b4dea64101d3 /arch/powerpc/kernel
parent    fe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff)
parent    f1d6e17f540af37bb1891480143669ba7636c4cf (diff)
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/Makefile          |    4
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c     |   10
-rw-r--r--  arch/powerpc/kernel/cacheinfo.c       |   36
-rw-r--r--  arch/powerpc/kernel/cputable.c        |   20
-rw-r--r--  arch/powerpc/kernel/crash_dump.c      |   10
-rw-r--r--  arch/powerpc/kernel/eeh.c             | 1068
-rw-r--r--  arch/powerpc/kernel/eeh_cache.c       |  310
-rw-r--r--  arch/powerpc/kernel/eeh_dev.c         |  112
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c      |  732
-rw-r--r--  arch/powerpc/kernel/eeh_event.c       |  182
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c          |  792
-rw-r--r--  arch/powerpc/kernel/eeh_sysfs.c       |   95
-rw-r--r--  arch/powerpc/kernel/entry_64.S        |   66
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S  |   57
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c   |    3
-rw-r--r--  arch/powerpc/kernel/ibmebus.c         |   22
-rw-r--r--  arch/powerpc/kernel/idle.c            |    4
-rw-r--r--  arch/powerpc/kernel/io-workarounds.c  |   11
-rw-r--r--  arch/powerpc/kernel/iommu.c           |  323
-rw-r--r--  arch/powerpc/kernel/irq.c             |    4
-rw-r--r--  arch/powerpc/kernel/kprobes.c         |   20
-rw-r--r--  arch/powerpc/kernel/kvm.c             |    9
-rw-r--r--  arch/powerpc/kernel/nvram_64.c        |   20
-rw-r--r--  arch/powerpc/kernel/pci-common.c      |    2
-rw-r--r--  arch/powerpc/kernel/pci-hotplug.c     |  110
-rw-r--r--  arch/powerpc/kernel/pci_of_scan.c     |   61
-rw-r--r--  arch/powerpc/kernel/proc_powerpc.c    |   20
-rw-r--r--  arch/powerpc/kernel/process.c         |   14
-rw-r--r--  arch/powerpc/kernel/prom.c            |   42
-rw-r--r--  arch/powerpc/kernel/prom_init.c       |    5
-rw-r--r--  arch/powerpc/kernel/ptrace.c          |   30
-rw-r--r--  arch/powerpc/kernel/reloc_32.S        |    3
-rw-r--r--  arch/powerpc/kernel/rtas.c            |    4
-rw-r--r--  arch/powerpc/kernel/setup_64.c        |    2
-rw-r--r--  arch/powerpc/kernel/signal_32.c       |   70
-rw-r--r--  arch/powerpc/kernel/signal_64.c       |    8
-rw-r--r--  arch/powerpc/kernel/smp.c             |   12
-rw-r--r--  arch/powerpc/kernel/sysfs.c           |    6
-rw-r--r--  arch/powerpc/kernel/time.c            |    1
-rw-r--r--  arch/powerpc/kernel/tm.S              |   38
-rw-r--r--  arch/powerpc/kernel/traps.c           |   93
-rw-r--r--  arch/powerpc/kernel/udbg.c            |    2
-rw-r--r--  arch/powerpc/kernel/vdso.c            |    2
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S     |    3
44 files changed, 4172 insertions, 266 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f960a7944553..a8619bfe879e 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -58,6 +58,8 @@ obj-$(CONFIG_RTAS_PROC)	+= rtas-proc.o
 obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 obj-$(CONFIG_IBMVIO)		+= vio.o
 obj-$(CONFIG_IBMEBUS)		+= ibmebus.o
+obj-$(CONFIG_EEH)		+= eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+				   eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_FA_DUMP)		+= fadump.o
@@ -100,7 +102,7 @@ obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_SWIOTLB)		+= dma-swiotlb.o
 
-pci64-$(CONFIG_PPC64)		+= pci_dn.o isa-bridge.o
+pci64-$(CONFIG_PPC64)		+= pci_dn.o pci-hotplug.o isa-bridge.o
 obj-$(CONFIG_PCI)		+= pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
				   pci-common.o pci_of_scan.o
 obj-$(CONFIG_PCI_MSI)		+= msi.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6f16ffafa6f0..8207459efe56 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -105,9 +105,6 @@ int main(void)
 	DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
 #else /* CONFIG_PPC64 */
 	DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
-#endif
 #ifdef CONFIG_SPE
 	DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
 	DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
@@ -115,6 +112,9 @@ int main(void)
 	DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
 #endif
@@ -132,13 +132,15 @@ int main(void)
 	DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier));
 	DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0));
 	DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2));
-	DEFINE(THREAD_MMCRA, offsetof(struct thread_struct, mmcra));
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch));
 	DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar));
 	DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr));
 	DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar));
+	DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar));
+	DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
+	DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
 	DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
 	DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
					transact_vr[0]));
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 92c6b008dd2b..9262cf2bec4b 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -131,7 +131,8 @@ static const char *cache_type_string(const struct cache *cache)
 	return cache_type_info[cache->type].name;
 }
 
-static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode)
+static void cache_init(struct cache *cache, int type, int level,
+		       struct device_node *ofnode)
 {
 	cache->type = type;
 	cache->level = level;
@@ -140,7 +141,7 @@ static void __cpuinit cache_init(struct cache *cache, int type, int level, struc
 	list_add(&cache->list, &cache_list);
 }
 
-static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level, struct device_node *ofnode)
 {
 	struct cache *cache;
 
@@ -324,7 +325,8 @@ static bool cache_node_is_unified(const struct device_node *np)
 	return of_get_property(np, "cache-unified", NULL);
 }
 
-static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_unified(struct device_node *node,
+						  int level)
 {
 	struct cache *cache;
 
@@ -335,7 +337,8 @@ static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *
 	return cache;
 }
 
-static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_split(struct device_node *node,
+						int level)
 {
 	struct cache *dcache, *icache;
 
@@ -357,7 +360,7 @@ err:
 	return NULL;
 }
 
-static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int level)
 {
 	struct cache *cache;
 
@@ -369,7 +372,8 @@ static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, in
 	return cache;
 }
 
-static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level)
+static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+						 int level)
 {
 	struct cache *cache;
 
@@ -385,7 +389,7 @@ static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *n
 	return cache;
 }
 
-static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger)
+static void link_cache_lists(struct cache *smaller, struct cache *bigger)
 {
 	while (smaller->next_local) {
 		if (smaller->next_local == bigger)
@@ -396,13 +400,13 @@ static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigg
 	smaller->next_local = bigger;
 }
 
-static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache)
+static void do_subsidiary_caches_debugcheck(struct cache *cache)
 {
 	WARN_ON_ONCE(cache->level != 1);
 	WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
 }
 
-static void __cpuinit do_subsidiary_caches(struct cache *cache)
+static void do_subsidiary_caches(struct cache *cache)
 {
 	struct device_node *subcache_node;
 	int level = cache->level;
@@ -423,7 +427,7 @@ static void __cpuinit do_subsidiary_caches(struct cache *cache)
 	}
 }
 
-static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id)
+static struct cache *cache_chain_instantiate(unsigned int cpu_id)
 {
 	struct device_node *cpu_node;
 	struct cache *cpu_cache = NULL;
@@ -448,7 +452,7 @@ out:
 	return cpu_cache;
 }
 
-static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
+static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id)
 {
 	struct cache_dir *cache_dir;
 	struct device *dev;
@@ -653,7 +657,7 @@ static struct kobj_type cache_index_type = {
 	.default_attrs = cache_index_default_attrs,
 };
 
-static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
 {
 	const char *cache_name;
 	const char *cache_type;
@@ -696,7 +700,8 @@ static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *d
 	kfree(buf);
 }
 
-static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir)
+static void cacheinfo_create_index_dir(struct cache *cache, int index,
+				       struct cache_dir *cache_dir)
 {
 	struct cache_index_dir *index_dir;
 	int rc;
@@ -722,7 +727,8 @@ err:
 	kfree(index_dir);
 }
 
-static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list)
+static void cacheinfo_sysfs_populate(unsigned int cpu_id,
+				     struct cache *cache_list)
 {
 	struct cache_dir *cache_dir;
 	struct cache *cache;
@@ -740,7 +746,7 @@ static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache
 	}
 }
 
-void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id)
+void cacheinfo_cpu_online(unsigned int cpu_id)
 {
 	struct cache *cache;
 
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2a45d0f04385..22973a74df73 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7+",
 	},
-	{ /* Power8 */
+	{ /* Power8E */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x004b0000,
+		.cpu_name		= "POWER8E (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.oprofile_cpu_type	= "ppc64/power8",
+		.oprofile_type		= PPC_OPROFILE_INVALID,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.platform		= "power8",
+	},
+	{ /* Power8 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004d0000,
 		.cpu_name		= "POWER8 (raw)",
 		.cpu_features		= CPU_FTRS_POWER8,
 		.cpu_user_features	= COMMON_USER_POWER8,
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9ec3fe174cba..779a78c26435 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void)
 }
 #endif /* CONFIG_NONSTATIC_KERNEL */
 
-static int __init parse_savemaxmem(char *p)
-{
-	if (p)
-		saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
-
-	return 1;
-}
-__setup("savemaxmem=", parse_savemaxmem);
-
-
 static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
 		unsigned long offset, int userbuf)
 {
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
new file mode 100644
index 000000000000..55593ee2d5aa
--- /dev/null
+++ b/arch/powerpc/kernel/eeh.c
@@ -0,0 +1,1068 @@
+/*
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ * Copyright 2001-2012 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/of.h>
+
+#include <linux/atomic.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+
+/** Overview:
+ *  EEH, or "Extended Error Handling", is a PCI bridge technology for
+ *  dealing with PCI bus errors that can't be dealt with within the
+ *  usual PCI framework, except by check-stopping the CPU.  Systems
+ *  that are designed for high-availability/reliability cannot afford
+ *  to crash due to a "mere" PCI error, thus the need for EEH.
+ *  An EEH-capable bridge operates by converting a detected error
+ *  into a "slot freeze", taking the PCI adapter off-line, making
+ *  the slot behave, from the OS's point of view, as if the slot
+ *  were "empty": all reads return 0xff's and all writes are silently
+ *  ignored.  EEH slot isolation events can be triggered by parity
+ *  errors on the address or data busses (e.g. during posted writes),
+ *  which in turn might be caused by low voltage on the bus, dust,
+ *  vibration, humidity, radioactivity or plain-old failed hardware.
+ *
+ *  Note, however, that one of the leading causes of EEH slot
+ *  freeze events is buggy device drivers, buggy device microcode,
+ *  or buggy device hardware.  This is because any attempt by the
+ *  device to bus-master data to a memory address that is not
+ *  assigned to the device will trigger a slot freeze.  (The idea
+ *  is to prevent devices-gone-wild from corrupting system memory).
+ *  Buggy hardware/drivers will have a miserable time co-existing
+ *  with EEH.
+ *
+ *  Ideally, a PCI device driver, when suspecting that an isolation
+ *  event has occurred (e.g. by reading 0xff's), will then ask EEH
+ *  whether this is the case, and then take appropriate steps to
+ *  reset the PCI slot, the PCI device, and then resume operations.
+ *  However, until that day, the checking is done here, with the
+ *  eeh_check_failure() routine embedded in the MMIO macros.  If
+ *  the slot is found to be isolated, an "EEH Event" is synthesized
+ *  and sent out for processing.
+ */
+
+/* If a device driver keeps reading an MMIO register in an interrupt
+ * handler after a slot isolation event, it might be broken.
+ * This sets the threshold for how many read attempts we allow
+ * before printing an error message.
+ */
+#define EEH_MAX_FAILS	2100000
+
+/* Time to wait for a PCI slot to report status, in milliseconds */
+#define PCI_BUS_RESET_WAIT_MSEC	(60*1000)
+
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
+
+int eeh_subsystem_enabled;
+EXPORT_SYMBOL(eeh_subsystem_enabled);
+
+/*
+ * EEH probe mode support. The intention is to support multiple
+ * platforms for EEH. Some platforms like pSeries do PCI enumeration
+ * based on the device tree. However, other platforms like powernv probe
+ * PCI devices from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for a particular
+ * OF node or PCI device so that the corresponding PE is created there.
+ */
+int eeh_probe_mode;
+
+/* Lock to avoid races due to multiple reports of an error */
+DEFINE_RAW_SPINLOCK(confirm_error_lock);
+
+/* Buffer for reporting pci register dumps. It's here in BSS, and
+ * not dynamically allocated, so that it ends up in RMO where RTAS
+ * can access it.
+ */
+#define EEH_PCI_REGS_LOG_LEN 4096
+static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
+
+/*
+ * The struct is used to maintain the EEH global statistics
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs.
+ */
+struct eeh_stats {
+	u64 no_device;		/* PCI device not found		*/
+	u64 no_dn;		/* OF node not found		*/
+	u64 no_cfg_addr;	/* Config address not found	*/
+	u64 ignored_check;	/* EEH check skipped		*/
+	u64 total_mmio_ffs;	/* Total EEH checks		*/
+	u64 false_positives;	/* Unnecessary EEH checks	*/
+	u64 slot_resets;	/* PE reset			*/
+};
+
+static struct eeh_stats eeh_stats;
+
+#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
+
+/**
+ * eeh_gather_pci_data - Copy assorted PCI config space registers to buf
+ * @edev: device to report data for
+ * @buf: pointer to the buffer in which to log
+ * @len: amount of room in the buffer
+ *
+ * This routine captures assorted PCI configuration space data,
+ * and puts it into a buffer for RTAS error logging.
+ */
+static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
+{
+	struct device_node *dn = eeh_dev_to_of_node(edev);
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	u32 cfg;
+	int cap, i;
+	int n = 0;
+
+	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
+	printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
+
+	eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
+	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
+	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+
+	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
+	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
+	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
+
+	if (!dev) {
+		printk(KERN_WARNING "EEH: no PCI device for this of node\n");
+		return n;
+	}
+
+	/* Gather bridge-specific registers */
+	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
+		eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
+		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
+		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
+
+		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
+		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
+		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
+	}
+
+	/* Dump out the PCI-X command and status regs */
+	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (cap) {
+		eeh_ops->read_config(dn, cap, 4, &cfg);
+		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
+		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+
+		eeh_ops->read_config(dn, cap+4, 4, &cfg);
+		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
+		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+	}
+
+	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
+	cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (cap) {
+		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
+		printk(KERN_WARNING
+		       "EEH: PCI-E capabilities and status follow:\n");
+
+		for (i=0; i<=8; i++) {
+			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+		}
+
+		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+		if (cap) {
+			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+			printk(KERN_WARNING
+			       "EEH: PCI-E AER capability register set follows:\n");
+
+			for (i=0; i<14; i++) {
+				eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
+			}
+		}
+	}
+
+	return n;
+}
+
+/**
+ * eeh_slot_error_detail - Generate a combined log including driver log and error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ *
+ * This routine should be called to generate the combined log, which
+ * is comprised of the driver log and the error log. The driver log is
+ * figured out from the config space of the corresponding PCI device,
+ * while the error log is fetched through a platform dependent function call.
+ */
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
+{
+	size_t loglen = 0;
+	struct eeh_dev *edev, *tmp;
+	bool valid_cfg_log = true;
+
+	/*
+	 * When the PHB is fenced or dead, it's pointless to collect
+	 * the data from PCI config space because it should return
+	 * 0xFF's. For ER, we still retrieve the data from the PCI
+	 * config space.
+	 */
+	if (eeh_probe_mode_dev() &&
+	    (pe->type & EEH_PE_PHB) &&
+	    (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)))
+		valid_cfg_log = false;
+
+	if (valid_cfg_log) {
+		eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+		eeh_ops->configure_bridge(pe);
+		eeh_pe_restore_bars(pe);
+
+		pci_regs_buf[0] = 0;
+		eeh_pe_for_each_dev(pe, edev, tmp) {
+			loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
+						      EEH_PCI_REGS_LOG_LEN - loglen);
+		}
+	}
+
+	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
+}
+
+/**
+ * eeh_token_to_phys - Convert EEH address token to phys address
+ * @token: I/O token, should be address in the form 0xA....
+ *
+ * This routine should be called to convert a virtual I/O address
+ * to a physical one.
+ */
+static inline unsigned long eeh_token_to_phys(unsigned long token)
+{
+	pte_t *ptep;
+	unsigned long pa;
+	int hugepage_shift;
+
+	/*
+	 * We won't find hugepages here, iomem
+	 */
+	ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+	if (!ptep)
+		return token;
+	WARN_ON(hugepage_shift);
+	pa = pte_pfn(*ptep) << PAGE_SHIFT;
+
+	return pa | (token & (PAGE_SIZE-1));
+}
+
+/*
+ * On the PowerNV platform, we might already have a fenced PHB.
+ * In that case, it's meaningless to recover the frozen PE;
+ * we have to handle the fenced PHB first.
+ */
+static int eeh_phb_check_failure(struct eeh_pe *pe)
+{
+	struct eeh_pe *phb_pe;
+	unsigned long flags;
+	int ret;
+
+	if (!eeh_probe_mode_dev())
+		return -EPERM;
+
+	/* Find the PHB PE */
+	phb_pe = eeh_phb_pe_get(pe->phb);
+	if (!phb_pe) {
+		pr_warning("%s Can't find PE for PHB#%d\n",
+			   __func__, pe->phb->global_number);
+		return -EEXIST;
+	}
+
+	/* If the PHB has been in problematic state */
+	eeh_serialize_lock(&flags);
+	if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) {
+		ret = 0;
+		goto out;
+	}
+
+	/* Check PHB state */
+	ret = eeh_ops->get_state(phb_pe, NULL);
+	if ((ret < 0) ||
+	    (ret == EEH_STATE_NOT_SUPPORT) ||
+	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+		ret = 0;
+		goto out;
+	}
+
+	/* Isolate the PHB and send event */
+	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+	eeh_serialize_unlock(flags);
+	eeh_send_failure_event(phb_pe);
+
+	pr_err("EEH: PHB#%x failure detected\n",
+	       phb_pe->phb->global_number);
+	dump_stack();
+
+	return 1;
+out:
+	eeh_serialize_unlock(flags);
+	return ret;
+}
+
+/**
+ * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @edev: eeh device
+ *
+ * Check for an EEH failure for the given device node.  Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze.  This routine
+ * will query firmware for the EEH status.
+ *
+ * Returns 0 if there has not been an EEH error; otherwise returns
+ * a non-zero value and queues up a slot isolation event notification.
+ *
+ * It is safe to call this routine in an interrupt context.
+ */
+int eeh_dev_check_failure(struct eeh_dev *edev)
+{
+	int ret;
+	unsigned long flags;
+	struct device_node *dn;
+	struct pci_dev *dev;
+	struct eeh_pe *pe;
+	int rc = 0;
+	const char *location;
+
+	eeh_stats.total_mmio_ffs++;
+
+	if (!eeh_subsystem_enabled)
+		return 0;
+
+	if (!edev) {
+		eeh_stats.no_dn++;
+		return 0;
+	}
+	dn = eeh_dev_to_of_node(edev);
+	dev = eeh_dev_to_pci_dev(edev);
+	pe = edev->pe;
+
+	/* Access to IO BARs might get this far and still not want checking. */
+	if (!pe) {
+		eeh_stats.ignored_check++;
+		pr_debug("EEH: Ignored check for %s %s\n",
+			 eeh_pci_name(dev), dn->full_name);
+		return 0;
+	}
+
+	if (!pe->addr && !pe->config_addr) {
+		eeh_stats.no_cfg_addr++;
+		return 0;
+	}
+
+	/*
+	 * On the PowerNV platform, we might already have a fenced PHB,
+	 * and we need to take care of that first.
+	 */
+	ret = eeh_phb_check_failure(pe);
+	if (ret > 0)
+		return ret;
+
+	/* If we already have a pending isolation event for this
+	 * slot, we know it's bad already, we don't need to check.
+	 * Do this checking under a lock; as multiple PCI devices
+	 * in one slot might report errors simultaneously, and we
+	 * only want one error recovery routine running.
+	 */
+	eeh_serialize_lock(&flags);
+	rc = 1;
+	if (pe->state & EEH_PE_ISOLATED) {
+		pe->check_count++;
+		if (pe->check_count % EEH_MAX_FAILS == 0) {
+			location = of_get_property(dn, "ibm,loc-code", NULL);
+			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
+				"location=%s driver=%s pci addr=%s\n",
+				pe->check_count, location,
+				eeh_driver_name(dev), eeh_pci_name(dev));
+			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+				eeh_driver_name(dev));
+			dump_stack();
+		}
+		goto dn_unlock;
+	}
+
+	/*
+	 * Now test for an EEH failure.  This is VERY expensive.
+	 * Note that the eeh_config_addr may be a parent device
+	 * in the case of a device behind a bridge, or it may be
+	 * function zero of a multi-function device.
+	 * In any case they must share a common PHB.
+	 */
+	ret = eeh_ops->get_state(pe, NULL);
+
+	/* Note that config-io to empty slots may fail;
+	 * they are empty when they don't have children.
+	 * We will punt under the following conditions: failure to get
+	 * the PE's state, EEH not supported, the PE permanently
+	 * unavailable, or the PE in a good state.
+	 */
+	if ((ret < 0) ||
+	    (ret == EEH_STATE_NOT_SUPPORT) ||
+	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+		eeh_stats.false_positives++;
+		pe->false_positives++;
+		rc = 0;
+		goto dn_unlock;
+	}
+
+	eeh_stats.slot_resets++;
+
+	/* Avoid repeated reports of this failure, including problems
+	 * with other functions on this device, and functions under
+	 * bridges.
+	 */
+	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+	eeh_serialize_unlock(flags);
+
+	eeh_send_failure_event(pe);
+
+	/* Most EEH events are due to device driver bugs.  Having
+	 * a stack trace will help the device-driver authors figure
+	 * out what happened.  So print that out.
+	 */
+	pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
+	       pe->addr, pe->phb->global_number);
+	dump_stack();
+
+	return 1;
+
+dn_unlock:
+	eeh_serialize_unlock(flags);
+	return rc;
+}
+
+EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
+
+/**
+ * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @token: I/O token, should be address in the form 0xA....
+ * @val: value, should be all 1's (XXX why do we need this arg??)
+ *
+ * Check for an EEH failure at the given token address.  Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event.  This routine
+ * will query firmware for the EEH status.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+{
+	unsigned long addr;
+	struct eeh_dev *edev;
+
+	/* Finding the phys addr + pci device; this is pretty quick. */
+	addr = eeh_token_to_phys((unsigned long __force) token);
+	edev = eeh_addr_cache_get_dev(addr);
+	if (!edev) {
+		eeh_stats.no_device++;
+		return val;
+	}
+
+	eeh_dev_check_failure(edev);
+	return val;
+}
+
+EXPORT_SYMBOL(eeh_check_failure);
+
+
+/**
+ * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
+ * @pe: EEH PE
+ * @function: EEH option to set (e.g. EEH_OPT_THAW_MMIO)
+ *
+ * This routine should be called to reenable frozen MMIO or DMA
+ * so that it would work correctly again. It's useful while doing
+ * recovery or log collection on the indicated device.
+ */
+int eeh_pci_enable(struct eeh_pe *pe, int function)
+{
+	int rc;
+
+	rc = eeh_ops->set_option(pe, function);
+	if (rc)
+		pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
+			__func__, function, pe->phb->global_number, pe->addr, rc);
+
+	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+	if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
+	    (function == EEH_OPT_THAW_MMIO))
+		return 0;
+
+	return rc;
+}
+
+/**
+ * pcibios_set_pcie_reset_state - Set PCI-E reset state
+ * @dev: pci device struct
+ * @state: reset state to enter
+ *
+ * Return value:
+ *	0 if success
+ */
+int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
+{
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+	struct eeh_pe *pe = edev->pe;
+
+	if (!pe) {
+		pr_err("%s: No PE found on PCI device %s\n",
+		       __func__, pci_name(dev));
+		return -EINVAL;
+	}
+
+	switch (state) {
+	case pcie_deassert_reset:
+		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+		break;
+	case pcie_hot_reset:
+		eeh_ops->reset(pe, EEH_RESET_HOT);
+		break;
+	case pcie_warm_reset:
+		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * eeh_set_dev_freset - Check the required reset for the indicated device
+ * @data: EEH device
+ * @flag: return value
+ *
+ * Each device might have its preferred reset type: fundamental or
+ * hot reset. The routine is used to collect the information for
+ * the indicated device and its children so that the group of
+ * devices can be reset properly.
+ */
+static void *eeh_set_dev_freset(void *data, void *flag)
+{
+	struct pci_dev *dev;
+	unsigned int *freset = (unsigned int *)flag;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+
+	dev = eeh_dev_to_pci_dev(edev);
+	if (dev)
+		*freset |= dev->needs_freset;
+
+	return NULL;
+}
+
+/**
+ * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
+ * @pe: EEH PE
+ *
+ * Assert the PCI #RST line for 1/4 second.
+ */
+static void eeh_reset_pe_once(struct eeh_pe *pe)
+{
+	unsigned int freset = 0;
+
+	/* Determine type of EEH reset required for
+	 * Partitionable Endpoint, a hot-reset (1)
+	 * or a fundamental reset (3).
+	 * A fundamental reset required by any device under
+	 * Partitionable Endpoint trumps hot-reset.
+	 */
+	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
+
+	if (freset)
+		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
+	else
+		eeh_ops->reset(pe, EEH_RESET_HOT);
+
+	/* The PCI bus requires that the reset be held high for at least
+	 * 100 milliseconds. We wait a bit longer 'just in case'.
+	 */
+#define PCI_BUS_RST_HOLD_TIME_MSEC 250
+	msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
+
+	/* We might get hit with another EEH freeze as soon as the
+	 * pci slot reset line is dropped. Make sure we don't miss
+	 * these, and clear the flag now.
+	 */
+	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+
+	/* After a PCI slot has been reset, the PCI Express spec requires
+	 * a 1.5 second idle time for the bus to stabilize, before starting
+	 * up traffic.
+	 */
+#define PCI_BUS_SETTLE_TIME_MSEC 1800
+	msleep(PCI_BUS_SETTLE_TIME_MSEC);
+}
+
+/**
+ * eeh_reset_pe - Reset the indicated PE
+ * @pe: EEH PE
+ *
+ * This routine should be called to reset the indicated device,
+ * including the PE. A PE might include multiple PCI devices and
+ * sometimes PCI bridges might be involved as well.
+ */
+int eeh_reset_pe(struct eeh_pe *pe)
+{
+	int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	int i, rc;
+
+	/* Take three shots at resetting the bus */
+	for (i=0; i<3; i++) {
+		eeh_reset_pe_once(pe);
+
+		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+		if ((rc & flags) == flags)
+			return 0;
+
+		if (rc < 0) {
+			pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+			       __func__, pe->phb->global_number, pe->addr);
+			return -1;
+		}
+		pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
+		       i+1, pe->phb->global_number, pe->addr, rc);
+	}
+
+	return -1;
+}
+
+/**
+ * eeh_save_bars - Save device BARs
+ * @edev: PCI device associated EEH device
+ *
+ * Save the values of the device BARs. Unlike the restore
+ * routine, this routine is *not* recursive. This is because
+ * PCI devices are added individually; but, for the restore,
+ * an entire slot is reset at a time.
+ */
+void eeh_save_bars(struct eeh_dev *edev)
+{
+	int i;
+	struct device_node *dn;
+
+	if (!edev)
+		return;
+	dn = eeh_dev_to_of_node(edev);
+
+	for (i = 0; i < 16; i++)
+		eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+}
+
+/**
+ * eeh_ops_register - Register platform dependent EEH operations
+ * @ops: platform dependent EEH operations
+ *
+ * Register the platform dependent EEH operation callback
+ * functions. The platform should call this function before
+ * any other EEH operations.
+ */
+int __init eeh_ops_register(struct eeh_ops *ops)
+{
+	if (!ops->name) {
+		pr_warning("%s: Invalid EEH ops name for %p\n",
+			   __func__, ops);
+		return -EINVAL;
+	}
+
+	if (eeh_ops && eeh_ops != ops) {
+		pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
+			   __func__, eeh_ops->name, ops->name);
+		return -EEXIST;
+	}
+
+	eeh_ops = ops;
+
+	return 0;
+}
+
+/**
+ * eeh_ops_unregister - Unregister platform dependent EEH operations
+ * @name: name of EEH platform operations
+ *
+ * Unregister the platform dependent EEH operation callback
+ * functions.
+ */
+int __exit eeh_ops_unregister(const char *name)
+{
+	if (!name || !strlen(name)) {
+		pr_warning("%s: Invalid EEH ops name\n",
+			   __func__);
+		return -EINVAL;
+	}
+
+	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
+		eeh_ops = NULL;
+		return 0;
+	}
+
+	return -EEXIST;
+}
+
+/**
+ * eeh_init - EEH initialization
+ *
+ * Initialize EEH by trying to enable it for all of the adapters in the system.
+ * As a side effect we can determine here if eeh is supported at all.
+ * Note that we leave EEH on so failed config cycles won't cause a machine
+ * check. If a user turns off EEH for a particular adapter they are really
+ * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
+ * grant access to a slot if EEH isn't enabled, and so we always enable
+ * EEH for all slots/all devices.
+ *
+ * The eeh-force-off option disables EEH checking globally, for all slots.
+ * Even if force-off is set, the EEH hardware is still enabled, so that
+ * newer systems can boot.
+ */
+int eeh_init(void)
+{
+	struct pci_controller *hose, *tmp;
+	struct device_node *phb;
+	static int cnt = 0;
+	int ret = 0;
+
+	/*
+	 * We have to delay the initialization on PowerNV until after
+	 * the PCI hierarchy tree has been built, because the PEs
+	 * are figured out based on PCI devices instead of device
+	 * tree nodes.
+	 */
+	if (machine_is(powernv) && cnt++ <= 0)
+		return ret;
+
+	/* Call the platform initialization function */
+	if (!eeh_ops) {
+		pr_warning("%s: Platform EEH operation not found\n",
+			   __func__);
+		return -EEXIST;
+	} else if ((ret = eeh_ops->init())) {
+		pr_warning("%s: Failed to call platform init function (%d)\n",
+			   __func__, ret);
+		return ret;
+	}
+
+	/* Initialize EEH event */
+	ret = eeh_event_init();
+	if (ret)
+		return ret;
+
+	/* Enable EEH for all adapters */
+	if (eeh_probe_mode_devtree()) {
+		list_for_each_entry_safe(hose, tmp,
+					 &hose_list, list_node) {
+			phb = hose->dn;
+			traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
+		}
+	} else if (eeh_probe_mode_dev()) {
+		list_for_each_entry_safe(hose, tmp,
+					 &hose_list, list_node)
+			pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
+	} else {
+		pr_warning("%s: Invalid probe mode %d\n",
+			   __func__, eeh_probe_mode);
+		return -EINVAL;
+	}
+
+	/*
+	 * Call platform post-initialization. Actually, it's a good
+	 * chance to inform the platform that EEH is ready to supply
+	 * service once the I/O cache has been built up.
+	 */
+	if (eeh_ops->post_init) {
+		ret = eeh_ops->post_init();
+		if (ret)
+			return ret;
+	}
+
+	if (eeh_subsystem_enabled)
+		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
+	else
+		pr_warning("EEH: No capable adapters found\n");
+
+	return ret;
+}
+
+core_initcall_sync(eeh_init);
+
+/**
+ * eeh_add_device_early - Enable EEH for the indicated device node
+ * @dn: device node for which to set up EEH
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ * This routine must be called before any i/o is performed to the
+ * adapter (including any config-space i/o).
+ * Whether this actually enables EEH or not for this device depends
+ * on the CEC architecture, the type of the device, earlier boot
+ * command-line arguments, etc.
+ */
+void eeh_add_device_early(struct device_node *dn)
+{
+	struct pci_controller *phb;
+
+	/*
+	 * If we're doing the EEH probe based on PCI device, we
+	 * would delay the probe until the late stage because
+	 * the PCI device isn't available at this moment.
+	 */
+	if (!eeh_probe_mode_devtree())
+		return;
+
+	if (!of_node_to_eeh_dev(dn))
+		return;
+	phb = of_node_to_eeh_dev(dn)->phb;
+
+	/* USB Bus children of PCI devices will not have BUIDs */
+	if (NULL == phb || 0 == phb->buid)
+		return;
+
+	eeh_ops->of_probe(dn, NULL);
+}
+
+/**
+ * eeh_add_device_tree_early - Enable EEH for the indicated device
+ * @dn: device node
+ *
+ * This routine must be used to perform EEH initialization for the
+ * indicated PCI device that was added after system boot (e.g.
+ * hotplug, dlpar).
+ */
+void eeh_add_device_tree_early(struct device_node *dn)
+{
+	struct device_node *sib;
+
+	for_each_child_of_node(dn, sib)
+		eeh_add_device_tree_early(sib);
+	eeh_add_device_early(dn);
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
+
+/**
+ * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH
+ *
+ * This routine must be used to complete EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ */
+void eeh_add_device_late(struct pci_dev *dev)
+{
+	struct device_node *dn;
+	struct eeh_dev *edev;
+
+	if (!dev || !eeh_subsystem_enabled)
+		return;
+
+	pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+	dn = pci_device_to_OF_node(dev);
+	edev = of_node_to_eeh_dev(dn);
+	if (edev->pdev == dev) {
+		pr_debug("EEH: Already referenced !\n");
+		return;
+	}
+
+	/*
+	 * The EEH cache might not be removed correctly because of
+	 * an unbalanced kref to the device during unplug time, which
+	 * relies on pcibios_release_device(). So we have to remove
+	 * that here explicitly.
+	 */
+	if (edev->pdev) {
+		eeh_rmv_from_parent_pe(edev);
+		eeh_addr_cache_rmv_dev(edev->pdev);
+		eeh_sysfs_remove_device(edev->pdev);
+		edev->mode &= ~EEH_DEV_SYSFS;
+
+		edev->pdev = NULL;
+		dev->dev.archdata.edev = NULL;
+	}
+
+	edev->pdev = dev;
+	dev->dev.archdata.edev = edev;
+
+	/*
+	 * We have to do the EEH probe here because the PCI device
+	 * hasn't been created yet in the early stage.
+	 */
+	if (eeh_probe_mode_dev())
+		eeh_ops->dev_probe(dev, NULL);
+
+	eeh_addr_cache_insert_dev(dev);
+}
+
+/**
+ * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
+void eeh_add_device_tree_late(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		eeh_add_device_late(dev);
+		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+			struct pci_bus *subbus = dev->subordinate;
+			if (subbus)
+				eeh_add_device_tree_late(subbus);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
+
+/**
+ * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to add EEH sysfs files for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
+void eeh_add_sysfs_files(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		eeh_sysfs_add_device(dev);
+		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+			struct pci_bus *subbus = dev->subordinate;
+			if (subbus)
+				eeh_add_sysfs_files(subbus);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
+
+/**
+ * eeh_remove_device - Undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be called when a device is removed from
+ * a running system (e.g. by hotplug or dlpar).  It unregisters
+ * the PCI device from the EEH subsystem.  I/O errors affecting
+ * this device will no longer be detected after this call; thus,
+ * i/o errors affecting this slot may leave this device unusable.
+ */
+void eeh_remove_device(struct pci_dev *dev)
+{
+	struct eeh_dev *edev;
+
+	if (!dev || !eeh_subsystem_enabled)
+		return;
+	edev = pci_dev_to_eeh_dev(dev);
+
+	/* Unregister the device with the EEH/PCI address search system */
+	pr_debug("EEH: Removing device %s\n", pci_name(dev));
+
+	if (!edev || !edev->pdev || !edev->pe) {
+		pr_debug("EEH: Not referenced !\n");
+		return;
+	}
+
+	/*
+	 * During the hotplug for EEH error recovery, we need the EEH
+	 * device attached to the parent PE so that the BARs can be
+	 * restored a bit later. So we keep it for the BAR restore and
+	 * remove it from the parent PE during the BAR restore.
+	 */
+	edev->pdev = NULL;
+	dev->dev.archdata.edev = NULL;
+	if (!(edev->pe->state & EEH_PE_KEEP))
+		eeh_rmv_from_parent_pe(edev);
+	else
+		edev->mode |= EEH_DEV_DISCONNECTED;
+
+	eeh_addr_cache_rmv_dev(dev);
+	eeh_sysfs_remove_device(dev);
+	edev->mode &= ~EEH_DEV_SYSFS;
+}
+
+static int proc_eeh_show(struct seq_file *m, void *v)
+{
+	if (0 == eeh_subsystem_enabled) {
+		seq_printf(m, "EEH Subsystem is globally disabled\n");
+		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
+	} else {
+		seq_printf(m, "EEH Subsystem is enabled\n");
+		seq_printf(m,
+			   "no device=%llu\n"
+			   "no device node=%llu\n"
+			   "no config address=%llu\n"
+			   "check not wanted=%llu\n"
+			   "eeh_total_mmio_ffs=%llu\n"
+			   "eeh_false_positives=%llu\n"
+			   "eeh_slot_resets=%llu\n",
+			   eeh_stats.no_device,
+			   eeh_stats.no_dn,
+			   eeh_stats.no_cfg_addr,
+			   eeh_stats.ignored_check,
+			   eeh_stats.total_mmio_ffs,
+			   eeh_stats.false_positives,
+			   eeh_stats.slot_resets);
+	}
+
+	return 0;
+}
+
+static int proc_eeh_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_eeh_show, NULL);
+}
+
+static const struct file_operations proc_eeh_operations = {
+	.open      = proc_eeh_open,
+	.read      = seq_read,
+	.llseek    = seq_lseek,
+	.release   = single_release,
+};
+
+static int __init eeh_init_proc(void)
+{
+	if (machine_is(pseries) || machine_is(powernv))
+		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+	return 0;
+}
+__initcall(eeh_init_proc);
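
The overview comment in eeh.c describes the intended driver-side pattern: when a read returns all 1's, the driver asks EEH whether the slot is actually frozen. A minimal sketch of that pattern against the eeh_dev_check_failure() interface exported above (the driver, register offset, and error handling here are hypothetical, not part of this commit):

	#include <linux/pci.h>
	#include <linux/io.h>
	#include <asm/eeh.h>

	/* Hypothetical driver fragment; the mydev_* names are illustrative only */
	static u32 mydev_read_status(struct pci_dev *pdev, void __iomem *regs)
	{
		u32 val = readl(regs + 0x10);	/* 0x10: assumed status offset */

		/* All 1's may be a frozen slot rather than real register data */
		if (val == ~0U) {
			struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);

			/* Non-zero return: freeze confirmed, recovery event queued */
			if (edev && eeh_dev_check_failure(edev))
				return 0;	/* let EEH recovery take over */
		}

		return val;
	}

In practice most powerpc drivers get this behavior for free, since eeh_check_failure() is embedded in the arch MMIO accessors, as the overview notes.
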
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
new file mode 100644
index 000000000000..e8c9fd546a5c
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -0,0 +1,310 @@
+/*
+ * PCI address cache; allows the lookup of PCI devices based on I/O address
+ *
+ * Copyright IBM Corporation 2004
+ * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+
+/**
+ * The pci address cache subsystem.  This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found. It is safe to perform an address lookup in an interrupt
+ * context; this ability is an important feature.
+ *
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
+ */
+struct pci_io_addr_range {
+	struct rb_node rb_node;
+	unsigned long addr_lo;
+	unsigned long addr_hi;
+	struct eeh_dev *edev;
+	struct pci_dev *pcidev;
+	unsigned int flags;
+};
+
+static struct pci_io_addr_cache {
+	struct rb_root rb_root;
+	spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
+{
+	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+		if (addr < piar->addr_lo)
+			n = n->rb_left;
+		else if (addr > piar->addr_hi)
+			n = n->rb_right;
+		else
+			return piar->edev;
+	}
+
+	return NULL;
+}
+
+/**
+ * eeh_addr_cache_get_dev - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address.  Be sure to pci_dev_put the device
+ * when finished.  I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_addr added in).
+ * It is safe to call this function within an interrupt.
+ */
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
+{
+	struct eeh_dev *edev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	edev = __eeh_addr_cache_get_device(addr);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+	return edev;
+}
+
+#ifdef DEBUG
+/*
+ * Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache.
+ */
+static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
+{
+	struct rb_node *n;
+	int cnt = 0;
+
+	n = rb_first(&cache->rb_root);
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+		pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
+		       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
+		       piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+		cnt++;
+		n = rb_next(n);
+	}
+}
+#endif
+
+/* Insert address range into the rb tree. */
+static struct pci_io_addr_range *
+eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+		      unsigned long ahi, unsigned int flags)
+{
+	struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct pci_io_addr_range *piar;
+
+	/* Walk tree, find a place to insert into tree */
+	while (*p) {
+		parent = *p;
+		piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
+		if (ahi < piar->addr_lo) {
+			p = &parent->rb_left;
+		} else if (alo > piar->addr_hi) {
+			p = &parent->rb_right;
+		} else {
+			if (dev != piar->pcidev ||
+			    alo != piar->addr_lo || ahi != piar->addr_hi) {
+				pr_warning("PIAR: overlapping address range\n");
+			}
+			return piar;
+		}
+	}
+	piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+	if (!piar)
+		return NULL;
+
+	piar->addr_lo = alo;
+	piar->addr_hi = ahi;
+	piar->edev = pci_dev_to_eeh_dev(dev);
+	piar->pcidev = dev;
+	piar->flags = flags;
+
+#ifdef DEBUG
+	pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
+		 alo, ahi, pci_name(dev));
+#endif
+
+	rb_link_node(&piar->rb_node, parent, p);
+	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+
+	return piar;
+}
+
+static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+	struct device_node *dn;
+	struct eeh_dev *edev;
+	int i;
+
+	dn = pci_device_to_OF_node(dev);
+	if (!dn) {
+		pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
+		return;
+	}
+
+	edev = of_node_to_eeh_dev(dn);
+	if (!edev) {
+		pr_warning("PCI: no EEH dev found for dn=%s\n",
+			   dn->full_name);
+		return;
+	}
+
+	/* Skip any devices for which EEH is not enabled. */
+	if (!eeh_probe_mode_dev() && !edev->pe) {
+#ifdef DEBUG
+		pr_info("PCI: skip building address cache for=%s - %s\n",
+			pci_name(dev), dn->full_name);
+#endif
+		return;
+	}
+
+	/* Walk resources on this device, poke them into the tree */
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+		unsigned long start = pci_resource_start(dev,i);
+		unsigned long end = pci_resource_end(dev,i);
+		unsigned int flags = pci_resource_flags(dev,i);
+
+		/* We are only interested in bus addresses, not DMA or other stuff */
+		if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+			continue;
+		eeh_addr_cache_insert(dev, start, end, flags);
+	}
+}
+
+/**
+ * eeh_addr_cache_insert_dev - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+void eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+	unsigned long flags;
+
+	/* Ignore PCI bridges */
+	if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+		return;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	__eeh_addr_cache_insert_dev(dev);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+	struct rb_node *n;
+
+restart:
+	n = rb_first(&pci_io_addr_cache_root.rb_root);
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+		if (piar->pcidev == dev) {
+			rb_erase(n, &pci_io_addr_cache_root.rb_root);
+			kfree(piar);
+			goto restart;
+		}
+		n = rb_next(n);
+	}
+}
+
+/**
+ * eeh_addr_cache_rmv_dev - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	__eeh_addr_cache_rmv_dev(dev);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * eeh_addr_cache_build - Build a cache of I/O addresses
+ *
+ * Build a cache of pci i/o addresses.  This cache will be used to
+ * find the pci device that corresponds to a given address.
+ * This routine scans all pci busses to build the cache.
+ * Must be run late in the boot process, after the pci controllers
+ * have been scanned for devices (after all device resources are known).
+ */
+void eeh_addr_cache_build(void)
+{
+	struct device_node *dn;
+	struct eeh_dev *edev;
+	struct pci_dev *dev = NULL;
+
+	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+
+	for_each_pci_dev(dev) {
+		dn = pci_device_to_OF_node(dev);
+		if (!dn)
+			continue;
+
+		edev = of_node_to_eeh_dev(dn);
+		if (!edev)
+			continue;
+
+		dev->dev.archdata.edev = edev;
+		edev->pdev = dev;
+
+		eeh_addr_cache_insert_dev(dev);
+		eeh_sysfs_add_device(dev);
+	}
+
+#ifdef DEBUG
+	/* Verify tree built up above, echo back the list of addrs. */
+	eeh_addr_cache_print(&pci_io_addr_cache_root);
+#endif
+}
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
new file mode 100644
index 000000000000..1efa28f5fc54
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -0,0 +1,112 @@
1/*
2 * This file implements dynamic creation of EEH devices, each of which is
3 * bound to an OF node and a PCI device simultaneously. The EEH devices
4 * provide the fundamental information for the EEH core components to work
5 * properly. We have to support several situations where dynamic creation
6 * of EEH devices is required:
7 *
8 * 1) Before PCI enumeration starts, we need to create EEH devices for the
9 * PCI-sensitive OF nodes.
10 * 2) When PCI enumeration is done, we need to do the binding between a PCI
11 * device and its associated EEH device.
12 * 3) DR (Dynamic Reconfiguration) may create PCI-sensitive OF nodes. An EEH
13 * device will be created when such an OF node is detected from DR.
14 * 4) PCI hotplug needs to redo the binding between PCI device and EEH device.
15 * If a PHB is newly inserted, we also need to create EEH devices accordingly.
16 *
17 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 */
33
34#include <linux/export.h>
35#include <linux/gfp.h>
36#include <linux/init.h>
37#include <linux/kernel.h>
38#include <linux/pci.h>
39#include <linux/string.h>
40
41#include <asm/pci-bridge.h>
42#include <asm/ppc-pci.h>
43
44/**
45 * eeh_dev_init - Create EEH device according to OF node
46 * @dn: device node
47 * @data: PHB
48 *
49 * It creates an EEH device for the given OF node. The function
50 * might be called during PCI enumeration, DR, or PHB hotplug.
51 */
52void *eeh_dev_init(struct device_node *dn, void *data)
53{
54 struct pci_controller *phb = data;
55 struct eeh_dev *edev;
56
57 /* Allocate EEH device */
58 edev = kzalloc(sizeof(*edev), GFP_KERNEL);
59 if (!edev) {
60 pr_warning("%s: out of memory\n", __func__);
61 return NULL;
62 }
63
64 /* Associate EEH device with OF node */
65 PCI_DN(dn)->edev = edev;
66 edev->dn = dn;
67 edev->phb = phb;
68 INIT_LIST_HEAD(&edev->list);
69
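	/* Returning NULL lets traverse_pci_devices() keep walking the
	 * remaining PCI-sensitive OF nodes rather than stopping early.
	 */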
70 return NULL;
71}
72
73/**
74 * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
75 * @phb: PHB
76 *
77 * Scan the PHB OF node and its children, then create the
78 * EEH devices accordingly.
79 */
80void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
81{
82 struct device_node *dn = phb->dn;
83
84 /* EEH PE for PHB */
85 eeh_phb_pe_create(phb);
86
87 /* EEH device for PHB */
88 eeh_dev_init(dn, phb);
89
90	/* EEH devices for child OF nodes */
91 traverse_pci_devices(dn, eeh_dev_init, phb);
92}
93
94/**
95 * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
96 *
97 * Scan all the existing PHBs and create EEH devices for their OF
98 * nodes and their child OF nodes.
99 */
100static int __init eeh_dev_phb_init(void)
101{
102 struct pci_controller *phb, *tmp;
103
104 list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
105 eeh_dev_phb_init_dynamic(phb);
106
107 pr_info("EEH: devices created\n");
108
109 return 0;
110}
111
112core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
new file mode 100644
index 000000000000..36bed5a12750
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -0,0 +1,732 @@
1/*
2 * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
3 * Copyright IBM Corp. 2004 2005
4 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
24 */
25#include <linux/delay.h>
26#include <linux/interrupt.h>
27#include <linux/irq.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <asm/eeh.h>
31#include <asm/eeh_event.h>
32#include <asm/ppc-pci.h>
33#include <asm/pci-bridge.h>
34#include <asm/prom.h>
35#include <asm/rtas.h>
36
37/**
38 * eeh_pcid_name - Retrieve name of PCI device driver
39 * @pdev: PCI device
40 *
41 * This routine is used to retrieve the name of PCI device driver
42 * if that's valid.
43 */
44static inline const char *eeh_pcid_name(struct pci_dev *pdev)
45{
46 if (pdev && pdev->dev.driver)
47 return pdev->dev.driver->name;
48 return "";
49}
50
51/**
52 * eeh_pcid_get - Get the PCI device driver
53 * @pdev: PCI device
54 *
55 * The function is used to retrieve the PCI device driver for
56 * the indicated PCI device. Besides, we take a reference on the
57 * driver module to prevent it from being unloaded on the fly;
58 * otherwise, a kernel crash would follow.
59 */
60static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
61{
62 if (!pdev || !pdev->driver)
63 return NULL;
64
65 if (!try_module_get(pdev->driver->driver.owner))
66 return NULL;
67
68 return pdev->driver;
69}
70
71/**
72 * eeh_pcid_put - Dereference on the PCI device driver
73 * @pdev: PCI device
74 *
75 * The function is called to drop the reference to the driver
76 * module of the indicated PCI device.
77 */
78static inline void eeh_pcid_put(struct pci_dev *pdev)
79{
80 if (!pdev || !pdev->driver)
81 return;
82
83 module_put(pdev->driver->driver.owner);
84}
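/*
 * A minimal pairing sketch (hypothetical caller; mirrors how the
 * eeh_report_*() callbacks below use these helpers): every successful
 * eeh_pcid_get() must be balanced by eeh_pcid_put().
 */
static void eeh_pcid_usage_sketch(struct pci_dev *pdev)
{
	struct pci_driver *driver = eeh_pcid_get(pdev);

	if (!driver)
		return;		/* no driver, or its module is going away */

	if (driver->err_handler && driver->err_handler->resume)
		driver->err_handler->resume(pdev);

	eeh_pcid_put(pdev);	/* drop the module reference taken above */
}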
85
86#if 0
87static void print_device_node_tree(struct pci_dn *pdn, int dent)
88{
89 int i;
90 struct device_node *pc;
91
92 if (!pdn)
93 return;
94 for (i = 0; i < dent; i++)
95 printk(" ");
96 printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
97 pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
98 pdn->eeh_pe_config_addr, pdn->node->full_name);
99 dent += 3;
100 pc = pdn->node->child;
101 while (pc) {
102 print_device_node_tree(PCI_DN(pc), dent);
103 pc = pc->sibling;
104 }
105}
106#endif
107
108/**
109 * eeh_disable_irq - Disable interrupt for the recovering device
110 * @dev: PCI device
111 *
112 * This routine must be called when reporting temporary or permanent
113 * error to the particular PCI device to disable interrupt of that
114 * device. If the device has enabled MSI or MSI-X interrupt, we needn't
115 * do real work because EEH should freeze DMA transfers for those PCI
116 * devices encountering EEH errors, which includes MSI or MSI-X.
117 */
118static void eeh_disable_irq(struct pci_dev *dev)
119{
120 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
121
122 /* Don't disable MSI and MSI-X interrupts. They are
123 * effectively disabled by the DMA Stopped state
124 * when an EEH error occurs.
125 */
126 if (dev->msi_enabled || dev->msix_enabled)
127 return;
128
129 if (!irq_has_action(dev->irq))
130 return;
131
132 edev->mode |= EEH_DEV_IRQ_DISABLED;
133 disable_irq_nosync(dev->irq);
134}
135
136/**
137 * eeh_enable_irq - Enable interrupt for the recovering device
138 * @dev: PCI device
139 *
140 * This routine must be called to re-enable the interrupt once the
141 * failed device can be resumed.
142 */
143static void eeh_enable_irq(struct pci_dev *dev)
144{
145 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
146 struct irq_desc *desc;
147
148 if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
149 edev->mode &= ~EEH_DEV_IRQ_DISABLED;
150
151 desc = irq_to_desc(dev->irq);
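		/* desc->depth > 0 means the line is still disabled;
		 * calling enable_irq() on an already enabled line would
		 * trigger an "unbalanced enable" warning.
		 */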
152 if (desc && desc->depth > 0)
153 enable_irq(dev->irq);
154 }
155}
156
157/**
158 * eeh_report_error - Report pci error to each device driver
159 * @data: eeh device
160 * @userdata: return value
161 *
162 * Report an EEH error to each device driver, collect up and
163 * merge the device driver responses. Cumulative response
164 * passed back in "userdata".
165 */
166static void *eeh_report_error(void *data, void *userdata)
167{
168 struct eeh_dev *edev = (struct eeh_dev *)data;
169 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
170 enum pci_ers_result rc, *res = userdata;
171 struct pci_driver *driver;
172
173	/* We might not have the associated PCI device;
174	 * if so, continue with the next one.
175 */
176 if (!dev) return NULL;
177 dev->error_state = pci_channel_io_frozen;
178
179 driver = eeh_pcid_get(dev);
180 if (!driver) return NULL;
181
182 eeh_disable_irq(dev);
183
184 if (!driver->err_handler ||
185 !driver->err_handler->error_detected) {
186 eeh_pcid_put(dev);
187 return NULL;
188 }
189
190 rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
191
192 /* A driver that needs a reset trumps all others */
193 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
194 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
195
196 eeh_pcid_put(dev);
197 return NULL;
198}
199
200/**
201 * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
202 * @data: eeh device
203 * @userdata: return value
204 *
205 * Tells each device driver that IO ports, MMIO and config space I/O
206 * are now enabled. Collects up and merges the device driver responses.
207 * Cumulative response passed back in "userdata".
208 */
209static void *eeh_report_mmio_enabled(void *data, void *userdata)
210{
211 struct eeh_dev *edev = (struct eeh_dev *)data;
212 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
213 enum pci_ers_result rc, *res = userdata;
214 struct pci_driver *driver;
215
216 driver = eeh_pcid_get(dev);
217 if (!driver) return NULL;
218
219 if (!driver->err_handler ||
220 !driver->err_handler->mmio_enabled) {
221 eeh_pcid_put(dev);
222 return NULL;
223 }
224
225 rc = driver->err_handler->mmio_enabled(dev);
226
227 /* A driver that needs a reset trumps all others */
228 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
229 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
230
231 eeh_pcid_put(dev);
232 return NULL;
233}
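/*
 * The response-merging rule used by eeh_report_error() and
 * eeh_report_mmio_enabled() above can be summed up in one small
 * helper (a sketch for illustration, not part of this patch):
 */
static enum pci_ers_result eeh_result_merge_sketch(enum pci_ers_result cur,
						   enum pci_ers_result rc)
{
	if (rc == PCI_ERS_RESULT_NEED_RESET)
		return rc;	/* a reset request trumps all others */
	if (cur == PCI_ERS_RESULT_NONE)
		return rc;	/* otherwise the first real answer wins */
	return cur;
}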
234
235/**
236 * eeh_report_reset - Tell device that slot has been reset
237 * @data: eeh device
238 * @userdata: return value
239 *
240 * This routine must be called while EEH tries to reset particular
241 * PCI device so that the associated PCI device driver could take
242 * some actions, usually to save data the driver needs so that the
243 * driver can work again while the device is recovered.
244 */
245static void *eeh_report_reset(void *data, void *userdata)
246{
247 struct eeh_dev *edev = (struct eeh_dev *)data;
248 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
249 enum pci_ers_result rc, *res = userdata;
250 struct pci_driver *driver;
251
252 if (!dev) return NULL;
253 dev->error_state = pci_channel_io_normal;
254
255 driver = eeh_pcid_get(dev);
256 if (!driver) return NULL;
257
258 eeh_enable_irq(dev);
259
260 if (!driver->err_handler ||
261 !driver->err_handler->slot_reset) {
262 eeh_pcid_put(dev);
263 return NULL;
264 }
265
266 rc = driver->err_handler->slot_reset(dev);
267 if ((*res == PCI_ERS_RESULT_NONE) ||
268 (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
269 if (*res == PCI_ERS_RESULT_DISCONNECT &&
270 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
271
272 eeh_pcid_put(dev);
273 return NULL;
274}
275
276/**
277 * eeh_report_resume - Tell device to resume normal operations
278 * @data: eeh device
279 * @userdata: return value
280 *
281 * This routine must be called to notify the device driver that it
282 * could resume so that the device driver can do some initialization
283 * to make the recovered device work again.
284 */
285static void *eeh_report_resume(void *data, void *userdata)
286{
287 struct eeh_dev *edev = (struct eeh_dev *)data;
288 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
289 struct pci_driver *driver;
290
291 if (!dev) return NULL;
292 dev->error_state = pci_channel_io_normal;
293
294 driver = eeh_pcid_get(dev);
295 if (!driver) return NULL;
296
297 eeh_enable_irq(dev);
298
299 if (!driver->err_handler ||
300 !driver->err_handler->resume) {
301 eeh_pcid_put(dev);
302 return NULL;
303 }
304
305 driver->err_handler->resume(dev);
306
307 eeh_pcid_put(dev);
308 return NULL;
309}
310
311/**
312 * eeh_report_failure - Tell device driver that device is dead.
313 * @data: eeh device
314 * @userdata: return value
315 *
316 * This informs the device driver that the device is permanently
317 * dead, and that no further recovery attempts will be made on it.
318 */
319static void *eeh_report_failure(void *data, void *userdata)
320{
321 struct eeh_dev *edev = (struct eeh_dev *)data;
322 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
323 struct pci_driver *driver;
324
325 if (!dev) return NULL;
326 dev->error_state = pci_channel_io_perm_failure;
327
328 driver = eeh_pcid_get(dev);
329 if (!driver) return NULL;
330
331 eeh_disable_irq(dev);
332
333 if (!driver->err_handler ||
334 !driver->err_handler->error_detected) {
335 eeh_pcid_put(dev);
336 return NULL;
337 }
338
339 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
340
341 eeh_pcid_put(dev);
342 return NULL;
343}
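/*
 * Driver-side sketch of what the eeh_report_*() callbacks above end up
 * invoking: an EEH-aware driver supplies error-recovery hooks through
 * its struct pci_driver. "foo" is a hypothetical driver; only the
 * structure and hook names come from the generic PCI error recovery API.
 */
static pci_ers_result_t foo_error_detected(struct pci_dev *pdev,
					   enum pci_channel_state state)
{
	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	/* Quiesce the device and ask for a slot reset */
	return PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t foo_slot_reset(struct pci_dev *pdev)
{
	/* Reinitialize the hardware, then report success */
	return PCI_ERS_RESULT_RECOVERED;
}

static void foo_resume(struct pci_dev *pdev)
{
	/* Restart I/O on the recovered device */
}

static const struct pci_error_handlers foo_err_handler = {
	.error_detected	= foo_error_detected,
	.slot_reset	= foo_slot_reset,
	.resume		= foo_resume,
};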
344
345static void *eeh_rmv_device(void *data, void *userdata)
346{
347 struct pci_driver *driver;
348 struct eeh_dev *edev = (struct eeh_dev *)data;
349 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
350 int *removed = (int *)userdata;
351
352 /*
353 * Actually, we should remove the PCI bridges as well.
354	 * However, that adds lots of complexity, particularly
355	 * since some of the devices under the bridge might
356	 * support EEH. So we just care about PCI devices for
357 * simplicity here.
358 */
359 if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
360 return NULL;
361 driver = eeh_pcid_get(dev);
362 if (driver && driver->err_handler)
363 return NULL;
364
365 /* Remove it from PCI subsystem */
366 pr_debug("EEH: Removing %s without EEH sensitive driver\n",
367 pci_name(dev));
368 edev->bus = dev->bus;
369 edev->mode |= EEH_DEV_DISCONNECTED;
370 (*removed)++;
371
372 pci_stop_and_remove_bus_device(dev);
373
374 return NULL;
375}
376
377static void *eeh_pe_detach_dev(void *data, void *userdata)
378{
379 struct eeh_pe *pe = (struct eeh_pe *)data;
380 struct eeh_dev *edev, *tmp;
381
382 eeh_pe_for_each_dev(pe, edev, tmp) {
383 if (!(edev->mode & EEH_DEV_DISCONNECTED))
384 continue;
385
386 edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
387 eeh_rmv_from_parent_pe(edev);
388 }
389
390 return NULL;
391}
392
393/**
394 * eeh_reset_device - Perform actual reset of a pci slot
395 * @pe: EEH PE
396 * @bus: PCI bus corresponding to the isolated slot
397 *
398 * This routine must be called to do reset on the indicated PE.
399 * During the reset, udev might be invoked because those affected
400 * PCI devices will be removed and then added.
401 */
402static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
403{
404 struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
405 struct timeval tstamp;
406 int cnt, rc, removed = 0;
407
408 /* pcibios will clear the counter; save the value */
409 cnt = pe->freeze_count;
410 tstamp = pe->tstamp;
411
412 /*
413 * We don't remove the corresponding PE instances because
414	 * we need the information afterwards. The attached EEH
415	 * devices are expected to be reattached soon when calling
416 * into pcibios_add_pci_devices().
417 */
418 eeh_pe_state_mark(pe, EEH_PE_KEEP);
419 if (bus)
420 pcibios_remove_pci_devices(bus);
421 else if (frozen_bus)
422 eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
423
424 /* Reset the pci controller. (Asserts RST#; resets config space).
425 * Reconfigure bridges and devices. Don't try to bring the system
426 * up if the reset failed for some reason.
427 */
428 rc = eeh_reset_pe(pe);
429 if (rc)
430 return rc;
431
432 /* Restore PE */
433 eeh_ops->configure_bridge(pe);
434 eeh_pe_restore_bars(pe);
435
436 /* Give the system 5 seconds to finish running the user-space
437 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
438 * this is a hack, but if we don't do this, and try to bring
439 * the device up before the scripts have taken it down,
440 * potentially weird things happen.
441 */
442 if (bus) {
443 pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
444 ssleep(5);
445
446 /*
447 * The EEH device is still connected with its parent
448 * PE. We should disconnect it so the binding can be
449 * rebuilt when adding PCI devices.
450 */
451 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
452 pcibios_add_pci_devices(bus);
453 } else if (frozen_bus && removed) {
454 pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
455 ssleep(5);
456
457 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
458 pcibios_add_pci_devices(frozen_bus);
459 }
460 eeh_pe_state_clear(pe, EEH_PE_KEEP);
461
462 pe->tstamp = tstamp;
463 pe->freeze_count = cnt;
464
465 return 0;
466}
467
468/* The longest amount of time to wait for a pci device
469 * to come back on line, in seconds.
470 */
471#define MAX_WAIT_FOR_RECOVERY 150
472
473static void eeh_handle_normal_event(struct eeh_pe *pe)
474{
475 struct pci_bus *frozen_bus;
476 int rc = 0;
477 enum pci_ers_result result = PCI_ERS_RESULT_NONE;
478
479 frozen_bus = eeh_pe_bus_get(pe);
480 if (!frozen_bus) {
481 pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
482 __func__, pe->phb->global_number, pe->addr);
483 return;
484 }
485
486 eeh_pe_update_time_stamp(pe);
487 pe->freeze_count++;
488 if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
489 goto excess_failures;
490 pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
491 pe->freeze_count);
492
493 /* Walk the various device drivers attached to this slot through
494 * a reset sequence, giving each an opportunity to do what it needs
495 * to accomplish the reset. Each child gets a report of the
496 * status ... if any child can't handle the reset, then the entire
497 * slot is dlpar removed and added.
498 */
499 pr_info("EEH: Notify device drivers to shutdown\n");
500 eeh_pe_dev_traverse(pe, eeh_report_error, &result);
501
502 /* Get the current PCI slot state. This can take a long time,
503 * sometimes over 3 seconds for certain systems.
504 */
505 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
506 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
507 pr_warning("EEH: Permanent failure\n");
508 goto hard_fail;
509 }
510
511 /* Since rtas may enable MMIO when posting the error log,
512 * don't post the error log until after all dev drivers
513 * have been informed.
514 */
515 pr_info("EEH: Collect temporary log\n");
516 eeh_slot_error_detail(pe, EEH_LOG_TEMP);
517
518 /* If all device drivers were EEH-unaware, then shut
519 * down all of the device drivers, and hope they
520	 * go down willingly, without panicking the system.
521 */
522 if (result == PCI_ERS_RESULT_NONE) {
523 pr_info("EEH: Reset with hotplug activity\n");
524 rc = eeh_reset_device(pe, frozen_bus);
525 if (rc) {
526 pr_warning("%s: Unable to reset, err=%d\n",
527 __func__, rc);
528 goto hard_fail;
529 }
530 }
531
532 /* If all devices reported they can proceed, then re-enable MMIO */
533 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
534 pr_info("EEH: Enable I/O for affected devices\n");
535 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
536
537 if (rc < 0)
538 goto hard_fail;
539 if (rc) {
540 result = PCI_ERS_RESULT_NEED_RESET;
541 } else {
542 pr_info("EEH: Notify device drivers to resume I/O\n");
543 result = PCI_ERS_RESULT_NONE;
544 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
545 }
546 }
547
548 /* If all devices reported they can proceed, then re-enable DMA */
549 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
550		pr_info("EEH: Enable DMA for affected devices\n");
551 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
552
553 if (rc < 0)
554 goto hard_fail;
555 if (rc)
556 result = PCI_ERS_RESULT_NEED_RESET;
557 else
558 result = PCI_ERS_RESULT_RECOVERED;
559 }
560
561 /* If any device has a hard failure, then shut off everything. */
562 if (result == PCI_ERS_RESULT_DISCONNECT) {
563 pr_warning("EEH: Device driver gave up\n");
564 goto hard_fail;
565 }
566
567 /* If any device called out for a reset, then reset the slot */
568 if (result == PCI_ERS_RESULT_NEED_RESET) {
569 pr_info("EEH: Reset without hotplug activity\n");
570 rc = eeh_reset_device(pe, NULL);
571 if (rc) {
572 pr_warning("%s: Cannot reset, err=%d\n",
573 __func__, rc);
574 goto hard_fail;
575 }
576
577		pr_info("EEH: Notify device drivers "
578			"of the completion of reset\n");
579 result = PCI_ERS_RESULT_NONE;
580 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
581 }
582
583 /* All devices should claim they have recovered by now. */
584 if ((result != PCI_ERS_RESULT_RECOVERED) &&
585 (result != PCI_ERS_RESULT_NONE)) {
586 pr_warning("EEH: Not recovered\n");
587 goto hard_fail;
588 }
589
590 /* Tell all device drivers that they can resume operations */
591 pr_info("EEH: Notify device driver to resume\n");
592 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
593
594 return;
595
596excess_failures:
597 /*
598 * About 90% of all real-life EEH failures in the field
599 * are due to poorly seated PCI cards. Only 10% or so are
600 * due to actual, failed cards.
601 */
602 pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
603 "last hour and has been permanently disabled.\n"
604 "Please try reseating or replacing it.\n",
605 pe->phb->global_number, pe->addr,
606 pe->freeze_count);
607 goto perm_error;
608
609hard_fail:
610	pr_err("EEH: Unable to recover from failure of PHB#%d-PE#%x.\n"
611 "Please try reseating or replacing it\n",
612 pe->phb->global_number, pe->addr);
613
614perm_error:
615 eeh_slot_error_detail(pe, EEH_LOG_PERM);
616
617 /* Notify all devices that they're about to go down. */
618 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
619
620 /* Shut down the device drivers for good. */
621 if (frozen_bus)
622 pcibios_remove_pci_devices(frozen_bus);
623}
624
625static void eeh_handle_special_event(void)
626{
627 struct eeh_pe *pe, *phb_pe;
628 struct pci_bus *bus;
629 struct pci_controller *hose, *tmp;
630 unsigned long flags;
631 int rc = 0;
632
633 /*
634	 * The return value from next_error() is classified as follows.
635	 * It might be good to enumerate them as named constants. However,
636	 * next_error() is only supported on the PowerNV platform for now,
637	 * so it is fine to use the integers directly:
638 *
639 * 4 - Dead IOC 3 - Dead PHB
640 * 2 - Fenced PHB 1 - Frozen PE
641 * 0 - No error found
642 *
643 */
644 rc = eeh_ops->next_error(&pe);
645 if (rc <= 0)
646 return;
647
648 switch (rc) {
649 case 4:
650 /* Mark all PHBs in dead state */
651 eeh_serialize_lock(&flags);
652 list_for_each_entry_safe(hose, tmp,
653 &hose_list, list_node) {
654 phb_pe = eeh_phb_pe_get(hose);
655 if (!phb_pe) continue;
656
657 eeh_pe_state_mark(phb_pe,
658 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
659 }
660 eeh_serialize_unlock(flags);
661
662 /* Purge all events */
663 eeh_remove_event(NULL);
664 break;
665 case 3:
666 case 2:
667 case 1:
668 /* Mark the PE in fenced state */
669 eeh_serialize_lock(&flags);
670 if (rc == 3)
671 eeh_pe_state_mark(pe,
672 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
673 else
674 eeh_pe_state_mark(pe,
675 EEH_PE_ISOLATED | EEH_PE_RECOVERING);
676 eeh_serialize_unlock(flags);
677
678 /* Purge all events of the PHB */
679 eeh_remove_event(pe);
680 break;
681 default:
682 pr_err("%s: Invalid value %d from next_error()\n",
683 __func__, rc);
684 return;
685 }
686
687 /*
688	 * A fenced PHB or frozen PE is handled as a normal event.
689	 * For a dead PHB or IOC, we have to remove the affected
690	 * PHBs instead.
691 */
692 if (rc == 2 || rc == 1)
693 eeh_handle_normal_event(pe);
694 else {
695 list_for_each_entry_safe(hose, tmp,
696 &hose_list, list_node) {
697 phb_pe = eeh_phb_pe_get(hose);
698 if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD))
699 continue;
700
701 bus = eeh_pe_bus_get(phb_pe);
702 /* Notify all devices that they're about to go down. */
703 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
704 pcibios_remove_pci_devices(bus);
705 }
706 }
707}
708
709/**
710 * eeh_handle_event - Reset a PCI device after hard lockup.
711 * @pe: EEH PE
712 *
713 * When the PHB detects address or data parity errors on a particular
714 * PCI slot, the associated PE will be frozen. Besides, DMAs occurring
715 * to wild addresses (which usually happen due to bugs in device
716 * drivers or in PCI adapter firmware) can cause EEH errors. #SERR,
717 * #PERR or other misc PCI-related errors also can trigger EEH errors.
718 *
719 * The recovery process consists of unplugging the device driver (which
720 * generated hotplug events to userspace), then issuing a PCI #RST to
721 * the device, then reconfiguring the PCI config space for all bridges
722 * & devices under this slot, and then finally restarting the device
723 * drivers (which cause a second set of hotplug events to go out to
724 * userspace).
725 */
726void eeh_handle_event(struct eeh_pe *pe)
727{
728 if (pe)
729 eeh_handle_normal_event(pe);
730 else
731 eeh_handle_special_event();
732}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
new file mode 100644
index 000000000000..d27c5afc90ae
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -0,0 +1,182 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 *
16 * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
17 */
18
19#include <linux/delay.h>
20#include <linux/list.h>
21#include <linux/sched.h>
22#include <linux/semaphore.h>
23#include <linux/pci.h>
24#include <linux/slab.h>
25#include <linux/kthread.h>
26#include <asm/eeh_event.h>
27#include <asm/ppc-pci.h>
28
29/** Overview:
30 * EEH error states may be detected within exception handlers;
31 * however, the recovery processing needs to occur asynchronously
32 * in a normal kernel context and not an interrupt context.
33 * This pair of routines creates an event and queues it onto a
34 * work-queue, where a worker thread can drive recovery.
35 */
36
37static DEFINE_SPINLOCK(eeh_eventlist_lock);
38static struct semaphore eeh_eventlist_sem;
39LIST_HEAD(eeh_eventlist);
40
41/**
42 * eeh_event_handler - Dispatch EEH events.
43 * @dummy: unused
44 *
45 * The detection of a frozen slot can occur inside an interrupt,
46 * where it can be hard to do anything about it. The goal of this
47 * routine is to pull these detection events out of the context
48 * of the interrupt handler, and re-dispatch them for processing
49 * at a later time in a normal context.
50 */
51static int eeh_event_handler(void * dummy)
52{
53 unsigned long flags;
54 struct eeh_event *event;
55 struct eeh_pe *pe;
56
57 while (!kthread_should_stop()) {
58 if (down_interruptible(&eeh_eventlist_sem))
59 break;
60
61 /* Fetch EEH event from the queue */
62 spin_lock_irqsave(&eeh_eventlist_lock, flags);
63 event = NULL;
64 if (!list_empty(&eeh_eventlist)) {
65 event = list_entry(eeh_eventlist.next,
66 struct eeh_event, list);
67 list_del(&event->list);
68 }
69 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
70 if (!event)
71 continue;
72
73		/* We might have an event without a bound PE */
74 pe = event->pe;
75 if (pe) {
76 eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
77 pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
78 pe->phb->global_number, pe->addr);
79 eeh_handle_event(pe);
80 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
81 } else {
82 eeh_handle_event(NULL);
83 }
84
85 kfree(event);
86 }
87
88 return 0;
89}
90
91/**
92 * eeh_event_init - Start kernel thread to handle EEH events
93 *
94 * This routine is called to start the kernel thread for processing
95 * EEH event.
96 */
97int eeh_event_init(void)
98{
99 struct task_struct *t;
100 int ret = 0;
101
102 /* Initialize semaphore */
103 sema_init(&eeh_eventlist_sem, 0);
104
105 t = kthread_run(eeh_event_handler, NULL, "eehd");
106 if (IS_ERR(t)) {
107 ret = PTR_ERR(t);
108 pr_err("%s: Failed to start EEH daemon (%d)\n",
109 __func__, ret);
110 return ret;
111 }
112
113 return 0;
114}
115
116/**
117 * eeh_send_failure_event - Generate a PCI error event
118 * @pe: EEH PE
119 *
120 * This routine can be called within an interrupt context;
121 * the actual event will be delivered in a normal context
122 * (from a workqueue).
123 */
124int eeh_send_failure_event(struct eeh_pe *pe)
125{
126 unsigned long flags;
127 struct eeh_event *event;
128
129 event = kzalloc(sizeof(*event), GFP_ATOMIC);
130 if (!event) {
131 pr_err("EEH: out of memory, event not handled\n");
132 return -ENOMEM;
133 }
134 event->pe = pe;
135
136 /* We may or may not be called in an interrupt context */
137 spin_lock_irqsave(&eeh_eventlist_lock, flags);
138 list_add(&event->list, &eeh_eventlist);
139 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
140
141	/* For the EEH daemon to kick in */
142 up(&eeh_eventlist_sem);
143
144 return 0;
145}
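/*
 * Detection-side usage sketch (hypothetical caller): a platform check
 * that has just identified a frozen PE hands it off to eehd and
 * returns quickly, since this may run in interrupt context:
 *
 *	if (eeh_send_failure_event(pe))
 *		pr_err("EEH: Failed to queue recovery event\n");
 */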
146
147/**
148 * eeh_remove_event - Remove EEH event from the queue
149 * @pe: Event binding to the PE
150 *
151 * On the PowerNV platform, subsequent events might be part
152 * of an earlier one. In that case, those subsequent events
153 * are pure duplicates and unnecessary, and thus they should
154 * be removed.
155 */
156void eeh_remove_event(struct eeh_pe *pe)
157{
158 unsigned long flags;
159 struct eeh_event *event, *tmp;
160
161 spin_lock_irqsave(&eeh_eventlist_lock, flags);
162 list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
163 /*
164		 * If no valid PE is passed in, we already have an
165		 * event corresponding to a dead IOC, and all events
166		 * should be purged.
167 */
168 if (!pe) {
169 list_del(&event->list);
170 kfree(event);
171 } else if (pe->type & EEH_PE_PHB) {
172 if (event->pe && event->pe->phb == pe->phb) {
173 list_del(&event->list);
174 kfree(event);
175 }
176 } else if (event->pe == pe) {
177 list_del(&event->list);
178 kfree(event);
179 }
180 }
181 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
182}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
new file mode 100644
index 000000000000..f9450537e335
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -0,0 +1,792 @@
1/*
2 * This file implements PEs based on the information from the
3 * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
4 * All PEs are organized as a hierarchy tree. The first level of
5 * the tree is associated with the existing PHBs, since a particular
6 * PE is only meaningful within one PHB domain.
7 *
8 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/delay.h>
26#include <linux/export.h>
27#include <linux/gfp.h>
28#include <linux/init.h>
29#include <linux/kernel.h>
30#include <linux/pci.h>
31#include <linux/string.h>
32
33#include <asm/pci-bridge.h>
34#include <asm/ppc-pci.h>
35
36static LIST_HEAD(eeh_phb_pe);
37
38/**
39 * eeh_pe_alloc - Allocate PE
40 * @phb: PCI controller
41 * @type: PE type
42 *
43 * Allocate PE instance dynamically.
44 */
45static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
46{
47 struct eeh_pe *pe;
48
49 /* Allocate PHB PE */
50 pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
51 if (!pe) return NULL;
52
53 /* Initialize PHB PE */
54 pe->type = type;
55 pe->phb = phb;
56 INIT_LIST_HEAD(&pe->child_list);
57 INIT_LIST_HEAD(&pe->child);
58 INIT_LIST_HEAD(&pe->edevs);
59
60 return pe;
61}
62
63/**
64 * eeh_phb_pe_create - Create PHB PE
65 * @phb: PCI controller
66 *
67 * The function should be called while the PHB is detected during
68 * system boot or PCI hotplug in order to create PHB PE.
69 */
70int eeh_phb_pe_create(struct pci_controller *phb)
71{
72 struct eeh_pe *pe;
73
74 /* Allocate PHB PE */
75 pe = eeh_pe_alloc(phb, EEH_PE_PHB);
76 if (!pe) {
77 pr_err("%s: out of memory!\n", __func__);
78 return -ENOMEM;
79 }
80
81 /* Put it into the list */
82 list_add_tail(&pe->child, &eeh_phb_pe);
83
84 pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
85
86 return 0;
87}
88
89/**
90 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
91 * @phb: PCI controller
92 *
93 * The overall PEs form a hierarchy tree. The first layer of the
94 * hierarchy tree is composed of PHB PEs. The function is used
95 * to retrieve the corresponding PHB PE according to the given PHB.
96 */
97struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
98{
99 struct eeh_pe *pe;
100
101 list_for_each_entry(pe, &eeh_phb_pe, child) {
102 /*
103 * Actually, we needn't check the type since
104 * the PE for PHB has been determined when that
105 * was created.
106 */
107 if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
108 return pe;
109 }
110
111 return NULL;
112}
113
114/**
115 * eeh_pe_next - Retrieve the next PE in the tree
116 * @pe: current PE
117 * @root: root PE
118 *
119 * The function is used to retrieve the next PE in the
120 * PE hierarchy tree.
121 */
122static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
123 struct eeh_pe *root)
124{
125 struct list_head *next = pe->child_list.next;
126
127 if (next == &pe->child_list) {
128 while (1) {
129 if (pe == root)
130 return NULL;
131 next = pe->child.next;
132 if (next != &pe->parent->child_list)
133 break;
134 pe = pe->parent;
135 }
136 }
137
138 return list_entry(next, struct eeh_pe, child);
139}
140
141/**
142 * eeh_pe_traverse - Traverse PEs in the specified PHB
143 * @root: root PE
144 * @fn: callback
145 * @flag: extra parameter to callback
146 *
147 * The function is used to traverse the specified PE and its
148 * child PEs. The traversal terminates once the callback returns
149 * something other than NULL, or there are no more PEs to be
150 * traversed.
151 */
152void *eeh_pe_traverse(struct eeh_pe *root,
153 eeh_traverse_func fn, void *flag)
154{
155 struct eeh_pe *pe;
156 void *ret;
157
158 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
159 ret = fn(pe, flag);
160 if (ret) return ret;
161 }
162
163 return NULL;
164}
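/*
 * Callback usage sketch (hypothetical): count the PEs under a root.
 * Returning NULL keeps the walk going; any non-NULL value would stop
 * eeh_pe_traverse() early.
 *
 * Usage: int n = 0; eeh_pe_traverse(root, __eeh_pe_count_sketch, &n);
 */
static void *__eeh_pe_count_sketch(void *data, void *flag)
{
	int *cnt = flag;

	(*cnt)++;
	return NULL;
}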
165
166/**
167 * eeh_pe_dev_traverse - Traverse the devices from the PE
168 * @root: EEH PE
169 * @fn: function callback
170 * @flag: extra parameter to callback
171 *
172 * The function is used to traverse the devices of the specified
173 * PE and its child PEs.
174 */
175void *eeh_pe_dev_traverse(struct eeh_pe *root,
176 eeh_traverse_func fn, void *flag)
177{
178 struct eeh_pe *pe;
179 struct eeh_dev *edev, *tmp;
180 void *ret;
181
182 if (!root) {
183 pr_warning("%s: Invalid PE %p\n", __func__, root);
184 return NULL;
185 }
186
187 /* Traverse root PE */
188 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
189 eeh_pe_for_each_dev(pe, edev, tmp) {
190 ret = fn(edev, flag);
191 if (ret)
192 return ret;
193 }
194 }
195
196 return NULL;
197}
198
199/**
200 * __eeh_pe_get - Check the PE address
201 * @data: EEH PE
202 * @flag: EEH device
203 *
204 * One particular PE can be identified by its PE address or
205 * its traditional BDF address, which is composed of the
206 * Bus/Device/Function numbers. The extra data referred to by flag
207 * indicates which type of address should be used.
208 */
209static void *__eeh_pe_get(void *data, void *flag)
210{
211 struct eeh_pe *pe = (struct eeh_pe *)data;
212 struct eeh_dev *edev = (struct eeh_dev *)flag;
213
214 /* Unexpected PHB PE */
215 if (pe->type & EEH_PE_PHB)
216 return NULL;
217
218 /* We prefer PE address */
219 if (edev->pe_config_addr &&
220 (edev->pe_config_addr == pe->addr))
221 return pe;
222
223 /* Try BDF address */
224 if (edev->config_addr &&
225 (edev->config_addr == pe->config_addr))
226 return pe;
227
228 return NULL;
229}
230
231/**
232 * eeh_pe_get - Search PE based on the given address
233 * @edev: EEH device
234 *
235 * Search the corresponding PE based on the specified address which
236 * is included in the eeh device. The function is used to check if
237 * the associated PE has been created against the PE address. Note
238 * that the PE address has 2 formats: the traditional PE address,
239 * which is composed of PCI bus/device/function numbers, or the
240 * unified PE address.
241 */
242struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
243{
244 struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
245 struct eeh_pe *pe;
246
247 pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
248
249 return pe;
250}
251
252/**
253 * eeh_pe_get_parent - Retrieve the parent PE
254 * @edev: EEH device
255 *
256 * All the PEs existing in the system are organized as a hierarchy
257 * tree. The function is used to retrieve the parent PE according
258 * to the parent EEH device.
259 */
260static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
261{
262 struct device_node *dn;
263 struct eeh_dev *parent;
264
265 /*
266	 * It might be the case that an indirect parent EEH
267	 * device already has an associated PE while the direct
268	 * parent EEH device doesn't have one yet.
269 */
270 dn = edev->dn->parent;
271 while (dn) {
272 /* We're poking out of PCI territory */
273 if (!PCI_DN(dn)) return NULL;
274
275 parent = of_node_to_eeh_dev(dn);
276 /* We're poking out of PCI territory */
277 if (!parent) return NULL;
278
279 if (parent->pe)
280 return parent->pe;
281
282 dn = dn->parent;
283 }
284
285 return NULL;
286}
287
288/**
289 * eeh_add_to_parent_pe - Add EEH device to parent PE
290 * @edev: EEH device
291 *
292 * Add EEH device to the parent PE. If the parent PE already
293 * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
294 * we have to create a new PE to hold the EEH device, and the new
295 * PE will be linked to its parent PE as well.
296 */
297int eeh_add_to_parent_pe(struct eeh_dev *edev)
298{
299 struct eeh_pe *pe, *parent;
300
301 /*
302	 * Search whether the PE already exists according to
303	 * the PE address. If it already exists, the PE should
304	 * be composed of a PCI bus and its subordinate
305	 * components.
306 */
307 pe = eeh_pe_get(edev);
308 if (pe && !(pe->type & EEH_PE_INVALID)) {
309 if (!edev->pe_config_addr) {
310 pr_err("%s: PE with addr 0x%x already exists\n",
311 __func__, edev->config_addr);
312 return -EEXIST;
313 }
314
315 /* Mark the PE as type of PCI bus */
316 pe->type = EEH_PE_BUS;
317 edev->pe = pe;
318
319 /* Put the edev to PE */
320 list_add_tail(&edev->list, &pe->edevs);
321 pr_debug("EEH: Add %s to Bus PE#%x\n",
322 edev->dn->full_name, pe->addr);
323
324 return 0;
325 } else if (pe && (pe->type & EEH_PE_INVALID)) {
326 list_add_tail(&edev->list, &pe->edevs);
327 edev->pe = pe;
328 /*
329		 * We get here because of PCI hotplug caused by EEH
330		 * recovery. We need to clear EEH_PE_INVALID up to the top.
331 */
332 parent = pe;
333 while (parent) {
334 if (!(parent->type & EEH_PE_INVALID))
335 break;
336 parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
337 parent = parent->parent;
338 }
339 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
340 edev->dn->full_name, pe->addr, pe->parent->addr);
341
342 return 0;
343 }
344
345 /* Create a new EEH PE */
346 pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
347 if (!pe) {
348 pr_err("%s: out of memory!\n", __func__);
349 return -ENOMEM;
350 }
351 pe->addr = edev->pe_config_addr;
352 pe->config_addr = edev->config_addr;
353
354 /*
355	 * While doing a PE reset, we probably hot-reset the
356	 * upstream bridge. However, the PCI devices, including
357	 * the associated EEH devices, might be removed while the
358	 * EEH core is doing recovery, so it isn't safe to retrieve
359	 * the bridge through a downstream EEH device. We have to
360	 * trace the parent PCI bus, then the upstream bridge.
361 */
362 if (eeh_probe_mode_dev())
363 pe->bus = eeh_dev_to_pci_dev(edev)->bus;
364
365 /*
366 * Put the new EEH PE into hierarchy tree. If the parent
367 * can't be found, the newly created PE will be attached
368 * to PHB directly. Otherwise, we have to associate the
369 * PE with its parent.
370 */
371 parent = eeh_pe_get_parent(edev);
372 if (!parent) {
373 parent = eeh_phb_pe_get(edev->phb);
374 if (!parent) {
375 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
376 __func__, edev->phb->global_number);
377 edev->pe = NULL;
378 kfree(pe);
379 return -EEXIST;
380 }
381 }
382 pe->parent = parent;
383
384 /*
385 * Put the newly created PE into the child list and
386 * link the EEH device accordingly.
387 */
388 list_add_tail(&pe->child, &parent->child_list);
389 list_add_tail(&edev->list, &pe->edevs);
390 edev->pe = pe;
391 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
392 edev->dn->full_name, pe->addr, pe->parent->addr);
393
394 return 0;
395}
396
397/**
398 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
399 * @edev: EEH device
400 *
401 * The PE hierarchy tree might be changed when doing PCI hotplug.
402 * Also, the PCI devices or buses could be removed from the system
403 * during EEH recovery. So we have to call this function to remove
404 * the corresponding PE accordingly, if necessary.
405 */
406int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
407{
408 struct eeh_pe *pe, *parent, *child;
409 int cnt;
410
411 if (!edev->pe) {
412 pr_debug("%s: No PE found for EEH device %s\n",
413 __func__, edev->dn->full_name);
414 return -EEXIST;
415 }
416
417 /* Remove the EEH device */
418 pe = edev->pe;
419 edev->pe = NULL;
420 list_del(&edev->list);
421
422 /*
423 * Check if the parent PE includes any EEH devices.
424 * If not, we should delete that. Also, we should
425 * delete the parent PE if it doesn't have associated
426 * child PEs and EEH devices.
427 */
428 while (1) {
429 parent = pe->parent;
430 if (pe->type & EEH_PE_PHB)
431 break;
432
433 if (!(pe->state & EEH_PE_KEEP)) {
434 if (list_empty(&pe->edevs) &&
435 list_empty(&pe->child_list)) {
436 list_del(&pe->child);
437 kfree(pe);
438 } else {
439 break;
440 }
441 } else {
442 if (list_empty(&pe->edevs)) {
443 cnt = 0;
444 list_for_each_entry(child, &pe->child_list, child) {
445 if (!(child->type & EEH_PE_INVALID)) {
446 cnt++;
447 break;
448 }
449 }
450
451 if (!cnt)
452 pe->type |= EEH_PE_INVALID;
453 else
454 break;
455 }
456 }
457
458 pe = parent;
459 }
460
461 return 0;
462}
463
464/**
465 * eeh_pe_update_time_stamp - Update PE's frozen time stamp
466 * @pe: EEH PE
467 *
468 * We keep a time stamp for each PE to trace its freezes within
469 * the last hour. The function should be called to update the time
470 * stamp on the first error of a specific PE. On the other hand,
471 * we needn't account for errors that happened over an hour ago.
472 */
473void eeh_pe_update_time_stamp(struct eeh_pe *pe)
474{
475 struct timeval tstamp;
476
477 if (!pe) return;
478
479 if (pe->freeze_count <= 0) {
480 pe->freeze_count = 0;
481 do_gettimeofday(&pe->tstamp);
482 } else {
483 do_gettimeofday(&tstamp);
484 if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
485 pe->tstamp = tstamp;
486 pe->freeze_count = 0;
487 }
488 }
489}
490
491/**
492 * __eeh_pe_state_mark - Mark the state for the PE
493 * @data: EEH PE
494 * @flag: state
495 *
496 * The function is used to mark the indicated state for the given
497 * PE. Also, the associated PCI devices will be put into IO frozen
498 * state as well.
499 */
500static void *__eeh_pe_state_mark(void *data, void *flag)
501{
502 struct eeh_pe *pe = (struct eeh_pe *)data;
503 int state = *((int *)flag);
504 struct eeh_dev *edev, *tmp;
505 struct pci_dev *pdev;
506
507 /*
508 * Mark the PE with the indicated state. Also,
509 * the associated PCI device will be put into
510 * I/O frozen state to avoid I/O accesses from
511 * the PCI device driver.
512 */
513 pe->state |= state;
514 eeh_pe_for_each_dev(pe, edev, tmp) {
515 pdev = eeh_dev_to_pci_dev(edev);
516 if (pdev)
517 pdev->error_state = pci_channel_io_frozen;
518 }
519
520 return NULL;
521}
522
523/**
524 * eeh_pe_state_mark - Mark specified state for PE and its associated device
525 * @pe: EEH PE
526 *
527 * EEH error affects the current PE and its child PEs. The function
528 * is used to mark appropriate state for the affected PEs and the
529 * associated devices.
530 */
531void eeh_pe_state_mark(struct eeh_pe *pe, int state)
532{
533 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
534}
535
536/**
537 * __eeh_pe_state_clear - Clear state for the PE
538 * @data: EEH PE
539 * @flag: state
540 *
541 * The function is used to clear the indicated state from the
542 * given PE. Besides, we also clear the check count of the PE
543 * as well.
544 */
545static void *__eeh_pe_state_clear(void *data, void *flag)
546{
547 struct eeh_pe *pe = (struct eeh_pe *)data;
548 int state = *((int *)flag);
549
550 pe->state &= ~state;
551 pe->check_count = 0;
552
553 return NULL;
554}
555
556/**
557 * eeh_pe_state_clear - Clear state for the PE and its children
558 * @pe: PE
559 * @state: state to be cleared
560 *
561 * When the PE and its children have recovered from an error,
562 * we need to clear the error state. The function is used for
563 * that purpose.
564 */
565void eeh_pe_state_clear(struct eeh_pe *pe, int state)
566{
567 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
568}
569
570/*
571 * Some PCI bridges (e.g. PLX bridges) have primary/secondary
572 * buses assigned explicitly by firmware, and we probably have
573 * lost that after reset. So we have to delay the check until
574 * the PCI-CFG registers have been restored for the parent
575 * bridge.
576 *
577 * Don't use the normal PCI-CFG accessors, which have probably
578 * been blocked on the normal path during this stage. Instead, use
579 * the eeh operations, which are always permitted.
580 */
581static void eeh_bridge_check_link(struct eeh_dev *edev,
582 struct device_node *dn)
583{
584 int cap;
585 uint32_t val;
586 int timeout = 0;
587
588 /*
589 * We only check root port and downstream ports of
590 * PCIe switches
591 */
592 if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
593 return;
594
595 pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
596 __func__, edev->phb->global_number,
597 edev->config_addr >> 8,
598 PCI_SLOT(edev->config_addr & 0xFF),
599 PCI_FUNC(edev->config_addr & 0xFF));
600
601 /* Check slot status */
602 cap = edev->pcie_cap;
603 eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
604 if (!(val & PCI_EXP_SLTSTA_PDS)) {
605 pr_debug(" No card in the slot (0x%04x) !\n", val);
606 return;
607 }
608
609 /* Check power status if we have the capability */
610 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
611 if (val & PCI_EXP_SLTCAP_PCP) {
612 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
613 if (val & PCI_EXP_SLTCTL_PCC) {
614 pr_debug(" In power-off state, power it on ...\n");
615 val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
616 val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
617 eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
618 msleep(2 * 1000);
619 }
620 }
621
622 /* Enable link */
623 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
624 val &= ~PCI_EXP_LNKCTL_LD;
625 eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
626
627 /* Check link */
628 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
629 if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
630 pr_debug(" No link reporting capability (0x%08x) \n", val);
631 msleep(1000);
632 return;
633 }
634
635	/* Wait until the link is up, or timeout (5s) */
636 timeout = 0;
637 while (timeout < 5000) {
638 msleep(20);
639 timeout += 20;
640
641 eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
642 if (val & PCI_EXP_LNKSTA_DLLLA)
643 break;
644 }
645
646 if (val & PCI_EXP_LNKSTA_DLLLA)
647 pr_debug(" Link up (%s)\n",
648 (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
649 else
650 pr_debug(" Link not ready (0x%04x)\n", val);
651}
652
653#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
654#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
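/*
 * Worked example of the byte swizzle above: edev->config_space[] holds
 * u32 words, so a config-space byte offset OFF has to be redirected to
 * the matching byte within its word. For PCI_CACHE_LINE_SIZE (OFF = 0xC):
 *
 *	BYTE_SWAP(12) = 8*(12/4) + 3 - 12 = 24 + 3 - 12 = 15
 *
 * i.e. byte 3 of word 3 in the u8 view of the saved config space.
 */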
655
656static void eeh_restore_bridge_bars(struct eeh_dev *edev,
657 struct device_node *dn)
658{
659 int i;
660
661 /*
662 * Device BARs: 0x10 - 0x18
663 * Bus numbers and windows: 0x18 - 0x30
664 */
665 for (i = 4; i < 13; i++)
666 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
667 /* Rom: 0x38 */
668 eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
669
670 /* Cache line & Latency timer: 0xC 0xD */
671 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
672 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
673 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
674 SAVED_BYTE(PCI_LATENCY_TIMER));
675	/* Max latency, min grant, interrupt pin and line: 0x3C */
676 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
677
678 /* PCI Command: 0x4 */
679 eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
680
681 /* Check the PCIe link is ready */
682 eeh_bridge_check_link(edev, dn);
683}
684
685static void eeh_restore_device_bars(struct eeh_dev *edev,
686 struct device_node *dn)
687{
688 int i;
689 u32 cmd;
690
691 for (i = 4; i < 10; i++)
692 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
693 /* 12 == Expansion ROM Address */
694 eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
695
696 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
697 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
698 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
699 SAVED_BYTE(PCI_LATENCY_TIMER));
700
701 /* max latency, min grant, interrupt pin and line */
702 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
703
704 /*
705 * Restore PERR & SERR bits, some devices require it,
706 * don't touch the other command bits
707 */
708 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
709 if (edev->config_space[1] & PCI_COMMAND_PARITY)
710 cmd |= PCI_COMMAND_PARITY;
711 else
712 cmd &= ~PCI_COMMAND_PARITY;
713 if (edev->config_space[1] & PCI_COMMAND_SERR)
714 cmd |= PCI_COMMAND_SERR;
715 else
716 cmd &= ~PCI_COMMAND_SERR;
717 eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
718}
719
720/**
721 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
722 * @data: EEH device
723 * @flag: Unused
724 *
725 * Loads the PCI configuration space base address registers,
726 * the expansion ROM base address, the latency timer, etc.
727 * from the saved values in the device node.
728 */
729static void *eeh_restore_one_device_bars(void *data, void *flag)
730{
731 struct eeh_dev *edev = (struct eeh_dev *)data;
732 struct device_node *dn = eeh_dev_to_of_node(edev);
733
734 /* Do special restore for bridges */
735 if (edev->mode & EEH_DEV_BRIDGE)
736 eeh_restore_bridge_bars(edev, dn);
737 else
738 eeh_restore_device_bars(edev, dn);
739
740 return NULL;
741}
742
743/**
744 * eeh_pe_restore_bars - Restore the PCI config space info
745 * @pe: EEH PE
746 *
747 * This routine performs a recursive walk to the children
748 * of this device as well.
749 */
750void eeh_pe_restore_bars(struct eeh_pe *pe)
751{
752 /*
753 * We needn't take the EEH lock since eeh_pe_dev_traverse()
754 * will take that.
755 */
756 eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
757}
758
759/**
760 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
761 * @pe: EEH PE
762 *
763 * Retrieve the PCI bus according to the given PE. Basically,
764 * there are 3 types of PEs: PHB/Bus/Device. For a PHB PE, the
765 * primary PCI bus is retrieved; the parent bus is returned for
766 * a Bus PE. However, there is no dedicated PCI bus for a
767 * Device PE.
768 */
769struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
770{
771 struct pci_bus *bus = NULL;
772 struct eeh_dev *edev;
773 struct pci_dev *pdev;
774
775 if (pe->type & EEH_PE_PHB) {
776 bus = pe->phb->bus;
777 } else if (pe->type & EEH_PE_BUS ||
778 pe->type & EEH_PE_DEVICE) {
779 if (pe->bus) {
780 bus = pe->bus;
781 goto out;
782 }
783
784 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
785 pdev = eeh_dev_to_pci_dev(edev);
786 if (pdev)
787 bus = pdev->bus;
788 }
789
790out:
791 return bus;
792}
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
new file mode 100644
index 000000000000..5d753d4f2c75
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -0,0 +1,95 @@
1/*
2 * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
3 * Copyright IBM Corporation 2007
4 * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
24 */
25#include <linux/pci.h>
26#include <linux/stat.h>
27#include <asm/ppc-pci.h>
28#include <asm/pci-bridge.h>
29
30/**
31 * EEH_SHOW_ATTR -- Create sysfs entry for an EEH statistic
32 * @_name: name of file in sysfs directory
33 * @_memb: name of member in struct pci_dn to access
34 * @_format: printf format for display
35 *
36 * All of the attributes look very similar, so just
37 * auto-gen a cut-n-paste routine to display them.
38 */
39#define EEH_SHOW_ATTR(_name,_memb,_format) \
40static ssize_t eeh_show_##_name(struct device *dev, \
41 struct device_attribute *attr, char *buf) \
42{ \
43 struct pci_dev *pdev = to_pci_dev(dev); \
44 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \
45 \
46 if (!edev) \
47 return 0; \
48 \
49 return sprintf(buf, _format "\n", edev->_memb); \
50} \
51static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
52
53EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
54EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
55EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
56
57void eeh_sysfs_add_device(struct pci_dev *pdev)
58{
59 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
60	int rc = 0;
61
62 if (edev && (edev->mode & EEH_DEV_SYSFS))
63 return;
64
65 rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
66 rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
67 rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
68
69 if (rc)
70 printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
71 else if (edev)
72 edev->mode |= EEH_DEV_SYSFS;
73}
74
75void eeh_sysfs_remove_device(struct pci_dev *pdev)
76{
77 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
78
79 /*
80	 * The parent directory might have been removed. We needn't
81	 * continue in that case.
82 */
83 if (!pdev->dev.kobj.sd) {
84 if (edev)
85 edev->mode &= ~EEH_DEV_SYSFS;
86 return;
87 }
88
89 device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
90 device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
91 device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
92
93 if (edev)
94 edev->mode &= ~EEH_DEV_SYSFS;
95}
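For reference, this is what one EEH_SHOW_ATTR() instance expands to after preprocessing; it is hand-expanded from the macro above, not separate code in the tree:

	/* Hand-expanded form of EEH_SHOW_ATTR(eeh_mode, mode, "0x%x") */
	static ssize_t eeh_show_eeh_mode(struct device *dev,
					 struct device_attribute *attr, char *buf)
	{
		struct pci_dev *pdev = to_pci_dev(dev);
		struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);

		if (!edev)
			return 0;

		return sprintf(buf, "0x%x\n", edev->mode);
	}
	static DEVICE_ATTR(eeh_mode, S_IRUGO, eeh_show_eeh_mode, NULL);

The resulting files appear per PCI device in sysfs as eeh_mode, eeh_config_addr and eeh_pe_config_addr, each read-only (S_IRUGO).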
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 8741c854e03d..2bd0b885b0fe 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -449,15 +449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
449 449
450#ifdef CONFIG_PPC_BOOK3S_64 450#ifdef CONFIG_PPC_BOOK3S_64
451BEGIN_FTR_SECTION 451BEGIN_FTR_SECTION
452 /*
453 * Back up the TAR across context switches. Note that the TAR is not
454 * available for use in the kernel. (To provide this, the TAR should
455 * be backed up/restored on exception entry/exit instead, and be in
456 * pt_regs. FIXME, this should be in pt_regs anyway (for debug).)
457 */
458 mfspr r0,SPRN_TAR
459 std r0,THREAD_TAR(r3)
460
461 /* Event based branch registers */ 452 /* Event based branch registers */
462 mfspr r0, SPRN_BESCR 453 mfspr r0, SPRN_BESCR
463 std r0, THREAD_BESCR(r3) 454 std r0, THREAD_BESCR(r3)
@@ -584,9 +575,34 @@ BEGIN_FTR_SECTION
584 ld r7,DSCR_DEFAULT@toc(2) 575 ld r7,DSCR_DEFAULT@toc(2)
585 ld r0,THREAD_DSCR(r4) 576 ld r0,THREAD_DSCR(r4)
586 cmpwi r6,0 577 cmpwi r6,0
578 li r8, FSCR_DSCR
587 bne 1f 579 bne 1f
588 ld r0,0(r7) 580 ld r0,0(r7)
5891: cmpd r0,r25 581 b 3f
5821:
583 BEGIN_FTR_SECTION_NESTED(70)
584 mfspr r6, SPRN_FSCR
585 or r6, r6, r8
586 mtspr SPRN_FSCR, r6
587 BEGIN_FTR_SECTION_NESTED(69)
588 mfspr r6, SPRN_HFSCR
589 or r6, r6, r8
590 mtspr SPRN_HFSCR, r6
591 END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
592 b 4f
593 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
5943:
595 BEGIN_FTR_SECTION_NESTED(70)
596 mfspr r6, SPRN_FSCR
597 andc r6, r6, r8
598 mtspr SPRN_FSCR, r6
599 BEGIN_FTR_SECTION_NESTED(69)
600 mfspr r6, SPRN_HFSCR
601 andc r6, r6, r8
602 mtspr SPRN_HFSCR, r6
603 END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
604 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
6054: cmpd r0,r25
590 beq 2f 606 beq 2f
591 mtspr SPRN_DSCR,r0 607 mtspr SPRN_DSCR,r0
5922: 6082:
@@ -629,21 +645,43 @@ _GLOBAL(ret_from_except_lite)
629 645
630 CURRENT_THREAD_INFO(r9, r1) 646 CURRENT_THREAD_INFO(r9, r1)
631 ld r3,_MSR(r1) 647 ld r3,_MSR(r1)
648#ifdef CONFIG_PPC_BOOK3E
649 ld r10,PACACURRENT(r13)
650#endif /* CONFIG_PPC_BOOK3E */
632 ld r4,TI_FLAGS(r9) 651 ld r4,TI_FLAGS(r9)
633 andi. r3,r3,MSR_PR 652 andi. r3,r3,MSR_PR
634 beq resume_kernel 653 beq resume_kernel
654#ifdef CONFIG_PPC_BOOK3E
655 lwz r3,(THREAD+THREAD_DBCR0)(r10)
656#endif /* CONFIG_PPC_BOOK3E */
635 657
636 /* Check current_thread_info()->flags */ 658 /* Check current_thread_info()->flags */
637 andi. r0,r4,_TIF_USER_WORK_MASK 659 andi. r0,r4,_TIF_USER_WORK_MASK
660#ifdef CONFIG_PPC_BOOK3E
661 bne 1f
662 /*
663 * Check to see if the dbcr0 register is set up to debug.
664 * Use the internal debug mode bit to do this.
665 */
666 andis. r0,r3,DBCR0_IDM@h
638 beq restore 667 beq restore
639 668 mfmsr r0
640 andi. r0,r4,_TIF_NEED_RESCHED 669 rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
641 beq 1f 670 mtmsr r0
671 mtspr SPRN_DBCR0,r3
672 li r10, -1
673 mtspr SPRN_DBSR,r10
674 b restore
675#else
676 beq restore
677#endif
6781: andi. r0,r4,_TIF_NEED_RESCHED
679 beq 2f
642 bl .restore_interrupts 680 bl .restore_interrupts
643 SCHEDULE_USER 681 SCHEDULE_USER
644 b .ret_from_except_lite 682 b .ret_from_except_lite
645 683
6461: bl .save_nvgprs 6842: bl .save_nvgprs
647 bl .restore_interrupts 685 bl .restore_interrupts
648 addi r3,r1,STACK_FRAME_OVERHEAD 686 addi r3,r1,STACK_FRAME_OVERHEAD
649 bl .do_notify_resume 687 bl .do_notify_resume
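The nested feature sections in the DSCR path above are dense; here is a hedged C rendering of the logic they implement on the restore side. The dscr_inherit flag name and the dscr_default symbol are assumptions drawn from the surrounding kernel, and the real code stays in assembly:

	/* Illustrative C equivalent (not in the tree): if the incoming thread
	 * carries a custom DSCR, grant userspace direct DSCR access via
	 * FSCR[DSCR] (and HFSCR[DSCR] when running in hypervisor mode);
	 * otherwise revoke it so later accesses trap and can be tracked.
	 */
	static void restore_dscr_sketch(struct thread_struct *t)
	{
		bool custom = t->dscr_inherit != 0;	/* assumed field */
		unsigned long dscr = custom ? t->dscr : dscr_default;

		if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
			unsigned long fscr = mfspr(SPRN_FSCR);

			fscr = custom ? (fscr | FSCR_DSCR) : (fscr & ~FSCR_DSCR);
			mtspr(SPRN_FSCR, fscr);

			if (cpu_has_feature(CPU_FTR_HVMODE)) {
				unsigned long hfscr = mfspr(SPRN_HFSCR);

				hfscr = custom ? (hfscr | FSCR_DSCR)
					       : (hfscr & ~FSCR_DSCR);
				mtspr(SPRN_HFSCR, hfscr);
			}
		}

		/* the assembly compares against the outgoing value in r25 */
		if (dscr != mfspr(SPRN_DSCR))
			mtspr(SPRN_DSCR, dscr);
	}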
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 40e4a17c8ba0..902ca3c6b4b6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -341,10 +341,17 @@ vsx_unavailable_pSeries_1:
341 EXCEPTION_PROLOG_0(PACA_EXGEN) 341 EXCEPTION_PROLOG_0(PACA_EXGEN)
342 b vsx_unavailable_pSeries 342 b vsx_unavailable_pSeries
343 343
344facility_unavailable_trampoline:
344 . = 0xf60 345 . = 0xf60
345 SET_SCRATCH0(r13) 346 SET_SCRATCH0(r13)
346 EXCEPTION_PROLOG_0(PACA_EXGEN) 347 EXCEPTION_PROLOG_0(PACA_EXGEN)
347 b tm_unavailable_pSeries 348 b facility_unavailable_pSeries
349
350hv_facility_unavailable_trampoline:
351 . = 0xf80
352 SET_SCRATCH0(r13)
353 EXCEPTION_PROLOG_0(PACA_EXGEN)
354 b facility_unavailable_hv
348 355
349#ifdef CONFIG_CBE_RAS 356#ifdef CONFIG_CBE_RAS
350 STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) 357 STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
@@ -522,8 +529,10 @@ denorm_done:
522 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) 529 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
523 STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) 530 STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
524 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) 531 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
525 STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) 532 STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
526 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) 533 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60)
534 STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
535 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
527 536
528/* 537/*
529 * An interrupt came in while soft-disabled. We set paca->irq_happened, then: 538 * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -793,14 +802,10 @@ system_call_relon_pSeries:
793 STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) 802 STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
794 803
795 . = 0x4e00 804 . = 0x4e00
796 SET_SCRATCH0(r13) 805 b . /* Can't happen, see v2.07 Book III-S section 6.5 */
797 EXCEPTION_PROLOG_0(PACA_EXGEN)
798 b h_data_storage_relon_hv
799 806
800 . = 0x4e20 807 . = 0x4e20
801 SET_SCRATCH0(r13) 808 b . /* Can't happen, see v2.07 Book III-S section 6.5 */
802 EXCEPTION_PROLOG_0(PACA_EXGEN)
803 b h_instr_storage_relon_hv
804 809
805 . = 0x4e40 810 . = 0x4e40
806 SET_SCRATCH0(r13) 811 SET_SCRATCH0(r13)
@@ -808,9 +813,7 @@ system_call_relon_pSeries:
808 b emulation_assist_relon_hv 813 b emulation_assist_relon_hv
809 814
810 . = 0x4e60 815 . = 0x4e60
811 SET_SCRATCH0(r13) 816 b . /* Can't happen, see v2.07 Book III-S section 6.5 */
812 EXCEPTION_PROLOG_0(PACA_EXGEN)
813 b hmi_exception_relon_hv
814 817
815 . = 0x4e80 818 . = 0x4e80
816 SET_SCRATCH0(r13) 819 SET_SCRATCH0(r13)
@@ -835,11 +838,17 @@ vsx_unavailable_relon_pSeries_1:
835 EXCEPTION_PROLOG_0(PACA_EXGEN) 838 EXCEPTION_PROLOG_0(PACA_EXGEN)
836 b vsx_unavailable_relon_pSeries 839 b vsx_unavailable_relon_pSeries
837 840
838tm_unavailable_relon_pSeries_1: 841facility_unavailable_relon_trampoline:
839 . = 0x4f60 842 . = 0x4f60
840 SET_SCRATCH0(r13) 843 SET_SCRATCH0(r13)
841 EXCEPTION_PROLOG_0(PACA_EXGEN) 844 EXCEPTION_PROLOG_0(PACA_EXGEN)
842 b tm_unavailable_relon_pSeries 845 b facility_unavailable_relon_pSeries
846
847hv_facility_unavailable_relon_trampoline:
848 . = 0x4f80
849 SET_SCRATCH0(r13)
850 EXCEPTION_PROLOG_0(PACA_EXGEN)
851 b hv_facility_unavailable_relon_hv
843 852
844 STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) 853 STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
845#ifdef CONFIG_PPC_DENORMALISATION 854#ifdef CONFIG_PPC_DENORMALISATION
@@ -1165,36 +1174,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
1165 bl .vsx_unavailable_exception 1174 bl .vsx_unavailable_exception
1166 b .ret_from_except 1175 b .ret_from_except
1167 1176
1168 .align 7 1177 STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception)
1169 .globl tm_unavailable_common 1178 STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception)
1170tm_unavailable_common:
1171 EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN)
1172 bl .save_nvgprs
1173 DISABLE_INTS
1174 addi r3,r1,STACK_FRAME_OVERHEAD
1175 bl .tm_unavailable_exception
1176 b .ret_from_except
1177 1179
1178 .align 7 1180 .align 7
1179 .globl __end_handlers 1181 .globl __end_handlers
1180__end_handlers: 1182__end_handlers:
1181 1183
1182 /* Equivalents to the above handlers for relocation-on interrupt vectors */ 1184 /* Equivalents to the above handlers for relocation-on interrupt vectors */
1183 STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage)
1184 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00)
1185 STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage)
1186 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20)
1187 STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) 1185 STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
1188 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40)
1189 STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception)
1190 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60)
1191 MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) 1186 MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
1192 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80)
1193 1187
1194 STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) 1188 STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
1195 STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) 1189 STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
1196 STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) 1190 STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
1197 STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) 1191 STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
1192 STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
1198 1193
1199#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) 1194#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
1200/* 1195/*
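Both the 0xf60 and 0xf80 vectors now land in facility_unavailable_exception. A hedged sketch of the decode such a handler performs: per Power ISA v2.07 the top byte of FSCR/HFSCR carries the interruption cause, identifying which facility trapped. The use of regs->trap as the HV discriminator is an assumption here:

	/* Sketch only: name the facility behind a facility-unavailable trap. */
	static void facility_unavailable_sketch(struct pt_regs *regs)
	{
		bool hv = (regs->trap == 0xf80);	/* assumed discriminator */
		u64 fscr = hv ? mfspr(SPRN_HFSCR) : mfspr(SPRN_FSCR);
		u8 cause = fscr >> 56;			/* (H)FSCR IC field */

		pr_err("%sFacility unavailable, cause %u, at 0x%lx\n",
		       hv ? "Hypervisor " : "", cause, regs->nip);
		/* a real handler would emulate or SIGILL the task here */
	}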
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index a949bdfc9623..f0b47d1a6b0e 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
176 length_max = 512 ; /* 64 doublewords */ 176 length_max = 512 ; /* 64 doublewords */
177 /* DAWR region can't cross 512 boundary */ 177 /* DAWR region can't cross 512 boundary */
178 if ((bp->attr.bp_addr >> 10) != 178 if ((bp->attr.bp_addr >> 10) !=
179 ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) 179 ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10))
180 return -EINVAL; 180 return -EINVAL;
181 } 181 }
182 if (info->len > 182 if (info->len >
@@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
250 * we still need to single-step the instruction, but we don't 250 * we still need to single-step the instruction, but we don't
251 * generate an event. 251 * generate an event.
252 */ 252 */
253 info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
253 if (!((bp->attr.bp_addr <= dar) && 254 if (!((bp->attr.bp_addr <= dar) &&
254 (dar - bp->attr.bp_addr < bp->attr.bp_len))) 255 (dar - bp->attr.bp_addr < bp->attr.bp_len)))
255 info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; 256 info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
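The hw_breakpoint change above fixes an off-by-one in the DAWR boundary check: the block of the last watched byte is addr + len - 1, not addr + len. A small userspace demonstration (shift of 10 copied from the code above):

	#include <stdint.h>
	#include <stdio.h>

	/* Old vs fixed check for "range must not cross a block boundary". */
	static int crosses_old(uint64_t addr, uint64_t len)
	{
		return (addr >> 10) != ((addr + len) >> 10);
	}

	static int crosses_fixed(uint64_t addr, uint64_t len)
	{
		return (addr >> 10) != ((addr + len - 1) >> 10);
	}

	int main(void)
	{
		/* 8 bytes ending exactly on the boundary: a legal request */
		uint64_t addr = 0x3f8, len = 8;

		printf("old: %d fixed: %d\n",
		       crosses_old(addr, len),		/* 1: wrongly rejected */
		       crosses_fixed(addr, len));	/* 0: accepted */
		return 0;
	}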
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 8220baa46faf..16a7c2326d48 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -205,7 +205,7 @@ static int ibmebus_create_devices(const struct of_device_id *matches)
205 return ret; 205 return ret;
206} 206}
207 207
208int ibmebus_register_driver(struct of_platform_driver *drv) 208int ibmebus_register_driver(struct platform_driver *drv)
209{ 209{
210 /* If the driver uses devices that ibmebus doesn't know, add them */ 210 /* If the driver uses devices that ibmebus doesn't know, add them */
211 ibmebus_create_devices(drv->driver.of_match_table); 211 ibmebus_create_devices(drv->driver.of_match_table);
@@ -215,7 +215,7 @@ int ibmebus_register_driver(struct of_platform_driver *drv)
215} 215}
216EXPORT_SYMBOL(ibmebus_register_driver); 216EXPORT_SYMBOL(ibmebus_register_driver);
217 217
218void ibmebus_unregister_driver(struct of_platform_driver *drv) 218void ibmebus_unregister_driver(struct platform_driver *drv)
219{ 219{
220 driver_unregister(&drv->driver); 220 driver_unregister(&drv->driver);
221} 221}
@@ -338,11 +338,10 @@ static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
338static int ibmebus_bus_device_probe(struct device *dev) 338static int ibmebus_bus_device_probe(struct device *dev)
339{ 339{
340 int error = -ENODEV; 340 int error = -ENODEV;
341 struct of_platform_driver *drv; 341 struct platform_driver *drv;
342 struct platform_device *of_dev; 342 struct platform_device *of_dev;
343 const struct of_device_id *match;
344 343
345 drv = to_of_platform_driver(dev->driver); 344 drv = to_platform_driver(dev->driver);
346 of_dev = to_platform_device(dev); 345 of_dev = to_platform_device(dev);
347 346
348 if (!drv->probe) 347 if (!drv->probe)
@@ -350,9 +349,8 @@ static int ibmebus_bus_device_probe(struct device *dev)
350 349
351 of_dev_get(of_dev); 350 of_dev_get(of_dev);
352 351
353 match = of_match_device(drv->driver.of_match_table, dev); 352 if (of_driver_match_device(dev, dev->driver))
354 if (match) 353 error = drv->probe(of_dev);
355 error = drv->probe(of_dev, match);
356 if (error) 354 if (error)
357 of_dev_put(of_dev); 355 of_dev_put(of_dev);
358 356
@@ -362,7 +360,7 @@ static int ibmebus_bus_device_probe(struct device *dev)
362static int ibmebus_bus_device_remove(struct device *dev) 360static int ibmebus_bus_device_remove(struct device *dev)
363{ 361{
364 struct platform_device *of_dev = to_platform_device(dev); 362 struct platform_device *of_dev = to_platform_device(dev);
365 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 363 struct platform_driver *drv = to_platform_driver(dev->driver);
366 364
367 if (dev->driver && drv->remove) 365 if (dev->driver && drv->remove)
368 drv->remove(of_dev); 366 drv->remove(of_dev);
@@ -372,7 +370,7 @@ static int ibmebus_bus_device_remove(struct device *dev)
372static void ibmebus_bus_device_shutdown(struct device *dev) 370static void ibmebus_bus_device_shutdown(struct device *dev)
373{ 371{
374 struct platform_device *of_dev = to_platform_device(dev); 372 struct platform_device *of_dev = to_platform_device(dev);
375 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 373 struct platform_driver *drv = to_platform_driver(dev->driver);
376 374
377 if (dev->driver && drv->shutdown) 375 if (dev->driver && drv->shutdown)
378 drv->shutdown(of_dev); 376 drv->shutdown(of_dev);
@@ -419,7 +417,7 @@ struct device_attribute ibmebus_bus_device_attrs[] = {
419static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) 417static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
420{ 418{
421 struct platform_device *of_dev = to_platform_device(dev); 419 struct platform_device *of_dev = to_platform_device(dev);
422 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 420 struct platform_driver *drv = to_platform_driver(dev->driver);
423 int ret = 0; 421 int ret = 0;
424 422
425 if (dev->driver && drv->suspend) 423 if (dev->driver && drv->suspend)
@@ -430,7 +428,7 @@ static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
430static int ibmebus_bus_legacy_resume(struct device *dev) 428static int ibmebus_bus_legacy_resume(struct device *dev)
431{ 429{
432 struct platform_device *of_dev = to_platform_device(dev); 430 struct platform_device *of_dev = to_platform_device(dev);
433 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 431 struct platform_driver *drv = to_platform_driver(dev->driver);
434 int ret = 0; 432 int ret = 0;
435 433
436 if (dev->driver && drv->resume) 434 if (dev->driver && drv->resume)
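After the of_platform_driver removal above, an ibmebus client registers a plain platform_driver and its probe() no longer receives a match-table entry. A sketch of the new shape; every "example" identifier below is hypothetical:

	static const struct of_device_id example_ids[] = {
		{ .compatible = "ibm,example-ebus-dev" },	/* invented */
		{}
	};

	static int example_probe(struct platform_device *pdev)
	{
		/* the old probe(dev, match) signature is gone */
		dev_info(&pdev->dev, "probed via ibmebus\n");
		return 0;
	}

	static struct platform_driver example_driver = {
		.driver = {
			.name		= "example-ebus",
			.owner		= THIS_MODULE,
			.of_match_table	= example_ids,
		},
		.probe = example_probe,
	};

	/* registered/unregistered with ibmebus_register_driver() and
	 * ibmebus_unregister_driver() */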
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 939ea7ef0dc8..d7216c9abda1 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -85,7 +85,7 @@ int powersave_nap;
85/* 85/*
86 * Register the sysctl to set/clear powersave_nap. 86 * Register the sysctl to set/clear powersave_nap.
87 */ 87 */
88static ctl_table powersave_nap_ctl_table[]={ 88static struct ctl_table powersave_nap_ctl_table[] = {
89 { 89 {
90 .procname = "powersave-nap", 90 .procname = "powersave-nap",
91 .data = &powersave_nap, 91 .data = &powersave_nap,
@@ -95,7 +95,7 @@ static ctl_table powersave_nap_ctl_table[]={
95 }, 95 },
96 {} 96 {}
97}; 97};
98static ctl_table powersave_nap_sysctl_root[] = { 98static struct ctl_table powersave_nap_sysctl_root[] = {
99 { 99 {
100 .procname = "kernel", 100 .procname = "kernel",
101 .mode = 0555, 101 .mode = 0555,
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 50e90b7e7139..fa0b54b2a362 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -55,6 +55,7 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
55 55
56struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) 56struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
57{ 57{
58 unsigned hugepage_shift;
58 struct iowa_bus *bus; 59 struct iowa_bus *bus;
59 int token; 60 int token;
60 61
@@ -70,11 +71,17 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
70 if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) 71 if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
71 return NULL; 72 return NULL;
72 73
73 ptep = find_linux_pte(init_mm.pgd, vaddr); 74 ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
75 &hugepage_shift);
74 if (ptep == NULL) 76 if (ptep == NULL)
75 paddr = 0; 77 paddr = 0;
76 else 78 else {
79 /*
80 * we don't have hugepages backing iomem
81 */
82 WARN_ON(hugepage_shift);
77 paddr = pte_pfn(*ptep) << PAGE_SHIFT; 83 paddr = pte_pfn(*ptep) << PAGE_SHIFT;
84 }
78 bus = iowa_pci_find(vaddr, paddr); 85 bus = iowa_pci_find(vaddr, paddr);
79 86
80 if (bus == NULL) 87 if (bus == NULL)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index c0d0dbddfba1..b20ff173a671 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -36,6 +36,8 @@
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/fault-inject.h> 37#include <linux/fault-inject.h>
38#include <linux/pci.h> 38#include <linux/pci.h>
39#include <linux/iommu.h>
40#include <linux/sched.h>
39#include <asm/io.h> 41#include <asm/io.h>
40#include <asm/prom.h> 42#include <asm/prom.h>
41#include <asm/iommu.h> 43#include <asm/iommu.h>
@@ -44,6 +46,7 @@
44#include <asm/kdump.h> 46#include <asm/kdump.h>
45#include <asm/fadump.h> 47#include <asm/fadump.h>
46#include <asm/vio.h> 48#include <asm/vio.h>
49#include <asm/tce.h>
47 50
48#define DBG(...) 51#define DBG(...)
49 52
@@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
724 if (tbl->it_offset == 0) 727 if (tbl->it_offset == 0)
725 clear_bit(0, tbl->it_map); 728 clear_bit(0, tbl->it_map);
726 729
730#ifdef CONFIG_IOMMU_API
731 if (tbl->it_group) {
732 iommu_group_put(tbl->it_group);
733 BUG_ON(tbl->it_group);
734 }
735#endif
736
727 /* verify that table contains no entries */ 737 /* verify that table contains no entries */
728 if (!bitmap_empty(tbl->it_map, tbl->it_size)) 738 if (!bitmap_empty(tbl->it_map, tbl->it_size))
729 pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); 739 pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
860 free_pages((unsigned long)vaddr, get_order(size)); 870 free_pages((unsigned long)vaddr, get_order(size));
861 } 871 }
862} 872}
873
874#ifdef CONFIG_IOMMU_API
875/*
876 * SPAPR TCE API
877 */
878static void group_release(void *iommu_data)
879{
880 struct iommu_table *tbl = iommu_data;
881 tbl->it_group = NULL;
882}
883
884void iommu_register_group(struct iommu_table *tbl,
885 int pci_domain_number, unsigned long pe_num)
886{
887 struct iommu_group *grp;
888 char *name;
889
890 grp = iommu_group_alloc();
891 if (IS_ERR(grp)) {
892 pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
893 PTR_ERR(grp));
894 return;
895 }
896 tbl->it_group = grp;
897 iommu_group_set_iommudata(grp, tbl, group_release);
898 name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
899 pci_domain_number, pe_num);
900 if (!name)
901 return;
902 iommu_group_set_name(grp, name);
903 kfree(name);
904}
905
906enum dma_data_direction iommu_tce_direction(unsigned long tce)
907{
908 if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
909 return DMA_BIDIRECTIONAL;
910 else if (tce & TCE_PCI_READ)
911 return DMA_TO_DEVICE;
912 else if (tce & TCE_PCI_WRITE)
913 return DMA_FROM_DEVICE;
914 else
915 return DMA_NONE;
916}
917EXPORT_SYMBOL_GPL(iommu_tce_direction);
918
919void iommu_flush_tce(struct iommu_table *tbl)
920{
921 /* Flush/invalidate TLB caches if necessary */
922 if (ppc_md.tce_flush)
923 ppc_md.tce_flush(tbl);
924
925 /* Make sure updates are seen by hardware */
926 mb();
927}
928EXPORT_SYMBOL_GPL(iommu_flush_tce);
929
930int iommu_tce_clear_param_check(struct iommu_table *tbl,
931 unsigned long ioba, unsigned long tce_value,
932 unsigned long npages)
933{
934 /* ppc_md.tce_free() does not support any value but 0 */
935 if (tce_value)
936 return -EINVAL;
937
938 if (ioba & ~IOMMU_PAGE_MASK)
939 return -EINVAL;
940
941 ioba >>= IOMMU_PAGE_SHIFT;
942 if (ioba < tbl->it_offset)
943 return -EINVAL;
944
945 if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
946 return -EINVAL;
947
948 return 0;
949}
950EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
951
952int iommu_tce_put_param_check(struct iommu_table *tbl,
953 unsigned long ioba, unsigned long tce)
954{
955 if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
956 return -EINVAL;
957
958 if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ))
959 return -EINVAL;
960
961 if (ioba & ~IOMMU_PAGE_MASK)
962 return -EINVAL;
963
964 ioba >>= IOMMU_PAGE_SHIFT;
965 if (ioba < tbl->it_offset)
966 return -EINVAL;
967
968 if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
969 return -EINVAL;
970
971 return 0;
972}
973EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
974
975unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
976{
977 unsigned long oldtce;
978 struct iommu_pool *pool = get_pool(tbl, entry);
979
980 spin_lock(&(pool->lock));
981
982 oldtce = ppc_md.tce_get(tbl, entry);
983 if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
984 ppc_md.tce_free(tbl, entry, 1);
985 else
986 oldtce = 0;
987
988 spin_unlock(&(pool->lock));
989
990 return oldtce;
991}
992EXPORT_SYMBOL_GPL(iommu_clear_tce);
993
994int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
995 unsigned long entry, unsigned long pages)
996{
997 unsigned long oldtce;
998 struct page *page;
999
1000 for ( ; pages; --pages, ++entry) {
1001 oldtce = iommu_clear_tce(tbl, entry);
1002 if (!oldtce)
1003 continue;
1004
1005 page = pfn_to_page(oldtce >> PAGE_SHIFT);
1006 WARN_ON(!page);
1007 if (page) {
1008 if (oldtce & TCE_PCI_WRITE)
1009 SetPageDirty(page);
1010 put_page(page);
1011 }
1012 }
1013
1014 return 0;
1015}
1016EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
1017
1018/*
1019 * hwaddr is a kernel virtual address here (0xc... bazillion),
1020 * tce_build converts it to a physical address.
1021 */
1022int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
1023 unsigned long hwaddr, enum dma_data_direction direction)
1024{
1025 int ret = -EBUSY;
1026 unsigned long oldtce;
1027 struct iommu_pool *pool = get_pool(tbl, entry);
1028
1029 spin_lock(&(pool->lock));
1030
1031 oldtce = ppc_md.tce_get(tbl, entry);
1032 /* Add new entry if it is not busy */
1033 if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
1034 ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
1035
1036 spin_unlock(&(pool->lock));
1037
1038 /* if (unlikely(ret))
1039 pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
1040 __func__, hwaddr, entry << IOMMU_PAGE_SHIFT,
1041 hwaddr, ret); */
1042
1043 return ret;
1044}
1045EXPORT_SYMBOL_GPL(iommu_tce_build);
1046
1047int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
1048 unsigned long tce)
1049{
1050 int ret;
1051 struct page *page = NULL;
1052 unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK;
1053 enum dma_data_direction direction = iommu_tce_direction(tce);
1054
1055 ret = get_user_pages_fast(tce & PAGE_MASK, 1,
1056 direction != DMA_TO_DEVICE, &page);
1057 if (unlikely(ret != 1)) {
1058 /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
1059 tce, entry << IOMMU_PAGE_SHIFT, ret); */
1060 return -EFAULT;
1061 }
1062 hwaddr = (unsigned long) page_address(page) + offset;
1063
1064 ret = iommu_tce_build(tbl, entry, hwaddr, direction);
1065 if (ret)
1066 put_page(page);
1067
1068 if (ret < 0)
1069 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
1070 __func__, entry << IOMMU_PAGE_SHIFT, tce, ret);
1071
1072 return ret;
1073}
1074EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
1075
1076int iommu_take_ownership(struct iommu_table *tbl)
1077{
1078 unsigned long sz = (tbl->it_size + 7) >> 3;
1079
1080 if (tbl->it_offset == 0)
1081 clear_bit(0, tbl->it_map);
1082
1083 if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
1084		pr_err("iommu_tce: it_map is not empty\n");
1085 return -EBUSY;
1086 }
1087
1088 memset(tbl->it_map, 0xff, sz);
1089 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1090
1091 return 0;
1092}
1093EXPORT_SYMBOL_GPL(iommu_take_ownership);
1094
1095void iommu_release_ownership(struct iommu_table *tbl)
1096{
1097 unsigned long sz = (tbl->it_size + 7) >> 3;
1098
1099 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1100 memset(tbl->it_map, 0, sz);
1101
1102 /* Restore bit#0 set by iommu_init_table() */
1103 if (tbl->it_offset == 0)
1104 set_bit(0, tbl->it_map);
1105}
1106EXPORT_SYMBOL_GPL(iommu_release_ownership);
1107
1108static int iommu_add_device(struct device *dev)
1109{
1110 struct iommu_table *tbl;
1111 int ret = 0;
1112
1113 if (WARN_ON(dev->iommu_group)) {
1114 pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
1115 dev_name(dev),
1116 iommu_group_id(dev->iommu_group));
1117 return -EBUSY;
1118 }
1119
1120 tbl = get_iommu_table_base(dev);
1121 if (!tbl || !tbl->it_group) {
1122 pr_debug("iommu_tce: skipping device %s with no tbl\n",
1123 dev_name(dev));
1124 return 0;
1125 }
1126
1127 pr_debug("iommu_tce: adding %s to iommu group %d\n",
1128 dev_name(dev), iommu_group_id(tbl->it_group));
1129
1130 ret = iommu_group_add_device(tbl->it_group, dev);
1131 if (ret < 0)
1132 pr_err("iommu_tce: %s has not been added, ret=%d\n",
1133 dev_name(dev), ret);
1134
1135 return ret;
1136}
1137
1138static void iommu_del_device(struct device *dev)
1139{
1140 iommu_group_remove_device(dev);
1141}
1142
1143static int iommu_bus_notifier(struct notifier_block *nb,
1144 unsigned long action, void *data)
1145{
1146 struct device *dev = data;
1147
1148 switch (action) {
1149 case BUS_NOTIFY_ADD_DEVICE:
1150 return iommu_add_device(dev);
1151 case BUS_NOTIFY_DEL_DEVICE:
1152 iommu_del_device(dev);
1153 return 0;
1154 default:
1155 return 0;
1156 }
1157}
1158
1159static struct notifier_block tce_iommu_bus_nb = {
1160 .notifier_call = iommu_bus_notifier,
1161};
1162
1163static int __init tce_iommu_init(void)
1164{
1165 struct pci_dev *pdev = NULL;
1166
1167 BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
1168
1169 for_each_pci_dev(pdev)
1170 iommu_add_device(&pdev->dev);
1171
1172 bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
1173 return 0;
1174}
1175
1176subsys_initcall_sync(tce_iommu_init);
1177
1178#else
1179
1180void iommu_register_group(struct iommu_table *tbl,
1181 int pci_domain_number, unsigned long pe_num)
1182{
1183}
1184
1185#endif /* CONFIG_IOMMU_API */
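The exports above compose into one mapping path for a VFIO-style consumer. A condensed caller-side sketch using only functions added in this hunk; error handling and locking are abbreviated:

	/* Claimed table assumed: iommu_take_ownership(tbl) succeeded earlier. */
	static int example_map_one(struct iommu_table *tbl,
				   unsigned long ioba, unsigned long tce)
	{
		unsigned long entry = ioba >> IOMMU_PAGE_SHIFT;
		int ret;

		/* validate the window and the permission bits first */
		ret = iommu_tce_put_param_check(tbl, ioba, tce);
		if (ret)
			return ret;

		/* pins the user page and programs the hardware TCE */
		ret = iommu_put_tce_user_mode(tbl, entry, tce);
		if (ret)
			return ret;

		iommu_flush_tce(tbl);
		return 0;
	}

	/* teardown: iommu_clear_tces_and_put_pages(tbl, entry, npages),
	 * then iommu_release_ownership(tbl) */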
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ea185e0b3cae..c69440cef7af 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -116,8 +116,6 @@ static inline notrace int decrementer_check_overflow(void)
116 u64 now = get_tb_or_rtc(); 116 u64 now = get_tb_or_rtc();
117 u64 *next_tb = &__get_cpu_var(decrementers_next_tb); 117 u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
118 118
119 if (now >= *next_tb)
120 set_dec(1);
121 return now >= *next_tb; 119 return now >= *next_tb;
122} 120}
123 121
@@ -364,7 +362,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
364 seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); 362 seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
365 seq_printf(p, " Spurious interrupts\n"); 363 seq_printf(p, " Spurious interrupts\n");
366 364
367 seq_printf(p, "%*s: ", prec, "CNT"); 365 seq_printf(p, "%*s: ", prec, "PMI");
368 for_each_online_cpu(j) 366 for_each_online_cpu(j)
369 seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); 367 seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
370 seq_printf(p, " Performance monitoring interrupts\n"); 368 seq_printf(p, " Performance monitoring interrupts\n");
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 11f5b03a0b06..2156ea90eb54 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -36,12 +36,6 @@
36#include <asm/sstep.h> 36#include <asm/sstep.h>
37#include <asm/uaccess.h> 37#include <asm/uaccess.h>
38 38
39#ifdef CONFIG_PPC_ADV_DEBUG_REGS
40#define MSR_SINGLESTEP (MSR_DE)
41#else
42#define MSR_SINGLESTEP (MSR_SE)
43#endif
44
45DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 39DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
46DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); 40DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
47 41
@@ -104,19 +98,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
104 98
105static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 99static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
106{ 100{
107 /* We turn off async exceptions to ensure that the single step will 101 enable_single_step(regs);
108 * be for the instruction we have the kprobe on, if we dont its
109 * possible we'd get the single step reported for an exception handler
110 * like Decrementer or External Interrupt */
111 regs->msr &= ~MSR_EE;
112 regs->msr |= MSR_SINGLESTEP;
113#ifdef CONFIG_PPC_ADV_DEBUG_REGS
114 regs->msr &= ~MSR_CE;
115 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
116#ifdef CONFIG_PPC_47x
117 isync();
118#endif
119#endif
120 102
121 /* 103 /*
122 * On powerpc we should single step on the original 104 * On powerpc we should single step on the original
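The block deleted above moved into a shared enable_single_step() helper. Reconstructed from the removed lines (this mirrors the old logic; it is not a verbatim copy of the new header):

	#ifdef CONFIG_PPC_ADV_DEBUG_REGS
	#define MSR_SINGLESTEP	(MSR_DE)
	#else
	#define MSR_SINGLESTEP	(MSR_SE)
	#endif

	static inline void enable_single_step_sketch(struct pt_regs *regs)
	{
		/* mask async exceptions so the single step is reported for the
		 * probed instruction, not for a Decrementer/External handler */
		regs->msr &= ~MSR_EE;
		regs->msr |= MSR_SINGLESTEP;
	#ifdef CONFIG_PPC_ADV_DEBUG_REGS
		regs->msr &= ~MSR_CE;
		mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
	#ifdef CONFIG_PPC_47x
		isync();
	#endif
	#endif
	}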
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 6782221d49bd..db28032e320e 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall);
750 750
751static __init void kvm_free_tmp(void) 751static __init void kvm_free_tmp(void)
752{ 752{
753 unsigned long start, end; 753 free_reserved_area(&kvm_tmp[kvm_tmp_index],
754 754 &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
755 start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK;
756 end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
757
758 /* Free the tmp space we don't need */
759 free_reserved_area(start, end, 0, NULL);
760} 755}
761 756
762static int __init kvm_guest_init(void) 757static int __init kvm_guest_init(void)
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 48fbc2b97e95..8213ee1eb05a 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -84,22 +84,30 @@ static ssize_t dev_nvram_read(struct file *file, char __user *buf,
84 char *tmp = NULL; 84 char *tmp = NULL;
85 ssize_t size; 85 ssize_t size;
86 86
87 ret = -ENODEV; 87 if (!ppc_md.nvram_size) {
88 if (!ppc_md.nvram_size) 88 ret = -ENODEV;
89 goto out; 89 goto out;
90 }
90 91
91 ret = 0;
92 size = ppc_md.nvram_size(); 92 size = ppc_md.nvram_size();
93 if (*ppos >= size || size < 0) 93 if (size < 0) {
94 ret = size;
95 goto out;
96 }
97
98 if (*ppos >= size) {
99 ret = 0;
94 goto out; 100 goto out;
101 }
95 102
96 count = min_t(size_t, count, size - *ppos); 103 count = min_t(size_t, count, size - *ppos);
97 count = min(count, PAGE_SIZE); 104 count = min(count, PAGE_SIZE);
98 105
99 ret = -ENOMEM;
100 tmp = kmalloc(count, GFP_KERNEL); 106 tmp = kmalloc(count, GFP_KERNEL);
101 if (!tmp) 107 if (!tmp) {
108 ret = -ENOMEM;
102 goto out; 109 goto out;
110 }
103 111
104 ret = ppc_md.nvram_read(tmp, count, ppos); 112 ret = ppc_md.nvram_read(tmp, count, ppos);
105 if (ret <= 0) 113 if (ret <= 0)
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f46914a0f33e..7d22a675fe1a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1462,6 +1462,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
1462 /* Allocate bus and devices resources */ 1462 /* Allocate bus and devices resources */
1463 pcibios_allocate_bus_resources(bus); 1463 pcibios_allocate_bus_resources(bus);
1464 pcibios_claim_one_bus(bus); 1464 pcibios_claim_one_bus(bus);
1465 if (!pci_has_flag(PCI_PROBE_ONLY))
1466 pci_assign_unassigned_bus_resources(bus);
1465 1467
1466 /* Fixup EEH */ 1468 /* Fixup EEH */
1467 eeh_add_device_tree_late(bus); 1469 eeh_add_device_tree_late(bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
new file mode 100644
index 000000000000..c1e17ae68a08
--- /dev/null
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -0,0 +1,110 @@
1/*
2 * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c"
3 *
4 * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
5 * Copyright (C) 2005 International Business Machines
6 *
7 * Updates, 2005, John Rose <johnrose@austin.ibm.com>
8 * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
9 * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#include <linux/pci.h>
18#include <linux/export.h>
19#include <asm/pci-bridge.h>
20#include <asm/ppc-pci.h>
21#include <asm/firmware.h>
22#include <asm/eeh.h>
23
24/**
25 * pcibios_release_device - release PCI device
26 * @dev: PCI device
27 *
28 * The function is called before releasing the indicated PCI device.
29 */
30void pcibios_release_device(struct pci_dev *dev)
31{
32 eeh_remove_device(dev);
33}
34
35/**
36 * pcibios_remove_pci_devices - remove all devices under this bus
37 * @bus: the indicated PCI bus
38 *
39 * Remove all of the PCI devices under this bus both from the
40 * linux pci device tree, and from the powerpc EEH address cache.
41 */
42void pcibios_remove_pci_devices(struct pci_bus *bus)
43{
44 struct pci_dev *dev, *tmp;
45 struct pci_bus *child_bus;
46
47 /* First go down child busses */
48 list_for_each_entry(child_bus, &bus->children, node)
49 pcibios_remove_pci_devices(child_bus);
50
51 pr_debug("PCI: Removing devices on bus %04x:%02x\n",
52 pci_domain_nr(bus), bus->number);
53 list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
54 pr_debug(" Removing %s...\n", pci_name(dev));
55 pci_stop_and_remove_bus_device(dev);
56 }
57}
58
59EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
60
61/**
62 * pcibios_add_pci_devices - adds new pci devices to bus
63 * @bus: the indicated PCI bus
64 *
65 * This routine will find and fixup new pci devices under
66 * the indicated bus. This routine presumes that there
67 * might already be some devices under this bridge, so
68 * it carefully tries to add only new devices. (And that
69 * is how this routine differs from other, similar pcibios
70 * routines.)
71 */
72void pcibios_add_pci_devices(struct pci_bus * bus)
73{
74 int slotno, mode, pass, max;
75 struct pci_dev *dev;
76 struct device_node *dn = pci_bus_to_OF_node(bus);
77
78 eeh_add_device_tree_early(dn);
79
80 mode = PCI_PROBE_NORMAL;
81 if (ppc_md.pci_probe_mode)
82 mode = ppc_md.pci_probe_mode(bus);
83
84 if (mode == PCI_PROBE_DEVTREE) {
85 /* use ofdt-based probe */
86 of_rescan_bus(dn, bus);
87 } else if (mode == PCI_PROBE_NORMAL) {
88 /*
89 * Use legacy probe. In the partial hotplug case, we
90 * probably have grandchildren devices unplugged. So
91 * we don't check the return value from pci_scan_slot() in
92 * order for fully rescan all the way down to pick them up.
93 * They can have been removed during partial hotplug.
94 */
95 slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
96 pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
97 pcibios_setup_bus_devices(bus);
98 max = bus->busn_res.start;
99 for (pass = 0; pass < 2; pass++) {
100 list_for_each_entry(dev, &bus->devices, bus_list) {
101 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
102 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
103 max = pci_scan_bridge(bus, dev,
104 max, pass);
105 }
106 }
107 }
108 pcibios_finish_adding_to_bus(bus);
109}
110EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
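A sketch of the round trip a hotplug controller (rpaphp, for instance) is expected to make through these two entry points; getting from a slot to its pci_bus is controller-specific and elided:

	static void example_replug(struct pci_bus *bus)
	{
		/* tear down the old devices, EEH caches included */
		pcibios_remove_pci_devices(bus);

		/* hardware power-cycle / firmware reprobe happens here */

		/* rediscover whatever is present now */
		pcibios_add_pci_devices(bus);
	}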
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 2a67e9baa59f..15d9105323bf 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -128,7 +128,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
128 const char *type; 128 const char *type;
129 struct pci_slot *slot; 129 struct pci_slot *slot;
130 130
131 dev = alloc_pci_dev(); 131 dev = pci_alloc_dev(bus);
132 if (!dev) 132 if (!dev)
133 return NULL; 133 return NULL;
134 type = of_get_property(node, "device_type", NULL); 134 type = of_get_property(node, "device_type", NULL);
@@ -137,7 +137,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
137 137
138 pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); 138 pr_debug(" create device, devfn: %x, type: %s\n", devfn, type);
139 139
140 dev->bus = bus;
141 dev->dev.of_node = of_node_get(node); 140 dev->dev.of_node = of_node_get(node);
142 dev->dev.parent = bus->bridge; 141 dev->dev.parent = bus->bridge;
143 dev->dev.bus = &pci_bus_type; 142 dev->dev.bus = &pci_bus_type;
@@ -165,7 +164,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
165 pr_debug(" class: 0x%x\n", dev->class); 164 pr_debug(" class: 0x%x\n", dev->class);
166 pr_debug(" revision: 0x%x\n", dev->revision); 165 pr_debug(" revision: 0x%x\n", dev->revision);
167 166
168 dev->current_state = 4; /* unknown power state */ 167 dev->current_state = PCI_UNKNOWN; /* unknown power state */
169 dev->error_state = pci_channel_io_normal; 168 dev->error_state = pci_channel_io_normal;
170 dev->dma_mask = 0xffffffff; 169 dev->dma_mask = 0xffffffff;
171 170
@@ -231,11 +230,14 @@ void of_scan_pci_bridge(struct pci_dev *dev)
231 return; 230 return;
232 } 231 }
233 232
234 bus = pci_add_new_bus(dev->bus, dev, busrange[0]); 233 bus = pci_find_bus(pci_domain_nr(dev->bus), busrange[0]);
235 if (!bus) { 234 if (!bus) {
236 printk(KERN_ERR "Failed to create pci bus for %s\n", 235 bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
237 node->full_name); 236 if (!bus) {
238 return; 237 printk(KERN_ERR "Failed to create pci bus for %s\n",
238 node->full_name);
239 return;
240 }
239 } 241 }
240 242
241 bus->primary = dev->bus->number; 243 bus->primary = dev->bus->number;
@@ -293,6 +295,38 @@ void of_scan_pci_bridge(struct pci_dev *dev)
293} 295}
294EXPORT_SYMBOL(of_scan_pci_bridge); 296EXPORT_SYMBOL(of_scan_pci_bridge);
295 297
298static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
299 struct device_node *dn)
300{
301 struct pci_dev *dev = NULL;
302 const u32 *reg;
303 int reglen, devfn;
304
305 pr_debug(" * %s\n", dn->full_name);
306 if (!of_device_is_available(dn))
307 return NULL;
308
309 reg = of_get_property(dn, "reg", &reglen);
310 if (reg == NULL || reglen < 20)
311 return NULL;
312 devfn = (reg[0] >> 8) & 0xff;
313
314 /* Check if the PCI device is already there */
315 dev = pci_get_slot(bus, devfn);
316 if (dev) {
317 pci_dev_put(dev);
318 return dev;
319 }
320
321 /* create a new pci_dev for this device */
322 dev = of_create_pci_dev(dn, bus, devfn);
323 if (!dev)
324 return NULL;
325
326 pr_debug(" dev header type: %x\n", dev->hdr_type);
327 return dev;
328}
329
296/** 330/**
297 * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices 331 * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
298 * @node: device tree node for the PCI bus 332 * @node: device tree node for the PCI bus
@@ -303,8 +337,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
303 int rescan_existing) 337 int rescan_existing)
304{ 338{
305 struct device_node *child; 339 struct device_node *child;
306 const u32 *reg;
307 int reglen, devfn;
308 struct pci_dev *dev; 340 struct pci_dev *dev;
309 341
310 pr_debug("of_scan_bus(%s) bus no %d...\n", 342 pr_debug("of_scan_bus(%s) bus no %d...\n",
@@ -312,16 +344,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
312 344
313 /* Scan direct children */ 345 /* Scan direct children */
314 for_each_child_of_node(node, child) { 346 for_each_child_of_node(node, child) {
315 pr_debug(" * %s\n", child->full_name); 347 dev = of_scan_pci_dev(bus, child);
316 if (!of_device_is_available(child))
317 continue;
318 reg = of_get_property(child, "reg", &reglen);
319 if (reg == NULL || reglen < 20)
320 continue;
321 devfn = (reg[0] >> 8) & 0xff;
322
323 /* create a new pci_dev for this device */
324 dev = of_create_pci_dev(child, bus, devfn);
325 if (!dev) 348 if (!dev)
326 continue; 349 continue;
327 pr_debug(" dev header type: %x\n", dev->hdr_type); 350 pr_debug(" dev header type: %x\n", dev->hdr_type);
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index feb8580fdc84..c30612aad68e 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -29,25 +29,9 @@
29 29
30#ifdef CONFIG_PPC64 30#ifdef CONFIG_PPC64
31 31
32static loff_t page_map_seek( struct file *file, loff_t off, int whence) 32static loff_t page_map_seek(struct file *file, loff_t off, int whence)
33{ 33{
34 loff_t new; 34 return fixed_size_llseek(file, off, whence, PAGE_SIZE);
35 switch(whence) {
36 case 0:
37 new = off;
38 break;
39 case 1:
40 new = file->f_pos + off;
41 break;
42 case 2:
43 new = PAGE_SIZE + off;
44 break;
45 default:
46 return -EINVAL;
47 }
48 if ( new < 0 || new > PAGE_SIZE )
49 return -EINVAL;
50 return (file->f_pos = new);
51} 35}
52 36
53static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, 37static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 076d1242507a..8083be20fe5e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -600,6 +600,16 @@ struct task_struct *__switch_to(struct task_struct *prev,
600 struct ppc64_tlb_batch *batch; 600 struct ppc64_tlb_batch *batch;
601#endif 601#endif
602 602
603 /* Back up the TAR across context switches.
604 * Note that the TAR is not available for use in the kernel. (To
605 * provide this, the TAR should be backed up/restored on exception
606 * entry/exit instead, and be in pt_regs. FIXME, this should be in
607 * pt_regs anyway (for debug).)
608 * Save the TAR here before we do treclaim/trecheckpoint as these
609 * will change the TAR.
610 */
611 save_tar(&prev->thread);
612
603 __switch_to_tm(prev); 613 __switch_to_tm(prev);
604 614
605#ifdef CONFIG_SMP 615#ifdef CONFIG_SMP
@@ -916,7 +926,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
916 flush_altivec_to_thread(src); 926 flush_altivec_to_thread(src);
917 flush_vsx_to_thread(src); 927 flush_vsx_to_thread(src);
918 flush_spe_to_thread(src); 928 flush_spe_to_thread(src);
929
919 *dst = *src; 930 *dst = *src;
931
932 clear_task_ebb(dst);
933
920 return 0; 934 return 0;
921} 935}
922 936
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b6f7a99cce2..eb23ac92abb9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -559,6 +559,35 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start,
559} 559}
560#endif 560#endif
561 561
562static void __init early_reserve_mem_dt(void)
563{
564 unsigned long i, len, dt_root;
565 const __be32 *prop;
566
567 dt_root = of_get_flat_dt_root();
568
569 prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
570
571 if (!prop)
572 return;
573
574 DBG("Found new-style reserved-ranges\n");
575
576 /* Each reserved range is an (address,size) pair, 2 cells each,
577 * totalling 4 cells per range. */
578 for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
579 u64 base, size;
580
581 base = of_read_number(prop + (i * 4) + 0, 2);
582 size = of_read_number(prop + (i * 4) + 2, 2);
583
584 if (size) {
585 DBG("reserving: %llx -> %llx\n", base, size);
586 memblock_reserve(base, size);
587 }
588 }
589}
590
562static void __init early_reserve_mem(void) 591static void __init early_reserve_mem(void)
563{ 592{
564 u64 base, size; 593 u64 base, size;
@@ -574,12 +603,16 @@ static void __init early_reserve_mem(void)
574 self_size = initial_boot_params->totalsize; 603 self_size = initial_boot_params->totalsize;
575 memblock_reserve(self_base, self_size); 604 memblock_reserve(self_base, self_size);
576 605
 606	/* Look for the new "reserved-ranges" property in the DT */
607 early_reserve_mem_dt();
608
577#ifdef CONFIG_BLK_DEV_INITRD 609#ifdef CONFIG_BLK_DEV_INITRD
578 /* then reserve the initrd, if any */ 610 /* Then reserve the initrd, if any */
579 if (initrd_start && (initrd_end > initrd_start)) 611 if (initrd_start && (initrd_end > initrd_start)) {
580 memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), 612 memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
581 _ALIGN_UP(initrd_end, PAGE_SIZE) - 613 _ALIGN_UP(initrd_end, PAGE_SIZE) -
582 _ALIGN_DOWN(initrd_start, PAGE_SIZE)); 614 _ALIGN_DOWN(initrd_start, PAGE_SIZE));
615 }
583#endif /* CONFIG_BLK_DEV_INITRD */ 616#endif /* CONFIG_BLK_DEV_INITRD */
584 617
585#ifdef CONFIG_PPC32 618#ifdef CONFIG_PPC32
@@ -591,6 +624,8 @@ static void __init early_reserve_mem(void)
591 u32 base_32, size_32; 624 u32 base_32, size_32;
592 u32 *reserve_map_32 = (u32 *)reserve_map; 625 u32 *reserve_map_32 = (u32 *)reserve_map;
593 626
627 DBG("Found old 32-bit reserve map\n");
628
594 while (1) { 629 while (1) {
595 base_32 = *(reserve_map_32++); 630 base_32 = *(reserve_map_32++);
596 size_32 = *(reserve_map_32++); 631 size_32 = *(reserve_map_32++);
@@ -605,6 +640,9 @@ static void __init early_reserve_mem(void)
605 return; 640 return;
606 } 641 }
607#endif 642#endif
643 DBG("Processing reserve map\n");
644
645 /* Handle the reserve map in the fdt blob if it exists */
608 while (1) { 646 while (1) {
609 base = *(reserve_map++); 647 base = *(reserve_map++);
610 size = *(reserve_map++); 648 size = *(reserve_map++);
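The property parsed by early_reserve_mem_dt() is a flat array of big-endian cells: two for each address, two for each size. A userspace sketch of the same decode, with an invented sample property; the byte swaps stand in for the kernel's of_read_number():

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t read2(const uint32_t *cells)
	{
		/* join two 32-bit big-endian cells into one 64-bit value */
		return ((uint64_t)__builtin_bswap32(cells[0]) << 32) |
		       __builtin_bswap32(cells[1]);
	}

	int main(void)
	{
		/* hypothetical "reserved-ranges": one 64KB range at 0x30000000 */
		uint32_t prop[] = {
			__builtin_bswap32(0x0), __builtin_bswap32(0x30000000),
			__builtin_bswap32(0x0), __builtin_bswap32(0x10000),
		};
		unsigned long len = sizeof(prop);
		unsigned long i;

		for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
			uint64_t base = read2(prop + i * 4 + 0);
			uint64_t size = read2(prop + i * 4 + 2);

			if (size)
				printf("reserving: %llx -> %llx\n",
				       (unsigned long long)base,
				       (unsigned long long)size);
		}
		return 0;
	}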
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 5eccda9fd33f..607902424e73 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -644,7 +644,8 @@ unsigned char ibm_architecture_vec[] = {
644 W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ 644 W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
645 W(0xffff0000), W(0x003e0000), /* POWER6 */ 645 W(0xffff0000), W(0x003e0000), /* POWER6 */
646 W(0xffff0000), W(0x003f0000), /* POWER7 */ 646 W(0xffff0000), W(0x003f0000), /* POWER7 */
647 W(0xffff0000), W(0x004b0000), /* POWER8 */ 647 W(0xffff0000), W(0x004b0000), /* POWER8E */
648 W(0xffff0000), W(0x004d0000), /* POWER8 */
648 W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ 649 W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */
649 W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ 650 W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
650 W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ 651 W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
@@ -706,7 +707,7 @@ unsigned char ibm_architecture_vec[] = {
706 * must match by the macro below. Update the definition if 707 * must match by the macro below. Update the definition if
707 * the structure layout changes. 708 * the structure layout changes.
708 */ 709 */
709#define IBM_ARCH_VEC_NRCORES_OFFSET 117 710#define IBM_ARCH_VEC_NRCORES_OFFSET 125
710 W(NR_CPUS), /* number of cores supported */ 711 W(NR_CPUS), /* number of cores supported */
711 0, 712 0,
712 0, 713 0,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 98c2fc198712..9a0d24c390a3 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -975,16 +975,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
975 hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; 975 hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
976 hw_brk.len = 8; 976 hw_brk.len = 8;
977#ifdef CONFIG_HAVE_HW_BREAKPOINT 977#ifdef CONFIG_HAVE_HW_BREAKPOINT
978 if (ptrace_get_breakpoints(task) < 0)
979 return -ESRCH;
980
981 bp = thread->ptrace_bps[0]; 978 bp = thread->ptrace_bps[0];
982 if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { 979 if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
983 if (bp) { 980 if (bp) {
984 unregister_hw_breakpoint(bp); 981 unregister_hw_breakpoint(bp);
985 thread->ptrace_bps[0] = NULL; 982 thread->ptrace_bps[0] = NULL;
986 } 983 }
987 ptrace_put_breakpoints(task);
988 return 0; 984 return 0;
989 } 985 }
990 if (bp) { 986 if (bp) {
@@ -997,11 +993,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
997 993
998 ret = modify_user_hw_breakpoint(bp, &attr); 994 ret = modify_user_hw_breakpoint(bp, &attr);
999 if (ret) { 995 if (ret) {
1000 ptrace_put_breakpoints(task);
1001 return ret; 996 return ret;
1002 } 997 }
1003 thread->ptrace_bps[0] = bp; 998 thread->ptrace_bps[0] = bp;
1004 ptrace_put_breakpoints(task);
1005 thread->hw_brk = hw_brk; 999 thread->hw_brk = hw_brk;
1006 return 0; 1000 return 0;
1007 } 1001 }
@@ -1016,12 +1010,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
1016 ptrace_triggered, NULL, task); 1010 ptrace_triggered, NULL, task);
1017 if (IS_ERR(bp)) { 1011 if (IS_ERR(bp)) {
1018 thread->ptrace_bps[0] = NULL; 1012 thread->ptrace_bps[0] = NULL;
1019 ptrace_put_breakpoints(task);
1020 return PTR_ERR(bp); 1013 return PTR_ERR(bp);
1021 } 1014 }
1022 1015
1023 ptrace_put_breakpoints(task);
1024
1025#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 1016#endif /* CONFIG_HAVE_HW_BREAKPOINT */
1026 task->thread.hw_brk = hw_brk; 1017 task->thread.hw_brk = hw_brk;
1027#else /* CONFIG_PPC_ADV_DEBUG_REGS */ 1018#else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -1440,24 +1431,19 @@ static long ppc_set_hwdebug(struct task_struct *child,
1440 if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) 1431 if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
1441 brk.type |= HW_BRK_TYPE_WRITE; 1432 brk.type |= HW_BRK_TYPE_WRITE;
1442#ifdef CONFIG_HAVE_HW_BREAKPOINT 1433#ifdef CONFIG_HAVE_HW_BREAKPOINT
1443 if (ptrace_get_breakpoints(child) < 0)
1444 return -ESRCH;
1445
1446 /* 1434 /*
1447 * Check if the request is for 'range' breakpoints. We can 1435 * Check if the request is for 'range' breakpoints. We can
1448 * support it if range < 8 bytes. 1436 * support it if range < 8 bytes.
1449 */ 1437 */
1450 if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { 1438 if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
1451 len = bp_info->addr2 - bp_info->addr; 1439 len = bp_info->addr2 - bp_info->addr;
1452 } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { 1440 else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
1453 ptrace_put_breakpoints(child); 1441 len = 1;
1442 else
1454 return -EINVAL; 1443 return -EINVAL;
1455 }
1456 bp = thread->ptrace_bps[0]; 1444 bp = thread->ptrace_bps[0];
1457 if (bp) { 1445 if (bp)
1458 ptrace_put_breakpoints(child);
1459 return -ENOSPC; 1446 return -ENOSPC;
1460 }
1461 1447
1462 /* Create a new breakpoint request if one doesn't exist already */ 1448 /* Create a new breakpoint request if one doesn't exist already */
1463 hw_breakpoint_init(&attr); 1449 hw_breakpoint_init(&attr);
@@ -1469,11 +1455,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
1469 ptrace_triggered, NULL, child); 1455 ptrace_triggered, NULL, child);
1470 if (IS_ERR(bp)) { 1456 if (IS_ERR(bp)) {
1471 thread->ptrace_bps[0] = NULL; 1457 thread->ptrace_bps[0] = NULL;
1472 ptrace_put_breakpoints(child);
1473 return PTR_ERR(bp); 1458 return PTR_ERR(bp);
1474 } 1459 }
1475 1460
1476 ptrace_put_breakpoints(child);
1477 return 1; 1461 return 1;
1478#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 1462#endif /* CONFIG_HAVE_HW_BREAKPOINT */
1479 1463
@@ -1517,16 +1501,12 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)
1517 return -EINVAL; 1501 return -EINVAL;
1518 1502
1519#ifdef CONFIG_HAVE_HW_BREAKPOINT 1503#ifdef CONFIG_HAVE_HW_BREAKPOINT
1520 if (ptrace_get_breakpoints(child) < 0)
1521 return -ESRCH;
1522
1523 bp = thread->ptrace_bps[0]; 1504 bp = thread->ptrace_bps[0];
1524 if (bp) { 1505 if (bp) {
1525 unregister_hw_breakpoint(bp); 1506 unregister_hw_breakpoint(bp);
1526 thread->ptrace_bps[0] = NULL; 1507 thread->ptrace_bps[0] = NULL;
1527 } else 1508 } else
1528 ret = -ENOENT; 1509 ret = -ENOENT;
1529 ptrace_put_breakpoints(child);
1530 return ret; 1510 return ret;
1531#else /* CONFIG_HAVE_HW_BREAKPOINT */ 1511#else /* CONFIG_HAVE_HW_BREAKPOINT */
1532 if (child->thread.hw_brk.address == 0) 1512 if (child->thread.hw_brk.address == 0)
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
index ef46ba6e094f..f366fedb0872 100644
--- a/arch/powerpc/kernel/reloc_32.S
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -166,7 +166,7 @@ ha16:
166 /* R_PPC_ADDR16_LO */ 166 /* R_PPC_ADDR16_LO */
167lo16: 167lo16:
168 cmpwi r4, R_PPC_ADDR16_LO 168 cmpwi r4, R_PPC_ADDR16_LO
169 bne nxtrela 169 bne unknown_type
170 lwz r4, 0(r9) /* r_offset */ 170 lwz r4, 0(r9) /* r_offset */
171 lwz r0, 8(r9) /* r_addend */ 171 lwz r0, 8(r9) /* r_addend */
172 add r0, r0, r3 172 add r0, r0, r3
@@ -191,6 +191,7 @@ nxtrela:
191 dcbst r4,r7 191 dcbst r4,r7
192 sync /* Ensure the data is flushed before icbi */ 192 sync /* Ensure the data is flushed before icbi */
193 icbi r4,r7 193 icbi r4,r7
194unknown_type:
194 cmpwi r8, 0 /* relasz = 0 ? */ 195 cmpwi r8, 0 /* relasz = 0 ? */
195 ble done 196 ble done
196 add r9, r9, r6 /* move to next entry in the .rela table */ 197 add r9, r9, r6 /* move to next entry in the .rela table */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 52add6f3e201..80b5ef403f68 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1172,7 +1172,7 @@ int __init early_init_dt_scan_rtas(unsigned long node,
1172static arch_spinlock_t timebase_lock; 1172static arch_spinlock_t timebase_lock;
1173static u64 timebase = 0; 1173static u64 timebase = 0;
1174 1174
1175void __cpuinit rtas_give_timebase(void) 1175void rtas_give_timebase(void)
1176{ 1176{
1177 unsigned long flags; 1177 unsigned long flags;
1178 1178
@@ -1189,7 +1189,7 @@ void __cpuinit rtas_give_timebase(void)
1189 local_irq_restore(flags); 1189 local_irq_restore(flags);
1190} 1190}
1191 1191
1192void __cpuinit rtas_take_timebase(void) 1192void rtas_take_timebase(void)
1193{ 1193{
1194 while (!timebase) 1194 while (!timebase)
1195 barrier(); 1195 barrier();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e379d3fd1694..389fb8077cc9 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -76,7 +76,7 @@
76#endif 76#endif
77 77
78int boot_cpuid = 0; 78int boot_cpuid = 0;
79int __initdata spinning_secondaries; 79int spinning_secondaries;
80u64 ppc64_pft_size; 80u64 ppc64_pft_size;
81 81
82/* Pick defaults since we might want to patch instructions 82/* Pick defaults since we might want to patch instructions
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 201385c3a1ae..0f83122e6676 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
407 * altivec/spe instructions at some point. 407 * altivec/spe instructions at some point.
408 */ 408 */
409static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, 409static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
410 int sigret, int ctx_has_vsx_region) 410 struct mcontext __user *tm_frame, int sigret,
411 int ctx_has_vsx_region)
411{ 412{
412 unsigned long msr = regs->msr; 413 unsigned long msr = regs->msr;
413 414
@@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
475 476
476 if (__put_user(msr, &frame->mc_gregs[PT_MSR])) 477 if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
477 return 1; 478 return 1;
479 /* We need to write 0 to the top 32 bits of the MSR in the tm frame so that we

480 * can check it on the restore to see if TM is active
481 */
482 if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
483 return 1;
484
478 if (sigret) { 485 if (sigret) {
479 /* Set up the sigreturn trampoline: li r0,sigret; sc */ 486 /* Set up the sigreturn trampoline: li r0,sigret; sc */
480 if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) 487 if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
@@ -747,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
747 struct mcontext __user *tm_sr) 754 struct mcontext __user *tm_sr)
748{ 755{
749 long err; 756 long err;
750 unsigned long msr; 757 unsigned long msr, msr_hi;
751#ifdef CONFIG_VSX 758#ifdef CONFIG_VSX
752 int i; 759 int i;
753#endif 760#endif
@@ -852,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs,
852 tm_enable(); 859 tm_enable();
853 /* This loads the checkpointed FP/VEC state, if used */ 860 /* This loads the checkpointed FP/VEC state, if used */
854 tm_recheckpoint(&current->thread, msr); 861 tm_recheckpoint(&current->thread, msr);
855 /* The task has moved into TM state S, so ensure MSR reflects this */ 862 /* Get the top half of the MSR */
856 regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; 863 if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
864 return 1;
865 /* Pull in MSR TM from user context */
866 regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
857 867
858 /* This loads the speculative FP/VEC state, if used */ 868 /* This loads the speculative FP/VEC state, if used */
859 if (msr & MSR_FP) { 869 if (msr & MSR_FP) {
@@ -952,6 +962,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
952{ 962{
953 struct rt_sigframe __user *rt_sf; 963 struct rt_sigframe __user *rt_sf;
954 struct mcontext __user *frame; 964 struct mcontext __user *frame;
965 struct mcontext __user *tm_frame = NULL;
955 void __user *addr; 966 void __user *addr;
956 unsigned long newsp = 0; 967 unsigned long newsp = 0;
957 int sigret; 968 int sigret;
@@ -985,23 +996,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
985 } 996 }
986 997
987#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 998#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
999 tm_frame = &rt_sf->uc_transact.uc_mcontext;
988 if (MSR_TM_ACTIVE(regs->msr)) { 1000 if (MSR_TM_ACTIVE(regs->msr)) {
989 if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, 1001 if (save_tm_user_regs(regs, frame, tm_frame, sigret))
990 &rt_sf->uc_transact.uc_mcontext, sigret))
991 goto badframe; 1002 goto badframe;
992 } 1003 }
993 else 1004 else
994#endif 1005#endif
995 if (save_user_regs(regs, frame, sigret, 1)) 1006 {
1007 if (save_user_regs(regs, frame, tm_frame, sigret, 1))
996 goto badframe; 1008 goto badframe;
1009 }
997 regs->link = tramp; 1010 regs->link = tramp;
998 1011
999#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1012#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1000 if (MSR_TM_ACTIVE(regs->msr)) { 1013 if (MSR_TM_ACTIVE(regs->msr)) {
1001 if (__put_user((unsigned long)&rt_sf->uc_transact, 1014 if (__put_user((unsigned long)&rt_sf->uc_transact,
1002 &rt_sf->uc.uc_link) 1015 &rt_sf->uc.uc_link)
1003 || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), 1016 || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs))
1004 &rt_sf->uc_transact.uc_regs))
1005 goto badframe; 1017 goto badframe;
1006 } 1018 }
1007 else 1019 else
@@ -1170,7 +1182,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
1170 mctx = (struct mcontext __user *) 1182 mctx = (struct mcontext __user *)
1171 ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); 1183 ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
1172 if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) 1184 if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
1173 || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) 1185 || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
1174 || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked) 1186 || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
1175 || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) 1187 || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
1176 return -EFAULT; 1188 return -EFAULT;
@@ -1233,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
1233 if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) 1245 if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
1234 goto bad; 1246 goto bad;
1235 1247
1236 if (MSR_TM_SUSPENDED(msr_hi<<32)) { 1248 if (MSR_TM_ACTIVE(msr_hi<<32)) {
1237 /* We only recheckpoint on return if we're 1249 /* We only recheckpoint on return if we're
1238 * in a transaction. 1250 * in a transaction.
1239 */ 1251 */
@@ -1392,6 +1404,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
1392{ 1404{
1393 struct sigcontext __user *sc; 1405 struct sigcontext __user *sc;
1394 struct sigframe __user *frame; 1406 struct sigframe __user *frame;
1407 struct mcontext __user *tm_mctx = NULL;
1395 unsigned long newsp = 0; 1408 unsigned long newsp = 0;
1396 int sigret; 1409 int sigret;
1397 unsigned long tramp; 1410 unsigned long tramp;
@@ -1425,6 +1438,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
1425 } 1438 }
1426 1439
1427#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1440#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1441 tm_mctx = &frame->mctx_transact;
1428 if (MSR_TM_ACTIVE(regs->msr)) { 1442 if (MSR_TM_ACTIVE(regs->msr)) {
1429 if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, 1443 if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
1430 sigret)) 1444 sigret))
@@ -1432,8 +1446,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
1432 } 1446 }
1433 else 1447 else
1434#endif 1448#endif
1435 if (save_user_regs(regs, &frame->mctx, sigret, 1)) 1449 {
1450 if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1))
1436 goto badframe; 1451 goto badframe;
1452 }
1437 1453
1438 regs->link = tramp; 1454 regs->link = tramp;
1439 1455
@@ -1481,16 +1497,22 @@ badframe:
1481long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, 1497long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
1482 struct pt_regs *regs) 1498 struct pt_regs *regs)
1483{ 1499{
1500 struct sigframe __user *sf;
1484 struct sigcontext __user *sc; 1501 struct sigcontext __user *sc;
1485 struct sigcontext sigctx; 1502 struct sigcontext sigctx;
1486 struct mcontext __user *sr; 1503 struct mcontext __user *sr;
1487 void __user *addr; 1504 void __user *addr;
1488 sigset_t set; 1505 sigset_t set;
1506#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1507 struct mcontext __user *mcp, *tm_mcp;
1508 unsigned long msr_hi;
1509#endif
1489 1510
1490 /* Always make any pending restarted system calls return -EINTR */ 1511 /* Always make any pending restarted system calls return -EINTR */
1491 current_thread_info()->restart_block.fn = do_no_restart_syscall; 1512 current_thread_info()->restart_block.fn = do_no_restart_syscall;
1492 1513
1493 sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); 1514 sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
1515 sc = &sf->sctx;
1494 addr = sc; 1516 addr = sc;
1495 if (copy_from_user(&sigctx, sc, sizeof(sigctx))) 1517 if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
1496 goto badframe; 1518 goto badframe;
@@ -1507,11 +1529,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
1507#endif 1529#endif
1508 set_current_blocked(&set); 1530 set_current_blocked(&set);
1509 1531
1510 sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); 1532#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1511 addr = sr; 1533 mcp = (struct mcontext __user *)&sf->mctx;
1512 if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) 1534 tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
1513 || restore_user_regs(regs, sr, 1)) 1535 if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
1514 goto badframe; 1536 goto badframe;
1537 if (MSR_TM_ACTIVE(msr_hi<<32)) {
1538 if (!cpu_has_feature(CPU_FTR_TM))
1539 goto badframe;
1540 if (restore_tm_user_regs(regs, mcp, tm_mcp))
1541 goto badframe;
1542 } else
1543#endif
1544 {
1545 sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
1546 addr = sr;
1547 if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
1548 || restore_user_regs(regs, sr, 1))
1549 goto badframe;
1550 }
1515 1551
1516 set_thread_flag(TIF_RESTOREALL); 1552 set_thread_flag(TIF_RESTOREALL);
1517 return 0; 1553 return 0;
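
The save and restore sides of the signal_32.c change fit together: save_user_regs() now stores 0 into the TM frame's PT_MSR slot on every non-TM delivery, and sys_sigreturn() reads that slot back to pick a restore path, since MSR_TM_ACTIVE() on a zero top half is false. The two ends of that handshake, pulled out of the diff as a sketch (using the structures and helpers shown above):

	/* Delivery: a non-TM signal leaves a zero TS field in the tm frame. */
	if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
		return 1;

	/* Return: the frame itself says whether to recheckpoint. */
	if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
		goto badframe;
	if (MSR_TM_ACTIVE((unsigned long)msr_hi << 32)) {
		if (!cpu_has_feature(CPU_FTR_TM))
			goto badframe;	/* frame claims TM on a non-TM CPU */
		if (restore_tm_user_regs(regs, mcp, tm_mcp))
			goto badframe;
	} else if (restore_user_regs(regs, sr, 1)) {
		goto badframe;
	}
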
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 345947367ec0..887e99d85bc2 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
410 410
411 /* get MSR separately, transfer the LE bit if doing signal return */ 411 /* get MSR separately, transfer the LE bit if doing signal return */
412 err |= __get_user(msr, &sc->gp_regs[PT_MSR]); 412 err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
413 /* pull in MSR TM from user context */
414 regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
415
416 /* pull in MSR LE from user context */
413 regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); 417 regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
414 418
415 /* The following non-GPR non-FPR non-VR state is also checkpointed: */ 419 /* The following non-GPR non-FPR non-VR state is also checkpointed: */
@@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
505 tm_enable(); 509 tm_enable();
506 /* This loads the checkpointed FP/VEC state, if used */ 510 /* This loads the checkpointed FP/VEC state, if used */
507 tm_recheckpoint(&current->thread, msr); 511 tm_recheckpoint(&current->thread, msr);
508 /* The task has moved into TM state S, so ensure MSR reflects this: */
509 regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33);
510 512
511 /* This loads the speculative FP/VEC state, if used */ 513 /* This loads the speculative FP/VEC state, if used */
512 if (msr & MSR_FP) { 514 if (msr & MSR_FP) {
@@ -654,7 +656,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
654#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 656#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
655 if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) 657 if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
656 goto badframe; 658 goto badframe;
657 if (MSR_TM_SUSPENDED(msr)) { 659 if (MSR_TM_ACTIVE(msr)) {
658 /* We recheckpoint on return. */ 660 /* We recheckpoint on return. */
659 struct ucontext __user *uc_transact; 661 struct ucontext __user *uc_transact;
660 if (__get_user(uc_transact, &uc->uc_link)) 662 if (__get_user(uc_transact, &uc->uc_link))
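
On 64-bit the same fix shows up differently: restore_tm_sigcontexts() used to force MSR_TS_S (__MASK(33)) unconditionally, and now splices the TS bits straight from the user-supplied MSR, mirroring the LE-bit splice on the following line. A standalone illustration of that splice; the constants below follow asm/reg.h as best recalled, so treat the exact bit positions as an assumption:

	#include <stdint.h>
	#include <stdio.h>

	#define MSR_TS_S	(1ULL << 33)	/* TM suspended */
	#define MSR_TS_T	(1ULL << 34)	/* TM transactional */
	#define MSR_TS_MASK	(MSR_TS_S | MSR_TS_T)

	/* Keep everything from the live MSR except TS, which the
	 * user context supplies. */
	static uint64_t splice_ts(uint64_t live_msr, uint64_t user_msr)
	{
		return (live_msr & ~MSR_TS_MASK) | (user_msr & MSR_TS_MASK);
	}

	int main(void)
	{
		uint64_t live = 0x8000000000001032ULL;	/* arbitrary value */
		printf("%#llx\n", (unsigned long long)
		       splice_ts(live, MSR_TS_S));	/* TS forced to S */
		return 0;
	}
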
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ee7ac5e6e28a..38b0ba65a735 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -480,7 +480,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
480 secondary_ti = current_set[cpu] = ti; 480 secondary_ti = current_set[cpu] = ti;
481} 481}
482 482
483int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) 483int __cpu_up(unsigned int cpu, struct task_struct *tidle)
484{ 484{
485 int rc, c; 485 int rc, c;
486 486
@@ -610,7 +610,7 @@ static struct device_node *cpu_to_l2cache(int cpu)
610} 610}
611 611
612/* Activate a secondary processor. */ 612/* Activate a secondary processor. */
613__cpuinit void start_secondary(void *unused) 613void start_secondary(void *unused)
614{ 614{
615 unsigned int cpu = smp_processor_id(); 615 unsigned int cpu = smp_processor_id();
616 struct device_node *l2_cache; 616 struct device_node *l2_cache;
@@ -637,12 +637,10 @@ __cpuinit void start_secondary(void *unused)
637 637
638 vdso_getcpu_init(); 638 vdso_getcpu_init();
639#endif 639#endif
640 notify_cpu_starting(cpu);
641 set_cpu_online(cpu, true);
642 /* Update sibling maps */ 640 /* Update sibling maps */
643 base = cpu_first_thread_sibling(cpu); 641 base = cpu_first_thread_sibling(cpu);
644 for (i = 0; i < threads_per_core; i++) { 642 for (i = 0; i < threads_per_core; i++) {
645 if (cpu_is_offline(base + i)) 643 if (cpu_is_offline(base + i) && (cpu != base + i))
646 continue; 644 continue;
647 cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); 645 cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
648 cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); 646 cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
@@ -667,6 +665,10 @@ __cpuinit void start_secondary(void *unused)
667 } 665 }
668 of_node_put(l2_cache); 666 of_node_put(l2_cache);
669 667
668 smp_wmb();
669 notify_cpu_starting(cpu);
670 set_cpu_online(cpu, true);
671
670 local_irq_enable(); 672 local_irq_enable();
671 673
672 cpu_startup_entry(CPUHP_ONLINE); 674 cpu_startup_entry(CPUHP_ONLINE);
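
The start_secondary() reordering publishes the CPU as online only after its sibling/core masks are fully initialized, with an smp_wmb() separating the mask writes from the online flag. That is the standard initialize-then-publish pattern; a generic sketch with C11 atomics standing in for the kernel's smp_wmb()/set_cpu_online():

	#include <stdatomic.h>
	#include <stdbool.h>

	struct cpu_state {
		unsigned long sibling_mask;	/* stand-in for cpumask updates */
		atomic_bool online;		/* stand-in for the online bit */
	};

	/* Secondary CPU: initialize first, then publish. */
	void bring_up(struct cpu_state *cs, unsigned long mask)
	{
		cs->sibling_mask = mask;		   /* plain writes... */
		atomic_store_explicit(&cs->online, true,   /* ...then release */
				      memory_order_release); /* smp_wmb + store */
	}

	/* Observer: an acquire load of 'online' guarantees the masks are
	 * visible. */
	bool masks_visible(struct cpu_state *cs, unsigned long *mask_out)
	{
		if (!atomic_load_explicit(&cs->online, memory_order_acquire))
			return false;
		*mask_out = cs->sibling_mask;	/* ordered after the flag */
		return true;
	}
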
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e68a84568b8b..27a90b99ef67 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -341,7 +341,7 @@ static struct device_attribute pa6t_attrs[] = {
341#endif /* HAS_PPC_PMC_PA6T */ 341#endif /* HAS_PPC_PMC_PA6T */
342#endif /* HAS_PPC_PMC_CLASSIC */ 342#endif /* HAS_PPC_PMC_CLASSIC */
343 343
344static void __cpuinit register_cpu_online(unsigned int cpu) 344static void register_cpu_online(unsigned int cpu)
345{ 345{
346 struct cpu *c = &per_cpu(cpu_devices, cpu); 346 struct cpu *c = &per_cpu(cpu_devices, cpu);
347 struct device *s = &c->dev; 347 struct device *s = &c->dev;
@@ -502,7 +502,7 @@ ssize_t arch_cpu_release(const char *buf, size_t count)
502 502
503#endif /* CONFIG_HOTPLUG_CPU */ 503#endif /* CONFIG_HOTPLUG_CPU */
504 504
505static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, 505static int sysfs_cpu_notify(struct notifier_block *self,
506 unsigned long action, void *hcpu) 506 unsigned long action, void *hcpu)
507{ 507{
508 unsigned int cpu = (unsigned int)(long)hcpu; 508 unsigned int cpu = (unsigned int)(long)hcpu;
@@ -522,7 +522,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
522 return NOTIFY_OK; 522 return NOTIFY_OK;
523} 523}
524 524
525static struct notifier_block __cpuinitdata sysfs_cpu_nb = { 525static struct notifier_block sysfs_cpu_nb = {
526 .notifier_call = sysfs_cpu_notify, 526 .notifier_call = sysfs_cpu_notify,
527}; 527};
528 528
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 5fc29ad7e26f..65ab9e909377 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -631,7 +631,6 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
631 return found; 631 return found;
632} 632}
633 633
634/* should become __cpuinit when secondary_cpu_time_init also is */
635void start_cpu_decrementer(void) 634void start_cpu_decrementer(void)
636{ 635{
637#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 636#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2da67e7a16d5..0554d1f6d70d 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -112,9 +112,18 @@ _GLOBAL(tm_reclaim)
112 std r3, STACK_PARAM(0)(r1) 112 std r3, STACK_PARAM(0)(r1)
113 SAVE_NVGPRS(r1) 113 SAVE_NVGPRS(r1)
114 114
115 /* We need to set up MSR for VSX register save instructions. Here we
116 * also clear the MSR RI since when we do the treclaim, we won't have a
117 * valid kernel pointer for a while. We clear RI here as it avoids
118 * adding another mtmsr closer to the treclaim. This makes the region
119 * marked as non-recoverable wider than it needs to be but it saves on
120 * inserting another mtmsrd later.
121 */
115 mfmsr r14 122 mfmsr r14
116 mr r15, r14 123 mr r15, r14
117 ori r15, r15, MSR_FP 124 ori r15, r15, MSR_FP
125 li r16, MSR_RI
126 andc r15, r15, r16
118 oris r15, r15, MSR_VEC@h 127 oris r15, r15, MSR_VEC@h
119#ifdef CONFIG_VSX 128#ifdef CONFIG_VSX
120 BEGIN_FTR_SECTION 129 BEGIN_FTR_SECTION
@@ -224,6 +233,16 @@ dont_backup_fp:
224 std r5, _CCR(r7) 233 std r5, _CCR(r7)
225 std r6, _XER(r7) 234 std r6, _XER(r7)
226 235
236
237 /* ******************** TAR, PPR, DSCR ********** */
238 mfspr r3, SPRN_TAR
239 mfspr r4, SPRN_PPR
240 mfspr r5, SPRN_DSCR
241
242 std r3, THREAD_TM_TAR(r12)
243 std r4, THREAD_TM_PPR(r12)
244 std r5, THREAD_TM_DSCR(r12)
245
227 /* MSR and flags: We don't change CRs, and we don't need to alter 246 /* MSR and flags: We don't change CRs, and we don't need to alter
228 * MSR. 247 * MSR.
229 */ 248 */
@@ -338,6 +357,16 @@ dont_restore_fp:
338 mtmsr r6 /* FP/Vec off again! */ 357 mtmsr r6 /* FP/Vec off again! */
339 358
340restore_gprs: 359restore_gprs:
360
361 /* ******************** TAR, PPR, DSCR ********** */
362 ld r4, THREAD_TM_TAR(r3)
363 ld r5, THREAD_TM_PPR(r3)
364 ld r6, THREAD_TM_DSCR(r3)
365
366 mtspr SPRN_TAR, r4
367 mtspr SPRN_PPR, r5
368 mtspr SPRN_DSCR, r6
369
341 /* ******************** CR,LR,CCR,MSR ********** */ 370 /* ******************** CR,LR,CCR,MSR ********** */
342 ld r3, _CTR(r7) 371 ld r3, _CTR(r7)
343 ld r4, _LINK(r7) 372 ld r4, _LINK(r7)
@@ -349,9 +378,10 @@ restore_gprs:
349 mtcr r5 378 mtcr r5
350 mtxer r6 379 mtxer r6
351 380
352 /* MSR and flags: We don't change CRs, and we don't need to alter 381 /* Clear the MSR RI since we are about to change R1. EE is already off
353 * MSR.
354 */ 382 */
383 li r4, 0
384 mtmsrd r4, 1
355 385
356 REST_4GPRS(0, r7) /* GPR0-3 */ 386 REST_4GPRS(0, r7) /* GPR0-3 */
357 REST_GPR(4, r7) /* GPR4-6 */ 387 REST_GPR(4, r7) /* GPR4-6 */
@@ -377,6 +407,10 @@ restore_gprs:
377 GET_PACA(r13) 407 GET_PACA(r13)
378 GET_SCRATCH0(r1) 408 GET_SCRATCH0(r1)
379 409
410 /* R1 is restored, so we are recoverable again. EE is still off */
411 li r4, MSR_RI
412 mtmsrd r4, 1
413
380 REST_NVGPRS(r1) 414 REST_NVGPRS(r1)
381 415
382 addi r1, r1, TM_FRAME_SIZE 416 addi r1, r1, TM_FRAME_SIZE
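
The tm.S additions fold TAR, PPR and DSCR into the checkpointed register set: tm_reclaim snapshots the transactional values into the thread struct, and the recheckpoint path reinstates them before the transaction resumes. Expressed in C with the kernel's mfspr()/mtspr() accessors (the tm_tar/tm_ppr/tm_dscr field names are assumed to match the THREAD_TM_* offsets used above):

	/* Reclaim: capture the transactional values of the extra SPRs. */
	thread->tm_tar  = mfspr(SPRN_TAR);
	thread->tm_ppr  = mfspr(SPRN_PPR);
	thread->tm_dscr = mfspr(SPRN_DSCR);

	/* Recheckpoint: put them back before re-entering the transaction. */
	mtspr(SPRN_TAR,  thread->tm_tar);
	mtspr(SPRN_PPR,  thread->tm_ppr);
	mtspr(SPRN_DSCR, thread->tm_dscr);
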
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c0e5caf8ccc7..e435bc089ea3 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -44,9 +44,7 @@
44#include <asm/machdep.h> 44#include <asm/machdep.h>
45#include <asm/rtas.h> 45#include <asm/rtas.h>
46#include <asm/pmc.h> 46#include <asm/pmc.h>
47#ifdef CONFIG_PPC32
48#include <asm/reg.h> 47#include <asm/reg.h>
49#endif
50#ifdef CONFIG_PMAC_BACKLIGHT 48#ifdef CONFIG_PMAC_BACKLIGHT
51#include <asm/backlight.h> 49#include <asm/backlight.h>
52#endif 50#endif
@@ -866,6 +864,10 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword)
866 u8 val; 864 u8 val;
867 u32 shift = 8 * (3 - (pos & 0x3)); 865 u32 shift = 8 * (3 - (pos & 0x3));
868 866
867 /* if process is 32-bit, clear upper 32 bits of EA */
868 if ((regs->msr & MSR_64BIT) == 0)
869 EA &= 0xFFFFFFFF;
870
869 switch ((instword & PPC_INST_STRING_MASK)) { 871 switch ((instword & PPC_INST_STRING_MASK)) {
870 case PPC_INST_LSWX: 872 case PPC_INST_LSWX:
871 case PPC_INST_LSWI: 873 case PPC_INST_LSWI:
@@ -1125,7 +1127,17 @@ void __kprobes program_check_exception(struct pt_regs *regs)
1125 * ESR_DST (!?) or 0. In the process of chasing this with the 1127 * ESR_DST (!?) or 0. In the process of chasing this with the
1126 * hardware people - not sure if it can happen on any illegal 1128 * hardware people - not sure if it can happen on any illegal
1127 * instruction or only on FP instructions, whether there is a 1129 * instruction or only on FP instructions, whether there is a
1128 * pattern to occurrences etc. -dgibson 31/Mar/2003 */ 1130 * pattern to occurrences etc. -dgibson 31/Mar/2003
1131 */
1132
1133 /*
1134 * If we support a HW FPU, we need to ensure the FP state
1135 * is flushed into the thread_struct before attempting
1136 * emulation
1137 */
1138#ifdef CONFIG_PPC_FPU
1139 flush_fp_to_thread(current);
1140#endif
1129 switch (do_mathemu(regs)) { 1141 switch (do_mathemu(regs)) {
1130 case 0: 1142 case 0:
1131 emulate_single_step(regs); 1143 emulate_single_step(regs);
@@ -1282,26 +1294,63 @@ void vsx_unavailable_exception(struct pt_regs *regs)
1282 die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); 1294 die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
1283} 1295}
1284 1296
1285void tm_unavailable_exception(struct pt_regs *regs) 1297#ifdef CONFIG_PPC64
1298void facility_unavailable_exception(struct pt_regs *regs)
1286{ 1299{
1300 static char *facility_strings[] = {
1301 [FSCR_FP_LG] = "FPU",
1302 [FSCR_VECVSX_LG] = "VMX/VSX",
1303 [FSCR_DSCR_LG] = "DSCR",
1304 [FSCR_PM_LG] = "PMU SPRs",
1305 [FSCR_BHRB_LG] = "BHRB",
1306 [FSCR_TM_LG] = "TM",
1307 [FSCR_EBB_LG] = "EBB",
1308 [FSCR_TAR_LG] = "TAR",
1309 };
1310 char *facility = "unknown";
1311 u64 value;
1312 u8 status;
1313 bool hv;
1314
1315 hv = (regs->trap == 0xf80);
1316 if (hv)
1317 value = mfspr(SPRN_HFSCR);
1318 else
1319 value = mfspr(SPRN_FSCR);
1320
1321 status = value >> 56;
1322 if (status == FSCR_DSCR_LG) {
1323 /* User is accessing the DSCR. Set the inherit bit and allow
1324 * the user to set it directly in the future via the
1325 * H/FSCR DSCR bit.
1326 */
1327 current->thread.dscr_inherit = 1;
1328 if (hv)
1329 mtspr(SPRN_HFSCR, value | HFSCR_DSCR);
1330 else
1331 mtspr(SPRN_FSCR, value | FSCR_DSCR);
1332 return;
1333 }
1334
1335 if ((status < ARRAY_SIZE(facility_strings)) &&
1336 facility_strings[status])
1337 facility = facility_strings[status];
1338
1287 /* We restore the interrupt state now */ 1339 /* We restore the interrupt state now */
1288 if (!arch_irq_disabled_regs(regs)) 1340 if (!arch_irq_disabled_regs(regs))
1289 local_irq_enable(); 1341 local_irq_enable();
1290 1342
1291 /* Currently we never expect a TMU exception. Catch 1343 pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
1292 * this and kill the process! 1344 hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
1293 */
1294 printk(KERN_EMERG "Unexpected TM unavailable exception at %lx "
1295 "(msr %lx)\n",
1296 regs->nip, regs->msr);
1297 1345
1298 if (user_mode(regs)) { 1346 if (user_mode(regs)) {
1299 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); 1347 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1300 return; 1348 return;
1301 } 1349 }
1302 1350
1303 die("Unexpected TM unavailable exception", regs, SIGABRT); 1351 die("Unexpected facility unavailable exception", regs, SIGABRT);
1304} 1352}
1353#endif
1305 1354
1306#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1355#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1307 1356
@@ -1396,8 +1445,7 @@ void performance_monitor_exception(struct pt_regs *regs)
1396void SoftwareEmulation(struct pt_regs *regs) 1445void SoftwareEmulation(struct pt_regs *regs)
1397{ 1446{
1398 extern int do_mathemu(struct pt_regs *); 1447 extern int do_mathemu(struct pt_regs *);
1399 extern int Soft_emulate_8xx(struct pt_regs *); 1448#if defined(CONFIG_MATH_EMULATION)
1400#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU)
1401 int errcode; 1449 int errcode;
1402#endif 1450#endif
1403 1451
@@ -1430,23 +1478,6 @@ void SoftwareEmulation(struct pt_regs *regs)
1430 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); 1478 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1431 return; 1479 return;
1432 } 1480 }
1433
1434#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
1435 errcode = Soft_emulate_8xx(regs);
1436 if (errcode >= 0)
1437 PPC_WARN_EMULATED(8xx, regs);
1438
1439 switch (errcode) {
1440 case 0:
1441 emulate_single_step(regs);
1442 return;
1443 case 1:
1444 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1445 return;
1446 case -EFAULT:
1447 _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1448 return;
1449 }
1450#else 1481#else
1451 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); 1482 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1452#endif 1483#endif
@@ -1796,8 +1827,6 @@ struct ppc_emulated ppc_emulated = {
1796 WARN_EMULATED_SETUP(unaligned), 1827 WARN_EMULATED_SETUP(unaligned),
1797#ifdef CONFIG_MATH_EMULATION 1828#ifdef CONFIG_MATH_EMULATION
1798 WARN_EMULATED_SETUP(math), 1829 WARN_EMULATED_SETUP(math),
1799#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
1800 WARN_EMULATED_SETUP(8xx),
1801#endif 1830#endif
1802#ifdef CONFIG_VSX 1831#ifdef CONFIG_VSX
1803 WARN_EMULATED_SETUP(vsx), 1832 WARN_EMULATED_SETUP(vsx),
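
The new facility_unavailable_exception() in the traps.c hunk identifies the trapping facility from the top byte of the FSCR (or HFSCR for the hypervisor variant) and uses it as an index into facility_strings[]. The decode step on its own, as a sketch (table contents and the FSCR_*_LG index values are those shown in the diff):

	#include <stdint.h>

	/* 'strings' mirrors facility_strings[] above; indices are the
	 * FSCR_*_LG bit numbers. */
	static const char *decode_fscr_status(uint64_t fscr,
					      const char *const strings[],
					      unsigned int nstrings)
	{
		uint8_t status = fscr >> 56;	/* top byte = status field */

		if (status < nstrings && strings[status])
			return strings[status];
		return "unknown";
	}
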
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 9d3fdcd66290..a15837519dca 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -50,7 +50,7 @@ void __init udbg_early_init(void)
50 udbg_init_debug_beat(); 50 udbg_init_debug_beat();
51#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) 51#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
52 udbg_init_pas_realmode(); 52 udbg_init_pas_realmode();
53#elif defined(CONFIG_BOOTX_TEXT) 53#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX)
54 udbg_init_btext(); 54 udbg_init_btext();
55#elif defined(CONFIG_PPC_EARLY_DEBUG_44x) 55#elif defined(CONFIG_PPC_EARLY_DEBUG_44x)
56 /* PPC44x debug */ 56 /* PPC44x debug */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d4f463ac65b1..1d9c92621b36 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -711,7 +711,7 @@ static void __init vdso_setup_syscall_map(void)
711} 711}
712 712
713#ifdef CONFIG_PPC64 713#ifdef CONFIG_PPC64
714int __cpuinit vdso_getcpu_init(void) 714int vdso_getcpu_init(void)
715{ 715{
716 unsigned long cpu, node, val; 716 unsigned long cpu, node, val;
717 717
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 654e479802f2..f096e72262f4 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4;
38#endif 38#endif
39SECTIONS 39SECTIONS
40{ 40{
41 . = 0;
42 reloc_start = .;
43
44 . = KERNELBASE; 41 . = KERNELBASE;
45 42
46/* 43/*