author    Ingo Molnar <mingo@kernel.org>  2012-03-26 11:18:44 -0400
committer Ingo Molnar <mingo@kernel.org>  2012-03-26 11:19:03 -0400
commit    7fd52392c56361a40f0c630a82b36b95ca31eac6 (patch)
tree      14091de24c6b28ea4cae9826f98aeedb7be091f5 /arch/powerpc/kernel
parent    b01c3a0010aabadf745f3e7fdb9cab682e0a28a2 (diff)
parent    e22057c8599373e5caef0bc42bdb95d2a361ab0d (diff)

Merge branch 'linus' into perf/urgent

Merge reason: we need to fix a non-trivial merge conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- arch/powerpc/kernel/Makefile | 10
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c | 16
-rw-r--r-- arch/powerpc/kernel/cputable.c | 20
-rw-r--r-- arch/powerpc/kernel/dbell.c | 2
-rw-r--r-- arch/powerpc/kernel/e500-pmu.c | 134
-rw-r--r-- arch/powerpc/kernel/entry_64.S | 250
-rw-r--r-- arch/powerpc/kernel/exceptions-64e.S | 236
-rw-r--r-- arch/powerpc/kernel/exceptions-64s.S | 314
-rw-r--r-- arch/powerpc/kernel/fadump.c | 1315
-rw-r--r-- arch/powerpc/kernel/head_32.S | 4
-rw-r--r-- arch/powerpc/kernel/head_40x.S | 4
-rw-r--r-- arch/powerpc/kernel/head_64.S | 62
-rw-r--r-- arch/powerpc/kernel/head_8xx.S | 4
-rw-r--r-- arch/powerpc/kernel/head_booke.h | 4
-rw-r--r-- arch/powerpc/kernel/head_fsl_booke.S | 2
-rw-r--r-- arch/powerpc/kernel/ibmebus.c | 2
-rw-r--r-- arch/powerpc/kernel/idle.c | 6
-rw-r--r-- arch/powerpc/kernel/idle_book3e.S | 25
-rw-r--r-- arch/powerpc/kernel/idle_power4.S | 24
-rw-r--r-- arch/powerpc/kernel/idle_power7.S | 23
-rw-r--r-- arch/powerpc/kernel/iommu.c | 8
-rw-r--r-- arch/powerpc/kernel/irq.c | 829
-rw-r--r-- arch/powerpc/kernel/isa-bridge.c | 3
-rw-r--r-- arch/powerpc/kernel/lparcfg.c | 108
-rw-r--r-- arch/powerpc/kernel/misc.S | 1
-rw-r--r-- arch/powerpc/kernel/mpc7450-pmu.c | 422
-rw-r--r-- arch/powerpc/kernel/of_platform.c | 6
-rw-r--r-- arch/powerpc/kernel/paca.c | 12
-rw-r--r-- arch/powerpc/kernel/pci-common.c | 101
-rw-r--r-- arch/powerpc/kernel/pci_32.c | 6
-rw-r--r-- arch/powerpc/kernel/pci_64.c | 7
-rw-r--r-- arch/powerpc/kernel/pci_of_scan.c | 12
-rw-r--r-- arch/powerpc/kernel/perf_callchain.c | 492
-rw-r--r-- arch/powerpc/kernel/perf_event.c | 1448
-rw-r--r-- arch/powerpc/kernel/perf_event_fsl_emb.c | 688
-rw-r--r-- arch/powerpc/kernel/pmc.c | 1
-rw-r--r-- arch/powerpc/kernel/power4-pmu.c | 621
-rw-r--r-- arch/powerpc/kernel/power5+-pmu.c | 690
-rw-r--r-- arch/powerpc/kernel/power5-pmu.c | 629
-rw-r--r-- arch/powerpc/kernel/power6-pmu.c | 552
-rw-r--r-- arch/powerpc/kernel/power7-pmu.c | 379
-rw-r--r-- arch/powerpc/kernel/ppc970-pmu.c | 502
-rw-r--r-- arch/powerpc/kernel/process.c | 27
-rw-r--r-- arch/powerpc/kernel/prom.c | 98
-rw-r--r-- arch/powerpc/kernel/prom_init.c | 15
-rw-r--r-- arch/powerpc/kernel/rtas_pci.c | 13
-rw-r--r-- arch/powerpc/kernel/setup-common.c | 14
-rw-r--r-- arch/powerpc/kernel/signal.c | 13
-rw-r--r-- arch/powerpc/kernel/signal_32.c | 11
-rw-r--r-- arch/powerpc/kernel/sysfs.c | 7
-rw-r--r-- arch/powerpc/kernel/time.c | 116
-rw-r--r-- arch/powerpc/kernel/traps.c | 6
-rw-r--r-- arch/powerpc/kernel/vdso.c | 10
-rw-r--r-- arch/powerpc/kernel/vio.c | 18
-rw-r--r-- arch/powerpc/kernel/vmlinux.lds.S | 5
55 files changed, 2076 insertions, 8251 deletions
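
The largest functional change pulled in below is the powerpc lazy-interrupt rework: the entry_64.S, exceptions-64s.S and exceptions-64e.S hunks drop the old PACAHARDIRQEN byte and instead latch any interrupt taken while soft-disabled into paca->irq_happened, to be replayed via __check_irq_replay()/__replay_interrupt() when interrupts are soft-enabled again. The C below is only a self-contained reading aid for that bookkeeping, not the kernel's irq.c; the flag names come from the diff, but the bit values (other than PACA_IRQ_HARD_DIS == 0x01, which the "irq_happened being just 0x01" comment implies) and the replay priority order are assumptions.

/*
 * Reading aid only: a simplified model of the paca->irq_happened
 * bookkeeping introduced by the hunks below.  Not the kernel's code.
 */
#include <stdint.h>
#include <stdio.h>

#define PACA_IRQ_HARD_DIS 0x01  /* MSR:EE was forced off (value implied by the diff) */
#define PACA_IRQ_DBELL    0x02  /* doorbell taken while soft-disabled (illustrative value) */
#define PACA_IRQ_EE       0x04  /* external interrupt taken while soft-disabled (illustrative) */
#define PACA_IRQ_DEC      0x08  /* decrementer taken while soft-disabled (illustrative) */

struct paca {
    uint8_t soft_enabled;   /* PACASOFTIRQEN */
    uint8_t irq_happened;   /* PACAIRQHAPPENED */
};

/* What the masked_interrupt stubs do: remember the event, keep EE off. */
static void note_masked_irq(struct paca *paca, uint8_t reason)
{
    paca->irq_happened |= reason;
}

/* Roughly what __check_irq_replay() decides: which trap vector (if any)
 * must be re-emitted before interrupts can really be re-enabled.
 * The priority order here is an assumption. */
static unsigned int check_irq_replay(struct paca *paca)
{
    uint8_t happened = paca->irq_happened;

    paca->irq_happened = 0;
    if (happened & PACA_IRQ_DEC)
        return 0x900;               /* replayed via decrementer_common */
    if (happened & PACA_IRQ_EE)
        return 0x500;               /* replayed via hardware_interrupt_common */
    if (happened & PACA_IRQ_DBELL)
        return 0x280;               /* Book3E doorbell replay */
    return 0;                       /* nothing pending, just soft-enable */
}

int main(void)
{
    struct paca paca = { .soft_enabled = 0, .irq_happened = 0 };

    /* A decrementer fires while we are soft-disabled... */
    note_masked_irq(&paca, PACA_IRQ_DEC | PACA_IRQ_HARD_DIS);
    /* ...so re-enabling interrupts must replay vector 0x900 first. */
    printf("replay vector: 0x%x\n", check_irq_replay(&paca));
    paca.soft_enabled = 1;
    return 0;
}
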
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ee728e433aa2..f5808a35688c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO) += vio.o
60obj-$(CONFIG_IBMEBUS) += ibmebus.o 60obj-$(CONFIG_IBMEBUS) += ibmebus.o
61obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o 61obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
62obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 62obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
63obj-$(CONFIG_FA_DUMP) += fadump.o
63ifeq ($(CONFIG_PPC32),y) 64ifeq ($(CONFIG_PPC32),y)
64obj-$(CONFIG_E500) += idle_e500.o 65obj-$(CONFIG_E500) += idle_e500.o
65endif 66endif
@@ -113,15 +114,6 @@ obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
113obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 114obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
114obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 115obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
115obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o 116obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
116obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
117
118obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o
119obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
120 power5+-pmu.o power6-pmu.o power7-pmu.o
121obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
122
123obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o
124obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o
125 117
126obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o 118obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
127 119
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 04caee7d9bc1..cc492e48ddfa 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -46,9 +46,6 @@
46#include <asm/hvcall.h> 46#include <asm/hvcall.h>
47#include <asm/xics.h> 47#include <asm/xics.h>
48#endif 48#endif
49#ifdef CONFIG_PPC_ISERIES
50#include <asm/iseries/alpaca.h>
51#endif
52#ifdef CONFIG_PPC_POWERNV 49#ifdef CONFIG_PPC_POWERNV
53#include <asm/opal.h> 50#include <asm/opal.h>
54#endif 51#endif
@@ -147,7 +144,7 @@ int main(void)
147 DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); 144 DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
148 DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); 145 DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
149 DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); 146 DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
150 DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); 147 DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
151 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 148 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
152#ifdef CONFIG_PPC_MM_SLICES 149#ifdef CONFIG_PPC_MM_SLICES
153 DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, 150 DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
@@ -384,17 +381,6 @@ int main(void)
384 DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); 381 DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
385#endif 382#endif
386 383
387#ifdef CONFIG_PPC_ISERIES
388 /* the assembler miscalculates the VSID values */
389 DEFINE(PAGE_OFFSET_ESID, GET_ESID(PAGE_OFFSET));
390 DEFINE(PAGE_OFFSET_VSID, KERNEL_VSID(PAGE_OFFSET));
391 DEFINE(VMALLOC_START_ESID, GET_ESID(VMALLOC_START));
392 DEFINE(VMALLOC_START_VSID, KERNEL_VSID(VMALLOC_START));
393
394 /* alpaca */
395 DEFINE(ALPACA_SIZE, sizeof(struct alpaca));
396#endif
397
398 DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); 384 DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
399 DEFINE(PTE_SIZE, sizeof(pte_t)); 385 DEFINE(PTE_SIZE, sizeof(pte_t));
400 386
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 81db9e2a8a20..138ae183c440 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1816,7 +1816,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
1816 .platform = "ppc440", 1816 .platform = "ppc440",
1817 }, 1817 },
1818 { /* 464 in APM821xx */ 1818 { /* 464 in APM821xx */
1819 .pvr_mask = 0xffffff00, 1819 .pvr_mask = 0xfffffff0,
1820 .pvr_value = 0x12C41C80, 1820 .pvr_value = 0x12C41C80,
1821 .cpu_name = "APM821XX", 1821 .cpu_name = "APM821XX",
1822 .cpu_features = CPU_FTRS_44X, 1822 .cpu_features = CPU_FTRS_44X,
@@ -2019,6 +2019,24 @@ static struct cpu_spec __initdata cpu_specs[] = {
2019 .machine_check = machine_check_e500mc, 2019 .machine_check = machine_check_e500mc,
2020 .platform = "ppce5500", 2020 .platform = "ppce5500",
2021 }, 2021 },
2022 { /* e6500 */
2023 .pvr_mask = 0xffff0000,
2024 .pvr_value = 0x80400000,
2025 .cpu_name = "e6500",
2026 .cpu_features = CPU_FTRS_E6500,
2027 .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
2028 .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
2029 MMU_FTR_USE_TLBILX,
2030 .icache_bsize = 64,
2031 .dcache_bsize = 64,
2032 .num_pmcs = 4,
2033 .oprofile_cpu_type = "ppc/e6500",
2034 .oprofile_type = PPC_OPROFILE_FSL_EMB,
2035 .cpu_setup = __setup_cpu_e5500,
2036 .cpu_restore = __restore_cpu_e5500,
2037 .machine_check = machine_check_e500mc,
2038 .platform = "ppce6500",
2039 },
2022#ifdef CONFIG_PPC32 2040#ifdef CONFIG_PPC32
2023 { /* default match */ 2041 { /* default match */
2024 .pvr_mask = 0x00000000, 2042 .pvr_mask = 0x00000000,
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 2cc451aaaca7..5b25c8060fd6 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -37,6 +37,8 @@ void doorbell_exception(struct pt_regs *regs)
37 37
38 irq_enter(); 38 irq_enter();
39 39
40 may_hard_irq_enable();
41
40 smp_ipi_demux(); 42 smp_ipi_demux();
41 43
42 irq_exit(); 44 irq_exit();
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
deleted file mode 100644
index cb2e2949c8d1..000000000000
--- a/arch/powerpc/kernel/e500-pmu.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * Performance counter support for e500 family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 * Copyright 2010 Freescale Semiconductor, Inc.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12#include <linux/string.h>
13#include <linux/perf_event.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Map of generic hardware event types to hardware events
19 * Zero if unsupported
20 */
21static int e500_generic_events[] = {
22 [PERF_COUNT_HW_CPU_CYCLES] = 1,
23 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
24 [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */
25 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
26 [PERF_COUNT_HW_BRANCH_MISSES] = 15,
27};
28
29#define C(x) PERF_COUNT_HW_CACHE_##x
30
31/*
32 * Table of generalized cache-related events.
33 * 0 means not supported, -1 means nonsensical, other values
34 * are event codes.
35 */
36static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
37 /*
38 * D-cache misses are not split into read/write/prefetch;
39 * use raw event 41.
40 */
41 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
42 [C(OP_READ)] = { 27, 0 },
43 [C(OP_WRITE)] = { 28, 0 },
44 [C(OP_PREFETCH)] = { 29, 0 },
45 },
46 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
47 [C(OP_READ)] = { 2, 60 },
48 [C(OP_WRITE)] = { -1, -1 },
49 [C(OP_PREFETCH)] = { 0, 0 },
50 },
51 /*
52 * Assuming LL means L2, it's not a good match for this model.
53 * It allocates only on L1 castout or explicit prefetch, and
54 * does not have separate read/write events (but it does have
55 * separate instruction/data events).
56 */
57 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
58 [C(OP_READ)] = { 0, 0 },
59 [C(OP_WRITE)] = { 0, 0 },
60 [C(OP_PREFETCH)] = { 0, 0 },
61 },
62 /*
63 * There are data/instruction MMU misses, but that's a miss on
64 * the chip's internal level-one TLB which is probably not
65 * what the user wants. Instead, unified level-two TLB misses
66 * are reported here.
67 */
68 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
69 [C(OP_READ)] = { 26, 66 },
70 [C(OP_WRITE)] = { -1, -1 },
71 [C(OP_PREFETCH)] = { -1, -1 },
72 },
73 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
74 [C(OP_READ)] = { 12, 15 },
75 [C(OP_WRITE)] = { -1, -1 },
76 [C(OP_PREFETCH)] = { -1, -1 },
77 },
78 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
79 [C(OP_READ)] = { -1, -1 },
80 [C(OP_WRITE)] = { -1, -1 },
81 [C(OP_PREFETCH)] = { -1, -1 },
82 },
83};
84
85static int num_events = 128;
86
87/* Upper half of event id is PMLCb, for threshold events */
88static u64 e500_xlate_event(u64 event_id)
89{
90 u32 event_low = (u32)event_id;
91 u64 ret;
92
93 if (event_low >= num_events)
94 return 0;
95
96 ret = FSL_EMB_EVENT_VALID;
97
98 if (event_low >= 76 && event_low <= 81) {
99 ret |= FSL_EMB_EVENT_RESTRICTED;
100 ret |= event_id &
101 (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH);
102 } else if (event_id &
103 (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) {
104 /* Threshold requested on non-threshold event */
105 return 0;
106 }
107
108 return ret;
109}
110
111static struct fsl_emb_pmu e500_pmu = {
112 .name = "e500 family",
113 .n_counter = 4,
114 .n_restricted = 2,
115 .xlate_event = e500_xlate_event,
116 .n_generic = ARRAY_SIZE(e500_generic_events),
117 .generic_events = e500_generic_events,
118 .cache_events = &e500_cache_events,
119};
120
121static int init_e500_pmu(void)
122{
123 if (!cur_cpu_spec->oprofile_cpu_type)
124 return -ENODEV;
125
126 if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
127 num_events = 256;
128 else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
129 return -ENODEV;
130
131 return register_fsl_emb_pmu(&e500_pmu);
132}
133
134early_initcall(init_e500_pmu);
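
The deleted e500-pmu.c above is self-describing: e500_xlate_event() takes the raw event number in the low 32 bits, allows threshold bits (PMLCb, the upper half) only on the restricted events 76-81, and rejects threshold bits on anything else. Below is a small stand-alone model of that check; the flag values are made up, since the real FSL_EMB_EVENT_* constants live in a header that is not part of this diff.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for FSL_EMB_EVENT_*; the real definitions are elsewhere. */
#define EVT_VALID       (1ULL << 63)
#define EVT_RESTRICTED  (1ULL << 62)
#define EVT_THRESH      (0xffULL << 32)   /* "upper half of event id is PMLCb" */

static uint64_t xlate_event(uint64_t event_id, uint32_t num_events)
{
    uint32_t event_low = (uint32_t)event_id;
    uint64_t ret;

    if (event_low >= num_events)
        return 0;                          /* unknown event number */

    ret = EVT_VALID;
    if (event_low >= 76 && event_low <= 81) {
        /* threshold-capable, flagged for the restricted counters */
        ret |= EVT_RESTRICTED | (event_id & EVT_THRESH);
    } else if (event_id & EVT_THRESH) {
        return 0;                          /* threshold on a non-threshold event */
    }
    return ret;
}

int main(void)
{
    /* event 27 = L1 D-cache read accesses in the table above: plain and valid */
    printf("%#llx\n", (unsigned long long)xlate_event(27, 128));
    /* event 78 with a threshold value: valid, restricted, threshold kept */
    printf("%#llx\n", (unsigned long long)xlate_event(78 | (5ULL << 32), 128));
    /* threshold requested on event 27: rejected */
    printf("%#llx\n", (unsigned long long)xlate_event(27 | (5ULL << 32), 128));
    return 0;
}
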
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 866462cbe2d8..f8a7a1a1a9f4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -32,6 +32,7 @@
32#include <asm/ptrace.h> 32#include <asm/ptrace.h>
33#include <asm/irqflags.h> 33#include <asm/irqflags.h>
34#include <asm/ftrace.h> 34#include <asm/ftrace.h>
35#include <asm/hw_irq.h>
35 36
36/* 37/*
37 * System calls. 38 * System calls.
@@ -115,39 +116,33 @@ BEGIN_FW_FTR_SECTION
115END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) 116END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
116#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ 117#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
117 118
118#ifdef CONFIG_TRACE_IRQFLAGS 119 /*
119 bl .trace_hardirqs_on 120 * A syscall should always be called with interrupts enabled
120 REST_GPR(0,r1) 121 * so we just unconditionally hard-enable here. When some kind
121 REST_4GPRS(3,r1) 122 * of irq tracing is used, we additionally check that condition
122 REST_2GPRS(7,r1) 123 * is correct
123 addi r9,r1,STACK_FRAME_OVERHEAD 124 */
124 ld r12,_MSR(r1) 125#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
125#endif /* CONFIG_TRACE_IRQFLAGS */ 126 lbz r10,PACASOFTIRQEN(r13)
126 li r10,1 127 xori r10,r10,1
127 stb r10,PACASOFTIRQEN(r13) 1281: tdnei r10,0
128 stb r10,PACAHARDIRQEN(r13) 129 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
129 std r10,SOFTE(r1) 130#endif
130#ifdef CONFIG_PPC_ISERIES
131BEGIN_FW_FTR_SECTION
132 /* Hack for handling interrupts when soft-enabling on iSeries */
133 cmpdi cr1,r0,0x5555 /* syscall 0x5555 */
134 andi. r10,r12,MSR_PR /* from kernel */
135 crand 4*cr0+eq,4*cr1+eq,4*cr0+eq
136 bne 2f
137 b hardware_interrupt_entry
1382:
139END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
140#endif /* CONFIG_PPC_ISERIES */
141 131
142 /* Hard enable interrupts */
143#ifdef CONFIG_PPC_BOOK3E 132#ifdef CONFIG_PPC_BOOK3E
144 wrteei 1 133 wrteei 1
145#else 134#else
146 mfmsr r11 135 ld r11,PACAKMSR(r13)
147 ori r11,r11,MSR_EE 136 ori r11,r11,MSR_EE
148 mtmsrd r11,1 137 mtmsrd r11,1
149#endif /* CONFIG_PPC_BOOK3E */ 138#endif /* CONFIG_PPC_BOOK3E */
150 139
140 /* We do need to set SOFTE in the stack frame or the return
141 * from interrupt will be painful
142 */
143 li r10,1
144 std r10,SOFTE(r1)
145
151#ifdef SHOW_SYSCALLS 146#ifdef SHOW_SYSCALLS
152 bl .do_show_syscall 147 bl .do_show_syscall
153 REST_GPR(0,r1) 148 REST_GPR(0,r1)
@@ -198,16 +193,14 @@ syscall_exit:
198 andi. r10,r8,MSR_RI 193 andi. r10,r8,MSR_RI
199 beq- unrecov_restore 194 beq- unrecov_restore
200#endif 195#endif
201 196 /*
202 /* Disable interrupts so current_thread_info()->flags can't change, 197 * Disable interrupts so current_thread_info()->flags can't change,
203 * and so that we don't get interrupted after loading SRR0/1. 198 * and so that we don't get interrupted after loading SRR0/1.
204 */ 199 */
205#ifdef CONFIG_PPC_BOOK3E 200#ifdef CONFIG_PPC_BOOK3E
206 wrteei 0 201 wrteei 0
207#else 202#else
208 mfmsr r10 203 ld r10,PACAKMSR(r13)
209 rldicl r10,r10,48,1
210 rotldi r10,r10,16
211 mtmsrd r10,1 204 mtmsrd r10,1
212#endif /* CONFIG_PPC_BOOK3E */ 205#endif /* CONFIG_PPC_BOOK3E */
213 206
@@ -319,7 +312,7 @@ syscall_exit_work:
319#ifdef CONFIG_PPC_BOOK3E 312#ifdef CONFIG_PPC_BOOK3E
320 wrteei 1 313 wrteei 1
321#else 314#else
322 mfmsr r10 315 ld r10,PACAKMSR(r13)
323 ori r10,r10,MSR_EE 316 ori r10,r10,MSR_EE
324 mtmsrd r10,1 317 mtmsrd r10,1
325#endif /* CONFIG_PPC_BOOK3E */ 318#endif /* CONFIG_PPC_BOOK3E */
@@ -565,10 +558,8 @@ _GLOBAL(ret_from_except_lite)
565#ifdef CONFIG_PPC_BOOK3E 558#ifdef CONFIG_PPC_BOOK3E
566 wrteei 0 559 wrteei 0
567#else 560#else
568 mfmsr r10 /* Get current interrupt state */ 561 ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
569 rldicl r9,r10,48,1 /* clear MSR_EE */ 562 mtmsrd r10,1 /* Update machine state */
570 rotldi r9,r9,16
571 mtmsrd r9,1 /* Update machine state */
572#endif /* CONFIG_PPC_BOOK3E */ 563#endif /* CONFIG_PPC_BOOK3E */
573 564
574#ifdef CONFIG_PREEMPT 565#ifdef CONFIG_PREEMPT
@@ -591,25 +582,74 @@ _GLOBAL(ret_from_except_lite)
591 ld r4,TI_FLAGS(r9) 582 ld r4,TI_FLAGS(r9)
592 andi. r0,r4,_TIF_USER_WORK_MASK 583 andi. r0,r4,_TIF_USER_WORK_MASK
593 bne do_work 584 bne do_work
594#endif 585#endif /* !CONFIG_PREEMPT */
595 586
587 .globl fast_exc_return_irq
588fast_exc_return_irq:
596restore: 589restore:
597BEGIN_FW_FTR_SECTION 590 /*
591 * This is the main kernel exit path, we first check if we
592 * have to change our interrupt state.
593 */
598 ld r5,SOFTE(r1) 594 ld r5,SOFTE(r1)
599FW_FTR_SECTION_ELSE 595 lbz r6,PACASOFTIRQEN(r13)
600 b .Liseries_check_pending_irqs 596 cmpwi cr1,r5,0
601ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 597 cmpw cr0,r5,r6
6022: 598 beq cr0,4f
603 TRACE_AND_RESTORE_IRQ(r5); 599
600 /* We do, handle disable first, which is easy */
601 bne cr1,3f;
602 li r0,0
603 stb r0,PACASOFTIRQEN(r13);
604 TRACE_DISABLE_INTS
605 b 4f
604 606
605 /* extract EE bit and use it to restore paca->hard_enabled */ 6073: /*
606 ld r3,_MSR(r1) 608 * We are about to soft-enable interrupts (we are hard disabled
607 rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ 609 * at this point). We check if there's anything that needs to
608 stb r4,PACAHARDIRQEN(r13) 610 * be replayed first.
611 */
612 lbz r0,PACAIRQHAPPENED(r13)
613 cmpwi cr0,r0,0
614 bne- restore_check_irq_replay
609 615
616 /*
617 * Get here when nothing happened while soft-disabled, just
618 * soft-enable and move-on. We will hard-enable as a side
619 * effect of rfi
620 */
621restore_no_replay:
622 TRACE_ENABLE_INTS
623 li r0,1
624 stb r0,PACASOFTIRQEN(r13);
625
626 /*
627 * Final return path. BookE is handled in a different file
628 */
6294:
610#ifdef CONFIG_PPC_BOOK3E 630#ifdef CONFIG_PPC_BOOK3E
611 b .exception_return_book3e 631 b .exception_return_book3e
612#else 632#else
633 /*
634 * Clear the reservation. If we know the CPU tracks the address of
635 * the reservation then we can potentially save some cycles and use
636 * a larx. On POWER6 and POWER7 this is significantly faster.
637 */
638BEGIN_FTR_SECTION
639 stdcx. r0,0,r1 /* to clear the reservation */
640FTR_SECTION_ELSE
641 ldarx r4,0,r1
642ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
643
644 /*
645 * Some code path such as load_up_fpu or altivec return directly
646 * here. They run entirely hard disabled and do not alter the
647 * interrupt state. They also don't use lwarx/stwcx. and thus
648 * are known not to leave dangling reservations.
649 */
650 .globl fast_exception_return
651fast_exception_return:
652 ld r3,_MSR(r1)
613 ld r4,_CTR(r1) 653 ld r4,_CTR(r1)
614 ld r0,_LINK(r1) 654 ld r0,_LINK(r1)
615 mtctr r4 655 mtctr r4
@@ -623,28 +663,18 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
623 beq- unrecov_restore 663 beq- unrecov_restore
624 664
625 /* 665 /*
626 * Clear the reservation. If we know the CPU tracks the address of
627 * the reservation then we can potentially save some cycles and use
628 * a larx. On POWER6 and POWER7 this is significantly faster.
629 */
630BEGIN_FTR_SECTION
631 stdcx. r0,0,r1 /* to clear the reservation */
632FTR_SECTION_ELSE
633 ldarx r4,0,r1
634ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
635
636 /*
637 * Clear RI before restoring r13. If we are returning to 666 * Clear RI before restoring r13. If we are returning to
638 * userspace and we take an exception after restoring r13, 667 * userspace and we take an exception after restoring r13,
639 * we end up corrupting the userspace r13 value. 668 * we end up corrupting the userspace r13 value.
640 */ 669 */
641 mfmsr r4 670 ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */
642 andc r4,r4,r0 /* r0 contains MSR_RI here */ 671 andc r4,r4,r0 /* r0 contains MSR_RI here */
643 mtmsrd r4,1 672 mtmsrd r4,1
644 673
645 /* 674 /*
646 * r13 is our per cpu area, only restore it if we are returning to 675 * r13 is our per cpu area, only restore it if we are returning to
647 * userspace 676 * userspace the value stored in the stack frame may belong to
677 * another CPU.
648 */ 678 */
649 andi. r0,r3,MSR_PR 679 andi. r0,r3,MSR_PR
650 beq 1f 680 beq 1f
@@ -669,30 +699,55 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
669 699
670#endif /* CONFIG_PPC_BOOK3E */ 700#endif /* CONFIG_PPC_BOOK3E */
671 701
672.Liseries_check_pending_irqs: 702 /*
673#ifdef CONFIG_PPC_ISERIES 703 * Something did happen, check if a re-emit is needed
674 ld r5,SOFTE(r1) 704 * (this also clears paca->irq_happened)
675 cmpdi 0,r5,0 705 */
676 beq 2b 706restore_check_irq_replay:
677 /* Check for pending interrupts (iSeries) */ 707 /* XXX: We could implement a fast path here where we check
678 ld r3,PACALPPACAPTR(r13) 708 * for irq_happened being just 0x01, in which case we can
679 ld r3,LPPACAANYINT(r3) 709 * clear it and return. That means that we would potentially
680 cmpdi r3,0 710 * miss a decrementer having wrapped all the way around.
681 beq+ 2b /* skip do_IRQ if no interrupts */ 711 *
682 712 * Still, this might be useful for things like hash_page
683 li r3,0 713 */
684 stb r3,PACASOFTIRQEN(r13) /* ensure we are soft-disabled */ 714 bl .__check_irq_replay
685#ifdef CONFIG_TRACE_IRQFLAGS 715 cmpwi cr0,r3,0
686 bl .trace_hardirqs_off 716 beq restore_no_replay
687 mfmsr r10 717
688#endif 718 /*
689 ori r10,r10,MSR_EE 719 * We need to re-emit an interrupt. We do so by re-using our
690 mtmsrd r10 /* hard-enable again */ 720 * existing exception frame. We first change the trap value,
691 addi r3,r1,STACK_FRAME_OVERHEAD 721 * but we need to ensure we preserve the low nibble of it
692 bl .do_IRQ 722 */
693 b .ret_from_except_lite /* loop back and handle more */ 723 ld r4,_TRAP(r1)
694#endif 724 clrldi r4,r4,60
725 or r4,r4,r3
726 std r4,_TRAP(r1)
695 727
728 /*
729 * Then find the right handler and call it. Interrupts are
730 * still soft-disabled and we keep them that way.
731 */
732 cmpwi cr0,r3,0x500
733 bne 1f
734 addi r3,r1,STACK_FRAME_OVERHEAD;
735 bl .do_IRQ
736 b .ret_from_except
7371: cmpwi cr0,r3,0x900
738 bne 1f
739 addi r3,r1,STACK_FRAME_OVERHEAD;
740 bl .timer_interrupt
741 b .ret_from_except
742#ifdef CONFIG_PPC_BOOK3E
7431: cmpwi cr0,r3,0x280
744 bne 1f
745 addi r3,r1,STACK_FRAME_OVERHEAD;
746 bl .doorbell_exception
747 b .ret_from_except
748#endif /* CONFIG_PPC_BOOK3E */
7491: b .ret_from_except /* What else to do here ? */
750
696do_work: 751do_work:
697#ifdef CONFIG_PREEMPT 752#ifdef CONFIG_PREEMPT
698 andi. r0,r3,MSR_PR /* Returning to user mode? */ 753 andi. r0,r3,MSR_PR /* Returning to user mode? */
@@ -705,31 +760,22 @@ do_work:
705 crandc eq,cr1*4+eq,eq 760 crandc eq,cr1*4+eq,eq
706 bne restore 761 bne restore
707 762
708 /* Here we are preempting the current task. 763 /*
709 * 764 * Here we are preempting the current task. We want to make
710 * Ensure interrupts are soft-disabled. We also properly mark 765 * sure we are soft-disabled first
711 * the PACA to reflect the fact that they are hard-disabled
712 * and trace the change
713 */ 766 */
714 li r0,0 767 SOFT_DISABLE_INTS(r3,r4)
715 stb r0,PACASOFTIRQEN(r13)
716 stb r0,PACAHARDIRQEN(r13)
717 TRACE_DISABLE_INTS
718
719 /* Call the scheduler with soft IRQs off */
7201: bl .preempt_schedule_irq 7681: bl .preempt_schedule_irq
721 769
722 /* Hard-disable interrupts again (and update PACA) */ 770 /* Hard-disable interrupts again (and update PACA) */
723#ifdef CONFIG_PPC_BOOK3E 771#ifdef CONFIG_PPC_BOOK3E
724 wrteei 0 772 wrteei 0
725#else 773#else
726 mfmsr r10 774 ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
727 rldicl r10,r10,48,1
728 rotldi r10,r10,16
729 mtmsrd r10,1 775 mtmsrd r10,1
730#endif /* CONFIG_PPC_BOOK3E */ 776#endif /* CONFIG_PPC_BOOK3E */
731 li r0,0 777 li r0,PACA_IRQ_HARD_DIS
732 stb r0,PACAHARDIRQEN(r13) 778 stb r0,PACAIRQHAPPENED(r13)
733 779
734 /* Re-test flags and eventually loop */ 780 /* Re-test flags and eventually loop */
735 clrrdi r9,r1,THREAD_SHIFT 781 clrrdi r9,r1,THREAD_SHIFT
@@ -751,14 +797,12 @@ user_work:
751 797
752 andi. r0,r4,_TIF_NEED_RESCHED 798 andi. r0,r4,_TIF_NEED_RESCHED
753 beq 1f 799 beq 1f
754 li r5,1 800 bl .restore_interrupts
755 TRACE_AND_RESTORE_IRQ(r5);
756 bl .schedule 801 bl .schedule
757 b .ret_from_except_lite 802 b .ret_from_except_lite
758 803
7591: bl .save_nvgprs 8041: bl .save_nvgprs
760 li r5,1 805 bl .restore_interrupts
761 TRACE_AND_RESTORE_IRQ(r5);
762 addi r3,r1,STACK_FRAME_OVERHEAD 806 addi r3,r1,STACK_FRAME_OVERHEAD
763 bl .do_notify_resume 807 bl .do_notify_resume
764 b .ret_from_except 808 b .ret_from_except
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 429983c06f91..7215cc2495df 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -24,6 +24,7 @@
24#include <asm/ptrace.h> 24#include <asm/ptrace.h>
25#include <asm/ppc-opcode.h> 25#include <asm/ppc-opcode.h>
26#include <asm/mmu.h> 26#include <asm/mmu.h>
27#include <asm/hw_irq.h>
27 28
28/* XXX This will ultimately add space for a special exception save 29/* XXX This will ultimately add space for a special exception save
29 * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... 30 * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -77,59 +78,55 @@
77#define SPRN_MC_SRR1 SPRN_MCSRR1 78#define SPRN_MC_SRR1 SPRN_MCSRR1
78 79
79#define NORMAL_EXCEPTION_PROLOG(n, addition) \ 80#define NORMAL_EXCEPTION_PROLOG(n, addition) \
80 EXCEPTION_PROLOG(n, GEN, addition##_GEN) 81 EXCEPTION_PROLOG(n, GEN, addition##_GEN(n))
81 82
82#define CRIT_EXCEPTION_PROLOG(n, addition) \ 83#define CRIT_EXCEPTION_PROLOG(n, addition) \
83 EXCEPTION_PROLOG(n, CRIT, addition##_CRIT) 84 EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n))
84 85
85#define DBG_EXCEPTION_PROLOG(n, addition) \ 86#define DBG_EXCEPTION_PROLOG(n, addition) \
86 EXCEPTION_PROLOG(n, DBG, addition##_DBG) 87 EXCEPTION_PROLOG(n, DBG, addition##_DBG(n))
87 88
88#define MC_EXCEPTION_PROLOG(n, addition) \ 89#define MC_EXCEPTION_PROLOG(n, addition) \
89 EXCEPTION_PROLOG(n, MC, addition##_MC) 90 EXCEPTION_PROLOG(n, MC, addition##_MC(n))
90 91
91 92
92/* Variants of the "addition" argument for the prolog 93/* Variants of the "addition" argument for the prolog
93 */ 94 */
94#define PROLOG_ADDITION_NONE_GEN 95#define PROLOG_ADDITION_NONE_GEN(n)
95#define PROLOG_ADDITION_NONE_CRIT 96#define PROLOG_ADDITION_NONE_CRIT(n)
96#define PROLOG_ADDITION_NONE_DBG 97#define PROLOG_ADDITION_NONE_DBG(n)
97#define PROLOG_ADDITION_NONE_MC 98#define PROLOG_ADDITION_NONE_MC(n)
98 99
99#define PROLOG_ADDITION_MASKABLE_GEN \ 100#define PROLOG_ADDITION_MASKABLE_GEN(n) \
100 lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ 101 lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
101 cmpwi cr0,r11,0; /* yes -> go out of line */ \ 102 cmpwi cr0,r11,0; /* yes -> go out of line */ \
102 beq masked_interrupt_book3e; 103 beq masked_interrupt_book3e_##n
103 104
104#define PROLOG_ADDITION_2REGS_GEN \ 105#define PROLOG_ADDITION_2REGS_GEN(n) \
105 std r14,PACA_EXGEN+EX_R14(r13); \ 106 std r14,PACA_EXGEN+EX_R14(r13); \
106 std r15,PACA_EXGEN+EX_R15(r13) 107 std r15,PACA_EXGEN+EX_R15(r13)
107 108
108#define PROLOG_ADDITION_1REG_GEN \ 109#define PROLOG_ADDITION_1REG_GEN(n) \
109 std r14,PACA_EXGEN+EX_R14(r13); 110 std r14,PACA_EXGEN+EX_R14(r13);
110 111
111#define PROLOG_ADDITION_2REGS_CRIT \ 112#define PROLOG_ADDITION_2REGS_CRIT(n) \
112 std r14,PACA_EXCRIT+EX_R14(r13); \ 113 std r14,PACA_EXCRIT+EX_R14(r13); \
113 std r15,PACA_EXCRIT+EX_R15(r13) 114 std r15,PACA_EXCRIT+EX_R15(r13)
114 115
115#define PROLOG_ADDITION_2REGS_DBG \ 116#define PROLOG_ADDITION_2REGS_DBG(n) \
116 std r14,PACA_EXDBG+EX_R14(r13); \ 117 std r14,PACA_EXDBG+EX_R14(r13); \
117 std r15,PACA_EXDBG+EX_R15(r13) 118 std r15,PACA_EXDBG+EX_R15(r13)
118 119
119#define PROLOG_ADDITION_2REGS_MC \ 120#define PROLOG_ADDITION_2REGS_MC(n) \
120 std r14,PACA_EXMC+EX_R14(r13); \ 121 std r14,PACA_EXMC+EX_R14(r13); \
121 std r15,PACA_EXMC+EX_R15(r13) 122 std r15,PACA_EXMC+EX_R15(r13)
122 123
123#define PROLOG_ADDITION_DOORBELL_GEN \
124 lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
125 cmpwi cr0,r11,0; /* yes -> go out of line */ \
126 beq masked_doorbell_book3e
127
128 124
129/* Core exception code for all exceptions except TLB misses. 125/* Core exception code for all exceptions except TLB misses.
130 * XXX: Needs to make SPRN_SPRG_GEN depend on exception type 126 * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
131 */ 127 */
132#define EXCEPTION_COMMON(n, excf, ints) \ 128#define EXCEPTION_COMMON(n, excf, ints) \
129exc_##n##_common: \
133 std r0,GPR0(r1); /* save r0 in stackframe */ \ 130 std r0,GPR0(r1); /* save r0 in stackframe */ \
134 std r2,GPR2(r1); /* save r2 in stackframe */ \ 131 std r2,GPR2(r1); /* save r2 in stackframe */ \
135 SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ 132 SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
@@ -167,20 +164,21 @@
167 std r0,RESULT(r1); /* clear regs->result */ \ 164 std r0,RESULT(r1); /* clear regs->result */ \
168 ints; 165 ints;
169 166
170/* Variants for the "ints" argument */ 167/* Variants for the "ints" argument. This one does nothing when we want
168 * to keep interrupts in their original state
169 */
171#define INTS_KEEP 170#define INTS_KEEP
172#define INTS_DISABLE_SOFT \ 171
173 stb r0,PACASOFTIRQEN(r13); /* mark interrupts soft-disabled */ \ 172/* This second version is meant for exceptions that don't immediately
174 TRACE_DISABLE_INTS; 173 * hard-enable. We set a bit in paca->irq_happened to ensure that
175#define INTS_DISABLE_HARD \ 174 * a subsequent call to arch_local_irq_restore() will properly
176 stb r0,PACAHARDIRQEN(r13); /* and hard disabled */ 175 * hard-enable and avoid the fast-path
177#define INTS_DISABLE_ALL \ 176 */
178 INTS_DISABLE_SOFT \ 177#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4)
179 INTS_DISABLE_HARD 178
180 179/* This is called by exceptions that used INTS_KEEP (that did not touch
181/* This is called by exceptions that used INTS_KEEP (that is did not clear 180 * irq indicators in the PACA). This will restore MSR:EE to it's previous
182 * neither soft nor hard IRQ indicators in the PACA. This will restore MSR:EE 181 * value
183 * to it's previous value
184 * 182 *
185 * XXX In the long run, we may want to open-code it in order to separate the 183 * XXX In the long run, we may want to open-code it in order to separate the
186 * load from the wrtee, thus limiting the latency caused by the dependency 184 * load from the wrtee, thus limiting the latency caused by the dependency
@@ -238,7 +236,7 @@ exc_##n##_bad_stack: \
238#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \ 236#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \
239 START_EXCEPTION(label); \ 237 START_EXCEPTION(label); \
240 NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \ 238 NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \
241 EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \ 239 EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \
242 ack(r8); \ 240 ack(r8); \
243 CHECK_NAPPING(); \ 241 CHECK_NAPPING(); \
244 addi r3,r1,STACK_FRAME_OVERHEAD; \ 242 addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -289,7 +287,7 @@ interrupt_end_book3e:
289/* Critical Input Interrupt */ 287/* Critical Input Interrupt */
290 START_EXCEPTION(critical_input); 288 START_EXCEPTION(critical_input);
291 CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) 289 CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
292// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL) 290// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
293// bl special_reg_save_crit 291// bl special_reg_save_crit
294// CHECK_NAPPING(); 292// CHECK_NAPPING();
295// addi r3,r1,STACK_FRAME_OVERHEAD 293// addi r3,r1,STACK_FRAME_OVERHEAD
@@ -300,7 +298,7 @@ interrupt_end_book3e:
300/* Machine Check Interrupt */ 298/* Machine Check Interrupt */
301 START_EXCEPTION(machine_check); 299 START_EXCEPTION(machine_check);
302 CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) 300 CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
303// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL) 301// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE)
304// bl special_reg_save_mc 302// bl special_reg_save_mc
305// addi r3,r1,STACK_FRAME_OVERHEAD 303// addi r3,r1,STACK_FRAME_OVERHEAD
306// CHECK_NAPPING(); 304// CHECK_NAPPING();
@@ -313,7 +311,7 @@ interrupt_end_book3e:
313 NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) 311 NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
314 mfspr r14,SPRN_DEAR 312 mfspr r14,SPRN_DEAR
315 mfspr r15,SPRN_ESR 313 mfspr r15,SPRN_ESR
316 EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP) 314 EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE)
317 b storage_fault_common 315 b storage_fault_common
318 316
319/* Instruction Storage Interrupt */ 317/* Instruction Storage Interrupt */
@@ -321,7 +319,7 @@ interrupt_end_book3e:
321 NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) 319 NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
322 li r15,0 320 li r15,0
323 mr r14,r10 321 mr r14,r10
324 EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP) 322 EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE)
325 b storage_fault_common 323 b storage_fault_common
326 324
327/* External Input Interrupt */ 325/* External Input Interrupt */
@@ -339,12 +337,11 @@ interrupt_end_book3e:
339 START_EXCEPTION(program); 337 START_EXCEPTION(program);
340 NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG) 338 NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
341 mfspr r14,SPRN_ESR 339 mfspr r14,SPRN_ESR
342 EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT) 340 EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE)
343 std r14,_DSISR(r1) 341 std r14,_DSISR(r1)
344 addi r3,r1,STACK_FRAME_OVERHEAD 342 addi r3,r1,STACK_FRAME_OVERHEAD
345 ld r14,PACA_EXGEN+EX_R14(r13) 343 ld r14,PACA_EXGEN+EX_R14(r13)
346 bl .save_nvgprs 344 bl .save_nvgprs
347 INTS_RESTORE_HARD
348 bl .program_check_exception 345 bl .program_check_exception
349 b .ret_from_except 346 b .ret_from_except
350 347
@@ -353,15 +350,16 @@ interrupt_end_book3e:
353 NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE) 350 NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
354 /* we can probably do a shorter exception entry for that one... */ 351 /* we can probably do a shorter exception entry for that one... */
355 EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) 352 EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
356 bne 1f /* if from user, just load it up */ 353 ld r12,_MSR(r1)
354 andi. r0,r12,MSR_PR;
355 beq- 1f
356 bl .load_up_fpu
357 b fast_exception_return
3581: INTS_DISABLE
357 bl .save_nvgprs 359 bl .save_nvgprs
358 addi r3,r1,STACK_FRAME_OVERHEAD 360 addi r3,r1,STACK_FRAME_OVERHEAD
359 INTS_RESTORE_HARD
360 bl .kernel_fp_unavailable_exception 361 bl .kernel_fp_unavailable_exception
361 BUG_OPCODE 362 b .ret_from_except
3621: ld r12,_MSR(r1)
363 bl .load_up_fpu
364 b fast_exception_return
365 363
366/* Decrementer Interrupt */ 364/* Decrementer Interrupt */
367 MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC) 365 MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC)
@@ -372,7 +370,7 @@ interrupt_end_book3e:
372/* Watchdog Timer Interrupt */ 370/* Watchdog Timer Interrupt */
373 START_EXCEPTION(watchdog); 371 START_EXCEPTION(watchdog);
374 CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) 372 CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
375// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL) 373// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE)
376// bl special_reg_save_crit 374// bl special_reg_save_crit
377// CHECK_NAPPING(); 375// CHECK_NAPPING();
378// addi r3,r1,STACK_FRAME_OVERHEAD 376// addi r3,r1,STACK_FRAME_OVERHEAD
@@ -391,10 +389,9 @@ interrupt_end_book3e:
391/* Auxiliary Processor Unavailable Interrupt */ 389/* Auxiliary Processor Unavailable Interrupt */
392 START_EXCEPTION(ap_unavailable); 390 START_EXCEPTION(ap_unavailable);
393 NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) 391 NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
394 EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP) 392 EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE)
395 addi r3,r1,STACK_FRAME_OVERHEAD
396 bl .save_nvgprs 393 bl .save_nvgprs
397 INTS_RESTORE_HARD 394 addi r3,r1,STACK_FRAME_OVERHEAD
398 bl .unknown_exception 395 bl .unknown_exception
399 b .ret_from_except 396 b .ret_from_except
400 397
@@ -450,7 +447,7 @@ interrupt_end_book3e:
450 mfspr r15,SPRN_SPRG_CRIT_SCRATCH 447 mfspr r15,SPRN_SPRG_CRIT_SCRATCH
451 mtspr SPRN_SPRG_GEN_SCRATCH,r15 448 mtspr SPRN_SPRG_GEN_SCRATCH,r15
452 mfspr r14,SPRN_DBSR 449 mfspr r14,SPRN_DBSR
453 EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL) 450 EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE)
454 std r14,_DSISR(r1) 451 std r14,_DSISR(r1)
455 addi r3,r1,STACK_FRAME_OVERHEAD 452 addi r3,r1,STACK_FRAME_OVERHEAD
456 mr r4,r14 453 mr r4,r14
@@ -465,7 +462,7 @@ kernel_dbg_exc:
465 462
466/* Debug exception as a debug interrupt*/ 463/* Debug exception as a debug interrupt*/
467 START_EXCEPTION(debug_debug); 464 START_EXCEPTION(debug_debug);
468 DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) 465 DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS)
469 466
470 /* 467 /*
471 * If there is a single step or branch-taken exception in an 468 * If there is a single step or branch-taken exception in an
@@ -515,7 +512,7 @@ kernel_dbg_exc:
515 mfspr r15,SPRN_SPRG_DBG_SCRATCH 512 mfspr r15,SPRN_SPRG_DBG_SCRATCH
516 mtspr SPRN_SPRG_GEN_SCRATCH,r15 513 mtspr SPRN_SPRG_GEN_SCRATCH,r15
517 mfspr r14,SPRN_DBSR 514 mfspr r14,SPRN_DBSR
518 EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) 515 EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE)
519 std r14,_DSISR(r1) 516 std r14,_DSISR(r1)
520 addi r3,r1,STACK_FRAME_OVERHEAD 517 addi r3,r1,STACK_FRAME_OVERHEAD
521 mr r4,r14 518 mr r4,r14
@@ -525,21 +522,20 @@ kernel_dbg_exc:
525 bl .DebugException 522 bl .DebugException
526 b .ret_from_except 523 b .ret_from_except
527 524
528 MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) 525 START_EXCEPTION(perfmon);
529 526 NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE)
530/* Doorbell interrupt */ 527 EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
531 START_EXCEPTION(doorbell)
532 NORMAL_EXCEPTION_PROLOG(0x2070, PROLOG_ADDITION_DOORBELL)
533 EXCEPTION_COMMON(0x2070, PACA_EXGEN, INTS_DISABLE_ALL)
534 CHECK_NAPPING()
535 addi r3,r1,STACK_FRAME_OVERHEAD 528 addi r3,r1,STACK_FRAME_OVERHEAD
536 bl .doorbell_exception 529 bl .performance_monitor_exception
537 b .ret_from_except_lite 530 b .ret_from_except_lite
538 531
532/* Doorbell interrupt */
533 MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE)
534
539/* Doorbell critical Interrupt */ 535/* Doorbell critical Interrupt */
540 START_EXCEPTION(doorbell_crit); 536 START_EXCEPTION(doorbell_crit);
541 CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE) 537 CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE)
542// EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL) 538// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE)
543// bl special_reg_save_crit 539// bl special_reg_save_crit
544// CHECK_NAPPING(); 540// CHECK_NAPPING();
545// addi r3,r1,STACK_FRAME_OVERHEAD 541// addi r3,r1,STACK_FRAME_OVERHEAD
@@ -547,36 +543,114 @@ kernel_dbg_exc:
547// b ret_from_crit_except 543// b ret_from_crit_except
548 b . 544 b .
549 545
546/* Guest Doorbell */
550 MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) 547 MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
551 MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE)
552 MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE)
553 MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
554 548
549/* Guest Doorbell critical Interrupt */
550 START_EXCEPTION(guest_doorbell_crit);
551 CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
552// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE)
553// bl special_reg_save_crit
554// CHECK_NAPPING();
555// addi r3,r1,STACK_FRAME_OVERHEAD
556// bl .guest_doorbell_critical_exception
557// b ret_from_crit_except
558 b .
559
560/* Hypervisor call */
561 START_EXCEPTION(hypercall);
562 NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE)
563 EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
564 addi r3,r1,STACK_FRAME_OVERHEAD
565 bl .save_nvgprs
566 INTS_RESTORE_HARD
567 bl .unknown_exception
568 b .ret_from_except
569
570/* Embedded Hypervisor priviledged */
571 START_EXCEPTION(ehpriv);
572 NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE)
573 EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
574 addi r3,r1,STACK_FRAME_OVERHEAD
575 bl .save_nvgprs
576 INTS_RESTORE_HARD
577 bl .unknown_exception
578 b .ret_from_except
555 579
556/* 580/*
557 * An interrupt came in while soft-disabled; clear EE in SRR1, 581 * An interrupt came in while soft-disabled; We mark paca->irq_happened
558 * clear paca->hard_enabled and return. 582 * accordingly and if the interrupt is level sensitive, we hard disable
559 */ 583 */
560masked_doorbell_book3e:
561 mtcr r10
562 /* Resend the doorbell to fire again when ints enabled */
563 mfspr r10,SPRN_PIR
564 PPC_MSGSND(r10)
565 b masked_interrupt_book3e_common
566 584
567masked_interrupt_book3e: 585masked_interrupt_book3e_0x500:
586 /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */
587 li r11,PACA_IRQ_EE
588 b masked_interrupt_book3e_full_mask
589
590masked_interrupt_book3e_0x900:
591 ACK_DEC(r11);
592 li r11,PACA_IRQ_DEC
593 b masked_interrupt_book3e_no_mask
594masked_interrupt_book3e_0x980:
595 ACK_FIT(r11);
596 li r11,PACA_IRQ_DEC
597 b masked_interrupt_book3e_no_mask
598masked_interrupt_book3e_0x280:
599masked_interrupt_book3e_0x2c0:
600 li r11,PACA_IRQ_DBELL
601 b masked_interrupt_book3e_no_mask
602
603masked_interrupt_book3e_no_mask:
604 mtcr r10
605 lbz r10,PACAIRQHAPPENED(r13)
606 or r10,r10,r11
607 stb r10,PACAIRQHAPPENED(r13)
608 b 1f
609masked_interrupt_book3e_full_mask:
568 mtcr r10 610 mtcr r10
569masked_interrupt_book3e_common: 611 lbz r10,PACAIRQHAPPENED(r13)
570 stb r11,PACAHARDIRQEN(r13) 612 or r10,r10,r11
613 stb r10,PACAIRQHAPPENED(r13)
571 mfspr r10,SPRN_SRR1 614 mfspr r10,SPRN_SRR1
572 rldicl r11,r10,48,1 /* clear MSR_EE */ 615 rldicl r11,r10,48,1 /* clear MSR_EE */
573 rotldi r10,r11,16 616 rotldi r10,r11,16
574 mtspr SPRN_SRR1,r10 617 mtspr SPRN_SRR1,r10
575 ld r10,PACA_EXGEN+EX_R10(r13); /* restore registers */ 6181: ld r10,PACA_EXGEN+EX_R10(r13);
576 ld r11,PACA_EXGEN+EX_R11(r13); 619 ld r11,PACA_EXGEN+EX_R11(r13);
577 mfspr r13,SPRN_SPRG_GEN_SCRATCH; 620 mfspr r13,SPRN_SPRG_GEN_SCRATCH;
578 rfi 621 rfi
579 b . 622 b .
623/*
624 * Called from arch_local_irq_enable when an interrupt needs
625 * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
626 * to indicate the kind of interrupt. MSR:EE is already off.
627 * We generate a stackframe like if a real interrupt had happened.
628 *
629 * Note: While MSR:EE is off, we need to make sure that _MSR
630 * in the generated frame has EE set to 1 or the exception
631 * handler will not properly re-enable them.
632 */
633_GLOBAL(__replay_interrupt)
634 /* We are going to jump to the exception common code which
635 * will retrieve various register values from the PACA which
636 * we don't give a damn about.
637 */
638 mflr r10
639 mfmsr r11
640 mfcr r4
641 mtspr SPRN_SPRG_GEN_SCRATCH,r13;
642 std r1,PACA_EXGEN+EX_R1(r13);
643 stw r4,PACA_EXGEN+EX_CR(r13);
644 ori r11,r11,MSR_EE
645 subi r1,r1,INT_FRAME_SIZE;
646 cmpwi cr0,r3,0x500
647 beq exc_0x500_common
648 cmpwi cr0,r3,0x900
649 beq exc_0x900_common
650 cmpwi cr0,r3,0x280
651 beq exc_0x280_common
652 blr
653
580 654
581/* 655/*
582 * This is called from 0x300 and 0x400 handlers after the prologs with 656 * This is called from 0x300 and 0x400 handlers after the prologs with
@@ -591,7 +665,6 @@ storage_fault_common:
591 mr r5,r15 665 mr r5,r15
592 ld r14,PACA_EXGEN+EX_R14(r13) 666 ld r14,PACA_EXGEN+EX_R14(r13)
593 ld r15,PACA_EXGEN+EX_R15(r13) 667 ld r15,PACA_EXGEN+EX_R15(r13)
594 INTS_RESTORE_HARD
595 bl .do_page_fault 668 bl .do_page_fault
596 cmpdi r3,0 669 cmpdi r3,0
597 bne- 1f 670 bne- 1f
@@ -680,6 +753,8 @@ BAD_STACK_TRAMPOLINE(0x000)
680BAD_STACK_TRAMPOLINE(0x100) 753BAD_STACK_TRAMPOLINE(0x100)
681BAD_STACK_TRAMPOLINE(0x200) 754BAD_STACK_TRAMPOLINE(0x200)
682BAD_STACK_TRAMPOLINE(0x260) 755BAD_STACK_TRAMPOLINE(0x260)
756BAD_STACK_TRAMPOLINE(0x280)
757BAD_STACK_TRAMPOLINE(0x2a0)
683BAD_STACK_TRAMPOLINE(0x2c0) 758BAD_STACK_TRAMPOLINE(0x2c0)
684BAD_STACK_TRAMPOLINE(0x2e0) 759BAD_STACK_TRAMPOLINE(0x2e0)
685BAD_STACK_TRAMPOLINE(0x300) 760BAD_STACK_TRAMPOLINE(0x300)
@@ -697,11 +772,10 @@ BAD_STACK_TRAMPOLINE(0xa00)
697BAD_STACK_TRAMPOLINE(0xb00) 772BAD_STACK_TRAMPOLINE(0xb00)
698BAD_STACK_TRAMPOLINE(0xc00) 773BAD_STACK_TRAMPOLINE(0xc00)
699BAD_STACK_TRAMPOLINE(0xd00) 774BAD_STACK_TRAMPOLINE(0xd00)
775BAD_STACK_TRAMPOLINE(0xd08)
700BAD_STACK_TRAMPOLINE(0xe00) 776BAD_STACK_TRAMPOLINE(0xe00)
701BAD_STACK_TRAMPOLINE(0xf00) 777BAD_STACK_TRAMPOLINE(0xf00)
702BAD_STACK_TRAMPOLINE(0xf20) 778BAD_STACK_TRAMPOLINE(0xf20)
703BAD_STACK_TRAMPOLINE(0x2070)
704BAD_STACK_TRAMPOLINE(0x2080)
705 779
706 .globl bad_stack_book3e 780 .globl bad_stack_book3e
707bad_stack_book3e: 781bad_stack_book3e:
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 15c5a4f6de01..2d0868a4e2f0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -12,6 +12,7 @@
12 * 12 *
13 */ 13 */
14 14
15#include <asm/hw_irq.h>
15#include <asm/exception-64s.h> 16#include <asm/exception-64s.h>
16#include <asm/ptrace.h> 17#include <asm/ptrace.h>
17 18
@@ -19,7 +20,7 @@
19 * We layout physical memory as follows: 20 * We layout physical memory as follows:
20 * 0x0000 - 0x00ff : Secondary processor spin code 21 * 0x0000 - 0x00ff : Secondary processor spin code
21 * 0x0100 - 0x2fff : pSeries Interrupt prologs 22 * 0x0100 - 0x2fff : pSeries Interrupt prologs
22 * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs 23 * 0x3000 - 0x5fff : interrupt support common interrupt prologs
23 * 0x6000 - 0x6fff : Initial (CPU0) segment table 24 * 0x6000 - 0x6fff : Initial (CPU0) segment table
24 * 0x7000 - 0x7fff : FWNMI data area 25 * 0x7000 - 0x7fff : FWNMI data area
25 * 0x8000 - : Early init and support code 26 * 0x8000 - : Early init and support code
@@ -356,34 +357,60 @@ do_stab_bolted_pSeries:
356 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) 357 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
357 358
358/* 359/*
359 * An interrupt came in while soft-disabled; clear EE in SRR1, 360 * An interrupt came in while soft-disabled. We set paca->irq_happened,
360 * clear paca->hard_enabled and return. 361 * then, if it was a decrementer interrupt, we bump the dec to max and
362 * and return, else we hard disable and return. This is called with
363 * r10 containing the value to OR to the paca field.
361 */ 364 */
362masked_interrupt: 365#define MASKED_INTERRUPT(_H) \
363 stb r10,PACAHARDIRQEN(r13) 366masked_##_H##interrupt: \
364 mtcrf 0x80,r9 367 std r11,PACA_EXGEN+EX_R11(r13); \
365 ld r9,PACA_EXGEN+EX_R9(r13) 368 lbz r11,PACAIRQHAPPENED(r13); \
366 mfspr r10,SPRN_SRR1 369 or r11,r11,r10; \
367 rldicl r10,r10,48,1 /* clear MSR_EE */ 370 stb r11,PACAIRQHAPPENED(r13); \
368 rotldi r10,r10,16 371 andi. r10,r10,PACA_IRQ_DEC; \
369 mtspr SPRN_SRR1,r10 372 beq 1f; \
370 ld r10,PACA_EXGEN+EX_R10(r13) 373 lis r10,0x7fff; \
371 GET_SCRATCH0(r13) 374 ori r10,r10,0xffff; \
372 rfid 375 mtspr SPRN_DEC,r10; \
376 b 2f; \
3771: mfspr r10,SPRN_##_H##SRR1; \
378 rldicl r10,r10,48,1; /* clear MSR_EE */ \
379 rotldi r10,r10,16; \
380 mtspr SPRN_##_H##SRR1,r10; \
3812: mtcrf 0x80,r9; \
382 ld r9,PACA_EXGEN+EX_R9(r13); \
383 ld r10,PACA_EXGEN+EX_R10(r13); \
384 ld r11,PACA_EXGEN+EX_R11(r13); \
385 GET_SCRATCH0(r13); \
386 ##_H##rfid; \
373 b . 387 b .
388
389 MASKED_INTERRUPT()
390 MASKED_INTERRUPT(H)
374 391
375masked_Hinterrupt: 392/*
376 stb r10,PACAHARDIRQEN(r13) 393 * Called from arch_local_irq_enable when an interrupt needs
377 mtcrf 0x80,r9 394 * to be resent. r3 contains 0x500 or 0x900 to indicate which
378 ld r9,PACA_EXGEN+EX_R9(r13) 395 * kind of interrupt. MSR:EE is already off. We generate a
379 mfspr r10,SPRN_HSRR1 396 * stackframe like if a real interrupt had happened.
380 rldicl r10,r10,48,1 /* clear MSR_EE */ 397 *
381 rotldi r10,r10,16 398 * Note: While MSR:EE is off, we need to make sure that _MSR
382 mtspr SPRN_HSRR1,r10 399 * in the generated frame has EE set to 1 or the exception
383 ld r10,PACA_EXGEN+EX_R10(r13) 400 * handler will not properly re-enable them.
384 GET_SCRATCH0(r13) 401 */
385 hrfid 402_GLOBAL(__replay_interrupt)
386 b . 403 /* We are going to jump to the exception common code which
404 * will retrieve various register values from the PACA which
405 * we don't give a damn about, so we don't bother storing them.
406 */
407 mfmsr r12
408 mflr r11
409 mfcr r9
410 ori r12,r12,MSR_EE
411 andi. r3,r3,0x0800
412 bne decrementer_common
413 b hardware_interrupt_common
387 414
388#ifdef CONFIG_PPC_PSERIES 415#ifdef CONFIG_PPC_PSERIES
389/* 416/*
@@ -458,14 +485,15 @@ machine_check_common:
458 bl .machine_check_exception 485 bl .machine_check_exception
459 b .ret_from_except 486 b .ret_from_except
460 487
461 STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) 488 STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
489 STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
462 STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) 490 STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
463 STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) 491 STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
464 STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) 492 STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
465 STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) 493 STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
466 STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) 494 STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
467 STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) 495 STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
468 STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) 496 STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
469 STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) 497 STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
470#ifdef CONFIG_ALTIVEC 498#ifdef CONFIG_ALTIVEC
471 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) 499 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
@@ -482,6 +510,9 @@ machine_check_common:
482system_call_entry: 510system_call_entry:
483 b system_call_common 511 b system_call_common
484 512
513ppc64_runlatch_on_trampoline:
514 b .__ppc64_runlatch_on
515
485/* 516/*
486 * Here we have detected that the kernel stack pointer is bad. 517 * Here we have detected that the kernel stack pointer is bad.
487 * R9 contains the saved CR, r13 points to the paca, 518 * R9 contains the saved CR, r13 points to the paca,
@@ -555,6 +586,8 @@ data_access_common:
555 mfspr r10,SPRN_DSISR 586 mfspr r10,SPRN_DSISR
556 stw r10,PACA_EXGEN+EX_DSISR(r13) 587 stw r10,PACA_EXGEN+EX_DSISR(r13)
557 EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) 588 EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
589 DISABLE_INTS
590 ld r12,_MSR(r1)
558 ld r3,PACA_EXGEN+EX_DAR(r13) 591 ld r3,PACA_EXGEN+EX_DAR(r13)
559 lwz r4,PACA_EXGEN+EX_DSISR(r13) 592 lwz r4,PACA_EXGEN+EX_DSISR(r13)
560 li r5,0x300 593 li r5,0x300
@@ -569,6 +602,7 @@ h_data_storage_common:
569 stw r10,PACA_EXGEN+EX_DSISR(r13) 602 stw r10,PACA_EXGEN+EX_DSISR(r13)
570 EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) 603 EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
571 bl .save_nvgprs 604 bl .save_nvgprs
605 DISABLE_INTS
572 addi r3,r1,STACK_FRAME_OVERHEAD 606 addi r3,r1,STACK_FRAME_OVERHEAD
573 bl .unknown_exception 607 bl .unknown_exception
574 b .ret_from_except 608 b .ret_from_except
@@ -577,6 +611,8 @@ h_data_storage_common:
577 .globl instruction_access_common 611 .globl instruction_access_common
578instruction_access_common: 612instruction_access_common:
579 EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) 613 EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
614 DISABLE_INTS
615 ld r12,_MSR(r1)
580 ld r3,_NIP(r1) 616 ld r3,_NIP(r1)
581 andis. r4,r12,0x5820 617 andis. r4,r12,0x5820
582 li r5,0x400 618 li r5,0x400
@@ -672,12 +708,6 @@ _GLOBAL(slb_miss_realmode)
672 ld r10,PACA_EXSLB+EX_LR(r13) 708 ld r10,PACA_EXSLB+EX_LR(r13)
673 ld r3,PACA_EXSLB+EX_R3(r13) 709 ld r3,PACA_EXSLB+EX_R3(r13)
674 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ 710 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
675#ifdef CONFIG_PPC_ISERIES
676BEGIN_FW_FTR_SECTION
677 ld r11,PACALPPACAPTR(r13)
678 ld r11,LPPACASRR0(r11) /* get SRR0 value */
679END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
680#endif /* CONFIG_PPC_ISERIES */
681 711
682 mtlr r10 712 mtlr r10
683 713
@@ -690,12 +720,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
690 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 720 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
691.machine pop 721.machine pop
692 722
693#ifdef CONFIG_PPC_ISERIES
694BEGIN_FW_FTR_SECTION
695 mtspr SPRN_SRR0,r11
696 mtspr SPRN_SRR1,r12
697END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
698#endif /* CONFIG_PPC_ISERIES */
699 ld r9,PACA_EXSLB+EX_R9(r13) 723 ld r9,PACA_EXSLB+EX_R9(r13)
700 ld r10,PACA_EXSLB+EX_R10(r13) 724 ld r10,PACA_EXSLB+EX_R10(r13)
701 ld r11,PACA_EXSLB+EX_R11(r13) 725 ld r11,PACA_EXSLB+EX_R11(r13)
@@ -704,13 +728,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
704 rfid 728 rfid
705 b . /* prevent speculative execution */ 729 b . /* prevent speculative execution */
706 730
7072: 7312: mfspr r11,SPRN_SRR0
708#ifdef CONFIG_PPC_ISERIES
709BEGIN_FW_FTR_SECTION
710 b unrecov_slb
711END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
712#endif /* CONFIG_PPC_ISERIES */
713 mfspr r11,SPRN_SRR0
714 ld r10,PACAKBASE(r13) 732 ld r10,PACAKBASE(r13)
715 LOAD_HANDLER(r10,unrecov_slb) 733 LOAD_HANDLER(r10,unrecov_slb)
716 mtspr SPRN_SRR0,r10 734 mtspr SPRN_SRR0,r10
@@ -727,20 +745,6 @@ unrecov_slb:
727 bl .unrecoverable_exception 745 bl .unrecoverable_exception
728 b 1b 746 b 1b
729 747
730 .align 7
731 .globl hardware_interrupt_common
732 .globl hardware_interrupt_entry
733hardware_interrupt_common:
734 EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
735 FINISH_NAP
736hardware_interrupt_entry:
737 DISABLE_INTS
738BEGIN_FTR_SECTION
739 bl .ppc64_runlatch_on
740END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
741 addi r3,r1,STACK_FRAME_OVERHEAD
742 bl .do_IRQ
743 b .ret_from_except_lite
744 748
745#ifdef CONFIG_PPC_970_NAP 749#ifdef CONFIG_PPC_970_NAP
746power4_fixup_nap: 750power4_fixup_nap:
@@ -785,8 +789,8 @@ fp_unavailable_common:
785 EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) 789 EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
786 bne 1f /* if from user, just load it up */ 790 bne 1f /* if from user, just load it up */
787 bl .save_nvgprs 791 bl .save_nvgprs
792 DISABLE_INTS
788 addi r3,r1,STACK_FRAME_OVERHEAD 793 addi r3,r1,STACK_FRAME_OVERHEAD
789 ENABLE_INTS
790 bl .kernel_fp_unavailable_exception 794 bl .kernel_fp_unavailable_exception
791 BUG_OPCODE 795 BUG_OPCODE
7921: bl .load_up_fpu 7961: bl .load_up_fpu
@@ -805,8 +809,8 @@ BEGIN_FTR_SECTION
805END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 809END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
806#endif 810#endif
807 bl .save_nvgprs 811 bl .save_nvgprs
812 DISABLE_INTS
808 addi r3,r1,STACK_FRAME_OVERHEAD 813 addi r3,r1,STACK_FRAME_OVERHEAD
809 ENABLE_INTS
810 bl .altivec_unavailable_exception 814 bl .altivec_unavailable_exception
811 b .ret_from_except 815 b .ret_from_except
812 816
@@ -816,13 +820,14 @@ vsx_unavailable_common:
816 EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) 820 EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
817#ifdef CONFIG_VSX 821#ifdef CONFIG_VSX
818BEGIN_FTR_SECTION 822BEGIN_FTR_SECTION
819 bne .load_up_vsx 823 beq 1f
824 b .load_up_vsx
8201: 8251:
821END_FTR_SECTION_IFSET(CPU_FTR_VSX) 826END_FTR_SECTION_IFSET(CPU_FTR_VSX)
822#endif 827#endif
823 bl .save_nvgprs 828 bl .save_nvgprs
829 DISABLE_INTS
824 addi r3,r1,STACK_FRAME_OVERHEAD 830 addi r3,r1,STACK_FRAME_OVERHEAD
825 ENABLE_INTS
826 bl .vsx_unavailable_exception 831 bl .vsx_unavailable_exception
827 b .ret_from_except 832 b .ret_from_except
828 833
@@ -831,66 +836,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
831__end_handlers: 836__end_handlers:
832 837
833/* 838/*
834 * Return from an exception with minimal checks.
835 * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
836 * If interrupts have been enabled, or anything has been
837 * done that might have changed the scheduling status of
838 * any task or sent any task a signal, you should use
839 * ret_from_except or ret_from_except_lite instead of this.
840 */
841fast_exc_return_irq: /* restores irq state too */
842 ld r3,SOFTE(r1)
843 TRACE_AND_RESTORE_IRQ(r3);
844 ld r12,_MSR(r1)
845 rldicl r4,r12,49,63 /* get MSR_EE to LSB */
846 stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */
847 b 1f
848
849 .globl fast_exception_return
850fast_exception_return:
851 ld r12,_MSR(r1)
8521: ld r11,_NIP(r1)
853 andi. r3,r12,MSR_RI /* check if RI is set */
854 beq- unrecov_fer
855
856#ifdef CONFIG_VIRT_CPU_ACCOUNTING
857 andi. r3,r12,MSR_PR
858 beq 2f
859 ACCOUNT_CPU_USER_EXIT(r3, r4)
8602:
861#endif
862
863 ld r3,_CCR(r1)
864 ld r4,_LINK(r1)
865 ld r5,_CTR(r1)
866 ld r6,_XER(r1)
867 mtcr r3
868 mtlr r4
869 mtctr r5
870 mtxer r6
871 REST_GPR(0, r1)
872 REST_8GPRS(2, r1)
873
874 mfmsr r10
875 rldicl r10,r10,48,1 /* clear EE */
876 rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */
877 mtmsrd r10,1
878
879 mtspr SPRN_SRR1,r12
880 mtspr SPRN_SRR0,r11
881 REST_4GPRS(10, r1)
882 ld r1,GPR1(r1)
883 rfid
884 b . /* prevent speculative execution */
885
886unrecov_fer:
887 bl .save_nvgprs
8881: addi r3,r1,STACK_FRAME_OVERHEAD
889 bl .unrecoverable_exception
890 b 1b
891
892
893/*
894 * Hash table stuff 839 * Hash table stuff
895 */ 840 */
896 .align 7 841 .align 7
@@ -912,28 +857,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
912 lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ 857 lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
913 andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ 858 andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
914 bne 77f /* then don't call hash_page now */ 859 bne 77f /* then don't call hash_page now */
915
916 /*
917 * On iSeries, we soft-disable interrupts here, then
918 * hard-enable interrupts so that the hash_page code can spin on
919 * the hash_table_lock without problems on a shared processor.
920 */
921 DISABLE_INTS
922
923 /*
924 * Currently, trace_hardirqs_off() will be called by DISABLE_INTS
925 * and will clobber volatile registers when irq tracing is enabled
926 * so we need to reload them. It may be possible to be smarter here
927 * and move the irq tracing elsewhere but let's keep it simple for
928 * now
929 */
930#ifdef CONFIG_TRACE_IRQFLAGS
931 ld r3,_DAR(r1)
932 ld r4,_DSISR(r1)
933 ld r5,_TRAP(r1)
934 ld r12,_MSR(r1)
935 clrrdi r5,r5,4
936#endif /* CONFIG_TRACE_IRQFLAGS */
937 /* 860 /*
938 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are 861 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
939 * accessing a userspace segment (even from the kernel). We assume 862 * accessing a userspace segment (even from the kernel). We assume
@@ -951,62 +874,25 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
951 * r4 contains the required access permissions 874 * r4 contains the required access permissions
952 * r5 contains the trap number 875 * r5 contains the trap number
953 * 876 *
954 * at return r3 = 0 for success 877 * at return r3 = 0 for success, 1 for page fault, negative for error
955 */ 878 */
956 bl .hash_page /* build HPTE if possible */ 879 bl .hash_page /* build HPTE if possible */
957 cmpdi r3,0 /* see if hash_page succeeded */ 880 cmpdi r3,0 /* see if hash_page succeeded */
958 881
959BEGIN_FW_FTR_SECTION 882 /* Success */
960 /*
961 * If we had interrupts soft-enabled at the point where the
962 * DSI/ISI occurred, and an interrupt came in during hash_page,
963 * handle it now.
964 * We jump to ret_from_except_lite rather than fast_exception_return
965 * because ret_from_except_lite will check for and handle pending
966 * interrupts if necessary.
967 */
968 beq 13f
969END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
970
971BEGIN_FW_FTR_SECTION
972 /*
973 * Here we have interrupts hard-disabled, so it is sufficient
974 * to restore paca->{soft,hard}_enable and get out.
975 */
976 beq fast_exc_return_irq /* Return from exception on success */ 883 beq fast_exc_return_irq /* Return from exception on success */
977END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
978
979 /* For a hash failure, we don't bother re-enabling interrupts */
980 ble- 12f
981
982 /*
983 * hash_page couldn't handle it, set soft interrupt enable back
984 * to what it was before the trap. Note that .arch_local_irq_restore
985 * handles any interrupts pending at this point.
986 */
987 ld r3,SOFTE(r1)
988 TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f)
989 bl .arch_local_irq_restore
990 b 11f
991 884
992/* We have a data breakpoint exception - handle it */ 885 /* Error */
993handle_dabr_fault: 886 blt- 13f
994 bl .save_nvgprs
995 ld r4,_DAR(r1)
996 ld r5,_DSISR(r1)
997 addi r3,r1,STACK_FRAME_OVERHEAD
998 bl .do_dabr
999 b .ret_from_except_lite
1000 887
1001/* Here we have a page fault that hash_page can't handle. */ 888/* Here we have a page fault that hash_page can't handle. */
1002handle_page_fault: 889handle_page_fault:
1003 ENABLE_INTS
100411: ld r4,_DAR(r1) 89011: ld r4,_DAR(r1)
1005 ld r5,_DSISR(r1) 891 ld r5,_DSISR(r1)
1006 addi r3,r1,STACK_FRAME_OVERHEAD 892 addi r3,r1,STACK_FRAME_OVERHEAD
1007 bl .do_page_fault 893 bl .do_page_fault
1008 cmpdi r3,0 894 cmpdi r3,0
1009 beq+ 13f 895 beq+ 12f
1010 bl .save_nvgprs 896 bl .save_nvgprs
1011 mr r5,r3 897 mr r5,r3
1012 addi r3,r1,STACK_FRAME_OVERHEAD 898 addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1014,12 +900,20 @@ handle_page_fault:
1014 bl .bad_page_fault 900 bl .bad_page_fault
1015 b .ret_from_except 901 b .ret_from_except
1016 902
101713: b .ret_from_except_lite 903/* We have a data breakpoint exception - handle it */
904handle_dabr_fault:
905 bl .save_nvgprs
906 ld r4,_DAR(r1)
907 ld r5,_DSISR(r1)
908 addi r3,r1,STACK_FRAME_OVERHEAD
909 bl .do_dabr
91012: b .ret_from_except_lite
911
1018 912
1019/* We have a page fault that hash_page could handle but HV refused 913/* We have a page fault that hash_page could handle but HV refused
1020 * the PTE insertion 914 * the PTE insertion
1021 */ 915 */
102212: bl .save_nvgprs 91613: bl .save_nvgprs
1023 mr r5,r3 917 mr r5,r3
1024 addi r3,r1,STACK_FRAME_OVERHEAD 918 addi r3,r1,STACK_FRAME_OVERHEAD
1025 ld r4,_DAR(r1) 919 ld r4,_DAR(r1)
@@ -1141,51 +1035,19 @@ _GLOBAL(do_stab_bolted)
1141 . = 0x7000 1035 . = 0x7000
1142 .globl fwnmi_data_area 1036 .globl fwnmi_data_area
1143fwnmi_data_area: 1037fwnmi_data_area:
1144#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
1145 1038
1146 /* iSeries does not use the FWNMI stuff, so it is safe to put
1147 * this here, even if we later allow kernels that will boot on
1148 * both pSeries and iSeries */
1149#ifdef CONFIG_PPC_ISERIES
1150 . = LPARMAP_PHYS
1151 .globl xLparMap
1152xLparMap:
1153 .quad HvEsidsToMap /* xNumberEsids */
1154 .quad HvRangesToMap /* xNumberRanges */
1155 .quad STAB0_PAGE /* xSegmentTableOffs */
1156 .zero 40 /* xRsvd */
1157 /* xEsids (HvEsidsToMap entries of 2 quads) */
1158 .quad PAGE_OFFSET_ESID /* xKernelEsid */
1159 .quad PAGE_OFFSET_VSID /* xKernelVsid */
1160 .quad VMALLOC_START_ESID /* xKernelEsid */
1161 .quad VMALLOC_START_VSID /* xKernelVsid */
1162 /* xRanges (HvRangesToMap entries of 3 quads) */
1163 .quad HvPagesToMap /* xPages */
1164 .quad 0 /* xOffset */
1165 .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */
1166
1167#endif /* CONFIG_PPC_ISERIES */
1168
1169#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
1170 /* pseries and powernv need to keep the whole page from 1039 /* pseries and powernv need to keep the whole page from
1171 * 0x7000 to 0x8000 free for use by the firmware 1040 * 0x7000 to 0x8000 free for use by the firmware
1172 */ 1041 */
1173 . = 0x8000 1042 . = 0x8000
1174#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ 1043#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
1175 1044
1176/* 1045/* Space for CPU0's segment table */
1177 * Space for CPU0's segment table. 1046 .balign 4096
1178 *
1179 * On iSeries, the hypervisor must fill in at least one entry before
1180 * we get control (with relocate on). The address is given to the hv
1181 * as a page number (see xLparMap above), so this must be at a
1182 * fixed address (the linker can't compute (u64)&initial_stab >>
1183 * PAGE_SHIFT).
1184 */
1185 . = STAB0_OFFSET /* 0x8000 */
1186 .globl initial_stab 1047 .globl initial_stab
1187initial_stab: 1048initial_stab:
1188 .space 4096 1049 .space 4096
1050
1189#ifdef CONFIG_PPC_POWERNV 1051#ifdef CONFIG_PPC_POWERNV
1190_GLOBAL(opal_mc_secondary_handler) 1052_GLOBAL(opal_mc_secondary_handler)
1191 HMT_MEDIUM 1053 HMT_MEDIUM
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
new file mode 100644
index 000000000000..cfe7a38708c3
--- /dev/null
+++ b/arch/powerpc/kernel/fadump.c
@@ -0,0 +1,1315 @@
1/*
2 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
3 * dump with assistance from firmware. This approach does not use kexec;
4 * instead, firmware assists in booting the kdump kernel while preserving
5 * memory contents. Most of the code implementation has been adapted
6 * from phyp assisted dump implementation written by Linas Vepstas and
7 * Manish Ahuja
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 *
23 * Copyright 2011 IBM Corporation
24 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
25 */
26
27#undef DEBUG
28#define pr_fmt(fmt) "fadump: " fmt
29
30#include <linux/string.h>
31#include <linux/memblock.h>
32#include <linux/delay.h>
33#include <linux/debugfs.h>
34#include <linux/seq_file.h>
35#include <linux/crash_dump.h>
36#include <linux/kobject.h>
37#include <linux/sysfs.h>
38
39#include <asm/page.h>
40#include <asm/prom.h>
41#include <asm/rtas.h>
42#include <asm/fadump.h>
43
44static struct fw_dump fw_dump;
45static struct fadump_mem_struct fdm;
46static const struct fadump_mem_struct *fdm_active;
47
48static DEFINE_MUTEX(fadump_mutex);
49struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
50int crash_mem_ranges;
51
52/* Scan the Firmware Assisted dump configuration details. */
53int __init early_init_dt_scan_fw_dump(unsigned long node,
54 const char *uname, int depth, void *data)
55{
56 __be32 *sections;
57 int i, num_sections;
58 unsigned long size;
59 const int *token;
60
61 if (depth != 1 || strcmp(uname, "rtas") != 0)
62 return 0;
63
64 /*
65 * Check if Firmware Assisted dump is supported. If yes, check
66 * whether a dump was initiated on the last reboot.
67 */
68 token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
69 if (!token)
70 return 0;
71
72 fw_dump.fadump_supported = 1;
73 fw_dump.ibm_configure_kernel_dump = *token;
74
75 /*
76 * The 'ibm,kernel-dump' rtas node is present only if there is
77 * dump data waiting for us.
78 */
79 fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
80 if (fdm_active)
81 fw_dump.dump_active = 1;
82
83 /* Get the sizes required to store dump data for the firmware provided
84 * dump sections.
85 * For each supported dump section type there is a 32-bit cell
86 * giving the ID of the section, followed by two 32-bit cells
87 * giving the size of the section in bytes.
88 */
89 sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
90 &size);
91
92 if (!sections)
93 return 0;
94
95 num_sections = size / (3 * sizeof(u32));
96
97 for (i = 0; i < num_sections; i++, sections += 3) {
98 u32 type = (u32)of_read_number(sections, 1);
99
100 switch (type) {
101 case FADUMP_CPU_STATE_DATA:
102 fw_dump.cpu_state_data_size =
103 of_read_ulong(&sections[1], 2);
104 break;
105 case FADUMP_HPTE_REGION:
106 fw_dump.hpte_region_size =
107 of_read_ulong(&sections[1], 2);
108 break;
109 }
110 }
111 return 1;
112}
113
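As an aside, here is a minimal user-space sketch of walking the three-cell entries described in the comment above: one 32-bit cell holding the section ID followed by two 32-bit cells holding the 64-bit size. The array contents are invented for illustration; the real property is big-endian and is read with of_read_number()/of_read_ulong() as in the scan routine above.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Invented example property: two entries of three 32-bit cells each. */
	static const uint32_t cells[] = {
		1, 0x0, 0x10000,	/* a CPU-state style section, 64KB */
		2, 0x0, 0x1000000,	/* an HPTE style section,     16MB */
	};
	size_t num_sections = sizeof(cells) / (3 * sizeof(uint32_t));
	size_t i;

	for (i = 0; i < num_sections; i++) {
		const uint32_t *c = &cells[3 * i];
		uint64_t size = ((uint64_t)c[1] << 32) | c[2];

		printf("section type %u: %llu bytes\n",
		       c[0], (unsigned long long)size);
	}
	return 0;
}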
114int is_fadump_active(void)
115{
116 return fw_dump.dump_active;
117}
118
119/* Print firmware assisted dump configurations for debugging purpose. */
120static void fadump_show_config(void)
121{
122 pr_debug("Support for firmware-assisted dump (fadump): %s\n",
123 (fw_dump.fadump_supported ? "present" : "no support"));
124
125 if (!fw_dump.fadump_supported)
126 return;
127
128 pr_debug("Fadump enabled : %s\n",
129 (fw_dump.fadump_enabled ? "yes" : "no"));
130 pr_debug("Dump Active : %s\n",
131 (fw_dump.dump_active ? "yes" : "no"));
132 pr_debug("Dump section sizes:\n");
133 pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
134 pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
135 pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
136}
137
138static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
139 unsigned long addr)
140{
141 if (!fdm)
142 return 0;
143
144 memset(fdm, 0, sizeof(struct fadump_mem_struct));
145 addr = addr & PAGE_MASK;
146
147 fdm->header.dump_format_version = 0x00000001;
148 fdm->header.dump_num_sections = 3;
149 fdm->header.dump_status_flag = 0;
150 fdm->header.offset_first_dump_section =
151 (u32)offsetof(struct fadump_mem_struct, cpu_state_data);
152
153 /*
154 * Fields for disk dump option.
155 * We are not using disk dump option, hence set these fields to 0.
156 */
157 fdm->header.dd_block_size = 0;
158 fdm->header.dd_block_offset = 0;
159 fdm->header.dd_num_blocks = 0;
160 fdm->header.dd_offset_disk_path = 0;
161
162 /* set 0 to disable an automatic dump-reboot. */
163 fdm->header.max_time_auto = 0;
164
165 /* Kernel dump sections */
166 /* cpu state data section. */
167 fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG;
168 fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA;
169 fdm->cpu_state_data.source_address = 0;
170 fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size;
171 fdm->cpu_state_data.destination_address = addr;
172 addr += fw_dump.cpu_state_data_size;
173
174 /* hpte region section */
175 fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG;
176 fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION;
177 fdm->hpte_region.source_address = 0;
178 fdm->hpte_region.source_len = fw_dump.hpte_region_size;
179 fdm->hpte_region.destination_address = addr;
180 addr += fw_dump.hpte_region_size;
181
182 /* RMA region section */
183 fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG;
184 fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION;
185 fdm->rmr_region.source_address = RMA_START;
186 fdm->rmr_region.source_len = fw_dump.boot_memory_size;
187 fdm->rmr_region.destination_address = addr;
188 addr += fw_dump.boot_memory_size;
189
190 return addr;
191}
192
193/**
194 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
195 *
196 * Function to find the largest memory size we need to reserve during early
197 * boot process. This will be the size of the memory that is required for a
198 * kernel to boot successfully.
199 *
200 * This function has been taken from phyp-assisted dump feature implementation.
201 *
202 * returns the larger of 256MB or 5% of system RAM, rounded down to a multiple of 256MB.
203 *
204 * TODO: Come up with better approach to find out more accurate memory size
205 * that is required for a kernel to boot successfully.
206 *
207 */
208static inline unsigned long fadump_calculate_reserve_size(void)
209{
210 unsigned long size;
211
212 /*
213 * Check if the size is specified through fadump_reserve_mem= cmdline
214 * option. If yes, then use that.
215 */
216 if (fw_dump.reserve_bootvar)
217 return fw_dump.reserve_bootvar;
218
219 /* divide by 20 to get 5% of value */
220 size = memblock_end_of_DRAM() / 20;
221
222 /* round it down to a multiple of 256MB */
223 size = size & ~0x0FFFFFFFUL;
224
225 /* Truncate to memory_limit. We don't want to over-reserve the memory. */
226 if (memory_limit && size > memory_limit)
227 size = memory_limit;
228
229 return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
230}
231
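To make the rounding above concrete, a standalone sketch of the same arithmetic: take 5% of RAM, clear the low 28 bits (i.e. round down to a 256MB multiple) and apply the floor. MIN_BOOT_MEM is assumed to be 256MB here purely for illustration.

#include <stdio.h>

#define MB(x)		((unsigned long long)(x) << 20)
#define MIN_BOOT	MB(256)		/* assumed stand-in for MIN_BOOT_MEM */

static unsigned long long reserve_size(unsigned long long ram)
{
	unsigned long long size = ram / 20;	/* 5% of RAM */

	size &= ~0x0FFFFFFFULL;			/* round down to a 256MB multiple */
	return size > MIN_BOOT ? size : MIN_BOOT;
}

int main(void)
{
	/* 8GB of RAM: 5% is ~410MB, which rounds down to 256MB */
	printf("%lluMB\n", reserve_size(MB(8192)) >> 20);
	/* 64GB of RAM: 5% is ~3277MB, which rounds down to 3072MB */
	printf("%lluMB\n", reserve_size(MB(65536)) >> 20);
	return 0;
}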
232/*
233 * Calculate the total memory size required to be reserved for
234 * firmware-assisted dump registration.
235 */
236static unsigned long get_fadump_area_size(void)
237{
238 unsigned long size = 0;
239
240 size += fw_dump.cpu_state_data_size;
241 size += fw_dump.hpte_region_size;
242 size += fw_dump.boot_memory_size;
243 size += sizeof(struct fadump_crash_info_header);
244 size += sizeof(struct elfhdr); /* ELF core header.*/
245 size += sizeof(struct elf_phdr); /* place holder for cpu notes */
246 /* Program headers for crash memory regions. */
247 size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
248
249 size = PAGE_ALIGN(size);
250 return size;
251}
252
253int __init fadump_reserve_mem(void)
254{
255 unsigned long base, size, memory_boundary;
256
257 if (!fw_dump.fadump_enabled)
258 return 0;
259
260 if (!fw_dump.fadump_supported) {
261 printk(KERN_INFO "Firmware-assisted dump is not supported on"
262 " this hardware\n");
263 fw_dump.fadump_enabled = 0;
264 return 0;
265 }
266 /*
267 * Initialize boot memory size
268 * If dump is active then we have already calculated the size during
269 * the first kernel's boot.
270 */
271 if (fdm_active)
272 fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
273 else
274 fw_dump.boot_memory_size = fadump_calculate_reserve_size();
275
276 /*
277 * Calculate the memory boundary.
278 * If memory_limit is less than actual memory boundary then reserve
279 * the memory for fadump beyond the memory_limit and adjust the
280 * memory_limit accordingly, so that the running kernel can run with
281 * specified memory_limit.
282 */
283 if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
284 size = get_fadump_area_size();
285 if ((memory_limit + size) < memblock_end_of_DRAM())
286 memory_limit += size;
287 else
288 memory_limit = memblock_end_of_DRAM();
289 printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
290 " dump, now %#016llx\n",
291 (unsigned long long)memory_limit);
292 }
293 if (memory_limit)
294 memory_boundary = memory_limit;
295 else
296 memory_boundary = memblock_end_of_DRAM();
297
298 if (fw_dump.dump_active) {
299 printk(KERN_INFO "Firmware-assisted dump is active.\n");
300 /*
301 * If last boot has crashed then reserve all the memory
302 * above boot_memory_size so that we don't touch it until
303 * dump is written to disk by userspace tool. This memory
304 * will be released for general use once the dump is saved.
305 */
306 base = fw_dump.boot_memory_size;
307 size = memory_boundary - base;
308 memblock_reserve(base, size);
309 printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
310 "for saving crash dump\n",
311 (unsigned long)(size >> 20),
312 (unsigned long)(base >> 20));
313
314 fw_dump.fadumphdr_addr =
315 fdm_active->rmr_region.destination_address +
316 fdm_active->rmr_region.source_len;
317 pr_debug("fadumphdr_addr = %p\n",
318 (void *) fw_dump.fadumphdr_addr);
319 } else {
320 /* Reserve the memory at the top of memory. */
321 size = get_fadump_area_size();
322 base = memory_boundary - size;
323 memblock_reserve(base, size);
324 printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
325 "for firmware-assisted dump\n",
326 (unsigned long)(size >> 20),
327 (unsigned long)(base >> 20));
328 }
329 fw_dump.reserve_dump_area_start = base;
330 fw_dump.reserve_dump_area_size = size;
331 return 1;
332}
333
334/* Look for fadump= cmdline option. */
335static int __init early_fadump_param(char *p)
336{
337 if (!p)
338 return 1;
339
340 if (strncmp(p, "on", 2) == 0)
341 fw_dump.fadump_enabled = 1;
342 else if (strncmp(p, "off", 3) == 0)
343 fw_dump.fadump_enabled = 0;
344
345 return 0;
346}
347early_param("fadump", early_fadump_param);
348
349/* Look for fadump_reserve_mem= cmdline option */
350static int __init early_fadump_reserve_mem(char *p)
351{
352 if (p)
353 fw_dump.reserve_bootvar = memparse(p, &p);
354 return 0;
355}
356early_param("fadump_reserve_mem", early_fadump_reserve_mem);
357
358static void register_fw_dump(struct fadump_mem_struct *fdm)
359{
360 int rc;
361 unsigned int wait_time;
362
363 pr_debug("Registering for firmware-assisted kernel dump...\n");
364
365 /* TODO: Add upper time limit for the delay */
366 do {
367 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
368 FADUMP_REGISTER, fdm,
369 sizeof(struct fadump_mem_struct));
370
371 wait_time = rtas_busy_delay_time(rc);
372 if (wait_time)
373 mdelay(wait_time);
374
375 } while (wait_time);
376
377 switch (rc) {
378 case -1:
379 printk(KERN_ERR "Failed to register firmware-assisted kernel"
380 " dump. Hardware Error(%d).\n", rc);
381 break;
382 case -3:
383 printk(KERN_ERR "Failed to register firmware-assisted kernel"
384 " dump. Parameter Error(%d).\n", rc);
385 break;
386 case -9:
387 printk(KERN_ERR "firmware-assisted kernel dump is already "
388 " registered.");
389 fw_dump.dump_registered = 1;
390 break;
391 case 0:
392 printk(KERN_INFO "firmware-assisted kernel dump registration"
393 " is successful\n");
394 fw_dump.dump_registered = 1;
395 break;
396 }
397}
398
399void crash_fadump(struct pt_regs *regs, const char *str)
400{
401 struct fadump_crash_info_header *fdh = NULL;
402
403 if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
404 return;
405
406 fdh = __va(fw_dump.fadumphdr_addr);
407 crashing_cpu = smp_processor_id();
408 fdh->crashing_cpu = crashing_cpu;
409 crash_save_vmcoreinfo();
410
411 if (regs)
412 fdh->regs = *regs;
413 else
414 ppc_save_regs(&fdh->regs);
415
416 fdh->cpu_online_mask = *cpu_online_mask;
417
418 /* Call ibm,os-term rtas call to trigger firmware assisted dump */
419 rtas_os_term((char *)str);
420}
421
422#define GPR_MASK 0xffffff0000000000
423static inline int fadump_gpr_index(u64 id)
424{
425 int i = -1;
426 char str[3];
427
428 if ((id & GPR_MASK) == REG_ID("GPR")) {
429 /* get the digits at the end */
430 id &= ~GPR_MASK;
431 id >>= 24;
432 str[2] = '\0';
433 str[1] = id & 0xff;
434 str[0] = (id >> 8) & 0xff;
435 sscanf(str, "%d", &i);
436 if (i > 31)
437 i = -1;
438 }
439 return i;
440}
441
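A standalone sketch of the decoding above. The reg_id() helper mimics what the REG_ID() macro (defined in asm/fadump.h, not visible in this hunk) is assumed to do, namely pack an ASCII register name, left justified, into a big-endian 64-bit identifier; the digit extraction then follows the same shifts as fadump_gpr_index().

#include <stdint.h>
#include <stdio.h>

#define GPR_PREFIX_MASK	0xffffff0000000000ULL

/* Pack up to 8 ASCII characters into a u64, most significant byte first. */
static uint64_t reg_id(const char *name)
{
	uint64_t id = 0;
	int i;

	for (i = 0; i < 8 && name[i]; i++)
		id |= (uint64_t)(unsigned char)name[i] << (56 - 8 * i);
	return id;
}

static int gpr_index(uint64_t id)
{
	char str[3] = { 0 };
	int i = -1;

	if ((id & GPR_PREFIX_MASK) == reg_id("GPR")) {
		id &= ~GPR_PREFIX_MASK;	/* clear the "GPR" prefix */
		id >>= 24;		/* digit chars end up in the low 16 bits */
		str[0] = (id >> 8) & 0xff;
		str[1] = id & 0xff;
		sscanf(str, "%d", &i);
		if (i > 31)
			i = -1;
	}
	return i;
}

int main(void)
{
	printf("%d\n", gpr_index(reg_id("GPR12")));	/* prints 12 */
	return 0;
}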
442static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
443 u64 reg_val)
444{
445 int i;
446
447 i = fadump_gpr_index(reg_id);
448 if (i >= 0)
449 regs->gpr[i] = (unsigned long)reg_val;
450 else if (reg_id == REG_ID("NIA"))
451 regs->nip = (unsigned long)reg_val;
452 else if (reg_id == REG_ID("MSR"))
453 regs->msr = (unsigned long)reg_val;
454 else if (reg_id == REG_ID("CTR"))
455 regs->ctr = (unsigned long)reg_val;
456 else if (reg_id == REG_ID("LR"))
457 regs->link = (unsigned long)reg_val;
458 else if (reg_id == REG_ID("XER"))
459 regs->xer = (unsigned long)reg_val;
460 else if (reg_id == REG_ID("CR"))
461 regs->ccr = (unsigned long)reg_val;
462 else if (reg_id == REG_ID("DAR"))
463 regs->dar = (unsigned long)reg_val;
464 else if (reg_id == REG_ID("DSISR"))
465 regs->dsisr = (unsigned long)reg_val;
466}
467
468static struct fadump_reg_entry*
469fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
470{
471 memset(regs, 0, sizeof(struct pt_regs));
472
473 while (reg_entry->reg_id != REG_ID("CPUEND")) {
474 fadump_set_regval(regs, reg_entry->reg_id,
475 reg_entry->reg_value);
476 reg_entry++;
477 }
478 reg_entry++;
479 return reg_entry;
480}
481
482static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
483 void *data, size_t data_len)
484{
485 struct elf_note note;
486
487 note.n_namesz = strlen(name) + 1;
488 note.n_descsz = data_len;
489 note.n_type = type;
490 memcpy(buf, &note, sizeof(note));
491 buf += (sizeof(note) + 3)/4;
492 memcpy(buf, name, note.n_namesz);
493 buf += (note.n_namesz + 3)/4;
494 memcpy(buf, data, note.n_descsz);
495 buf += (note.n_descsz + 3)/4;
496
497 return buf;
498}
499
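One small point worth spelling out from the helper above: buf is a u32 pointer, so advancing it by (length + 3) / 4 elements rounds each field up to the 4-byte boundary that the ELF note format requires. A trivial standalone check of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int name_len = 5;	/* e.g. "CORE" plus its trailing NUL */
	unsigned int words = (name_len + 3) / 4;

	printf("%u words, i.e. %u bytes\n", words, 4 * words);
	return 0;			/* prints: 2 words, i.e. 8 bytes */
}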
500static void fadump_final_note(u32 *buf)
501{
502 struct elf_note note;
503
504 note.n_namesz = 0;
505 note.n_descsz = 0;
506 note.n_type = 0;
507 memcpy(buf, &note, sizeof(note));
508}
509
510static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
511{
512 struct elf_prstatus prstatus;
513
514 memset(&prstatus, 0, sizeof(prstatus));
515 /*
516 * FIXME: How do i get PID? Do I really need it?
517 * prstatus.pr_pid = ????
518 */
519 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
520 buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
521 &prstatus, sizeof(prstatus));
522 return buf;
523}
524
525static void fadump_update_elfcore_header(char *bufp)
526{
527 struct elfhdr *elf;
528 struct elf_phdr *phdr;
529
530 elf = (struct elfhdr *)bufp;
531 bufp += sizeof(struct elfhdr);
532
533 /* First note is a place holder for cpu notes info. */
534 phdr = (struct elf_phdr *)bufp;
535
536 if (phdr->p_type == PT_NOTE) {
537 phdr->p_paddr = fw_dump.cpu_notes_buf;
538 phdr->p_offset = phdr->p_paddr;
539 phdr->p_filesz = fw_dump.cpu_notes_buf_size;
540 phdr->p_memsz = fw_dump.cpu_notes_buf_size;
541 }
542 return;
543}
544
545static void *fadump_cpu_notes_buf_alloc(unsigned long size)
546{
547 void *vaddr;
548 struct page *page;
549 unsigned long order, count, i;
550
551 order = get_order(size);
552 vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
553 if (!vaddr)
554 return NULL;
555
556 count = 1 << order;
557 page = virt_to_page(vaddr);
558 for (i = 0; i < count; i++)
559 SetPageReserved(page + i);
560 return vaddr;
561}
562
563static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
564{
565 struct page *page;
566 unsigned long order, count, i;
567
568 order = get_order(size);
569 count = 1 << order;
570 page = virt_to_page(vaddr);
571 for (i = 0; i < count; i++)
572 ClearPageReserved(page + i);
573 __free_pages(page, order);
574}
575
576/*
577 * Read CPU state dump data and convert it into ELF notes.
578 * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
579 * used to access the data to allow for additional fields to be added without
580 * affecting compatibility. Each list of registers for a CPU starts with
581 * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
582 * 8 Byte ASCII identifier and 8 Byte register value. The register entry
583 * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
584 * of register value. For more details refer to PAPR document.
585 *
586 * Only for the crashing cpu we ignore the CPU dump data and get exact
587 * state from fadump crash info structure populated by first kernel at the
588 * time of crash.
589 */
590static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
591{
592 struct fadump_reg_save_area_header *reg_header;
593 struct fadump_reg_entry *reg_entry;
594 struct fadump_crash_info_header *fdh = NULL;
595 void *vaddr;
596 unsigned long addr;
597 u32 num_cpus, *note_buf;
598 struct pt_regs regs;
599 int i, rc = 0, cpu = 0;
600
601 if (!fdm->cpu_state_data.bytes_dumped)
602 return -EINVAL;
603
604 addr = fdm->cpu_state_data.destination_address;
605 vaddr = __va(addr);
606
607 reg_header = vaddr;
608 if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
609 printk(KERN_ERR "Unable to read register save area.\n");
610 return -ENOENT;
611 }
612 pr_debug("--------CPU State Data------------\n");
613 pr_debug("Magic Number: %llx\n", reg_header->magic_number);
614 pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
615
616 vaddr += reg_header->num_cpu_offset;
617 num_cpus = *((u32 *)(vaddr));
618 pr_debug("NumCpus : %u\n", num_cpus);
619 vaddr += sizeof(u32);
620 reg_entry = (struct fadump_reg_entry *)vaddr;
621
622 /* Allocate buffer to hold cpu crash notes. */
623 fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
624 fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
625 note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
626 if (!note_buf) {
627 printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
628 "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
629 return -ENOMEM;
630 }
631 fw_dump.cpu_notes_buf = __pa(note_buf);
632
633 pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
634 (num_cpus * sizeof(note_buf_t)), note_buf);
635
636 if (fw_dump.fadumphdr_addr)
637 fdh = __va(fw_dump.fadumphdr_addr);
638
639 for (i = 0; i < num_cpus; i++) {
640 if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
641 printk(KERN_ERR "Unable to read CPU state data\n");
642 rc = -ENOENT;
643 goto error_out;
644 }
645 /* Lower 4 bytes of reg_value contains logical cpu id */
646 cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
647 if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
648 SKIP_TO_NEXT_CPU(reg_entry);
649 continue;
650 }
651 pr_debug("Reading register data for cpu %d...\n", cpu);
652 if (fdh && fdh->crashing_cpu == cpu) {
653 regs = fdh->regs;
654 note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
655 SKIP_TO_NEXT_CPU(reg_entry);
656 } else {
657 reg_entry++;
658 reg_entry = fadump_read_registers(reg_entry, &regs);
659 note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
660 }
661 }
662 fadump_final_note(note_buf);
663
664 pr_debug("Updating elfcore header (%llx) with cpu notes\n",
665 fdh->elfcorehdr_addr);
666 fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
667 return 0;
668
669error_out:
670 fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
671 fw_dump.cpu_notes_buf_size);
672 fw_dump.cpu_notes_buf = 0;
673 fw_dump.cpu_notes_buf_size = 0;
674 return rc;
675
676}
677
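For readers following the format description above, a hedged sketch of the register save area as plain C declarations. The structure and field names below are illustrative only; the kernel's real definitions live in asm/fadump.h and may contain additional fields.

#include <stdint.h>

/* Header at the start of the CPU state dump (magic is ASCII "REGSAVE").
 * num_cpu_offset locates the CPU count relative to the start of the area,
 * so the firmware can add fields without breaking parsers. */
struct regsave_area_header {
	uint64_t magic;
	uint32_t num_cpu_offset;
};

/* One 16-byte register entry.  A CPU's register list is bracketed by
 * entries whose id reads "CPUSTRT" and "CPUEND"; for those two entries
 * the low 32 bits of the value hold the logical CPU id. */
struct regsave_entry {
	uint64_t id;		/* 8-byte ASCII identifier, e.g. "GPR00", "MSR" */
	uint64_t value;		/* 8-byte register value */
};

int main(void)
{
	/* sanity check: each entry really is 16 bytes */
	return sizeof(struct regsave_entry) == 16 ? 0 : 1;
}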
678/*
679 * Validate and process the dump data stored by firmware before exporting
680 * it through '/proc/vmcore'.
681 */
682static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
683{
684 struct fadump_crash_info_header *fdh;
685 int rc = 0;
686
687 if (!fdm_active || !fw_dump.fadumphdr_addr)
688 return -EINVAL;
689
690 /* Check if the dump data is valid. */
691 if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
692 (fdm_active->cpu_state_data.error_flags != 0) ||
693 (fdm_active->rmr_region.error_flags != 0)) {
694 printk(KERN_ERR "Dump taken by platform is not valid\n");
695 return -EINVAL;
696 }
697 if ((fdm_active->rmr_region.bytes_dumped !=
698 fdm_active->rmr_region.source_len) ||
699 !fdm_active->cpu_state_data.bytes_dumped) {
700 printk(KERN_ERR "Dump taken by platform is incomplete\n");
701 return -EINVAL;
702 }
703
704 /* Validate the fadump crash info header */
705 fdh = __va(fw_dump.fadumphdr_addr);
706 if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
707 printk(KERN_ERR "Crash info header is not valid.\n");
708 return -EINVAL;
709 }
710
711 rc = fadump_build_cpu_notes(fdm_active);
712 if (rc)
713 return rc;
714
715 /*
716 * We are done validating the dump info and the elfcore header is now
717 * ready to be exported. Set elfcorehdr_addr so that the vmcore module
718 * will export the elfcore header through '/proc/vmcore'.
719 */
720 elfcorehdr_addr = fdh->elfcorehdr_addr;
721
722 return 0;
723}
724
725static inline void fadump_add_crash_memory(unsigned long long base,
726 unsigned long long end)
727{
728 if (base == end)
729 return;
730
731 pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
732 crash_mem_ranges, base, end - 1, (end - base));
733 crash_memory_ranges[crash_mem_ranges].base = base;
734 crash_memory_ranges[crash_mem_ranges].size = end - base;
735 crash_mem_ranges++;
736}
737
738static void fadump_exclude_reserved_area(unsigned long long start,
739 unsigned long long end)
740{
741 unsigned long long ra_start, ra_end;
742
743 ra_start = fw_dump.reserve_dump_area_start;
744 ra_end = ra_start + fw_dump.reserve_dump_area_size;
745
746 if ((ra_start < end) && (ra_end > start)) {
747 if ((start < ra_start) && (end > ra_end)) {
748 fadump_add_crash_memory(start, ra_start);
749 fadump_add_crash_memory(ra_end, end);
750 } else if (start < ra_start) {
751 fadump_add_crash_memory(start, ra_start);
752 } else if (ra_end < end) {
753 fadump_add_crash_memory(ra_end, end);
754 }
755 } else
756 fadump_add_crash_memory(start, end);
757}
758
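The three overlap cases handled above, replayed as a standalone sketch with made-up addresses and a reserved area assumed at [0x40000000, 0x50000000):

#include <stdio.h>

static const unsigned long long ra_start = 0x40000000ULL;
static const unsigned long long ra_end   = 0x50000000ULL;

static void add_range(unsigned long long b, unsigned long long e)
{
	if (b != e)
		printf("  keep [%#llx-%#llx)\n", b, e);
}

/* Same structure as fadump_exclude_reserved_area() above. */
static void exclude(unsigned long long start, unsigned long long end)
{
	printf("input [%#llx-%#llx):\n", start, end);
	if (ra_start < end && ra_end > start) {
		if (start < ra_start && end > ra_end) {	/* straddles the area */
			add_range(start, ra_start);
			add_range(ra_end, end);
		} else if (start < ra_start) {		/* overlaps its start */
			add_range(start, ra_start);
		} else if (ra_end < end) {		/* overlaps its end */
			add_range(ra_end, end);
		}					/* fully inside: drop */
	} else {
		add_range(start, end);			/* no overlap: keep */
	}
}

int main(void)
{
	exclude(0x30000000ULL, 0x60000000ULL);	/* split into two pieces */
	exclude(0x30000000ULL, 0x48000000ULL);	/* trimmed at ra_start   */
	exclude(0x60000000ULL, 0x70000000ULL);	/* untouched             */
	return 0;
}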
759static int fadump_init_elfcore_header(char *bufp)
760{
761 struct elfhdr *elf;
762
763 elf = (struct elfhdr *) bufp;
764 bufp += sizeof(struct elfhdr);
765 memcpy(elf->e_ident, ELFMAG, SELFMAG);
766 elf->e_ident[EI_CLASS] = ELF_CLASS;
767 elf->e_ident[EI_DATA] = ELF_DATA;
768 elf->e_ident[EI_VERSION] = EV_CURRENT;
769 elf->e_ident[EI_OSABI] = ELF_OSABI;
770 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
771 elf->e_type = ET_CORE;
772 elf->e_machine = ELF_ARCH;
773 elf->e_version = EV_CURRENT;
774 elf->e_entry = 0;
775 elf->e_phoff = sizeof(struct elfhdr);
776 elf->e_shoff = 0;
777 elf->e_flags = ELF_CORE_EFLAGS;
778 elf->e_ehsize = sizeof(struct elfhdr);
779 elf->e_phentsize = sizeof(struct elf_phdr);
780 elf->e_phnum = 0;
781 elf->e_shentsize = 0;
782 elf->e_shnum = 0;
783 elf->e_shstrndx = 0;
784
785 return 0;
786}
787
788/*
789 * Traverse through memblock structure and setup crash memory ranges. These
790 * ranges will be used to create PT_LOAD program headers in the elfcore header.
791 */
792static void fadump_setup_crash_memory_ranges(void)
793{
794 struct memblock_region *reg;
795 unsigned long long start, end;
796
797 pr_debug("Setup crash memory ranges.\n");
798 crash_mem_ranges = 0;
799 /*
800 * add the first memory chunk (RMA_START through boot_memory_size) as
801 * a separate memory chunk. The reason is that at crash time the firmware
802 * will move the content of this memory chunk to a different location
803 * specified during fadump registration. We need to create a separate
804 * program header for this chunk with the correct offset.
805 */
806 fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
807
808 for_each_memblock(memory, reg) {
809 start = (unsigned long long)reg->base;
810 end = start + (unsigned long long)reg->size;
811 if (start == RMA_START && end >= fw_dump.boot_memory_size)
812 start = fw_dump.boot_memory_size;
813
814 /* add this range excluding the reserved dump area. */
815 fadump_exclude_reserved_area(start, end);
816 }
817}
818
819/*
820 * If the given physical address falls within the boot memory region then
821 * return the relocated address that points to the dump region reserved
822 * for saving initial boot memory contents.
823 */
824static inline unsigned long fadump_relocate(unsigned long paddr)
825{
826 if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
827 return fdm.rmr_region.destination_address + paddr;
828 else
829 return paddr;
830}
831
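A quick numeric illustration of the relocation above, with made-up values: if the firmware copied the boot memory region to a destination_address of 0x40000000, then a physical address of 0x00100000 inside the boot memory region is found at 0x40100000 in the preserved dump.

#include <stdio.h>

int main(void)
{
	unsigned long long dest = 0x40000000ULL;	/* assumed destination_address */
	unsigned long long paddr = 0x00100000ULL;	/* falls inside boot memory    */

	printf("%#llx -> %#llx\n", paddr, dest + paddr);
	return 0;
}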
832static int fadump_create_elfcore_headers(char *bufp)
833{
834 struct elfhdr *elf;
835 struct elf_phdr *phdr;
836 int i;
837
838 fadump_init_elfcore_header(bufp);
839 elf = (struct elfhdr *)bufp;
840 bufp += sizeof(struct elfhdr);
841
842 /*
843 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
844 * will be populated during second kernel boot after crash. Hence
845 * this PT_NOTE will always be the first elf note.
846 *
847 * NOTE: Any new ELF note addition should be placed after this note.
848 */
849 phdr = (struct elf_phdr *)bufp;
850 bufp += sizeof(struct elf_phdr);
851 phdr->p_type = PT_NOTE;
852 phdr->p_flags = 0;
853 phdr->p_vaddr = 0;
854 phdr->p_align = 0;
855
856 phdr->p_offset = 0;
857 phdr->p_paddr = 0;
858 phdr->p_filesz = 0;
859 phdr->p_memsz = 0;
860
861 (elf->e_phnum)++;
862
863 /* setup ELF PT_NOTE for vmcoreinfo */
864 phdr = (struct elf_phdr *)bufp;
865 bufp += sizeof(struct elf_phdr);
866 phdr->p_type = PT_NOTE;
867 phdr->p_flags = 0;
868 phdr->p_vaddr = 0;
869 phdr->p_align = 0;
870
871 phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
872 phdr->p_offset = phdr->p_paddr;
873 phdr->p_memsz = vmcoreinfo_max_size;
874 phdr->p_filesz = vmcoreinfo_max_size;
875
876 /* Increment number of program headers. */
877 (elf->e_phnum)++;
878
879 /* setup PT_LOAD sections. */
880
881 for (i = 0; i < crash_mem_ranges; i++) {
882 unsigned long long mbase, msize;
883 mbase = crash_memory_ranges[i].base;
884 msize = crash_memory_ranges[i].size;
885
886 if (!msize)
887 continue;
888
889 phdr = (struct elf_phdr *)bufp;
890 bufp += sizeof(struct elf_phdr);
891 phdr->p_type = PT_LOAD;
892 phdr->p_flags = PF_R|PF_W|PF_X;
893 phdr->p_offset = mbase;
894
895 if (mbase == RMA_START) {
896 /*
897 * The entire RMA region will be moved by firmware
898 * to the specified destination_address. Hence set
899 * the correct offset.
900 */
901 phdr->p_offset = fdm.rmr_region.destination_address;
902 }
903
904 phdr->p_paddr = mbase;
905 phdr->p_vaddr = (unsigned long)__va(mbase);
906 phdr->p_filesz = msize;
907 phdr->p_memsz = msize;
908 phdr->p_align = 0;
909
910 /* Increment number of program headers. */
911 (elf->e_phnum)++;
912 }
913 return 0;
914}
915
916static unsigned long init_fadump_header(unsigned long addr)
917{
918 struct fadump_crash_info_header *fdh;
919
920 if (!addr)
921 return 0;
922
923 fw_dump.fadumphdr_addr = addr;
924 fdh = __va(addr);
925 addr += sizeof(struct fadump_crash_info_header);
926
927 memset(fdh, 0, sizeof(struct fadump_crash_info_header));
928 fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
929 fdh->elfcorehdr_addr = addr;
930 /* We will set the crashing cpu id in crash_fadump() during crash. */
931 fdh->crashing_cpu = CPU_UNKNOWN;
932
933 return addr;
934}
935
936static void register_fadump(void)
937{
938 unsigned long addr;
939 void *vaddr;
940
941 /*
942 * If no memory is reserved then we cannot register for firmware-
943 * assisted dump.
944 */
945 if (!fw_dump.reserve_dump_area_size)
946 return;
947
948 fadump_setup_crash_memory_ranges();
949
950 addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
951 /* Initialize fadump crash info header. */
952 addr = init_fadump_header(addr);
953 vaddr = __va(addr);
954
955 pr_debug("Creating ELF core headers at %#016lx\n", addr);
956 fadump_create_elfcore_headers(vaddr);
957
958 /* register the future kernel dump with firmware. */
959 register_fw_dump(&fdm);
960}
961
962static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
963{
964 int rc = 0;
965 unsigned int wait_time;
966
967 pr_debug("Un-register firmware-assisted dump\n");
968
969 /* TODO: Add upper time limit for the delay */
970 do {
971 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
972 FADUMP_UNREGISTER, fdm,
973 sizeof(struct fadump_mem_struct));
974
975 wait_time = rtas_busy_delay_time(rc);
976 if (wait_time)
977 mdelay(wait_time);
978 } while (wait_time);
979
980 if (rc) {
981 printk(KERN_ERR "Failed to un-register firmware-assisted dump."
982 " unexpected error(%d).\n", rc);
983 return rc;
984 }
985 fw_dump.dump_registered = 0;
986 return 0;
987}
988
989static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
990{
991 int rc = 0;
992 unsigned int wait_time;
993
994 pr_debug("Invalidating firmware-assisted dump registration\n");
995
996 /* TODO: Add upper time limit for the delay */
997 do {
998 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
999 FADUMP_INVALIDATE, fdm,
1000 sizeof(struct fadump_mem_struct));
1001
1002 wait_time = rtas_busy_delay_time(rc);
1003 if (wait_time)
1004 mdelay(wait_time);
1005 } while (wait_time);
1006
1007 if (rc) {
1008 printk(KERN_ERR "Failed to invalidate firmware-assisted dump "
1009 "rgistration. unexpected error(%d).\n", rc);
1010 return rc;
1011 }
1012 fw_dump.dump_active = 0;
1013 fdm_active = NULL;
1014 return 0;
1015}
1016
1017void fadump_cleanup(void)
1018{
1019 /* Invalidate the registration only if dump is active. */
1020 if (fw_dump.dump_active) {
1021 init_fadump_mem_struct(&fdm,
1022 fdm_active->cpu_state_data.destination_address);
1023 fadump_invalidate_dump(&fdm);
1024 }
1025}
1026
1027/*
1028 * Release the memory that was reserved in early boot to preserve the memory
1029 * contents. The released memory will be available for general use.
1030 */
1031static void fadump_release_memory(unsigned long begin, unsigned long end)
1032{
1033 unsigned long addr;
1034 unsigned long ra_start, ra_end;
1035
1036 ra_start = fw_dump.reserve_dump_area_start;
1037 ra_end = ra_start + fw_dump.reserve_dump_area_size;
1038
1039 for (addr = begin; addr < end; addr += PAGE_SIZE) {
1040 /*
1041 * exclude the dump reserve area. Will reuse it for next
1042 * fadump registration.
1043 */
1044 if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
1045 continue;
1046
1047 ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
1048 init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
1049 free_page((unsigned long)__va(addr));
1050 totalram_pages++;
1051 }
1052}
1053
1054static void fadump_invalidate_release_mem(void)
1055{
1056 unsigned long reserved_area_start, reserved_area_end;
1057 unsigned long destination_address;
1058
1059 mutex_lock(&fadump_mutex);
1060 if (!fw_dump.dump_active) {
1061 mutex_unlock(&fadump_mutex);
1062 return;
1063 }
1064
1065 destination_address = fdm_active->cpu_state_data.destination_address;
1066 fadump_cleanup();
1067 mutex_unlock(&fadump_mutex);
1068
1069 /*
1070 * Save the current reserved memory bounds; we will require them
1071 * later when releasing the memory for general use.
1072 */
1073 reserved_area_start = fw_dump.reserve_dump_area_start;
1074 reserved_area_end = reserved_area_start +
1075 fw_dump.reserve_dump_area_size;
1076 /*
1077 * Set up reserve_dump_area_start and its size so that we can
1078 * reuse this reserved memory for re-registration.
1079 */
1080 fw_dump.reserve_dump_area_start = destination_address;
1081 fw_dump.reserve_dump_area_size = get_fadump_area_size();
1082
1083 fadump_release_memory(reserved_area_start, reserved_area_end);
1084 if (fw_dump.cpu_notes_buf) {
1085 fadump_cpu_notes_buf_free(
1086 (unsigned long)__va(fw_dump.cpu_notes_buf),
1087 fw_dump.cpu_notes_buf_size);
1088 fw_dump.cpu_notes_buf = 0;
1089 fw_dump.cpu_notes_buf_size = 0;
1090 }
1091 /* Initialize the kernel dump memory structure for FAD registration. */
1092 init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1093}
1094
1095static ssize_t fadump_release_memory_store(struct kobject *kobj,
1096 struct kobj_attribute *attr,
1097 const char *buf, size_t count)
1098{
1099 if (!fw_dump.dump_active)
1100 return -EPERM;
1101
1102 if (buf[0] == '1') {
1103 /*
1104 * Take away the '/proc/vmcore'. We are releasing the dump
1105 * memory, hence it will not be valid anymore.
1106 */
1107 vmcore_cleanup();
1108 fadump_invalidate_release_mem();
1109
1110 } else
1111 return -EINVAL;
1112 return count;
1113}
1114
1115static ssize_t fadump_enabled_show(struct kobject *kobj,
1116 struct kobj_attribute *attr,
1117 char *buf)
1118{
1119 return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
1120}
1121
1122static ssize_t fadump_register_show(struct kobject *kobj,
1123 struct kobj_attribute *attr,
1124 char *buf)
1125{
1126 return sprintf(buf, "%d\n", fw_dump.dump_registered);
1127}
1128
1129static ssize_t fadump_register_store(struct kobject *kobj,
1130 struct kobj_attribute *attr,
1131 const char *buf, size_t count)
1132{
1133 int ret = 0;
1134
1135 if (!fw_dump.fadump_enabled || fdm_active)
1136 return -EPERM;
1137
1138 mutex_lock(&fadump_mutex);
1139
1140 switch (buf[0]) {
1141 case '0':
1142 if (fw_dump.dump_registered == 0) {
1143 ret = -EINVAL;
1144 goto unlock_out;
1145 }
1146 /* Un-register Firmware-assisted dump */
1147 fadump_unregister_dump(&fdm);
1148 break;
1149 case '1':
1150 if (fw_dump.dump_registered == 1) {
1151 ret = -EINVAL;
1152 goto unlock_out;
1153 }
1154 /* Register Firmware-assisted dump */
1155 register_fadump();
1156 break;
1157 default:
1158 ret = -EINVAL;
1159 break;
1160 }
1161
1162unlock_out:
1163 mutex_unlock(&fadump_mutex);
1164 return ret < 0 ? ret : count;
1165}
1166
1167static int fadump_region_show(struct seq_file *m, void *private)
1168{
1169 const struct fadump_mem_struct *fdm_ptr;
1170
1171 if (!fw_dump.fadump_enabled)
1172 return 0;
1173
1174 mutex_lock(&fadump_mutex);
1175 if (fdm_active)
1176 fdm_ptr = fdm_active;
1177 else {
1178 mutex_unlock(&fadump_mutex);
1179 fdm_ptr = &fdm;
1180 }
1181
1182 seq_printf(m,
1183 "CPU : [%#016llx-%#016llx] %#llx bytes, "
1184 "Dumped: %#llx\n",
1185 fdm_ptr->cpu_state_data.destination_address,
1186 fdm_ptr->cpu_state_data.destination_address +
1187 fdm_ptr->cpu_state_data.source_len - 1,
1188 fdm_ptr->cpu_state_data.source_len,
1189 fdm_ptr->cpu_state_data.bytes_dumped);
1190 seq_printf(m,
1191 "HPTE: [%#016llx-%#016llx] %#llx bytes, "
1192 "Dumped: %#llx\n",
1193 fdm_ptr->hpte_region.destination_address,
1194 fdm_ptr->hpte_region.destination_address +
1195 fdm_ptr->hpte_region.source_len - 1,
1196 fdm_ptr->hpte_region.source_len,
1197 fdm_ptr->hpte_region.bytes_dumped);
1198 seq_printf(m,
1199 "DUMP: [%#016llx-%#016llx] %#llx bytes, "
1200 "Dumped: %#llx\n",
1201 fdm_ptr->rmr_region.destination_address,
1202 fdm_ptr->rmr_region.destination_address +
1203 fdm_ptr->rmr_region.source_len - 1,
1204 fdm_ptr->rmr_region.source_len,
1205 fdm_ptr->rmr_region.bytes_dumped);
1206
1207 if (!fdm_active ||
1208 (fw_dump.reserve_dump_area_start ==
1209 fdm_ptr->cpu_state_data.destination_address))
1210 goto out;
1211
1212 /* Dump is active. Show reserved memory region. */
1213 seq_printf(m,
1214 " : [%#016llx-%#016llx] %#llx bytes, "
1215 "Dumped: %#llx\n",
1216 (unsigned long long)fw_dump.reserve_dump_area_start,
1217 fdm_ptr->cpu_state_data.destination_address - 1,
1218 fdm_ptr->cpu_state_data.destination_address -
1219 fw_dump.reserve_dump_area_start,
1220 fdm_ptr->cpu_state_data.destination_address -
1221 fw_dump.reserve_dump_area_start);
1222out:
1223 if (fdm_active)
1224 mutex_unlock(&fadump_mutex);
1225 return 0;
1226}
1227
1228static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
1229 0200, NULL,
1230 fadump_release_memory_store);
1231static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
1232 0444, fadump_enabled_show,
1233 NULL);
1234static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
1235 0644, fadump_register_show,
1236 fadump_register_store);
1237
1238static int fadump_region_open(struct inode *inode, struct file *file)
1239{
1240 return single_open(file, fadump_region_show, inode->i_private);
1241}
1242
1243static const struct file_operations fadump_region_fops = {
1244 .open = fadump_region_open,
1245 .read = seq_read,
1246 .llseek = seq_lseek,
1247 .release = single_release,
1248};
1249
1250static void fadump_init_files(void)
1251{
1252 struct dentry *debugfs_file;
1253 int rc = 0;
1254
1255 rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
1256 if (rc)
1257 printk(KERN_ERR "fadump: unable to create sysfs file"
1258 " fadump_enabled (%d)\n", rc);
1259
1260 rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
1261 if (rc)
1262 printk(KERN_ERR "fadump: unable to create sysfs file"
1263 " fadump_registered (%d)\n", rc);
1264
1265 debugfs_file = debugfs_create_file("fadump_region", 0444,
1266 powerpc_debugfs_root, NULL,
1267 &fadump_region_fops);
1268 if (!debugfs_file)
1269 printk(KERN_ERR "fadump: unable to create debugfs file"
1270 " fadump_region\n");
1271
1272 if (fw_dump.dump_active) {
1273 rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
1274 if (rc)
1275 printk(KERN_ERR "fadump: unable to create sysfs file"
1276 " fadump_release_mem (%d)\n", rc);
1277 }
1278 return;
1279}
1280
1281/*
1282 * Prepare for firmware-assisted dump.
1283 */
1284int __init setup_fadump(void)
1285{
1286 if (!fw_dump.fadump_enabled)
1287 return 0;
1288
1289 if (!fw_dump.fadump_supported) {
1290 printk(KERN_ERR "Firmware-assisted dump is not supported on"
1291 " this hardware\n");
1292 return 0;
1293 }
1294
1295 fadump_show_config();
1296 /*
1297 * If dump data is available then see if it is valid and prepare for
1298 * saving it to the disk.
1299 */
1300 if (fw_dump.dump_active) {
1301 /*
1302 * if dump process fails then invalidate the registration
1303 * and release memory before proceeding for re-registration.
1304 */
1305 if (process_fadump(fdm_active) < 0)
1306 fadump_invalidate_release_mem();
1307 }
1308 /* Initialize the kernel dump memory structure for FAD registration. */
1309 else if (fw_dump.reserve_dump_area_size)
1310 init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
1311 fadump_init_files();
1312
1313 return 1;
1314}
1315subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 0654dba2c1f1..dc0488b6f6e1 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -395,7 +395,7 @@ DataAccess:
395 bl hash_page 395 bl hash_page
3961: lwz r5,_DSISR(r11) /* get DSISR value */ 3961: lwz r5,_DSISR(r11) /* get DSISR value */
397 mfspr r4,SPRN_DAR 397 mfspr r4,SPRN_DAR
398 EXC_XFER_EE_LITE(0x300, handle_page_fault) 398 EXC_XFER_LITE(0x300, handle_page_fault)
399 399
400 400
401/* Instruction access exception. */ 401/* Instruction access exception. */
@@ -410,7 +410,7 @@ InstructionAccess:
410 bl hash_page 410 bl hash_page
4111: mr r4,r12 4111: mr r4,r12
412 mr r5,r9 412 mr r5,r9
413 EXC_XFER_EE_LITE(0x400, handle_page_fault) 413 EXC_XFER_LITE(0x400, handle_page_fault)
414 414
415/* External interrupt */ 415/* External interrupt */
416 EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) 416 EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 872a6af83bad..4989661b710b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -394,7 +394,7 @@ label:
394 NORMAL_EXCEPTION_PROLOG 394 NORMAL_EXCEPTION_PROLOG
395 mr r4,r12 /* Pass SRR0 as arg2 */ 395 mr r4,r12 /* Pass SRR0 as arg2 */
396 li r5,0 /* Pass zero as arg3 */ 396 li r5,0 /* Pass zero as arg3 */
397 EXC_XFER_EE_LITE(0x400, handle_page_fault) 397 EXC_XFER_LITE(0x400, handle_page_fault)
398 398
399/* 0x0500 - External Interrupt Exception */ 399/* 0x0500 - External Interrupt Exception */
400 EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) 400 EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
@@ -747,7 +747,7 @@ DataAccess:
747 mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ 747 mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
748 stw r5,_ESR(r11) 748 stw r5,_ESR(r11)
749 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ 749 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
750 EXC_XFER_EE_LITE(0x300, handle_page_fault) 750 EXC_XFER_LITE(0x300, handle_page_fault)
751 751
752/* Other PowerPC processors, namely those derived from the 6xx-series 752/* Other PowerPC processors, namely those derived from the 6xx-series
753 * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. 753 * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 06c7251c1bf7..58bddee8e1e8 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -32,13 +32,13 @@
32#include <asm/cputable.h> 32#include <asm/cputable.h>
33#include <asm/setup.h> 33#include <asm/setup.h>
34#include <asm/hvcall.h> 34#include <asm/hvcall.h>
35#include <asm/iseries/lpar_map.h>
36#include <asm/thread_info.h> 35#include <asm/thread_info.h>
37#include <asm/firmware.h> 36#include <asm/firmware.h>
38#include <asm/page_64.h> 37#include <asm/page_64.h>
39#include <asm/irqflags.h> 38#include <asm/irqflags.h>
40#include <asm/kvm_book3s_asm.h> 39#include <asm/kvm_book3s_asm.h>
41#include <asm/ptrace.h> 40#include <asm/ptrace.h>
41#include <asm/hw_irq.h>
42 42
43/* The physical memory is laid out such that the secondary processor 43/* The physical memory is laid out such that the secondary processor
44 * spin code sits at 0x0000...0x00ff. On server, the vectors follow 44 * spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -57,10 +57,6 @@
57 * entry in r9 for debugging purposes 57 * entry in r9 for debugging purposes
58 * 2. Secondary processors enter at 0x60 with PIR in gpr3 58 * 2. Secondary processors enter at 0x60 with PIR in gpr3
59 * 59 *
60 * For iSeries:
61 * 1. The MMU is on (as it always is for iSeries)
62 * 2. The kernel is entered at system_reset_iSeries
63 *
64 * For Book3E processors: 60 * For Book3E processors:
65 * 1. The MMU is on running in AS0 in a state defined in ePAPR 61 * 1. The MMU is on running in AS0 in a state defined in ePAPR
66 * 2. The kernel is entered at __start 62 * 2. The kernel is entered at __start
@@ -93,15 +89,6 @@ __secondary_hold_spinloop:
93__secondary_hold_acknowledge: 89__secondary_hold_acknowledge:
94 .llong 0x0 90 .llong 0x0
95 91
96#ifdef CONFIG_PPC_ISERIES
97 /*
98 * At offset 0x20, there is a pointer to iSeries LPAR data.
99 * This is required by the hypervisor
100 */
101 . = 0x20
102 .llong hvReleaseData-KERNELBASE
103#endif /* CONFIG_PPC_ISERIES */
104
105#ifdef CONFIG_RELOCATABLE 92#ifdef CONFIG_RELOCATABLE
106 /* This flag is set to 1 by a loader if the kernel should run 93 /* This flag is set to 1 by a loader if the kernel should run
107 * at the loaded address instead of the linked address. This 94 * at the loaded address instead of the linked address. This
@@ -564,7 +551,8 @@ _GLOBAL(pmac_secondary_start)
564 */ 551 */
565 li r0,0 552 li r0,0
566 stb r0,PACASOFTIRQEN(r13) 553 stb r0,PACASOFTIRQEN(r13)
567 stb r0,PACAHARDIRQEN(r13) 554 li r0,PACA_IRQ_HARD_DIS
555 stb r0,PACAIRQHAPPENED(r13)
568 556
569 /* Create a temp kernel stack for use before relocation is on. */ 557 /* Create a temp kernel stack for use before relocation is on. */
570 ld r1,PACAEMERGSP(r13) 558 ld r1,PACAEMERGSP(r13)
@@ -582,7 +570,7 @@ _GLOBAL(pmac_secondary_start)
582 * 1. Processor number 570 * 1. Processor number
583 * 2. Segment table pointer (virtual address) 571 * 2. Segment table pointer (virtual address)
584 * On entry the following are set: 572 * On entry the following are set:
585 * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries 573 * r1 = stack pointer (real addr of temp stack)
586 * r24 = cpu# (in Linux terms) 574 * r24 = cpu# (in Linux terms)
587 * r13 = paca virtual address 575 * r13 = paca virtual address
588 * SPRG_PACA = paca virtual address 576 * SPRG_PACA = paca virtual address
@@ -595,7 +583,7 @@ __secondary_start:
595 /* Set thread priority to MEDIUM */ 583 /* Set thread priority to MEDIUM */
596 HMT_MEDIUM 584 HMT_MEDIUM
597 585
598 /* Initialize the kernel stack. Just a repeat for iSeries. */ 586 /* Initialize the kernel stack */
599 LOAD_REG_ADDR(r3, current_set) 587 LOAD_REG_ADDR(r3, current_set)
600 sldi r28,r24,3 /* get current_set[cpu#] */ 588 sldi r28,r24,3 /* get current_set[cpu#] */
601 ldx r14,r3,r28 589 ldx r14,r3,r28
@@ -615,20 +603,16 @@ __secondary_start:
615 li r7,0 603 li r7,0
616 mtlr r7 604 mtlr r7
617 605
606 /* Mark interrupts soft and hard disabled (they might be enabled
607 * in the PACA when doing hotplug)
608 */
609 stb r7,PACASOFTIRQEN(r13)
610 li r0,PACA_IRQ_HARD_DIS
611 stb r0,PACAIRQHAPPENED(r13)
612
618 /* enable MMU and jump to start_secondary */ 613 /* enable MMU and jump to start_secondary */
619 LOAD_REG_ADDR(r3, .start_secondary_prolog) 614 LOAD_REG_ADDR(r3, .start_secondary_prolog)
620 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) 615 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
621#ifdef CONFIG_PPC_ISERIES
622BEGIN_FW_FTR_SECTION
623 ori r4,r4,MSR_EE
624 li r8,1
625 stb r8,PACAHARDIRQEN(r13)
626END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
627#endif
628BEGIN_FW_FTR_SECTION
629 stb r7,PACAHARDIRQEN(r13)
630END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
631 stb r7,PACASOFTIRQEN(r13)
632 616
633 mtspr SPRN_SRR0,r3 617 mtspr SPRN_SRR0,r3
634 mtspr SPRN_SRR1,r4 618 mtspr SPRN_SRR1,r4
@@ -771,22 +755,18 @@ _INIT_GLOBAL(start_here_common)
771 /* Load the TOC (virtual address) */ 755 /* Load the TOC (virtual address) */
772 ld r2,PACATOC(r13) 756 ld r2,PACATOC(r13)
773 757
758 /* Do more system initializations in virtual mode */
774 bl .setup_system 759 bl .setup_system
775 760
776 /* Load up the kernel context */ 761 /* Mark interrupts soft and hard disabled (they might be enabled
7775: 762 * in the PACA when doing hotplug)
778 li r5,0 763 */
779 stb r5,PACASOFTIRQEN(r13) /* Soft Disabled */ 764 li r0,0
780#ifdef CONFIG_PPC_ISERIES 765 stb r0,PACASOFTIRQEN(r13)
781BEGIN_FW_FTR_SECTION 766 li r0,PACA_IRQ_HARD_DIS
782 mfmsr r5 767 stb r0,PACAIRQHAPPENED(r13)
783 ori r5,r5,MSR_EE /* Hard Enabled on iSeries*/
784 mtmsrd r5
785 li r5,1
786END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
787#endif
788 stb r5,PACAHARDIRQEN(r13) /* Hard Disabled on others */
789 768
769 /* Generic kernel entry */
790 bl .start_kernel 770 bl .start_kernel
791 771
792 /* Not reached */ 772 /* Not reached */
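Note on the head_64.S hunks above: the old PACAHARDIRQEN byte is replaced by the PACA_IRQ_HARD_DIS bit stored in PACAIRQHAPPENED when CPUs are brought up. A minimal C sketch of the state the assembly leaves in the PACA before entering start_kernel()/start_secondary(); the helper name is made up for illustration, only the two field stores correspond to the assembly:

#include <asm/paca.h>
#include <asm/hw_irq.h>

/* Illustrative only: per-CPU interrupt state established by the boot paths
 * above. Interrupts are soft-disabled and recorded as hard-disabled so a
 * later arch_local_irq_restore() knows it still has to hard-enable.
 */
static void mark_boot_cpu_irqs_disabled(void)
{
	local_paca->soft_enabled = 0;			/* stb r0,PACASOFTIRQEN(r13) */
	local_paca->irq_happened = PACA_IRQ_HARD_DIS;	/* stb r0,PACAIRQHAPPENED(r13) */
}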
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index b68cb173ba2c..b2a5860accfb 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -220,7 +220,7 @@ DataAccess:
220 mfspr r4,SPRN_DAR 220 mfspr r4,SPRN_DAR
221 li r10,0x00f0 221 li r10,0x00f0
222 mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ 222 mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
223 EXC_XFER_EE_LITE(0x300, handle_page_fault) 223 EXC_XFER_LITE(0x300, handle_page_fault)
224 224
225/* Instruction access exception. 225/* Instruction access exception.
226 * This is "never generated" by the MPC8xx. We jump to it for other 226 * This is "never generated" by the MPC8xx. We jump to it for other
@@ -231,7 +231,7 @@ InstructionAccess:
231 EXCEPTION_PROLOG 231 EXCEPTION_PROLOG
232 mr r4,r12 232 mr r4,r12
233 mr r5,r9 233 mr r5,r9
234 EXC_XFER_EE_LITE(0x400, handle_page_fault) 234 EXC_XFER_LITE(0x400, handle_page_fault)
235 235
236/* External interrupt */ 236/* External interrupt */
237 EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) 237 EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index fc921bf62e15..0e4175388f47 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -359,7 +359,7 @@ label:
359 mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ 359 mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
360 stw r5,_ESR(r11); \ 360 stw r5,_ESR(r11); \
361 mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ 361 mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
362 EXC_XFER_EE_LITE(0x0300, handle_page_fault) 362 EXC_XFER_LITE(0x0300, handle_page_fault)
363 363
364#define INSTRUCTION_STORAGE_EXCEPTION \ 364#define INSTRUCTION_STORAGE_EXCEPTION \
365 START_EXCEPTION(InstructionStorage) \ 365 START_EXCEPTION(InstructionStorage) \
@@ -368,7 +368,7 @@ label:
368 stw r5,_ESR(r11); \ 368 stw r5,_ESR(r11); \
369 mr r4,r12; /* Pass SRR0 as arg2 */ \ 369 mr r4,r12; /* Pass SRR0 as arg2 */ \
370 li r5,0; /* Pass zero as arg3 */ \ 370 li r5,0; /* Pass zero as arg3 */ \
371 EXC_XFER_EE_LITE(0x0400, handle_page_fault) 371 EXC_XFER_LITE(0x0400, handle_page_fault)
372 372
373#define ALIGNMENT_EXCEPTION \ 373#define ALIGNMENT_EXCEPTION \
374 START_EXCEPTION(Alignment) \ 374 START_EXCEPTION(Alignment) \
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index d5d78c4ceef6..28e62598d0e8 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -319,7 +319,7 @@ interrupt_base:
319 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ 319 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
320 andis. r10,r5,(ESR_ILK|ESR_DLK)@h 320 andis. r10,r5,(ESR_ILK|ESR_DLK)@h
321 bne 1f 321 bne 1f
322 EXC_XFER_EE_LITE(0x0300, handle_page_fault) 322 EXC_XFER_LITE(0x0300, handle_page_fault)
3231: 3231:
324 addi r3,r1,STACK_FRAME_OVERHEAD 324 addi r3,r1,STACK_FRAME_OVERHEAD
325 EXC_XFER_EE_LITE(0x0300, CacheLockingException) 325 EXC_XFER_EE_LITE(0x0300, CacheLockingException)
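Note on the 32-bit head_* hunks above, which switch the storage-exception paths from EXC_XFER_EE_LITE to EXC_XFER_LITE: the practical difference is the MSR the handler is entered with. The sketch below is not the real macro bodies (see head_booke.h and head_32.S for those); it only states the MSR distinction:

/* Illustrative only: handler entry MSR for each transfer macro. */
#define SKETCH_MSR_FOR_EXC_XFER_LITE	(MSR_KERNEL)		/* EE stays off */
#define SKETCH_MSR_FOR_EXC_XFER_EE_LITE	(MSR_KERNEL | MSR_EE)	/* EE re-enabled */

With the lazy-disable rework, the page-fault handler is entered with external interrupts still off and re-enables them itself once it is safe to do so.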
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index d39ae606ff8d..79bb282e6501 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -713,7 +713,7 @@ static struct dev_pm_ops ibmebus_bus_dev_pm_ops = {
713 713
714struct bus_type ibmebus_bus_type = { 714struct bus_type ibmebus_bus_type = {
715 .name = "ibmebus", 715 .name = "ibmebus",
716 .uevent = of_device_uevent, 716 .uevent = of_device_uevent_modalias,
717 .bus_attrs = ibmebus_bus_attrs, 717 .bus_attrs = ibmebus_bus_attrs,
718 .match = ibmebus_bus_bus_match, 718 .match = ibmebus_bus_bus_match,
719 .probe = ibmebus_bus_device_probe, 719 .probe = ibmebus_bus_device_probe,
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index c97fc60c790c..e8e821146f38 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -84,7 +84,11 @@ void cpu_idle(void)
84 84
85 start_critical_timings(); 85 start_critical_timings();
86 86
87 local_irq_enable(); 87 /* Some power_save functions return with
88 * interrupts enabled, some don't.
89 */
90 if (irqs_disabled())
91 local_irq_enable();
88 set_thread_flag(TIF_POLLING_NRFLAG); 92 set_thread_flag(TIF_POLLING_NRFLAG);
89 93
90 } else { 94 } else {
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 16c002d6bdf1..ff007b59448d 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -29,43 +29,30 @@ _GLOBAL(book3e_idle)
29 wrteei 0 29 wrteei 0
30 30
31 /* Now check if an interrupt came in while we were soft disabled 31 /* Now check if an interrupt came in while we were soft disabled
32 * since we may otherwise lose it (doorbells etc...). We know 32 * since we may otherwise lose it (doorbells etc...).
33 * that since PACAHARDIRQEN will have been cleared in that case.
34 */ 33 */
35 lbz r3,PACAHARDIRQEN(r13) 34 lbz r3,PACAIRQHAPPENED(r13)
36 cmpwi cr0,r3,0 35 cmpwi cr0,r3,0
37 beqlr 36 bnelr
38 37
39 /* Now we are going to mark ourselves as soft and hard enables in 38 /* Now we are going to mark ourselves as soft and hard enabled in
40 * order to be able to take interrupts while asleep. We inform lockdep 39 * order to be able to take interrupts while asleep. We inform lockdep
41 * of that. We don't actually turn interrupts on just yet tho. 40 * of that. We don't actually turn interrupts on just yet tho.
42 */ 41 */
43#ifdef CONFIG_TRACE_IRQFLAGS 42#ifdef CONFIG_TRACE_IRQFLAGS
44 stdu r1,-128(r1) 43 stdu r1,-128(r1)
45 bl .trace_hardirqs_on 44 bl .trace_hardirqs_on
45 addi r1,r1,128
46#endif 46#endif
47 li r0,1 47 li r0,1
48 stb r0,PACASOFTIRQEN(r13) 48 stb r0,PACASOFTIRQEN(r13)
49 stb r0,PACAHARDIRQEN(r13)
50 49
51 /* Interrupts will make use return to LR, so get something we want 50 /* Interrupts will make use return to LR, so get something we want
52 * in there 51 * in there
53 */ 52 */
54 bl 1f 53 bl 1f
55 54
56 /* Hard disable interrupts again */ 55 /* And return (interrupts are on) */
57 wrteei 0
58
59 /* Mark them off again in the PACA as well */
60 li r0,0
61 stb r0,PACASOFTIRQEN(r13)
62 stb r0,PACAHARDIRQEN(r13)
63
64 /* Tell lockdep about it */
65#ifdef CONFIG_TRACE_IRQFLAGS
66 bl .trace_hardirqs_off
67 addi r1,r1,128
68#endif
69 ld r0,16(r1) 56 ld r0,16(r1)
70 mtlr r0 57 mtlr r0
71 blr 58 blr
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index ba3195478600..2c71b0fc9f91 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -14,6 +14,7 @@
14#include <asm/thread_info.h> 14#include <asm/thread_info.h>
15#include <asm/ppc_asm.h> 15#include <asm/ppc_asm.h>
16#include <asm/asm-offsets.h> 16#include <asm/asm-offsets.h>
17#include <asm/irqflags.h>
17 18
18#undef DEBUG 19#undef DEBUG
19 20
@@ -29,14 +30,31 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
29 cmpwi 0,r4,0 30 cmpwi 0,r4,0
30 beqlr 31 beqlr
31 32
32 /* Go to NAP now */ 33 /* Hard disable interrupts */
33 mfmsr r7 34 mfmsr r7
34 rldicl r0,r7,48,1 35 rldicl r0,r7,48,1
35 rotldi r0,r0,16 36 rotldi r0,r0,16
36 mtmsrd r0,1 /* hard-disable interrupts */ 37 mtmsrd r0,1
38
39 /* Check if something happened while soft-disabled */
40 lbz r0,PACAIRQHAPPENED(r13)
41 cmpwi cr0,r0,0
42 bnelr
43
44 /* Soft-enable interrupts */
45#ifdef CONFIG_TRACE_IRQFLAGS
46 mflr r0
47 std r0,16(r1)
48 stdu r1,-128(r1)
49 bl .trace_hardirqs_on
50 addi r1,r1,128
51 ld r0,16(r1)
52 mtlr r0
53 mfmsr r7
54#endif /* CONFIG_TRACE_IRQFLAGS */
55
37 li r0,1 56 li r0,1
38 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ 57 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
39 stb r0,PACAHARDIRQEN(r13)
40BEGIN_FTR_SECTION 58BEGIN_FTR_SECTION
41 DSSALL 59 DSSALL
42 sync 60 sync
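The Book3E and POWER4 nap paths above (and the POWER7 path that follows) now share the same entry sequence: hard-disable, bail out if an interrupt was latched while soft-disabled, then soft-enable (and tell lockdep) before napping so the wakeup interrupt can be taken. A rough C-level paraphrase, illustrative only; the function name is invented and the real code is the assembly above:

#include <linux/types.h>
#include <linux/irqflags.h>
#include <asm/paca.h>
#include <asm/hw_irq.h>

/* Illustrative paraphrase of the nap-entry checks added above. */
static bool nap_entry_allowed(void)
{
	__hard_irq_disable();			/* wrteei 0 / clear MSR[EE] */

	/* Something was latched while we were soft-disabled: don't nap,
	 * return so the caller can soft-enable and have it replayed.
	 */
	if (local_paca->irq_happened)
		return false;

	trace_hardirqs_on();			/* lockdep: about to take IRQs */
	local_paca->soft_enabled = 1;		/* stb r0,PACASOFTIRQEN(r13) */
	return true;
}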
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index fcdff198da4b..0cdc9a392839 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -1,5 +1,5 @@
1/* 1/*
2 * This file contains the power_save function for 970-family CPUs. 2 * This file contains the power_save function for Power7 CPUs.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
@@ -15,6 +15,7 @@
15#include <asm/ppc_asm.h> 15#include <asm/ppc_asm.h>
16#include <asm/asm-offsets.h> 16#include <asm/asm-offsets.h>
17#include <asm/ppc-opcode.h> 17#include <asm/ppc-opcode.h>
18#include <asm/hw_irq.h>
18 19
19#undef DEBUG 20#undef DEBUG
20 21
@@ -51,9 +52,25 @@ _GLOBAL(power7_idle)
51 rldicl r9,r9,48,1 52 rldicl r9,r9,48,1
52 rotldi r9,r9,16 53 rotldi r9,r9,16
53 mtmsrd r9,1 /* hard-disable interrupts */ 54 mtmsrd r9,1 /* hard-disable interrupts */
55
56 /* Check if something happened while soft-disabled */
57 lbz r0,PACAIRQHAPPENED(r13)
58 cmpwi cr0,r0,0
59 beq 1f
60 addi r1,r1,INT_FRAME_SIZE
61 ld r0,16(r1)
62 mtlr r0
63 blr
64
651: /* We mark irqs hard disabled as this is the state we'll
66 * be in when returning and we need to tell arch_local_irq_restore()
67 * about it
68 */
69 li r0,PACA_IRQ_HARD_DIS
70 stb r0,PACAIRQHAPPENED(r13)
71
72 /* We haven't lost state ... yet */
54 li r0,0 73 li r0,0
55 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
56 stb r0,PACAHARDIRQEN(r13)
57 stb r0,PACA_NAPSTATELOST(r13) 74 stb r0,PACA_NAPSTATELOST(r13)
58 75
59 /* Continue saving state */ 76 /* Continue saving state */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0cfcf98aafca..359f078571c7 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -39,6 +39,7 @@
39#include <asm/pci-bridge.h> 39#include <asm/pci-bridge.h>
40#include <asm/machdep.h> 40#include <asm/machdep.h>
41#include <asm/kdump.h> 41#include <asm/kdump.h>
42#include <asm/fadump.h>
42 43
43#define DBG(...) 44#define DBG(...)
44 45
@@ -445,7 +446,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
445 446
446static void iommu_table_clear(struct iommu_table *tbl) 447static void iommu_table_clear(struct iommu_table *tbl)
447{ 448{
448 if (!is_kdump_kernel()) { 449 /*
450 * In case of firmware assisted dump system goes through clean
451 * reboot process at the time of system crash. Hence it's safe to
452 * clear the TCE entries if firmware assisted dump is active.
453 */
454 if (!is_kdump_kernel() || is_fadump_active()) {
449 /* Clear the table in case firmware left allocations in it */ 455 /* Clear the table in case firmware left allocations in it */
450 ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); 456 ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
451 return; 457 return;
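The iommu.c hunk above widens the boot-time TCE scrub to cover the firmware-assisted dump case. A hypothetical helper condensing the same policy (not a function from the patch):

#include <linux/types.h>
#include <linux/crash_dump.h>
#include <asm/fadump.h>

/* Hypothetical helper restating the condition above. */
static bool tce_table_should_be_cleared(void)
{
	/* A firmware-assisted dump goes through a clean reboot, so no stale
	 * DMA can still target the old mappings and they may be cleared.
	 * A plain kdump capture kernel leaves them alone, since devices set
	 * up by the crashed kernel may still be doing DMA through them.
	 */
	return !is_kdump_kernel() || is_fadump_active();
}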
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 01e2877e8e04..a3d128e94cff 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -93,20 +93,16 @@ extern int tau_interrupts(int);
93 93
94#ifdef CONFIG_PPC64 94#ifdef CONFIG_PPC64
95 95
96#ifndef CONFIG_SPARSE_IRQ
97EXPORT_SYMBOL(irq_desc);
98#endif
99
100int distribute_irqs = 1; 96int distribute_irqs = 1;
101 97
102static inline notrace unsigned long get_hard_enabled(void) 98static inline notrace unsigned long get_irq_happened(void)
103{ 99{
104 unsigned long enabled; 100 unsigned long happened;
105 101
106 __asm__ __volatile__("lbz %0,%1(13)" 102 __asm__ __volatile__("lbz %0,%1(13)"
107 : "=r" (enabled) : "i" (offsetof(struct paca_struct, hard_enabled))); 103 : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened)));
108 104
109 return enabled; 105 return happened;
110} 106}
111 107
112static inline notrace void set_soft_enabled(unsigned long enable) 108static inline notrace void set_soft_enabled(unsigned long enable)
@@ -115,88 +111,162 @@ static inline notrace void set_soft_enabled(unsigned long enable)
115 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); 111 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
116} 112}
117 113
118static inline notrace void decrementer_check_overflow(void) 114static inline notrace int decrementer_check_overflow(void)
119{ 115{
120 u64 now = get_tb_or_rtc(); 116 u64 now = get_tb_or_rtc();
121 u64 *next_tb; 117 u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
122 118
123 preempt_disable();
124 next_tb = &__get_cpu_var(decrementers_next_tb);
125
126 if (now >= *next_tb) 119 if (now >= *next_tb)
127 set_dec(1); 120 set_dec(1);
128 preempt_enable(); 121 return now >= *next_tb;
129} 122}
130 123
131notrace void arch_local_irq_restore(unsigned long en) 124/* This is called whenever we are re-enabling interrupts
125 * and returns either 0 (nothing to do) or 500/900 if there's
126 * either an EE or a DEC to generate.
127 *
128 * This is called in two contexts: From arch_local_irq_restore()
129 * before soft-enabling interrupts, and from the exception exit
130 * path when returning from an interrupt from a soft-disabled to
131 * a soft enabled context. In both case we have interrupts hard
132 * disabled.
133 *
134 * We take care of only clearing the bits we handled in the
135 * PACA irq_happened field since we can only re-emit one at a
136 * time and we don't want to "lose" one.
137 */
138notrace unsigned int __check_irq_replay(void)
132{ 139{
133 /* 140 /*
134 * get_paca()->soft_enabled = en; 141 * We use local_paca rather than get_paca() to avoid all
135 * Is it ever valid to use local_irq_restore(0) when soft_enabled is 1? 142 * the debug_smp_processor_id() business in this low level
136 * That was allowed before, and in such a case we do need to take care 143 * function
137 * that gcc will set soft_enabled directly via r13, not choose to use
138 * an intermediate register, lest we're preempted to a different cpu.
139 */ 144 */
140 set_soft_enabled(en); 145 unsigned char happened = local_paca->irq_happened;
141 if (!en) 146
142 return; 147 /* Clear bit 0 which we wouldn't clear otherwise */
148 local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
143 149
144#ifdef CONFIG_PPC_STD_MMU_64 150 /*
145 if (firmware_has_feature(FW_FEATURE_ISERIES)) { 151 * Force the delivery of pending soft-disabled interrupts on PS3.
146 /* 152 * Any HV call will have this side effect.
147 * Do we need to disable preemption here? Not really: in the 153 */
148 * unlikely event that we're preempted to a different cpu in 154 if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
149 * between getting r13, loading its lppaca_ptr, and loading 155 u64 tmp, tmp2;
150 * its any_int, we might call iseries_handle_interrupts without 156 lv1_get_version_info(&tmp, &tmp2);
151 * an interrupt pending on the new cpu, but that's no disaster,
152 * is it? And the business of preempting us off the old cpu
153 * would itself involve a local_irq_restore which handles the
154 * interrupt to that cpu.
155 *
156 * But use "local_paca->lppaca_ptr" instead of "get_lppaca()"
157 * to avoid any preemption checking added into get_paca().
158 */
159 if (local_paca->lppaca_ptr->int_dword.any_int)
160 iseries_handle_interrupts();
161 } 157 }
162#endif /* CONFIG_PPC_STD_MMU_64 */
163 158
164 /* 159 /*
165 * if (get_paca()->hard_enabled) return; 160 * We may have missed a decrementer interrupt. We check the
166 * But again we need to take care that gcc gets hard_enabled directly 161 * decrementer itself rather than the paca irq_happened field
167 * via r13, not choose to use an intermediate register, lest we're 162 * in case we also had a rollover while hard disabled
168 * preempted to a different cpu in between the two instructions. 163 */
164 local_paca->irq_happened &= ~PACA_IRQ_DEC;
165 if (decrementer_check_overflow())
166 return 0x900;
167
168 /* Finally check if an external interrupt happened */
169 local_paca->irq_happened &= ~PACA_IRQ_EE;
170 if (happened & PACA_IRQ_EE)
171 return 0x500;
172
173#ifdef CONFIG_PPC_BOOK3E
174 /* Finally check if an EPR external interrupt happened
175 * this bit is typically set if we need to handle another
176 * "edge" interrupt from within the MPIC "EPR" handler
177 */
178 local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
179 if (happened & PACA_IRQ_EE_EDGE)
180 return 0x500;
181
182 local_paca->irq_happened &= ~PACA_IRQ_DBELL;
183 if (happened & PACA_IRQ_DBELL)
184 return 0x280;
185#endif /* CONFIG_PPC_BOOK3E */
186
187 /* There should be nothing left ! */
188 BUG_ON(local_paca->irq_happened != 0);
189
190 return 0;
191}
192
193notrace void arch_local_irq_restore(unsigned long en)
194{
195 unsigned char irq_happened;
196 unsigned int replay;
197
198 /* Write the new soft-enabled value */
199 set_soft_enabled(en);
200 if (!en)
201 return;
202 /*
203 * From this point onward, we can take interrupts, preempt,
204 * etc... unless we got hard-disabled. We check if an event
205 * happened. If none happened, we know we can just return.
206 *
207 * We may have preempted before the check below, in which case
208 * we are checking the "new" CPU instead of the old one. This
209 * is only a problem if an event happened on the "old" CPU.
210 *
211 * External interrupt events on non-iseries will have caused
212 * interrupts to be hard-disabled, so there is no problem, we
213 * cannot have preempted.
169 */ 214 */
170 if (get_hard_enabled()) 215 irq_happened = get_irq_happened();
216 if (!irq_happened)
171 return; 217 return;
172 218
173 /* 219 /*
174 * Need to hard-enable interrupts here. Since currently disabled, 220 * We need to hard disable to get a trusted value from
175 * no need to take further asm precautions against preemption; but 221 * __check_irq_replay(). We also need to soft-disable
176 * use local_paca instead of get_paca() to avoid preemption checking. 222 * again to avoid warnings in there due to the use of
223 * per-cpu variables.
224 *
225 * We know that if the value in irq_happened is exactly 0x01
226 * then we are already hard disabled (there are other less
227 * common cases that we'll ignore for now), so we skip the
228 * (expensive) mtmsrd.
177 */ 229 */
178 local_paca->hard_enabled = en; 230 if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
231 __hard_irq_disable();
232 set_soft_enabled(0);
179 233
180 /* 234 /*
181 * Trigger the decrementer if we have a pending event. Some processors 235 * Check if anything needs to be re-emitted. We haven't
182 * only trigger on edge transitions of the sign bit. We might also 236 * soft-enabled yet to avoid warnings in decrementer_check_overflow
183 * have disabled interrupts long enough that the decrementer wrapped 237 * accessing per-cpu variables
184 * to positive.
185 */ 238 */
186 decrementer_check_overflow(); 239 replay = __check_irq_replay();
240
241 /* We can soft-enable now */
242 set_soft_enabled(1);
187 243
188 /* 244 /*
189 * Force the delivery of pending soft-disabled interrupts on PS3. 245 * And replay if we have to. This will return with interrupts
190 * Any HV call will have this side effect. 246 * hard-enabled.
191 */ 247 */
192 if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { 248 if (replay) {
193 u64 tmp, tmp2; 249 __replay_interrupt(replay);
194 lv1_get_version_info(&tmp, &tmp2); 250 return;
195 } 251 }
196 252
253 /* Finally, let's ensure we are hard enabled */
197 __hard_irq_enable(); 254 __hard_irq_enable();
198} 255}
199EXPORT_SYMBOL(arch_local_irq_restore); 256EXPORT_SYMBOL(arch_local_irq_restore);
257
258/*
259 * This is specifically called by assembly code to re-enable interrupts
260 * if they are currently disabled. This is typically called before
261 * schedule() or do_signal() when returning to userspace. We do it
262 * in C to avoid the burden of dealing with lockdep etc...
263 */
264void restore_interrupts(void)
265{
266 if (irqs_disabled())
267 local_irq_enable();
268}
269
200#endif /* CONFIG_PPC64 */ 270#endif /* CONFIG_PPC64 */
201 271
202int arch_show_interrupts(struct seq_file *p, int prec) 272int arch_show_interrupts(struct seq_file *p, int prec)
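The rewritten arch_local_irq_restore() above is the heart of the lazy scheme; ordinary callers never see the replay machinery. An illustrative usage sketch using only the generic irqflags API (nothing in it is added by this patch):

#include <linux/irqflags.h>

/* Illustrative only: a normal critical section. If a decrementer (0x900)
 * or external (0x500) interrupt arrives inside it, the event is latched in
 * paca->irq_happened and replayed (__check_irq_replay() picks the vector,
 * __replay_interrupt() re-emits it) when local_irq_restore() re-enables.
 */
static void example_critical_section(void)
{
	unsigned long flags;

	local_irq_save(flags);
	/* ... touch per-CPU state safely ... */
	local_irq_restore(flags);	/* may hard-disable, replay, then hard-enable */
}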
@@ -364,8 +434,17 @@ void do_IRQ(struct pt_regs *regs)
364 434
365 check_stack_overflow(); 435 check_stack_overflow();
366 436
437 /*
438 * Query the platform PIC for the interrupt & ack it.
439 *
440 * This will typically lower the interrupt line to the CPU
441 */
367 irq = ppc_md.get_irq(); 442 irq = ppc_md.get_irq();
368 443
444 /* We can hard enable interrupts now */
445 may_hard_irq_enable();
446
447 /* And finally process it */
369 if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) 448 if (irq != NO_IRQ && irq != NO_IRQ_IGNORE)
370 handle_one_irq(irq); 449 handle_one_irq(irq);
371 else if (irq != NO_IRQ_IGNORE) 450 else if (irq != NO_IRQ_IGNORE)
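In the do_IRQ() hunk above, hard-enabling is deferred until after ppc_md.get_irq() has queried and acked the interrupt controller, since enabling earlier could immediately re-take a still-asserted level interrupt. A rough sketch of what may_hard_irq_enable() does under this scheme (an approximation for context; the authoritative definition lives in asm/hw_irq.h):

#include <asm/paca.h>
#include <asm/hw_irq.h>

/* Approximate behaviour of may_hard_irq_enable(): only hard-enable when no
 * external interrupt is still pending replay.
 */
static inline void may_hard_irq_enable_sketch(void)
{
	get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
	if (!(get_paca()->irq_happened & PACA_IRQ_EE))
		__hard_irq_enable();
}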
@@ -374,15 +453,6 @@ void do_IRQ(struct pt_regs *regs)
374 irq_exit(); 453 irq_exit();
375 set_irq_regs(old_regs); 454 set_irq_regs(old_regs);
376 455
377#ifdef CONFIG_PPC_ISERIES
378 if (firmware_has_feature(FW_FEATURE_ISERIES) &&
379 get_lppaca()->int_dword.fields.decr_int) {
380 get_lppaca()->int_dword.fields.decr_int = 0;
381 /* Signal a fake decrementer interrupt */
382 timer_interrupt(regs);
383 }
384#endif
385
386 trace_irq_exit(regs); 456 trace_irq_exit(regs);
387} 457}
388 458
@@ -490,409 +560,19 @@ void do_softirq(void)
490 local_irq_restore(flags); 560 local_irq_restore(flags);
491} 561}
492 562
493
494/*
495 * IRQ controller and virtual interrupts
496 */
497
498/* The main irq map itself is an array of NR_IRQ entries containing the
499 * associate host and irq number. An entry with a host of NULL is free.
500 * An entry can be allocated if it's free, the allocator always then sets
501 * hwirq first to the host's invalid irq number and then fills ops.
502 */
503struct irq_map_entry {
504 irq_hw_number_t hwirq;
505 struct irq_host *host;
506};
507
508static LIST_HEAD(irq_hosts);
509static DEFINE_RAW_SPINLOCK(irq_big_lock);
510static DEFINE_MUTEX(revmap_trees_mutex);
511static struct irq_map_entry irq_map[NR_IRQS];
512static unsigned int irq_virq_count = NR_IRQS;
513static struct irq_host *irq_default_host;
514
515irq_hw_number_t irqd_to_hwirq(struct irq_data *d) 563irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
516{ 564{
517 return irq_map[d->irq].hwirq; 565 return d->hwirq;
518} 566}
519EXPORT_SYMBOL_GPL(irqd_to_hwirq); 567EXPORT_SYMBOL_GPL(irqd_to_hwirq);
520 568
521irq_hw_number_t virq_to_hw(unsigned int virq) 569irq_hw_number_t virq_to_hw(unsigned int virq)
522{ 570{
523 return irq_map[virq].hwirq; 571 struct irq_data *irq_data = irq_get_irq_data(virq);
572 return WARN_ON(!irq_data) ? 0 : irq_data->hwirq;
524} 573}
525EXPORT_SYMBOL_GPL(virq_to_hw); 574EXPORT_SYMBOL_GPL(virq_to_hw);
526 575
527bool virq_is_host(unsigned int virq, struct irq_host *host)
528{
529 return irq_map[virq].host == host;
530}
531EXPORT_SYMBOL_GPL(virq_is_host);
532
533static int default_irq_host_match(struct irq_host *h, struct device_node *np)
534{
535 return h->of_node != NULL && h->of_node == np;
536}
537
538struct irq_host *irq_alloc_host(struct device_node *of_node,
539 unsigned int revmap_type,
540 unsigned int revmap_arg,
541 struct irq_host_ops *ops,
542 irq_hw_number_t inval_irq)
543{
544 struct irq_host *host;
545 unsigned int size = sizeof(struct irq_host);
546 unsigned int i;
547 unsigned int *rmap;
548 unsigned long flags;
549
550 /* Allocate structure and revmap table if using linear mapping */
551 if (revmap_type == IRQ_HOST_MAP_LINEAR)
552 size += revmap_arg * sizeof(unsigned int);
553 host = kzalloc(size, GFP_KERNEL);
554 if (host == NULL)
555 return NULL;
556
557 /* Fill structure */
558 host->revmap_type = revmap_type;
559 host->inval_irq = inval_irq;
560 host->ops = ops;
561 host->of_node = of_node_get(of_node);
562
563 if (host->ops->match == NULL)
564 host->ops->match = default_irq_host_match;
565
566 raw_spin_lock_irqsave(&irq_big_lock, flags);
567
568 /* If it's a legacy controller, check for duplicates and
569 * mark it as allocated (we use irq 0 host pointer for that
570 */
571 if (revmap_type == IRQ_HOST_MAP_LEGACY) {
572 if (irq_map[0].host != NULL) {
573 raw_spin_unlock_irqrestore(&irq_big_lock, flags);
574 of_node_put(host->of_node);
575 kfree(host);
576 return NULL;
577 }
578 irq_map[0].host = host;
579 }
580
581 list_add(&host->link, &irq_hosts);
582 raw_spin_unlock_irqrestore(&irq_big_lock, flags);
583
584 /* Additional setups per revmap type */
585 switch(revmap_type) {
586 case IRQ_HOST_MAP_LEGACY:
587 /* 0 is always the invalid number for legacy */
588 host->inval_irq = 0;
589 /* setup us as the host for all legacy interrupts */
590 for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
591 irq_map[i].hwirq = i;
592 smp_wmb();
593 irq_map[i].host = host;
594 smp_wmb();
595
596 /* Legacy flags are left to default at this point,
597 * one can then use irq_create_mapping() to
598 * explicitly change them
599 */
600 ops->map(host, i, i);
601
602 /* Clear norequest flags */
603 irq_clear_status_flags(i, IRQ_NOREQUEST);
604 }
605 break;
606 case IRQ_HOST_MAP_LINEAR:
607 rmap = (unsigned int *)(host + 1);
608 for (i = 0; i < revmap_arg; i++)
609 rmap[i] = NO_IRQ;
610 host->revmap_data.linear.size = revmap_arg;
611 smp_wmb();
612 host->revmap_data.linear.revmap = rmap;
613 break;
614 case IRQ_HOST_MAP_TREE:
615 INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL);
616 break;
617 default:
618 break;
619 }
620
621 pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host);
622
623 return host;
624}
625
626struct irq_host *irq_find_host(struct device_node *node)
627{
628 struct irq_host *h, *found = NULL;
629 unsigned long flags;
630
631 /* We might want to match the legacy controller last since
632 * it might potentially be set to match all interrupts in
633 * the absence of a device node. This isn't a problem so far
634 * yet though...
635 */
636 raw_spin_lock_irqsave(&irq_big_lock, flags);
637 list_for_each_entry(h, &irq_hosts, link)
638 if (h->ops->match(h, node)) {
639 found = h;
640 break;
641 }
642 raw_spin_unlock_irqrestore(&irq_big_lock, flags);
643 return found;
644}
645EXPORT_SYMBOL_GPL(irq_find_host);
646
647void irq_set_default_host(struct irq_host *host)
648{
649 pr_debug("irq: Default host set to @0x%p\n", host);
650
651 irq_default_host = host;
652}
653
654void irq_set_virq_count(unsigned int count)
655{
656 pr_debug("irq: Trying to set virq count to %d\n", count);
657
658 BUG_ON(count < NUM_ISA_INTERRUPTS);
659 if (count < NR_IRQS)
660 irq_virq_count = count;
661}
662
663static int irq_setup_virq(struct irq_host *host, unsigned int virq,
664 irq_hw_number_t hwirq)
665{
666 int res;
667
668 res = irq_alloc_desc_at(virq, 0);
669 if (res != virq) {
670 pr_debug("irq: -> allocating desc failed\n");
671 goto error;
672 }
673
674 /* map it */
675 smp_wmb();
676 irq_map[virq].hwirq = hwirq;
677 smp_mb();
678
679 if (host->ops->map(host, virq, hwirq)) {
680 pr_debug("irq: -> mapping failed, freeing\n");
681 goto errdesc;
682 }
683
684 irq_clear_status_flags(virq, IRQ_NOREQUEST);
685
686 return 0;
687
688errdesc:
689 irq_free_descs(virq, 1);
690error:
691 irq_free_virt(virq, 1);
692 return -1;
693}
694
695unsigned int irq_create_direct_mapping(struct irq_host *host)
696{
697 unsigned int virq;
698
699 if (host == NULL)
700 host = irq_default_host;
701
702 BUG_ON(host == NULL);
703 WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP);
704
705 virq = irq_alloc_virt(host, 1, 0);
706 if (virq == NO_IRQ) {
707 pr_debug("irq: create_direct virq allocation failed\n");
708 return NO_IRQ;
709 }
710
711 pr_debug("irq: create_direct obtained virq %d\n", virq);
712
713 if (irq_setup_virq(host, virq, virq))
714 return NO_IRQ;
715
716 return virq;
717}
718
719unsigned int irq_create_mapping(struct irq_host *host,
720 irq_hw_number_t hwirq)
721{
722 unsigned int virq, hint;
723
724 pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq);
725
726 /* Look for default host if nececssary */
727 if (host == NULL)
728 host = irq_default_host;
729 if (host == NULL) {
730 printk(KERN_WARNING "irq_create_mapping called for"
731 " NULL host, hwirq=%lx\n", hwirq);
732 WARN_ON(1);
733 return NO_IRQ;
734 }
735 pr_debug("irq: -> using host @%p\n", host);
736
737 /* Check if mapping already exists */
738 virq = irq_find_mapping(host, hwirq);
739 if (virq != NO_IRQ) {
740 pr_debug("irq: -> existing mapping on virq %d\n", virq);
741 return virq;
742 }
743
744 /* Get a virtual interrupt number */
745 if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
746 /* Handle legacy */
747 virq = (unsigned int)hwirq;
748 if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
749 return NO_IRQ;
750 return virq;
751 } else {
752 /* Allocate a virtual interrupt number */
753 hint = hwirq % irq_virq_count;
754 virq = irq_alloc_virt(host, 1, hint);
755 if (virq == NO_IRQ) {
756 pr_debug("irq: -> virq allocation failed\n");
757 return NO_IRQ;
758 }
759 }
760
761 if (irq_setup_virq(host, virq, hwirq))
762 return NO_IRQ;
763
764 pr_debug("irq: irq %lu on host %s mapped to virtual irq %u\n",
765 hwirq, host->of_node ? host->of_node->full_name : "null", virq);
766
767 return virq;
768}
769EXPORT_SYMBOL_GPL(irq_create_mapping);
770
771unsigned int irq_create_of_mapping(struct device_node *controller,
772 const u32 *intspec, unsigned int intsize)
773{
774 struct irq_host *host;
775 irq_hw_number_t hwirq;
776 unsigned int type = IRQ_TYPE_NONE;
777 unsigned int virq;
778
779 if (controller == NULL)
780 host = irq_default_host;
781 else
782 host = irq_find_host(controller);
783 if (host == NULL) {
784 printk(KERN_WARNING "irq: no irq host found for %s !\n",
785 controller->full_name);
786 return NO_IRQ;
787 }
788
789 /* If host has no translation, then we assume interrupt line */
790 if (host->ops->xlate == NULL)
791 hwirq = intspec[0];
792 else {
793 if (host->ops->xlate(host, controller, intspec, intsize,
794 &hwirq, &type))
795 return NO_IRQ;
796 }
797
798 /* Create mapping */
799 virq = irq_create_mapping(host, hwirq);
800 if (virq == NO_IRQ)
801 return virq;
802
803 /* Set type if specified and different than the current one */
804 if (type != IRQ_TYPE_NONE &&
805 type != (irqd_get_trigger_type(irq_get_irq_data(virq))))
806 irq_set_irq_type(virq, type);
807 return virq;
808}
809EXPORT_SYMBOL_GPL(irq_create_of_mapping);
810
811void irq_dispose_mapping(unsigned int virq)
812{
813 struct irq_host *host;
814 irq_hw_number_t hwirq;
815
816 if (virq == NO_IRQ)
817 return;
818
819 host = irq_map[virq].host;
820 if (WARN_ON(host == NULL))
821 return;
822
823 /* Never unmap legacy interrupts */
824 if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
825 return;
826
827 irq_set_status_flags(virq, IRQ_NOREQUEST);
828
829 /* remove chip and handler */
830 irq_set_chip_and_handler(virq, NULL, NULL);
831
832 /* Make sure it's completed */
833 synchronize_irq(virq);
834
835 /* Tell the PIC about it */
836 if (host->ops->unmap)
837 host->ops->unmap(host, virq);
838 smp_mb();
839
840 /* Clear reverse map */
841 hwirq = irq_map[virq].hwirq;
842 switch(host->revmap_type) {
843 case IRQ_HOST_MAP_LINEAR:
844 if (hwirq < host->revmap_data.linear.size)
845 host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
846 break;
847 case IRQ_HOST_MAP_TREE:
848 mutex_lock(&revmap_trees_mutex);
849 radix_tree_delete(&host->revmap_data.tree, hwirq);
850 mutex_unlock(&revmap_trees_mutex);
851 break;
852 }
853
854 /* Destroy map */
855 smp_mb();
856 irq_map[virq].hwirq = host->inval_irq;
857
858 irq_free_descs(virq, 1);
859 /* Free it */
860 irq_free_virt(virq, 1);
861}
862EXPORT_SYMBOL_GPL(irq_dispose_mapping);
863
864unsigned int irq_find_mapping(struct irq_host *host,
865 irq_hw_number_t hwirq)
866{
867 unsigned int i;
868 unsigned int hint = hwirq % irq_virq_count;
869
870 /* Look for default host if nececssary */
871 if (host == NULL)
872 host = irq_default_host;
873 if (host == NULL)
874 return NO_IRQ;
875
876 /* legacy -> bail early */
877 if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
878 return hwirq;
879
880 /* Slow path does a linear search of the map */
881 if (hint < NUM_ISA_INTERRUPTS)
882 hint = NUM_ISA_INTERRUPTS;
883 i = hint;
884 do {
885 if (irq_map[i].host == host &&
886 irq_map[i].hwirq == hwirq)
887 return i;
888 i++;
889 if (i >= irq_virq_count)
890 i = NUM_ISA_INTERRUPTS;
891 } while(i != hint);
892 return NO_IRQ;
893}
894EXPORT_SYMBOL_GPL(irq_find_mapping);
895
896#ifdef CONFIG_SMP 576#ifdef CONFIG_SMP
897int irq_choose_cpu(const struct cpumask *mask) 577int irq_choose_cpu(const struct cpumask *mask)
898{ 578{
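With the powerpc-private irq_map[] reverse map deleted above, hardware interrupt numbers are now carried by the generic irq layer's per-descriptor data, as the new virq_to_hw() shows. An illustrative lookup using only generic kernel APIs:

#include <linux/irq.h>

/* Illustrative only: resolving a Linux virq to its hwirq now goes through
 * irq_get_irq_data() instead of the old irq_map[] array.
 */
static irq_hw_number_t example_hwirq_lookup(unsigned int virq)
{
	struct irq_data *d = irq_get_irq_data(virq);

	return d ? d->hwirq : 0;
}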
@@ -929,232 +609,11 @@ int irq_choose_cpu(const struct cpumask *mask)
929} 609}
930#endif 610#endif
931 611
932unsigned int irq_radix_revmap_lookup(struct irq_host *host,
933 irq_hw_number_t hwirq)
934{
935 struct irq_map_entry *ptr;
936 unsigned int virq;
937
938 if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE))
939 return irq_find_mapping(host, hwirq);
940
941 /*
942 * The ptr returned references the static global irq_map.
943 * but freeing an irq can delete nodes along the path to
944 * do the lookup via call_rcu.
945 */
946 rcu_read_lock();
947 ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq);
948 rcu_read_unlock();
949
950 /*
951 * If found in radix tree, then fine.
952 * Else fallback to linear lookup - this should not happen in practice
953 * as it means that we failed to insert the node in the radix tree.
954 */
955 if (ptr)
956 virq = ptr - irq_map;
957 else
958 virq = irq_find_mapping(host, hwirq);
959
960 return virq;
961}
962
963void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
964 irq_hw_number_t hwirq)
965{
966 if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE))
967 return;
968
969 if (virq != NO_IRQ) {
970 mutex_lock(&revmap_trees_mutex);
971 radix_tree_insert(&host->revmap_data.tree, hwirq,
972 &irq_map[virq]);
973 mutex_unlock(&revmap_trees_mutex);
974 }
975}
976
977unsigned int irq_linear_revmap(struct irq_host *host,
978 irq_hw_number_t hwirq)
979{
980 unsigned int *revmap;
981
982 if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR))
983 return irq_find_mapping(host, hwirq);
984
985 /* Check revmap bounds */
986 if (unlikely(hwirq >= host->revmap_data.linear.size))
987 return irq_find_mapping(host, hwirq);
988
989 /* Check if revmap was allocated */
990 revmap = host->revmap_data.linear.revmap;
991 if (unlikely(revmap == NULL))
992 return irq_find_mapping(host, hwirq);
993
994 /* Fill up revmap with slow path if no mapping found */
995 if (unlikely(revmap[hwirq] == NO_IRQ))
996 revmap[hwirq] = irq_find_mapping(host, hwirq);
997
998 return revmap[hwirq];
999}
1000
1001unsigned int irq_alloc_virt(struct irq_host *host,
1002 unsigned int count,
1003 unsigned int hint)
1004{
1005 unsigned long flags;
1006 unsigned int i, j, found = NO_IRQ;
1007
1008 if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
1009 return NO_IRQ;
1010
1011 raw_spin_lock_irqsave(&irq_big_lock, flags);
1012
1013 /* Use hint for 1 interrupt if any */
1014 if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
1015 hint < irq_virq_count && irq_map[hint].host == NULL) {
1016 found = hint;
1017 goto hint_found;
1018 }
1019
1020 /* Look for count consecutive numbers in the allocatable
1021 * (non-legacy) space
1022 */
1023 for (i = NUM_ISA_INTERRUPTS, j = 0; i < irq_virq_count; i++) {
1024 if (irq_map[i].host != NULL)
1025 j = 0;
1026 else
1027 j++;
1028
1029 if (j == count) {
1030 found = i - count + 1;
1031 break;
1032 }
1033 }
1034 if (found == NO_IRQ) {
1035 raw_spin_unlock_irqrestore(&irq_big_lock, flags);
1036 return NO_IRQ;
1037 }
1038 hint_found:
1039 for (i = found; i < (found + count); i++) {
1040 irq_map[i].hwirq = host->inval_irq;
1041 smp_wmb();
1042 irq_map[i].host = host;
1043 }
1044 raw_spin_unlock_irqrestore(&irq_big_lock, flags);
1045 return found;
1046}
1047
1048void irq_free_virt(unsigned int virq, unsigned int count)
1049{
1050 unsigned long flags;
1051 unsigned int i;
1052
1053 WARN_ON (virq < NUM_ISA_INTERRUPTS);
1054 WARN_ON (count == 0 || (virq + count) > irq_virq_count);
1055
1056 if (virq < NUM_ISA_INTERRUPTS) {
1057 if (virq + count < NUM_ISA_INTERRUPTS)
1058 return;
1059 count =- NUM_ISA_INTERRUPTS - virq;
1060 virq = NUM_ISA_INTERRUPTS;
1061 }
1062
1063 if (count > irq_virq_count || virq > irq_virq_count - count) {
1064 if (virq > irq_virq_count)
1065 return;
1066 count = irq_virq_count - virq;
1067 }
1068
1069 raw_spin_lock_irqsave(&irq_big_lock, flags);
1070 for (i = virq; i < (virq + count); i++) {
1071 struct irq_host *host;
1072
1073 host = irq_map[i].host;
1074 irq_map[i].hwirq = host->inval_irq;
1075 smp_wmb();
1076 irq_map[i].host = NULL;
1077 }
1078 raw_spin_unlock_irqrestore(&irq_big_lock, flags);
1079}
1080
1081int arch_early_irq_init(void) 612int arch_early_irq_init(void)
1082{ 613{
1083 return 0; 614 return 0;
1084} 615}
1085 616
1086#ifdef CONFIG_VIRQ_DEBUG
1087static int virq_debug_show(struct seq_file *m, void *private)
1088{
1089 unsigned long flags;
1090 struct irq_desc *desc;
1091 const char *p;
1092 static const char none[] = "none";
1093 void *data;
1094 int i;
1095
1096 seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq",
1097 "chip name", "chip data", "host name");
1098
1099 for (i = 1; i < nr_irqs; i++) {
1100 desc = irq_to_desc(i);
1101 if (!desc)
1102 continue;
1103
1104 raw_spin_lock_irqsave(&desc->lock, flags);
1105
1106 if (desc->action && desc->action->handler) {
1107 struct irq_chip *chip;
1108
1109 seq_printf(m, "%5d ", i);
1110 seq_printf(m, "0x%05lx ", irq_map[i].hwirq);
1111
1112 chip = irq_desc_get_chip(desc);
1113 if (chip && chip->name)
1114 p = chip->name;
1115 else
1116 p = none;
1117 seq_printf(m, "%-15s ", p);
1118
1119 data = irq_desc_get_chip_data(desc);
1120 seq_printf(m, "0x%16p ", data);
1121
1122 if (irq_map[i].host && irq_map[i].host->of_node)
1123 p = irq_map[i].host->of_node->full_name;
1124 else
1125 p = none;
1126 seq_printf(m, "%s\n", p);
1127 }
1128
1129 raw_spin_unlock_irqrestore(&desc->lock, flags);
1130 }
1131
1132 return 0;
1133}
1134
1135static int virq_debug_open(struct inode *inode, struct file *file)
1136{
1137 return single_open(file, virq_debug_show, inode->i_private);
1138}
1139
1140static const struct file_operations virq_debug_fops = {
1141 .open = virq_debug_open,
1142 .read = seq_read,
1143 .llseek = seq_lseek,
1144 .release = single_release,
1145};
1146
1147static int __init irq_debugfs_init(void)
1148{
1149 if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root,
1150 NULL, &virq_debug_fops) == NULL)
1151 return -ENOMEM;
1152
1153 return 0;
1154}
1155__initcall(irq_debugfs_init);
1156#endif /* CONFIG_VIRQ_DEBUG */
1157
1158#ifdef CONFIG_PPC64 617#ifdef CONFIG_PPC64
1159static int __init setup_noirqdistrib(char *str) 618static int __init setup_noirqdistrib(char *str)
1160{ 619{
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 479752901ec6..d45ec58703ce 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -29,7 +29,6 @@
29#include <asm/pci-bridge.h> 29#include <asm/pci-bridge.h>
30#include <asm/machdep.h> 30#include <asm/machdep.h>
31#include <asm/ppc-pci.h> 31#include <asm/ppc-pci.h>
32#include <asm/firmware.h>
33 32
34unsigned long isa_io_base; /* NULL if no ISA bus */ 33unsigned long isa_io_base; /* NULL if no ISA bus */
35EXPORT_SYMBOL(isa_io_base); 34EXPORT_SYMBOL(isa_io_base);
@@ -261,8 +260,6 @@ static struct notifier_block isa_bridge_notifier = {
261 */ 260 */
262static int __init isa_bridge_init(void) 261static int __init isa_bridge_init(void)
263{ 262{
264 if (firmware_has_feature(FW_FEATURE_ISERIES))
265 return 0;
266 bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); 263 bus_register_notifier(&pci_bus_type, &isa_bridge_notifier);
267 return 0; 264 return 0;
268} 265}
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 578f35f18723..ac12bd80ad95 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -26,7 +26,6 @@
26#include <linux/seq_file.h> 26#include <linux/seq_file.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/iseries/hv_lp_config.h>
30#include <asm/lppaca.h> 29#include <asm/lppaca.h>
31#include <asm/hvcall.h> 30#include <asm/hvcall.h>
32#include <asm/firmware.h> 31#include <asm/firmware.h>
@@ -55,80 +54,14 @@ static unsigned long get_purr(void)
55 int cpu; 54 int cpu;
56 55
57 for_each_possible_cpu(cpu) { 56 for_each_possible_cpu(cpu) {
58 if (firmware_has_feature(FW_FEATURE_ISERIES)) 57 struct cpu_usage *cu;
59 sum_purr += lppaca_of(cpu).emulated_time_base;
60 else {
61 struct cpu_usage *cu;
62 58
63 cu = &per_cpu(cpu_usage_array, cpu); 59 cu = &per_cpu(cpu_usage_array, cpu);
64 sum_purr += cu->current_tb; 60 sum_purr += cu->current_tb;
65 }
66 } 61 }
67 return sum_purr; 62 return sum_purr;
68} 63}
69 64
70#ifdef CONFIG_PPC_ISERIES
71
72/*
73 * Methods used to fetch LPAR data when running on an iSeries platform.
74 */
75static int iseries_lparcfg_data(struct seq_file *m, void *v)
76{
77 unsigned long pool_id;
78 int shared, entitled_capacity, max_entitled_capacity;
79 int processors, max_processors;
80 unsigned long purr = get_purr();
81
82 shared = (int)(local_paca->lppaca_ptr->shared_proc);
83
84 seq_printf(m, "system_active_processors=%d\n",
85 (int)HvLpConfig_getSystemPhysicalProcessors());
86
87 seq_printf(m, "system_potential_processors=%d\n",
88 (int)HvLpConfig_getSystemPhysicalProcessors());
89
90 processors = (int)HvLpConfig_getPhysicalProcessors();
91 seq_printf(m, "partition_active_processors=%d\n", processors);
92
93 max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
94 seq_printf(m, "partition_potential_processors=%d\n", max_processors);
95
96 if (shared) {
97 entitled_capacity = HvLpConfig_getSharedProcUnits();
98 max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
99 } else {
100 entitled_capacity = processors * 100;
101 max_entitled_capacity = max_processors * 100;
102 }
103 seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
104
105 seq_printf(m, "partition_max_entitled_capacity=%d\n",
106 max_entitled_capacity);
107
108 if (shared) {
109 pool_id = HvLpConfig_getSharedPoolIndex();
110 seq_printf(m, "pool=%d\n", (int)pool_id);
111 seq_printf(m, "pool_capacity=%d\n",
112 (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
113 100));
114 seq_printf(m, "purr=%ld\n", purr);
115 }
116
117 seq_printf(m, "shared_processor_mode=%d\n", shared);
118
119 return 0;
120}
121
122#else /* CONFIG_PPC_ISERIES */
123
124static int iseries_lparcfg_data(struct seq_file *m, void *v)
125{
126 return 0;
127}
128
129#endif /* CONFIG_PPC_ISERIES */
130
131#ifdef CONFIG_PPC_PSERIES
132/* 65/*
133 * Methods used to fetch LPAR data when running on a pSeries platform. 66 * Methods used to fetch LPAR data when running on a pSeries platform.
134 */ 67 */
@@ -648,8 +581,7 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
648 u8 new_weight, *new_weight_ptr = &new_weight; 581 u8 new_weight, *new_weight_ptr = &new_weight;
649 ssize_t retval; 582 ssize_t retval;
650 583
651 if (!firmware_has_feature(FW_FEATURE_SPLPAR) || 584 if (!firmware_has_feature(FW_FEATURE_SPLPAR))
652 firmware_has_feature(FW_FEATURE_ISERIES))
653 return -EINVAL; 585 return -EINVAL;
654 586
655 if (count > kbuf_sz) 587 if (count > kbuf_sz)
@@ -709,21 +641,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
709 return retval; 641 return retval;
710} 642}
711 643
712#else /* CONFIG_PPC_PSERIES */
713
714static int pseries_lparcfg_data(struct seq_file *m, void *v)
715{
716 return 0;
717}
718
719static ssize_t lparcfg_write(struct file *file, const char __user * buf,
720 size_t count, loff_t * off)
721{
722 return -EINVAL;
723}
724
725#endif /* CONFIG_PPC_PSERIES */
726
727static int lparcfg_data(struct seq_file *m, void *v) 644static int lparcfg_data(struct seq_file *m, void *v)
728{ 645{
729 struct device_node *rootdn; 646 struct device_node *rootdn;
@@ -738,19 +655,11 @@ static int lparcfg_data(struct seq_file *m, void *v)
738 rootdn = of_find_node_by_path("/"); 655 rootdn = of_find_node_by_path("/");
739 if (rootdn) { 656 if (rootdn) {
740 tmp = of_get_property(rootdn, "model", NULL); 657 tmp = of_get_property(rootdn, "model", NULL);
741 if (tmp) { 658 if (tmp)
742 model = tmp; 659 model = tmp;
743 /* Skip "IBM," - see platforms/iseries/dt.c */
744 if (firmware_has_feature(FW_FEATURE_ISERIES))
745 model += 4;
746 }
747 tmp = of_get_property(rootdn, "system-id", NULL); 660 tmp = of_get_property(rootdn, "system-id", NULL);
748 if (tmp) { 661 if (tmp)
749 system_id = tmp; 662 system_id = tmp;
750 /* Skip "IBM," - see platforms/iseries/dt.c */
751 if (firmware_has_feature(FW_FEATURE_ISERIES))
752 system_id += 4;
753 }
754 lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", 663 lp_index_ptr = of_get_property(rootdn, "ibm,partition-no",
755 NULL); 664 NULL);
756 if (lp_index_ptr) 665 if (lp_index_ptr)
@@ -761,8 +670,6 @@ static int lparcfg_data(struct seq_file *m, void *v)
761 seq_printf(m, "system_type=%s\n", model); 670 seq_printf(m, "system_type=%s\n", model);
762 seq_printf(m, "partition_id=%d\n", (int)lp_index); 671 seq_printf(m, "partition_id=%d\n", (int)lp_index);
763 672
764 if (firmware_has_feature(FW_FEATURE_ISERIES))
765 return iseries_lparcfg_data(m, v);
766 return pseries_lparcfg_data(m, v); 673 return pseries_lparcfg_data(m, v);
767} 674}
768 675
@@ -786,8 +693,7 @@ static int __init lparcfg_init(void)
786 umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; 693 umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
787 694
788 /* Allow writing if we have FW_FEATURE_SPLPAR */ 695 /* Allow writing if we have FW_FEATURE_SPLPAR */
789 if (firmware_has_feature(FW_FEATURE_SPLPAR) && 696 if (firmware_has_feature(FW_FEATURE_SPLPAR))
790 !firmware_has_feature(FW_FEATURE_ISERIES))
791 mode |= S_IWUSR; 697 mode |= S_IWUSR;
792 698
793 ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops); 699 ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops);
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index b69463ec2010..ba16874fe294 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -5,7 +5,6 @@
5 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) 5 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
6 * and Paul Mackerras. 6 * and Paul Mackerras.
7 * 7 *
8 * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
9 * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) 8 * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
10 * 9 *
11 * setjmp/longjmp code by Paul Mackerras. 10 * setjmp/longjmp code by Paul Mackerras.
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
deleted file mode 100644
index fe21b515ca44..000000000000
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ /dev/null
@@ -1,422 +0,0 @@
1/*
2 * Performance counter support for MPC7450-family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/string.h>
12#include <linux/perf_event.h>
13#include <asm/reg.h>
14#include <asm/cputable.h>
15
16#define N_COUNTER 6 /* Number of hardware counters */
17#define MAX_ALT 3 /* Maximum number of event alternative codes */
18
19/*
20 * Bits in event code for MPC7450 family
21 */
22#define PM_THRMULT_MSKS 0x40000
23#define PM_THRESH_SH 12
24#define PM_THRESH_MSK 0x3f
25#define PM_PMC_SH 8
26#define PM_PMC_MSK 7
27#define PM_PMCSEL_MSK 0x7f
28
29/*
30 * Classify events according to how specific their PMC requirements are.
31 * Result is:
32 * 0: can go on any PMC
33 * 1: can go on PMCs 1-4
34 * 2: can go on PMCs 1,2,4
35 * 3: can go on PMCs 1 or 2
36 * 4: can only go on one PMC
37 * -1: event code is invalid
38 */
39#define N_CLASSES 5
40
41static int mpc7450_classify_event(u32 event)
42{
43 int pmc;
44
45 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
46 if (pmc) {
47 if (pmc > N_COUNTER)
48 return -1;
49 return 4;
50 }
51 event &= PM_PMCSEL_MSK;
52 if (event <= 1)
53 return 0;
54 if (event <= 7)
55 return 1;
56 if (event <= 13)
57 return 2;
58 if (event <= 22)
59 return 3;
60 return -1;
61}
62
63/*
64 * Events using threshold and possible threshold scale:
65 * code scale? name
66 * 11e N PM_INSTQ_EXCEED_CYC
67 * 11f N PM_ALTV_IQ_EXCEED_CYC
68 * 128 Y PM_DTLB_SEARCH_EXCEED_CYC
69 * 12b Y PM_LD_MISS_EXCEED_L1_CYC
70 * 220 N PM_CQ_EXCEED_CYC
71 * 30c N PM_GPR_RB_EXCEED_CYC
72 * 30d ? PM_FPR_IQ_EXCEED_CYC ?
73 * 311 Y PM_ITLB_SEARCH_EXCEED
74 * 410 N PM_GPR_IQ_EXCEED_CYC
75 */
76
77/*
78 * Return use of threshold and threshold scale bits:
79 * 0 = uses neither, 1 = uses threshold, 2 = uses both
80 */
81static int mpc7450_threshold_use(u32 event)
82{
83 int pmc, sel;
84
85 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
86 sel = event & PM_PMCSEL_MSK;
87 switch (pmc) {
88 case 1:
89 if (sel == 0x1e || sel == 0x1f)
90 return 1;
91 if (sel == 0x28 || sel == 0x2b)
92 return 2;
93 break;
94 case 2:
95 if (sel == 0x20)
96 return 1;
97 break;
98 case 3:
99 if (sel == 0xc || sel == 0xd)
100 return 1;
101 if (sel == 0x11)
102 return 2;
103 break;
104 case 4:
105 if (sel == 0x10)
106 return 1;
107 break;
108 }
109 return 0;
110}
111
112/*
113 * Layout of constraint bits:
114 * 33222222222211111111110000000000
115 * 10987654321098765432109876543210
116 * |< >< > < > < ><><><><><><>
117 * TS TV G4 G3 G2P6P5P4P3P2P1
118 *
119 * P1 - P6
120 * 0 - 11: Count of events needing PMC1 .. PMC6
121 *
122 * G2
123 * 12 - 14: Count of events needing PMC1 or PMC2
124 *
125 * G3
126 * 16 - 18: Count of events needing PMC1, PMC2 or PMC4
127 *
128 * G4
129 * 20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4
130 *
131 * TV
132 * 24 - 29: Threshold value requested
133 *
134 * TS
135 * 30: Threshold scale value requested
136 */
137
138static u32 pmcbits[N_COUNTER][2] = {
139 { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */
140 { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */
141 { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */
142 { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */
143 { 0x00000200, 0x00000100 }, /* PMC5: P5 */
144 { 0x00000800, 0x00000400 } /* PMC6: P6 */
145};
146
147static u32 classbits[N_CLASSES - 1][2] = {
148 { 0x00000000, 0x00000000 }, /* class 0: no constraint */
149 { 0x00800000, 0x00100000 }, /* class 1: G4 */
150 { 0x00040000, 0x00010000 }, /* class 2: G3 */
151 { 0x00004000, 0x00001000 }, /* class 3: G2 */
152};
153
154static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
155 unsigned long *valp)
156{
157 int pmc, class;
158 u32 mask, value;
159 int thresh, tuse;
160
161 class = mpc7450_classify_event(event);
162 if (class < 0)
163 return -1;
164 if (class == 4) {
165 pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK;
166 mask = pmcbits[pmc - 1][0];
167 value = pmcbits[pmc - 1][1];
168 } else {
169 mask = classbits[class][0];
170 value = classbits[class][1];
171 }
172
173 tuse = mpc7450_threshold_use(event);
174 if (tuse) {
175 thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK;
176 mask |= 0x3f << 24;
177 value |= thresh << 24;
178 if (tuse == 2) {
179 mask |= 0x40000000;
180 if ((unsigned int)event & PM_THRMULT_MSKS)
181 value |= 0x40000000;
182 }
183 }
184
185 *maskp = mask;
186 *valp = value;
187 return 0;
188}
189
190static const unsigned int event_alternatives[][MAX_ALT] = {
191 { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS */
192 { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */
193 { 0x502, 0x602 }, /* PM_L2_HIT */
194 { 0x503, 0x603 }, /* PM_L3_HIT */
195 { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */
196 { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */
197 { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */
198 { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */
199 { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */
200 { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */
201 { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */
202 { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */
203 { 0x512, 0x612 }, /* PM_INT_LOCAL */
204 { 0x513, 0x61d }, /* PM_L2_MISS */
205 { 0x514, 0x61e }, /* PM_L3_MISS */
206};
207
208/*
209 * Scan the alternatives table for a match and return the
210 * index into the alternatives table if found, else -1.
211 */
212static int find_alternative(u32 event)
213{
214 int i, j;
215
216 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
217 if (event < event_alternatives[i][0])
218 break;
219 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
220 if (event == event_alternatives[i][j])
221 return i;
222 }
223 return -1;
224}
225
226static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[])
227{
228 int i, j, nalt = 1;
229 u32 ae;
230
231 alt[0] = event;
232 nalt = 1;
233 i = find_alternative((u32)event);
234 if (i >= 0) {
235 for (j = 0; j < MAX_ALT; ++j) {
236 ae = event_alternatives[i][j];
237 if (ae && ae != (u32)event)
238 alt[nalt++] = ae;
239 }
240 }
241 return nalt;
242}
243
244/*
245 * Bitmaps of which PMCs each class can use for classes 0 - 3.
246 * Bit i is set if PMC i+1 is usable.
247 */
248static const u8 classmap[N_CLASSES] = {
249 0x3f, 0x0f, 0x0b, 0x03, 0
250};
251
252/* Bit position and width of each PMCSEL field */
253static const int pmcsel_shift[N_COUNTER] = {
254 6, 0, 27, 22, 17, 11
255};
256static const u32 pmcsel_mask[N_COUNTER] = {
257 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f
258};
259
260/*
261 * Compute MMCR0/1/2 values for a set of events.
262 */
263static int mpc7450_compute_mmcr(u64 event[], int n_ev,
264 unsigned int hwc[], unsigned long mmcr[])
265{
266 u8 event_index[N_CLASSES][N_COUNTER];
267 int n_classevent[N_CLASSES];
268 int i, j, class, tuse;
269 u32 pmc_inuse = 0, pmc_avail;
270 u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0;
271 u32 ev, pmc, thresh;
272
273 if (n_ev > N_COUNTER)
274 return -1;
275
276 /* First pass: count usage in each class */
277 for (i = 0; i < N_CLASSES; ++i)
278 n_classevent[i] = 0;
279 for (i = 0; i < n_ev; ++i) {
280 class = mpc7450_classify_event(event[i]);
281 if (class < 0)
282 return -1;
283 j = n_classevent[class]++;
284 event_index[class][j] = i;
285 }
286
287 /* Second pass: allocate PMCs from most specific event to least */
288 for (class = N_CLASSES - 1; class >= 0; --class) {
289 for (i = 0; i < n_classevent[class]; ++i) {
290 ev = event[event_index[class][i]];
291 if (class == 4) {
292 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
293 if (pmc_inuse & (1 << (pmc - 1)))
294 return -1;
295 } else {
296 /* Find a suitable PMC */
297 pmc_avail = classmap[class] & ~pmc_inuse;
298 if (!pmc_avail)
299 return -1;
300 pmc = ffs(pmc_avail);
301 }
302 pmc_inuse |= 1 << (pmc - 1);
303
304 tuse = mpc7450_threshold_use(ev);
305 if (tuse) {
306 thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK;
307 mmcr0 |= thresh << 16;
308 if (tuse == 2 && (ev & PM_THRMULT_MSKS))
309 mmcr2 = 0x80000000;
310 }
311 ev &= pmcsel_mask[pmc - 1];
312 ev <<= pmcsel_shift[pmc - 1];
313 if (pmc <= 2)
314 mmcr0 |= ev;
315 else
316 mmcr1 |= ev;
317 hwc[event_index[class][i]] = pmc - 1;
318 }
319 }
320
321 if (pmc_inuse & 1)
322 mmcr0 |= MMCR0_PMC1CE;
323 if (pmc_inuse & 0x3e)
324 mmcr0 |= MMCR0_PMCnCE;
325
326 /* Return MMCRx values */
327 mmcr[0] = mmcr0;
328 mmcr[1] = mmcr1;
329 mmcr[2] = mmcr2;
330 return 0;
331}
332
333/*
334 * Disable counting by a PMC.
335 * Note that the pmc argument is 0-based here, not 1-based.
336 */
337static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[])
338{
339 if (pmc <= 1)
340 mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
341 else
342 mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
343}
344
345static int mpc7450_generic_events[] = {
346 [PERF_COUNT_HW_CPU_CYCLES] = 1,
347 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
348 [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */
349 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */
350 [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */
351};
352
353#define C(x) PERF_COUNT_HW_CACHE_##x
354
355/*
356 * Table of generalized cache-related events.
357 * 0 means not supported, -1 means nonsensical, other values
358 * are event codes.
359 */
360static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
361 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
362 [C(OP_READ)] = { 0, 0x225 },
363 [C(OP_WRITE)] = { 0, 0x227 },
364 [C(OP_PREFETCH)] = { 0, 0 },
365 },
366 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
367 [C(OP_READ)] = { 0x129, 0x115 },
368 [C(OP_WRITE)] = { -1, -1 },
369 [C(OP_PREFETCH)] = { 0x634, 0 },
370 },
371 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
372 [C(OP_READ)] = { 0, 0 },
373 [C(OP_WRITE)] = { 0, 0 },
374 [C(OP_PREFETCH)] = { 0, 0 },
375 },
376 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
377 [C(OP_READ)] = { 0, 0x312 },
378 [C(OP_WRITE)] = { -1, -1 },
379 [C(OP_PREFETCH)] = { -1, -1 },
380 },
381 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
382 [C(OP_READ)] = { 0, 0x223 },
383 [C(OP_WRITE)] = { -1, -1 },
384 [C(OP_PREFETCH)] = { -1, -1 },
385 },
386 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
387 [C(OP_READ)] = { 0x122, 0x41c },
388 [C(OP_WRITE)] = { -1, -1 },
389 [C(OP_PREFETCH)] = { -1, -1 },
390 },
391 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
392 [C(OP_READ)] = { -1, -1 },
393 [C(OP_WRITE)] = { -1, -1 },
394 [C(OP_PREFETCH)] = { -1, -1 },
395 },
396};
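/*
 * Stand-alone sketch (not part of this commit): how a table like
 * mpc7450_cache_events[] above is meant to be indexed, and what the
 * 0 / -1 / event-code convention means.  Only two rows are copied here;
 * the lookup helper and its output are purely illustrative.
 */
#include <stdio.h>

enum { L1D, BPU, CACHE_MAX };
enum { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
enum { RESULT_ACCESS, RESULT_MISS, RESULT_MAX };

static const int cache_events[CACHE_MAX][OP_MAX][RESULT_MAX] = {
        [L1D] = {
                [OP_READ] = { 0, 0x225 },       /* access unsupported, miss = 0x225 */
        },
        [BPU] = {
                [OP_WRITE] = { -1, -1 },        /* writing a branch unit is nonsensical */
        },
};

static void lookup(int cache, int op, int result)
{
        int ev = cache_events[cache][op][result];

        if (ev == 0)
                printf("not supported\n");
        else if (ev == -1)
                printf("nonsensical combination\n");
        else
                printf("event code 0x%x\n", ev);
}

int main(void)
{
        lookup(L1D, OP_READ, RESULT_MISS);      /* event code 0x225 */
        lookup(L1D, OP_PREFETCH, RESULT_MISS);  /* not supported (0) */
        lookup(BPU, OP_WRITE, RESULT_ACCESS);   /* nonsensical (-1) */
        return 0;
}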
397
398struct power_pmu mpc7450_pmu = {
399 .name = "MPC7450 family",
400 .n_counter = N_COUNTER,
401 .max_alternatives = MAX_ALT,
402 .add_fields = 0x00111555ul,
403 .test_adder = 0x00301000ul,
404 .compute_mmcr = mpc7450_compute_mmcr,
405 .get_constraint = mpc7450_get_constraint,
406 .get_alternatives = mpc7450_get_alternatives,
407 .disable_pmc = mpc7450_disable_pmc,
408 .n_generic = ARRAY_SIZE(mpc7450_generic_events),
409 .generic_events = mpc7450_generic_events,
410 .cache_events = &mpc7450_cache_events,
411};
412
413static int __init init_mpc7450_pmu(void)
414{
415 if (!cur_cpu_spec->oprofile_cpu_type ||
416 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
417 return -ENODEV;
418
419 return register_power_pmu(&mpc7450_pmu);
420}
421
422early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index e1612dfb4a93..2049f2d00ffe 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -21,12 +21,13 @@
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/of_device.h> 22#include <linux/of_device.h>
23#include <linux/of_platform.h> 23#include <linux/of_platform.h>
24#include <linux/atomic.h>
24 25
25#include <asm/errno.h> 26#include <asm/errno.h>
26#include <asm/topology.h> 27#include <asm/topology.h>
27#include <asm/pci-bridge.h> 28#include <asm/pci-bridge.h>
28#include <asm/ppc-pci.h> 29#include <asm/ppc-pci.h>
29#include <linux/atomic.h> 30#include <asm/eeh.h>
30 31
31#ifdef CONFIG_PPC_OF_PLATFORM_PCI 32#ifdef CONFIG_PPC_OF_PLATFORM_PCI
32 33
@@ -66,6 +67,9 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev)
66 /* Init pci_dn data structures */ 67 /* Init pci_dn data structures */
67 pci_devs_phb_init_dynamic(phb); 68 pci_devs_phb_init_dynamic(phb);
68 69
70 /* Create EEH devices for the PHB */
71 eeh_dev_phb_init_dynamic(phb);
72
69 /* Register devices with EEH */ 73 /* Register devices with EEH */
70#ifdef CONFIG_EEH 74#ifdef CONFIG_EEH
71 if (dev->dev.of_node->child) 75 if (dev->dev.of_node->child)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 41456ff55e14..0bb1f98613ba 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -11,13 +11,10 @@
11#include <linux/export.h> 11#include <linux/export.h>
12#include <linux/memblock.h> 12#include <linux/memblock.h>
13 13
14#include <asm/firmware.h>
15#include <asm/lppaca.h> 14#include <asm/lppaca.h>
16#include <asm/paca.h> 15#include <asm/paca.h>
17#include <asm/sections.h> 16#include <asm/sections.h>
18#include <asm/pgtable.h> 17#include <asm/pgtable.h>
19#include <asm/iseries/lpar_map.h>
20#include <asm/iseries/hv_types.h>
21#include <asm/kexec.h> 18#include <asm/kexec.h>
22 19
23/* This symbol is provided by the linker - let it fill in the paca 20/* This symbol is provided by the linker - let it fill in the paca
@@ -30,8 +27,8 @@ extern unsigned long __toc_start;
30 * The structure which the hypervisor knows about - this structure 27 * The structure which the hypervisor knows about - this structure
31 * should not cross a page boundary. The vpa_init/register_vpa call 28 * should not cross a page boundary. The vpa_init/register_vpa call
32 * is now known to fail if the lppaca structure crosses a page 29 * is now known to fail if the lppaca structure crosses a page
33 * boundary. The lppaca is also used on legacy iSeries and POWER5 30 * boundary. The lppaca is also used on POWER5 pSeries boxes.
34 * pSeries boxes. The lppaca is 640 bytes long, and cannot readily 31 * The lppaca is 640 bytes long, and cannot readily
35 * change since the hypervisor knows its layout, so a 1kB alignment 32 * change since the hypervisor knows its layout, so a 1kB alignment
36 * will suffice to ensure that it doesn't cross a page boundary. 33 * will suffice to ensure that it doesn't cross a page boundary.
37 */ 34 */
@@ -183,12 +180,9 @@ void __init allocate_pacas(void)
183 /* 180 /*
184 * We can't take SLB misses on the paca, and we want to access them 181 * We can't take SLB misses on the paca, and we want to access them
185 * in real mode, so allocate them within the RMA and also within 182 * in real mode, so allocate them within the RMA and also within
186 * the first segment. On iSeries they must be within the area mapped 183 * the first segment.
187 * by the HV, which is HvPagesToMap * HVPAGESIZE bytes.
188 */ 184 */
189 limit = min(0x10000000ULL, ppc64_rma_size); 185 limit = min(0x10000000ULL, ppc64_rma_size);
190 if (firmware_has_feature(FW_FEATURE_ISERIES))
191 limit = min(limit, HvPagesToMap * HVPAGESIZE);
192 186
193 paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); 187 paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
194 188
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index cce98d76e905..8e78e93c8185 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -38,7 +38,6 @@
38#include <asm/byteorder.h> 38#include <asm/byteorder.h>
39#include <asm/machdep.h> 39#include <asm/machdep.h>
40#include <asm/ppc-pci.h> 40#include <asm/ppc-pci.h>
41#include <asm/firmware.h>
42#include <asm/eeh.h> 41#include <asm/eeh.h>
43 42
44static DEFINE_SPINLOCK(hose_spinlock); 43static DEFINE_SPINLOCK(hose_spinlock);
@@ -50,9 +49,6 @@ static int global_phb_number; /* Global phb counter */
50/* ISA Memory physical address */ 49/* ISA Memory physical address */
51resource_size_t isa_mem_base; 50resource_size_t isa_mem_base;
52 51
53/* Default PCI flags is 0 on ppc32, modified at boot on ppc64 */
54unsigned int pci_flags = 0;
55
56 52
57static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; 53static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
58 54
@@ -219,20 +215,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
219 struct of_irq oirq; 215 struct of_irq oirq;
220 unsigned int virq; 216 unsigned int virq;
221 217
222 /* The current device-tree that iSeries generates from the HV
223 * PCI informations doesn't contain proper interrupt routing,
224 * and all the fallback would do is print out crap, so we
225 * don't attempt to resolve the interrupts here at all, some
226 * iSeries specific fixup does it.
227 *
228 * In the long run, we will hopefully fix the generated device-tree
229 * instead.
230 */
231#ifdef CONFIG_PPC_ISERIES
232 if (firmware_has_feature(FW_FEATURE_ISERIES))
233 return -1;
234#endif
235
236 pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); 218 pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
237 219
238#ifdef DEBUG 220#ifdef DEBUG
@@ -849,60 +831,6 @@ int pci_proc_domain(struct pci_bus *bus)
849 return 1; 831 return 1;
850} 832}
851 833
852void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
853 struct resource *res)
854{
855 resource_size_t offset = 0, mask = (resource_size_t)-1;
856 struct pci_controller *hose = pci_bus_to_host(dev->bus);
857
858 if (!hose)
859 return;
860 if (res->flags & IORESOURCE_IO) {
861 offset = (unsigned long)hose->io_base_virt - _IO_BASE;
862 mask = 0xffffffffu;
863 } else if (res->flags & IORESOURCE_MEM)
864 offset = hose->pci_mem_offset;
865
866 region->start = (res->start - offset) & mask;
867 region->end = (res->end - offset) & mask;
868}
869EXPORT_SYMBOL(pcibios_resource_to_bus);
870
871void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
872 struct pci_bus_region *region)
873{
874 resource_size_t offset = 0, mask = (resource_size_t)-1;
875 struct pci_controller *hose = pci_bus_to_host(dev->bus);
876
877 if (!hose)
878 return;
879 if (res->flags & IORESOURCE_IO) {
880 offset = (unsigned long)hose->io_base_virt - _IO_BASE;
881 mask = 0xffffffffu;
882 } else if (res->flags & IORESOURCE_MEM)
883 offset = hose->pci_mem_offset;
884 res->start = (region->start + offset) & mask;
885 res->end = (region->end + offset) & mask;
886}
887EXPORT_SYMBOL(pcibios_bus_to_resource);
888
889/* Fixup a bus resource into a linux resource */
890static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
891{
892 struct pci_controller *hose = pci_bus_to_host(dev->bus);
893 resource_size_t offset = 0, mask = (resource_size_t)-1;
894
895 if (res->flags & IORESOURCE_IO) {
896 offset = (unsigned long)hose->io_base_virt - _IO_BASE;
897 mask = 0xffffffffu;
898 } else if (res->flags & IORESOURCE_MEM)
899 offset = hose->pci_mem_offset;
900
901 res->start = (res->start + offset) & mask;
902 res->end = (res->end + offset) & mask;
903}
904
905
906/* This header fixup will do the resource fixup for all devices as they are 834/* This header fixup will do the resource fixup for all devices as they are
907 * probed, but not for bridge ranges 835 * probed, but not for bridge ranges
908 */ 836 */
@@ -942,18 +870,11 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
942 continue; 870 continue;
943 } 871 }
944 872
945 pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] fixup...\n", 873 pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n",
946 pci_name(dev), i, 874 pci_name(dev), i,
947 (unsigned long long)res->start,\ 875 (unsigned long long)res->start,\
948 (unsigned long long)res->end, 876 (unsigned long long)res->end,
949 (unsigned int)res->flags); 877 (unsigned int)res->flags);
950
951 fixup_resource(res, dev);
952
953 pr_debug("PCI:%s %016llx-%016llx\n",
954 pci_name(dev),
955 (unsigned long long)res->start,
956 (unsigned long long)res->end);
957 } 878 }
958 879
959 /* Call machine specific resource fixup */ 880 /* Call machine specific resource fixup */
@@ -1055,27 +976,18 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
1055 continue; 976 continue;
1056 } 977 }
1057 978
1058 pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", 979 pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n",
1059 pci_name(dev), i, 980 pci_name(dev), i,
1060 (unsigned long long)res->start,\ 981 (unsigned long long)res->start,\
1061 (unsigned long long)res->end, 982 (unsigned long long)res->end,
1062 (unsigned int)res->flags); 983 (unsigned int)res->flags);
1063 984
1064 /* Perform fixup */
1065 fixup_resource(res, dev);
1066
1067 /* Try to detect uninitialized P2P bridge resources, 985 /* Try to detect uninitialized P2P bridge resources,
1068 * and clear them out so they get re-assigned later 986 * and clear them out so they get re-assigned later
1069 */ 987 */
1070 if (pcibios_uninitialized_bridge_resource(bus, res)) { 988 if (pcibios_uninitialized_bridge_resource(bus, res)) {
1071 res->flags = 0; 989 res->flags = 0;
1072 pr_debug("PCI:%s (unassigned)\n", pci_name(dev)); 990 pr_debug("PCI:%s (unassigned)\n", pci_name(dev));
1073 } else {
1074
1075 pr_debug("PCI:%s %016llx-%016llx\n",
1076 pci_name(dev),
1077 (unsigned long long)res->start,
1078 (unsigned long long)res->end);
1079 } 991 }
1080 } 992 }
1081} 993}
@@ -1565,6 +1477,11 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
1565 return pci_enable_resources(dev, mask); 1477 return pci_enable_resources(dev, mask);
1566} 1478}
1567 1479
1480resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
1481{
1482 return (unsigned long) hose->io_base_virt - _IO_BASE;
1483}
1484
1568static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources) 1485static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources)
1569{ 1486{
1570 struct resource *res; 1487 struct resource *res;
@@ -1589,7 +1506,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s
1589 (unsigned long long)res->start, 1506 (unsigned long long)res->start,
1590 (unsigned long long)res->end, 1507 (unsigned long long)res->end,
1591 (unsigned long)res->flags); 1508 (unsigned long)res->flags);
1592 pci_add_resource(resources, res); 1509 pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose));
1593 1510
1594 /* Hookup PHB Memory resources */ 1511 /* Hookup PHB Memory resources */
1595 for (i = 0; i < 3; ++i) { 1512 for (i = 0; i < 3; ++i) {
@@ -1612,7 +1529,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s
1612 (unsigned long long)res->start, 1529 (unsigned long long)res->start,
1613 (unsigned long long)res->end, 1530 (unsigned long long)res->end,
1614 (unsigned long)res->flags); 1531 (unsigned long)res->flags);
1615 pci_add_resource(resources, res); 1532 pci_add_resource_offset(resources, res, hose->pci_mem_offset);
1616 } 1533 }
1617 1534
1618 pr_debug("PCI: PHB MEM offset = %016llx\n", 1535 pr_debug("PCI: PHB MEM offset = %016llx\n",
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index fdd1a3d951dc..4b06ec5a502e 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -219,9 +219,9 @@ void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose)
219 struct resource *res = &hose->io_resource; 219 struct resource *res = &hose->io_resource;
220 220
221 /* Fixup IO space offset */ 221 /* Fixup IO space offset */
222 io_offset = (unsigned long)hose->io_base_virt - isa_io_base; 222 io_offset = pcibios_io_space_offset(hose);
223 res->start = (res->start + io_offset) & 0xffffffffu; 223 res->start += io_offset;
224 res->end = (res->end + io_offset) & 0xffffffffu; 224 res->end += io_offset;
225} 225}
226 226
227static int __init pcibios_init(void) 227static int __init pcibios_init(void)
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 3318d39b7d4c..94a54f61d341 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -33,8 +33,6 @@
33#include <asm/machdep.h> 33#include <asm/machdep.h>
34#include <asm/ppc-pci.h> 34#include <asm/ppc-pci.h>
35 35
36unsigned long pci_probe_only = 1;
37
38/* pci_io_base -- the base address from which io bars are offsets. 36/* pci_io_base -- the base address from which io bars are offsets.
39 * This is the lowest I/O base address (so bar values are always positive), 37 * This is the lowest I/O base address (so bar values are always positive),
40 * and it *must* be the start of ISA space if an ISA bus exists because 38 * and it *must* be the start of ISA space if an ISA bus exists because
@@ -55,9 +53,6 @@ static int __init pcibios_init(void)
55 */ 53 */
56 ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; 54 ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
57 55
58 if (pci_probe_only)
59 pci_add_flags(PCI_PROBE_ONLY);
60
61 /* On ppc64, we always enable PCI domains and we keep domain 0 56 /* On ppc64, we always enable PCI domains and we keep domain 0
62 * backward compatible in /proc for video cards 57 * backward compatible in /proc for video cards
63 */ 58 */
@@ -173,7 +168,7 @@ static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose)
173 return -ENOMEM; 168 return -ENOMEM;
174 169
175 /* Fixup hose IO resource */ 170 /* Fixup hose IO resource */
176 io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE; 171 io_virt_offset = pcibios_io_space_offset(hose);
177 hose->io_resource.start += io_virt_offset; 172 hose->io_resource.start += io_virt_offset;
178 hose->io_resource.end += io_virt_offset; 173 hose->io_resource.end += io_virt_offset;
179 174
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index b37d0b5a796e..89dde171a6fa 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -75,6 +75,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
75{ 75{
76 u64 base, size; 76 u64 base, size;
77 unsigned int flags; 77 unsigned int flags;
78 struct pci_bus_region region;
78 struct resource *res; 79 struct resource *res;
79 const u32 *addrs; 80 const u32 *addrs;
80 u32 i; 81 u32 i;
@@ -106,10 +107,11 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
106 printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); 107 printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
107 continue; 108 continue;
108 } 109 }
109 res->start = base;
110 res->end = base + size - 1;
111 res->flags = flags; 110 res->flags = flags;
112 res->name = pci_name(dev); 111 res->name = pci_name(dev);
112 region.start = base;
113 region.end = base + size - 1;
114 pcibios_bus_to_resource(dev, res, &region);
113 } 115 }
114} 116}
115 117
@@ -209,6 +211,7 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev)
209 struct pci_bus *bus; 211 struct pci_bus *bus;
210 const u32 *busrange, *ranges; 212 const u32 *busrange, *ranges;
211 int len, i, mode; 213 int len, i, mode;
214 struct pci_bus_region region;
212 struct resource *res; 215 struct resource *res;
213 unsigned int flags; 216 unsigned int flags;
214 u64 size; 217 u64 size;
@@ -270,9 +273,10 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev)
270 res = bus->resource[i]; 273 res = bus->resource[i];
271 ++i; 274 ++i;
272 } 275 }
273 res->start = of_read_number(&ranges[1], 2);
274 res->end = res->start + size - 1;
275 res->flags = flags; 276 res->flags = flags;
277 region.start = of_read_number(&ranges[1], 2);
278 region.end = region.start + size - 1;
279 pcibios_bus_to_resource(dev, res, &region);
276 } 280 }
277 sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), 281 sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
278 bus->number); 282 bus->number);
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
deleted file mode 100644
index 564c1d8bdb5c..000000000000
--- a/arch/powerpc/kernel/perf_callchain.c
+++ /dev/null
@@ -1,492 +0,0 @@
1/*
2 * Performance counter callchain support - powerpc architecture code
3 *
4 * Copyright © 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_event.h>
14#include <linux/percpu.h>
15#include <linux/uaccess.h>
16#include <linux/mm.h>
17#include <asm/ptrace.h>
18#include <asm/pgtable.h>
19#include <asm/sigcontext.h>
20#include <asm/ucontext.h>
21#include <asm/vdso.h>
22#ifdef CONFIG_PPC64
23#include "ppc32.h"
24#endif
25
26
27/*
28 * Is sp valid as the address of the next kernel stack frame after prev_sp?
29 * The next frame may be in a different stack area but should not go
30 * back down in the same stack area.
31 */
32static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
33{
34 if (sp & 0xf)
35 return 0; /* must be 16-byte aligned */
36 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
37 return 0;
38 if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
39 return 1;
40 /*
41 * sp could decrease when we jump off an interrupt stack
42 * back to the regular process stack.
43 */
44 if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1)))
45 return 1;
46 return 0;
47}
48
49void
50perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
51{
52 unsigned long sp, next_sp;
53 unsigned long next_ip;
54 unsigned long lr;
55 long level = 0;
56 unsigned long *fp;
57
58 lr = regs->link;
59 sp = regs->gpr[1];
60 perf_callchain_store(entry, regs->nip);
61
62 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
63 return;
64
65 for (;;) {
66 fp = (unsigned long *) sp;
67 next_sp = fp[0];
68
69 if (next_sp == sp + STACK_INT_FRAME_SIZE &&
70 fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
71 /*
72 * This looks like an interrupt frame for an
73 * interrupt that occurred in the kernel
74 */
75 regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
76 next_ip = regs->nip;
77 lr = regs->link;
78 level = 0;
79 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
80
81 } else {
82 if (level == 0)
83 next_ip = lr;
84 else
85 next_ip = fp[STACK_FRAME_LR_SAVE];
86
87 /*
88 * We can't tell which of the first two addresses
89 * we get are valid, but we can filter out the
90 * obviously bogus ones here. We replace them
91 * with 0 rather than removing them entirely so
92 * that userspace can tell which is which.
93 */
94 if ((level == 1 && next_ip == lr) ||
95 (level <= 1 && !kernel_text_address(next_ip)))
96 next_ip = 0;
97
98 ++level;
99 }
100
101 perf_callchain_store(entry, next_ip);
102 if (!valid_next_sp(next_sp, sp))
103 return;
104 sp = next_sp;
105 }
106}
107
108#ifdef CONFIG_PPC64
109/*
110 * On 64-bit we don't want to invoke hash_page on user addresses from
111 * interrupt context, so if the access faults, we read the page tables
112 * to find which page (if any) is mapped and access it directly.
113 */
114static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
115{
116 pgd_t *pgdir;
117 pte_t *ptep, pte;
118 unsigned shift;
119 unsigned long addr = (unsigned long) ptr;
120 unsigned long offset;
121 unsigned long pfn;
122 void *kaddr;
123
124 pgdir = current->mm->pgd;
125 if (!pgdir)
126 return -EFAULT;
127
128 ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
129 if (!shift)
130 shift = PAGE_SHIFT;
131
132 /* align address to page boundary */
133 offset = addr & ((1UL << shift) - 1);
134 addr -= offset;
135
136 if (ptep == NULL)
137 return -EFAULT;
138 pte = *ptep;
139 if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
140 return -EFAULT;
141 pfn = pte_pfn(pte);
142 if (!page_is_ram(pfn))
143 return -EFAULT;
144
145 /* no highmem to worry about here */
146 kaddr = pfn_to_kaddr(pfn);
147 memcpy(ret, kaddr + offset, nb);
148 return 0;
149}
150
151static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
152{
153 if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
154 ((unsigned long)ptr & 7))
155 return -EFAULT;
156
157 pagefault_disable();
158 if (!__get_user_inatomic(*ret, ptr)) {
159 pagefault_enable();
160 return 0;
161 }
162 pagefault_enable();
163
164 return read_user_stack_slow(ptr, ret, 8);
165}
166
167static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
168{
169 if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
170 ((unsigned long)ptr & 3))
171 return -EFAULT;
172
173 pagefault_disable();
174 if (!__get_user_inatomic(*ret, ptr)) {
175 pagefault_enable();
176 return 0;
177 }
178 pagefault_enable();
179
180 return read_user_stack_slow(ptr, ret, 4);
181}
182
183static inline int valid_user_sp(unsigned long sp, int is_64)
184{
185 if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
186 return 0;
187 return 1;
188}
189
190/*
191 * 64-bit user processes use the same stack frame for RT and non-RT signals.
192 */
193struct signal_frame_64 {
194 char dummy[__SIGNAL_FRAMESIZE];
195 struct ucontext uc;
196 unsigned long unused[2];
197 unsigned int tramp[6];
198 struct siginfo *pinfo;
199 void *puc;
200 struct siginfo info;
201 char abigap[288];
202};
203
204static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
205{
206 if (nip == fp + offsetof(struct signal_frame_64, tramp))
207 return 1;
208 if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
209 nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
210 return 1;
211 return 0;
212}
213
214/*
215 * Do some sanity checking on the signal frame pointed to by sp.
216 * We check the pinfo and puc pointers in the frame.
217 */
218static int sane_signal_64_frame(unsigned long sp)
219{
220 struct signal_frame_64 __user *sf;
221 unsigned long pinfo, puc;
222
223 sf = (struct signal_frame_64 __user *) sp;
224 if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
225 read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
226 return 0;
227 return pinfo == (unsigned long) &sf->info &&
228 puc == (unsigned long) &sf->uc;
229}
230
231static void perf_callchain_user_64(struct perf_callchain_entry *entry,
232 struct pt_regs *regs)
233{
234 unsigned long sp, next_sp;
235 unsigned long next_ip;
236 unsigned long lr;
237 long level = 0;
238 struct signal_frame_64 __user *sigframe;
239 unsigned long __user *fp, *uregs;
240
241 next_ip = regs->nip;
242 lr = regs->link;
243 sp = regs->gpr[1];
244 perf_callchain_store(entry, next_ip);
245
246 for (;;) {
247 fp = (unsigned long __user *) sp;
248 if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
249 return;
250 if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
251 return;
252
253 /*
254 * Note: the next_sp - sp >= signal frame size check
255 * is true when next_sp < sp, which can happen when
256 * transitioning from an alternate signal stack to the
257 * normal stack.
258 */
259 if (next_sp - sp >= sizeof(struct signal_frame_64) &&
260 (is_sigreturn_64_address(next_ip, sp) ||
261 (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
262 sane_signal_64_frame(sp)) {
263 /*
264 * This looks like a signal frame
265 */
266 sigframe = (struct signal_frame_64 __user *) sp;
267 uregs = sigframe->uc.uc_mcontext.gp_regs;
268 if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
269 read_user_stack_64(&uregs[PT_LNK], &lr) ||
270 read_user_stack_64(&uregs[PT_R1], &sp))
271 return;
272 level = 0;
273 perf_callchain_store(entry, PERF_CONTEXT_USER);
274 perf_callchain_store(entry, next_ip);
275 continue;
276 }
277
278 if (level == 0)
279 next_ip = lr;
280 perf_callchain_store(entry, next_ip);
281 ++level;
282 sp = next_sp;
283 }
284}
285
286static inline int current_is_64bit(void)
287{
288 /*
289 * We can't use test_thread_flag() here because we may be on an
290 * interrupt stack, and the thread flags don't get copied over
291 * from the thread_info on the main stack to the interrupt stack.
292 */
293 return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
294}
295
296#else /* CONFIG_PPC64 */
297/*
298 * On 32-bit we just access the address and let hash_page create a
299 * HPTE if necessary, so there is no need to fall back to reading
300 * the page tables. Since this is called at interrupt level,
301 * do_page_fault() won't treat a DSI as a page fault.
302 */
303static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
304{
305 int rc;
306
307 if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
308 ((unsigned long)ptr & 3))
309 return -EFAULT;
310
311 pagefault_disable();
312 rc = __get_user_inatomic(*ret, ptr);
313 pagefault_enable();
314
315 return rc;
316}
317
318static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
319 struct pt_regs *regs)
320{
321}
322
323static inline int current_is_64bit(void)
324{
325 return 0;
326}
327
328static inline int valid_user_sp(unsigned long sp, int is_64)
329{
330 if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
331 return 0;
332 return 1;
333}
334
335#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
336#define sigcontext32 sigcontext
337#define mcontext32 mcontext
338#define ucontext32 ucontext
339#define compat_siginfo_t struct siginfo
340
341#endif /* CONFIG_PPC64 */
342
343/*
344 * Layout for non-RT signal frames
345 */
346struct signal_frame_32 {
347 char dummy[__SIGNAL_FRAMESIZE32];
348 struct sigcontext32 sctx;
349 struct mcontext32 mctx;
350 int abigap[56];
351};
352
353/*
354 * Layout for RT signal frames
355 */
356struct rt_signal_frame_32 {
357 char dummy[__SIGNAL_FRAMESIZE32 + 16];
358 compat_siginfo_t info;
359 struct ucontext32 uc;
360 int abigap[56];
361};
362
363static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
364{
365 if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
366 return 1;
367 if (vdso32_sigtramp && current->mm->context.vdso_base &&
368 nip == current->mm->context.vdso_base + vdso32_sigtramp)
369 return 1;
370 return 0;
371}
372
373static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
374{
375 if (nip == fp + offsetof(struct rt_signal_frame_32,
376 uc.uc_mcontext.mc_pad))
377 return 1;
378 if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
379 nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
380 return 1;
381 return 0;
382}
383
384static int sane_signal_32_frame(unsigned int sp)
385{
386 struct signal_frame_32 __user *sf;
387 unsigned int regs;
388
389 sf = (struct signal_frame_32 __user *) (unsigned long) sp;
390 if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
391 return 0;
392 return regs == (unsigned long) &sf->mctx;
393}
394
395static int sane_rt_signal_32_frame(unsigned int sp)
396{
397 struct rt_signal_frame_32 __user *sf;
398 unsigned int regs;
399
400 sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
401 if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
402 return 0;
403 return regs == (unsigned long) &sf->uc.uc_mcontext;
404}
405
406static unsigned int __user *signal_frame_32_regs(unsigned int sp,
407 unsigned int next_sp, unsigned int next_ip)
408{
409 struct mcontext32 __user *mctx = NULL;
410 struct signal_frame_32 __user *sf;
411 struct rt_signal_frame_32 __user *rt_sf;
412
413 /*
414 * Note: the next_sp - sp >= signal frame size check
415 * is true when next_sp < sp, for example, when
416 * transitioning from an alternate signal stack to the
417 * normal stack.
418 */
419 if (next_sp - sp >= sizeof(struct signal_frame_32) &&
420 is_sigreturn_32_address(next_ip, sp) &&
421 sane_signal_32_frame(sp)) {
422 sf = (struct signal_frame_32 __user *) (unsigned long) sp;
423 mctx = &sf->mctx;
424 }
425
426 if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
427 is_rt_sigreturn_32_address(next_ip, sp) &&
428 sane_rt_signal_32_frame(sp)) {
429 rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
430 mctx = &rt_sf->uc.uc_mcontext;
431 }
432
433 if (!mctx)
434 return NULL;
435 return mctx->mc_gregs;
436}
437
438static void perf_callchain_user_32(struct perf_callchain_entry *entry,
439 struct pt_regs *regs)
440{
441 unsigned int sp, next_sp;
442 unsigned int next_ip;
443 unsigned int lr;
444 long level = 0;
445 unsigned int __user *fp, *uregs;
446
447 next_ip = regs->nip;
448 lr = regs->link;
449 sp = regs->gpr[1];
450 perf_callchain_store(entry, next_ip);
451
452 while (entry->nr < PERF_MAX_STACK_DEPTH) {
453 fp = (unsigned int __user *) (unsigned long) sp;
454 if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
455 return;
456 if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
457 return;
458
459 uregs = signal_frame_32_regs(sp, next_sp, next_ip);
460 if (!uregs && level <= 1)
461 uregs = signal_frame_32_regs(sp, next_sp, lr);
462 if (uregs) {
463 /*
464 * This looks like a signal frame, so restart
465 * the stack trace with the values in it.
466 */
467 if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
468 read_user_stack_32(&uregs[PT_LNK], &lr) ||
469 read_user_stack_32(&uregs[PT_R1], &sp))
470 return;
471 level = 0;
472 perf_callchain_store(entry, PERF_CONTEXT_USER);
473 perf_callchain_store(entry, next_ip);
474 continue;
475 }
476
477 if (level == 0)
478 next_ip = lr;
479 perf_callchain_store(entry, next_ip);
480 ++level;
481 sp = next_sp;
482 }
483}
484
485void
486perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
487{
488 if (current_is_64bit())
489 perf_callchain_user_64(entry, regs);
490 else
491 perf_callchain_user_32(entry, regs);
492}
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
deleted file mode 100644
index c2e27ede07ec..000000000000
--- a/arch/powerpc/kernel/perf_event.c
+++ /dev/null
@@ -1,1448 +0,0 @@
1/*
2 * Performance event support - powerpc architecture code
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_event.h>
14#include <linux/percpu.h>
15#include <linux/hardirq.h>
16#include <asm/reg.h>
17#include <asm/pmc.h>
18#include <asm/machdep.h>
19#include <asm/firmware.h>
20#include <asm/ptrace.h>
21
22struct cpu_hw_events {
23 int n_events;
24 int n_percpu;
25 int disabled;
26 int n_added;
27 int n_limited;
28 u8 pmcs_enabled;
29 struct perf_event *event[MAX_HWEVENTS];
30 u64 events[MAX_HWEVENTS];
31 unsigned int flags[MAX_HWEVENTS];
32 unsigned long mmcr[3];
33 struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
34 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
35 u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
36 unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
37 unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
38
39 unsigned int group_flag;
40 int n_txn_start;
41};
42DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
43
44struct power_pmu *ppmu;
45
46/*
47 * Normally, to ignore kernel events we set the FCS (freeze counters
48 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
49 * hypervisor bit set in the MSR, or if we are running on a processor
50 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
51 * then we need to use the FCHV bit to ignore kernel events.
52 */
53static unsigned int freeze_events_kernel = MMCR0_FCS;
54
55/*
56 * 32-bit doesn't have MMCRA but does have an MMCR2,
57 * and a few other names are different.
58 */
59#ifdef CONFIG_PPC32
60
61#define MMCR0_FCHV 0
62#define MMCR0_PMCjCE MMCR0_PMCnCE
63
64#define SPRN_MMCRA SPRN_MMCR2
65#define MMCRA_SAMPLE_ENABLE 0
66
67static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
68{
69 return 0;
70}
71static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
72static inline u32 perf_get_misc_flags(struct pt_regs *regs)
73{
74 return 0;
75}
76static inline void perf_read_regs(struct pt_regs *regs) { }
77static inline int perf_intr_is_nmi(struct pt_regs *regs)
78{
79 return 0;
80}
81
82#endif /* CONFIG_PPC32 */
83
84/*
85 * Things that are specific to 64-bit implementations.
86 */
87#ifdef CONFIG_PPC64
88
89static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
90{
91 unsigned long mmcra = regs->dsisr;
92
93 if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
94 unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
95 if (slot > 1)
96 return 4 * (slot - 1);
97 }
98 return 0;
99}
100
101/*
102 * The user wants a data address recorded.
103 * If we're not doing instruction sampling, give them the SDAR
104 * (sampled data address). If we are doing instruction sampling, then
105 * only give them the SDAR if it corresponds to the instruction
106 * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
107 * bit in MMCRA.
108 */
109static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
110{
111 unsigned long mmcra = regs->dsisr;
112 unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
113 POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
114
115 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
116 *addrp = mfspr(SPRN_SDAR);
117}
118
119static inline u32 perf_get_misc_flags(struct pt_regs *regs)
120{
121 unsigned long mmcra = regs->dsisr;
122 unsigned long sihv = MMCRA_SIHV;
123 unsigned long sipr = MMCRA_SIPR;
124
125 if (TRAP(regs) != 0xf00)
126 return 0; /* not a PMU interrupt */
127
128 if (ppmu->flags & PPMU_ALT_SIPR) {
129 sihv = POWER6_MMCRA_SIHV;
130 sipr = POWER6_MMCRA_SIPR;
131 }
132
133 /* PR has priority over HV, so order below is important */
134 if (mmcra & sipr)
135 return PERF_RECORD_MISC_USER;
136 if ((mmcra & sihv) && (freeze_events_kernel != MMCR0_FCHV))
137 return PERF_RECORD_MISC_HYPERVISOR;
138 return PERF_RECORD_MISC_KERNEL;
139}
140
141/*
142 * Overload regs->dsisr to store MMCRA so we only need to read it once
143 * on each interrupt.
144 */
145static inline void perf_read_regs(struct pt_regs *regs)
146{
147 regs->dsisr = mfspr(SPRN_MMCRA);
148}
149
150/*
151 * If interrupts were soft-disabled when a PMU interrupt occurs, treat
152 * it as an NMI.
153 */
154static inline int perf_intr_is_nmi(struct pt_regs *regs)
155{
156 return !regs->softe;
157}
158
159#endif /* CONFIG_PPC64 */
160
161static void perf_event_interrupt(struct pt_regs *regs);
162
163void perf_event_print_debug(void)
164{
165}
166
167/*
168 * Read one performance monitor counter (PMC).
169 */
170static unsigned long read_pmc(int idx)
171{
172 unsigned long val;
173
174 switch (idx) {
175 case 1:
176 val = mfspr(SPRN_PMC1);
177 break;
178 case 2:
179 val = mfspr(SPRN_PMC2);
180 break;
181 case 3:
182 val = mfspr(SPRN_PMC3);
183 break;
184 case 4:
185 val = mfspr(SPRN_PMC4);
186 break;
187 case 5:
188 val = mfspr(SPRN_PMC5);
189 break;
190 case 6:
191 val = mfspr(SPRN_PMC6);
192 break;
193#ifdef CONFIG_PPC64
194 case 7:
195 val = mfspr(SPRN_PMC7);
196 break;
197 case 8:
198 val = mfspr(SPRN_PMC8);
199 break;
200#endif /* CONFIG_PPC64 */
201 default:
202 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
203 val = 0;
204 }
205 return val;
206}
207
208/*
209 * Write one PMC.
210 */
211static void write_pmc(int idx, unsigned long val)
212{
213 switch (idx) {
214 case 1:
215 mtspr(SPRN_PMC1, val);
216 break;
217 case 2:
218 mtspr(SPRN_PMC2, val);
219 break;
220 case 3:
221 mtspr(SPRN_PMC3, val);
222 break;
223 case 4:
224 mtspr(SPRN_PMC4, val);
225 break;
226 case 5:
227 mtspr(SPRN_PMC5, val);
228 break;
229 case 6:
230 mtspr(SPRN_PMC6, val);
231 break;
232#ifdef CONFIG_PPC64
233 case 7:
234 mtspr(SPRN_PMC7, val);
235 break;
236 case 8:
237 mtspr(SPRN_PMC8, val);
238 break;
239#endif /* CONFIG_PPC64 */
240 default:
241 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
242 }
243}
244
245/*
246 * Check if a set of events can all go on the PMU at once.
247 * If they can't, this will look at alternative codes for the events
248 * and see if any combination of alternative codes is feasible.
249 * The feasible set is returned in event_id[].
250 */
251static int power_check_constraints(struct cpu_hw_events *cpuhw,
252 u64 event_id[], unsigned int cflags[],
253 int n_ev)
254{
255 unsigned long mask, value, nv;
256 unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
257 int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];
258 int i, j;
259 unsigned long addf = ppmu->add_fields;
260 unsigned long tadd = ppmu->test_adder;
261
262 if (n_ev > ppmu->n_counter)
263 return -1;
264
265 /* First see if the events will go on as-is */
266 for (i = 0; i < n_ev; ++i) {
267 if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
268 && !ppmu->limited_pmc_event(event_id[i])) {
269 ppmu->get_alternatives(event_id[i], cflags[i],
270 cpuhw->alternatives[i]);
271 event_id[i] = cpuhw->alternatives[i][0];
272 }
273 if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
274 &cpuhw->avalues[i][0]))
275 return -1;
276 }
277 value = mask = 0;
278 for (i = 0; i < n_ev; ++i) {
279 nv = (value | cpuhw->avalues[i][0]) +
280 (value & cpuhw->avalues[i][0] & addf);
281 if ((((nv + tadd) ^ value) & mask) != 0 ||
282 (((nv + tadd) ^ cpuhw->avalues[i][0]) &
283 cpuhw->amasks[i][0]) != 0)
284 break;
285 value = nv;
286 mask |= cpuhw->amasks[i][0];
287 }
288 if (i == n_ev)
289 return 0; /* all OK */
290
291 /* doesn't work, gather alternatives... */
292 if (!ppmu->get_alternatives)
293 return -1;
294 for (i = 0; i < n_ev; ++i) {
295 choice[i] = 0;
296 n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
297 cpuhw->alternatives[i]);
298 for (j = 1; j < n_alt[i]; ++j)
299 ppmu->get_constraint(cpuhw->alternatives[i][j],
300 &cpuhw->amasks[i][j],
301 &cpuhw->avalues[i][j]);
302 }
303
304 /* enumerate all possibilities and see if any will work */
305 i = 0;
306 j = -1;
307 value = mask = nv = 0;
308 while (i < n_ev) {
309 if (j >= 0) {
310 /* we're backtracking, restore context */
311 value = svalues[i];
312 mask = smasks[i];
313 j = choice[i];
314 }
315 /*
316 * See if any alternative k for event_id i,
317 * where k > j, will satisfy the constraints.
318 */
319 while (++j < n_alt[i]) {
320 nv = (value | cpuhw->avalues[i][j]) +
321 (value & cpuhw->avalues[i][j] & addf);
322 if ((((nv + tadd) ^ value) & mask) == 0 &&
323 (((nv + tadd) ^ cpuhw->avalues[i][j])
324 & cpuhw->amasks[i][j]) == 0)
325 break;
326 }
327 if (j >= n_alt[i]) {
328 /*
329 * No feasible alternative, backtrack
330 * to event_id i-1 and continue enumerating its
331 * alternatives from where we got up to.
332 */
333 if (--i < 0)
334 return -1;
335 } else {
336 /*
337 * Found a feasible alternative for event_id i,
338 * remember where we got up to with this event_id,
339 * go on to the next event_id, and start with
340 * the first alternative for it.
341 */
342 choice[i] = j;
343 svalues[i] = value;
344 smasks[i] = mask;
345 value = nv;
346 mask |= cpuhw->amasks[i][j];
347 ++i;
348 j = -1;
349 }
350 }
351
352 /* OK, we have a feasible combination, tell the caller the solution */
353 for (i = 0; i < n_ev; ++i)
354 event_id[i] = cpuhw->alternatives[i][choice[i]];
355 return 0;
356}
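/*
 * Stand-alone sketch (not part of this commit): the search that
 * power_check_constraints() performs, written recursively and with the
 * add_fields/test_adder bit arithmetic replaced by a toy fits() rule
 * (here: no two chosen codes may collide).  Everything below is
 * hypothetical and only illustrates the backtracking over per-event
 * alternative codes.
 */
#include <stdio.h>

#define EX_MAX_EV       4
#define EX_MAX_ALT      4

static int fits(const unsigned int chosen[], int n, unsigned int candidate)
{
        for (int i = 0; i < n; ++i)
                if (chosen[i] == candidate)
                        return 0;
        return 1;
}

/* Pick one alternative per event; return 0 on success, -1 if infeasible. */
static int solve(unsigned int alt[][EX_MAX_ALT], const int n_alt[],
                 int n_ev, unsigned int chosen[], int i)
{
        if (i == n_ev)
                return 0;
        for (int j = 0; j < n_alt[i]; ++j) {
                if (!fits(chosen, i, alt[i][j]))
                        continue;
                chosen[i] = alt[i][j];
                if (solve(alt, n_alt, n_ev, chosen, i + 1) == 0)
                        return 0;
        }
        return -1;      /* nothing fits at this level: caller backtracks */
}

int main(void)
{
        unsigned int alt[EX_MAX_EV][EX_MAX_ALT] = {
                { 0x1, 0x2 },   /* event 0 has two possible encodings */
                { 0x1 },        /* event 1 has only one */
        };
        int n_alt[EX_MAX_EV] = { 2, 1 };
        unsigned int chosen[EX_MAX_EV];

        if (solve(alt, n_alt, 2, chosen, 0) == 0)
                printf("feasible: 0x%x 0x%x\n", chosen[0], chosen[1]);
        return 0;
}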
357
358/*
359 * Check if newly-added events have consistent settings for
360 * exclude_{user,kernel,hv} with each other and any previously
361 * added events.
362 */
363static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
364 int n_prev, int n_new)
365{
366 int eu = 0, ek = 0, eh = 0;
367 int i, n, first;
368 struct perf_event *event;
369
370 n = n_prev + n_new;
371 if (n <= 1)
372 return 0;
373
374 first = 1;
375 for (i = 0; i < n; ++i) {
376 if (cflags[i] & PPMU_LIMITED_PMC_OK) {
377 cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
378 continue;
379 }
380 event = ctrs[i];
381 if (first) {
382 eu = event->attr.exclude_user;
383 ek = event->attr.exclude_kernel;
384 eh = event->attr.exclude_hv;
385 first = 0;
386 } else if (event->attr.exclude_user != eu ||
387 event->attr.exclude_kernel != ek ||
388 event->attr.exclude_hv != eh) {
389 return -EAGAIN;
390 }
391 }
392
393 if (eu || ek || eh)
394 for (i = 0; i < n; ++i)
395 if (cflags[i] & PPMU_LIMITED_PMC_OK)
396 cflags[i] |= PPMU_LIMITED_PMC_REQD;
397
398 return 0;
399}
400
401static u64 check_and_compute_delta(u64 prev, u64 val)
402{
403 u64 delta = (val - prev) & 0xfffffffful;
404
405 /*
406 * POWER7 can roll back counter values; if the new value is smaller
407 * than the previous value it will cause the delta and the counter to
408 * have bogus values unless we rolled a counter over. If a counter is
409 * rolled back, it will be smaller, but within 256, which is the maximum
410 * number of events to roll back at once. If we detect a rollback,
411 * return 0. This can lead to a small lack of precision in the
412 * counters.
413 */
414 if (prev > val && (prev - val) < 256)
415 delta = 0;
416
417 return delta;
418}
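/*
 * Stand-alone sketch (not part of this commit): the two cases the delta
 * computation above distinguishes.  The numbers are made up.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t example_delta(uint64_t prev, uint64_t val)
{
        uint64_t delta = (val - prev) & 0xfffffffful;

        /* POWER7-style rollback: new value slightly below the old one */
        if (prev > val && (prev - val) < 256)
                delta = 0;
        return delta;
}

int main(void)
{
        /* Normal 32-bit wrap: counter went from 0xfffffff0 to 0x10 -> 0x20 */
        assert(example_delta(0xfffffff0, 0x10) == 0x20);

        /* Rollback by less than 256 is treated as no progress -> 0 */
        assert(example_delta(0x1000, 0x0fe0) == 0);
        return 0;
}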
419
420static void power_pmu_read(struct perf_event *event)
421{
422 s64 val, delta, prev;
423
424 if (event->hw.state & PERF_HES_STOPPED)
425 return;
426
427 if (!event->hw.idx)
428 return;
429 /*
430 * Performance monitor interrupts come even when interrupts
431 * are soft-disabled, as long as interrupts are hard-enabled.
432 * Therefore we treat them like NMIs.
433 */
434 do {
435 prev = local64_read(&event->hw.prev_count);
436 barrier();
437 val = read_pmc(event->hw.idx);
438 delta = check_and_compute_delta(prev, val);
439 if (!delta)
440 return;
441 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
442
443 local64_add(delta, &event->count);
444 local64_sub(delta, &event->hw.period_left);
445}
446
447/*
448 * On some machines, PMC5 and PMC6 can't be written, don't respect
449 * the freeze conditions, and don't generate interrupts. This tells
450 * us if `event' is using such a PMC.
451 */
452static int is_limited_pmc(int pmcnum)
453{
454 return (ppmu->flags & PPMU_LIMITED_PMC5_6)
455 && (pmcnum == 5 || pmcnum == 6);
456}
457
458static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
459 unsigned long pmc5, unsigned long pmc6)
460{
461 struct perf_event *event;
462 u64 val, prev, delta;
463 int i;
464
465 for (i = 0; i < cpuhw->n_limited; ++i) {
466 event = cpuhw->limited_counter[i];
467 if (!event->hw.idx)
468 continue;
469 val = (event->hw.idx == 5) ? pmc5 : pmc6;
470 prev = local64_read(&event->hw.prev_count);
471 event->hw.idx = 0;
472 delta = check_and_compute_delta(prev, val);
473 if (delta)
474 local64_add(delta, &event->count);
475 }
476}
477
478static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
479 unsigned long pmc5, unsigned long pmc6)
480{
481 struct perf_event *event;
482 u64 val, prev;
483 int i;
484
485 for (i = 0; i < cpuhw->n_limited; ++i) {
486 event = cpuhw->limited_counter[i];
487 event->hw.idx = cpuhw->limited_hwidx[i];
488 val = (event->hw.idx == 5) ? pmc5 : pmc6;
489 prev = local64_read(&event->hw.prev_count);
490 if (check_and_compute_delta(prev, val))
491 local64_set(&event->hw.prev_count, val);
492 perf_event_update_userpage(event);
493 }
494}
495
496/*
497 * Since limited events don't respect the freeze conditions, we
498 * have to read them immediately after freezing or unfreezing the
499 * other events. We try to keep the values from the limited
500 * events as consistent as possible by keeping the delay (in
501 * cycles and instructions) between freezing/unfreezing and reading
502 * the limited events as small and consistent as possible.
503 * Therefore, if any limited events are in use, we read them
504 * both, and always in the same order, to minimize variability,
505 * and do it inside the same asm that writes MMCR0.
506 */
507static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
508{
509 unsigned long pmc5, pmc6;
510
511 if (!cpuhw->n_limited) {
512 mtspr(SPRN_MMCR0, mmcr0);
513 return;
514 }
515
516 /*
517 * Write MMCR0, then read PMC5 and PMC6 immediately.
518 * To ensure we don't get a performance monitor interrupt
519 * between writing MMCR0 and freezing/thawing the limited
520 * events, we first write MMCR0 with the event overflow
521 * interrupt enable bits turned off.
522 */
523 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
524 : "=&r" (pmc5), "=&r" (pmc6)
525 : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
526 "i" (SPRN_MMCR0),
527 "i" (SPRN_PMC5), "i" (SPRN_PMC6));
528
529 if (mmcr0 & MMCR0_FC)
530 freeze_limited_counters(cpuhw, pmc5, pmc6);
531 else
532 thaw_limited_counters(cpuhw, pmc5, pmc6);
533
534 /*
535 * Write the full MMCR0 including the event overflow interrupt
536 * enable bits, if necessary.
537 */
538 if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
539 mtspr(SPRN_MMCR0, mmcr0);
540}
541
542/*
543 * Disable all events to prevent PMU interrupts and to allow
544 * events to be added or removed.
545 */
546static void power_pmu_disable(struct pmu *pmu)
547{
548 struct cpu_hw_events *cpuhw;
549 unsigned long flags;
550
551 if (!ppmu)
552 return;
553 local_irq_save(flags);
554 cpuhw = &__get_cpu_var(cpu_hw_events);
555
556 if (!cpuhw->disabled) {
557 cpuhw->disabled = 1;
558 cpuhw->n_added = 0;
559
560 /*
561 * Check if we ever enabled the PMU on this cpu.
562 */
563 if (!cpuhw->pmcs_enabled) {
564 ppc_enable_pmcs();
565 cpuhw->pmcs_enabled = 1;
566 }
567
568 /*
569 * Disable instruction sampling if it was enabled
570 */
571 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
572 mtspr(SPRN_MMCRA,
573 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
574 mb();
575 }
576
577 /*
578 * Set the 'freeze counters' bit.
579 * The barrier is to make sure the mtspr has been
580 * executed and the PMU has frozen the events
581 * before we return.
582 */
583 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
584 mb();
585 }
586 local_irq_restore(flags);
587}
588
589/*
590 * Re-enable all events if disable == 0.
591 * If we were previously disabled and events were added, then
592 * put the new config on the PMU.
593 */
594static void power_pmu_enable(struct pmu *pmu)
595{
596 struct perf_event *event;
597 struct cpu_hw_events *cpuhw;
598 unsigned long flags;
599 long i;
600 unsigned long val;
601 s64 left;
602 unsigned int hwc_index[MAX_HWEVENTS];
603 int n_lim;
604 int idx;
605
606 if (!ppmu)
607 return;
608 local_irq_save(flags);
609 cpuhw = &__get_cpu_var(cpu_hw_events);
610 if (!cpuhw->disabled) {
611 local_irq_restore(flags);
612 return;
613 }
614 cpuhw->disabled = 0;
615
616 /*
617 * If we didn't change anything, or only removed events,
618 * no need to recalculate MMCR* settings and reset the PMCs.
619 * Just reenable the PMU with the current MMCR* settings
620 * (possibly updated for removal of events).
621 */
622 if (!cpuhw->n_added) {
623 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
624 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
625 if (cpuhw->n_events == 0)
626 ppc_set_pmu_inuse(0);
627 goto out_enable;
628 }
629
630 /*
631 * Compute MMCR* values for the new set of events
632 */
633 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
634 cpuhw->mmcr)) {
635 /* shouldn't ever get here */
636 printk(KERN_ERR "oops compute_mmcr failed\n");
637 goto out;
638 }
639
640 /*
641 * Add in MMCR0 freeze bits corresponding to the
642 * attr.exclude_* bits for the first event.
643 * We have already checked that all events have the
644 * same values for these bits as the first event.
645 */
646 event = cpuhw->event[0];
647 if (event->attr.exclude_user)
648 cpuhw->mmcr[0] |= MMCR0_FCP;
649 if (event->attr.exclude_kernel)
650 cpuhw->mmcr[0] |= freeze_events_kernel;
651 if (event->attr.exclude_hv)
652 cpuhw->mmcr[0] |= MMCR0_FCHV;
653
654 /*
655 * Write the new configuration to MMCR* with the freeze
656 * bit set and set the hardware events to their initial values.
657 * Then unfreeze the events.
658 */
659 ppc_set_pmu_inuse(1);
660 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
661 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
662 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
663 | MMCR0_FC);
664
665 /*
666 * Read off any pre-existing events that need to move
667 * to another PMC.
668 */
669 for (i = 0; i < cpuhw->n_events; ++i) {
670 event = cpuhw->event[i];
671 if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
672 power_pmu_read(event);
673 write_pmc(event->hw.idx, 0);
674 event->hw.idx = 0;
675 }
676 }
677
678 /*
679 * Initialize the PMCs for all the new and moved events.
680 */
681 cpuhw->n_limited = n_lim = 0;
682 for (i = 0; i < cpuhw->n_events; ++i) {
683 event = cpuhw->event[i];
684 if (event->hw.idx)
685 continue;
686 idx = hwc_index[i] + 1;
687 if (is_limited_pmc(idx)) {
688 cpuhw->limited_counter[n_lim] = event;
689 cpuhw->limited_hwidx[n_lim] = idx;
690 ++n_lim;
691 continue;
692 }
693 val = 0;
694 if (event->hw.sample_period) {
695 left = local64_read(&event->hw.period_left);
696 if (left < 0x80000000L)
697 val = 0x80000000L - left;
698 }
699 local64_set(&event->hw.prev_count, val);
700 event->hw.idx = idx;
701 if (event->hw.state & PERF_HES_STOPPED)
702 val = 0;
703 write_pmc(idx, val);
704 perf_event_update_userpage(event);
705 }
706 cpuhw->n_limited = n_lim;
707 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
708
709 out_enable:
710 mb();
711 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
712
713 /*
714 * Enable instruction sampling if necessary
715 */
716 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
717 mb();
718 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
719 }
720
721 out:
722 local_irq_restore(flags);
723}
724
725static int collect_events(struct perf_event *group, int max_count,
726 struct perf_event *ctrs[], u64 *events,
727 unsigned int *flags)
728{
729 int n = 0;
730 struct perf_event *event;
731
732 if (!is_software_event(group)) {
733 if (n >= max_count)
734 return -1;
735 ctrs[n] = group;
736 flags[n] = group->hw.event_base;
737 events[n++] = group->hw.config;
738 }
739 list_for_each_entry(event, &group->sibling_list, group_entry) {
740 if (!is_software_event(event) &&
741 event->state != PERF_EVENT_STATE_OFF) {
742 if (n >= max_count)
743 return -1;
744 ctrs[n] = event;
745 flags[n] = event->hw.event_base;
746 events[n++] = event->hw.config;
747 }
748 }
749 return n;
750}
751
752/*
753 * Add an event to the PMU.
754 * If all events are not already frozen, then we disable and
755 * re-enable the PMU in order to get hw_perf_enable to do the
756 * actual work of reconfiguring the PMU.
757 */
758static int power_pmu_add(struct perf_event *event, int ef_flags)
759{
760 struct cpu_hw_events *cpuhw;
761 unsigned long flags;
762 int n0;
763 int ret = -EAGAIN;
764
765 local_irq_save(flags);
766 perf_pmu_disable(event->pmu);
767
768 /*
769 * Add the event to the list (if there is room)
770 * and check whether the total set is still feasible.
771 */
772 cpuhw = &__get_cpu_var(cpu_hw_events);
773 n0 = cpuhw->n_events;
774 if (n0 >= ppmu->n_counter)
775 goto out;
776 cpuhw->event[n0] = event;
777 cpuhw->events[n0] = event->hw.config;
778 cpuhw->flags[n0] = event->hw.event_base;
779
780 if (!(ef_flags & PERF_EF_START))
781 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
782
783 /*
784 * If a group event scheduling transaction was started,
785 * skip the schedulability test here; it will be performed
786 * at commit time (->commit_txn) as a whole.
787 */
788 if (cpuhw->group_flag & PERF_EVENT_TXN)
789 goto nocheck;
790
791 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
792 goto out;
793 if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
794 goto out;
795 event->hw.config = cpuhw->events[n0];
796
797nocheck:
798 ++cpuhw->n_events;
799 ++cpuhw->n_added;
800
801 ret = 0;
802 out:
803 perf_pmu_enable(event->pmu);
804 local_irq_restore(flags);
805 return ret;
806}
807
808/*
809 * Remove an event from the PMU.
810 */
811static void power_pmu_del(struct perf_event *event, int ef_flags)
812{
813 struct cpu_hw_events *cpuhw;
814 long i;
815 unsigned long flags;
816
817 local_irq_save(flags);
818 perf_pmu_disable(event->pmu);
819
820 power_pmu_read(event);
821
822 cpuhw = &__get_cpu_var(cpu_hw_events);
823 for (i = 0; i < cpuhw->n_events; ++i) {
824 if (event == cpuhw->event[i]) {
825 while (++i < cpuhw->n_events) {
826 cpuhw->event[i-1] = cpuhw->event[i];
827 cpuhw->events[i-1] = cpuhw->events[i];
828 cpuhw->flags[i-1] = cpuhw->flags[i];
829 }
830 --cpuhw->n_events;
831 ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
832 if (event->hw.idx) {
833 write_pmc(event->hw.idx, 0);
834 event->hw.idx = 0;
835 }
836 perf_event_update_userpage(event);
837 break;
838 }
839 }
840 for (i = 0; i < cpuhw->n_limited; ++i)
841 if (event == cpuhw->limited_counter[i])
842 break;
843 if (i < cpuhw->n_limited) {
844 while (++i < cpuhw->n_limited) {
845 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
846 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
847 }
848 --cpuhw->n_limited;
849 }
850 if (cpuhw->n_events == 0) {
851 /* disable exceptions if no events are running */
852 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
853 }
854
855 perf_pmu_enable(event->pmu);
856 local_irq_restore(flags);
857}
858
859/*
860 * POWER-PMU does not support disabling individual counters, hence
861 * program their cycle counter to their max value and ignore the interrupts.
862 */
863
864static void power_pmu_start(struct perf_event *event, int ef_flags)
865{
866 unsigned long flags;
867 s64 left;
868 unsigned long val;
869
870 if (!event->hw.idx || !event->hw.sample_period)
871 return;
872
873 if (!(event->hw.state & PERF_HES_STOPPED))
874 return;
875
876 if (ef_flags & PERF_EF_RELOAD)
877 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
878
879 local_irq_save(flags);
880 perf_pmu_disable(event->pmu);
881
882 event->hw.state = 0;
883 left = local64_read(&event->hw.period_left);
884
885 val = 0;
886 if (left < 0x80000000L)
887 val = 0x80000000L - left;
888
889 write_pmc(event->hw.idx, val);
890
891 perf_event_update_userpage(event);
892 perf_pmu_enable(event->pmu);
893 local_irq_restore(flags);
894}
895
896static void power_pmu_stop(struct perf_event *event, int ef_flags)
897{
898 unsigned long flags;
899
900 if (!event->hw.idx || !event->hw.sample_period)
901 return;
902
903 if (event->hw.state & PERF_HES_STOPPED)
904 return;
905
906 local_irq_save(flags);
907 perf_pmu_disable(event->pmu);
908
909 power_pmu_read(event);
910 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
911 write_pmc(event->hw.idx, 0);
912
913 perf_event_update_userpage(event);
914 perf_pmu_enable(event->pmu);
915 local_irq_restore(flags);
916}
917
918/*
919 * Start group events scheduling transaction
920 * Set the flag to make pmu::enable() not perform the
921 * schedulability test; it will be performed at commit time
922 */
923void power_pmu_start_txn(struct pmu *pmu)
924{
925 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
926
927 perf_pmu_disable(pmu);
928 cpuhw->group_flag |= PERF_EVENT_TXN;
929 cpuhw->n_txn_start = cpuhw->n_events;
930}
931
932/*
933 * Stop group events scheduling transaction
934 * Clear the flag and pmu::enable() will perform the
935 * schedulability test.
936 */
937void power_pmu_cancel_txn(struct pmu *pmu)
938{
939 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
940
941 cpuhw->group_flag &= ~PERF_EVENT_TXN;
942 perf_pmu_enable(pmu);
943}
944
945/*
946 * Commit group events scheduling transaction
947 * Perform the group schedulability test as a whole
948 * Return 0 on success
949 */
950int power_pmu_commit_txn(struct pmu *pmu)
951{
952 struct cpu_hw_events *cpuhw;
953 long i, n;
954
955 if (!ppmu)
956 return -EAGAIN;
957 cpuhw = &__get_cpu_var(cpu_hw_events);
958 n = cpuhw->n_events;
959 if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
960 return -EAGAIN;
961 i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
962 if (i < 0)
963 return -EAGAIN;
964
965 for (i = cpuhw->n_txn_start; i < n; ++i)
966 cpuhw->event[i]->hw.config = cpuhw->events[i];
967
968 cpuhw->group_flag &= ~PERF_EVENT_TXN;
969 perf_pmu_enable(pmu);
970 return 0;
971}
972
973/*
974 * Return 1 if we might be able to put the event on a limited PMC,
975 * or 0 if not.
976 * An event can only go on a limited PMC if it counts something
977 * that a limited PMC can count, doesn't require interrupts, and
978 * doesn't exclude any processor mode.
979 */
980static int can_go_on_limited_pmc(struct perf_event *event, u64 ev,
981 unsigned int flags)
982{
983 int n;
984 u64 alt[MAX_EVENT_ALTERNATIVES];
985
986 if (event->attr.exclude_user
987 || event->attr.exclude_kernel
988 || event->attr.exclude_hv
989 || event->attr.sample_period)
990 return 0;
991
992 if (ppmu->limited_pmc_event(ev))
993 return 1;
994
995 /*
996 * The requested event_id isn't on a limited PMC already;
997 * see if any alternative code goes on a limited PMC.
998 */
999 if (!ppmu->get_alternatives)
1000 return 0;
1001
1002 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
1003 n = ppmu->get_alternatives(ev, flags, alt);
1004
1005 return n > 0;
1006}
1007
1008/*
1009 * Find an alternative event_id that goes on a normal PMC, if possible,
1010 * and return the event_id code, or 0 if there is no such alternative.
1011 * (Note: event_id code 0 is "don't count" on all machines.)
1012 */
1013static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
1014{
1015 u64 alt[MAX_EVENT_ALTERNATIVES];
1016 int n;
1017
1018 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
1019 n = ppmu->get_alternatives(ev, flags, alt);
1020 if (!n)
1021 return 0;
1022 return alt[0];
1023}
1024
1025/* Number of perf_events counting hardware events */
1026static atomic_t num_events;
1027/* Used to avoid races in calling reserve/release_pmc_hardware */
1028static DEFINE_MUTEX(pmc_reserve_mutex);
1029
1030/*
1031 * Release the PMU if this is the last perf_event.
1032 */
1033static void hw_perf_event_destroy(struct perf_event *event)
1034{
1035 if (!atomic_add_unless(&num_events, -1, 1)) {
1036 mutex_lock(&pmc_reserve_mutex);
1037 if (atomic_dec_return(&num_events) == 0)
1038 release_pmc_hardware();
1039 mutex_unlock(&pmc_reserve_mutex);
1040 }
1041}
1042
1043/*
1044 * Translate a generic cache event_id config to a raw event_id code.
1045 */
1046static int hw_perf_cache_event(u64 config, u64 *eventp)
1047{
1048 unsigned long type, op, result;
1049 int ev;
1050
1051 if (!ppmu->cache_events)
1052 return -EINVAL;
1053
1054 /* unpack config */
1055 type = config & 0xff;
1056 op = (config >> 8) & 0xff;
1057 result = (config >> 16) & 0xff;
1058
1059 if (type >= PERF_COUNT_HW_CACHE_MAX ||
1060 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
1061 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
1062 return -EINVAL;
1063
1064 ev = (*ppmu->cache_events)[type][op][result];
1065 if (ev == 0)
1066 return -EOPNOTSUPP;
1067 if (ev == -1)
1068 return -EINVAL;
1069 *eventp = ev;
1070 return 0;
1071}
1072
1073static int power_pmu_event_init(struct perf_event *event)
1074{
1075 u64 ev;
1076 unsigned long flags;
1077 struct perf_event *ctrs[MAX_HWEVENTS];
1078 u64 events[MAX_HWEVENTS];
1079 unsigned int cflags[MAX_HWEVENTS];
1080 int n;
1081 int err;
1082 struct cpu_hw_events *cpuhw;
1083
1084 if (!ppmu)
1085 return -ENOENT;
1086
1087 /* does not support taken branch sampling */
1088 if (has_branch_stack(event))
1089 return -EOPNOTSUPP;
1090
1091 switch (event->attr.type) {
1092 case PERF_TYPE_HARDWARE:
1093 ev = event->attr.config;
1094 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
1095 return -EOPNOTSUPP;
1096 ev = ppmu->generic_events[ev];
1097 break;
1098 case PERF_TYPE_HW_CACHE:
1099 err = hw_perf_cache_event(event->attr.config, &ev);
1100 if (err)
1101 return err;
1102 break;
1103 case PERF_TYPE_RAW:
1104 ev = event->attr.config;
1105 break;
1106 default:
1107 return -ENOENT;
1108 }
1109
1110 event->hw.config_base = ev;
1111 event->hw.idx = 0;
1112
1113 /*
1114 * If we are not running on a hypervisor, force the
1115 * exclude_hv bit to 0 so that we don't care what
1116 * the user set it to.
1117 */
1118 if (!firmware_has_feature(FW_FEATURE_LPAR))
1119 event->attr.exclude_hv = 0;
1120
1121 /*
1122 * If this is a per-task event, then we can use
1123 * PM_RUN_* events interchangeably with their non RUN_*
1124 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
1125 * XXX we should check if the task is an idle task.
1126 */
1127 flags = 0;
1128 if (event->attach_state & PERF_ATTACH_TASK)
1129 flags |= PPMU_ONLY_COUNT_RUN;
1130
1131 /*
1132 * If this machine has limited PMCs, check whether this
1133 * event_id could go on a limited PMC.
1134 */
1135 if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
1136 if (can_go_on_limited_pmc(event, ev, flags)) {
1137 flags |= PPMU_LIMITED_PMC_OK;
1138 } else if (ppmu->limited_pmc_event(ev)) {
1139 /*
1140 * The requested event_id is on a limited PMC,
1141 * but we can't use a limited PMC; see if any
1142 * alternative goes on a normal PMC.
1143 */
1144 ev = normal_pmc_alternative(ev, flags);
1145 if (!ev)
1146 return -EINVAL;
1147 }
1148 }
1149
1150 /*
1151 * If this is in a group, check if it can go on with all the
1152 * other hardware events in the group. We assume the event
1153 * hasn't been linked into its leader's sibling list at this point.
1154 */
1155 n = 0;
1156 if (event->group_leader != event) {
1157 n = collect_events(event->group_leader, ppmu->n_counter - 1,
1158 ctrs, events, cflags);
1159 if (n < 0)
1160 return -EINVAL;
1161 }
1162 events[n] = ev;
1163 ctrs[n] = event;
1164 cflags[n] = flags;
1165 if (check_excludes(ctrs, cflags, n, 1))
1166 return -EINVAL;
1167
1168 cpuhw = &get_cpu_var(cpu_hw_events);
1169 err = power_check_constraints(cpuhw, events, cflags, n + 1);
1170 put_cpu_var(cpu_hw_events);
1171 if (err)
1172 return -EINVAL;
1173
1174 event->hw.config = events[n];
1175 event->hw.event_base = cflags[n];
1176 event->hw.last_period = event->hw.sample_period;
1177 local64_set(&event->hw.period_left, event->hw.last_period);
1178
1179 /*
1180 * See if we need to reserve the PMU.
1181 * If no events are currently in use, then we have to take a
1182 * mutex to ensure that we don't race with another task doing
1183 * reserve_pmc_hardware or release_pmc_hardware.
1184 */
1185 err = 0;
1186 if (!atomic_inc_not_zero(&num_events)) {
1187 mutex_lock(&pmc_reserve_mutex);
1188 if (atomic_read(&num_events) == 0 &&
1189 reserve_pmc_hardware(perf_event_interrupt))
1190 err = -EBUSY;
1191 else
1192 atomic_inc(&num_events);
1193 mutex_unlock(&pmc_reserve_mutex);
1194 }
1195 event->destroy = hw_perf_event_destroy;
1196
1197 return err;
1198}
1199
1200static int power_pmu_event_idx(struct perf_event *event)
1201{
1202 return event->hw.idx;
1203}
1204
1205struct pmu power_pmu = {
1206 .pmu_enable = power_pmu_enable,
1207 .pmu_disable = power_pmu_disable,
1208 .event_init = power_pmu_event_init,
1209 .add = power_pmu_add,
1210 .del = power_pmu_del,
1211 .start = power_pmu_start,
1212 .stop = power_pmu_stop,
1213 .read = power_pmu_read,
1214 .start_txn = power_pmu_start_txn,
1215 .cancel_txn = power_pmu_cancel_txn,
1216 .commit_txn = power_pmu_commit_txn,
1217 .event_idx = power_pmu_event_idx,
1218};
1219
1220/*
1221 * A counter has overflowed; update its count and record
1222 * things if requested. Note that interrupts are hard-disabled
1223 * here so there is no possibility of being interrupted.
1224 */
1225static void record_and_restart(struct perf_event *event, unsigned long val,
1226 struct pt_regs *regs)
1227{
1228 u64 period = event->hw.sample_period;
1229 s64 prev, delta, left;
1230 int record = 0;
1231
1232 if (event->hw.state & PERF_HES_STOPPED) {
1233 write_pmc(event->hw.idx, 0);
1234 return;
1235 }
1236
1237 /* we don't have to worry about interrupts here */
1238 prev = local64_read(&event->hw.prev_count);
1239 delta = check_and_compute_delta(prev, val);
1240 local64_add(delta, &event->count);
1241
1242 /*
1243 * See if the total period for this event has expired,
1244 * and update for the next period.
1245 */
1246 val = 0;
1247 left = local64_read(&event->hw.period_left) - delta;
1248 if (period) {
1249 if (left <= 0) {
1250 left += period;
1251 if (left <= 0)
1252 left = period;
1253 record = 1;
1254 event->hw.last_period = event->hw.sample_period;
1255 }
1256 if (left < 0x80000000LL)
1257 val = 0x80000000LL - left;
1258 }
1259
1260 write_pmc(event->hw.idx, val);
1261 local64_set(&event->hw.prev_count, val);
1262 local64_set(&event->hw.period_left, left);
1263 perf_event_update_userpage(event);
1264
1265 /*
1266 * Finally record data if requested.
1267 */
1268 if (record) {
1269 struct perf_sample_data data;
1270
1271 perf_sample_data_init(&data, ~0ULL);
1272 data.period = event->hw.last_period;
1273
1274 if (event->attr.sample_type & PERF_SAMPLE_ADDR)
1275 perf_get_data_addr(regs, &data.addr);
1276
1277 if (perf_event_overflow(event, &data, regs))
1278 power_pmu_stop(event, 0);
1279 }
1280}
1281
1282/*
1283 * Called from generic code to get the misc flags (i.e. processor mode)
1284 * for an event_id.
1285 */
1286unsigned long perf_misc_flags(struct pt_regs *regs)
1287{
1288 u32 flags = perf_get_misc_flags(regs);
1289
1290 if (flags)
1291 return flags;
1292 return user_mode(regs) ? PERF_RECORD_MISC_USER :
1293 PERF_RECORD_MISC_KERNEL;
1294}
1295
1296/*
1297 * Called from generic code to get the instruction pointer
1298 * for an event_id.
1299 */
1300unsigned long perf_instruction_pointer(struct pt_regs *regs)
1301{
1302 unsigned long ip;
1303
1304 if (TRAP(regs) != 0xf00)
1305 return regs->nip; /* not a PMU interrupt */
1306
1307 ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
1308 return ip;
1309}
1310
1311static bool pmc_overflow(unsigned long val)
1312{
1313 if ((int)val < 0)
1314 return true;
1315
1316 /*
1317 * Events on POWER7 can roll back if a speculative event doesn't
1318 * eventually complete. Unfortunately in some rare cases they will
1319 * raise a performance monitor exception. We need to catch this to
1320 * ensure we reset the PMC. In all cases the PMC will be 256 or less
1321 * cycles from overflow.
1322 *
1323 * We only do this if the first pass fails to find any overflowing
1324 * PMCs because a user might set a period of less than 256 and we
1325 * don't want to mistakenly reset them.
1326 */
1327 if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256))
1328 return true;
1329
1330 return false;
1331}
1332
1333/*
1334 * Performance monitor interrupt stuff
1335 */
1336static void perf_event_interrupt(struct pt_regs *regs)
1337{
1338 int i;
1339 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1340 struct perf_event *event;
1341 unsigned long val;
1342 int found = 0;
1343 int nmi;
1344
1345 if (cpuhw->n_limited)
1346 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
1347 mfspr(SPRN_PMC6));
1348
1349 perf_read_regs(regs);
1350
1351 nmi = perf_intr_is_nmi(regs);
1352 if (nmi)
1353 nmi_enter();
1354 else
1355 irq_enter();
1356
1357 for (i = 0; i < cpuhw->n_events; ++i) {
1358 event = cpuhw->event[i];
1359 if (!event->hw.idx || is_limited_pmc(event->hw.idx))
1360 continue;
1361 val = read_pmc(event->hw.idx);
1362 if ((int)val < 0) {
1363 /* event has overflowed */
1364 found = 1;
1365 record_and_restart(event, val, regs);
1366 }
1367 }
1368
1369 /*
1370 * In case we didn't find and reset the event that caused
1371 * the interrupt, scan all events and reset any that are
1372 * negative, to avoid getting continual interrupts.
1373 * Any that we processed in the previous loop will not be negative.
1374 */
1375 if (!found) {
1376 for (i = 0; i < ppmu->n_counter; ++i) {
1377 if (is_limited_pmc(i + 1))
1378 continue;
1379 val = read_pmc(i + 1);
1380 if (pmc_overflow(val))
1381 write_pmc(i + 1, 0);
1382 }
1383 }
1384
1385 /*
1386 * Reset MMCR0 to its normal value. This will set PMXE and
1387 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
1388 * and thus allow interrupts to occur again.
1389 * XXX might want to use MSR.PM to keep the events frozen until
1390 * we get back out of this interrupt.
1391 */
1392 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
1393
1394 if (nmi)
1395 nmi_exit();
1396 else
1397 irq_exit();
1398}
1399
1400static void power_pmu_setup(int cpu)
1401{
1402 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
1403
1404 if (!ppmu)
1405 return;
1406 memset(cpuhw, 0, sizeof(*cpuhw));
1407 cpuhw->mmcr[0] = MMCR0_FC;
1408}
1409
1410static int __cpuinit
1411power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1412{
1413 unsigned int cpu = (long)hcpu;
1414
1415 switch (action & ~CPU_TASKS_FROZEN) {
1416 case CPU_UP_PREPARE:
1417 power_pmu_setup(cpu);
1418 break;
1419
1420 default:
1421 break;
1422 }
1423
1424 return NOTIFY_OK;
1425}
1426
1427int __cpuinit register_power_pmu(struct power_pmu *pmu)
1428{
1429 if (ppmu)
1430 return -EBUSY; /* something's already registered */
1431
1432 ppmu = pmu;
1433 pr_info("%s performance monitor hardware support registered\n",
1434 pmu->name);
1435
1436#ifdef MSR_HV
1437 /*
1438 * Use FCHV to ignore kernel events if MSR.HV is set.
1439 */
1440 if (mfmsr() & MSR_HV)
1441 freeze_events_kernel = MMCR0_FCHV;
1442#endif /* MSR_HV */
1443
1444 perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
1445 perf_cpu_notifier(power_pmu_notifier);
1446
1447 return 0;
1448}
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
deleted file mode 100644
index 0a6d2a9d569c..000000000000
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ /dev/null
@@ -1,688 +0,0 @@
1/*
2 * Performance event support - Freescale Embedded Performance Monitor
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 * Copyright 2010 Freescale Semiconductor, Inc.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12#include <linux/kernel.h>
13#include <linux/sched.h>
14#include <linux/perf_event.h>
15#include <linux/percpu.h>
16#include <linux/hardirq.h>
17#include <asm/reg_fsl_emb.h>
18#include <asm/pmc.h>
19#include <asm/machdep.h>
20#include <asm/firmware.h>
21#include <asm/ptrace.h>
22
23struct cpu_hw_events {
24 int n_events;
25 int disabled;
26 u8 pmcs_enabled;
27 struct perf_event *event[MAX_HWEVENTS];
28};
29static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
30
31static struct fsl_emb_pmu *ppmu;
32
33/* Number of perf_events counting hardware events */
34static atomic_t num_events;
35/* Used to avoid races in calling reserve/release_pmc_hardware */
36static DEFINE_MUTEX(pmc_reserve_mutex);
37
38/*
39 * If interrupts are soft-disabled when a PMU interrupt occurs, treat
40 * it as an NMI.
41 */
42static inline int perf_intr_is_nmi(struct pt_regs *regs)
43{
44#ifdef __powerpc64__
45 return !regs->softe;
46#else
47 return 0;
48#endif
49}
50
51static void perf_event_interrupt(struct pt_regs *regs);
52
53/*
54 * Read one performance monitor counter (PMC).
55 */
56static unsigned long read_pmc(int idx)
57{
58 unsigned long val;
59
60 switch (idx) {
61 case 0:
62 val = mfpmr(PMRN_PMC0);
63 break;
64 case 1:
65 val = mfpmr(PMRN_PMC1);
66 break;
67 case 2:
68 val = mfpmr(PMRN_PMC2);
69 break;
70 case 3:
71 val = mfpmr(PMRN_PMC3);
72 break;
73 default:
74 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
75 val = 0;
76 }
77 return val;
78}
79
80/*
81 * Write one PMC.
82 */
83static void write_pmc(int idx, unsigned long val)
84{
85 switch (idx) {
86 case 0:
87 mtpmr(PMRN_PMC0, val);
88 break;
89 case 1:
90 mtpmr(PMRN_PMC1, val);
91 break;
92 case 2:
93 mtpmr(PMRN_PMC2, val);
94 break;
95 case 3:
96 mtpmr(PMRN_PMC3, val);
97 break;
98 default:
99 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
100 }
101
102 isync();
103}
104
105/*
106 * Write one local control A register
107 */
108static void write_pmlca(int idx, unsigned long val)
109{
110 switch (idx) {
111 case 0:
112 mtpmr(PMRN_PMLCA0, val);
113 break;
114 case 1:
115 mtpmr(PMRN_PMLCA1, val);
116 break;
117 case 2:
118 mtpmr(PMRN_PMLCA2, val);
119 break;
120 case 3:
121 mtpmr(PMRN_PMLCA3, val);
122 break;
123 default:
124 printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
125 }
126
127 isync();
128}
129
130/*
131 * Write one local control B register
132 */
133static void write_pmlcb(int idx, unsigned long val)
134{
135 switch (idx) {
136 case 0:
137 mtpmr(PMRN_PMLCB0, val);
138 break;
139 case 1:
140 mtpmr(PMRN_PMLCB1, val);
141 break;
142 case 2:
143 mtpmr(PMRN_PMLCB2, val);
144 break;
145 case 3:
146 mtpmr(PMRN_PMLCB3, val);
147 break;
148 default:
149 printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
150 }
151
152 isync();
153}
154
155static void fsl_emb_pmu_read(struct perf_event *event)
156{
157 s64 val, delta, prev;
158
159 if (event->hw.state & PERF_HES_STOPPED)
160 return;
161
162 /*
163 * Performance monitor interrupts come even when interrupts
164 * are soft-disabled, as long as interrupts are hard-enabled.
165 * Therefore we treat them like NMIs.
166 */
167 do {
168 prev = local64_read(&event->hw.prev_count);
169 barrier();
170 val = read_pmc(event->hw.idx);
171 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
172
173 /* The counters are only 32 bits wide */
174 delta = (val - prev) & 0xfffffffful;
175 local64_add(delta, &event->count);
176 local64_sub(delta, &event->hw.period_left);
177}
178
179/*
180 * Disable all events to prevent PMU interrupts and to allow
181 * events to be added or removed.
182 */
183static void fsl_emb_pmu_disable(struct pmu *pmu)
184{
185 struct cpu_hw_events *cpuhw;
186 unsigned long flags;
187
188 local_irq_save(flags);
189 cpuhw = &__get_cpu_var(cpu_hw_events);
190
191 if (!cpuhw->disabled) {
192 cpuhw->disabled = 1;
193
194 /*
195 * Check if we ever enabled the PMU on this cpu.
196 */
197 if (!cpuhw->pmcs_enabled) {
198 ppc_enable_pmcs();
199 cpuhw->pmcs_enabled = 1;
200 }
201
202 if (atomic_read(&num_events)) {
203 /*
204 * Set the 'freeze all counters' bit, and disable
205 * interrupts. The barrier is to make sure the
206 * mtpmr has been executed and the PMU has frozen
207 * the events before we return.
208 */
209
210 mtpmr(PMRN_PMGC0, PMGC0_FAC);
211 isync();
212 }
213 }
214 local_irq_restore(flags);
215}
216
217/*
218 * Re-enable all events if disable == 0.
219 * If we were previously disabled and events were added, then
220 * put the new config on the PMU.
221 */
222static void fsl_emb_pmu_enable(struct pmu *pmu)
223{
224 struct cpu_hw_events *cpuhw;
225 unsigned long flags;
226
227 local_irq_save(flags);
228 cpuhw = &__get_cpu_var(cpu_hw_events);
229 if (!cpuhw->disabled)
230 goto out;
231
232 cpuhw->disabled = 0;
233 ppc_set_pmu_inuse(cpuhw->n_events != 0);
234
235 if (cpuhw->n_events > 0) {
236 mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
237 isync();
238 }
239
240 out:
241 local_irq_restore(flags);
242}
243
244static int collect_events(struct perf_event *group, int max_count,
245 struct perf_event *ctrs[])
246{
247 int n = 0;
248 struct perf_event *event;
249
250 if (!is_software_event(group)) {
251 if (n >= max_count)
252 return -1;
253 ctrs[n] = group;
254 n++;
255 }
256 list_for_each_entry(event, &group->sibling_list, group_entry) {
257 if (!is_software_event(event) &&
258 event->state != PERF_EVENT_STATE_OFF) {
259 if (n >= max_count)
260 return -1;
261 ctrs[n] = event;
262 n++;
263 }
264 }
265 return n;
266}
267
268/* context locked on entry */
269static int fsl_emb_pmu_add(struct perf_event *event, int flags)
270{
271 struct cpu_hw_events *cpuhw;
272 int ret = -EAGAIN;
273 int num_counters = ppmu->n_counter;
274 u64 val;
275 int i;
276
277 perf_pmu_disable(event->pmu);
278 cpuhw = &get_cpu_var(cpu_hw_events);
279
280 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
281 num_counters = ppmu->n_restricted;
282
283 /*
284 * Allocate counters from top-down, so that restricted-capable
285 * counters are kept free as long as possible.
286 */
287 for (i = num_counters - 1; i >= 0; i--) {
288 if (cpuhw->event[i])
289 continue;
290
291 break;
292 }
293
294 if (i < 0)
295 goto out;
296
297 event->hw.idx = i;
298 cpuhw->event[i] = event;
299 ++cpuhw->n_events;
300
301 val = 0;
302 if (event->hw.sample_period) {
303 s64 left = local64_read(&event->hw.period_left);
304 if (left < 0x80000000L)
305 val = 0x80000000L - left;
306 }
307 local64_set(&event->hw.prev_count, val);
308
309 if (!(flags & PERF_EF_START)) {
310 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
311 val = 0;
312 }
313
314 write_pmc(i, val);
315 perf_event_update_userpage(event);
316
317 write_pmlcb(i, event->hw.config >> 32);
318 write_pmlca(i, event->hw.config_base);
319
320 ret = 0;
321 out:
322 put_cpu_var(cpu_hw_events);
323 perf_pmu_enable(event->pmu);
324 return ret;
325}
326
327/* context locked on entry */
328static void fsl_emb_pmu_del(struct perf_event *event, int flags)
329{
330 struct cpu_hw_events *cpuhw;
331 int i = event->hw.idx;
332
333 perf_pmu_disable(event->pmu);
334 if (i < 0)
335 goto out;
336
337 fsl_emb_pmu_read(event);
338
339 cpuhw = &get_cpu_var(cpu_hw_events);
340
341 WARN_ON(event != cpuhw->event[event->hw.idx]);
342
343 write_pmlca(i, 0);
344 write_pmlcb(i, 0);
345 write_pmc(i, 0);
346
347 cpuhw->event[i] = NULL;
348 event->hw.idx = -1;
349
350 /*
351 * TODO: if at least one restricted event exists, and we
352 * just freed up a non-restricted-capable counter, and
353 * there is a restricted-capable counter occupied by
354 * a non-restricted event, migrate that event to the
355 * vacated counter.
356 */
357
358 cpuhw->n_events--;
359
360 out:
361 perf_pmu_enable(event->pmu);
362 put_cpu_var(cpu_hw_events);
363}
364
365static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
366{
367 unsigned long flags;
368 s64 left;
369
370 if (event->hw.idx < 0 || !event->hw.sample_period)
371 return;
372
373 if (!(event->hw.state & PERF_HES_STOPPED))
374 return;
375
376 if (ef_flags & PERF_EF_RELOAD)
377 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
378
379 local_irq_save(flags);
380 perf_pmu_disable(event->pmu);
381
382 event->hw.state = 0;
383 left = local64_read(&event->hw.period_left);
384 write_pmc(event->hw.idx, left);
385
386 perf_event_update_userpage(event);
387 perf_pmu_enable(event->pmu);
388 local_irq_restore(flags);
389}
390
391static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
392{
393 unsigned long flags;
394
395 if (event->hw.idx < 0 || !event->hw.sample_period)
396 return;
397
398 if (event->hw.state & PERF_HES_STOPPED)
399 return;
400
401 local_irq_save(flags);
402 perf_pmu_disable(event->pmu);
403
404 fsl_emb_pmu_read(event);
405 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
406 write_pmc(event->hw.idx, 0);
407
408 perf_event_update_userpage(event);
409 perf_pmu_enable(event->pmu);
410 local_irq_restore(flags);
411}
412
413/*
414 * Release the PMU if this is the last perf_event.
415 */
416static void hw_perf_event_destroy(struct perf_event *event)
417{
418 if (!atomic_add_unless(&num_events, -1, 1)) {
419 mutex_lock(&pmc_reserve_mutex);
420 if (atomic_dec_return(&num_events) == 0)
421 release_pmc_hardware();
422 mutex_unlock(&pmc_reserve_mutex);
423 }
424}
425
426/*
427 * Translate a generic cache event_id config to a raw event_id code.
428 */
429static int hw_perf_cache_event(u64 config, u64 *eventp)
430{
431 unsigned long type, op, result;
432 int ev;
433
434 if (!ppmu->cache_events)
435 return -EINVAL;
436
437 /* unpack config */
438 type = config & 0xff;
439 op = (config >> 8) & 0xff;
440 result = (config >> 16) & 0xff;
441
442 if (type >= PERF_COUNT_HW_CACHE_MAX ||
443 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
444 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
445 return -EINVAL;
446
447 ev = (*ppmu->cache_events)[type][op][result];
448 if (ev == 0)
449 return -EOPNOTSUPP;
450 if (ev == -1)
451 return -EINVAL;
452 *eventp = ev;
453 return 0;
454}
455
456static int fsl_emb_pmu_event_init(struct perf_event *event)
457{
458 u64 ev;
459 struct perf_event *events[MAX_HWEVENTS];
460 int n;
461 int err;
462 int num_restricted;
463 int i;
464
465 switch (event->attr.type) {
466 case PERF_TYPE_HARDWARE:
467 ev = event->attr.config;
468 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
469 return -EOPNOTSUPP;
470 ev = ppmu->generic_events[ev];
471 break;
472
473 case PERF_TYPE_HW_CACHE:
474 err = hw_perf_cache_event(event->attr.config, &ev);
475 if (err)
476 return err;
477 break;
478
479 case PERF_TYPE_RAW:
480 ev = event->attr.config;
481 break;
482
483 default:
484 return -ENOENT;
485 }
486
487 event->hw.config = ppmu->xlate_event(ev);
488 if (!(event->hw.config & FSL_EMB_EVENT_VALID))
489 return -EINVAL;
490
491 /*
492 * If this is in a group, check if it can go on with all the
493 * other hardware events in the group. We assume the event
494 * hasn't been linked into its leader's sibling list at this point.
495 */
496 n = 0;
497 if (event->group_leader != event) {
498 n = collect_events(event->group_leader,
499 ppmu->n_counter - 1, events);
500 if (n < 0)
501 return -EINVAL;
502 }
503
504 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
505 num_restricted = 0;
506 for (i = 0; i < n; i++) {
507 if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
508 num_restricted++;
509 }
510
511 if (num_restricted >= ppmu->n_restricted)
512 return -EINVAL;
513 }
514
515 event->hw.idx = -1;
516
517 event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
518 (u32)((ev << 16) & PMLCA_EVENT_MASK);
519
520 if (event->attr.exclude_user)
521 event->hw.config_base |= PMLCA_FCU;
522 if (event->attr.exclude_kernel)
523 event->hw.config_base |= PMLCA_FCS;
524 if (event->attr.exclude_idle)
525 return -ENOTSUPP;
526
527 event->hw.last_period = event->hw.sample_period;
528 local64_set(&event->hw.period_left, event->hw.last_period);
529
530 /*
531 * See if we need to reserve the PMU.
532 * If no events are currently in use, then we have to take a
533 * mutex to ensure that we don't race with another task doing
534 * reserve_pmc_hardware or release_pmc_hardware.
535 */
536 err = 0;
537 if (!atomic_inc_not_zero(&num_events)) {
538 mutex_lock(&pmc_reserve_mutex);
539 if (atomic_read(&num_events) == 0 &&
540 reserve_pmc_hardware(perf_event_interrupt))
541 err = -EBUSY;
542 else
543 atomic_inc(&num_events);
544 mutex_unlock(&pmc_reserve_mutex);
545
546 mtpmr(PMRN_PMGC0, PMGC0_FAC);
547 isync();
548 }
549 event->destroy = hw_perf_event_destroy;
550
551 return err;
552}
553
554static struct pmu fsl_emb_pmu = {
555 .pmu_enable = fsl_emb_pmu_enable,
556 .pmu_disable = fsl_emb_pmu_disable,
557 .event_init = fsl_emb_pmu_event_init,
558 .add = fsl_emb_pmu_add,
559 .del = fsl_emb_pmu_del,
560 .start = fsl_emb_pmu_start,
561 .stop = fsl_emb_pmu_stop,
562 .read = fsl_emb_pmu_read,
563};
564
565/*
566 * A counter has overflowed; update its count and record
567 * things if requested. Note that interrupts are hard-disabled
568 * here so there is no possibility of being interrupted.
569 */
570static void record_and_restart(struct perf_event *event, unsigned long val,
571 struct pt_regs *regs)
572{
573 u64 period = event->hw.sample_period;
574 s64 prev, delta, left;
575 int record = 0;
576
577 if (event->hw.state & PERF_HES_STOPPED) {
578 write_pmc(event->hw.idx, 0);
579 return;
580 }
581
582 /* we don't have to worry about interrupts here */
583 prev = local64_read(&event->hw.prev_count);
584 delta = (val - prev) & 0xfffffffful;
585 local64_add(delta, &event->count);
586
587 /*
588 * See if the total period for this event has expired,
589 * and update for the next period.
590 */
591 val = 0;
592 left = local64_read(&event->hw.period_left) - delta;
593 if (period) {
594 if (left <= 0) {
595 left += period;
596 if (left <= 0)
597 left = period;
598 record = 1;
599 event->hw.last_period = event->hw.sample_period;
600 }
601 if (left < 0x80000000LL)
602 val = 0x80000000LL - left;
603 }
604
605 write_pmc(event->hw.idx, val);
606 local64_set(&event->hw.prev_count, val);
607 local64_set(&event->hw.period_left, left);
608 perf_event_update_userpage(event);
609
610 /*
611 * Finally record data if requested.
612 */
613 if (record) {
614 struct perf_sample_data data;
615
616 perf_sample_data_init(&data, 0);
617 data.period = event->hw.last_period;
618
619 if (perf_event_overflow(event, &data, regs))
620 fsl_emb_pmu_stop(event, 0);
621 }
622}
623
624static void perf_event_interrupt(struct pt_regs *regs)
625{
626 int i;
627 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
628 struct perf_event *event;
629 unsigned long val;
630 int found = 0;
631 int nmi;
632
633 nmi = perf_intr_is_nmi(regs);
634 if (nmi)
635 nmi_enter();
636 else
637 irq_enter();
638
639 for (i = 0; i < ppmu->n_counter; ++i) {
640 event = cpuhw->event[i];
641
642 val = read_pmc(i);
643 if ((int)val < 0) {
644 if (event) {
645 /* event has overflowed */
646 found = 1;
647 record_and_restart(event, val, regs);
648 } else {
649 /*
650 * Disabled counter is negative,
651 * reset it just in case.
652 */
653 write_pmc(i, 0);
654 }
655 }
656 }
657
658 /* PMM will keep counters frozen until we return from the interrupt. */
659 mtmsr(mfmsr() | MSR_PMM);
660 mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
661 isync();
662
663 if (nmi)
664 nmi_exit();
665 else
666 irq_exit();
667}
668
669void hw_perf_event_setup(int cpu)
670{
671 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
672
673 memset(cpuhw, 0, sizeof(*cpuhw));
674}
675
676int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
677{
678 if (ppmu)
679 return -EBUSY; /* something's already registered */
680
681 ppmu = pmu;
682 pr_info("%s performance monitor hardware support registered\n",
683 pmu->name);
684
685 perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
686
687 return 0;
688}
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index a841a9d136a2..58eaa3ddf7b9 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -13,6 +13,7 @@
13 */ 13 */
14 14
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/bug.h>
16#include <linux/spinlock.h> 17#include <linux/spinlock.h>
17#include <linux/export.h> 18#include <linux/export.h>
18 19
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
deleted file mode 100644
index b4f1dda4d089..000000000000
--- a/arch/powerpc/kernel/power4-pmu.c
+++ /dev/null
@@ -1,621 +0,0 @@
1/*
2 * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER4
19 */
20#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
23#define PM_UNIT_MSK 0xf
24#define PM_LOWER_SH 6
25#define PM_LOWER_MSK 1
26#define PM_LOWER_MSKS 0x40
27#define PM_BYTE_SH 4 /* Byte number of event bus to use */
28#define PM_BYTE_MSK 3
29#define PM_PMCSEL_MSK 7
30
31/*
32 * Unit code values
33 */
34#define PM_FPU 1
35#define PM_ISU1 2
36#define PM_IFU 3
37#define PM_IDU0 4
38#define PM_ISU1_ALT 6
39#define PM_ISU2 7
40#define PM_IFU_ALT 8
41#define PM_LSU0 9
42#define PM_LSU1 0xc
43#define PM_GPS 0xf
44
45/*
46 * Bits in MMCR0 for POWER4
47 */
48#define MMCR0_PMC1SEL_SH 8
49#define MMCR0_PMC2SEL_SH 1
50#define MMCR_PMCSEL_MSK 0x1f
51
52/*
53 * Bits in MMCR1 for POWER4
54 */
55#define MMCR1_TTM0SEL_SH 62
56#define MMCR1_TTC0SEL_SH 61
57#define MMCR1_TTM1SEL_SH 59
58#define MMCR1_TTC1SEL_SH 58
59#define MMCR1_TTM2SEL_SH 56
60#define MMCR1_TTC2SEL_SH 55
61#define MMCR1_TTM3SEL_SH 53
62#define MMCR1_TTC3SEL_SH 52
63#define MMCR1_TTMSEL_MSK 3
64#define MMCR1_TD_CP_DBG0SEL_SH 50
65#define MMCR1_TD_CP_DBG1SEL_SH 48
66#define MMCR1_TD_CP_DBG2SEL_SH 46
67#define MMCR1_TD_CP_DBG3SEL_SH 44
68#define MMCR1_DEBUG0SEL_SH 43
69#define MMCR1_DEBUG1SEL_SH 42
70#define MMCR1_DEBUG2SEL_SH 41
71#define MMCR1_DEBUG3SEL_SH 40
72#define MMCR1_PMC1_ADDER_SEL_SH 39
73#define MMCR1_PMC2_ADDER_SEL_SH 38
74#define MMCR1_PMC6_ADDER_SEL_SH 37
75#define MMCR1_PMC5_ADDER_SEL_SH 36
76#define MMCR1_PMC8_ADDER_SEL_SH 35
77#define MMCR1_PMC7_ADDER_SEL_SH 34
78#define MMCR1_PMC3_ADDER_SEL_SH 33
79#define MMCR1_PMC4_ADDER_SEL_SH 32
80#define MMCR1_PMC3SEL_SH 27
81#define MMCR1_PMC4SEL_SH 22
82#define MMCR1_PMC5SEL_SH 17
83#define MMCR1_PMC6SEL_SH 12
84#define MMCR1_PMC7SEL_SH 7
85#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
86
87static short mmcr1_adder_bits[8] = {
88 MMCR1_PMC1_ADDER_SEL_SH,
89 MMCR1_PMC2_ADDER_SEL_SH,
90 MMCR1_PMC3_ADDER_SEL_SH,
91 MMCR1_PMC4_ADDER_SEL_SH,
92 MMCR1_PMC5_ADDER_SEL_SH,
93 MMCR1_PMC6_ADDER_SEL_SH,
94 MMCR1_PMC7_ADDER_SEL_SH,
95 MMCR1_PMC8_ADDER_SEL_SH
96};
97
98/*
99 * Bits in MMCRA
100 */
101#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
102
103/*
104 * Layout of constraint bits:
105 * 6666555555555544444444443333333333222222222211111111110000000000
106 * 3210987654321098765432109876543210987654321098765432109876543210
107 * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
108 * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
109 * \SMPL ||\TTC3SEL
110 * |\TTC_IFU_SEL
111 * \TTM2SEL0
112 *
113 * SMPL - SAMPLE_ENABLE constraint
114 * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
115 *
116 * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
117 * 55: UC1 error 0x0080_0000_0000_0000
118 * 54: FPU events needed 0x0040_0000_0000_0000
119 * 53: ISU1 events needed 0x0020_0000_0000_0000
120 * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
121 *
122 * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
123 * 51: UC2 error 0x0008_0000_0000_0000
124 * 50: FPU events needed 0x0004_0000_0000_0000
125 * 49: IFU events needed 0x0002_0000_0000_0000
126 * 48: LSU0 events needed 0x0001_0000_0000_0000
127 *
128 * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
129 * 47: UC3 error 0x8000_0000_0000
130 * 46: LSU0 events needed 0x4000_0000_0000
131 * 45: IFU events needed 0x2000_0000_0000
132 * 44: IDU0|ISU2 events needed 0x1000_0000_0000
133 * 43: ISU1 events needed 0x0800_0000_0000
134 *
135 * TTM2SEL0
136 * 42: 0 = IDU0 events needed
137 * 1 = ISU2 events needed 0x0400_0000_0000
138 *
139 * TTC_IFU_SEL
140 * 41: 0 = IFU.U events needed
141 * 1 = IFU.L events needed 0x0200_0000_0000
142 *
143 * TTC3SEL
144 * 40: 0 = LSU1.U events needed
145 * 1 = LSU1.L events needed 0x0100_0000_0000
146 *
147 * PS1
148 * 39: PS1 error 0x0080_0000_0000
149 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
150 *
151 * PS2
152 * 35: PS2 error 0x0008_0000_0000
153 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
154 *
155 * B0
156 * 28-31: Byte 0 event source 0xf000_0000
157 * 1 = FPU
158 * 2 = ISU1
159 * 3 = IFU
160 * 4 = IDU0
161 * 7 = ISU2
162 * 9 = LSU0
163 * c = LSU1
164 * f = GPS
165 *
166 * B1, B2, B3
167 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
168 *
169 * P8
170 * 15: P8 error 0x8000
171 * 14-15: Count of events needing PMC8
172 *
173 * P1..P7
174 * 0-13: Count of events needing PMC1..PMC7
175 *
176 * Note: this doesn't allow events using IFU.U to be combined with events
177 * using IFU.L, though that is feasible (using TTM0 and TTM2). However
178 * there are no listed events for IFU.L (they are debug events not
179 * verified for performance monitoring) so this shouldn't cause a
180 * problem.
181 */
182
183static struct unitinfo {
184 unsigned long value, mask;
185 int unit;
186 int lowerbit;
187} p4_unitinfo[16] = {
188 [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
189 [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
190 [PM_ISU1_ALT] =
191 { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
192 [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
193 [PM_IFU_ALT] =
194 { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
195 [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
196 [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
197 [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
198 [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
199 [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
200};
201
202static unsigned char direct_marked_event[8] = {
203 (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
204 (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
205 (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
206 (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
207 (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
208 (1<<3) | (1<<4) | (1<<5),
209 /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
210 (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
211 (1<<4), /* PMC8: PM_MRK_LSU_FIN */
212};
213
214/*
215 * Returns 1 if event counts things relating to marked instructions
216 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
217 */
218static int p4_marked_instr_event(u64 event)
219{
220 int pmc, psel, unit, byte, bit;
221 unsigned int mask;
222
223 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
224 psel = event & PM_PMCSEL_MSK;
225 if (pmc) {
226 if (direct_marked_event[pmc - 1] & (1 << psel))
227 return 1;
228 if (psel == 0) /* add events */
229 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
230 else if (psel == 6) /* decode events */
231 bit = 4;
232 else
233 return 0;
234 } else
235 bit = psel;
236
237 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
238 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
239 mask = 0;
240 switch (unit) {
241 case PM_LSU1:
242 if (event & PM_LOWER_MSKS)
243 mask = 1 << 28; /* byte 7 bit 4 */
244 else
245 mask = 6 << 24; /* byte 3 bits 1 and 2 */
246 break;
247 case PM_LSU0:
248 /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
249 mask = 0x083dff00;
250 }
251 return (mask >> (byte * 8 + bit)) & 1;
252}
253
254static int p4_get_constraint(u64 event, unsigned long *maskp,
255 unsigned long *valp)
256{
257 int pmc, byte, unit, lower, sh;
258 unsigned long mask = 0, value = 0;
259 int grp = -1;
260
261 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
262 if (pmc) {
263 if (pmc > 8)
264 return -1;
265 sh = (pmc - 1) * 2;
266 mask |= 2 << sh;
267 value |= 1 << sh;
268 grp = ((pmc - 1) >> 1) & 1;
269 }
270 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
271 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
272 if (unit) {
273 lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
274
275 /*
276 * Bus events on bytes 0 and 2 can be counted
277 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
278 */
279 if (!pmc)
280 grp = byte & 1;
281
282 if (!p4_unitinfo[unit].unit)
283 return -1;
284 mask |= p4_unitinfo[unit].mask;
285 value |= p4_unitinfo[unit].value;
286 sh = p4_unitinfo[unit].lowerbit;
287 if (sh > 1)
288 value |= (unsigned long)lower << sh;
289 else if (lower != sh)
290 return -1;
291 unit = p4_unitinfo[unit].unit;
292
293 /* Set byte lane select field */
294 mask |= 0xfULL << (28 - 4 * byte);
295 value |= (unsigned long)unit << (28 - 4 * byte);
296 }
297 if (grp == 0) {
298 /* increment PMC1/2/5/6 field */
299 mask |= 0x8000000000ull;
300 value |= 0x1000000000ull;
301 } else {
302 /* increment PMC3/4/7/8 field */
303 mask |= 0x800000000ull;
304 value |= 0x100000000ull;
305 }
306
307 /* Marked instruction events need sample_enable set */
308 if (p4_marked_instr_event(event)) {
309 mask |= 1ull << 56;
310 value |= 1ull << 56;
311 }
312
313 /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
314 if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
315 mask |= 1ull << 56;
316
317 *maskp = mask;
318 *valp = value;
319 return 0;
320}
321
322static unsigned int ppc_inst_cmpl[] = {
323 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
324};
325
326static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
327{
328 int i, j, na;
329
330 alt[0] = event;
331 na = 1;
332
333 /* 2 possibilities for PM_GRP_DISP_REJECT */
334 if (event == 0x8003 || event == 0x0224) {
335 alt[1] = event ^ (0x8003 ^ 0x0224);
336 return 2;
337 }
338
339 /* 2 possibilities for PM_ST_MISS_L1 */
340 if (event == 0x0c13 || event == 0x0c23) {
341 alt[1] = event ^ (0x0c13 ^ 0x0c23);
342 return 2;
343 }
344
345 /* several possibilities for PM_INST_CMPL */
346 for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
347 if (event == ppc_inst_cmpl[i]) {
348 for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
349 if (j != i)
350 alt[na++] = ppc_inst_cmpl[j];
351 break;
352 }
353 }
354
355 return na;
356}
357
358static int p4_compute_mmcr(u64 event[], int n_ev,
359 unsigned int hwc[], unsigned long mmcr[])
360{
361 unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
362 unsigned int pmc, unit, byte, psel, lower;
363 unsigned int ttm, grp;
364 unsigned int pmc_inuse = 0;
365 unsigned int pmc_grp_use[2];
366 unsigned char busbyte[4];
367 unsigned char unituse[16];
368 unsigned int unitlower = 0;
369 int i;
370
371 if (n_ev > 8)
372 return -1;
373
374 /* First pass to count resource use */
375 pmc_grp_use[0] = pmc_grp_use[1] = 0;
376 memset(busbyte, 0, sizeof(busbyte));
377 memset(unituse, 0, sizeof(unituse));
378 for (i = 0; i < n_ev; ++i) {
379 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
380 if (pmc) {
381 if (pmc_inuse & (1 << (pmc - 1)))
382 return -1;
383 pmc_inuse |= 1 << (pmc - 1);
384 /* count 1/2/5/6 vs 3/4/7/8 use */
385 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
386 }
387 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
388 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
389 lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
390 if (unit) {
391 if (!pmc)
392 ++pmc_grp_use[byte & 1];
393 if (unit == 6 || unit == 8)
394 /* map alt ISU1/IFU codes: 6->2, 8->3 */
395 unit = (unit >> 1) - 1;
396 if (busbyte[byte] && busbyte[byte] != unit)
397 return -1;
398 busbyte[byte] = unit;
399 lower <<= unit;
400 if (unituse[unit] && lower != (unitlower & lower))
401 return -1;
402 unituse[unit] = 1;
403 unitlower |= lower;
404 }
405 }
406 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
407 return -1;
408
409 /*
410 * Assign resources and set multiplexer selects.
411 *
412 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
413 * Each TTMx can only select one unit, but since
414 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
415 * we have some choices.
416 */
417 if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
418 unituse[6] = 1; /* Move 2 to 6 */
419 unituse[2] = 0;
420 }
421 if (unituse[3] & (unituse[1] | unituse[2])) {
422 unituse[8] = 1; /* Move 3 to 8 */
423 unituse[3] = 0;
424 unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
425 }
426 /* Check only one unit per TTMx */
427 if (unituse[1] + unituse[2] + unituse[3] > 1 ||
428 unituse[4] + unituse[6] + unituse[7] > 1 ||
429 unituse[8] + unituse[9] > 1 ||
430 (unituse[5] | unituse[10] | unituse[11] |
431 unituse[13] | unituse[14]))
432 return -1;
433
434 /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
435 mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
436 << MMCR1_TTM0SEL_SH;
437 mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
438 << MMCR1_TTM1SEL_SH;
439 mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
440
441 /* Set TTCxSEL fields. */
442 if (unitlower & 0xe)
443 mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
444 if (unitlower & 0xf0)
445 mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
446 if (unitlower & 0xf00)
447 mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
448 if (unitlower & 0x7000)
449 mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
450
451 /* Set byte lane select fields. */
452 for (byte = 0; byte < 4; ++byte) {
453 unit = busbyte[byte];
454 if (!unit)
455 continue;
456 if (unit == 0xf) {
457 /* special case for GPS */
458 mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
459 } else {
460 if (!unituse[unit])
461 ttm = unit - 1; /* 2->1, 3->2 */
462 else
463 ttm = unit >> 2;
464 mmcr1 |= (unsigned long)ttm
465 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
466 }
467 }
468
469 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
470 for (i = 0; i < n_ev; ++i) {
471 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
472 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
473 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
474 psel = event[i] & PM_PMCSEL_MSK;
475 if (!pmc) {
476 /* Bus event or 00xxx direct event (off or cycles) */
477 if (unit)
478 psel |= 0x10 | ((byte & 2) << 2);
479 for (pmc = 0; pmc < 8; ++pmc) {
480 if (pmc_inuse & (1 << pmc))
481 continue;
482 grp = (pmc >> 1) & 1;
483 if (unit) {
484 if (grp == (byte & 1))
485 break;
486 } else if (pmc_grp_use[grp] < 4) {
487 ++pmc_grp_use[grp];
488 break;
489 }
490 }
491 pmc_inuse |= 1 << pmc;
492 } else {
493 /* Direct event */
494 --pmc;
495 if (psel == 0 && (byte & 2))
496 /* add events on higher-numbered bus */
497 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
498 else if (psel == 6 && byte == 3)
499 /* seem to need to set sample_enable here */
500 mmcra |= MMCRA_SAMPLE_ENABLE;
501 psel |= 8;
502 }
503 if (pmc <= 1)
504 mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
505 else
506 mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
507 if (pmc == 7) /* PMC8 */
508 mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
509 hwc[i] = pmc;
510 if (p4_marked_instr_event(event[i]))
511 mmcra |= MMCRA_SAMPLE_ENABLE;
512 }
513
514 if (pmc_inuse & 1)
515 mmcr0 |= MMCR0_PMC1CE;
516 if (pmc_inuse & 0xfe)
517 mmcr0 |= MMCR0_PMCjCE;
518
519 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
520
521 /* Return MMCRx values */
522 mmcr[0] = mmcr0;
523 mmcr[1] = mmcr1;
524 mmcr[2] = mmcra;
525 return 0;
526}
527
528static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
529{
530 /*
531 * Setting the PMCxSEL field to 0 disables PMC x.
532 * (Note that pmc is 0-based here, not 1-based.)
533 */
534 if (pmc <= 1) {
535 mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
536 } else {
537 mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
538 if (pmc == 7)
539 mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
540 }
541}
542
543static int p4_generic_events[] = {
544 [PERF_COUNT_HW_CPU_CYCLES] = 7,
545 [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
546 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
547 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
548 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
549 [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
550};
551
552#define C(x) PERF_COUNT_HW_CACHE_##x
553
554/*
555 * Table of generalized cache-related events.
556 * 0 means not supported, -1 means nonsensical, other values
557 * are event codes.
558 */
559static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
560 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
561 [C(OP_READ)] = { 0x8c10, 0x3c10 },
562 [C(OP_WRITE)] = { 0x7c10, 0xc13 },
563 [C(OP_PREFETCH)] = { 0xc35, 0 },
564 },
565 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
566 [C(OP_READ)] = { 0, 0 },
567 [C(OP_WRITE)] = { -1, -1 },
568 [C(OP_PREFETCH)] = { 0, 0 },
569 },
570 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
571 [C(OP_READ)] = { 0, 0 },
572 [C(OP_WRITE)] = { 0, 0 },
573 [C(OP_PREFETCH)] = { 0xc34, 0 },
574 },
575 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
576 [C(OP_READ)] = { 0, 0x904 },
577 [C(OP_WRITE)] = { -1, -1 },
578 [C(OP_PREFETCH)] = { -1, -1 },
579 },
580 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
581 [C(OP_READ)] = { 0, 0x900 },
582 [C(OP_WRITE)] = { -1, -1 },
583 [C(OP_PREFETCH)] = { -1, -1 },
584 },
585 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
586 [C(OP_READ)] = { 0x330, 0x331 },
587 [C(OP_WRITE)] = { -1, -1 },
588 [C(OP_PREFETCH)] = { -1, -1 },
589 },
590 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
591 [C(OP_READ)] = { -1, -1 },
592 [C(OP_WRITE)] = { -1, -1 },
593 [C(OP_PREFETCH)] = { -1, -1 },
594 },
595};
596
597static struct power_pmu power4_pmu = {
598 .name = "POWER4/4+",
599 .n_counter = 8,
600 .max_alternatives = 5,
601 .add_fields = 0x0000001100005555ul,
602 .test_adder = 0x0011083300000000ul,
603 .compute_mmcr = p4_compute_mmcr,
604 .get_constraint = p4_get_constraint,
605 .get_alternatives = p4_get_alternatives,
606 .disable_pmc = p4_disable_pmc,
607 .n_generic = ARRAY_SIZE(p4_generic_events),
608 .generic_events = p4_generic_events,
609 .cache_events = &power4_cache_events,
610};
611
612static int __init init_power4_pmu(void)
613{
614 if (!cur_cpu_spec->oprofile_cpu_type ||
615 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
616 return -ENODEV;
617
618 return register_power_pmu(&power4_pmu);
619}
620
621early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
deleted file mode 100644
index a8757baa28f3..000000000000
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ /dev/null
@@ -1,690 +0,0 @@
1/*
2 * Performance counter support for POWER5+/++ (not POWER5) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
19 */
20#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
24#define PM_UNIT_MSK 0xf
25#define PM_BYTE_SH 12 /* Byte number of event bus to use */
26#define PM_BYTE_MSK 7
27#define PM_GRS_SH 8 /* Storage subsystem mux select */
28#define PM_GRS_MSK 7
29#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
30#define PM_PMCSEL_MSK 0x7f
31
32/* Values in PM_UNIT field */
33#define PM_FPU 0
34#define PM_ISU0 1
35#define PM_IFU 2
36#define PM_ISU1 3
37#define PM_IDU 4
38#define PM_ISU0_ALT 6
39#define PM_GRS 7
40#define PM_LSU0 8
41#define PM_LSU1 0xc
42#define PM_LASTUNIT 0xc
43
44/*
45 * Bits in MMCR1 for POWER5+
46 */
47#define MMCR1_TTM0SEL_SH 62
48#define MMCR1_TTM1SEL_SH 60
49#define MMCR1_TTM2SEL_SH 58
50#define MMCR1_TTM3SEL_SH 56
51#define MMCR1_TTMSEL_MSK 3
52#define MMCR1_TD_CP_DBG0SEL_SH 54
53#define MMCR1_TD_CP_DBG1SEL_SH 52
54#define MMCR1_TD_CP_DBG2SEL_SH 50
55#define MMCR1_TD_CP_DBG3SEL_SH 48
56#define MMCR1_GRS_L2SEL_SH 46
57#define MMCR1_GRS_L2SEL_MSK 3
58#define MMCR1_GRS_L3SEL_SH 44
59#define MMCR1_GRS_L3SEL_MSK 3
60#define MMCR1_GRS_MCSEL_SH 41
61#define MMCR1_GRS_MCSEL_MSK 7
62#define MMCR1_GRS_FABSEL_SH 39
63#define MMCR1_GRS_FABSEL_MSK 3
64#define MMCR1_PMC1_ADDER_SEL_SH 35
65#define MMCR1_PMC2_ADDER_SEL_SH 34
66#define MMCR1_PMC3_ADDER_SEL_SH 33
67#define MMCR1_PMC4_ADDER_SEL_SH 32
68#define MMCR1_PMC1SEL_SH 25
69#define MMCR1_PMC2SEL_SH 17
70#define MMCR1_PMC3SEL_SH 9
71#define MMCR1_PMC4SEL_SH 1
72#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
73#define MMCR1_PMCSEL_MSK 0x7f
74
75/*
76 * Layout of constraint bits:
77 * 6666555555555544444444443333333333222222222211111111110000000000
78 * 3210987654321098765432109876543210987654321098765432109876543210
79 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
80 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
81 *
82 * NC - number of counters
83 * 51: NC error 0x0008_0000_0000_0000
84 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
85 *
86 * G0..G3 - GRS mux constraints
87 * 46-47: GRS_L2SEL value
88 * 44-45: GRS_L3SEL value
89 * 41-44: GRS_MCSEL value
90 * 39-40: GRS_FABSEL value
91 * Note that these match up with their bit positions in MMCR1
92 *
93 * T0 - TTM0 constraint
94 * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
95 *
96 * T1 - TTM1 constraint
97 * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
98 *
99 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
100 * 33: UC3 error 0x02_0000_0000
101 * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
102 * 31: ISU0 events needed 0x00_8000_0000
103 * 30: IDU|GRS events needed 0x00_4000_0000
104 *
105 * B0
106 * 24-27: Byte 0 event source 0x0f00_0000
107 * Encoding as for the event code
108 *
109 * B1, B2, B3
110 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
111 *
112 * P6
113 * 11: P6 error 0x800
114 * 10-11: Count of events needing PMC6
115 *
116 * P1..P5
117 * 0-9: Count of events needing PMC1..PMC5
118 */
119
120static const int grsel_shift[8] = {
121 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
122 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
123 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
124};
125
126/* Masks and values for using events from the various units */
127static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
128 [PM_FPU] = { 0x3200000000ul, 0x0100000000ul },
129 [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul },
130 [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul },
131 [PM_IFU] = { 0x3200000000ul, 0x2100000000ul },
132 [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul },
133 [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul },
134};
135
136static int power5p_get_constraint(u64 event, unsigned long *maskp,
137 unsigned long *valp)
138{
139 int pmc, byte, unit, sh;
140 int bit, fmask;
141 unsigned long mask = 0, value = 0;
142
143 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
144 if (pmc) {
145 if (pmc > 6)
146 return -1;
147 sh = (pmc - 1) * 2;
148 mask |= 2 << sh;
149 value |= 1 << sh;
150 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
151 return -1;
152 }
153 if (event & PM_BUSEVENT_MSK) {
154 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
155 if (unit > PM_LASTUNIT)
156 return -1;
157 if (unit == PM_ISU0_ALT)
158 unit = PM_ISU0;
159 mask |= unit_cons[unit][0];
160 value |= unit_cons[unit][1];
161 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
162 if (byte >= 4) {
163 if (unit != PM_LSU1)
164 return -1;
165 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
166 ++unit;
167 byte &= 3;
168 }
169 if (unit == PM_GRS) {
170 bit = event & 7;
171 fmask = (bit == 6)? 7: 3;
172 sh = grsel_shift[bit];
173 mask |= (unsigned long)fmask << sh;
174 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
175 << sh;
176 }
177 /* Set byte lane select field */
178 mask |= 0xfUL << (24 - 4 * byte);
179 value |= (unsigned long)unit << (24 - 4 * byte);
180 }
181 if (pmc < 5) {
182 /* need a counter from PMC1-4 set */
183 mask |= 0x8000000000000ul;
184 value |= 0x1000000000000ul;
185 }
186 *maskp = mask;
187 *valp = value;
188 return 0;
189}
190
191static int power5p_limited_pmc_event(u64 event)
192{
193 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
194
195 return pmc == 5 || pmc == 6;
196}
197
198#define MAX_ALT 3 /* at most 3 alternatives for any event */
199
200static const unsigned int event_alternatives[][MAX_ALT] = {
201 { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
202 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
203 { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
204 { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
205 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
206 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
207 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
208 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
209 { 0x100009, 0x200009 }, /* PM_INST_CMPL */
210 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
211 { 0x300009, 0x400009 }, /* PM_INST_DISP */
212};
213
214/*
215 * Scan the alternatives table for a match and return the
216 * index into the alternatives table if found, else -1.
217 */
218static int find_alternative(unsigned int event)
219{
220 int i, j;
221
222 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
223 if (event < event_alternatives[i][0])
224 break;
225 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
226 if (event == event_alternatives[i][j])
227 return i;
228 }
229 return -1;
230}
231
232static const unsigned char bytedecode_alternatives[4][4] = {
233 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
234 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
235 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
236 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
237};
238
239/*
240 * Some direct events for decodes of event bus byte 3 have alternative
241 * PMCSEL values on other counters. This returns the alternative
242 * event code for those that do, or -1 otherwise. This also handles
243 * alternative PMCSEL values for add events.
244 */
245static s64 find_alternative_bdecode(u64 event)
246{
247 int pmc, altpmc, pp, j;
248
249 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
250 if (pmc == 0 || pmc > 4)
251 return -1;
252 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
253 pp = event & PM_PMCSEL_MSK;
254 for (j = 0; j < 4; ++j) {
255 if (bytedecode_alternatives[pmc - 1][j] == pp) {
256 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
257 (altpmc << PM_PMC_SH) |
258 bytedecode_alternatives[altpmc - 1][j];
259 }
260 }
261
262 /* new decode alternatives for power5+ */
263 if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
264 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
265 if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
266 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
267
268 /* alternative add event encodings */
269 if (pp == 0x10 || pp == 0x28)
270 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
271 (altpmc << PM_PMC_SH);
272
273 return -1;
274}
275
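
To make the table above concrete: column 1 pairs PMCSEL 0x23 on PMC1 with PMCSEL 0x17 on PMC4 (altpmc = 5 - 1), and the rewrite keeps the unit/byte/bus-event bits unchanged. A standalone sketch of that transformation, using a hypothetical event code chosen only to show the field arithmetic:

#include <stdio.h>

int main(void)
{
	/* Hypothetical POWER5+ event: PMC1, unit 8, byte 1, PMCSEL 0x23. */
	unsigned int event = (1u << 20) | (8u << 16) | (1u << 12) | 0x80 | 0x23;
	/* Equivalent encoding on the mirror counter: PMC4, PMCSEL 0x17. */
	unsigned int alt = (event & ~((0xfu << 20) | 0x7f))	/* drop PMC and PMCSEL	*/
			 | (4u << 20)				/* altpmc = 5 - 1	*/
			 | 0x17;				/* column 1 for PMC4	*/

	printf("event 0x%x  <->  alt 0x%x\n", event, alt);
	return 0;
}
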
276static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
277{
278 int i, j, nalt = 1;
279 int nlim;
280 s64 ae;
281
282 alt[0] = event;
283 nalt = 1;
284 nlim = power5p_limited_pmc_event(event);
285 i = find_alternative(event);
286 if (i >= 0) {
287 for (j = 0; j < MAX_ALT; ++j) {
288 ae = event_alternatives[i][j];
289 if (ae && ae != event)
290 alt[nalt++] = ae;
291 nlim += power5p_limited_pmc_event(ae);
292 }
293 } else {
294 ae = find_alternative_bdecode(event);
295 if (ae > 0)
296 alt[nalt++] = ae;
297 }
298
299 if (flags & PPMU_ONLY_COUNT_RUN) {
300 /*
301 * We're only counting in RUN state,
302 * so PM_CYC is equivalent to PM_RUN_CYC
303 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
304 * This doesn't include alternatives that don't provide
305 * any extra flexibility in assigning PMCs (e.g.
306 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
307 * Note that even with these additional alternatives
308 * we never end up with more than 3 alternatives for any event.
309 */
310 j = nalt;
311 for (i = 0; i < nalt; ++i) {
312 switch (alt[i]) {
313 case 0xf: /* PM_CYC */
314 alt[j++] = 0x600005; /* PM_RUN_CYC */
315 ++nlim;
316 break;
317 case 0x600005: /* PM_RUN_CYC */
318 alt[j++] = 0xf;
319 break;
320 case 0x100009: /* PM_INST_CMPL */
321 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
322 ++nlim;
323 break;
324 case 0x500009: /* PM_RUN_INST_CMPL */
325 alt[j++] = 0x100009; /* PM_INST_CMPL */
326 alt[j++] = 0x200009;
327 break;
328 }
329 }
330 nalt = j;
331 }
332
333 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
334 /* remove the limited PMC events */
335 j = 0;
336 for (i = 0; i < nalt; ++i) {
337 if (!power5p_limited_pmc_event(alt[i])) {
338 alt[j] = alt[i];
339 ++j;
340 }
341 }
342 nalt = j;
343 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
344 /* remove all but the limited PMC events */
345 j = 0;
346 for (i = 0; i < nalt; ++i) {
347 if (power5p_limited_pmc_event(alt[i])) {
348 alt[j] = alt[i];
349 ++j;
350 }
351 }
352 nalt = j;
353 }
354
355 return nalt;
356}
357
358/*
359 * Map of which direct events on which PMCs are marked instruction events.
360 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
361 * Bit 0 is set if it is marked for all PMCs.
362 * The 0x80 bit indicates a byte decode PMCSEL value.
363 */
364static unsigned char direct_event_is_marked[0x28] = {
365 0, /* 00 */
366 0x1f, /* 01 PM_IOPS_CMPL */
367 0x2, /* 02 PM_MRK_GRP_DISP */
368 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
369 0, /* 04 */
370 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
371 0x80, /* 06 */
372 0x80, /* 07 */
373 0, 0, 0,/* 08 - 0a */
374 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
375 0, /* 0c */
376 0x80, /* 0d */
377 0x80, /* 0e */
378 0, /* 0f */
379 0, /* 10 */
380 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
381 0, /* 12 */
382 0x10, /* 13 PM_MRK_GRP_CMPL */
383 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
384 0x2, /* 15 PM_MRK_GRP_ISSUED */
385 0x80, /* 16 */
386 0x80, /* 17 */
387 0, 0, 0, 0, 0,
388 0x80, /* 1d */
389 0x80, /* 1e */
390 0, /* 1f */
391 0x80, /* 20 */
392 0x80, /* 21 */
393 0x80, /* 22 */
394 0x80, /* 23 */
395 0x80, /* 24 */
396 0x80, /* 25 */
397 0x80, /* 26 */
398 0x80, /* 27 */
399};
400
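
For example, entry 0x05 above holds 0x1c, i.e. bits 2, 3 and 4 set: PMCSEL 0x05 counts a marked event only when programmed on PMC2, PMC3 or PMC4, which is exactly the (1 << pmc) test in power5p_marked_instr_event() below. A tiny standalone sketch (not kernel code) of reading that bitmap:

#include <stdio.h>

int main(void)
{
	unsigned char entry = 0x1c;	/* direct_event_is_marked[0x05] */
	int pmc;

	for (pmc = 1; pmc <= 4; ++pmc)	/* prints: 0 1 1 1 */
		printf("PMC%d: %d\n", pmc, (entry >> pmc) & 1);
	return 0;
}
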
401/*
402 * Returns 1 if event counts things relating to marked instructions
403 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
404 */
405static int power5p_marked_instr_event(u64 event)
406{
407 int pmc, psel;
408 int bit, byte, unit;
409 u32 mask;
410
411 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
412 psel = event & PM_PMCSEL_MSK;
413 if (pmc >= 5)
414 return 0;
415
416 bit = -1;
417 if (psel < sizeof(direct_event_is_marked)) {
418 if (direct_event_is_marked[psel] & (1 << pmc))
419 return 1;
420 if (direct_event_is_marked[psel] & 0x80)
421 bit = 4;
422 else if (psel == 0x08)
423 bit = pmc - 1;
424 else if (psel == 0x10)
425 bit = 4 - pmc;
426 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
427 bit = 4;
428 } else if ((psel & 0x48) == 0x40) {
429 bit = psel & 7;
430 } else if (psel == 0x28) {
431 bit = pmc - 1;
432 } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
433 bit = 4;
434 }
435
436 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
437 return 0;
438
439 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
440 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
441 if (unit == PM_LSU0) {
442 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
443 mask = 0x5dff00;
444 } else if (unit == PM_LSU1 && byte >= 4) {
445 byte -= 4;
446 /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
447 mask = 0x5f11c000;
448 } else
449 return 0;
450
451 return (mask >> (byte * 8 + bit)) & 1;
452}
453
454static int power5p_compute_mmcr(u64 event[], int n_ev,
455 unsigned int hwc[], unsigned long mmcr[])
456{
457 unsigned long mmcr1 = 0;
458 unsigned long mmcra = 0;
459 unsigned int pmc, unit, byte, psel;
460 unsigned int ttm;
461 int i, isbus, bit, grsel;
462 unsigned int pmc_inuse = 0;
463 unsigned char busbyte[4];
464 unsigned char unituse[16];
465 int ttmuse;
466
467 if (n_ev > 6)
468 return -1;
469
470 /* First pass to count resource use */
471 memset(busbyte, 0, sizeof(busbyte));
472 memset(unituse, 0, sizeof(unituse));
473 for (i = 0; i < n_ev; ++i) {
474 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
475 if (pmc) {
476 if (pmc > 6)
477 return -1;
478 if (pmc_inuse & (1 << (pmc - 1)))
479 return -1;
480 pmc_inuse |= 1 << (pmc - 1);
481 }
482 if (event[i] & PM_BUSEVENT_MSK) {
483 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
484 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
485 if (unit > PM_LASTUNIT)
486 return -1;
487 if (unit == PM_ISU0_ALT)
488 unit = PM_ISU0;
489 if (byte >= 4) {
490 if (unit != PM_LSU1)
491 return -1;
492 ++unit;
493 byte &= 3;
494 }
495 if (busbyte[byte] && busbyte[byte] != unit)
496 return -1;
497 busbyte[byte] = unit;
498 unituse[unit] = 1;
499 }
500 }
501
502 /*
503 * Assign resources and set multiplexer selects.
504 *
505 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
506 * choice we have to deal with.
507 */
508 if (unituse[PM_ISU0] &
509 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
510 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
511 unituse[PM_ISU0] = 0;
512 }
513 /* Set TTM[01]SEL fields. */
514 ttmuse = 0;
515 for (i = PM_FPU; i <= PM_ISU1; ++i) {
516 if (!unituse[i])
517 continue;
518 if (ttmuse++)
519 return -1;
520 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
521 }
522 ttmuse = 0;
523 for (; i <= PM_GRS; ++i) {
524 if (!unituse[i])
525 continue;
526 if (ttmuse++)
527 return -1;
528 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
529 }
530 if (ttmuse > 1)
531 return -1;
532
533 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
534 for (byte = 0; byte < 4; ++byte) {
535 unit = busbyte[byte];
536 if (!unit)
537 continue;
538 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
539 /* get ISU0 through TTM1 rather than TTM0 */
540 unit = PM_ISU0_ALT;
541 } else if (unit == PM_LSU1 + 1) {
542 /* select lower word of LSU1 for this byte */
543 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
544 }
545 ttm = unit >> 2;
546 mmcr1 |= (unsigned long)ttm
547 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
548 }
549
550 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
551 for (i = 0; i < n_ev; ++i) {
552 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
553 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
554 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
555 psel = event[i] & PM_PMCSEL_MSK;
556 isbus = event[i] & PM_BUSEVENT_MSK;
557 if (!pmc) {
558 /* Bus event or any-PMC direct event */
559 for (pmc = 0; pmc < 4; ++pmc) {
560 if (!(pmc_inuse & (1 << pmc)))
561 break;
562 }
563 if (pmc >= 4)
564 return -1;
565 pmc_inuse |= 1 << pmc;
566 } else if (pmc <= 4) {
567 /* Direct event */
568 --pmc;
569 if (isbus && (byte & 2) &&
570 (psel == 8 || psel == 0x10 || psel == 0x28))
571 /* add events on higher-numbered bus */
572 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
573 } else {
574 /* Instructions or run cycles on PMC5/6 */
575 --pmc;
576 }
577 if (isbus && unit == PM_GRS) {
578 bit = psel & 7;
579 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
580 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
581 }
582 if (power5p_marked_instr_event(event[i]))
583 mmcra |= MMCRA_SAMPLE_ENABLE;
584 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
585 /* select alternate byte lane */
586 psel |= 0x10;
587 if (pmc <= 3)
588 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
589 hwc[i] = pmc;
590 }
591
592 /* Return MMCRx values */
593 mmcr[0] = 0;
594 if (pmc_inuse & 1)
595 mmcr[0] = MMCR0_PMC1CE;
596 if (pmc_inuse & 0x3e)
597 mmcr[0] |= MMCR0_PMCjCE;
598 mmcr[1] = mmcr1;
599 mmcr[2] = mmcra;
600 return 0;
601}
602
603static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
604{
605 if (pmc <= 3)
606 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
607}
608
609static int power5p_generic_events[] = {
610 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
611 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
612 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
613 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
614 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
615 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
616};
617
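
These indices are what userspace selects with PERF_TYPE_HARDWARE; the table above is how the generic event is translated into a POWER5+ raw code (branch-misses becomes 0x230e5, for instance). A minimal userspace sketch of counting that event through the generic interface, assuming the standard Linux perf_event_open() syscall usage; this is illustration only, not part of this file:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_BRANCH_MISSES;	/* mapped via the table above */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("branch misses: %lld\n", count);
	close(fd);
	return 0;
}
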
618#define C(x) PERF_COUNT_HW_CACHE_##x
619
620/*
621 * Table of generalized cache-related events.
622 * 0 means not supported, -1 means nonsensical, other values
623 * are event codes.
624 */
625static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
626 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
627 [C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
628 [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
629 [C(OP_PREFETCH)] = { 0xc70e7, -1 },
630 },
631 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
632 [C(OP_READ)] = { 0, 0 },
633 [C(OP_WRITE)] = { -1, -1 },
634 [C(OP_PREFETCH)] = { 0, 0 },
635 },
636 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
637 [C(OP_READ)] = { 0, 0 },
638 [C(OP_WRITE)] = { 0, 0 },
639 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
640 },
641 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
642 [C(OP_READ)] = { 0xc20e4, 0x800c4 },
643 [C(OP_WRITE)] = { -1, -1 },
644 [C(OP_PREFETCH)] = { -1, -1 },
645 },
646 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
647 [C(OP_READ)] = { 0, 0x800c0 },
648 [C(OP_WRITE)] = { -1, -1 },
649 [C(OP_PREFETCH)] = { -1, -1 },
650 },
651 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
652 [C(OP_READ)] = { 0x230e4, 0x230e5 },
653 [C(OP_WRITE)] = { -1, -1 },
654 [C(OP_PREFETCH)] = { -1, -1 },
655 },
656 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
657 [C(OP_READ)] = { -1, -1 },
658 [C(OP_WRITE)] = { -1, -1 },
659 [C(OP_PREFETCH)] = { -1, -1 },
660 },
661};
662
663static struct power_pmu power5p_pmu = {
664 .name = "POWER5+/++",
665 .n_counter = 6,
666 .max_alternatives = MAX_ALT,
667 .add_fields = 0x7000000000055ul,
668 .test_adder = 0x3000040000000ul,
669 .compute_mmcr = power5p_compute_mmcr,
670 .get_constraint = power5p_get_constraint,
671 .get_alternatives = power5p_get_alternatives,
672 .disable_pmc = power5p_disable_pmc,
673 .limited_pmc_event = power5p_limited_pmc_event,
674 .flags = PPMU_LIMITED_PMC5_6,
675 .n_generic = ARRAY_SIZE(power5p_generic_events),
676 .generic_events = power5p_generic_events,
677 .cache_events = &power5p_cache_events,
678};
679
680static int __init init_power5p_pmu(void)
681{
682 if (!cur_cpu_spec->oprofile_cpu_type ||
683 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
684 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")))
685 return -ENODEV;
686
687 return register_power_pmu(&power5p_pmu);
688}
689
690early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
deleted file mode 100644
index e7f06eb7a861..000000000000
--- a/arch/powerpc/kernel/power5-pmu.c
+++ /dev/null
@@ -1,629 +0,0 @@
1/*
2 * Performance counter support for POWER5 (not POWER5++) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER5 (not POWER5++)
19 */
20#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
24#define PM_UNIT_MSK 0xf
25#define PM_BYTE_SH 12 /* Byte number of event bus to use */
26#define PM_BYTE_MSK 7
27#define PM_GRS_SH 8 /* Storage subsystem mux select */
28#define PM_GRS_MSK 7
29#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
30#define PM_PMCSEL_MSK 0x7f
31
32/* Values in PM_UNIT field */
33#define PM_FPU 0
34#define PM_ISU0 1
35#define PM_IFU 2
36#define PM_ISU1 3
37#define PM_IDU 4
38#define PM_ISU0_ALT 6
39#define PM_GRS 7
40#define PM_LSU0 8
41#define PM_LSU1 0xc
42#define PM_LASTUNIT 0xc
43
44/*
45 * Bits in MMCR1 for POWER5
46 */
47#define MMCR1_TTM0SEL_SH 62
48#define MMCR1_TTM1SEL_SH 60
49#define MMCR1_TTM2SEL_SH 58
50#define MMCR1_TTM3SEL_SH 56
51#define MMCR1_TTMSEL_MSK 3
52#define MMCR1_TD_CP_DBG0SEL_SH 54
53#define MMCR1_TD_CP_DBG1SEL_SH 52
54#define MMCR1_TD_CP_DBG2SEL_SH 50
55#define MMCR1_TD_CP_DBG3SEL_SH 48
56#define MMCR1_GRS_L2SEL_SH 46
57#define MMCR1_GRS_L2SEL_MSK 3
58#define MMCR1_GRS_L3SEL_SH 44
59#define MMCR1_GRS_L3SEL_MSK 3
60#define MMCR1_GRS_MCSEL_SH 41
61#define MMCR1_GRS_MCSEL_MSK 7
62#define MMCR1_GRS_FABSEL_SH 39
63#define MMCR1_GRS_FABSEL_MSK 3
64#define MMCR1_PMC1_ADDER_SEL_SH 35
65#define MMCR1_PMC2_ADDER_SEL_SH 34
66#define MMCR1_PMC3_ADDER_SEL_SH 33
67#define MMCR1_PMC4_ADDER_SEL_SH 32
68#define MMCR1_PMC1SEL_SH 25
69#define MMCR1_PMC2SEL_SH 17
70#define MMCR1_PMC3SEL_SH 9
71#define MMCR1_PMC4SEL_SH 1
72#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
73#define MMCR1_PMCSEL_MSK 0x7f
74
75/*
76 * Layout of constraint bits:
77 * 6666555555555544444444443333333333222222222211111111110000000000
78 * 3210987654321098765432109876543210987654321098765432109876543210
79 * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
80 * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
81 *
82 * T0 - TTM0 constraint
83 * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
84 *
85 * T1 - TTM1 constraint
86 * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
87 *
88 * NC - number of counters
89 * 51: NC error 0x0008_0000_0000_0000
90 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
91 *
92 * G0..G3 - GRS mux constraints
93 * 46-47: GRS_L2SEL value
94 * 44-45: GRS_L3SEL value
95 * 41-43: GRS_MCSEL value
96 * 39-40: GRS_FABSEL value
97 * Note that these match up with their bit positions in MMCR1
98 *
99 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
100 * 37: UC3 error 0x20_0000_0000
101 * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
102 * 35: ISU0 events needed 0x08_0000_0000
103 * 34: IDU|GRS events needed 0x04_0000_0000
104 *
105 * PS1
106 * 33: PS1 error 0x2_0000_0000
107 * 31-32: count of events needing PMC1/2 0x1_8000_0000
108 *
109 * PS2
110 * 30: PS2 error 0x4000_0000
111 * 28-29: count of events needing PMC3/4 0x3000_0000
112 *
113 * B0
114 * 24-27: Byte 0 event source 0x0f00_0000
115 * Encoding as for the event code
116 *
117 * B1, B2, B3
118 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
119 *
120 * P1..P6
121 * 0-11: Count of events needing PMC1..PMC6
122 */
123
124static const int grsel_shift[8] = {
125 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
126 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
127 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
128};
129
130/* Masks and values for using events from the various units */
131static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
132 [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul },
133 [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul },
134 [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul },
135 [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul },
136 [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul },
137 [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul },
138};
139
140static int power5_get_constraint(u64 event, unsigned long *maskp,
141 unsigned long *valp)
142{
143 int pmc, byte, unit, sh;
144 int bit, fmask;
145 unsigned long mask = 0, value = 0;
146 int grp = -1;
147
148 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
149 if (pmc) {
150 if (pmc > 6)
151 return -1;
152 sh = (pmc - 1) * 2;
153 mask |= 2 << sh;
154 value |= 1 << sh;
155 if (pmc <= 4)
156 grp = (pmc - 1) >> 1;
157 else if (event != 0x500009 && event != 0x600005)
158 return -1;
159 }
160 if (event & PM_BUSEVENT_MSK) {
161 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
162 if (unit > PM_LASTUNIT)
163 return -1;
164 if (unit == PM_ISU0_ALT)
165 unit = PM_ISU0;
166 mask |= unit_cons[unit][0];
167 value |= unit_cons[unit][1];
168 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
169 if (byte >= 4) {
170 if (unit != PM_LSU1)
171 return -1;
172 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
173 ++unit;
174 byte &= 3;
175 }
176 if (unit == PM_GRS) {
177 bit = event & 7;
178 fmask = (bit == 6)? 7: 3;
179 sh = grsel_shift[bit];
180 mask |= (unsigned long)fmask << sh;
181 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
182 << sh;
183 }
184 /*
185 * Bus events on bytes 0 and 2 can be counted
186 * on PMC1/2; bytes 1 and 3 on PMC3/4.
187 */
188 if (!pmc)
189 grp = byte & 1;
190 /* Set byte lane select field */
191 mask |= 0xfUL << (24 - 4 * byte);
192 value |= (unsigned long)unit << (24 - 4 * byte);
193 }
194 if (grp == 0) {
195 /* increment PMC1/2 field */
196 mask |= 0x200000000ul;
197 value |= 0x080000000ul;
198 } else if (grp == 1) {
199 /* increment PMC3/4 field */
200 mask |= 0x40000000ul;
201 value |= 0x10000000ul;
202 }
203 if (pmc < 5) {
204 /* need a counter from PMC1-4 set */
205 mask |= 0x8000000000000ul;
206 value |= 0x1000000000000ul;
207 }
208 *maskp = mask;
209 *valp = value;
210 return 0;
211}
212
213#define MAX_ALT 3 /* at most 3 alternatives for any event */
214
215static const unsigned int event_alternatives[][MAX_ALT] = {
216 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
217 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
218 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
219 { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
220 { 0x300009, 0x400009 }, /* PM_INST_DISP */
221};
222
223/*
224 * Scan the alternatives table for a match and return the
225 * index into the alternatives table if found, else -1.
226 */
227static int find_alternative(u64 event)
228{
229 int i, j;
230
231 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
232 if (event < event_alternatives[i][0])
233 break;
234 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
235 if (event == event_alternatives[i][j])
236 return i;
237 }
238 return -1;
239}
240
241static const unsigned char bytedecode_alternatives[4][4] = {
242 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
243 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
244 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
245 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
246};
247
248/*
249 * Some direct events for decodes of event bus byte 3 have alternative
250 * PMCSEL values on other counters. This returns the alternative
251 * event code for those that do, or -1 otherwise.
252 */
253static s64 find_alternative_bdecode(u64 event)
254{
255 int pmc, altpmc, pp, j;
256
257 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
258 if (pmc == 0 || pmc > 4)
259 return -1;
260 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
261 pp = event & PM_PMCSEL_MSK;
262 for (j = 0; j < 4; ++j) {
263 if (bytedecode_alternatives[pmc - 1][j] == pp) {
264 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
265 (altpmc << PM_PMC_SH) |
266 bytedecode_alternatives[altpmc - 1][j];
267 }
268 }
269 return -1;
270}
271
272static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
273{
274 int i, j, nalt = 1;
275 s64 ae;
276
277 alt[0] = event;
278 nalt = 1;
279 i = find_alternative(event);
280 if (i >= 0) {
281 for (j = 0; j < MAX_ALT; ++j) {
282 ae = event_alternatives[i][j];
283 if (ae && ae != event)
284 alt[nalt++] = ae;
285 }
286 } else {
287 ae = find_alternative_bdecode(event);
288 if (ae > 0)
289 alt[nalt++] = ae;
290 }
291 return nalt;
292}
293
294/*
295 * Map of which direct events on which PMCs are marked instruction events.
296 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
297 * Bit 0 is set if it is marked for all PMCs.
298 * The 0x80 bit indicates a byte decode PMCSEL value.
299 */
300static unsigned char direct_event_is_marked[0x28] = {
301 0, /* 00 */
302 0x1f, /* 01 PM_IOPS_CMPL */
303 0x2, /* 02 PM_MRK_GRP_DISP */
304 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
305 0, /* 04 */
306 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
307 0x80, /* 06 */
308 0x80, /* 07 */
309 0, 0, 0,/* 08 - 0a */
310 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
311 0, /* 0c */
312 0x80, /* 0d */
313 0x80, /* 0e */
314 0, /* 0f */
315 0, /* 10 */
316 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
317 0, /* 12 */
318 0x10, /* 13 PM_MRK_GRP_CMPL */
319 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
320 0x2, /* 15 PM_MRK_GRP_ISSUED */
321 0x80, /* 16 */
322 0x80, /* 17 */
323 0, 0, 0, 0, 0,
324 0x80, /* 1d */
325 0x80, /* 1e */
326 0, /* 1f */
327 0x80, /* 20 */
328 0x80, /* 21 */
329 0x80, /* 22 */
330 0x80, /* 23 */
331 0x80, /* 24 */
332 0x80, /* 25 */
333 0x80, /* 26 */
334 0x80, /* 27 */
335};
336
337/*
338 * Returns 1 if event counts things relating to marked instructions
339 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
340 */
341static int power5_marked_instr_event(u64 event)
342{
343 int pmc, psel;
344 int bit, byte, unit;
345 u32 mask;
346
347 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
348 psel = event & PM_PMCSEL_MSK;
349 if (pmc >= 5)
350 return 0;
351
352 bit = -1;
353 if (psel < sizeof(direct_event_is_marked)) {
354 if (direct_event_is_marked[psel] & (1 << pmc))
355 return 1;
356 if (direct_event_is_marked[psel] & 0x80)
357 bit = 4;
358 else if (psel == 0x08)
359 bit = pmc - 1;
360 else if (psel == 0x10)
361 bit = 4 - pmc;
362 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
363 bit = 4;
364 } else if ((psel & 0x58) == 0x40)
365 bit = psel & 7;
366
367 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
368 return 0;
369
370 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
371 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
372 if (unit == PM_LSU0) {
373 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
374 mask = 0x5dff00;
375 } else if (unit == PM_LSU1 && byte >= 4) {
376 byte -= 4;
377 /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
378 mask = 0x5f00c0aa;
379 } else
380 return 0;
381
382 return (mask >> (byte * 8 + bit)) & 1;
383}
384
385static int power5_compute_mmcr(u64 event[], int n_ev,
386 unsigned int hwc[], unsigned long mmcr[])
387{
388 unsigned long mmcr1 = 0;
389 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
390 unsigned int pmc, unit, byte, psel;
391 unsigned int ttm, grp;
392 int i, isbus, bit, grsel;
393 unsigned int pmc_inuse = 0;
394 unsigned int pmc_grp_use[2];
395 unsigned char busbyte[4];
396 unsigned char unituse[16];
397 int ttmuse;
398
399 if (n_ev > 6)
400 return -1;
401
402 /* First pass to count resource use */
403 pmc_grp_use[0] = pmc_grp_use[1] = 0;
404 memset(busbyte, 0, sizeof(busbyte));
405 memset(unituse, 0, sizeof(unituse));
406 for (i = 0; i < n_ev; ++i) {
407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
408 if (pmc) {
409 if (pmc > 6)
410 return -1;
411 if (pmc_inuse & (1 << (pmc - 1)))
412 return -1;
413 pmc_inuse |= 1 << (pmc - 1);
414 /* count 1/2 vs 3/4 use */
415 if (pmc <= 4)
416 ++pmc_grp_use[(pmc - 1) >> 1];
417 }
418 if (event[i] & PM_BUSEVENT_MSK) {
419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
421 if (unit > PM_LASTUNIT)
422 return -1;
423 if (unit == PM_ISU0_ALT)
424 unit = PM_ISU0;
425 if (byte >= 4) {
426 if (unit != PM_LSU1)
427 return -1;
428 ++unit;
429 byte &= 3;
430 }
431 if (!pmc)
432 ++pmc_grp_use[byte & 1];
433 if (busbyte[byte] && busbyte[byte] != unit)
434 return -1;
435 busbyte[byte] = unit;
436 unituse[unit] = 1;
437 }
438 }
439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
440 return -1;
441
442 /*
443 * Assign resources and set multiplexer selects.
444 *
445 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
446 * choice we have to deal with.
447 */
448 if (unituse[PM_ISU0] &
449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
450 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
451 unituse[PM_ISU0] = 0;
452 }
453 /* Set TTM[01]SEL fields. */
454 ttmuse = 0;
455 for (i = PM_FPU; i <= PM_ISU1; ++i) {
456 if (!unituse[i])
457 continue;
458 if (ttmuse++)
459 return -1;
460 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
461 }
462 ttmuse = 0;
463 for (; i <= PM_GRS; ++i) {
464 if (!unituse[i])
465 continue;
466 if (ttmuse++)
467 return -1;
468 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
469 }
470 if (ttmuse > 1)
471 return -1;
472
473 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
474 for (byte = 0; byte < 4; ++byte) {
475 unit = busbyte[byte];
476 if (!unit)
477 continue;
478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
479 /* get ISU0 through TTM1 rather than TTM0 */
480 unit = PM_ISU0_ALT;
481 } else if (unit == PM_LSU1 + 1) {
482 /* select lower word of LSU1 for this byte */
483 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
484 }
485 ttm = unit >> 2;
486 mmcr1 |= (unsigned long)ttm
487 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
488 }
489
490 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
491 for (i = 0; i < n_ev; ++i) {
492 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
493 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
494 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
495 psel = event[i] & PM_PMCSEL_MSK;
496 isbus = event[i] & PM_BUSEVENT_MSK;
497 if (!pmc) {
498 /* Bus event or any-PMC direct event */
499 for (pmc = 0; pmc < 4; ++pmc) {
500 if (pmc_inuse & (1 << pmc))
501 continue;
502 grp = (pmc >> 1) & 1;
503 if (isbus) {
504 if (grp == (byte & 1))
505 break;
506 } else if (pmc_grp_use[grp] < 2) {
507 ++pmc_grp_use[grp];
508 break;
509 }
510 }
511 pmc_inuse |= 1 << pmc;
512 } else if (pmc <= 4) {
513 /* Direct event */
514 --pmc;
515 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
516 /* add events on higher-numbered bus */
517 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
518 } else {
519 /* Instructions or run cycles on PMC5/6 */
520 --pmc;
521 }
522 if (isbus && unit == PM_GRS) {
523 bit = psel & 7;
524 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
525 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
526 }
527 if (power5_marked_instr_event(event[i]))
528 mmcra |= MMCRA_SAMPLE_ENABLE;
529 if (pmc <= 3)
530 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
531 hwc[i] = pmc;
532 }
533
534 /* Return MMCRx values */
535 mmcr[0] = 0;
536 if (pmc_inuse & 1)
537 mmcr[0] = MMCR0_PMC1CE;
538 if (pmc_inuse & 0x3e)
539 mmcr[0] |= MMCR0_PMCjCE;
540 mmcr[1] = mmcr1;
541 mmcr[2] = mmcra;
542 return 0;
543}
544
545static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
546{
547 if (pmc <= 3)
548 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
549}
550
551static int power5_generic_events[] = {
552 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
553 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
554 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
555 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
556 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
557 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
558};
559
560#define C(x) PERF_COUNT_HW_CACHE_##x
561
562/*
563 * Table of generalized cache-related events.
564 * 0 means not supported, -1 means nonsensical, other values
565 * are event codes.
566 */
567static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
568 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
569 [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
570 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
571 [C(OP_PREFETCH)] = { 0xc70e7, 0 },
572 },
573 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
574 [C(OP_READ)] = { 0, 0 },
575 [C(OP_WRITE)] = { -1, -1 },
576 [C(OP_PREFETCH)] = { 0, 0 },
577 },
578 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
579 [C(OP_READ)] = { 0, 0x3c309b },
580 [C(OP_WRITE)] = { 0, 0 },
581 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
582 },
583 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
584 [C(OP_READ)] = { 0x2c4090, 0x800c4 },
585 [C(OP_WRITE)] = { -1, -1 },
586 [C(OP_PREFETCH)] = { -1, -1 },
587 },
588 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
589 [C(OP_READ)] = { 0, 0x800c0 },
590 [C(OP_WRITE)] = { -1, -1 },
591 [C(OP_PREFETCH)] = { -1, -1 },
592 },
593 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
594 [C(OP_READ)] = { 0x230e4, 0x230e5 },
595 [C(OP_WRITE)] = { -1, -1 },
596 [C(OP_PREFETCH)] = { -1, -1 },
597 },
598 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
599 [C(OP_READ)] = { -1, -1 },
600 [C(OP_WRITE)] = { -1, -1 },
601 [C(OP_PREFETCH)] = { -1, -1 },
602 },
603};
604
605static struct power_pmu power5_pmu = {
606 .name = "POWER5",
607 .n_counter = 6,
608 .max_alternatives = MAX_ALT,
609 .add_fields = 0x7000090000555ul,
610 .test_adder = 0x3000490000000ul,
611 .compute_mmcr = power5_compute_mmcr,
612 .get_constraint = power5_get_constraint,
613 .get_alternatives = power5_get_alternatives,
614 .disable_pmc = power5_disable_pmc,
615 .n_generic = ARRAY_SIZE(power5_generic_events),
616 .generic_events = power5_generic_events,
617 .cache_events = &power5_cache_events,
618};
619
620static int __init init_power5_pmu(void)
621{
622 if (!cur_cpu_spec->oprofile_cpu_type ||
623 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
624 return -ENODEV;
625
626 return register_power_pmu(&power5_pmu);
627}
628
629early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
deleted file mode 100644
index 0bbc901e7efc..000000000000
--- a/arch/powerpc/kernel/power6-pmu.c
+++ /dev/null
@@ -1,552 +0,0 @@
1/*
2 * Performance counter support for POWER6 processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER6
19 */
20#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0x7
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23#define PM_UNIT_SH 16 /* Unit event comes from (TTMxSEL encoding) */
24#define PM_UNIT_MSK 0xf
25#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
26#define PM_LLAV 0x8000 /* Load lookahead match value */
27#define PM_LLA 0x4000 /* Load lookahead match enable */
28#define PM_BYTE_SH 12 /* Byte of event bus to use */
29#define PM_BYTE_MSK 3
30#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
31#define PM_SUBUNIT_MSK 7
32#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
33#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
34#define PM_BUSEVENT_MSK 0xf3700
35
36/*
37 * Bits in MMCR1 for POWER6
38 */
39#define MMCR1_TTM0SEL_SH 60
40#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
41#define MMCR1_TTMSEL_MSK 0xf
42#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
43#define MMCR1_NESTSEL_SH 45
44#define MMCR1_NESTSEL_MSK 0x7
45#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
46#define MMCR1_PMC1_LLA (1ul << 44)
47#define MMCR1_PMC1_LLA_VALUE (1ul << 39)
48#define MMCR1_PMC1_ADDR_SEL (1ul << 35)
49#define MMCR1_PMC1SEL_SH 24
50#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
51#define MMCR1_PMCSEL_MSK 0xff
52
53/*
54 * Map of which direct events on which PMCs are marked instruction events.
55 * Indexed by PMCSEL value >> 1.
56 * Bottom 4 bits are a map of which PMCs are interesting,
57 * top 4 bits say what sort of event:
58 * 0 = direct marked event,
59 * 1 = byte decode event,
60 * 4 = add/and event (PMC1 -> bits 0 & 4),
61 * 5 = add/and event (PMC1 -> bits 1 & 5),
62 * 6 = add/and event (PMC1 -> bits 2 & 6),
63 * 7 = add/and event (PMC1 -> bits 3 & 7).
64 */
65static unsigned char direct_event_is_marked[0x60 >> 1] = {
66 0, /* 00 */
67 0, /* 02 */
68 0, /* 04 */
69 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
70 0x04, /* 08 PM_MRK_DFU_FIN */
71 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
72 0, /* 0c */
73 0, /* 0e */
74 0x02, /* 10 PM_MRK_INST_DISP */
75 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */
76 0, /* 14 */
77 0, /* 16 */
78 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
79 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
80 0x01, /* 1c PM_MRK_INST_ISSUED */
81 0, /* 1e */
82 0, /* 20 */
83 0, /* 22 */
84 0, /* 24 */
85 0, /* 26 */
86 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
87 0, /* 2a */
88 0, /* 2c */
89 0, /* 2e */
90 0x4f, /* 30 */
91 0x7f, /* 32 */
92 0x4f, /* 34 */
93 0x5f, /* 36 */
94 0x6f, /* 38 */
95 0x4f, /* 3a */
96 0, /* 3c */
97 0x08, /* 3e PM_MRK_INST_TIMEO */
98 0x1f, /* 40 */
99 0x1f, /* 42 */
100 0x1f, /* 44 */
101 0x1f, /* 46 */
102 0x1f, /* 48 */
103 0x1f, /* 4a */
104 0x1f, /* 4c */
105 0x1f, /* 4e */
106 0, /* 50 */
107 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
108 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
109 0x02, /* 56 PM_MRK_LD_MISS_L1 */
110 0, /* 58 */
111 0, /* 5a */
112 0, /* 5c */
113 0, /* 5e */
114};
115
116/*
117 * Masks showing for each unit which bits are marked events.
118 * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
119 */
120static u32 marked_bus_events[16] = {
121 0x01000000, /* direct events set 1: byte 3 bit 0 */
122 0x00010000, /* direct events set 2: byte 2 bit 0 */
123 0, 0, 0, 0, /* IDU, IFU, nest: nothing */
124 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */
125 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */
126 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
127 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */
128 0, /* LSU set 3 */
129 0x00000010, /* VMX set 3: byte 0 bit 4 */
130 0, /* BFP set 1 */
131 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */
132 0, 0
133};
134
135/*
136 * Returns 1 if event counts things relating to marked instructions
137 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
138 */
139static int power6_marked_instr_event(u64 event)
140{
141 int pmc, psel, ptype;
142 int bit, byte, unit;
143 u32 mask;
144
145 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
146 psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */
147 if (pmc >= 5)
148 return 0;
149
150 bit = -1;
151 if (psel < sizeof(direct_event_is_marked)) {
152 ptype = direct_event_is_marked[psel];
153 if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
154 return 0;
155 ptype >>= 4;
156 if (ptype == 0)
157 return 1;
158 if (ptype == 1)
159 bit = 0;
160 else
161 bit = ptype ^ (pmc - 1);
162 } else if ((psel & 0x48) == 0x40)
163 bit = psel & 7;
164
165 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
166 return 0;
167
168 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
169 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
170 mask = marked_bus_events[unit];
171 return (mask >> (byte * 8 + bit)) & 1;
172}
173
174/*
175 * Assign PMC numbers and compute MMCR1 value for a set of events
176 */
177static int p6_compute_mmcr(u64 event[], int n_ev,
178 unsigned int hwc[], unsigned long mmcr[])
179{
180 unsigned long mmcr1 = 0;
181 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
182 int i;
183 unsigned int pmc, ev, b, u, s, psel;
184 unsigned int ttmset = 0;
185 unsigned int pmc_inuse = 0;
186
187 if (n_ev > 6)
188 return -1;
189 for (i = 0; i < n_ev; ++i) {
190 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
191 if (pmc) {
192 if (pmc_inuse & (1 << (pmc - 1)))
193 return -1; /* collision! */
194 pmc_inuse |= 1 << (pmc - 1);
195 }
196 }
197 for (i = 0; i < n_ev; ++i) {
198 ev = event[i];
199 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
200 if (pmc) {
201 --pmc;
202 } else {
203 /* can go on any PMC; find a free one */
204 for (pmc = 0; pmc < 4; ++pmc)
205 if (!(pmc_inuse & (1 << pmc)))
206 break;
207 if (pmc >= 4)
208 return -1;
209 pmc_inuse |= 1 << pmc;
210 }
211 hwc[i] = pmc;
212 psel = ev & PM_PMCSEL_MSK;
213 if (ev & PM_BUSEVENT_MSK) {
214 /* this event uses the event bus */
215 b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
216 u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
217 /* check for conflict on this byte of event bus */
218 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
219 return -1;
220 mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
221 ttmset |= 1 << b;
222 if (u == 5) {
223 /* Nest events have a further mux */
224 s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
225 if ((ttmset & 0x10) &&
226 MMCR1_NESTSEL(mmcr1) != s)
227 return -1;
228 ttmset |= 0x10;
229 mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
230 }
231 if (0x30 <= psel && psel <= 0x3d) {
232 /* these need the PMCx_ADDR_SEL bits */
233 if (b >= 2)
234 mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
235 }
236 /* bus select values are different for PMC3/4 */
237 if (pmc >= 2 && (psel & 0x90) == 0x80)
238 psel ^= 0x20;
239 }
240 if (ev & PM_LLA) {
241 mmcr1 |= MMCR1_PMC1_LLA >> pmc;
242 if (ev & PM_LLAV)
243 mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
244 }
245 if (power6_marked_instr_event(event[i]))
246 mmcra |= MMCRA_SAMPLE_ENABLE;
247 if (pmc < 4)
248 mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
249 }
250 mmcr[0] = 0;
251 if (pmc_inuse & 1)
252 mmcr[0] = MMCR0_PMC1CE;
253 if (pmc_inuse & 0xe)
254 mmcr[0] |= MMCR0_PMCjCE;
255 mmcr[1] = mmcr1;
256 mmcr[2] = mmcra;
257 return 0;
258}
259
260/*
261 * Layout of constraint bits:
262 *
263 * 0-1 add field: number of uses of PMC1 (max 1)
264 * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
265 * 12-15 add field: number of uses of PMC1-4 (max 4)
266 * 16-19 select field: unit on byte 0 of event bus
267 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
268 * 32-34 select field: nest (subunit) event selector
269 */
270static int p6_get_constraint(u64 event, unsigned long *maskp,
271 unsigned long *valp)
272{
273 int pmc, byte, sh, subunit;
274 unsigned long mask = 0, value = 0;
275
276 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
277 if (pmc) {
278 if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
279 return -1;
280 sh = (pmc - 1) * 2;
281 mask |= 2 << sh;
282 value |= 1 << sh;
283 }
284 if (event & PM_BUSEVENT_MSK) {
285 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
286 sh = byte * 4 + (16 - PM_UNIT_SH);
287 mask |= PM_UNIT_MSKS << sh;
288 value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
289 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
290 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
291 mask |= (unsigned long)PM_SUBUNIT_MSK << 32;
292 value |= (unsigned long)subunit << 32;
293 }
294 }
295 if (pmc <= 4) {
296 mask |= 0x8000; /* add field for count of PMC1-4 uses */
297 value |= 0x1000;
298 }
299 *maskp = mask;
300 *valp = value;
301 return 0;
302}
303
304static int p6_limited_pmc_event(u64 event)
305{
306 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
307
308 return pmc == 5 || pmc == 6;
309}
310
311#define MAX_ALT 4 /* at most 4 alternatives for any event */
312
313static const unsigned int event_alternatives[][MAX_ALT] = {
314 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
315 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
316 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
317 { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
318 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
319 { 0x10000e, 0x400010 }, /* PM_PURR */
320 { 0x100010, 0x4000f8 }, /* PM_FLUSH */
321 { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
322 { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
323 { 0x100054, 0x2000f0 }, /* PM_ST_FIN */
324 { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
325 { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
326 { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
327 { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
328 { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
329 { 0x200012, 0x300012 }, /* PM_INST_DISP */
330 { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
331 { 0x2000f8, 0x300010 }, /* PM_EXT_INT */
332 { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
333 { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
334 { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
335 { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
336 { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
337};
338
339/*
340 * This could be made more efficient with a binary search on
341 * a presorted list, if necessary
342 */
343static int find_alternatives_list(u64 event)
344{
345 int i, j;
346 unsigned int alt;
347
348 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
349 if (event < event_alternatives[i][0])
350 return -1;
351 for (j = 0; j < MAX_ALT; ++j) {
352 alt = event_alternatives[i][j];
353 if (!alt || event < alt)
354 break;
355 if (event == alt)
356 return i;
357 }
358 }
359 return -1;
360}
361
362static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
363{
364 int i, j, nlim;
365 unsigned int psel, pmc;
366 unsigned int nalt = 1;
367 u64 aevent;
368
369 alt[0] = event;
370 nlim = p6_limited_pmc_event(event);
371
372 /* check the alternatives table */
373 i = find_alternatives_list(event);
374 if (i >= 0) {
375 /* copy out alternatives from list */
376 for (j = 0; j < MAX_ALT; ++j) {
377 aevent = event_alternatives[i][j];
378 if (!aevent)
379 break;
380 if (aevent != event)
381 alt[nalt++] = aevent;
382 nlim += p6_limited_pmc_event(aevent);
383 }
384
385 } else {
386 /* Check for alternative ways of computing sum events */
387 /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
388 psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
389 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
390 if (pmc && (psel == 0x32 || psel == 0x34))
391 alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
392 ((5 - pmc) << PM_PMC_SH);
393
394 /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
395 if (pmc && (psel == 0x38 || psel == 0x3a))
396 alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
397 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
398 }
399
400 if (flags & PPMU_ONLY_COUNT_RUN) {
401 /*
402 * We're only counting in RUN state,
403 * so PM_CYC is equivalent to PM_RUN_CYC,
404 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
405 * This doesn't include alternatives that don't provide
406 * any extra flexibility in assigning PMCs (e.g.
407 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
408 * Note that even with these additional alternatives
409 * we never end up with more than 4 alternatives for any event.
410 */
411 j = nalt;
412 for (i = 0; i < nalt; ++i) {
413 switch (alt[i]) {
414 case 0x1e: /* PM_CYC */
415 alt[j++] = 0x600005; /* PM_RUN_CYC */
416 ++nlim;
417 break;
418 case 0x10000a: /* PM_RUN_CYC */
419 alt[j++] = 0x1e; /* PM_CYC */
420 break;
421 case 2: /* PM_INST_CMPL */
422 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
423 ++nlim;
424 break;
425 case 0x500009: /* PM_RUN_INST_CMPL */
426 alt[j++] = 2; /* PM_INST_CMPL */
427 break;
428 case 0x10000e: /* PM_PURR */
429 alt[j++] = 0x4000f4; /* PM_RUN_PURR */
430 break;
431 case 0x4000f4: /* PM_RUN_PURR */
432 alt[j++] = 0x10000e; /* PM_PURR */
433 break;
434 }
435 }
436 nalt = j;
437 }
438
439 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
440 /* remove the limited PMC events */
441 j = 0;
442 for (i = 0; i < nalt; ++i) {
443 if (!p6_limited_pmc_event(alt[i])) {
444 alt[j] = alt[i];
445 ++j;
446 }
447 }
448 nalt = j;
449 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
450 /* remove all but the limited PMC events */
451 j = 0;
452 for (i = 0; i < nalt; ++i) {
453 if (p6_limited_pmc_event(alt[i])) {
454 alt[j] = alt[i];
455 ++j;
456 }
457 }
458 nalt = j;
459 }
460
461 return nalt;
462}
463
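
As a worked example of the "sum event" rewrite above: PMCSEL 0x32 on PMC2 counts the same thing as PMCSEL 0x34 on PMC3 (= 5 - 2), so for a hypothetical event 0x200032 the code generates the alternative 0x300034. A standalone sketch of that arithmetic, with the POWER6 PM_PMC field shifts written out:

#include <stdio.h>

int main(void)
{
	unsigned int event = 0x200032;		/* hypothetical: PMC2, PMCSEL 0x32	*/
	unsigned int pmc = (event >> 20) & 0x7;	/* PM_PMC_SH / PM_PMC_MSK for POWER6	*/
	unsigned int alt = ((event ^ 0x6) & ~(0x7u << 20))	/* 0x32 -> 0x34, drop PMC */
			 | ((5 - pmc) << 20);			/* counter N -> 5 - N	  */

	printf("0x%x -> 0x%x\n", event, alt);	/* 0x200032 -> 0x300034 */
	return 0;
}
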
464static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
465{
466 /* Set PMCxSEL to 0 to disable PMCx */
467 if (pmc <= 3)
468 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
469}
470
471static int power6_generic_events[] = {
472 [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
473 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
474 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
475 [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
476 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
477 [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
478};
479
480#define C(x) PERF_COUNT_HW_CACHE_##x
481
482/*
483 * Table of generalized cache-related events.
484 * 0 means not supported, -1 means nonsensical, other values
485 * are event codes.
486 * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
487 */
488static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
489 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
490 [C(OP_READ)] = { 0x280030, 0x80080 },
491 [C(OP_WRITE)] = { 0x180032, 0x80088 },
492 [C(OP_PREFETCH)] = { 0x810a4, 0 },
493 },
494 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
495 [C(OP_READ)] = { 0, 0x100056 },
496 [C(OP_WRITE)] = { -1, -1 },
497 [C(OP_PREFETCH)] = { 0x4008c, 0 },
498 },
499 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
500 [C(OP_READ)] = { 0x150730, 0x250532 },
501 [C(OP_WRITE)] = { 0x250432, 0x150432 },
502 [C(OP_PREFETCH)] = { 0x810a6, 0 },
503 },
504 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
505 [C(OP_READ)] = { 0, 0x20000e },
506 [C(OP_WRITE)] = { -1, -1 },
507 [C(OP_PREFETCH)] = { -1, -1 },
508 },
509 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
510 [C(OP_READ)] = { 0, 0x420ce },
511 [C(OP_WRITE)] = { -1, -1 },
512 [C(OP_PREFETCH)] = { -1, -1 },
513 },
514 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
515 [C(OP_READ)] = { 0x430e6, 0x400052 },
516 [C(OP_WRITE)] = { -1, -1 },
517 [C(OP_PREFETCH)] = { -1, -1 },
518 },
519 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
520 [C(OP_READ)] = { -1, -1 },
521 [C(OP_WRITE)] = { -1, -1 },
522 [C(OP_PREFETCH)] = { -1, -1 },
523 },
524};
525
526static struct power_pmu power6_pmu = {
527 .name = "POWER6",
528 .n_counter = 6,
529 .max_alternatives = MAX_ALT,
530 .add_fields = 0x1555,
531 .test_adder = 0x3000,
532 .compute_mmcr = p6_compute_mmcr,
533 .get_constraint = p6_get_constraint,
534 .get_alternatives = p6_get_alternatives,
535 .disable_pmc = p6_disable_pmc,
536 .limited_pmc_event = p6_limited_pmc_event,
537 .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
538 .n_generic = ARRAY_SIZE(power6_generic_events),
539 .generic_events = power6_generic_events,
540 .cache_events = &power6_cache_events,
541};
542
543static int __init init_power6_pmu(void)
544{
545 if (!cur_cpu_spec->oprofile_cpu_type ||
546 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
547 return -ENODEV;
548
549 return register_power_pmu(&power6_pmu);
550}
551
552early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
deleted file mode 100644
index 1251e4d7e262..000000000000
--- a/arch/powerpc/kernel/power7-pmu.c
+++ /dev/null
@@ -1,379 +0,0 @@
1/*
2 * Performance counter support for POWER7 processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER7
19 */
20#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */
24#define PM_UNIT_MSK 0xf
25#define PM_COMBINE_SH 11 /* Combined event bit */
26#define PM_COMBINE_MSK 1
27#define PM_COMBINE_MSKS 0x800
28#define PM_L2SEL_SH 8 /* L2 event select */
29#define PM_L2SEL_MSK 7
30#define PM_PMCSEL_MSK 0xff
31
32/*
33 * Bits in MMCR1 for POWER7
34 */
35#define MMCR1_TTM0SEL_SH 60
36#define MMCR1_TTM1SEL_SH 56
37#define MMCR1_TTM2SEL_SH 52
38#define MMCR1_TTM3SEL_SH 48
39#define MMCR1_TTMSEL_MSK 0xf
40#define MMCR1_L2SEL_SH 45
41#define MMCR1_L2SEL_MSK 7
42#define MMCR1_PMC1_COMBINE_SH 35
43#define MMCR1_PMC2_COMBINE_SH 34
44#define MMCR1_PMC3_COMBINE_SH 33
45#define MMCR1_PMC4_COMBINE_SH 32
46#define MMCR1_PMC1SEL_SH 24
47#define MMCR1_PMC2SEL_SH 16
48#define MMCR1_PMC3SEL_SH 8
49#define MMCR1_PMC4SEL_SH 0
50#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
51#define MMCR1_PMCSEL_MSK 0xff
52
53/*
54 * Layout of constraint bits:
55 * 6666555555555544444444443333333333222222222211111111110000000000
56 * 3210987654321098765432109876543210987654321098765432109876543210
57 * [ ><><><><><><>
58 * NC P6P5P4P3P2P1
59 *
60 * NC - number of counters
61 * 15: NC error 0x8000
62 * 12-14: number of events needing PMC1-4 0x7000
63 *
64 * P6
65 * 11: P6 error 0x800
66 * 10-11: Count of events needing PMC6
67 *
68 * P1..P5
69 * 0-9: Count of events needing PMC1..PMC5
70 */
71
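
Two concrete (mask, value) pairs under this layout: PM_RUN_CYC (0x600f4) names PMC6, so it claims the two-bit PMC6 field (mask 0x800, value 0x400), while PM_CYC (0x1e) names no PMC and only bumps the PMC1-4 count field (mask 0x8000, value 0x1000). A standalone sketch mirroring the PMC-pinned path of power7_get_constraint() below, illustration only:

#include <stdio.h>

int main(void)
{
	unsigned int event = 0x600f4;		/* PM_RUN_CYC: PMC field = 6	*/
	unsigned int pmc = (event >> 16) & 0xf;
	unsigned long mask = 0, value = 0;

	if (pmc) {
		int sh = (pmc - 1) * 2;
		mask  |= 2 << sh;		/* 0x800: the "P6 error" bit	*/
		value |= 1 << sh;		/* 0x400: one user of PMC6	*/
	}
	if (pmc < 5) {				/* not taken for this event	*/
		mask  |= 0x8000;
		value |= 0x1000;
	}
	printf("mask 0x%lx value 0x%lx\n", mask, value);	/* 0x800 0x400 */
	return 0;
}
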
72static int power7_get_constraint(u64 event, unsigned long *maskp,
73 unsigned long *valp)
74{
75 int pmc, sh;
76 unsigned long mask = 0, value = 0;
77
78 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
79 if (pmc) {
80 if (pmc > 6)
81 return -1;
82 sh = (pmc - 1) * 2;
83 mask |= 2 << sh;
84 value |= 1 << sh;
85 if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4))
86 return -1;
87 }
88 if (pmc < 5) {
89 /* need a counter from PMC1-4 set */
90 mask |= 0x8000;
91 value |= 0x1000;
92 }
93 *maskp = mask;
94 *valp = value;
95 return 0;
96}
97
98#define MAX_ALT 2 /* at most 2 alternatives for any event */
99
100static const unsigned int event_alternatives[][MAX_ALT] = {
101 { 0x200f2, 0x300f2 }, /* PM_INST_DISP */
102 { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
103 { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
104};
105
106/*
107 * Scan the alternatives table for a match and return the
108 * index into the alternatives table if found, else -1.
109 */
110static int find_alternative(u64 event)
111{
112 int i, j;
113
114 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
115 if (event < event_alternatives[i][0])
116 break;
117 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
118 if (event == event_alternatives[i][j])
119 return i;
120 }
121 return -1;
122}
123
124static s64 find_alternative_decode(u64 event)
125{
126 int pmc, psel;
127
128 /* this only handles the 4x decode events */
129 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
130 psel = event & PM_PMCSEL_MSK;
131 if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
132 return event - (1 << PM_PMC_SH) + 8;
133 if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
134 return event + (1 << PM_PMC_SH) - 8;
135 return -1;
136}
137
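
Concretely, the 4x decode pairing above maps PMCSEL 0x40-0x47 on PMC2/PMC4 to PMCSEL 0x48-0x4f on PMC1/PMC3 and back. For a hypothetical event 0x20042 (PMC2, PMCSEL 0x42) the function returns 0x1004a (PMC1, PMCSEL 0x4a). A standalone sketch of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int event = 0x20042;			/* hypothetical: PMC2, PMCSEL 0x42 */
	unsigned int alt = event - (1 << 16) + 8;	/* PM_PMC_SH is 16 on POWER7	   */

	printf("0x%x <-> 0x%x\n", event, alt);		/* 0x20042 <-> 0x1004a */
	return 0;
}
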
138static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
139{
140 int i, j, nalt = 1;
141 s64 ae;
142
143 alt[0] = event;
144 nalt = 1;
145 i = find_alternative(event);
146 if (i >= 0) {
147 for (j = 0; j < MAX_ALT; ++j) {
148 ae = event_alternatives[i][j];
149 if (ae && ae != event)
150 alt[nalt++] = ae;
151 }
152 } else {
153 ae = find_alternative_decode(event);
154 if (ae > 0)
155 alt[nalt++] = ae;
156 }
157
158 if (flags & PPMU_ONLY_COUNT_RUN) {
159 /*
160 * We're only counting in RUN state,
161 * so PM_CYC is equivalent to PM_RUN_CYC
162 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
163 * This doesn't include alternatives that don't provide
164 * any extra flexibility in assigning PMCs.
165 */
166 j = nalt;
167 for (i = 0; i < nalt; ++i) {
168 switch (alt[i]) {
169 case 0x1e: /* PM_CYC */
170 alt[j++] = 0x600f4; /* PM_RUN_CYC */
171 break;
172 case 0x600f4: /* PM_RUN_CYC */
173 alt[j++] = 0x1e;
174 break;
175 case 0x2: /* PM_PPC_CMPL */
176 alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
177 break;
178 case 0x500fa: /* PM_RUN_INST_CMPL */
179 alt[j++] = 0x2; /* PM_PPC_CMPL */
180 break;
181 }
182 }
183 nalt = j;
184 }
185
186 return nalt;
187}
188
189/*
190 * Returns 1 if event counts things relating to marked instructions
191 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
192 */
193static int power7_marked_instr_event(u64 event)
194{
195 int pmc, psel;
196 int unit;
197
198 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
199 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
200 psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */
201 if (pmc >= 5)
202 return 0;
203
204 switch (psel >> 4) {
205 case 2:
206 return pmc == 2 || pmc == 4;
207 case 3:
208 if (psel == 0x3c)
209 return pmc == 1;
210 if (psel == 0x3e)
211 return pmc != 2;
212 return 1;
213 case 4:
214 case 5:
215 return unit == 0xd;
216 case 6:
217 if (psel == 0x64)
218 return pmc >= 3;
219 case 8:
220 return unit == 0xd;
221 }
222 return 0;
223}
224
225static int power7_compute_mmcr(u64 event[], int n_ev,
226 unsigned int hwc[], unsigned long mmcr[])
227{
228 unsigned long mmcr1 = 0;
229 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
230 unsigned int pmc, unit, combine, l2sel, psel;
231 unsigned int pmc_inuse = 0;
232 int i;
233
234 /* First pass to count resource use */
235 for (i = 0; i < n_ev; ++i) {
236 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
237 if (pmc) {
238 if (pmc > 6)
239 return -1;
240 if (pmc_inuse & (1 << (pmc - 1)))
241 return -1;
242 pmc_inuse |= 1 << (pmc - 1);
243 }
244 }
245
246 /* Second pass: assign PMCs, set all MMCR1 fields */
247 for (i = 0; i < n_ev; ++i) {
248 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
249 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
250 combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
251 l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
252 psel = event[i] & PM_PMCSEL_MSK;
253 if (!pmc) {
254 /* Bus event or any-PMC direct event */
255 for (pmc = 0; pmc < 4; ++pmc) {
256 if (!(pmc_inuse & (1 << pmc)))
257 break;
258 }
259 if (pmc >= 4)
260 return -1;
261 pmc_inuse |= 1 << pmc;
262 } else {
263 /* Direct or decoded event */
264 --pmc;
265 }
266 if (pmc <= 3) {
267 mmcr1 |= (unsigned long) unit
268 << (MMCR1_TTM0SEL_SH - 4 * pmc);
269 mmcr1 |= (unsigned long) combine
270 << (MMCR1_PMC1_COMBINE_SH - pmc);
271 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
272 if (unit == 6) /* L2 events */
273 mmcr1 |= (unsigned long) l2sel
274 << MMCR1_L2SEL_SH;
275 }
276 if (power7_marked_instr_event(event[i]))
277 mmcra |= MMCRA_SAMPLE_ENABLE;
278 hwc[i] = pmc;
279 }
280
281 /* Return MMCRx values */
282 mmcr[0] = 0;
283 if (pmc_inuse & 1)
284 mmcr[0] = MMCR0_PMC1CE;
285 if (pmc_inuse & 0x3e)
286 mmcr[0] |= MMCR0_PMCjCE;
287 mmcr[1] = mmcr1;
288 mmcr[2] = mmcra;
289 return 0;
290}
291
292static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
293{
294 if (pmc <= 3)
295 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
296}
297
298static int power7_generic_events[] = {
299 [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
300 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */
301 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a, /* CMPLU_STALL */
302 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
303 [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/
304 [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
305 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
306 [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
307};
308
309#define C(x) PERF_COUNT_HW_CACHE_##x
310
311/*
312 * Table of generalized cache-related events.
313 * 0 means not supported, -1 means nonsensical, other values
314 * are event codes.
315 */
316static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
317 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
318 [C(OP_READ)] = { 0xc880, 0x400f0 },
319 [C(OP_WRITE)] = { 0, 0x300f0 },
320 [C(OP_PREFETCH)] = { 0xd8b8, 0 },
321 },
322 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
323 [C(OP_READ)] = { 0, 0x200fc },
324 [C(OP_WRITE)] = { -1, -1 },
325 [C(OP_PREFETCH)] = { 0x408a, 0 },
326 },
327 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
328 [C(OP_READ)] = { 0x16080, 0x26080 },
329 [C(OP_WRITE)] = { 0x16082, 0x26082 },
330 [C(OP_PREFETCH)] = { 0, 0 },
331 },
332 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
333 [C(OP_READ)] = { 0, 0x300fc },
334 [C(OP_WRITE)] = { -1, -1 },
335 [C(OP_PREFETCH)] = { -1, -1 },
336 },
337 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
338 [C(OP_READ)] = { 0, 0x400fc },
339 [C(OP_WRITE)] = { -1, -1 },
340 [C(OP_PREFETCH)] = { -1, -1 },
341 },
342 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
343 [C(OP_READ)] = { 0x10068, 0x400f6 },
344 [C(OP_WRITE)] = { -1, -1 },
345 [C(OP_PREFETCH)] = { -1, -1 },
346 },
347 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
348 [C(OP_READ)] = { -1, -1 },
349 [C(OP_WRITE)] = { -1, -1 },
350 [C(OP_PREFETCH)] = { -1, -1 },
351 },
352};
353
354static struct power_pmu power7_pmu = {
355 .name = "POWER7",
356 .n_counter = 6,
357 .max_alternatives = MAX_ALT + 1,
358 .add_fields = 0x1555ul,
359 .test_adder = 0x3000ul,
360 .compute_mmcr = power7_compute_mmcr,
361 .get_constraint = power7_get_constraint,
362 .get_alternatives = power7_get_alternatives,
363 .disable_pmc = power7_disable_pmc,
364 .flags = PPMU_ALT_SIPR,
365 .n_generic = ARRAY_SIZE(power7_generic_events),
366 .generic_events = power7_generic_events,
367 .cache_events = &power7_cache_events,
368};
369
370static int __init init_power7_pmu(void)
371{
372 if (!cur_cpu_spec->oprofile_cpu_type ||
373 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
374 return -ENODEV;
375
376 return register_power_pmu(&power7_pmu);
377}
378
379early_initcall(init_power7_pmu);
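The PPMU_ONLY_COUNT_RUN block in the deleted power7_get_alternatives() substitutes run-latch-qualified aliases for the plain cycle and instruction-completion events. Below is a minimal user-space sketch of that expansion, assuming only the raw event codes visible in the diff; the PM_* names are local labels for this illustration, not the kernel's own macros.

#include <stdio.h>
#include <stdint.h>

#define PM_CYC			0x1e
#define PM_RUN_CYC		0x600f4
#define PM_PPC_CMPL		0x2
#define PM_RUN_INST_CMPL	0x500fa

/* Append the RUN-state alias of each event already in alt[], mirroring the
 * switch in the deleted power7_get_alternatives() when PPMU_ONLY_COUNT_RUN
 * is set.  Returns the new number of alternatives. */
static int expand_run_aliases(uint64_t alt[], int nalt)
{
	int i, j = nalt;

	for (i = 0; i < nalt; ++i) {
		switch (alt[i]) {
		case PM_CYC:
			alt[j++] = PM_RUN_CYC;
			break;
		case PM_RUN_CYC:
			alt[j++] = PM_CYC;
			break;
		case PM_PPC_CMPL:
			alt[j++] = PM_RUN_INST_CMPL;
			break;
		case PM_RUN_INST_CMPL:
			alt[j++] = PM_PPC_CMPL;
			break;
		}
	}
	return j;
}

int main(void)
{
	uint64_t alt[4] = { PM_CYC };
	int n = expand_run_aliases(alt, 1);

	for (int i = 0; i < n; ++i)
		printf("alt[%d] = 0x%llx\n", i, (unsigned long long)alt[i]);
	return 0;
}

Run on the single event PM_CYC, this yields the pair { 0x1e, 0x600f4 }, i.e. the scheduler may place either encoding on a counter when only RUN-state counting is requested.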
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
deleted file mode 100644
index 8c2190206964..000000000000
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ /dev/null
@@ -1,502 +0,0 @@
1/*
2 * Performance counter support for PPC970-family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/string.h>
12#include <linux/perf_event.h>
13#include <asm/reg.h>
14#include <asm/cputable.h>
15
16/*
17 * Bits in event code for PPC970
18 */
19#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
20#define PM_PMC_MSK 0xf
21#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_SPCSEL_SH 6
24#define PM_SPCSEL_MSK 3
25#define PM_BYTE_SH 4 /* Byte number of event bus to use */
26#define PM_BYTE_MSK 3
27#define PM_PMCSEL_MSK 0xf
28
29/* Values in PM_UNIT field */
30#define PM_NONE 0
31#define PM_FPU 1
32#define PM_VPU 2
33#define PM_ISU 3
34#define PM_IFU 4
35#define PM_IDU 5
36#define PM_STS 6
37#define PM_LSU0 7
38#define PM_LSU1U 8
39#define PM_LSU1L 9
40#define PM_LASTUNIT 9
41
42/*
43 * Bits in MMCR0 for PPC970
44 */
45#define MMCR0_PMC1SEL_SH 8
46#define MMCR0_PMC2SEL_SH 1
47#define MMCR_PMCSEL_MSK 0x1f
48
49/*
50 * Bits in MMCR1 for PPC970
51 */
52#define MMCR1_TTM0SEL_SH 62
53#define MMCR1_TTM1SEL_SH 59
54#define MMCR1_TTM3SEL_SH 53
55#define MMCR1_TTMSEL_MSK 3
56#define MMCR1_TD_CP_DBG0SEL_SH 50
57#define MMCR1_TD_CP_DBG1SEL_SH 48
58#define MMCR1_TD_CP_DBG2SEL_SH 46
59#define MMCR1_TD_CP_DBG3SEL_SH 44
60#define MMCR1_PMC1_ADDER_SEL_SH 39
61#define MMCR1_PMC2_ADDER_SEL_SH 38
62#define MMCR1_PMC6_ADDER_SEL_SH 37
63#define MMCR1_PMC5_ADDER_SEL_SH 36
64#define MMCR1_PMC8_ADDER_SEL_SH 35
65#define MMCR1_PMC7_ADDER_SEL_SH 34
66#define MMCR1_PMC3_ADDER_SEL_SH 33
67#define MMCR1_PMC4_ADDER_SEL_SH 32
68#define MMCR1_PMC3SEL_SH 27
69#define MMCR1_PMC4SEL_SH 22
70#define MMCR1_PMC5SEL_SH 17
71#define MMCR1_PMC6SEL_SH 12
72#define MMCR1_PMC7SEL_SH 7
73#define MMCR1_PMC8SEL_SH 2
74
75static short mmcr1_adder_bits[8] = {
76 MMCR1_PMC1_ADDER_SEL_SH,
77 MMCR1_PMC2_ADDER_SEL_SH,
78 MMCR1_PMC3_ADDER_SEL_SH,
79 MMCR1_PMC4_ADDER_SEL_SH,
80 MMCR1_PMC5_ADDER_SEL_SH,
81 MMCR1_PMC6_ADDER_SEL_SH,
82 MMCR1_PMC7_ADDER_SEL_SH,
83 MMCR1_PMC8_ADDER_SEL_SH
84};
85
86/*
87 * Layout of constraint bits:
88 * 6666555555555544444444443333333333222222222211111111110000000000
89 * 3210987654321098765432109876543210987654321098765432109876543210
90 * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><>
91 * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
92 *
93 * SP - SPCSEL constraint
94 * 48-49: SPCSEL value 0x3_0000_0000_0000
95 *
96 * T0 - TTM0 constraint
97 * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
98 *
99 * T1 - TTM1 constraint
100 * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
101 *
102 * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
103 * 43: UC3 error 0x0800_0000_0000
104 * 42: FPU|IFU|VPU events needed 0x0400_0000_0000
105 * 41: ISU events needed 0x0200_0000_0000
106 * 40: IDU|STS events needed 0x0100_0000_0000
107 *
108 * PS1
109 * 39: PS1 error 0x0080_0000_0000
110 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
111 *
112 * PS2
113 * 35: PS2 error 0x0008_0000_0000
114 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
115 *
116 * B0
117 * 28-31: Byte 0 event source 0xf000_0000
118 * Encoding as for the event code
119 *
120 * B1, B2, B3
121 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
122 *
123 * P1
124 * 15: P1 error 0x8000
125 * 14-15: Count of events needing PMC1
126 *
127 * P2..P8
128 * 0-13: Count of events needing PMC2..PMC8
129 */
130
131static unsigned char direct_marked_event[8] = {
132 (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
133 (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
134 (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
135 (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
136 (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
137 (1<<3) | (1<<4) | (1<<5),
138 /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
139 (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
140 (1<<4) /* PMC8: PM_MRK_LSU_FIN */
141};
142
143/*
144 * Returns 1 if event counts things relating to marked instructions
145 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
146 */
147static int p970_marked_instr_event(u64 event)
148{
149 int pmc, psel, unit, byte, bit;
150 unsigned int mask;
151
152 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
153 psel = event & PM_PMCSEL_MSK;
154 if (pmc) {
155 if (direct_marked_event[pmc - 1] & (1 << psel))
156 return 1;
157 if (psel == 0) /* add events */
158 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
159 else if (psel == 7 || psel == 13) /* decode events */
160 bit = 4;
161 else
162 return 0;
163 } else
164 bit = psel;
165
166 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
167 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
168 mask = 0;
169 switch (unit) {
170 case PM_VPU:
171 mask = 0x4c; /* byte 0 bits 2,3,6 */
172 break;
173 case PM_LSU0:
174 /* byte 2 bits 0,2,3,4,6; all of byte 1 */
175 mask = 0x085dff00;
176 break;
177 case PM_LSU1L:
178 mask = 0x50 << 24; /* byte 3 bits 4,6 */
179 break;
180 }
181 return (mask >> (byte * 8 + bit)) & 1;
182}
183
184/* Masks and values for using events from the various units */
185static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
186 [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
187 [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
188 [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
189 [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
190 [PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
191 [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
192};
193
194static int p970_get_constraint(u64 event, unsigned long *maskp,
195 unsigned long *valp)
196{
197 int pmc, byte, unit, sh, spcsel;
198 unsigned long mask = 0, value = 0;
199 int grp = -1;
200
201 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
202 if (pmc) {
203 if (pmc > 8)
204 return -1;
205 sh = (pmc - 1) * 2;
206 mask |= 2 << sh;
207 value |= 1 << sh;
208 grp = ((pmc - 1) >> 1) & 1;
209 }
210 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
211 if (unit) {
212 if (unit > PM_LASTUNIT)
213 return -1;
214 mask |= unit_cons[unit][0];
215 value |= unit_cons[unit][1];
216 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
217 /*
218 * Bus events on bytes 0 and 2 can be counted
219 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
220 */
221 if (!pmc)
222 grp = byte & 1;
223 /* Set byte lane select field */
224 mask |= 0xfULL << (28 - 4 * byte);
225 value |= (unsigned long)unit << (28 - 4 * byte);
226 }
227 if (grp == 0) {
228 /* increment PMC1/2/5/6 field */
229 mask |= 0x8000000000ull;
230 value |= 0x1000000000ull;
231 } else if (grp == 1) {
232 /* increment PMC3/4/7/8 field */
233 mask |= 0x800000000ull;
234 value |= 0x100000000ull;
235 }
236 spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
237 if (spcsel) {
238 mask |= 3ull << 48;
239 value |= (unsigned long)spcsel << 48;
240 }
241 *maskp = mask;
242 *valp = value;
243 return 0;
244}
245
246static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
247{
248 alt[0] = event;
249
250 /* 2 alternatives for LSU empty */
251 if (event == 0x2002 || event == 0x3002) {
252 alt[1] = event ^ 0x1000;
253 return 2;
254 }
255
256 return 1;
257}
258
259static int p970_compute_mmcr(u64 event[], int n_ev,
260 unsigned int hwc[], unsigned long mmcr[])
261{
262 unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
263 unsigned int pmc, unit, byte, psel;
264 unsigned int ttm, grp;
265 unsigned int pmc_inuse = 0;
266 unsigned int pmc_grp_use[2];
267 unsigned char busbyte[4];
268 unsigned char unituse[16];
269 unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
270 unsigned char ttmuse[2];
271 unsigned char pmcsel[8];
272 int i;
273 int spcsel;
274
275 if (n_ev > 8)
276 return -1;
277
278 /* First pass to count resource use */
279 pmc_grp_use[0] = pmc_grp_use[1] = 0;
280 memset(busbyte, 0, sizeof(busbyte));
281 memset(unituse, 0, sizeof(unituse));
282 for (i = 0; i < n_ev; ++i) {
283 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
284 if (pmc) {
285 if (pmc_inuse & (1 << (pmc - 1)))
286 return -1;
287 pmc_inuse |= 1 << (pmc - 1);
288 /* count 1/2/5/6 vs 3/4/7/8 use */
289 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
290 }
291 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
292 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
293 if (unit) {
294 if (unit > PM_LASTUNIT)
295 return -1;
296 if (!pmc)
297 ++pmc_grp_use[byte & 1];
298 if (busbyte[byte] && busbyte[byte] != unit)
299 return -1;
300 busbyte[byte] = unit;
301 unituse[unit] = 1;
302 }
303 }
304 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
305 return -1;
306
307 /*
308 * Assign resources and set multiplexer selects.
309 *
310 * PM_ISU can go either on TTM0 or TTM1, but that's the only
311 * choice we have to deal with.
312 */
313 if (unituse[PM_ISU] &
314 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
315 unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
316 /* Set TTM[01]SEL fields. */
317 ttmuse[0] = ttmuse[1] = 0;
318 for (i = PM_FPU; i <= PM_STS; ++i) {
319 if (!unituse[i])
320 continue;
321 ttm = unitmap[i];
322 ++ttmuse[(ttm >> 2) & 1];
323 mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
324 }
325 /* Check only one unit per TTMx */
326 if (ttmuse[0] > 1 || ttmuse[1] > 1)
327 return -1;
328
329 /* Set byte lane select fields and TTM3SEL. */
330 for (byte = 0; byte < 4; ++byte) {
331 unit = busbyte[byte];
332 if (!unit)
333 continue;
334 if (unit <= PM_STS)
335 ttm = (unitmap[unit] >> 2) & 1;
336 else if (unit == PM_LSU0)
337 ttm = 2;
338 else {
339 ttm = 3;
340 if (unit == PM_LSU1L && byte >= 2)
341 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
342 }
343 mmcr1 |= (unsigned long)ttm
344 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
345 }
346
347 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
348 memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
349 for (i = 0; i < n_ev; ++i) {
350 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
351 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
352 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
353 psel = event[i] & PM_PMCSEL_MSK;
354 if (!pmc) {
355 /* Bus event or any-PMC direct event */
356 if (unit)
357 psel |= 0x10 | ((byte & 2) << 2);
358 else
359 psel |= 8;
360 for (pmc = 0; pmc < 8; ++pmc) {
361 if (pmc_inuse & (1 << pmc))
362 continue;
363 grp = (pmc >> 1) & 1;
364 if (unit) {
365 if (grp == (byte & 1))
366 break;
367 } else if (pmc_grp_use[grp] < 4) {
368 ++pmc_grp_use[grp];
369 break;
370 }
371 }
372 pmc_inuse |= 1 << pmc;
373 } else {
374 /* Direct event */
375 --pmc;
376 if (psel == 0 && (byte & 2))
377 /* add events on higher-numbered bus */
378 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
379 }
380 pmcsel[pmc] = psel;
381 hwc[i] = pmc;
382 spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
383 mmcr1 |= spcsel;
384 if (p970_marked_instr_event(event[i]))
385 mmcra |= MMCRA_SAMPLE_ENABLE;
386 }
387 for (pmc = 0; pmc < 2; ++pmc)
388 mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
389 for (; pmc < 8; ++pmc)
390 mmcr1 |= (unsigned long)pmcsel[pmc]
391 << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
392 if (pmc_inuse & 1)
393 mmcr0 |= MMCR0_PMC1CE;
394 if (pmc_inuse & 0xfe)
395 mmcr0 |= MMCR0_PMCjCE;
396
397 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
398
399 /* Return MMCRx values */
400 mmcr[0] = mmcr0;
401 mmcr[1] = mmcr1;
402 mmcr[2] = mmcra;
403 return 0;
404}
405
406static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
407{
408 int shift, i;
409
410 if (pmc <= 1) {
411 shift = MMCR0_PMC1SEL_SH - 7 * pmc;
412 i = 0;
413 } else {
414 shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
415 i = 1;
416 }
417 /*
418 * Setting the PMCxSEL field to 0x08 disables PMC x.
419 */
420 mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
421}
422
423static int ppc970_generic_events[] = {
424 [PERF_COUNT_HW_CPU_CYCLES] = 7,
425 [PERF_COUNT_HW_INSTRUCTIONS] = 1,
426 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
427 [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
428 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
429 [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
430};
431
432#define C(x) PERF_COUNT_HW_CACHE_##x
433
434/*
435 * Table of generalized cache-related events.
436 * 0 means not supported, -1 means nonsensical, other values
437 * are event codes.
438 */
439static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
440 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
441 [C(OP_READ)] = { 0x8810, 0x3810 },
442 [C(OP_WRITE)] = { 0x7810, 0x813 },
443 [C(OP_PREFETCH)] = { 0x731, 0 },
444 },
445 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
446 [C(OP_READ)] = { 0, 0 },
447 [C(OP_WRITE)] = { -1, -1 },
448 [C(OP_PREFETCH)] = { 0, 0 },
449 },
450 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
451 [C(OP_READ)] = { 0, 0 },
452 [C(OP_WRITE)] = { 0, 0 },
453 [C(OP_PREFETCH)] = { 0x733, 0 },
454 },
455 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
456 [C(OP_READ)] = { 0, 0x704 },
457 [C(OP_WRITE)] = { -1, -1 },
458 [C(OP_PREFETCH)] = { -1, -1 },
459 },
460 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
461 [C(OP_READ)] = { 0, 0x700 },
462 [C(OP_WRITE)] = { -1, -1 },
463 [C(OP_PREFETCH)] = { -1, -1 },
464 },
465 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
466 [C(OP_READ)] = { 0x431, 0x327 },
467 [C(OP_WRITE)] = { -1, -1 },
468 [C(OP_PREFETCH)] = { -1, -1 },
469 },
470 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
471 [C(OP_READ)] = { -1, -1 },
472 [C(OP_WRITE)] = { -1, -1 },
473 [C(OP_PREFETCH)] = { -1, -1 },
474 },
475};
476
477static struct power_pmu ppc970_pmu = {
478 .name = "PPC970/FX/MP",
479 .n_counter = 8,
480 .max_alternatives = 2,
481 .add_fields = 0x001100005555ull,
482 .test_adder = 0x013300000000ull,
483 .compute_mmcr = p970_compute_mmcr,
484 .get_constraint = p970_get_constraint,
485 .get_alternatives = p970_get_alternatives,
486 .disable_pmc = p970_disable_pmc,
487 .n_generic = ARRAY_SIZE(ppc970_generic_events),
488 .generic_events = ppc970_generic_events,
489 .cache_events = &ppc970_cache_events,
490};
491
492static int __init init_ppc970_pmu(void)
493{
494 if (!cur_cpu_spec->oprofile_cpu_type ||
495 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
496 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")))
497 return -ENODEV;
498
499 return register_power_pmu(&ppc970_pmu);
500}
501
502early_initcall(init_ppc970_pmu);
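The constraint-layout comment in the deleted ppc970-pmu.c gives each PMC a two-bit field in the low word plus a PS1/PS2 adder that counts how many events want the PMC1/2/5/6 group versus the PMC3/4/7/8 group. The following standalone sketch reproduces just the per-PMC mask/value bits built by p970_get_constraint(), using the constants visible in the diff; it is an illustration of the encoding, not kernel code.

#include <stdio.h>

/* Build the constraint bits for a direct event on a given 1-based PMC,
 * as the deleted p970_get_constraint() does: a "this PMC is taken" field
 * at bits 2*(pmc-1), plus an increment of the PS1 or PS2 counter field. */
static void pmc_constraint(int pmc, unsigned long long *maskp,
			   unsigned long long *valp)
{
	unsigned long long mask = 0, value = 0;
	int sh = (pmc - 1) * 2;
	int grp = ((pmc - 1) >> 1) & 1;	/* 0 for PMC1/2/5/6, 1 for PMC3/4/7/8 */

	mask |= 2ULL << sh;
	value |= 1ULL << sh;
	if (grp == 0) {
		/* increment the PMC1/2/5/6 (PS1) count field */
		mask |= 0x8000000000ULL;
		value |= 0x1000000000ULL;
	} else {
		/* increment the PMC3/4/7/8 (PS2) count field */
		mask |= 0x800000000ULL;
		value |= 0x100000000ULL;
	}
	*maskp = mask;
	*valp = value;
}

int main(void)
{
	unsigned long long mask, value;

	for (int pmc = 1; pmc <= 8; ++pmc) {
		pmc_constraint(pmc, &mask, &value);
		printf("PMC%d: mask=0x%012llx value=0x%012llx\n",
		       pmc, mask, value);
	}
	return 0;
}

Adding the value words of several events and checking them against the combined mask is how the core perf code detects that two events demand the same PMC or overflow a group's four-counter budget.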
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d817ab018486..e40707032ac3 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -647,6 +647,9 @@ void show_regs(struct pt_regs * regs)
647 printk("MSR: "REG" ", regs->msr); 647 printk("MSR: "REG" ", regs->msr);
648 printbits(regs->msr, msr_bits); 648 printbits(regs->msr, msr_bits);
649 printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); 649 printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
650#ifdef CONFIG_PPC64
651 printk("SOFTE: %ld\n", regs->softe);
652#endif
650 trap = TRAP(regs); 653 trap = TRAP(regs);
651 if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) 654 if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
652 printk("CFAR: "REG"\n", regs->orig_gpr3); 655 printk("CFAR: "REG"\n", regs->orig_gpr3);
@@ -1220,34 +1223,32 @@ void dump_stack(void)
1220EXPORT_SYMBOL(dump_stack); 1223EXPORT_SYMBOL(dump_stack);
1221 1224
1222#ifdef CONFIG_PPC64 1225#ifdef CONFIG_PPC64
1223void ppc64_runlatch_on(void) 1226/* Called with hard IRQs off */
1227void __ppc64_runlatch_on(void)
1224{ 1228{
1229 struct thread_info *ti = current_thread_info();
1225 unsigned long ctrl; 1230 unsigned long ctrl;
1226 1231
1227 if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) { 1232 ctrl = mfspr(SPRN_CTRLF);
1228 HMT_medium(); 1233 ctrl |= CTRL_RUNLATCH;
1229 1234 mtspr(SPRN_CTRLT, ctrl);
1230 ctrl = mfspr(SPRN_CTRLF);
1231 ctrl |= CTRL_RUNLATCH;
1232 mtspr(SPRN_CTRLT, ctrl);
1233 1235
1234 set_thread_flag(TIF_RUNLATCH); 1236 ti->local_flags |= TLF_RUNLATCH;
1235 }
1236} 1237}
1237 1238
1239/* Called with hard IRQs off */
1238void __ppc64_runlatch_off(void) 1240void __ppc64_runlatch_off(void)
1239{ 1241{
1242 struct thread_info *ti = current_thread_info();
1240 unsigned long ctrl; 1243 unsigned long ctrl;
1241 1244
1242 HMT_medium(); 1245 ti->local_flags &= ~TLF_RUNLATCH;
1243
1244 clear_thread_flag(TIF_RUNLATCH);
1245 1246
1246 ctrl = mfspr(SPRN_CTRLF); 1247 ctrl = mfspr(SPRN_CTRLF);
1247 ctrl &= ~CTRL_RUNLATCH; 1248 ctrl &= ~CTRL_RUNLATCH;
1248 mtspr(SPRN_CTRLT, ctrl); 1249 mtspr(SPRN_CTRLT, ctrl);
1249} 1250}
1250#endif 1251#endif /* CONFIG_PPC64 */
1251 1252
1252#if THREAD_SHIFT < PAGE_SHIFT 1253#if THREAD_SHIFT < PAGE_SHIFT
1253 1254
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index abe405dab34d..89e850af3dd6 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -52,9 +52,9 @@
52#include <asm/machdep.h> 52#include <asm/machdep.h>
53#include <asm/pSeries_reconfig.h> 53#include <asm/pSeries_reconfig.h>
54#include <asm/pci-bridge.h> 54#include <asm/pci-bridge.h>
55#include <asm/phyp_dump.h>
56#include <asm/kexec.h> 55#include <asm/kexec.h>
57#include <asm/opal.h> 56#include <asm/opal.h>
57#include <asm/fadump.h>
58 58
59#include <mm/mmu_decl.h> 59#include <mm/mmu_decl.h>
60 60
@@ -615,86 +615,6 @@ static void __init early_reserve_mem(void)
615 } 615 }
616} 616}
617 617
618#ifdef CONFIG_PHYP_DUMP
619/**
620 * phyp_dump_calculate_reserve_size() - reserve variable boot area 5% or arg
621 *
622 * Function to find the largest size we need to reserve
623 * during early boot process.
624 *
625 * It either looks for boot param and returns that OR
626 * returns larger of 256 or 5% rounded down to multiples of 256MB.
627 *
628 */
629static inline unsigned long phyp_dump_calculate_reserve_size(void)
630{
631 unsigned long tmp;
632
633 if (phyp_dump_info->reserve_bootvar)
634 return phyp_dump_info->reserve_bootvar;
635
636 /* divide by 20 to get 5% of value */
637 tmp = memblock_end_of_DRAM();
638 do_div(tmp, 20);
639
640 /* round it down in multiples of 256 */
641 tmp = tmp & ~0x0FFFFFFFUL;
642
643 return (tmp > PHYP_DUMP_RMR_END ? tmp : PHYP_DUMP_RMR_END);
644}
645
646/**
 647 * phyp_dump_reserve_mem() - reserve all not-yet-dumped memory
648 *
649 * This routine may reserve memory regions in the kernel only
650 * if the system is supported and a dump was taken in last
651 * boot instance or if the hardware is supported and the
652 * scratch area needs to be setup. In other instances it returns
653 * without reserving anything. The memory in case of dump being
654 * active is freed when the dump is collected (by userland tools).
655 */
656static void __init phyp_dump_reserve_mem(void)
657{
658 unsigned long base, size;
659 unsigned long variable_reserve_size;
660
661 if (!phyp_dump_info->phyp_dump_configured) {
662 printk(KERN_ERR "Phyp-dump not supported on this hardware\n");
663 return;
664 }
665
666 if (!phyp_dump_info->phyp_dump_at_boot) {
667 printk(KERN_INFO "Phyp-dump disabled at boot time\n");
668 return;
669 }
670
671 variable_reserve_size = phyp_dump_calculate_reserve_size();
672
673 if (phyp_dump_info->phyp_dump_is_active) {
674 /* Reserve *everything* above RMR.Area freed by userland tools*/
675 base = variable_reserve_size;
676 size = memblock_end_of_DRAM() - base;
677
678 /* XXX crashed_ram_end is wrong, since it may be beyond
679 * the memory_limit, it will need to be adjusted. */
680 memblock_reserve(base, size);
681
682 phyp_dump_info->init_reserve_start = base;
683 phyp_dump_info->init_reserve_size = size;
684 } else {
685 size = phyp_dump_info->cpu_state_size +
686 phyp_dump_info->hpte_region_size +
687 variable_reserve_size;
688 base = memblock_end_of_DRAM() - size;
689 memblock_reserve(base, size);
690 phyp_dump_info->init_reserve_start = base;
691 phyp_dump_info->init_reserve_size = size;
692 }
693}
694#else
695static inline void __init phyp_dump_reserve_mem(void) {}
696#endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */
697
698void __init early_init_devtree(void *params) 618void __init early_init_devtree(void *params)
699{ 619{
700 phys_addr_t limit; 620 phys_addr_t limit;
@@ -714,9 +634,9 @@ void __init early_init_devtree(void *params)
714 of_scan_flat_dt(early_init_dt_scan_opal, NULL); 634 of_scan_flat_dt(early_init_dt_scan_opal, NULL);
715#endif 635#endif
716 636
717#ifdef CONFIG_PHYP_DUMP 637#ifdef CONFIG_FA_DUMP
718 /* scan tree to see if dump occurred during last boot */ 638 /* scan tree to see if dump is active during last boot */
719 of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); 639 of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
720#endif 640#endif
721 641
722 /* Pre-initialize the cmd_line with the content of boot_command_line, 642
@@ -750,9 +670,15 @@ void __init early_init_devtree(void *params)
750 if (PHYSICAL_START > MEMORY_START) 670 if (PHYSICAL_START > MEMORY_START)
751 memblock_reserve(MEMORY_START, 0x8000); 671 memblock_reserve(MEMORY_START, 0x8000);
752 reserve_kdump_trampoline(); 672 reserve_kdump_trampoline();
753 reserve_crashkernel(); 673#ifdef CONFIG_FA_DUMP
674 /*
675 * If we fail to reserve memory for firmware-assisted dump then
676 * fallback to kexec based kdump.
677 */
678 if (fadump_reserve_mem() == 0)
679#endif
680 reserve_crashkernel();
754 early_reserve_mem(); 681 early_reserve_mem();
755 phyp_dump_reserve_mem();
756 682
757 /* 683 /*
758 * Ensure that total memory size is page-aligned, because otherwise 684 * Ensure that total memory size is page-aligned, because otherwise
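The prom.c hunk above swaps the old phyp-dump reservation for firmware-assisted dump: reserve_crashkernel() now runs only when fadump_reserve_mem() could not reserve its memory. A tiny sketch of that fallback order follows; the two stubs stand in for the kernel functions named in the diff, and their return values are assumptions made purely for the illustration.

#include <stdio.h>

/* Stand-ins for the kernel's fadump_reserve_mem() and reserve_crashkernel(). */
static int fadump_reserve_mem(void)
{
	return 0;	/* pretend firmware-assisted dump could not reserve memory */
}

static void reserve_crashkernel(void)
{
	puts("falling back to kexec-based kdump reservation");
}

int main(void)
{
	/* Mirrors the ordering in early_init_devtree(): kexec kdump memory is
	 * reserved only when fadump reservation did not succeed. */
	if (fadump_reserve_mem() == 0)
		reserve_crashkernel();
	return 0;
}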
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index eca626ea3f23..e2d599048142 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -48,14 +48,6 @@
48#include <linux/linux_logo.h> 48#include <linux/linux_logo.h>
49 49
50/* 50/*
51 * Properties whose value is longer than this get excluded from our
52 * copy of the device tree. This value does need to be big enough to
53 * ensure that we don't lose things like the interrupt-map property
54 * on a PCI-PCI bridge.
55 */
56#define MAX_PROPERTY_LENGTH (1UL * 1024 * 1024)
57
58/*
59 * Eventually bump that one up 51 * Eventually bump that one up
60 */ 52 */
61#define DEVTREE_CHUNK_SIZE 0x100000 53#define DEVTREE_CHUNK_SIZE 0x100000
@@ -2273,13 +2265,6 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
2273 /* sanity checks */ 2265 /* sanity checks */
2274 if (l == PROM_ERROR) 2266 if (l == PROM_ERROR)
2275 continue; 2267 continue;
2276 if (l > MAX_PROPERTY_LENGTH) {
2277 prom_printf("WARNING: ignoring large property ");
2278 /* It seems OF doesn't null-terminate the path :-( */
2279 prom_printf("[%s] ", path);
2280 prom_printf("%s length 0x%x\n", RELOC(pname), l);
2281 continue;
2282 }
2283 2268
2284 /* push property head */ 2269 /* push property head */
2285 dt_push_token(OF_DT_PROP, mem_start, mem_end); 2270 dt_push_token(OF_DT_PROP, mem_start, mem_end);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 6cd8f0196b6d..179af906dcda 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -275,8 +275,11 @@ void __init find_and_init_phbs(void)
275 of_node_put(root); 275 of_node_put(root);
276 pci_devs_phb_init(); 276 pci_devs_phb_init();
277 277
278 /* Create EEH devices for all PHBs */
279 eeh_dev_phb_init();
280
278 /* 281 /*
279 * pci_probe_only and pci_assign_all_buses can be set via properties 282 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
280 * in chosen. 283 * in chosen.
281 */ 284 */
282 if (of_chosen) { 285 if (of_chosen) {
@@ -284,8 +287,12 @@ void __init find_and_init_phbs(void)
284 287
285 prop = of_get_property(of_chosen, 288 prop = of_get_property(of_chosen,
286 "linux,pci-probe-only", NULL); 289 "linux,pci-probe-only", NULL);
287 if (prop) 290 if (prop) {
288 pci_probe_only = *prop; 291 if (*prop)
292 pci_add_flags(PCI_PROBE_ONLY);
293 else
294 pci_clear_flags(PCI_PROBE_ONLY);
295 }
289 296
290#ifdef CONFIG_PPC32 /* Will be made generic soon */ 297#ifdef CONFIG_PPC32 /* Will be made generic soon */
291 prop = of_get_property(of_chosen, 298 prop = of_get_property(of_chosen,
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 77bb77da05c1..b0ebdeab9494 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -61,6 +61,7 @@
61#include <asm/xmon.h> 61#include <asm/xmon.h>
62#include <asm/cputhreads.h> 62#include <asm/cputhreads.h>
63#include <mm/mmu_decl.h> 63#include <mm/mmu_decl.h>
64#include <asm/fadump.h>
64 65
65#include "setup.h" 66#include "setup.h"
66 67
@@ -109,6 +110,14 @@ EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
109/* also used by kexec */ 110/* also used by kexec */
110void machine_shutdown(void) 111void machine_shutdown(void)
111{ 112{
113#ifdef CONFIG_FA_DUMP
114 /*
115 * if fadump is active, cleanup the fadump registration before we
116 * shutdown.
117 */
118 fadump_cleanup();
119#endif
120
112 if (ppc_md.machine_shutdown) 121 if (ppc_md.machine_shutdown)
113 ppc_md.machine_shutdown(); 122 ppc_md.machine_shutdown();
114} 123}
@@ -639,6 +648,11 @@ EXPORT_SYMBOL(check_legacy_ioport);
639static int ppc_panic_event(struct notifier_block *this, 648static int ppc_panic_event(struct notifier_block *this,
640 unsigned long event, void *ptr) 649 unsigned long event, void *ptr)
641{ 650{
651 /*
652 * If firmware-assisted dump has been registered then trigger
653 * firmware-assisted dump and let firmware handle everything else.
654 */
655 crash_fadump(NULL, ptr);
642 ppc_md.panic(ptr); /* May not return */ 656 ppc_md.panic(ptr); /* May not return */
643 return NOTIFY_DONE; 657 return NOTIFY_DONE;
644} 658}
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index ac6e437b1021..7006b7f4267a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -57,10 +57,7 @@ void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
57void restore_sigmask(sigset_t *set) 57void restore_sigmask(sigset_t *set)
58{ 58{
59 sigdelsetmask(set, ~_BLOCKABLE); 59 sigdelsetmask(set, ~_BLOCKABLE);
60 spin_lock_irq(&current->sighand->siglock); 60 set_current_blocked(set);
61 current->blocked = *set;
62 recalc_sigpending();
63 spin_unlock_irq(&current->sighand->siglock);
64} 61}
65 62
66static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, 63static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
@@ -169,13 +166,7 @@ static int do_signal(struct pt_regs *regs)
169 166
170 regs->trap = 0; 167 regs->trap = 0;
171 if (ret) { 168 if (ret) {
172 spin_lock_irq(&current->sighand->siglock); 169 block_sigmask(&ka, signr);
173 sigorsets(&current->blocked, &current->blocked,
174 &ka.sa.sa_mask);
175 if (!(ka.sa.sa_flags & SA_NODEFER))
176 sigaddset(&current->blocked, signr);
177 recalc_sigpending();
178 spin_unlock_irq(&current->sighand->siglock);
179 170
180 /* 171 /*
181 * A signal was successfully delivered; the saved sigmask is in 172 * A signal was successfully delivered; the saved sigmask is in
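block_sigmask(), introduced by the hunk above, collapses the open-coded update of current->blocked: it ORs in the handler's sa_mask and additionally blocks the delivered signal unless SA_NODEFER is set. The user-space approximation below mirrors only that behaviour with POSIX sigset operations; the function and variable names are local to the example, and the final assignment stands in for the kernel's set_current_blocked().

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>

/* Compute the new blocked set after delivering signr to a handler described
 * by ka: block everything in ka->sa_mask, plus signr itself unless the
 * handler was installed with SA_NODEFER. */
static void block_for_handler(sigset_t *blocked, const struct sigaction *ka,
			      int signr)
{
	sigset_t newset = *blocked;

	/* equivalent of sigorsets(&blocked, &blocked, &ka->sa_mask) */
	for (int sig = 1; sig < NSIG; ++sig)
		if (sigismember(&ka->sa_mask, sig))
			sigaddset(&newset, sig);

	if (!(ka->sa_flags & SA_NODEFER))
		sigaddset(&newset, signr);

	*blocked = newset;	/* the kernel would go through set_current_blocked() */
}

int main(void)
{
	sigset_t blocked;
	struct sigaction ka;

	memset(&ka, 0, sizeof(ka));
	sigemptyset(&blocked);
	sigemptyset(&ka.sa_mask);
	sigaddset(&ka.sa_mask, SIGUSR1);

	block_for_handler(&blocked, &ka, SIGINT);
	printf("SIGUSR1 blocked: %d, SIGINT blocked: %d\n",
	       sigismember(&blocked, SIGUSR1), sigismember(&blocked, SIGINT));
	return 0;
}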
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 836a5a19eb2c..e061ef5dd449 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -242,12 +242,13 @@ static inline int restore_general_regs(struct pt_regs *regs,
242 */ 242 */
243long sys_sigsuspend(old_sigset_t mask) 243long sys_sigsuspend(old_sigset_t mask)
244{ 244{
245 mask &= _BLOCKABLE; 245 sigset_t blocked;
246 spin_lock_irq(&current->sighand->siglock); 246
247 current->saved_sigmask = current->blocked; 247 current->saved_sigmask = current->blocked;
248 siginitset(&current->blocked, mask); 248
249 recalc_sigpending(); 249 mask &= _BLOCKABLE;
250 spin_unlock_irq(&current->sighand->siglock); 250 siginitset(&blocked, mask);
251 set_current_blocked(&blocked);
251 252
252 current->state = TASK_INTERRUPTIBLE; 253 current->state = TASK_INTERRUPTIBLE;
253 schedule(); 254 schedule();
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 883e74c0d1b3..0c683d376b1c 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -12,7 +12,6 @@
12#include <asm/current.h> 12#include <asm/current.h>
13#include <asm/processor.h> 13#include <asm/processor.h>
14#include <asm/cputable.h> 14#include <asm/cputable.h>
15#include <asm/firmware.h>
16#include <asm/hvcall.h> 15#include <asm/hvcall.h>
17#include <asm/prom.h> 16#include <asm/prom.h>
18#include <asm/machdep.h> 17#include <asm/machdep.h>
@@ -341,8 +340,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
341 int i, nattrs; 340 int i, nattrs;
342 341
343#ifdef CONFIG_PPC64 342#ifdef CONFIG_PPC64
344 if (!firmware_has_feature(FW_FEATURE_ISERIES) && 343 if (cpu_has_feature(CPU_FTR_SMT))
345 cpu_has_feature(CPU_FTR_SMT))
346 device_create_file(s, &dev_attr_smt_snooze_delay); 344 device_create_file(s, &dev_attr_smt_snooze_delay);
347#endif 345#endif
348 346
@@ -414,8 +412,7 @@ static void unregister_cpu_online(unsigned int cpu)
414 BUG_ON(!c->hotpluggable); 412 BUG_ON(!c->hotpluggable);
415 413
416#ifdef CONFIG_PPC64 414#ifdef CONFIG_PPC64
417 if (!firmware_has_feature(FW_FEATURE_ISERIES) && 415 if (cpu_has_feature(CPU_FTR_SMT))
418 cpu_has_feature(CPU_FTR_SMT))
419 device_remove_file(s, &dev_attr_smt_snooze_delay); 416 device_remove_file(s, &dev_attr_smt_snooze_delay);
420#endif 417#endif
421 418
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 567dd7c3ac2a..2c42cd72d0f5 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -17,8 +17,7 @@
17 * 17 *
18 * TODO (not necessarily in this file): 18 * TODO (not necessarily in this file):
19 * - improve precision and reproducibility of timebase frequency 19 * - improve precision and reproducibility of timebase frequency
20 * measurement at boot time. (for iSeries, we calibrate the timebase 20 * measurement at boot time.
21 * against the Titan chip's clock.)
22 * - for astronomical applications: add a new function to get 21 * - for astronomical applications: add a new function to get
23 * non ambiguous timestamps even around leap seconds. This needs 22 * non ambiguous timestamps even around leap seconds. This needs
24 * a new timestamp format and a good name. 23 * a new timestamp format and a good name.
@@ -70,10 +69,6 @@
70#include <asm/vdso_datapage.h> 69#include <asm/vdso_datapage.h>
71#include <asm/firmware.h> 70#include <asm/firmware.h>
72#include <asm/cputime.h> 71#include <asm/cputime.h>
73#ifdef CONFIG_PPC_ISERIES
74#include <asm/iseries/it_lp_queue.h>
75#include <asm/iseries/hv_call_xm.h>
76#endif
77 72
78/* powerpc clocksource/clockevent code */ 73/* powerpc clocksource/clockevent code */
79 74
@@ -117,14 +112,6 @@ static struct clock_event_device decrementer_clockevent = {
117DEFINE_PER_CPU(u64, decrementers_next_tb); 112DEFINE_PER_CPU(u64, decrementers_next_tb);
118static DEFINE_PER_CPU(struct clock_event_device, decrementers); 113static DEFINE_PER_CPU(struct clock_event_device, decrementers);
119 114
120#ifdef CONFIG_PPC_ISERIES
121static unsigned long __initdata iSeries_recal_titan;
122static signed long __initdata iSeries_recal_tb;
123
124 /* Forward declaration is only needed for iSeries compiles */
125static void __init clocksource_init(void);
126#endif
127
128#define XSEC_PER_SEC (1024*1024) 115#define XSEC_PER_SEC (1024*1024)
129 116
130#ifdef CONFIG_PPC64 117#ifdef CONFIG_PPC64
@@ -259,7 +246,6 @@ void accumulate_stolen_time(void)
259 u64 sst, ust; 246 u64 sst, ust;
260 247
261 u8 save_soft_enabled = local_paca->soft_enabled; 248 u8 save_soft_enabled = local_paca->soft_enabled;
262 u8 save_hard_enabled = local_paca->hard_enabled;
263 249
264 /* We are called early in the exception entry, before 250 /* We are called early in the exception entry, before
265 * soft/hard_enabled are sync'ed to the expected state 251 * soft/hard_enabled are sync'ed to the expected state
@@ -268,7 +254,6 @@ void accumulate_stolen_time(void)
268 * complain 254 * complain
269 */ 255 */
270 local_paca->soft_enabled = 0; 256 local_paca->soft_enabled = 0;
271 local_paca->hard_enabled = 0;
272 257
273 sst = scan_dispatch_log(local_paca->starttime_user); 258 sst = scan_dispatch_log(local_paca->starttime_user);
274 ust = scan_dispatch_log(local_paca->starttime); 259 ust = scan_dispatch_log(local_paca->starttime);
@@ -277,7 +262,6 @@ void accumulate_stolen_time(void)
277 local_paca->stolen_time += ust + sst; 262 local_paca->stolen_time += ust + sst;
278 263
279 local_paca->soft_enabled = save_soft_enabled; 264 local_paca->soft_enabled = save_soft_enabled;
280 local_paca->hard_enabled = save_hard_enabled;
281} 265}
282 266
283static inline u64 calculate_stolen_time(u64 stop_tb) 267static inline u64 calculate_stolen_time(u64 stop_tb)
@@ -426,74 +410,6 @@ unsigned long profile_pc(struct pt_regs *regs)
426EXPORT_SYMBOL(profile_pc); 410EXPORT_SYMBOL(profile_pc);
427#endif 411#endif
428 412
429#ifdef CONFIG_PPC_ISERIES
430
431/*
432 * This function recalibrates the timebase based on the 49-bit time-of-day
433 * value in the Titan chip. The Titan is much more accurate than the value
434 * returned by the service processor for the timebase frequency.
435 */
436
437static int __init iSeries_tb_recal(void)
438{
439 unsigned long titan, tb;
440
441 /* Make sure we only run on iSeries */
442 if (!firmware_has_feature(FW_FEATURE_ISERIES))
443 return -ENODEV;
444
445 tb = get_tb();
446 titan = HvCallXm_loadTod();
447 if ( iSeries_recal_titan ) {
448 unsigned long tb_ticks = tb - iSeries_recal_tb;
449 unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12;
450 unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec;
451 unsigned long new_tb_ticks_per_jiffy =
452 DIV_ROUND_CLOSEST(new_tb_ticks_per_sec, HZ);
453 long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy;
454 char sign = '+';
455 /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */
456 new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ;
457
458 if ( tick_diff < 0 ) {
459 tick_diff = -tick_diff;
460 sign = '-';
461 }
462 if ( tick_diff ) {
463 if ( tick_diff < tb_ticks_per_jiffy/25 ) {
464 printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n",
465 new_tb_ticks_per_jiffy, sign, tick_diff );
466 tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
467 tb_ticks_per_sec = new_tb_ticks_per_sec;
468 calc_cputime_factors();
469 vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
470 setup_cputime_one_jiffy();
471 }
472 else {
473 printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
474 " new tb_ticks_per_jiffy = %lu\n"
475 " old tb_ticks_per_jiffy = %lu\n",
476 new_tb_ticks_per_jiffy, tb_ticks_per_jiffy );
477 }
478 }
479 }
480 iSeries_recal_titan = titan;
481 iSeries_recal_tb = tb;
482
483 /* Called here as now we know accurate values for the timebase */
484 clocksource_init();
485 return 0;
486}
487late_initcall(iSeries_tb_recal);
488
489/* Called from platform early init */
490void __init iSeries_time_init_early(void)
491{
492 iSeries_recal_tb = get_tb();
493 iSeries_recal_titan = HvCallXm_loadTod();
494}
495#endif /* CONFIG_PPC_ISERIES */
496
497#ifdef CONFIG_IRQ_WORK 413#ifdef CONFIG_IRQ_WORK
498 414
499/* 415/*
@@ -550,16 +466,6 @@ void arch_irq_work_raise(void)
550#endif /* CONFIG_IRQ_WORK */ 466#endif /* CONFIG_IRQ_WORK */
551 467
552/* 468/*
553 * For iSeries shared processors, we have to let the hypervisor
554 * set the hardware decrementer. We set a virtual decrementer
555 * in the lppaca and call the hypervisor if the virtual
556 * decrementer is less than the current value in the hardware
557 * decrementer. (almost always the new decrementer value will
558 * be greater than the current hardware decrementer so the hypervisor
559 * call will not be needed)
560 */
561
562/*
563 * timer_interrupt - gets called when the decrementer overflows, 469 * timer_interrupt - gets called when the decrementer overflows,
564 * with interrupts disabled. 470 * with interrupts disabled.
565 */ 471 */
@@ -580,6 +486,11 @@ void timer_interrupt(struct pt_regs * regs)
580 if (!cpu_online(smp_processor_id())) 486 if (!cpu_online(smp_processor_id()))
581 return; 487 return;
582 488
489 /* Conditionally hard-enable interrupts now that the DEC has been
490 * bumped to its maximum value
491 */
492 may_hard_irq_enable();
493
583 trace_timer_interrupt_entry(regs); 494 trace_timer_interrupt_entry(regs);
584 495
585 __get_cpu_var(irq_stat).timer_irqs++; 496 __get_cpu_var(irq_stat).timer_irqs++;
@@ -597,20 +508,10 @@ void timer_interrupt(struct pt_regs * regs)
597 irq_work_run(); 508 irq_work_run();
598 } 509 }
599 510
600#ifdef CONFIG_PPC_ISERIES
601 if (firmware_has_feature(FW_FEATURE_ISERIES))
602 get_lppaca()->int_dword.fields.decr_int = 0;
603#endif
604
605 *next_tb = ~(u64)0; 511 *next_tb = ~(u64)0;
606 if (evt->event_handler) 512 if (evt->event_handler)
607 evt->event_handler(evt); 513 evt->event_handler(evt);
608 514
609#ifdef CONFIG_PPC_ISERIES
610 if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
611 process_hvlpevents();
612#endif
613
614#ifdef CONFIG_PPC64 515#ifdef CONFIG_PPC64
615 /* collect purr register values often, for accurate calculations */ 516 /* collect purr register values often, for accurate calculations */
616 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 517 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
@@ -982,9 +883,8 @@ void __init time_init(void)
982 */ 883 */
983 start_cpu_decrementer(); 884 start_cpu_decrementer();
984 885
985 /* Register the clocksource, if we're not running on iSeries */ 886 /* Register the clocksource */
986 if (!firmware_has_feature(FW_FEATURE_ISERIES)) 887 clocksource_init();
987 clocksource_init();
988 888
989 init_decrementer_clockevent(); 889 init_decrementer_clockevent();
990} 890}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c091527efd89..a750409ccc4e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -57,6 +57,7 @@
57#include <asm/kexec.h> 57#include <asm/kexec.h>
58#include <asm/ppc-opcode.h> 58#include <asm/ppc-opcode.h>
59#include <asm/rio.h> 59#include <asm/rio.h>
60#include <asm/fadump.h>
60 61
61#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) 62#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
62int (*__debugger)(struct pt_regs *regs) __read_mostly; 63int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -145,6 +146,8 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
145 arch_spin_unlock(&die_lock); 146 arch_spin_unlock(&die_lock);
146 raw_local_irq_restore(flags); 147 raw_local_irq_restore(flags);
147 148
149 crash_fadump(regs, "die oops");
150
148 /* 151 /*
149 * A system reset (0x100) is a request to dump, so we always send 152 * A system reset (0x100) is a request to dump, so we always send
150 * it through the crashdump code. 153 * it through the crashdump code.
@@ -244,6 +247,9 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
244 addr, regs->nip, regs->link, code); 247 addr, regs->nip, regs->link, code);
245 } 248 }
246 249
250 if (!arch_irq_disabled_regs(regs))
251 local_irq_enable();
252
247 memset(&info, 0, sizeof(info)); 253 memset(&info, 0, sizeof(info));
248 info.si_signo = signr; 254 info.si_signo = signr;
249 info.si_code = code; 255 info.si_code = code;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 7d14bb697d40..d36ee1055f88 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -263,17 +263,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
263 * the "data" page of the vDSO or you'll stop getting kernel updates 263 * the "data" page of the vDSO or you'll stop getting kernel updates
264 * and your nice userland gettimeofday will be totally dead. 264 * and your nice userland gettimeofday will be totally dead.
265 * It's fine to use that for setting breakpoints in the vDSO code 265 * It's fine to use that for setting breakpoints in the vDSO code
266 * pages though 266 * pages though.
267 *
268 * Make sure the vDSO gets into every core dump.
269 * Dumping its contents makes post-mortem fully interpretable later
270 * without matching up the same kernel and hardware config to see
271 * what PC values meant.
272 */ 267 */
273 rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, 268 rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
274 VM_READ|VM_EXEC| 269 VM_READ|VM_EXEC|
275 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 270 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
276 VM_ALWAYSDUMP,
277 vdso_pagelist); 271 vdso_pagelist);
278 if (rc) { 272 if (rc) {
279 current->mm->context.vdso_base = 0; 273 current->mm->context.vdso_base = 0;
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 8b086299ba25..bca3fc427b45 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -34,11 +34,6 @@
34#include <asm/abs_addr.h> 34#include <asm/abs_addr.h>
35#include <asm/page.h> 35#include <asm/page.h>
36#include <asm/hvcall.h> 36#include <asm/hvcall.h>
37#include <asm/iseries/vio.h>
38#include <asm/iseries/hv_types.h>
39#include <asm/iseries/hv_lp_config.h>
40#include <asm/iseries/hv_call_xm.h>
41#include <asm/iseries/iommu.h>
42 37
43static struct bus_type vio_bus_type; 38static struct bus_type vio_bus_type;
44 39
@@ -1042,7 +1037,6 @@ static void vio_cmo_sysfs_init(void)
1042 vio_bus_type.bus_attrs = vio_cmo_bus_attrs; 1037 vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
1043} 1038}
1044#else /* CONFIG_PPC_SMLPAR */ 1039#else /* CONFIG_PPC_SMLPAR */
1045/* Dummy functions for iSeries platform */
1046int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } 1040int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
1047void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} 1041void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
1048static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } 1042static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
@@ -1060,9 +1054,6 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
1060 struct iommu_table *tbl; 1054 struct iommu_table *tbl;
1061 unsigned long offset, size; 1055 unsigned long offset, size;
1062 1056
1063 if (firmware_has_feature(FW_FEATURE_ISERIES))
1064 return vio_build_iommu_table_iseries(dev);
1065
1066 dma_window = of_get_property(dev->dev.of_node, 1057 dma_window = of_get_property(dev->dev.of_node,
1067 "ibm,my-dma-window", NULL); 1058 "ibm,my-dma-window", NULL);
1068 if (!dma_window) 1059 if (!dma_window)
@@ -1195,8 +1186,7 @@ static void __devinit vio_dev_release(struct device *dev)
1195{ 1186{
1196 struct iommu_table *tbl = get_iommu_table_base(dev); 1187 struct iommu_table *tbl = get_iommu_table_base(dev);
1197 1188
1198 /* iSeries uses a common table for all vio devices */ 1189 if (tbl)
1199 if (!firmware_has_feature(FW_FEATURE_ISERIES) && tbl)
1200 iommu_free_table(tbl, dev->of_node ? 1190 iommu_free_table(tbl, dev->of_node ?
1201 dev->of_node->full_name : dev_name(dev)); 1191 dev->of_node->full_name : dev_name(dev));
1202 of_node_put(dev->of_node); 1192 of_node_put(dev->of_node);
@@ -1244,12 +1234,6 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
1244 viodev->name = of_node->name; 1234 viodev->name = of_node->name;
1245 viodev->type = of_node->type; 1235 viodev->type = of_node->type;
1246 viodev->unit_address = *unit_address; 1236 viodev->unit_address = *unit_address;
1247 if (firmware_has_feature(FW_FEATURE_ISERIES)) {
1248 unit_address = of_get_property(of_node,
1249 "linux,unit_address", NULL);
1250 if (unit_address != NULL)
1251 viodev->unit_address = *unit_address;
1252 }
1253 viodev->dev.of_node = of_node_get(of_node); 1237 viodev->dev.of_node = of_node_get(of_node);
1254 1238
1255 if (firmware_has_feature(FW_FEATURE_CMO)) 1239 if (firmware_has_feature(FW_FEATURE_CMO))
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 710a54005dfb..65d1c08cf09e 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -109,11 +109,6 @@ SECTIONS
109 __ptov_table_begin = .; 109 __ptov_table_begin = .;
110 *(.ptov_fixup); 110 *(.ptov_fixup);
111 __ptov_table_end = .; 111 __ptov_table_end = .;
112#ifdef CONFIG_PPC_ISERIES
113 __dt_strings_start = .;
114 *(.dt_strings);
115 __dt_strings_end = .;
116#endif
117 } 112 }
118 113
119 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { 114 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {