diff options

author		Ingo Molnar <mingo@kernel.org>	2012-03-26 11:18:44 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-03-26 11:19:03 -0400
commit		7fd52392c56361a40f0c630a82b36b95ca31eac6 (patch)
tree		14091de24c6b28ea4cae9826f98aeedb7be091f5 /arch/powerpc/kernel
parent		b01c3a0010aabadf745f3e7fdb9cab682e0a28a2 (diff)
parent		e22057c8599373e5caef0bc42bdb95d2a361ab0d (diff)

Merge branch 'linus' into perf/urgent

Merge reason: we need to fix a non-trivial merge conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

Diffstat (limited to 'arch/powerpc/kernel')
 55 files changed, 2076 insertions, 8251 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ee728e433aa2..f5808a35688c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO) += vio.o
 obj-$(CONFIG_IBMEBUS)		+= ibmebus.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_FA_DUMP)		+= fadump.o
 ifeq ($(CONFIG_PPC32),y)
 obj-$(CONFIG_E500)		+= idle_e500.o
 endif
@@ -113,15 +114,6 @@ obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
-obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
-
-obj-$(CONFIG_PPC_PERF_CTRS)	+= perf_event.o
-obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
-				   power5+-pmu.o power6-pmu.o power7-pmu.o
-obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
-
-obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o
-obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 04caee7d9bc1..cc492e48ddfa 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -46,9 +46,6 @@
 #include <asm/hvcall.h>
 #include <asm/xics.h>
 #endif
-#ifdef CONFIG_PPC_ISERIES
-#include <asm/iseries/alpaca.h>
-#endif
 #ifdef CONFIG_PPC_POWERNV
 #include <asm/opal.h>
 #endif
@@ -147,7 +144,7 @@ int main(void)
 	DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
-	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+	DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
@@ -384,17 +381,6 @@ int main(void)
 	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
 #endif
 
-#ifdef CONFIG_PPC_ISERIES
-	/* the assembler miscalculates the VSID values */
-	DEFINE(PAGE_OFFSET_ESID, GET_ESID(PAGE_OFFSET));
-	DEFINE(PAGE_OFFSET_VSID, KERNEL_VSID(PAGE_OFFSET));
-	DEFINE(VMALLOC_START_ESID, GET_ESID(VMALLOC_START));
-	DEFINE(VMALLOC_START_VSID, KERNEL_VSID(VMALLOC_START));
-
-	/* alpaca */
-	DEFINE(ALPACA_SIZE, sizeof(struct alpaca));
-#endif
-
 	DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
 	DEFINE(PTE_SIZE, sizeof(pte_t));
 
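asm-offsets.c is the usual kbuild trick for making C structure layouts visible to assembly: the file is compiled but never linked, and each DEFINE() plants a marker that the build scrapes into the generated asm-offsets.h. The hunk above simply swaps the PACAHARDIRQEN constant for PACAIRQHAPPENED, matching the paca_struct field change. A minimal sketch of the idiom, with the struct reduced to the two fields of interest (the DEFINE macro mirrors include/linux/kbuild.h; the reduced struct is illustrative, not the kernel's):

#include <stddef.h>

/* each DEFINE() emits a "->SYM value" marker into the generated
 * assembly; a sed pass turns those markers into #define lines */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

struct paca_struct {			/* reduced for illustration */
	unsigned char soft_enabled;	/* irqs soft-enabled? */
	unsigned char irq_happened;	/* PACA_IRQ_* bits latched while masked */
};

int main(void)
{
	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
	DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
	return 0;
}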
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 81db9e2a8a20..138ae183c440 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1816,7 +1816,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.platform		= "ppc440",
 	},
 	{ /* 464 in APM821xx */
-		.pvr_mask		= 0xffffff00,
+		.pvr_mask		= 0xfffffff0,
 		.pvr_value		= 0x12C41C80,
 		.cpu_name		= "APM821XX",
 		.cpu_features		= CPU_FTRS_44X,
@@ -2019,6 +2019,24 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_e500mc,
 		.platform		= "ppce5500",
 	},
+	{	/* e6500 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80400000,
+		.cpu_name		= "e6500",
+		.cpu_features		= CPU_FTRS_E6500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
+			MMU_FTR_USE_TLBILX,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 4,
+		.oprofile_cpu_type	= "ppc/e6500",
+		.oprofile_type		= PPC_OPROFILE_FSL_EMB,
+		.cpu_setup		= __setup_cpu_e5500,
+		.cpu_restore		= __restore_cpu_e5500,
+		.machine_check		= machine_check_e500mc,
+		.platform		= "ppce6500",
+	},
 #ifdef CONFIG_PPC32
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
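The cpu_specs[] entries above are matched against the processor version register (PVR): an entry applies when (pvr & pvr_mask) == pvr_value. That is why the old APM821xx mask of 0xffffff00 could never match: it clears the 0x80 bit that pvr_value 0x12C41C80 requires, whereas 0xfffffff0 keeps that bit while still wildcarding the low-nibble revision. A sketch of the matching walk (the helper name and two-field struct are illustrative simplifications of identify_cpu(), not the kernel's definitions):

/* sketch of first-match PVR lookup over a cpu_specs[]-style table */
struct cpu_spec_min {
	unsigned int pvr_mask;
	unsigned int pvr_value;
};

static const struct cpu_spec_min specs[] = {
	{ 0xfffffff0, 0x12C41C80 },	/* APM821xx, fixed mask */
	{ 0xffff0000, 0x80400000 },	/* e6500, new entry above */
	{ 0x00000000, 0x00000000 },	/* default match, must be last */
};

static int match_pvr(unsigned int pvr)
{
	unsigned int i;

	for (i = 0; i < sizeof(specs) / sizeof(specs[0]); i++)
		if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
			return i;	/* first hit wins */
	return -1;
}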
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 2cc451aaaca7..5b25c8060fd6 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -37,6 +37,8 @@ void doorbell_exception(struct pt_regs *regs)
 
 	irq_enter();
 
+	may_hard_irq_enable();
+
 	smp_ipi_demux();
 
 	irq_exit();
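may_hard_irq_enable() belongs to the lazy-interrupt rework this merge brings in: a handler running soft-disabled may turn MSR:EE back on early, provided no level-sensitive external interrupt is still latched in paca->irq_happened. A sketch of the helper's shape, assuming the asm/hw_irq.h definitions of this kernel generation (treat the details as an assumption, not a quote):

/* sketch: re-enable hard interrupts early inside an exception,
 * unless a level-sensitive external interrupt is still pending */
static inline void may_hard_irq_enable(void)
{
	get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
	if (!(get_paca()->irq_happened & PACA_IRQ_EE))
		__hard_irq_enable();
}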
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
deleted file mode 100644
index cb2e2949c8d1..000000000000
--- a/arch/powerpc/kernel/e500-pmu.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Performance counter support for e500 family processors.
- *
- * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- * Copyright 2010 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/string.h>
-#include <linux/perf_event.h>
-#include <asm/reg.h>
-#include <asm/cputable.h>
-
-/*
- * Map of generic hardware event types to hardware events
- * Zero if unsupported
- */
-static int e500_generic_events[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] = 1,
-	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
-	[PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
-	[PERF_COUNT_HW_BRANCH_MISSES] = 15,
-};
-
-#define C(x)	PERF_COUNT_HW_CACHE_##x
-
-/*
- * Table of generalized cache-related events.
- * 0 means not supported, -1 means nonsensical, other values
- * are event codes.
- */
-static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
-	/*
-	 * D-cache misses are not split into read/write/prefetch;
-	 * use raw event 41.
-	 */
-	[C(L1D)] = {		/*	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	27,		0	},
-		[C(OP_WRITE)] = {	28,		0	},
-		[C(OP_PREFETCH)] = {	29,		0	},
-	},
-	[C(L1I)] = {		/*	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	2,		60	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	0,		0	},
-	},
-	/*
-	 * Assuming LL means L2, it's not a good match for this model.
-	 * It allocates only on L1 castout or explicit prefetch, and
-	 * does not have separate read/write events (but it does have
-	 * separate instruction/data events).
-	 */
-	[C(LL)] = {		/*	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0,		0	},
-		[C(OP_WRITE)] = {	0,		0	},
-		[C(OP_PREFETCH)] = {	0,		0	},
-	},
-	/*
-	 * There are data/instruction MMU misses, but that's a miss on
-	 * the chip's internal level-one TLB which is probably not
-	 * what the user wants. Instead, unified level-two TLB misses
-	 * are reported here.
-	 */
-	[C(DTLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	26,		66	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	-1,		-1	},
-	},
-	[C(BPU)] = {		/*	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	12,		15	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	-1,		-1	},
-	},
-	[C(NODE)] = {		/*	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	-1,		-1	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	-1,		-1	},
-	},
-};
-
-static int num_events = 128;
-
-/* Upper half of event id is PMLCb, for threshold events */
-static u64 e500_xlate_event(u64 event_id)
-{
-	u32 event_low = (u32)event_id;
-	u64 ret;
-
-	if (event_low >= num_events)
-		return 0;
-
-	ret = FSL_EMB_EVENT_VALID;
-
-	if (event_low >= 76 && event_low <= 81) {
-		ret |= FSL_EMB_EVENT_RESTRICTED;
-		ret |= event_id &
-		       (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH);
-	} else if (event_id &
-	           (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) {
-		/* Threshold requested on non-threshold event */
-		return 0;
-	}
-
-	return ret;
-}
-
-static struct fsl_emb_pmu e500_pmu = {
-	.name			= "e500 family",
-	.n_counter		= 4,
-	.n_restricted		= 2,
-	.xlate_event		= e500_xlate_event,
-	.n_generic		= ARRAY_SIZE(e500_generic_events),
-	.generic_events		= e500_generic_events,
-	.cache_events		= &e500_cache_events,
-};
-
-static int init_e500_pmu(void)
-{
-	if (!cur_cpu_spec->oprofile_cpu_type)
-		return -ENODEV;
-
-	if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
-		num_events = 256;
-	else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
-		return -ENODEV;
-
-	return register_fsl_emb_pmu(&e500_pmu);
-}
-
-early_initcall(init_e500_pmu);
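Note that this deletion, together with the PMU objects dropped from the Makefile hunk earlier, does not remove e500 perf support: in this merge window the PowerPC perf code appears to have been consolidated under arch/powerpc/perf/, which sits outside this diffstat (limited to arch/powerpc/kernel), so only the removals show here. For orientation, the cache table above is indexed by (cache, op, result) triples; a sketch of how a fsl_emb-style driver would map a generalized cache event to a raw event code (the function name and error constants are illustrative):

/* sketch: resolve a generic cache event against e500_cache_events[] */
static int cache_event_to_raw(int cache, int op, int result)
{
	int code = e500_cache_events[cache][op][result];

	if (code == 0)		/* 0: not supported on this PMU */
		return -1;
	if (code == -1)		/* -1: nonsensical combination */
		return -1;
	return code;		/* otherwise a raw event number, e.g. 41 */
}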
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 866462cbe2d8..f8a7a1a1a9f4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -32,6 +32,7 @@
 #include <asm/ptrace.h>
 #include <asm/irqflags.h>
 #include <asm/ftrace.h>
+#include <asm/hw_irq.h>
 
 /*
  * System calls.
@@ -115,39 +116,33 @@ BEGIN_FW_FTR_SECTION
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	.trace_hardirqs_on
-	REST_GPR(0,r1)
-	REST_4GPRS(3,r1)
-	REST_2GPRS(7,r1)
-	addi	r9,r1,STACK_FRAME_OVERHEAD
-	ld	r12,_MSR(r1)
-#endif /* CONFIG_TRACE_IRQFLAGS */
-	li	r10,1
-	stb	r10,PACASOFTIRQEN(r13)
-	stb	r10,PACAHARDIRQEN(r13)
-	std	r10,SOFTE(r1)
-#ifdef CONFIG_PPC_ISERIES
-BEGIN_FW_FTR_SECTION
-	/* Hack for handling interrupts when soft-enabling on iSeries */
-	cmpdi	cr1,r0,0x5555		/* syscall 0x5555 */
-	andi.	r10,r12,MSR_PR		/* from kernel */
-	crand	4*cr0+eq,4*cr1+eq,4*cr0+eq
-	bne	2f
-	b	hardware_interrupt_entry
-2:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-#endif /* CONFIG_PPC_ISERIES */
+/*
+ * A syscall should always be called with interrupts enabled
+ * so we just unconditionally hard-enable here. When some kind
+ * of irq tracing is used, we additionally check that condition
+ * is correct
+ */
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
+	lbz	r10,PACASOFTIRQEN(r13)
+	xori	r10,r10,1
+1:	tdnei	r10,0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
 
-	/* Hard enable interrupts */
 #ifdef CONFIG_PPC_BOOK3E
 	wrteei	1
 #else
-	mfmsr	r11
+	ld	r11,PACAKMSR(r13)
 	ori	r11,r11,MSR_EE
 	mtmsrd	r11,1
 #endif /* CONFIG_PPC_BOOK3E */
 
+	/* We do need to set SOFTE in the stack frame or the return
+	 * from interrupt will be painful
+	 */
+	li	r10,1
+	std	r10,SOFTE(r1)
+
 #ifdef SHOW_SYSCALLS
 	bl	.do_show_syscall
 	REST_GPR(0,r1)
@@ -198,16 +193,14 @@ syscall_exit:
 	andi.	r10,r8,MSR_RI
 	beq-	unrecov_restore
 #endif
-
-	/* Disable interrupts so current_thread_info()->flags can't change,
+	/*
+	 * Disable interrupts so current_thread_info()->flags can't change,
 	 * and so that we don't get interrupted after loading SRR0/1.
 	 */
 #ifdef CONFIG_PPC_BOOK3E
 	wrteei	0
 #else
-	mfmsr	r10
-	rldicl	r10,r10,48,1
-	rotldi	r10,r10,16
+	ld	r10,PACAKMSR(r13)
 	mtmsrd	r10,1
 #endif /* CONFIG_PPC_BOOK3E */
 
@@ -319,7 +312,7 @@ syscall_exit_work:
 #ifdef CONFIG_PPC_BOOK3E
 	wrteei	1
 #else
-	mfmsr	r10
+	ld	r10,PACAKMSR(r13)
 	ori	r10,r10,MSR_EE
 	mtmsrd	r10,1
 #endif /* CONFIG_PPC_BOOK3E */
@@ -565,10 +558,8 @@ _GLOBAL(ret_from_except_lite)
 #ifdef CONFIG_PPC_BOOK3E
 	wrteei	0
 #else
-	mfmsr	r10		/* Get current interrupt state */
-	rldicl	r9,r10,48,1	/* clear MSR_EE */
-	rotldi	r9,r9,16
-	mtmsrd	r9,1		/* Update machine state */
+	ld	r10,PACAKMSR(r13) /* Get kernel MSR without EE */
+	mtmsrd	r10,1		  /* Update machine state */
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PREEMPT
@@ -591,25 +582,74 @@ _GLOBAL(ret_from_except_lite)
 	ld	r4,TI_FLAGS(r9)
 	andi.	r0,r4,_TIF_USER_WORK_MASK
 	bne	do_work
-#endif
+#endif /* !CONFIG_PREEMPT */
 
+	.globl	fast_exc_return_irq
+fast_exc_return_irq:
 restore:
-BEGIN_FW_FTR_SECTION
+	/*
+	 * This is the main kernel exit path, we first check if we
+	 * have to change our interrupt state.
+	 */
 	ld	r5,SOFTE(r1)
-FW_FTR_SECTION_ELSE
-	b	.Liseries_check_pending_irqs
-ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
-2:
-	TRACE_AND_RESTORE_IRQ(r5);
+	lbz	r6,PACASOFTIRQEN(r13)
+	cmpwi	cr1,r5,0
+	cmpw	cr0,r5,r6
+	beq	cr0,4f
+
+	/* We do, handle disable first, which is easy */
+	bne	cr1,3f;
+	li	r0,0
+	stb	r0,PACASOFTIRQEN(r13);
+	TRACE_DISABLE_INTS
+	b	4f
 
-	/* extract EE bit and use it to restore paca->hard_enabled */
-	ld	r3,_MSR(r1)
-	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
-	stb	r4,PACAHARDIRQEN(r13)
+3:	/*
+	 * We are about to soft-enable interrupts (we are hard disabled
+	 * at this point). We check if there's anything that needs to
+	 * be replayed first.
+	 */
+	lbz	r0,PACAIRQHAPPENED(r13)
+	cmpwi	cr0,r0,0
+	bne-	restore_check_irq_replay
 
+	/*
+	 * Get here when nothing happened while soft-disabled, just
+	 * soft-enable and move-on. We will hard-enable as a side
+	 * effect of rfi
+	 */
+restore_no_replay:
+	TRACE_ENABLE_INTS
+	li	r0,1
+	stb	r0,PACASOFTIRQEN(r13);
+
+	/*
+	 * Final return path. BookE is handled in a different file
+	 */
+4:
 #ifdef CONFIG_PPC_BOOK3E
 	b	.exception_return_book3e
 #else
+	/*
+	 * Clear the reservation. If we know the CPU tracks the address of
+	 * the reservation then we can potentially save some cycles and use
+	 * a larx. On POWER6 and POWER7 this is significantly faster.
+	 */
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	ldarx	r4,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+	/*
+	 * Some code path such as load_up_fpu or altivec return directly
+	 * here. They run entirely hard disabled and do not alter the
+	 * interrupt state. They also don't use lwarx/stwcx. and thus
+	 * are known not to leave dangling reservations.
+	 */
+	.globl	fast_exception_return
+fast_exception_return:
+	ld	r3,_MSR(r1)
 	ld	r4,_CTR(r1)
 	ld	r0,_LINK(r1)
 	mtctr	r4
@@ -623,28 +663,18 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 	beq-	unrecov_restore
 
 	/*
-	 * Clear the reservation. If we know the CPU tracks the address of
-	 * the reservation then we can potentially save some cycles and use
-	 * a larx. On POWER6 and POWER7 this is significantly faster.
-	 */
-BEGIN_FTR_SECTION
-	stdcx.	r0,0,r1		/* to clear the reservation */
-FTR_SECTION_ELSE
-	ldarx	r4,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-	/*
 	 * Clear RI before restoring r13. If we are returning to
 	 * userspace and we take an exception after restoring r13,
 	 * we end up corrupting the userspace r13 value.
 	 */
-	mfmsr	r4
+	ld	r4,PACAKMSR(r13) /* Get kernel MSR without EE */
 	andc	r4,r4,r0	/* r0 contains MSR_RI here */
 	mtmsrd	r4,1
 
 	/*
 	 * r13 is our per cpu area, only restore it if we are returning to
-	 * userspace
+	 * userspace the value stored in the stack frame may belong to
+	 * another CPU.
 	 */
 	andi.	r0,r3,MSR_PR
 	beq	1f
@@ -669,30 +699,55 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
 #endif /* CONFIG_PPC_BOOK3E */
 
-.Liseries_check_pending_irqs:
-#ifdef CONFIG_PPC_ISERIES
-	ld	r5,SOFTE(r1)
-	cmpdi	0,r5,0
-	beq	2b
-	/* Check for pending interrupts (iSeries) */
-	ld	r3,PACALPPACAPTR(r13)
-	ld	r3,LPPACAANYINT(r3)
-	cmpdi	r3,0
-	beq+	2b			/* skip do_IRQ if no interrupts */
-
-	li	r3,0
-	stb	r3,PACASOFTIRQEN(r13)	/* ensure we are soft-disabled */
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	.trace_hardirqs_off
-	mfmsr	r10
-#endif
-	ori	r10,r10,MSR_EE
-	mtmsrd	r10			/* hard-enable again */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_IRQ
-	b	.ret_from_except_lite		/* loop back and handle more */
-#endif
+/*
+ * Something did happen, check if a re-emit is needed
+ * (this also clears paca->irq_happened)
+ */
+restore_check_irq_replay:
+	/* XXX: We could implement a fast path here where we check
+	 * for irq_happened being just 0x01, in which case we can
+	 * clear it and return. That means that we would potentially
+	 * miss a decrementer having wrapped all the way around.
+	 *
+	 * Still, this might be useful for things like hash_page
+	 */
+	bl	.__check_irq_replay
+	cmpwi	cr0,r3,0
+	beq	restore_no_replay
+
+	/*
+	 * We need to re-emit an interrupt. We do so by re-using our
+	 * existing exception frame. We first change the trap value,
+	 * but we need to ensure we preserve the low nibble of it
+	 */
+	ld	r4,_TRAP(r1)
+	clrldi	r4,r4,60
+	or	r4,r4,r3
+	std	r4,_TRAP(r1)
 
+	/*
+	 * Then find the right handler and call it. Interrupts are
+	 * still soft-disabled and we keep them that way.
+	 */
+	cmpwi	cr0,r3,0x500
+	bne	1f
+	addi	r3,r1,STACK_FRAME_OVERHEAD;
+	bl	.do_IRQ
+	b	.ret_from_except
+1:	cmpwi	cr0,r3,0x900
+	bne	1f
+	addi	r3,r1,STACK_FRAME_OVERHEAD;
+	bl	.timer_interrupt
+	b	.ret_from_except
+#ifdef CONFIG_PPC_BOOK3E
+1:	cmpwi	cr0,r3,0x280
+	bne	1f
+	addi	r3,r1,STACK_FRAME_OVERHEAD;
+	bl	.doorbell_exception
+	b	.ret_from_except
+#endif /* CONFIG_PPC_BOOK3E */
+1:	b	.ret_from_except /* What else to do here ? */
+
 do_work:
 #ifdef CONFIG_PREEMPT
 	andi.	r0,r3,MSR_PR	/* Returning to user mode? */
@@ -705,31 +760,22 @@ do_work:
 	crandc	eq,cr1*4+eq,eq
 	bne	restore
 
-	/* Here we are preempting the current task.
-	 *
-	 * Ensure interrupts are soft-disabled. We also properly mark
-	 * the PACA to reflect the fact that they are hard-disabled
-	 * and trace the change
+	/*
+	 * Here we are preempting the current task. We want to make
+	 * sure we are soft-disabled first
 	 */
-	li	r0,0
-	stb	r0,PACASOFTIRQEN(r13)
-	stb	r0,PACAHARDIRQEN(r13)
-	TRACE_DISABLE_INTS
-
-	/* Call the scheduler with soft IRQs off */
+	SOFT_DISABLE_INTS(r3,r4)
 1:	bl	.preempt_schedule_irq
 
 	/* Hard-disable interrupts again (and update PACA) */
 #ifdef CONFIG_PPC_BOOK3E
 	wrteei	0
 #else
-	mfmsr	r10
-	rldicl	r10,r10,48,1
-	rotldi	r10,r10,16
+	ld	r10,PACAKMSR(r13) /* Get kernel MSR without EE */
 	mtmsrd	r10,1
 #endif /* CONFIG_PPC_BOOK3E */
-	li	r0,0
-	stb	r0,PACAHARDIRQEN(r13)
+	li	r0,PACA_IRQ_HARD_DIS
+	stb	r0,PACAIRQHAPPENED(r13)
 
 	/* Re-test flags and eventually loop */
 	clrrdi	r9,r1,THREAD_SHIFT
@@ -751,14 +797,12 @@ user_work:
 
 	andi.	r0,r4,_TIF_NEED_RESCHED
 	beq	1f
-	li	r5,1
-	TRACE_AND_RESTORE_IRQ(r5);
+	bl	.restore_interrupts
 	bl	.schedule
 	b	.ret_from_except_lite
 
 1:	bl	.save_nvgprs
-	li	r5,1
-	TRACE_AND_RESTORE_IRQ(r5);
+	bl	.restore_interrupts
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.do_notify_resume
 	b	.ret_from_except
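The reworked restore path above is the assembly half of the lazy-interrupt scheme: soft-enabling is a single byte store, and the exit path only branches to restore_check_irq_replay when paca->irq_happened is non-zero. The C half lives in arch/powerpc/kernel/irq.c; the sketch below is a condensed, from-memory rendering of arch_local_irq_restore() in this kernel generation, so take names and exact ordering as approximate:

/* condensed sketch of the C side of lazy interrupt enabling */
notrace void arch_local_irq_restore(unsigned long en)
{
	unsigned int replay;

	set_soft_enabled(en);		/* paca->soft_enabled = en */
	if (!en)
		return;			/* disabling is always cheap */

	if (!get_paca()->irq_happened)
		return;			/* nothing fired while masked */

	/* hard-disable so __check_irq_replay() sees a stable state */
	if (get_paca()->irq_happened != PACA_IRQ_HARD_DIS)
		__hard_irq_disable();
	set_soft_enabled(0);

	replay = __check_irq_replay();	/* 0x500, 0x900, ... or 0 */
	set_soft_enabled(1);

	if (replay)
		__replay_interrupt(replay); /* fake the exception entry */
	else
		__hard_irq_enable();
}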
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 429983c06f91..7215cc2495df 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -24,6 +24,7 @@
 #include <asm/ptrace.h>
 #include <asm/ppc-opcode.h>
 #include <asm/mmu.h>
+#include <asm/hw_irq.h>
 
 /* XXX This will ultimately add space for a special exception save
  *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -77,59 +78,55 @@
 #define SPRN_MC_SRR1	SPRN_MCSRR1
 
 #define NORMAL_EXCEPTION_PROLOG(n, addition)				\
-	EXCEPTION_PROLOG(n, GEN, addition##_GEN)
+	EXCEPTION_PROLOG(n, GEN, addition##_GEN(n))
 
 #define CRIT_EXCEPTION_PROLOG(n, addition)				\
-	EXCEPTION_PROLOG(n, CRIT, addition##_CRIT)
+	EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n))
 
 #define DBG_EXCEPTION_PROLOG(n, addition)				\
-	EXCEPTION_PROLOG(n, DBG, addition##_DBG)
+	EXCEPTION_PROLOG(n, DBG, addition##_DBG(n))
 
 #define MC_EXCEPTION_PROLOG(n, addition)				\
-	EXCEPTION_PROLOG(n, MC, addition##_MC)
+	EXCEPTION_PROLOG(n, MC, addition##_MC(n))
 
 
 /* Variants of the "addition" argument for the prolog
  */
-#define PROLOG_ADDITION_NONE_GEN
-#define PROLOG_ADDITION_NONE_CRIT
-#define PROLOG_ADDITION_NONE_DBG
-#define PROLOG_ADDITION_NONE_MC
+#define PROLOG_ADDITION_NONE_GEN(n)
+#define PROLOG_ADDITION_NONE_CRIT(n)
+#define PROLOG_ADDITION_NONE_DBG(n)
+#define PROLOG_ADDITION_NONE_MC(n)
 
-#define PROLOG_ADDITION_MASKABLE_GEN					\
+#define PROLOG_ADDITION_MASKABLE_GEN(n)					\
 	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	\
 	cmpwi	cr0,r11,0;		/* yes -> go out of line */	\
-	beq	masked_interrupt_book3e;
+	beq	masked_interrupt_book3e_##n
 
-#define PROLOG_ADDITION_2REGS_GEN					\
+#define PROLOG_ADDITION_2REGS_GEN(n)					\
 	std	r14,PACA_EXGEN+EX_R14(r13);				\
 	std	r15,PACA_EXGEN+EX_R15(r13)
 
-#define PROLOG_ADDITION_1REG_GEN					\
+#define PROLOG_ADDITION_1REG_GEN(n)					\
 	std	r14,PACA_EXGEN+EX_R14(r13);
 
-#define PROLOG_ADDITION_2REGS_CRIT					\
+#define PROLOG_ADDITION_2REGS_CRIT(n)					\
 	std	r14,PACA_EXCRIT+EX_R14(r13);				\
 	std	r15,PACA_EXCRIT+EX_R15(r13)
 
-#define PROLOG_ADDITION_2REGS_DBG					\
+#define PROLOG_ADDITION_2REGS_DBG(n)					\
 	std	r14,PACA_EXDBG+EX_R14(r13);				\
 	std	r15,PACA_EXDBG+EX_R15(r13)
 
-#define PROLOG_ADDITION_2REGS_MC					\
+#define PROLOG_ADDITION_2REGS_MC(n)					\
 	std	r14,PACA_EXMC+EX_R14(r13);				\
 	std	r15,PACA_EXMC+EX_R15(r13)
 
-#define PROLOG_ADDITION_DOORBELL_GEN					\
-	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	\
-	cmpwi	cr0,r11,0;		/* yes -> go out of line */	\
-	beq	masked_doorbell_book3e
-
 
 /* Core exception code for all exceptions except TLB misses.
  * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
  */
 #define EXCEPTION_COMMON(n, excf, ints)					\
+exc_##n##_common:							\
 	std	r0,GPR0(r1);		/* save r0 in stackframe */	\
 	std	r2,GPR2(r1);		/* save r2 in stackframe */	\
 	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe */\
@@ -167,20 +164,21 @@
 	std	r0,RESULT(r1);		/* clear regs->result */	\
 	ints;
 
-/* Variants for the "ints" argument */
+/* Variants for the "ints" argument. This one does nothing when we want
+ * to keep interrupts in their original state
+ */
 #define INTS_KEEP
-#define INTS_DISABLE_SOFT						\
-	stb	r0,PACASOFTIRQEN(r13);	/* mark interrupts soft-disabled */ \
-	TRACE_DISABLE_INTS;
-#define INTS_DISABLE_HARD						\
-	stb	r0,PACAHARDIRQEN(r13);	/* and hard disabled */
-#define INTS_DISABLE_ALL						\
-	INTS_DISABLE_SOFT						\
-	INTS_DISABLE_HARD
-
-/* This is called by exceptions that used INTS_KEEP (that is did not clear
- * neither soft nor hard IRQ indicators in the PACA. This will restore MSR:EE
- * to it's previous value
+
+/* This second version is meant for exceptions that don't immediately
+ * hard-enable. We set a bit in paca->irq_happened to ensure that
+ * a subsequent call to arch_local_irq_restore() will properly
+ * hard-enable and avoid the fast-path
+ */
+#define INTS_DISABLE	SOFT_DISABLE_INTS(r3,r4)
+
+/* This is called by exceptions that used INTS_KEEP (that did not touch
+ * irq indicators in the PACA). This will restore MSR:EE to it's previous
+ * value
  *
  * XXX In the long run, we may want to open-code it in order to separate the
  * load from the wrtee, thus limiting the latency caused by the dependency
@@ -238,7 +236,7 @@ exc_##n##_bad_stack:
 #define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack)			\
 	START_EXCEPTION(label);						\
 	NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE)	\
-	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL)		\
+	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE)		\
 	ack(r8);							\
 	CHECK_NAPPING();						\
 	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
@@ -289,7 +287,7 @@ interrupt_end_book3e:
 /* Critical Input Interrupt */
 	START_EXCEPTION(critical_input);
 	CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
-//	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL)
+//	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
 //	bl	special_reg_save_crit
 //	CHECK_NAPPING();
 //	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -300,7 +298,7 @@ interrupt_end_book3e:
 /* Machine Check Interrupt */
 	START_EXCEPTION(machine_check);
 	CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
-//	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL)
+//	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE)
 //	bl	special_reg_save_mc
 //	addi	r3,r1,STACK_FRAME_OVERHEAD
 //	CHECK_NAPPING();
@@ -313,7 +311,7 @@ interrupt_end_book3e:
 	NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
-	EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP)
+	EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE)
 	b	storage_fault_common
 
 /* Instruction Storage Interrupt */
@@ -321,7 +319,7 @@ interrupt_end_book3e:
 	NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
 	li	r15,0
 	mr	r14,r10
-	EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP)
+	EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE)
 	b	storage_fault_common
 
 /* External Input Interrupt */
@@ -339,12 +337,11 @@ interrupt_end_book3e:
 	START_EXCEPTION(program);
 	NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
 	mfspr	r14,SPRN_ESR
-	EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT)
+	EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE)
 	std	r14,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	bl	.save_nvgprs
-	INTS_RESTORE_HARD
 	bl	.program_check_exception
 	b	.ret_from_except
 
@@ -353,15 +350,16 @@ interrupt_end_book3e:
 	NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
 	/* we can probably do a shorter exception entry for that one... */
 	EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
-	bne	1f			/* if from user, just load it up */
+	ld	r12,_MSR(r1)
+	andi.	r0,r12,MSR_PR;
+	beq-	1f
+	bl	.load_up_fpu
+	b	fast_exception_return
+1:	INTS_DISABLE
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	INTS_RESTORE_HARD
 	bl	.kernel_fp_unavailable_exception
-	BUG_OPCODE
-1:	ld	r12,_MSR(r1)
-	bl	.load_up_fpu
-	b	fast_exception_return
+	b	.ret_from_except
 
 /* Decrementer Interrupt */
 	MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC)
@@ -372,7 +370,7 @@ interrupt_end_book3e:
 /* Watchdog Timer Interrupt */
 	START_EXCEPTION(watchdog);
 	CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
-//	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL)
+//	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE)
 //	bl	special_reg_save_crit
 //	CHECK_NAPPING();
 //	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -391,10 +389,9 @@ interrupt_end_book3e:
 /* Auxiliary Processor Unavailable Interrupt */
 	START_EXCEPTION(ap_unavailable);
 	NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
-	EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE)
 	bl	.save_nvgprs
-	INTS_RESTORE_HARD
+	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.unknown_exception
 	b	.ret_from_except
 
@@ -450,7 +447,7 @@ interrupt_end_book3e:
 	mfspr	r15,SPRN_SPRG_CRIT_SCRATCH
 	mtspr	SPRN_SPRG_GEN_SCRATCH,r15
 	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL)
+	EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE)
 	std	r14,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	mr	r4,r14
@@ -465,7 +462,7 @@ kernel_dbg_exc:
 
 /* Debug exception as a debug interrupt*/
 	START_EXCEPTION(debug_debug);
-	DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+	DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS)
 
 	/*
 	 * If there is a single step or branch-taken exception in an
@@ -515,7 +512,7 @@ kernel_dbg_exc:
 	mfspr	r15,SPRN_SPRG_DBG_SCRATCH
 	mtspr	SPRN_SPRG_GEN_SCRATCH,r15
 	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL)
+	EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE)
 	std	r14,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	mr	r4,r14
@@ -525,21 +522,20 @@ kernel_dbg_exc:
 	bl	.DebugException
 	b	.ret_from_except
 
-	MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE)
-
-/* Doorbell interrupt */
-	START_EXCEPTION(doorbell)
-	NORMAL_EXCEPTION_PROLOG(0x2070, PROLOG_ADDITION_DOORBELL)
-	EXCEPTION_COMMON(0x2070, PACA_EXGEN, INTS_DISABLE_ALL)
-	CHECK_NAPPING()
+	START_EXCEPTION(perfmon);
+	NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.doorbell_exception
+	bl	.performance_monitor_exception
 	b	.ret_from_except_lite
 
+/* Doorbell interrupt */
+	MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE)
+
 /* Doorbell critical Interrupt */
 	START_EXCEPTION(doorbell_crit);
-	CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE)
-//	EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL)
+	CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE)
+//	EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE)
 //	bl	special_reg_save_crit
 //	CHECK_NAPPING();
 //	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -547,36 +543,114 @@ kernel_dbg_exc:
 //	b	ret_from_crit_except
 	b	.
 
+/* Guest Doorbell */
 	MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
-	MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE)
-	MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE)
-	MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
 
+/* Guest Doorbell critical Interrupt */
+	START_EXCEPTION(guest_doorbell_crit);
+	CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
+//	EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE)
+//	bl	special_reg_save_crit
+//	CHECK_NAPPING();
+//	addi	r3,r1,STACK_FRAME_OVERHEAD
+//	bl	.guest_doorbell_critical_exception
+//	b	ret_from_crit_except
+	b	.
+
+/* Hypervisor call */
+	START_EXCEPTION(hypercall);
+	NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.save_nvgprs
+	INTS_RESTORE_HARD
+	bl	.unknown_exception
+	b	.ret_from_except
+
+/* Embedded Hypervisor priviledged */
+	START_EXCEPTION(ehpriv);
+	NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.save_nvgprs
+	INTS_RESTORE_HARD
+	bl	.unknown_exception
+	b	.ret_from_except
 
 /*
- * An interrupt came in while soft-disabled; clear EE in SRR1,
- * clear paca->hard_enabled and return.
+ * An interrupt came in while soft-disabled; We mark paca->irq_happened
+ * accordingly and if the interrupt is level sensitive, we hard disable
  */
-masked_doorbell_book3e:
-	mtcr	r10
-	/* Resend the doorbell to fire again when ints enabled */
-	mfspr	r10,SPRN_PIR
-	PPC_MSGSND(r10)
-	b	masked_interrupt_book3e_common
 
-masked_interrupt_book3e:
+masked_interrupt_book3e_0x500:
+	/* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */
+	li	r11,PACA_IRQ_EE
+	b	masked_interrupt_book3e_full_mask
+
+masked_interrupt_book3e_0x900:
+	ACK_DEC(r11);
+	li	r11,PACA_IRQ_DEC
+	b	masked_interrupt_book3e_no_mask
+masked_interrupt_book3e_0x980:
+	ACK_FIT(r11);
+	li	r11,PACA_IRQ_DEC
+	b	masked_interrupt_book3e_no_mask
+masked_interrupt_book3e_0x280:
+masked_interrupt_book3e_0x2c0:
+	li	r11,PACA_IRQ_DBELL
+	b	masked_interrupt_book3e_no_mask
+
+masked_interrupt_book3e_no_mask:
+	mtcr	r10
+	lbz	r10,PACAIRQHAPPENED(r13)
+	or	r10,r10,r11
+	stb	r10,PACAIRQHAPPENED(r13)
+	b	1f
+masked_interrupt_book3e_full_mask:
 	mtcr	r10
-masked_interrupt_book3e_common:
-	stb	r11,PACAHARDIRQEN(r13)
+	lbz	r10,PACAIRQHAPPENED(r13)
+	or	r10,r10,r11
+	stb	r10,PACAIRQHAPPENED(r13)
 	mfspr	r10,SPRN_SRR1
 	rldicl	r11,r10,48,1		/* clear MSR_EE */
 	rotldi	r10,r11,16
 	mtspr	SPRN_SRR1,r10
-	ld	r10,PACA_EXGEN+EX_R10(r13);	/* restore registers */
+1:	ld	r10,PACA_EXGEN+EX_R10(r13);
 	ld	r11,PACA_EXGEN+EX_R11(r13);
 	mfspr	r13,SPRN_SPRG_GEN_SCRATCH;
 	rfi
 	b	.
+
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
+ * to indicate the kind of interrupt. MSR:EE is already off.
+ * We generate a stackframe like if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+	/* We are going to jump to the exception common code which
+	 * will retrieve various register values from the PACA which
+	 * we don't give a damn about.
+	 */
+	mflr	r10
+	mfmsr	r11
+	mfcr	r4
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r13;
+	std	r1,PACA_EXGEN+EX_R1(r13);
+	stw	r4,PACA_EXGEN+EX_CR(r13);
+	ori	r11,r11,MSR_EE
+	subi	r1,r1,INT_FRAME_SIZE;
+	cmpwi	cr0,r3,0x500
+	beq	exc_0x500_common
+	cmpwi	cr0,r3,0x900
+	beq	exc_0x900_common
+	cmpwi	cr0,r3,0x280
+	beq	exc_0x280_common
+	blr
+
 
 /*
  * This is called from 0x300 and 0x400 handlers after the prologs with
@@ -591,7 +665,6 @@ storage_fault_common:
 	mr	r5,r15
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
-	INTS_RESTORE_HARD
 	bl	.do_page_fault
 	cmpdi	r3,0
 	bne-	1f
@@ -680,6 +753,8 @@ BAD_STACK_TRAMPOLINE(0x000)
 BAD_STACK_TRAMPOLINE(0x100)
 BAD_STACK_TRAMPOLINE(0x200)
 BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x280)
+BAD_STACK_TRAMPOLINE(0x2a0)
 BAD_STACK_TRAMPOLINE(0x2c0)
 BAD_STACK_TRAMPOLINE(0x2e0)
 BAD_STACK_TRAMPOLINE(0x300)
@@ -697,11 +772,10 @@ BAD_STACK_TRAMPOLINE(0xa00)
 BAD_STACK_TRAMPOLINE(0xb00)
 BAD_STACK_TRAMPOLINE(0xc00)
 BAD_STACK_TRAMPOLINE(0xd00)
+BAD_STACK_TRAMPOLINE(0xd08)
 BAD_STACK_TRAMPOLINE(0xe00)
 BAD_STACK_TRAMPOLINE(0xf00)
 BAD_STACK_TRAMPOLINE(0xf20)
-BAD_STACK_TRAMPOLINE(0x2070)
-BAD_STACK_TRAMPOLINE(0x2080)
 
 	.globl	bad_stack_book3e
 bad_stack_book3e:
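The masked_interrupt_book3e_* stubs above record which event was swallowed while soft-disabled as a bit in paca->irq_happened, so that arch_local_irq_restore() can replay it later; level-sensitive sources additionally get MSR:EE cleared in SRR1 ("full mask"), while the decrementer and doorbells merely latch a bit. The bit assignments below reflect my reading of asm/hw_irq.h from this series; the exact values are an assumption:

/* sketch of the irq_happened bits (assumed values, see asm/hw_irq.h) */
#define PACA_IRQ_HARD_DIS	0x01	/* MSR:EE really is off */
#define PACA_IRQ_DBELL		0x02	/* doorbell latched (0x280/0x2c0) */
#define PACA_IRQ_EE		0x04	/* external interrupt (0x500) */
#define PACA_IRQ_DEC		0x08	/* decrementer or FIT (0x900/0x980) */
#define PACA_IRQ_EE_EDGE	0x10	/* edge-sensitive EE, BookE EPR */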
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 15c5a4f6de01..2d0868a4e2f0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -12,6 +12,7 @@
  *
  */
 
+#include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
 
@@ -19,7 +20,7 @@
  * We layout physical memory as follows:
  * 0x0000 - 0x00ff : Secondary processor spin code
  * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
+ * 0x3000 - 0x5fff : interrupt support common interrupt prologs
 * 0x6000 - 0x6fff : Initial (CPU0) segment table
 * 0x7000 - 0x7fff : FWNMI data area
 * 0x8000 -        : Early init and support code
@@ -356,34 +357,60 @@ do_stab_bolted_pSeries:
 	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
 
 /*
- * An interrupt came in while soft-disabled; clear EE in SRR1,
- * clear paca->hard_enabled and return.
+ * An interrupt came in while soft-disabled. We set paca->irq_happened,
+ * then, if it was a decrementer interrupt, we bump the dec to max and
+ * and return, else we hard disable and return. This is called with
+ * r10 containing the value to OR to the paca field.
  */
-masked_interrupt:
-	stb	r10,PACAHARDIRQEN(r13)
-	mtcrf	0x80,r9
-	ld	r9,PACA_EXGEN+EX_R9(r13)
-	mfspr	r10,SPRN_SRR1
-	rldicl	r10,r10,48,1		/* clear MSR_EE */
-	rotldi	r10,r10,16
-	mtspr	SPRN_SRR1,r10
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	GET_SCRATCH0(r13)
-	rfid
+#define MASKED_INTERRUPT(_H)				\
+masked_##_H##interrupt:					\
+	std	r11,PACA_EXGEN+EX_R11(r13);		\
+	lbz	r11,PACAIRQHAPPENED(r13);		\
+	or	r11,r11,r10;				\
+	stb	r11,PACAIRQHAPPENED(r13);		\
+	andi.	r10,r10,PACA_IRQ_DEC;			\
+	beq	1f;					\
+	lis	r10,0x7fff;				\
+	ori	r10,r10,0xffff;				\
+	mtspr	SPRN_DEC,r10;				\
+	b	2f;					\
+1:	mfspr	r10,SPRN_##_H##SRR1;			\
+	rldicl	r10,r10,48,1; /* clear MSR_EE */	\
+	rotldi	r10,r10,16;				\
+	mtspr	SPRN_##_H##SRR1,r10;			\
+2:	mtcrf	0x80,r9;				\
+	ld	r9,PACA_EXGEN+EX_R9(r13);		\
+	ld	r10,PACA_EXGEN+EX_R10(r13);		\
+	ld	r11,PACA_EXGEN+EX_R11(r13);		\
+	GET_SCRATCH0(r13);				\
+	##_H##rfid;					\
 	b	.
+
+MASKED_INTERRUPT()
+MASKED_INTERRUPT(H)
 
-masked_Hinterrupt:
-	stb	r10,PACAHARDIRQEN(r13)
-	mtcrf	0x80,r9
-	ld	r9,PACA_EXGEN+EX_R9(r13)
-	mfspr	r10,SPRN_HSRR1
-	rldicl	r10,r10,48,1		/* clear MSR_EE */
-	rotldi	r10,r10,16
-	mtspr	SPRN_HSRR1,r10
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	GET_SCRATCH0(r13)
-	hrfid
-	b	.
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains 0x500 or 0x900 to indicate which
+ * kind of interrupt. MSR:EE is already off. We generate a
+ * stackframe like if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+	/* We are going to jump to the exception common code which
+	 * will retrieve various register values from the PACA which
+	 * we don't give a damn about, so we don't bother storing them.
+	 */
+	mfmsr	r12
+	mflr	r11
+	mfcr	r9
+	ori	r12,r12,MSR_EE
+	andi.	r3,r3,0x0800
+	bne	decrementer_common
+	b	hardware_interrupt_common
 
 #ifdef CONFIG_PPC_PSERIES
 /*
@@ -458,14 +485,15 @@ machine_check_common:
 	bl	.machine_check_exception
 	b	.ret_from_except
 
-	STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
+	STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
+	STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
 	STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
 	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
 	STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
-	STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
+	STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
 	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
 #ifdef CONFIG_ALTIVEC
 	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
@@ -482,6 +510,9 @@ machine_check_common:
 system_call_entry:
 	b	system_call_common
 
+ppc64_runlatch_on_trampoline:
+	b	.__ppc64_runlatch_on
+
 /*
  * Here we have detected that the kernel stack pointer is bad.
  * R9 contains the saved CR, r13 points to the paca,
@@ -555,6 +586,8 @@ data_access_common:
 	mfspr	r10,SPRN_DSISR
 	stw	r10,PACA_EXGEN+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
+	DISABLE_INTS
+	ld	r12,_MSR(r1)
 	ld	r3,PACA_EXGEN+EX_DAR(r13)
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
 	li	r5,0x300
@@ -569,6 +602,7 @@ h_data_storage_common:
 	stw	r10,PACA_EXGEN+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
 	bl	.save_nvgprs
+	DISABLE_INTS
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.unknown_exception
 	b	.ret_from_except
@@ -577,6 +611,8 @@ h_data_storage_common:
 	.globl instruction_access_common
 instruction_access_common:
 	EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+	DISABLE_INTS
+	ld	r12,_MSR(r1)
 	ld	r3,_NIP(r1)
 	andis.	r4,r12,0x5820
 	li	r5,0x400
@@ -672,12 +708,6 @@ _GLOBAL(slb_miss_realmode)
 	ld	r10,PACA_EXSLB+EX_LR(r13)
 	ld	r3,PACA_EXSLB+EX_R3(r13)
 	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
-BEGIN_FW_FTR_SECTION
-	ld	r11,PACALPPACAPTR(r13)
-	ld	r11,LPPACASRR0(r11)		/* get SRR0 value */
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-#endif /* CONFIG_PPC_ISERIES */
 
 	mtlr	r10
 
@@ -690,12 +720,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
 	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
 	.machine	pop
 
-#ifdef CONFIG_PPC_ISERIES
-BEGIN_FW_FTR_SECTION
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r12
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-#endif /* CONFIG_PPC_ISERIES */
 	ld	r9,PACA_EXSLB+EX_R9(r13)
 	ld	r10,PACA_EXSLB+EX_R10(r13)
 	ld	r11,PACA_EXSLB+EX_R11(r13)
@@ -704,13 +728,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) | |||
704 | rfid | 728 | rfid |
705 | b . /* prevent speculative execution */ | 729 | b . /* prevent speculative execution */ |
706 | 730 | ||
707 | 2: | 731 | 2: mfspr r11,SPRN_SRR0 |
708 | #ifdef CONFIG_PPC_ISERIES | ||
709 | BEGIN_FW_FTR_SECTION | ||
710 | b unrecov_slb | ||
711 | END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) | ||
712 | #endif /* CONFIG_PPC_ISERIES */ | ||
713 | mfspr r11,SPRN_SRR0 | ||
714 | ld r10,PACAKBASE(r13) | 732 | ld r10,PACAKBASE(r13) |
715 | LOAD_HANDLER(r10,unrecov_slb) | 733 | LOAD_HANDLER(r10,unrecov_slb) |
716 | mtspr SPRN_SRR0,r10 | 734 | mtspr SPRN_SRR0,r10 |
@@ -727,20 +745,6 @@ unrecov_slb: | |||
727 | bl .unrecoverable_exception | 745 | bl .unrecoverable_exception |
728 | b 1b | 746 | b 1b |
729 | 747 | ||
730 | .align 7 | ||
731 | .globl hardware_interrupt_common | ||
732 | .globl hardware_interrupt_entry | ||
733 | hardware_interrupt_common: | ||
734 | EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) | ||
735 | FINISH_NAP | ||
736 | hardware_interrupt_entry: | ||
737 | DISABLE_INTS | ||
738 | BEGIN_FTR_SECTION | ||
739 | bl .ppc64_runlatch_on | ||
740 | END_FTR_SECTION_IFSET(CPU_FTR_CTRL) | ||
741 | addi r3,r1,STACK_FRAME_OVERHEAD | ||
742 | bl .do_IRQ | ||
743 | b .ret_from_except_lite | ||
744 | 748 | ||
745 | #ifdef CONFIG_PPC_970_NAP | 749 | #ifdef CONFIG_PPC_970_NAP |
746 | power4_fixup_nap: | 750 | power4_fixup_nap: |
@@ -785,8 +789,8 @@ fp_unavailable_common: | |||
785 | EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) | 789 | EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) |
786 | bne 1f /* if from user, just load it up */ | 790 | bne 1f /* if from user, just load it up */ |
787 | bl .save_nvgprs | 791 | bl .save_nvgprs |
792 | DISABLE_INTS | ||
788 | addi r3,r1,STACK_FRAME_OVERHEAD | 793 | addi r3,r1,STACK_FRAME_OVERHEAD |
789 | ENABLE_INTS | ||
790 | bl .kernel_fp_unavailable_exception | 794 | bl .kernel_fp_unavailable_exception |
791 | BUG_OPCODE | 795 | BUG_OPCODE |
792 | 1: bl .load_up_fpu | 796 | 1: bl .load_up_fpu |
@@ -805,8 +809,8 @@ BEGIN_FTR_SECTION | |||
805 | END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) | 809 | END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) |
806 | #endif | 810 | #endif |
807 | bl .save_nvgprs | 811 | bl .save_nvgprs |
812 | DISABLE_INTS | ||
808 | addi r3,r1,STACK_FRAME_OVERHEAD | 813 | addi r3,r1,STACK_FRAME_OVERHEAD |
809 | ENABLE_INTS | ||
810 | bl .altivec_unavailable_exception | 814 | bl .altivec_unavailable_exception |
811 | b .ret_from_except | 815 | b .ret_from_except |
812 | 816 | ||
@@ -816,13 +820,14 @@ vsx_unavailable_common: | |||
816 | EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) | 820 | EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) |
817 | #ifdef CONFIG_VSX | 821 | #ifdef CONFIG_VSX |
818 | BEGIN_FTR_SECTION | 822 | BEGIN_FTR_SECTION |
819 | bne .load_up_vsx | 823 | beq 1f |
824 | b .load_up_vsx | ||
820 | 1: | 825 | 1: |
821 | END_FTR_SECTION_IFSET(CPU_FTR_VSX) | 826 | END_FTR_SECTION_IFSET(CPU_FTR_VSX) |
822 | #endif | 827 | #endif |
823 | bl .save_nvgprs | 828 | bl .save_nvgprs |
829 | DISABLE_INTS | ||
824 | addi r3,r1,STACK_FRAME_OVERHEAD | 830 | addi r3,r1,STACK_FRAME_OVERHEAD |
825 | ENABLE_INTS | ||
826 | bl .vsx_unavailable_exception | 831 | bl .vsx_unavailable_exception |
827 | b .ret_from_except | 832 | b .ret_from_except |
828 | 833 | ||
@@ -831,66 +836,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) | |||
831 | __end_handlers: | 836 | __end_handlers: |
832 | 837 | ||
833 | /* | 838 | /* |
834 | * Return from an exception with minimal checks. | ||
835 | * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. | ||
836 | * If interrupts have been enabled, or anything has been | ||
837 | * done that might have changed the scheduling status of | ||
838 | * any task or sent any task a signal, you should use | ||
839 | * ret_from_except or ret_from_except_lite instead of this. | ||
840 | */ | ||
841 | fast_exc_return_irq: /* restores irq state too */ | ||
842 | ld r3,SOFTE(r1) | ||
843 | TRACE_AND_RESTORE_IRQ(r3); | ||
844 | ld r12,_MSR(r1) | ||
845 | rldicl r4,r12,49,63 /* get MSR_EE to LSB */ | ||
846 | stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */ | ||
847 | b 1f | ||
848 | |||
849 | .globl fast_exception_return | ||
850 | fast_exception_return: | ||
851 | ld r12,_MSR(r1) | ||
852 | 1: ld r11,_NIP(r1) | ||
853 | andi. r3,r12,MSR_RI /* check if RI is set */ | ||
854 | beq- unrecov_fer | ||
855 | |||
856 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
857 | andi. r3,r12,MSR_PR | ||
858 | beq 2f | ||
859 | ACCOUNT_CPU_USER_EXIT(r3, r4) | ||
860 | 2: | ||
861 | #endif | ||
862 | |||
863 | ld r3,_CCR(r1) | ||
864 | ld r4,_LINK(r1) | ||
865 | ld r5,_CTR(r1) | ||
866 | ld r6,_XER(r1) | ||
867 | mtcr r3 | ||
868 | mtlr r4 | ||
869 | mtctr r5 | ||
870 | mtxer r6 | ||
871 | REST_GPR(0, r1) | ||
872 | REST_8GPRS(2, r1) | ||
873 | |||
874 | mfmsr r10 | ||
875 | rldicl r10,r10,48,1 /* clear EE */ | ||
876 | rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */ | ||
877 | mtmsrd r10,1 | ||
878 | |||
879 | mtspr SPRN_SRR1,r12 | ||
880 | mtspr SPRN_SRR0,r11 | ||
881 | REST_4GPRS(10, r1) | ||
882 | ld r1,GPR1(r1) | ||
883 | rfid | ||
884 | b . /* prevent speculative execution */ | ||
885 | |||
886 | unrecov_fer: | ||
887 | bl .save_nvgprs | ||
888 | 1: addi r3,r1,STACK_FRAME_OVERHEAD | ||
889 | bl .unrecoverable_exception | ||
890 | b 1b | ||
891 | |||
892 | |||
893 | /* | ||
894 | * Hash table stuff | 839 | * Hash table stuff |
895 | */ | 840 | */ |
896 | .align 7 | 841 | .align 7 |
@@ -912,28 +857,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) | |||
912 | lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ | 857 | lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ |
913 | andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ | 858 | andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ |
914 | bne 77f /* then don't call hash_page now */ | 859 | bne 77f /* then don't call hash_page now */ |
915 | |||
916 | /* | ||
917 | * On iSeries, we soft-disable interrupts here, then | ||
918 | * hard-enable interrupts so that the hash_page code can spin on | ||
919 | * the hash_table_lock without problems on a shared processor. | ||
920 | */ | ||
921 | DISABLE_INTS | ||
922 | |||
923 | /* | ||
924 | * Currently, trace_hardirqs_off() will be called by DISABLE_INTS | ||
925 | * and will clobber volatile registers when irq tracing is enabled | ||
926 | * so we need to reload them. It may be possible to be smarter here | ||
927 | * and move the irq tracing elsewhere but let's keep it simple for | ||
928 | * now | ||
929 | */ | ||
930 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
931 | ld r3,_DAR(r1) | ||
932 | ld r4,_DSISR(r1) | ||
933 | ld r5,_TRAP(r1) | ||
934 | ld r12,_MSR(r1) | ||
935 | clrrdi r5,r5,4 | ||
936 | #endif /* CONFIG_TRACE_IRQFLAGS */ | ||
937 | /* | 860 | /* |
938 | * We need to set the _PAGE_USER bit if MSR_PR is set or if we are | 861 | * We need to set the _PAGE_USER bit if MSR_PR is set or if we are |
939 | * accessing a userspace segment (even from the kernel). We assume | 862 | * accessing a userspace segment (even from the kernel). We assume |
@@ -951,62 +874,25 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) | |||
951 | * r4 contains the required access permissions | 874 | * r4 contains the required access permissions |
952 | * r5 contains the trap number | 875 | * r5 contains the trap number |
953 | * | 876 | * |
954 | * at return r3 = 0 for success | 877 | * at return r3 = 0 for success, 1 for page fault, negative for error |
955 | */ | 878 | */ |
956 | bl .hash_page /* build HPTE if possible */ | 879 | bl .hash_page /* build HPTE if possible */ |
957 | cmpdi r3,0 /* see if hash_page succeeded */ | 880 | cmpdi r3,0 /* see if hash_page succeeded */ |
958 | 881 | ||
959 | BEGIN_FW_FTR_SECTION | 882 | /* Success */ |
960 | /* | ||
961 | * If we had interrupts soft-enabled at the point where the | ||
962 | * DSI/ISI occurred, and an interrupt came in during hash_page, | ||
963 | * handle it now. | ||
964 | * We jump to ret_from_except_lite rather than fast_exception_return | ||
965 | * because ret_from_except_lite will check for and handle pending | ||
966 | * interrupts if necessary. | ||
967 | */ | ||
968 | beq 13f | ||
969 | END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) | ||
970 | |||
971 | BEGIN_FW_FTR_SECTION | ||
972 | /* | ||
973 | * Here we have interrupts hard-disabled, so it is sufficient | ||
974 | * to restore paca->{soft,hard}_enable and get out. | ||
975 | */ | ||
976 | beq fast_exc_return_irq /* Return from exception on success */ | 883 | beq fast_exc_return_irq /* Return from exception on success */ |
977 | END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) | ||
978 | |||
979 | /* For a hash failure, we don't bother re-enabling interrupts */ | ||
980 | ble- 12f | ||
981 | |||
982 | /* | ||
983 | * hash_page couldn't handle it, set soft interrupt enable back | ||
984 | * to what it was before the trap. Note that .arch_local_irq_restore | ||
985 | * handles any interrupts pending at this point. | ||
986 | */ | ||
987 | ld r3,SOFTE(r1) | ||
988 | TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) | ||
989 | bl .arch_local_irq_restore | ||
990 | b 11f | ||
991 | 884 | ||
992 | /* We have a data breakpoint exception - handle it */ | 885 | /* Error */ |
993 | handle_dabr_fault: | 886 | blt- 13f |
994 | bl .save_nvgprs | ||
995 | ld r4,_DAR(r1) | ||
996 | ld r5,_DSISR(r1) | ||
997 | addi r3,r1,STACK_FRAME_OVERHEAD | ||
998 | bl .do_dabr | ||
999 | b .ret_from_except_lite | ||
1000 | 887 | ||
1001 | /* Here we have a page fault that hash_page can't handle. */ | 888 | /* Here we have a page fault that hash_page can't handle. */ |
1002 | handle_page_fault: | 889 | handle_page_fault: |
1003 | ENABLE_INTS | ||
1004 | 11: ld r4,_DAR(r1) | 890 | 11: ld r4,_DAR(r1) |
1005 | ld r5,_DSISR(r1) | 891 | ld r5,_DSISR(r1) |
1006 | addi r3,r1,STACK_FRAME_OVERHEAD | 892 | addi r3,r1,STACK_FRAME_OVERHEAD |
1007 | bl .do_page_fault | 893 | bl .do_page_fault |
1008 | cmpdi r3,0 | 894 | cmpdi r3,0 |
1009 | beq+ 13f | 895 | beq+ 12f |
1010 | bl .save_nvgprs | 896 | bl .save_nvgprs |
1011 | mr r5,r3 | 897 | mr r5,r3 |
1012 | addi r3,r1,STACK_FRAME_OVERHEAD | 898 | addi r3,r1,STACK_FRAME_OVERHEAD |
@@ -1014,12 +900,20 @@ handle_page_fault: | |||
1014 | bl .bad_page_fault | 900 | bl .bad_page_fault |
1015 | b .ret_from_except | 901 | b .ret_from_except |
1016 | 902 | ||
1017 | 13: b .ret_from_except_lite | 903 | /* We have a data breakpoint exception - handle it */ |
904 | handle_dabr_fault: | ||
905 | bl .save_nvgprs | ||
906 | ld r4,_DAR(r1) | ||
907 | ld r5,_DSISR(r1) | ||
908 | addi r3,r1,STACK_FRAME_OVERHEAD | ||
909 | bl .do_dabr | ||
910 | 12: b .ret_from_except_lite | ||
911 | |||
1018 | 912 | ||
1019 | /* We have a page fault that hash_page could handle but HV refused | 913 | /* We have a page fault that hash_page could handle but HV refused |
1020 | * the PTE insertion | 914 | * the PTE insertion |
1021 | */ | 915 | */ |
1022 | 12: bl .save_nvgprs | 916 | 13: bl .save_nvgprs |
1023 | mr r5,r3 | 917 | mr r5,r3 |
1024 | addi r3,r1,STACK_FRAME_OVERHEAD | 918 | addi r3,r1,STACK_FRAME_OVERHEAD |
1025 | ld r4,_DAR(r1) | 919 | ld r4,_DAR(r1) |
@@ -1141,51 +1035,19 @@ _GLOBAL(do_stab_bolted) | |||
1141 | . = 0x7000 | 1035 | . = 0x7000
1142 | .globl fwnmi_data_area | 1036 | .globl fwnmi_data_area |
1143 | fwnmi_data_area: | 1037 | fwnmi_data_area: |
1144 | #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ | ||
1145 | 1038 | ||
1146 | /* iSeries does not use the FWNMI stuff, so it is safe to put | ||
1147 | * this here, even if we later allow kernels that will boot on | ||
1148 | * both pSeries and iSeries */ | ||
1149 | #ifdef CONFIG_PPC_ISERIES | ||
1150 | . = LPARMAP_PHYS | ||
1151 | .globl xLparMap | ||
1152 | xLparMap: | ||
1153 | .quad HvEsidsToMap /* xNumberEsids */ | ||
1154 | .quad HvRangesToMap /* xNumberRanges */ | ||
1155 | .quad STAB0_PAGE /* xSegmentTableOffs */ | ||
1156 | .zero 40 /* xRsvd */ | ||
1157 | /* xEsids (HvEsidsToMap entries of 2 quads) */ | ||
1158 | .quad PAGE_OFFSET_ESID /* xKernelEsid */ | ||
1159 | .quad PAGE_OFFSET_VSID /* xKernelVsid */ | ||
1160 | .quad VMALLOC_START_ESID /* xKernelEsid */ | ||
1161 | .quad VMALLOC_START_VSID /* xKernelVsid */ | ||
1162 | /* xRanges (HvRangesToMap entries of 3 quads) */ | ||
1163 | .quad HvPagesToMap /* xPages */ | ||
1164 | .quad 0 /* xOffset */ | ||
1165 | .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */ | ||
1166 | |||
1167 | #endif /* CONFIG_PPC_ISERIES */ | ||
1168 | |||
1169 | #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) | ||
1170 | /* pseries and powernv need to keep the whole page from | 1039 | /* pseries and powernv need to keep the whole page from |
1171 | * 0x7000 to 0x8000 free for use by the firmware | 1040 | * 0x7000 to 0x8000 free for use by the firmware |
1172 | */ | 1041 | */ |
1173 | . = 0x8000 | 1042 | . = 0x8000 |
1174 | #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ | 1043 | #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ |
1175 | 1044 | ||
1176 | /* | 1045 | /* Space for CPU0's segment table */ |
1177 | * Space for CPU0's segment table. | 1046 | .balign 4096 |
1178 | * | ||
1179 | * On iSeries, the hypervisor must fill in at least one entry before | ||
1180 | * we get control (with relocate on). The address is given to the hv | ||
1181 | * as a page number (see xLparMap above), so this must be at a | ||
1182 | * fixed address (the linker can't compute (u64)&initial_stab >> | ||
1183 | * PAGE_SHIFT). | ||
1184 | */ | ||
1185 | . = STAB0_OFFSET /* 0x8000 */ | ||
1186 | .globl initial_stab | 1047 | .globl initial_stab |
1187 | initial_stab: | 1048 | initial_stab: |
1188 | .space 4096 | 1049 | .space 4096 |
1050 | |||
1189 | #ifdef CONFIG_PPC_POWERNV | 1051 | #ifdef CONFIG_PPC_POWERNV |
1190 | _GLOBAL(opal_mc_secondary_handler) | 1052 | _GLOBAL(opal_mc_secondary_handler) |
1191 | HMT_MEDIUM | 1053 | HMT_MEDIUM |
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c new file mode 100644 index 000000000000..cfe7a38708c3 --- /dev/null +++ b/arch/powerpc/kernel/fadump.c | |||
@@ -0,0 +1,1315 @@ | |||
1 | /* | ||
2 | * Firmware Assisted dump: A robust mechanism to get reliable kernel crash | ||
3 | * dump with assistance from firmware. This approach does not use kexec, | ||
4 | * instead firmware assists in booting the kdump kernel while preserving | ||
5 | * memory contents. Most of the code has been adapted from the | ||
6 | * phyp-assisted dump implementation written by Linas Vepstas and | ||
7 | * Manish Ahuja | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
22 | * | ||
23 | * Copyright 2011 IBM Corporation | ||
24 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | ||
25 | */ | ||
26 | |||
27 | #undef DEBUG | ||
28 | #define pr_fmt(fmt) "fadump: " fmt | ||
29 | |||
30 | #include <linux/string.h> | ||
31 | #include <linux/memblock.h> | ||
32 | #include <linux/delay.h> | ||
33 | #include <linux/debugfs.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/crash_dump.h> | ||
36 | #include <linux/kobject.h> | ||
37 | #include <linux/sysfs.h> | ||
38 | |||
39 | #include <asm/page.h> | ||
40 | #include <asm/prom.h> | ||
41 | #include <asm/rtas.h> | ||
42 | #include <asm/fadump.h> | ||
43 | |||
44 | static struct fw_dump fw_dump; | ||
45 | static struct fadump_mem_struct fdm; | ||
46 | static const struct fadump_mem_struct *fdm_active; | ||
47 | |||
48 | static DEFINE_MUTEX(fadump_mutex); | ||
49 | struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES]; | ||
50 | int crash_mem_ranges; | ||
51 | |||
52 | /* Scan the Firmware Assisted dump configuration details. */ | ||
53 | int __init early_init_dt_scan_fw_dump(unsigned long node, | ||
54 | const char *uname, int depth, void *data) | ||
55 | { | ||
56 | __be32 *sections; | ||
57 | int i, num_sections; | ||
58 | unsigned long size; | ||
59 | const int *token; | ||
60 | |||
61 | if (depth != 1 || strcmp(uname, "rtas") != 0) | ||
62 | return 0; | ||
63 | |||
64 | /* | ||
65 | * Check if firmware-assisted dump is supported. If yes, check | ||
66 | * whether a dump was initiated on the last reboot. | ||
67 | */ | ||
68 | token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL); | ||
69 | if (!token) | ||
70 | return 0; | ||
71 | |||
72 | fw_dump.fadump_supported = 1; | ||
73 | fw_dump.ibm_configure_kernel_dump = *token; | ||
74 | |||
75 | /* | ||
76 | * The 'ibm,kernel-dump' rtas node is present only if there is | ||
77 | * dump data waiting for us. | ||
78 | */ | ||
79 | fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); | ||
80 | if (fdm_active) | ||
81 | fw_dump.dump_active = 1; | ||
82 | |||
83 | /* Get the sizes required to store dump data for the firmware provided | ||
84 | * dump sections. | ||
85 | * For each supported dump section type there is a 32-bit cell | ||
86 | * defining the section ID, followed by two 32-bit cells giving | ||
87 | * the size of the section in bytes. | ||
88 | */ | ||
89 | sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", | ||
90 | &size); | ||
91 | |||
92 | if (!sections) | ||
93 | return 0; | ||
94 | |||
95 | num_sections = size / (3 * sizeof(u32)); | ||
96 | |||
97 | for (i = 0; i < num_sections; i++, sections += 3) { | ||
98 | u32 type = (u32)of_read_number(sections, 1); | ||
99 | |||
100 | switch (type) { | ||
101 | case FADUMP_CPU_STATE_DATA: | ||
102 | fw_dump.cpu_state_data_size = | ||
103 | of_read_ulong(§ions[1], 2); | ||
104 | break; | ||
105 | case FADUMP_HPTE_REGION: | ||
106 | fw_dump.hpte_region_size = | ||
107 | of_read_ulong(§ions[1], 2); | ||
108 | break; | ||
109 | } | ||
110 | } | ||
111 | return 1; | ||
112 | } | ||
113 | |||
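As a worked example (with hypothetical values): a device tree carrying ibm,configure-kernel-dump-sizes = <1 0 0x10000 2 0 0x1000000> is parsed as two 3-cell entries; assuming the usual section IDs (FADUMP_CPU_STATE_DATA = 1, FADUMP_HPTE_REGION = 2), the scan records a CPU state data size of 0x10000 bytes and an HPTE region size of 0x1000000 bytes.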
114 | int is_fadump_active(void) | ||
115 | { | ||
116 | return fw_dump.dump_active; | ||
117 | } | ||
118 | |||
119 | /* Print firmware assisted dump configurations for debugging purpose. */ | ||
120 | static void fadump_show_config(void) | ||
121 | { | ||
122 | pr_debug("Support for firmware-assisted dump (fadump): %s\n", | ||
123 | (fw_dump.fadump_supported ? "present" : "no support")); | ||
124 | |||
125 | if (!fw_dump.fadump_supported) | ||
126 | return; | ||
127 | |||
128 | pr_debug("Fadump enabled : %s\n", | ||
129 | (fw_dump.fadump_enabled ? "yes" : "no")); | ||
130 | pr_debug("Dump Active : %s\n", | ||
131 | (fw_dump.dump_active ? "yes" : "no")); | ||
132 | pr_debug("Dump section sizes:\n"); | ||
133 | pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); | ||
134 | pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); | ||
135 | pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size); | ||
136 | } | ||
137 | |||
138 | static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm, | ||
139 | unsigned long addr) | ||
140 | { | ||
141 | if (!fdm) | ||
142 | return 0; | ||
143 | |||
144 | memset(fdm, 0, sizeof(struct fadump_mem_struct)); | ||
145 | addr = addr & PAGE_MASK; | ||
146 | |||
147 | fdm->header.dump_format_version = 0x00000001; | ||
148 | fdm->header.dump_num_sections = 3; | ||
149 | fdm->header.dump_status_flag = 0; | ||
150 | fdm->header.offset_first_dump_section = | ||
151 | (u32)offsetof(struct fadump_mem_struct, cpu_state_data); | ||
152 | |||
153 | /* | ||
154 | * Fields for disk dump option. | ||
155 | * We are not using disk dump option, hence set these fields to 0. | ||
156 | */ | ||
157 | fdm->header.dd_block_size = 0; | ||
158 | fdm->header.dd_block_offset = 0; | ||
159 | fdm->header.dd_num_blocks = 0; | ||
160 | fdm->header.dd_offset_disk_path = 0; | ||
161 | |||
162 | /* Set to 0 to disable an automatic dump-reboot. */ | ||
163 | fdm->header.max_time_auto = 0; | ||
164 | |||
165 | /* Kernel dump sections */ | ||
166 | /* cpu state data section. */ | ||
167 | fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG; | ||
168 | fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA; | ||
169 | fdm->cpu_state_data.source_address = 0; | ||
170 | fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size; | ||
171 | fdm->cpu_state_data.destination_address = addr; | ||
172 | addr += fw_dump.cpu_state_data_size; | ||
173 | |||
174 | /* hpte region section */ | ||
175 | fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG; | ||
176 | fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION; | ||
177 | fdm->hpte_region.source_address = 0; | ||
178 | fdm->hpte_region.source_len = fw_dump.hpte_region_size; | ||
179 | fdm->hpte_region.destination_address = addr; | ||
180 | addr += fw_dump.hpte_region_size; | ||
181 | |||
182 | /* RMA region section */ | ||
183 | fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG; | ||
184 | fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION; | ||
185 | fdm->rmr_region.source_address = RMA_START; | ||
186 | fdm->rmr_region.source_len = fw_dump.boot_memory_size; | ||
187 | fdm->rmr_region.destination_address = addr; | ||
188 | addr += fw_dump.boot_memory_size; | ||
189 | |||
190 | return addr; | ||
191 | } | ||
192 | |||
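The net effect is a back-to-back layout starting at the page-aligned addr: CPU state data first, the HPTE region at addr + cpu_state_data_size, and the relocated RMA contents after that. The returned address is the first free byte past the layout, which is where register_fadump() below places the crash info header and the ELF core headers.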
193 | /** | ||
194 | * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM | ||
195 | * | ||
196 | * Function to find the largest memory size we need to reserve during early | ||
197 | * boot process. This will be the size of the memory that is required for a | ||
198 | * kernel to boot successfully. | ||
199 | * | ||
200 | * This function has been taken from phyp-assisted dump feature implementation. | ||
201 | * | ||
202 | * returns larger of 256MB or 5% rounded down to multiples of 256MB. | ||
203 | * | ||
204 | * TODO: Come up with better approach to find out more accurate memory size | ||
205 | * that is required for a kernel to boot successfully. | ||
206 | * | ||
207 | */ | ||
208 | static inline unsigned long fadump_calculate_reserve_size(void) | ||
209 | { | ||
210 | unsigned long size; | ||
211 | |||
212 | /* | ||
213 | * Check if the size is specified through fadump_reserve_mem= cmdline | ||
214 | * option. If yes, then use that. | ||
215 | */ | ||
216 | if (fw_dump.reserve_bootvar) | ||
217 | return fw_dump.reserve_bootvar; | ||
218 | |||
219 | /* divide by 20 to get 5% of value */ | ||
220 | size = memblock_end_of_DRAM() / 20; | ||
221 | |||
222 | /* round it down to a multiple of 256MB */ | ||
223 | size = size & ~0x0FFFFFFFUL; | ||
224 | |||
225 | /* Truncate to memory_limit. We don't want to over-reserve memory. */ | ||
226 | if (memory_limit && size > memory_limit) | ||
227 | size = memory_limit; | ||
228 | |||
229 | return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM); | ||
230 | } | ||
231 | |||
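A worked example of the sizing: on a machine with 32GB of RAM, memblock_end_of_DRAM()/20 is 0x66666666 (~1.6GB); masking with ~0x0FFFFFFFUL rounds that down to 0x60000000, i.e. 1536MB, a multiple of 256MB and comfortably above the 256MB MIN_BOOT_MEM floor, so 1536MB is used as the boot memory size.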
232 | /* | ||
233 | * Calculate the total memory size required to be reserved for | ||
234 | * firmware-assisted dump registration. | ||
235 | */ | ||
236 | static unsigned long get_fadump_area_size(void) | ||
237 | { | ||
238 | unsigned long size = 0; | ||
239 | |||
240 | size += fw_dump.cpu_state_data_size; | ||
241 | size += fw_dump.hpte_region_size; | ||
242 | size += fw_dump.boot_memory_size; | ||
243 | size += sizeof(struct fadump_crash_info_header); | ||
244 | size += sizeof(struct elfhdr); /* ELF core header.*/ | ||
245 | size += sizeof(struct elf_phdr); /* placeholder for cpu notes */ | ||
246 | /* Program headers for crash memory regions. */ | ||
247 | size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); | ||
248 | |||
249 | size = PAGE_ALIGN(size); | ||
250 | return size; | ||
251 | } | ||
252 | |||
253 | int __init fadump_reserve_mem(void) | ||
254 | { | ||
255 | unsigned long base, size, memory_boundary; | ||
256 | |||
257 | if (!fw_dump.fadump_enabled) | ||
258 | return 0; | ||
259 | |||
260 | if (!fw_dump.fadump_supported) { | ||
261 | printk(KERN_INFO "Firmware-assisted dump is not supported on" | ||
262 | " this hardware\n"); | ||
263 | fw_dump.fadump_enabled = 0; | ||
264 | return 0; | ||
265 | } | ||
266 | /* | ||
267 | * Initialize boot memory size | ||
268 | * If dump is active then the size was already calculated during | ||
269 | * the first kernel's boot. | ||
270 | */ | ||
271 | if (fdm_active) | ||
272 | fw_dump.boot_memory_size = fdm_active->rmr_region.source_len; | ||
273 | else | ||
274 | fw_dump.boot_memory_size = fadump_calculate_reserve_size(); | ||
275 | |||
276 | /* | ||
277 | * Calculate the memory boundary. | ||
278 | * If memory_limit is less than actual memory boundary then reserve | ||
279 | * the memory for fadump beyond the memory_limit and adjust the | ||
280 | * memory_limit accordingly, so that the running kernel can run with | ||
281 | * the specified memory_limit. | ||
282 | */ | ||
283 | if (memory_limit && memory_limit < memblock_end_of_DRAM()) { | ||
284 | size = get_fadump_area_size(); | ||
285 | if ((memory_limit + size) < memblock_end_of_DRAM()) | ||
286 | memory_limit += size; | ||
287 | else | ||
288 | memory_limit = memblock_end_of_DRAM(); | ||
289 | printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" | ||
290 | " dump, now %#016llx\n", | ||
291 | (unsigned long long)memory_limit); | ||
292 | } | ||
293 | if (memory_limit) | ||
294 | memory_boundary = memory_limit; | ||
295 | else | ||
296 | memory_boundary = memblock_end_of_DRAM(); | ||
297 | |||
298 | if (fw_dump.dump_active) { | ||
299 | printk(KERN_INFO "Firmware-assisted dump is active.\n"); | ||
300 | /* | ||
301 | * If last boot has crashed then reserve all the memory | ||
302 | * above boot_memory_size so that we don't touch it until | ||
303 | * dump is written to disk by userspace tool. This memory | ||
304 | * will be released for general use once the dump is saved. | ||
305 | */ | ||
306 | base = fw_dump.boot_memory_size; | ||
307 | size = memory_boundary - base; | ||
308 | memblock_reserve(base, size); | ||
309 | printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " | ||
310 | "for saving crash dump\n", | ||
311 | (unsigned long)(size >> 20), | ||
312 | (unsigned long)(base >> 20)); | ||
313 | |||
314 | fw_dump.fadumphdr_addr = | ||
315 | fdm_active->rmr_region.destination_address + | ||
316 | fdm_active->rmr_region.source_len; | ||
317 | pr_debug("fadumphdr_addr = %p\n", | ||
318 | (void *) fw_dump.fadumphdr_addr); | ||
319 | } else { | ||
320 | /* Reserve the memory at the top of memory. */ | ||
321 | size = get_fadump_area_size(); | ||
322 | base = memory_boundary - size; | ||
323 | memblock_reserve(base, size); | ||
324 | printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " | ||
325 | "for firmware-assisted dump\n", | ||
326 | (unsigned long)(size >> 20), | ||
327 | (unsigned long)(base >> 20)); | ||
328 | } | ||
329 | fw_dump.reserve_dump_area_start = base; | ||
330 | fw_dump.reserve_dump_area_size = size; | ||
331 | return 1; | ||
332 | } | ||
333 | |||
334 | /* Look for fadump= cmdline option. */ | ||
335 | static int __init early_fadump_param(char *p) | ||
336 | { | ||
337 | if (!p) | ||
338 | return 1; | ||
339 | |||
340 | if (strncmp(p, "on", 2) == 0) | ||
341 | fw_dump.fadump_enabled = 1; | ||
342 | else if (strncmp(p, "off", 3) == 0) | ||
343 | fw_dump.fadump_enabled = 0; | ||
344 | |||
345 | return 0; | ||
346 | } | ||
347 | early_param("fadump", early_fadump_param); | ||
348 | |||
349 | /* Look for fadump_reserve_mem= cmdline option */ | ||
350 | static int __init early_fadump_reserve_mem(char *p) | ||
351 | { | ||
352 | if (p) | ||
353 | fw_dump.reserve_bootvar = memparse(p, &p); | ||
354 | return 0; | ||
355 | } | ||
356 | early_param("fadump_reserve_mem", early_fadump_reserve_mem); | ||
357 | |||
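With both early_param() hooks in place, fadump is controlled entirely from the kernel command line, e.g.:

	fadump=on fadump_reserve_mem=1024M

memparse() accepts the usual K/M/G suffixes, so the reservation override can be given in whichever unit is convenient.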
358 | static void register_fw_dump(struct fadump_mem_struct *fdm) | ||
359 | { | ||
360 | int rc; | ||
361 | unsigned int wait_time; | ||
362 | |||
363 | pr_debug("Registering for firmware-assisted kernel dump...\n"); | ||
364 | |||
365 | /* TODO: Add upper time limit for the delay */ | ||
366 | do { | ||
367 | rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, | ||
368 | FADUMP_REGISTER, fdm, | ||
369 | sizeof(struct fadump_mem_struct)); | ||
370 | |||
371 | wait_time = rtas_busy_delay_time(rc); | ||
372 | if (wait_time) | ||
373 | mdelay(wait_time); | ||
374 | |||
375 | } while (wait_time); | ||
376 | |||
377 | switch (rc) { | ||
378 | case -1: | ||
379 | printk(KERN_ERR "Failed to register firmware-assisted kernel" | ||
380 | " dump. Hardware Error(%d).\n", rc); | ||
381 | break; | ||
382 | case -3: | ||
383 | printk(KERN_ERR "Failed to register firmware-assisted kernel" | ||
384 | " dump. Parameter Error(%d).\n", rc); | ||
385 | break; | ||
386 | case -9: | ||
387 | printk(KERN_ERR "firmware-assisted kernel dump is already " | ||
388 | " registered."); | ||
389 | fw_dump.dump_registered = 1; | ||
390 | break; | ||
391 | case 0: | ||
392 | printk(KERN_INFO "firmware-assisted kernel dump registration" | ||
393 | " is successful\n"); | ||
394 | fw_dump.dump_registered = 1; | ||
395 | break; | ||
396 | } | ||
397 | } | ||
398 | |||
399 | void crash_fadump(struct pt_regs *regs, const char *str) | ||
400 | { | ||
401 | struct fadump_crash_info_header *fdh = NULL; | ||
402 | |||
403 | if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) | ||
404 | return; | ||
405 | |||
406 | fdh = __va(fw_dump.fadumphdr_addr); | ||
407 | crashing_cpu = smp_processor_id(); | ||
408 | fdh->crashing_cpu = crashing_cpu; | ||
409 | crash_save_vmcoreinfo(); | ||
410 | |||
411 | if (regs) | ||
412 | fdh->regs = *regs; | ||
413 | else | ||
414 | ppc_save_regs(&fdh->regs); | ||
415 | |||
416 | fdh->cpu_online_mask = *cpu_online_mask; | ||
417 | |||
418 | /* Call ibm,os-term rtas call to trigger firmware assisted dump */ | ||
419 | rtas_os_term((char *)str); | ||
420 | } | ||
421 | |||
422 | #define GPR_MASK 0xffffff0000000000 | ||
423 | static inline int fadump_gpr_index(u64 id) | ||
424 | { | ||
425 | int i = -1; | ||
426 | char str[3]; | ||
427 | |||
428 | if ((id & GPR_MASK) == REG_ID("GPR")) { | ||
429 | /* get the digits at the end */ | ||
430 | id &= ~GPR_MASK; | ||
431 | id >>= 24; | ||
432 | str[2] = '\0'; | ||
433 | str[1] = id & 0xff; | ||
434 | str[0] = (id >> 8) & 0xff; | ||
435 | sscanf(str, "%d", &i); | ||
436 | if (i > 31) | ||
437 | i = -1; | ||
438 | } | ||
439 | return i; | ||
440 | } | ||
441 | |||
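For example, a register entry tagged with the 8-byte ASCII identifier "GPR05" passes the GPR_MASK check, the mask-and-shift isolates the two digit characters '0' and '5', and sscanf() yields i = 5. An out-of-range tag such as a hypothetical "GPR45" would fail the i > 31 check and return -1, so the entry is ignored by fadump_set_regval() below.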
442 | static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id, | ||
443 | u64 reg_val) | ||
444 | { | ||
445 | int i; | ||
446 | |||
447 | i = fadump_gpr_index(reg_id); | ||
448 | if (i >= 0) | ||
449 | regs->gpr[i] = (unsigned long)reg_val; | ||
450 | else if (reg_id == REG_ID("NIA")) | ||
451 | regs->nip = (unsigned long)reg_val; | ||
452 | else if (reg_id == REG_ID("MSR")) | ||
453 | regs->msr = (unsigned long)reg_val; | ||
454 | else if (reg_id == REG_ID("CTR")) | ||
455 | regs->ctr = (unsigned long)reg_val; | ||
456 | else if (reg_id == REG_ID("LR")) | ||
457 | regs->link = (unsigned long)reg_val; | ||
458 | else if (reg_id == REG_ID("XER")) | ||
459 | regs->xer = (unsigned long)reg_val; | ||
460 | else if (reg_id == REG_ID("CR")) | ||
461 | regs->ccr = (unsigned long)reg_val; | ||
462 | else if (reg_id == REG_ID("DAR")) | ||
463 | regs->dar = (unsigned long)reg_val; | ||
464 | else if (reg_id == REG_ID("DSISR")) | ||
465 | regs->dsisr = (unsigned long)reg_val; | ||
466 | } | ||
467 | |||
468 | static struct fadump_reg_entry* | ||
469 | fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs) | ||
470 | { | ||
471 | memset(regs, 0, sizeof(struct pt_regs)); | ||
472 | |||
473 | while (reg_entry->reg_id != REG_ID("CPUEND")) { | ||
474 | fadump_set_regval(regs, reg_entry->reg_id, | ||
475 | reg_entry->reg_value); | ||
476 | reg_entry++; | ||
477 | } | ||
478 | reg_entry++; | ||
479 | return reg_entry; | ||
480 | } | ||
481 | |||
482 | static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type, | ||
483 | void *data, size_t data_len) | ||
484 | { | ||
485 | struct elf_note note; | ||
486 | |||
487 | note.n_namesz = strlen(name) + 1; | ||
488 | note.n_descsz = data_len; | ||
489 | note.n_type = type; | ||
490 | memcpy(buf, ¬e, sizeof(note)); | ||
491 | buf += (sizeof(note) + 3)/4; | ||
492 | memcpy(buf, name, note.n_namesz); | ||
493 | buf += (note.n_namesz + 3)/4; | ||
494 | memcpy(buf, data, note.n_descsz); | ||
495 | buf += (note.n_descsz + 3)/4; | ||
496 | |||
497 | return buf; | ||
498 | } | ||
499 | |||
500 | static void fadump_final_note(u32 *buf) | ||
501 | { | ||
502 | struct elf_note note; | ||
503 | |||
504 | note.n_namesz = 0; | ||
505 | note.n_descsz = 0; | ||
506 | note.n_type = 0; | ||
507 | memcpy(buf, ¬e, sizeof(note)); | ||
508 | } | ||
509 | |||
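To see the 4-byte alignment arithmetic at work: appending a note named "CORE" gives n_namesz = 5. The header advance (sizeof(note) + 3)/4 moves buf 3 words past the 12-byte note header, (5 + 3)/4 moves 2 words (8 bytes) over the NUL-padded name, and the descriptor length is likewise rounded up to whole words, which is the layout ELF note parsers expect.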
510 | static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) | ||
511 | { | ||
512 | struct elf_prstatus prstatus; | ||
513 | |||
514 | memset(&prstatus, 0, sizeof(prstatus)); | ||
515 | /* | ||
516 | * FIXME: How do I get the PID? Do I really need it? | ||
517 | * prstatus.pr_pid = ???? | ||
518 | */ | ||
519 | elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); | ||
520 | buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, | ||
521 | &prstatus, sizeof(prstatus)); | ||
522 | return buf; | ||
523 | } | ||
524 | |||
525 | static void fadump_update_elfcore_header(char *bufp) | ||
526 | { | ||
527 | struct elfhdr *elf; | ||
528 | struct elf_phdr *phdr; | ||
529 | |||
530 | elf = (struct elfhdr *)bufp; | ||
531 | bufp += sizeof(struct elfhdr); | ||
532 | |||
533 | /* First note is a placeholder for cpu notes info. */ | ||
534 | phdr = (struct elf_phdr *)bufp; | ||
535 | |||
536 | if (phdr->p_type == PT_NOTE) { | ||
537 | phdr->p_paddr = fw_dump.cpu_notes_buf; | ||
538 | phdr->p_offset = phdr->p_paddr; | ||
539 | phdr->p_filesz = fw_dump.cpu_notes_buf_size; | ||
540 | phdr->p_memsz = fw_dump.cpu_notes_buf_size; | ||
541 | } | ||
542 | return; | ||
543 | } | ||
544 | |||
545 | static void *fadump_cpu_notes_buf_alloc(unsigned long size) | ||
546 | { | ||
547 | void *vaddr; | ||
548 | struct page *page; | ||
549 | unsigned long order, count, i; | ||
550 | |||
551 | order = get_order(size); | ||
552 | vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order); | ||
553 | if (!vaddr) | ||
554 | return NULL; | ||
555 | |||
556 | count = 1 << order; | ||
557 | page = virt_to_page(vaddr); | ||
558 | for (i = 0; i < count; i++) | ||
559 | SetPageReserved(page + i); | ||
560 | return vaddr; | ||
561 | } | ||
562 | |||
563 | static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size) | ||
564 | { | ||
565 | struct page *page; | ||
566 | unsigned long order, count, i; | ||
567 | |||
568 | order = get_order(size); | ||
569 | count = 1 << order; | ||
570 | page = virt_to_page(vaddr); | ||
571 | for (i = 0; i < count; i++) | ||
572 | ClearPageReserved(page + i); | ||
573 | __free_pages(page, order); | ||
574 | } | ||
575 | |||
576 | /* | ||
577 | * Read CPU state dump data and convert it into ELF notes. | ||
578 | * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be | ||
579 | * used to access the data to allow for additional fields to be added without | ||
580 | * affecting compatibility. Each list of registers for a CPU starts with | ||
581 | * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, | ||
582 | * 8 Byte ASCII identifier and 8 Byte register value. The register entry | ||
583 | * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part | ||
584 | * of register value. For more details refer to PAPR document. | ||
585 | * | ||
586 | * Only for the crashing cpu do we ignore the CPU dump data; instead we | ||
587 | * get the exact state from the fadump crash info structure populated by | ||
588 | * the first kernel at the time of crash. | ||
589 | */ | ||
590 | static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) | ||
591 | { | ||
592 | struct fadump_reg_save_area_header *reg_header; | ||
593 | struct fadump_reg_entry *reg_entry; | ||
594 | struct fadump_crash_info_header *fdh = NULL; | ||
595 | void *vaddr; | ||
596 | unsigned long addr; | ||
597 | u32 num_cpus, *note_buf; | ||
598 | struct pt_regs regs; | ||
599 | int i, rc = 0, cpu = 0; | ||
600 | |||
601 | if (!fdm->cpu_state_data.bytes_dumped) | ||
602 | return -EINVAL; | ||
603 | |||
604 | addr = fdm->cpu_state_data.destination_address; | ||
605 | vaddr = __va(addr); | ||
606 | |||
607 | reg_header = vaddr; | ||
608 | if (reg_header->magic_number != REGSAVE_AREA_MAGIC) { | ||
609 | printk(KERN_ERR "Unable to read register save area.\n"); | ||
610 | return -ENOENT; | ||
611 | } | ||
612 | pr_debug("--------CPU State Data------------\n"); | ||
613 | pr_debug("Magic Number: %llx\n", reg_header->magic_number); | ||
614 | pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset); | ||
615 | |||
616 | vaddr += reg_header->num_cpu_offset; | ||
617 | num_cpus = *((u32 *)(vaddr)); | ||
618 | pr_debug("NumCpus : %u\n", num_cpus); | ||
619 | vaddr += sizeof(u32); | ||
620 | reg_entry = (struct fadump_reg_entry *)vaddr; | ||
621 | |||
622 | /* Allocate buffer to hold cpu crash notes. */ | ||
623 | fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); | ||
624 | fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); | ||
625 | note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size); | ||
626 | if (!note_buf) { | ||
627 | printk(KERN_ERR "Failed to allocate 0x%lx bytes for " | ||
628 | "cpu notes buffer\n", fw_dump.cpu_notes_buf_size); | ||
629 | return -ENOMEM; | ||
630 | } | ||
631 | fw_dump.cpu_notes_buf = __pa(note_buf); | ||
632 | |||
633 | pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", | ||
634 | (num_cpus * sizeof(note_buf_t)), note_buf); | ||
635 | |||
636 | if (fw_dump.fadumphdr_addr) | ||
637 | fdh = __va(fw_dump.fadumphdr_addr); | ||
638 | |||
639 | for (i = 0; i < num_cpus; i++) { | ||
640 | if (reg_entry->reg_id != REG_ID("CPUSTRT")) { | ||
641 | printk(KERN_ERR "Unable to read CPU state data\n"); | ||
642 | rc = -ENOENT; | ||
643 | goto error_out; | ||
644 | } | ||
645 | /* Lower 4 bytes of reg_value contain the logical cpu id */ | ||
646 | cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK; | ||
647 | if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) { | ||
648 | SKIP_TO_NEXT_CPU(reg_entry); | ||
649 | continue; | ||
650 | } | ||
651 | pr_debug("Reading register data for cpu %d...\n", cpu); | ||
652 | if (fdh && fdh->crashing_cpu == cpu) { | ||
653 | regs = fdh->regs; | ||
654 | note_buf = fadump_regs_to_elf_notes(note_buf, ®s); | ||
655 | SKIP_TO_NEXT_CPU(reg_entry); | ||
656 | } else { | ||
657 | reg_entry++; | ||
658 | reg_entry = fadump_read_registers(reg_entry, ®s); | ||
659 | note_buf = fadump_regs_to_elf_notes(note_buf, ®s); | ||
660 | } | ||
661 | } | ||
662 | fadump_final_note(note_buf); | ||
663 | |||
664 | pr_debug("Updating elfcore header (%llx) with cpu notes\n", | ||
665 | fdh->elfcorehdr_addr); | ||
666 | fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr)); | ||
667 | return 0; | ||
668 | |||
669 | error_out: | ||
670 | fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf), | ||
671 | fw_dump.cpu_notes_buf_size); | ||
672 | fw_dump.cpu_notes_buf = 0; | ||
673 | fw_dump.cpu_notes_buf_size = 0; | ||
674 | return rc; | ||
675 | |||
676 | } | ||
677 | |||
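The 16-byte register entries walked by the loop above can be pictured with a sketch like the following; the field names match the accesses in fadump_read_registers(), but the authoritative definition lives in asm/fadump.h, so treat this as illustrative:

/* Sketch of one PAPR register save entry (8-byte ASCII id + 8-byte value). */
struct fadump_reg_entry_sketch {
	u64 reg_id;	/* ASCII tag: "CPUSTRT", "GPR00".."GPR31", "MSR", ..., "CPUEND" */
	u64 reg_value;	/* register contents; low 4 bytes = cpu id for CPUSTRT/CPUEND */
};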
678 | /* | ||
679 | * Validate and process the dump data stored by firmware before exporting | ||
680 | * it through '/proc/vmcore'. | ||
681 | */ | ||
682 | static int __init process_fadump(const struct fadump_mem_struct *fdm_active) | ||
683 | { | ||
684 | struct fadump_crash_info_header *fdh; | ||
685 | int rc = 0; | ||
686 | |||
687 | if (!fdm_active || !fw_dump.fadumphdr_addr) | ||
688 | return -EINVAL; | ||
689 | |||
690 | /* Check if the dump data is valid. */ | ||
691 | if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) || | ||
692 | (fdm_active->cpu_state_data.error_flags != 0) || | ||
693 | (fdm_active->rmr_region.error_flags != 0)) { | ||
694 | printk(KERN_ERR "Dump taken by platform is not valid\n"); | ||
695 | return -EINVAL; | ||
696 | } | ||
697 | if ((fdm_active->rmr_region.bytes_dumped != | ||
698 | fdm_active->rmr_region.source_len) || | ||
699 | !fdm_active->cpu_state_data.bytes_dumped) { | ||
700 | printk(KERN_ERR "Dump taken by platform is incomplete\n"); | ||
701 | return -EINVAL; | ||
702 | } | ||
703 | |||
704 | /* Validate the fadump crash info header */ | ||
705 | fdh = __va(fw_dump.fadumphdr_addr); | ||
706 | if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { | ||
707 | printk(KERN_ERR "Crash info header is not valid.\n"); | ||
708 | return -EINVAL; | ||
709 | } | ||
710 | |||
711 | rc = fadump_build_cpu_notes(fdm_active); | ||
712 | if (rc) | ||
713 | return rc; | ||
714 | |||
715 | /* | ||
716 | * We are done validating the dump info and the elfcore header is now | ||
717 | * ready to be exported. Set elfcorehdr_addr so that the vmcore module | ||
718 | * will export the elfcore header through '/proc/vmcore'. | ||
719 | */ | ||
720 | elfcorehdr_addr = fdh->elfcorehdr_addr; | ||
721 | |||
722 | return 0; | ||
723 | } | ||
724 | |||
725 | static inline void fadump_add_crash_memory(unsigned long long base, | ||
726 | unsigned long long end) | ||
727 | { | ||
728 | if (base == end) | ||
729 | return; | ||
730 | |||
731 | pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", | ||
732 | crash_mem_ranges, base, end - 1, (end - base)); | ||
733 | crash_memory_ranges[crash_mem_ranges].base = base; | ||
734 | crash_memory_ranges[crash_mem_ranges].size = end - base; | ||
735 | crash_mem_ranges++; | ||
736 | } | ||
737 | |||
738 | static void fadump_exclude_reserved_area(unsigned long long start, | ||
739 | unsigned long long end) | ||
740 | { | ||
741 | unsigned long long ra_start, ra_end; | ||
742 | |||
743 | ra_start = fw_dump.reserve_dump_area_start; | ||
744 | ra_end = ra_start + fw_dump.reserve_dump_area_size; | ||
745 | |||
746 | if ((ra_start < end) && (ra_end > start)) { | ||
747 | if ((start < ra_start) && (end > ra_end)) { | ||
748 | fadump_add_crash_memory(start, ra_start); | ||
749 | fadump_add_crash_memory(ra_end, end); | ||
750 | } else if (start < ra_start) { | ||
751 | fadump_add_crash_memory(start, ra_start); | ||
752 | } else if (ra_end < end) { | ||
753 | fadump_add_crash_memory(ra_end, end); | ||
754 | } | ||
755 | } else | ||
756 | fadump_add_crash_memory(start, end); | ||
757 | } | ||
758 | |||
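Concretely, with a reserved dump area spanning [4GB, 5GB): an input range [2GB, 8GB) straddles the reservation and is split into [2GB, 4GB) and [5GB, 8GB); a range wholly inside [4GB, 5GB) overlaps but matches none of the split cases, so nothing is added; and a non-overlapping range falls through to the else branch and is added unchanged.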
759 | static int fadump_init_elfcore_header(char *bufp) | ||
760 | { | ||
761 | struct elfhdr *elf; | ||
762 | |||
763 | elf = (struct elfhdr *) bufp; | ||
764 | bufp += sizeof(struct elfhdr); | ||
765 | memcpy(elf->e_ident, ELFMAG, SELFMAG); | ||
766 | elf->e_ident[EI_CLASS] = ELF_CLASS; | ||
767 | elf->e_ident[EI_DATA] = ELF_DATA; | ||
768 | elf->e_ident[EI_VERSION] = EV_CURRENT; | ||
769 | elf->e_ident[EI_OSABI] = ELF_OSABI; | ||
770 | memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); | ||
771 | elf->e_type = ET_CORE; | ||
772 | elf->e_machine = ELF_ARCH; | ||
773 | elf->e_version = EV_CURRENT; | ||
774 | elf->e_entry = 0; | ||
775 | elf->e_phoff = sizeof(struct elfhdr); | ||
776 | elf->e_shoff = 0; | ||
777 | elf->e_flags = ELF_CORE_EFLAGS; | ||
778 | elf->e_ehsize = sizeof(struct elfhdr); | ||
779 | elf->e_phentsize = sizeof(struct elf_phdr); | ||
780 | elf->e_phnum = 0; | ||
781 | elf->e_shentsize = 0; | ||
782 | elf->e_shnum = 0; | ||
783 | elf->e_shstrndx = 0; | ||
784 | |||
785 | return 0; | ||
786 | } | ||
787 | |||
788 | /* | ||
789 | * Traverse through memblock structure and setup crash memory ranges. These | ||
790 | * ranges will be used to create PT_LOAD program headers in the elfcore header. | ||
791 | */ | ||
792 | static void fadump_setup_crash_memory_ranges(void) | ||
793 | { | ||
794 | struct memblock_region *reg; | ||
795 | unsigned long long start, end; | ||
796 | |||
797 | pr_debug("Setup crash memory ranges.\n"); | ||
798 | crash_mem_ranges = 0; | ||
799 | /* | ||
800 | * Add the first memory chunk (RMA_START through boot_memory_size) as | ||
801 | * a separate memory chunk. The reason is that, at the time of crash, | ||
802 | * firmware will move the contents of this chunk to a different location | ||
803 | * specified during fadump registration. We need to create a separate | ||
804 | * program header for this chunk with the correct offset. | ||
805 | */ | ||
806 | fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); | ||
807 | |||
808 | for_each_memblock(memory, reg) { | ||
809 | start = (unsigned long long)reg->base; | ||
810 | end = start + (unsigned long long)reg->size; | ||
811 | if (start == RMA_START && end >= fw_dump.boot_memory_size) | ||
812 | start = fw_dump.boot_memory_size; | ||
813 | |||
814 | /* add this range excluding the reserved dump area. */ | ||
815 | fadump_exclude_reserved_area(start, end); | ||
816 | } | ||
817 | } | ||
818 | |||
819 | /* | ||
820 | * If the given physical address falls within the boot memory region then | ||
821 | * return the relocated address that points to the dump region reserved | ||
822 | * for saving initial boot memory contents. | ||
823 | */ | ||
824 | static inline unsigned long fadump_relocate(unsigned long paddr) | ||
825 | { | ||
826 | if (paddr > RMA_START && paddr < fw_dump.boot_memory_size) | ||
827 | return fdm.rmr_region.destination_address + paddr; | ||
828 | else | ||
829 | return paddr; | ||
830 | } | ||
831 | |||
832 | static int fadump_create_elfcore_headers(char *bufp) | ||
833 | { | ||
834 | struct elfhdr *elf; | ||
835 | struct elf_phdr *phdr; | ||
836 | int i; | ||
837 | |||
838 | fadump_init_elfcore_header(bufp); | ||
839 | elf = (struct elfhdr *)bufp; | ||
840 | bufp += sizeof(struct elfhdr); | ||
841 | |||
842 | /* | ||
843 | * Set up an ELF PT_NOTE as a placeholder for cpu notes info. The notes | ||
844 | * info will be populated during the second kernel's boot after a crash. | ||
845 | * Hence this PT_NOTE will always be the first ELF note. | ||
846 | * | ||
847 | * NOTE: Any new ELF note addition should be placed after this note. | ||
848 | */ | ||
849 | phdr = (struct elf_phdr *)bufp; | ||
850 | bufp += sizeof(struct elf_phdr); | ||
851 | phdr->p_type = PT_NOTE; | ||
852 | phdr->p_flags = 0; | ||
853 | phdr->p_vaddr = 0; | ||
854 | phdr->p_align = 0; | ||
855 | |||
856 | phdr->p_offset = 0; | ||
857 | phdr->p_paddr = 0; | ||
858 | phdr->p_filesz = 0; | ||
859 | phdr->p_memsz = 0; | ||
860 | |||
861 | (elf->e_phnum)++; | ||
862 | |||
863 | /* setup ELF PT_NOTE for vmcoreinfo */ | ||
864 | phdr = (struct elf_phdr *)bufp; | ||
865 | bufp += sizeof(struct elf_phdr); | ||
866 | phdr->p_type = PT_NOTE; | ||
867 | phdr->p_flags = 0; | ||
868 | phdr->p_vaddr = 0; | ||
869 | phdr->p_align = 0; | ||
870 | |||
871 | phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); | ||
872 | phdr->p_offset = phdr->p_paddr; | ||
873 | phdr->p_memsz = vmcoreinfo_max_size; | ||
874 | phdr->p_filesz = vmcoreinfo_max_size; | ||
875 | |||
876 | /* Increment number of program headers. */ | ||
877 | (elf->e_phnum)++; | ||
878 | |||
879 | /* setup PT_LOAD sections. */ | ||
880 | |||
881 | for (i = 0; i < crash_mem_ranges; i++) { | ||
882 | unsigned long long mbase, msize; | ||
883 | mbase = crash_memory_ranges[i].base; | ||
884 | msize = crash_memory_ranges[i].size; | ||
885 | |||
886 | if (!msize) | ||
887 | continue; | ||
888 | |||
889 | phdr = (struct elf_phdr *)bufp; | ||
890 | bufp += sizeof(struct elf_phdr); | ||
891 | phdr->p_type = PT_LOAD; | ||
892 | phdr->p_flags = PF_R|PF_W|PF_X; | ||
893 | phdr->p_offset = mbase; | ||
894 | |||
895 | if (mbase == RMA_START) { | ||
896 | /* | ||
897 | * The entire RMA region will be moved by firmware | ||
898 | * to the specified destination_address. Hence set | ||
899 | * the correct offset. | ||
900 | */ | ||
901 | phdr->p_offset = fdm.rmr_region.destination_address; | ||
902 | } | ||
903 | |||
904 | phdr->p_paddr = mbase; | ||
905 | phdr->p_vaddr = (unsigned long)__va(mbase); | ||
906 | phdr->p_filesz = msize; | ||
907 | phdr->p_memsz = msize; | ||
908 | phdr->p_align = 0; | ||
909 | |||
910 | /* Increment number of program headers. */ | ||
911 | (elf->e_phnum)++; | ||
912 | } | ||
913 | return 0; | ||
914 | } | ||
915 | |||
916 | static unsigned long init_fadump_header(unsigned long addr) | ||
917 | { | ||
918 | struct fadump_crash_info_header *fdh; | ||
919 | |||
920 | if (!addr) | ||
921 | return 0; | ||
922 | |||
923 | fw_dump.fadumphdr_addr = addr; | ||
924 | fdh = __va(addr); | ||
925 | addr += sizeof(struct fadump_crash_info_header); | ||
926 | |||
927 | memset(fdh, 0, sizeof(struct fadump_crash_info_header)); | ||
928 | fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; | ||
929 | fdh->elfcorehdr_addr = addr; | ||
930 | /* We will set the crashing cpu id in crash_fadump() during crash. */ | ||
931 | fdh->crashing_cpu = CPU_UNKNOWN; | ||
932 | |||
933 | return addr; | ||
934 | } | ||
935 | |||
936 | static void register_fadump(void) | ||
937 | { | ||
938 | unsigned long addr; | ||
939 | void *vaddr; | ||
940 | |||
941 | /* | ||
942 | * If no memory is reserved then we cannot register for firmware- | ||
943 | * assisted dump. | ||
944 | */ | ||
945 | if (!fw_dump.reserve_dump_area_size) | ||
946 | return; | ||
947 | |||
948 | fadump_setup_crash_memory_ranges(); | ||
949 | |||
950 | addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len; | ||
951 | /* Initialize fadump crash info header. */ | ||
952 | addr = init_fadump_header(addr); | ||
953 | vaddr = __va(addr); | ||
954 | |||
955 | pr_debug("Creating ELF core headers at %#016lx\n", addr); | ||
956 | fadump_create_elfcore_headers(vaddr); | ||
957 | |||
958 | /* register the future kernel dump with firmware. */ | ||
959 | register_fw_dump(&fdm); | ||
960 | } | ||
961 | |||
962 | static int fadump_unregister_dump(struct fadump_mem_struct *fdm) | ||
963 | { | ||
964 | int rc = 0; | ||
965 | unsigned int wait_time; | ||
966 | |||
967 | pr_debug("Un-register firmware-assisted dump\n"); | ||
968 | |||
969 | /* TODO: Add upper time limit for the delay */ | ||
970 | do { | ||
971 | rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, | ||
972 | FADUMP_UNREGISTER, fdm, | ||
973 | sizeof(struct fadump_mem_struct)); | ||
974 | |||
975 | wait_time = rtas_busy_delay_time(rc); | ||
976 | if (wait_time) | ||
977 | mdelay(wait_time); | ||
978 | } while (wait_time); | ||
979 | |||
980 | if (rc) { | ||
981 | printk(KERN_ERR "Failed to un-register firmware-assisted dump." | ||
982 | " unexpected error(%d).\n", rc); | ||
983 | return rc; | ||
984 | } | ||
985 | fw_dump.dump_registered = 0; | ||
986 | return 0; | ||
987 | } | ||
988 | |||
989 | static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) | ||
990 | { | ||
991 | int rc = 0; | ||
992 | unsigned int wait_time; | ||
993 | |||
994 | pr_debug("Invalidating firmware-assisted dump registration\n"); | ||
995 | |||
996 | /* TODO: Add upper time limit for the delay */ | ||
997 | do { | ||
998 | rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, | ||
999 | FADUMP_INVALIDATE, fdm, | ||
1000 | sizeof(struct fadump_mem_struct)); | ||
1001 | |||
1002 | wait_time = rtas_busy_delay_time(rc); | ||
1003 | if (wait_time) | ||
1004 | mdelay(wait_time); | ||
1005 | } while (wait_time); | ||
1006 | |||
1007 | if (rc) { | ||
1008 | printk(KERN_ERR "Failed to invalidate firmware-assisted dump " | ||
1009 | "rgistration. unexpected error(%d).\n", rc); | ||
1010 | return rc; | ||
1011 | } | ||
1012 | fw_dump.dump_active = 0; | ||
1013 | fdm_active = NULL; | ||
1014 | return 0; | ||
1015 | } | ||
1016 | |||
1017 | void fadump_cleanup(void) | ||
1018 | { | ||
1019 | /* Invalidate the registration only if dump is active. */ | ||
1020 | if (fw_dump.dump_active) { | ||
1021 | init_fadump_mem_struct(&fdm, | ||
1022 | fdm_active->cpu_state_data.destination_address); | ||
1023 | fadump_invalidate_dump(&fdm); | ||
1024 | } | ||
1025 | } | ||
1026 | |||
1027 | /* | ||
1028 | * Release the memory that was reserved in early boot to preserve the memory | ||
1029 | * contents. The released memory will be available for general use. | ||
1030 | */ | ||
1031 | static void fadump_release_memory(unsigned long begin, unsigned long end) | ||
1032 | { | ||
1033 | unsigned long addr; | ||
1034 | unsigned long ra_start, ra_end; | ||
1035 | |||
1036 | ra_start = fw_dump.reserve_dump_area_start; | ||
1037 | ra_end = ra_start + fw_dump.reserve_dump_area_size; | ||
1038 | |||
1039 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | ||
1040 | /* | ||
1041 | * Exclude the dump reserve area; we will reuse it for the | ||
1042 | * next fadump registration. | ||
1043 | */ | ||
1044 | if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start)) | ||
1045 | continue; | ||
1046 | |||
1047 | ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); | ||
1048 | init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); | ||
1049 | free_page((unsigned long)__va(addr)); | ||
1050 | totalram_pages++; | ||
1051 | } | ||
1052 | } | ||
1053 | |||
1054 | static void fadump_invalidate_release_mem(void) | ||
1055 | { | ||
1056 | unsigned long reserved_area_start, reserved_area_end; | ||
1057 | unsigned long destination_address; | ||
1058 | |||
1059 | mutex_lock(&fadump_mutex); | ||
1060 | if (!fw_dump.dump_active) { | ||
1061 | mutex_unlock(&fadump_mutex); | ||
1062 | return; | ||
1063 | } | ||
1064 | |||
1065 | destination_address = fdm_active->cpu_state_data.destination_address; | ||
1066 | fadump_cleanup(); | ||
1067 | mutex_unlock(&fadump_mutex); | ||
1068 | |||
1069 | /* | ||
1070 | * Save the current reserved memory bounds; we will require them | ||
1071 | * later when releasing the memory for general use. | ||
1072 | */ | ||
1073 | reserved_area_start = fw_dump.reserve_dump_area_start; | ||
1074 | reserved_area_end = reserved_area_start + | ||
1075 | fw_dump.reserve_dump_area_size; | ||
1076 | /* | ||
1077 | * Set up reserve_dump_area_start and its size so that we can | ||
1078 | * reuse this reserved memory for re-registration. | ||
1079 | */ | ||
1080 | fw_dump.reserve_dump_area_start = destination_address; | ||
1081 | fw_dump.reserve_dump_area_size = get_fadump_area_size(); | ||
1082 | |||
1083 | fadump_release_memory(reserved_area_start, reserved_area_end); | ||
1084 | if (fw_dump.cpu_notes_buf) { | ||
1085 | fadump_cpu_notes_buf_free( | ||
1086 | (unsigned long)__va(fw_dump.cpu_notes_buf), | ||
1087 | fw_dump.cpu_notes_buf_size); | ||
1088 | fw_dump.cpu_notes_buf = 0; | ||
1089 | fw_dump.cpu_notes_buf_size = 0; | ||
1090 | } | ||
1091 | /* Initialize the kernel dump memory structure for FAD registration. */ | ||
1092 | init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); | ||
1093 | } | ||
1094 | |||
1095 | static ssize_t fadump_release_memory_store(struct kobject *kobj, | ||
1096 | struct kobj_attribute *attr, | ||
1097 | const char *buf, size_t count) | ||
1098 | { | ||
1099 | if (!fw_dump.dump_active) | ||
1100 | return -EPERM; | ||
1101 | |||
1102 | if (buf[0] == '1') { | ||
1103 | /* | ||
1104 | * Take away /proc/vmcore. We are releasing the dump | ||
1105 | * memory, so it will no longer be valid. | ||
1106 | */ | ||
1107 | vmcore_cleanup(); | ||
1108 | fadump_invalidate_release_mem(); | ||
1109 | |||
1110 | } else | ||
1111 | return -EINVAL; | ||
1112 | return count; | ||
1113 | } | ||
1114 | |||
1115 | static ssize_t fadump_enabled_show(struct kobject *kobj, | ||
1116 | struct kobj_attribute *attr, | ||
1117 | char *buf) | ||
1118 | { | ||
1119 | return sprintf(buf, "%d\n", fw_dump.fadump_enabled); | ||
1120 | } | ||
1121 | |||
1122 | static ssize_t fadump_register_show(struct kobject *kobj, | ||
1123 | struct kobj_attribute *attr, | ||
1124 | char *buf) | ||
1125 | { | ||
1126 | return sprintf(buf, "%d\n", fw_dump.dump_registered); | ||
1127 | } | ||
1128 | |||
1129 | static ssize_t fadump_register_store(struct kobject *kobj, | ||
1130 | struct kobj_attribute *attr, | ||
1131 | const char *buf, size_t count) | ||
1132 | { | ||
1133 | int ret = 0; | ||
1134 | |||
1135 | if (!fw_dump.fadump_enabled || fdm_active) | ||
1136 | return -EPERM; | ||
1137 | |||
1138 | mutex_lock(&fadump_mutex); | ||
1139 | |||
1140 | switch (buf[0]) { | ||
1141 | case '0': | ||
1142 | if (fw_dump.dump_registered == 0) { | ||
1143 | ret = -EINVAL; | ||
1144 | goto unlock_out; | ||
1145 | } | ||
1146 | /* Un-register Firmware-assisted dump */ | ||
1147 | fadump_unregister_dump(&fdm); | ||
1148 | break; | ||
1149 | case '1': | ||
1150 | if (fw_dump.dump_registered == 1) { | ||
1151 | ret = -EINVAL; | ||
1152 | goto unlock_out; | ||
1153 | } | ||
1154 | /* Register Firmware-assisted dump */ | ||
1155 | register_fadump(); | ||
1156 | break; | ||
1157 | default: | ||
1158 | ret = -EINVAL; | ||
1159 | break; | ||
1160 | } | ||
1161 | |||
1162 | unlock_out: | ||
1163 | mutex_unlock(&fadump_mutex); | ||
1164 | return ret < 0 ? ret : count; | ||
1165 | } | ||
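
From user space, these attributes are driven by writing a single character: '1' or '0' to /sys/kernel/fadump_registered to register or un-register the dump, and '1' to /sys/kernel/fadump_release_mem once a captured dump has been saved. An illustrative user-space sketch (the helper name is hypothetical):

#include <fcntl.h>
#include <unistd.h>

/* Write one flag character to a fadump sysfs attribute. */
static int fadump_sysfs_write(const char *path, char flag)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, &flag, 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}

/* e.g. fadump_sysfs_write("/sys/kernel/fadump_registered", '1');
 * per the store handlers above, registering twice or un-registering
 * when nothing is registered returns -EINVAL.
 */
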
1166 | |||
1167 | static int fadump_region_show(struct seq_file *m, void *private) | ||
1168 | { | ||
1169 | const struct fadump_mem_struct *fdm_ptr; | ||
1170 | |||
1171 | if (!fw_dump.fadump_enabled) | ||
1172 | return 0; | ||
1173 | |||
1174 | mutex_lock(&fadump_mutex); | ||
1175 | if (fdm_active) | ||
1176 | fdm_ptr = fdm_active; | ||
1177 | else { | ||
1178 | mutex_unlock(&fadump_mutex); | ||
1179 | fdm_ptr = &fdm; | ||
1180 | } | ||
1181 | |||
1182 | seq_printf(m, | ||
1183 | "CPU : [%#016llx-%#016llx] %#llx bytes, " | ||
1184 | "Dumped: %#llx\n", | ||
1185 | fdm_ptr->cpu_state_data.destination_address, | ||
1186 | fdm_ptr->cpu_state_data.destination_address + | ||
1187 | fdm_ptr->cpu_state_data.source_len - 1, | ||
1188 | fdm_ptr->cpu_state_data.source_len, | ||
1189 | fdm_ptr->cpu_state_data.bytes_dumped); | ||
1190 | seq_printf(m, | ||
1191 | "HPTE: [%#016llx-%#016llx] %#llx bytes, " | ||
1192 | "Dumped: %#llx\n", | ||
1193 | fdm_ptr->hpte_region.destination_address, | ||
1194 | fdm_ptr->hpte_region.destination_address + | ||
1195 | fdm_ptr->hpte_region.source_len - 1, | ||
1196 | fdm_ptr->hpte_region.source_len, | ||
1197 | fdm_ptr->hpte_region.bytes_dumped); | ||
1198 | seq_printf(m, | ||
1199 | "DUMP: [%#016llx-%#016llx] %#llx bytes, " | ||
1200 | "Dumped: %#llx\n", | ||
1201 | fdm_ptr->rmr_region.destination_address, | ||
1202 | fdm_ptr->rmr_region.destination_address + | ||
1203 | fdm_ptr->rmr_region.source_len - 1, | ||
1204 | fdm_ptr->rmr_region.source_len, | ||
1205 | fdm_ptr->rmr_region.bytes_dumped); | ||
1206 | |||
1207 | if (!fdm_active || | ||
1208 | (fw_dump.reserve_dump_area_start == | ||
1209 | fdm_ptr->cpu_state_data.destination_address)) | ||
1210 | goto out; | ||
1211 | |||
1212 | /* Dump is active. Show reserved memory region. */ | ||
1213 | seq_printf(m, | ||
1214 | " : [%#016llx-%#016llx] %#llx bytes, " | ||
1215 | "Dumped: %#llx\n", | ||
1216 | (unsigned long long)fw_dump.reserve_dump_area_start, | ||
1217 | fdm_ptr->cpu_state_data.destination_address - 1, | ||
1218 | fdm_ptr->cpu_state_data.destination_address - | ||
1219 | fw_dump.reserve_dump_area_start, | ||
1220 | fdm_ptr->cpu_state_data.destination_address - | ||
1221 | fw_dump.reserve_dump_area_start); | ||
1222 | out: | ||
1223 | if (fdm_active) | ||
1224 | mutex_unlock(&fadump_mutex); | ||
1225 | return 0; | ||
1226 | } | ||
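
Given the format strings above, the fadump_region debugfs file renders one line per region; for example (addresses and sizes below are invented for illustration):

CPU : [0x0000000023400000-0x000000002340ffff] 0x10000 bytes, Dumped: 0x10000
HPTE: [0x0000000023410000-0x000000002341ffff] 0x10000 bytes, Dumped: 0x10000
DUMP: [0x0000000023420000-0x00000000433fffff] 0x1ffe0000 bytes, Dumped: 0x1ffe0000

When a dump is active and the reservation starts below the CPU state region, a fourth unlabelled line shows the remaining reserved range in the same format.
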
1227 | |||
1228 | static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem, | ||
1229 | 0200, NULL, | ||
1230 | fadump_release_memory_store); | ||
1231 | static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, | ||
1232 | 0444, fadump_enabled_show, | ||
1233 | NULL); | ||
1234 | static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered, | ||
1235 | 0644, fadump_register_show, | ||
1236 | fadump_register_store); | ||
1237 | |||
1238 | static int fadump_region_open(struct inode *inode, struct file *file) | ||
1239 | { | ||
1240 | return single_open(file, fadump_region_show, inode->i_private); | ||
1241 | } | ||
1242 | |||
1243 | static const struct file_operations fadump_region_fops = { | ||
1244 | .open = fadump_region_open, | ||
1245 | .read = seq_read, | ||
1246 | .llseek = seq_lseek, | ||
1247 | .release = single_release, | ||
1248 | }; | ||
1249 | |||
1250 | static void fadump_init_files(void) | ||
1251 | { | ||
1252 | struct dentry *debugfs_file; | ||
1253 | int rc = 0; | ||
1254 | |||
1255 | rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr); | ||
1256 | if (rc) | ||
1257 | printk(KERN_ERR "fadump: unable to create sysfs file" | ||
1258 | " fadump_enabled (%d)\n", rc); | ||
1259 | |||
1260 | rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr); | ||
1261 | if (rc) | ||
1262 | printk(KERN_ERR "fadump: unable to create sysfs file" | ||
1263 | " fadump_registered (%d)\n", rc); | ||
1264 | |||
1265 | debugfs_file = debugfs_create_file("fadump_region", 0444, | ||
1266 | powerpc_debugfs_root, NULL, | ||
1267 | &fadump_region_fops); | ||
1268 | if (!debugfs_file) | ||
1269 | printk(KERN_ERR "fadump: unable to create debugfs file" | ||
1270 | " fadump_region\n"); | ||
1271 | |||
1272 | if (fw_dump.dump_active) { | ||
1273 | rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr); | ||
1274 | if (rc) | ||
1275 | printk(KERN_ERR "fadump: unable to create sysfs file" | ||
1276 | " fadump_release_mem (%d)\n", rc); | ||
1277 | } | ||
1278 | return; | ||
1279 | } | ||
1280 | |||
1281 | /* | ||
1282 | * Prepare for firmware-assisted dump. | ||
1283 | */ | ||
1284 | int __init setup_fadump(void) | ||
1285 | { | ||
1286 | if (!fw_dump.fadump_enabled) | ||
1287 | return 0; | ||
1288 | |||
1289 | if (!fw_dump.fadump_supported) { | ||
1290 | printk(KERN_ERR "Firmware-assisted dump is not supported on" | ||
1291 | " this hardware\n"); | ||
1292 | return 0; | ||
1293 | } | ||
1294 | |||
1295 | fadump_show_config(); | ||
1296 | /* | ||
1297 | * If dump data is available, check whether it is valid and prepare | ||
1298 | * to save it to disk. | ||
1299 | */ | ||
1300 | if (fw_dump.dump_active) { | ||
1301 | /* | ||
1302 | * If the dump process fails, invalidate the registration | ||
1303 | * and release the memory before proceeding with re-registration. | ||
1304 | */ | ||
1305 | if (process_fadump(fdm_active) < 0) | ||
1306 | fadump_invalidate_release_mem(); | ||
1307 | } | ||
1308 | /* Initialize the kernel dump memory structure for FAD registration. */ | ||
1309 | else if (fw_dump.reserve_dump_area_size) | ||
1310 | init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); | ||
1311 | fadump_init_files(); | ||
1312 | |||
1313 | return 1; | ||
1314 | } | ||
1315 | subsys_initcall(setup_fadump); | ||
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 0654dba2c1f1..dc0488b6f6e1 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S | |||
@@ -395,7 +395,7 @@ DataAccess: | |||
395 | bl hash_page | 395 | bl hash_page |
396 | 1: lwz r5,_DSISR(r11) /* get DSISR value */ | 396 | 1: lwz r5,_DSISR(r11) /* get DSISR value */ |
397 | mfspr r4,SPRN_DAR | 397 | mfspr r4,SPRN_DAR |
398 | EXC_XFER_EE_LITE(0x300, handle_page_fault) | 398 | EXC_XFER_LITE(0x300, handle_page_fault) |
399 | 399 | ||
400 | 400 | ||
401 | /* Instruction access exception. */ | 401 | /* Instruction access exception. */ |
@@ -410,7 +410,7 @@ InstructionAccess: | |||
410 | bl hash_page | 410 | bl hash_page |
411 | 1: mr r4,r12 | 411 | 1: mr r4,r12 |
412 | mr r5,r9 | 412 | mr r5,r9 |
413 | EXC_XFER_EE_LITE(0x400, handle_page_fault) | 413 | EXC_XFER_LITE(0x400, handle_page_fault) |
414 | 414 | ||
415 | /* External interrupt */ | 415 | /* External interrupt */ |
416 | EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) | 416 | EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) |
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 872a6af83bad..4989661b710b 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S | |||
@@ -394,7 +394,7 @@ label: | |||
394 | NORMAL_EXCEPTION_PROLOG | 394 | NORMAL_EXCEPTION_PROLOG |
395 | mr r4,r12 /* Pass SRR0 as arg2 */ | 395 | mr r4,r12 /* Pass SRR0 as arg2 */ |
396 | li r5,0 /* Pass zero as arg3 */ | 396 | li r5,0 /* Pass zero as arg3 */ |
397 | EXC_XFER_EE_LITE(0x400, handle_page_fault) | 397 | EXC_XFER_LITE(0x400, handle_page_fault) |
398 | 398 | ||
399 | /* 0x0500 - External Interrupt Exception */ | 399 | /* 0x0500 - External Interrupt Exception */ |
400 | EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) | 400 | EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) |
@@ -747,7 +747,7 @@ DataAccess: | |||
747 | mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ | 747 | mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ |
748 | stw r5,_ESR(r11) | 748 | stw r5,_ESR(r11) |
749 | mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ | 749 | mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ |
750 | EXC_XFER_EE_LITE(0x300, handle_page_fault) | 750 | EXC_XFER_LITE(0x300, handle_page_fault) |
751 | 751 | ||
752 | /* Other PowerPC processors, namely those derived from the 6xx-series | 752 | /* Other PowerPC processors, namely those derived from the 6xx-series |
753 | * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. | 753 | * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. |
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 06c7251c1bf7..58bddee8e1e8 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S | |||
@@ -32,13 +32,13 @@ | |||
32 | #include <asm/cputable.h> | 32 | #include <asm/cputable.h> |
33 | #include <asm/setup.h> | 33 | #include <asm/setup.h> |
34 | #include <asm/hvcall.h> | 34 | #include <asm/hvcall.h> |
35 | #include <asm/iseries/lpar_map.h> | ||
36 | #include <asm/thread_info.h> | 35 | #include <asm/thread_info.h> |
37 | #include <asm/firmware.h> | 36 | #include <asm/firmware.h> |
38 | #include <asm/page_64.h> | 37 | #include <asm/page_64.h> |
39 | #include <asm/irqflags.h> | 38 | #include <asm/irqflags.h> |
40 | #include <asm/kvm_book3s_asm.h> | 39 | #include <asm/kvm_book3s_asm.h> |
41 | #include <asm/ptrace.h> | 40 | #include <asm/ptrace.h> |
41 | #include <asm/hw_irq.h> | ||
42 | 42 | ||
43 | /* The physical memory is laid out such that the secondary processor | 43 | /* The physical memory is laid out such that the secondary processor |
44 | * spin code sits at 0x0000...0x00ff. On server, the vectors follow | 44 | * spin code sits at 0x0000...0x00ff. On server, the vectors follow |
@@ -57,10 +57,6 @@ | |||
57 | * entry in r9 for debugging purposes | 57 | * entry in r9 for debugging purposes |
58 | * 2. Secondary processors enter at 0x60 with PIR in gpr3 | 58 | * 2. Secondary processors enter at 0x60 with PIR in gpr3 |
59 | * | 59 | * |
60 | * For iSeries: | ||
61 | * 1. The MMU is on (as it always is for iSeries) | ||
62 | * 2. The kernel is entered at system_reset_iSeries | ||
63 | * | ||
64 | * For Book3E processors: | 60 | * For Book3E processors: |
65 | * 1. The MMU is on running in AS0 in a state defined in ePAPR | 61 | * 1. The MMU is on running in AS0 in a state defined in ePAPR |
66 | * 2. The kernel is entered at __start | 62 | * 2. The kernel is entered at __start |
@@ -93,15 +89,6 @@ __secondary_hold_spinloop: | |||
93 | __secondary_hold_acknowledge: | 89 | __secondary_hold_acknowledge: |
94 | .llong 0x0 | 90 | .llong 0x0 |
95 | 91 | ||
96 | #ifdef CONFIG_PPC_ISERIES | ||
97 | /* | ||
98 | * At offset 0x20, there is a pointer to iSeries LPAR data. | ||
99 | * This is required by the hypervisor | ||
100 | */ | ||
101 | . = 0x20 | ||
102 | .llong hvReleaseData-KERNELBASE | ||
103 | #endif /* CONFIG_PPC_ISERIES */ | ||
104 | |||
105 | #ifdef CONFIG_RELOCATABLE | 92 | #ifdef CONFIG_RELOCATABLE |
106 | /* This flag is set to 1 by a loader if the kernel should run | 93 | /* This flag is set to 1 by a loader if the kernel should run |
107 | * at the loaded address instead of the linked address. This | 94 | * at the loaded address instead of the linked address. This |
@@ -564,7 +551,8 @@ _GLOBAL(pmac_secondary_start) | |||
564 | */ | 551 | */ |
565 | li r0,0 | 552 | li r0,0 |
566 | stb r0,PACASOFTIRQEN(r13) | 553 | stb r0,PACASOFTIRQEN(r13) |
567 | stb r0,PACAHARDIRQEN(r13) | 554 | li r0,PACA_IRQ_HARD_DIS |
555 | stb r0,PACAIRQHAPPENED(r13) | ||
568 | 556 | ||
569 | /* Create a temp kernel stack for use before relocation is on. */ | 557 | /* Create a temp kernel stack for use before relocation is on. */ |
570 | ld r1,PACAEMERGSP(r13) | 558 | ld r1,PACAEMERGSP(r13) |
@@ -582,7 +570,7 @@ _GLOBAL(pmac_secondary_start) | |||
582 | * 1. Processor number | 570 | * 1. Processor number |
583 | * 2. Segment table pointer (virtual address) | 571 | * 2. Segment table pointer (virtual address) |
584 | * On entry the following are set: | 572 | * On entry the following are set: |
585 | * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries | 573 | * r1 = stack pointer (real addr of temp stack) |
586 | * r24 = cpu# (in Linux terms) | 574 | * r24 = cpu# (in Linux terms) |
587 | * r13 = paca virtual address | 575 | * r13 = paca virtual address |
588 | * SPRG_PACA = paca virtual address | 576 | * SPRG_PACA = paca virtual address |
@@ -595,7 +583,7 @@ __secondary_start: | |||
595 | /* Set thread priority to MEDIUM */ | 583 | /* Set thread priority to MEDIUM */ |
596 | HMT_MEDIUM | 584 | HMT_MEDIUM |
597 | 585 | ||
598 | /* Initialize the kernel stack. Just a repeat for iSeries. */ | 586 | /* Initialize the kernel stack */ |
599 | LOAD_REG_ADDR(r3, current_set) | 587 | LOAD_REG_ADDR(r3, current_set) |
600 | sldi r28,r24,3 /* get current_set[cpu#] */ | 588 | sldi r28,r24,3 /* get current_set[cpu#] */ |
601 | ldx r14,r3,r28 | 589 | ldx r14,r3,r28 |
@@ -615,20 +603,16 @@ __secondary_start: | |||
615 | li r7,0 | 603 | li r7,0 |
616 | mtlr r7 | 604 | mtlr r7 |
617 | 605 | ||
606 | /* Mark interrupts soft and hard disabled (they might be enabled | ||
607 | * in the PACA when doing hotplug) | ||
608 | */ | ||
609 | stb r7,PACASOFTIRQEN(r13) | ||
610 | li r0,PACA_IRQ_HARD_DIS | ||
611 | stb r0,PACAIRQHAPPENED(r13) | ||
612 | |||
618 | /* enable MMU and jump to start_secondary */ | 613 | /* enable MMU and jump to start_secondary */ |
619 | LOAD_REG_ADDR(r3, .start_secondary_prolog) | 614 | LOAD_REG_ADDR(r3, .start_secondary_prolog) |
620 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) | 615 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) |
621 | #ifdef CONFIG_PPC_ISERIES | ||
622 | BEGIN_FW_FTR_SECTION | ||
623 | ori r4,r4,MSR_EE | ||
624 | li r8,1 | ||
625 | stb r8,PACAHARDIRQEN(r13) | ||
626 | END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) | ||
627 | #endif | ||
628 | BEGIN_FW_FTR_SECTION | ||
629 | stb r7,PACAHARDIRQEN(r13) | ||
630 | END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) | ||
631 | stb r7,PACASOFTIRQEN(r13) | ||
632 | 616 | ||
633 | mtspr SPRN_SRR0,r3 | 617 | mtspr SPRN_SRR0,r3 |
634 | mtspr SPRN_SRR1,r4 | 618 | mtspr SPRN_SRR1,r4 |
@@ -771,22 +755,18 @@ _INIT_GLOBAL(start_here_common) | |||
771 | /* Load the TOC (virtual address) */ | 755 | /* Load the TOC (virtual address) */ |
772 | ld r2,PACATOC(r13) | 756 | ld r2,PACATOC(r13) |
773 | 757 | ||
758 | /* Do more system initializations in virtual mode */ | ||
774 | bl .setup_system | 759 | bl .setup_system |
775 | 760 | ||
776 | /* Load up the kernel context */ | 761 | /* Mark interrupts soft and hard disabled (they might be enabled |
777 | 5: | 762 | * in the PACA when doing hotplug) |
778 | li r5,0 | 763 | */ |
779 | stb r5,PACASOFTIRQEN(r13) /* Soft Disabled */ | 764 | li r0,0 |
780 | #ifdef CONFIG_PPC_ISERIES | 765 | stb r0,PACASOFTIRQEN(r13) |
781 | BEGIN_FW_FTR_SECTION | 766 | li r0,PACA_IRQ_HARD_DIS |
782 | mfmsr r5 | 767 | stb r0,PACAIRQHAPPENED(r13) |
783 | ori r5,r5,MSR_EE /* Hard Enabled on iSeries*/ | ||
784 | mtmsrd r5 | ||
785 | li r5,1 | ||
786 | END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) | ||
787 | #endif | ||
788 | stb r5,PACAHARDIRQEN(r13) /* Hard Disabled on others */ | ||
789 | 768 | ||
769 | /* Generic kernel entry */ | ||
790 | bl .start_kernel | 770 | bl .start_kernel |
791 | 771 | ||
792 | /* Not reached */ | 772 | /* Not reached */ |
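
The pattern repeated in these boot paths (zero PACASOFTIRQEN, then store PACA_IRQ_HARD_DIS into PACAIRQHAPPENED) starts the CPU in the fully-disabled state of the new lazy-interrupt scheme. In rough C terms (a sketch, not code from this patch):

/* Mark interrupts soft-disabled and record that they are also hard
 * disabled, so arch_local_irq_restore() knows it must hard-enable
 * when the kernel first soft-enables interrupts.
 */
local_paca->soft_enabled = 0;
local_paca->irq_happened = PACA_IRQ_HARD_DIS;
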
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b68cb173ba2c..b2a5860accfb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S | |||
@@ -220,7 +220,7 @@ DataAccess: | |||
220 | mfspr r4,SPRN_DAR | 220 | mfspr r4,SPRN_DAR |
221 | li r10,0x00f0 | 221 | li r10,0x00f0 |
222 | mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ | 222 | mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ |
223 | EXC_XFER_EE_LITE(0x300, handle_page_fault) | 223 | EXC_XFER_LITE(0x300, handle_page_fault) |
224 | 224 | ||
225 | /* Instruction access exception. | 225 | /* Instruction access exception. |
226 | * This is "never generated" by the MPC8xx. We jump to it for other | 226 | * This is "never generated" by the MPC8xx. We jump to it for other |
@@ -231,7 +231,7 @@ InstructionAccess: | |||
231 | EXCEPTION_PROLOG | 231 | EXCEPTION_PROLOG |
232 | mr r4,r12 | 232 | mr r4,r12 |
233 | mr r5,r9 | 233 | mr r5,r9 |
234 | EXC_XFER_EE_LITE(0x400, handle_page_fault) | 234 | EXC_XFER_LITE(0x400, handle_page_fault) |
235 | 235 | ||
236 | /* External interrupt */ | 236 | /* External interrupt */ |
237 | EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) | 237 | EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) |
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fc921bf62e15..0e4175388f47 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h | |||
@@ -359,7 +359,7 @@ label: | |||
359 | mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ | 359 | mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ |
360 | stw r5,_ESR(r11); \ | 360 | stw r5,_ESR(r11); \ |
361 | mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ | 361 | mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ |
362 | EXC_XFER_EE_LITE(0x0300, handle_page_fault) | 362 | EXC_XFER_LITE(0x0300, handle_page_fault) |
363 | 363 | ||
364 | #define INSTRUCTION_STORAGE_EXCEPTION \ | 364 | #define INSTRUCTION_STORAGE_EXCEPTION \ |
365 | START_EXCEPTION(InstructionStorage) \ | 365 | START_EXCEPTION(InstructionStorage) \ |
@@ -368,7 +368,7 @@ label: | |||
368 | stw r5,_ESR(r11); \ | 368 | stw r5,_ESR(r11); \ |
369 | mr r4,r12; /* Pass SRR0 as arg2 */ \ | 369 | mr r4,r12; /* Pass SRR0 as arg2 */ \ |
370 | li r5,0; /* Pass zero as arg3 */ \ | 370 | li r5,0; /* Pass zero as arg3 */ \ |
371 | EXC_XFER_EE_LITE(0x0400, handle_page_fault) | 371 | EXC_XFER_LITE(0x0400, handle_page_fault) |
372 | 372 | ||
373 | #define ALIGNMENT_EXCEPTION \ | 373 | #define ALIGNMENT_EXCEPTION \ |
374 | START_EXCEPTION(Alignment) \ | 374 | START_EXCEPTION(Alignment) \ |
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index d5d78c4ceef6..28e62598d0e8 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S | |||
@@ -319,7 +319,7 @@ interrupt_base: | |||
319 | mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ | 319 | mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ |
320 | andis. r10,r5,(ESR_ILK|ESR_DLK)@h | 320 | andis. r10,r5,(ESR_ILK|ESR_DLK)@h |
321 | bne 1f | 321 | bne 1f |
322 | EXC_XFER_EE_LITE(0x0300, handle_page_fault) | 322 | EXC_XFER_LITE(0x0300, handle_page_fault) |
323 | 1: | 323 | 1: |
324 | addi r3,r1,STACK_FRAME_OVERHEAD | 324 | addi r3,r1,STACK_FRAME_OVERHEAD |
325 | EXC_XFER_EE_LITE(0x0300, CacheLockingException) | 325 | EXC_XFER_EE_LITE(0x0300, CacheLockingException) |
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index d39ae606ff8d..79bb282e6501 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c | |||
@@ -713,7 +713,7 @@ static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { | |||
713 | 713 | ||
714 | struct bus_type ibmebus_bus_type = { | 714 | struct bus_type ibmebus_bus_type = { |
715 | .name = "ibmebus", | 715 | .name = "ibmebus", |
716 | .uevent = of_device_uevent, | 716 | .uevent = of_device_uevent_modalias, |
717 | .bus_attrs = ibmebus_bus_attrs, | 717 | .bus_attrs = ibmebus_bus_attrs, |
718 | .match = ibmebus_bus_bus_match, | 718 | .match = ibmebus_bus_bus_match, |
719 | .probe = ibmebus_bus_device_probe, | 719 | .probe = ibmebus_bus_device_probe, |
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index c97fc60c790c..e8e821146f38 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c | |||
@@ -84,7 +84,11 @@ void cpu_idle(void) | |||
84 | 84 | ||
85 | start_critical_timings(); | 85 | start_critical_timings(); |
86 | 86 | ||
87 | local_irq_enable(); | 87 | /* Some power_save functions return with |
88 | * interrupts enabled, some don't. | ||
89 | */ | ||
90 | if (irqs_disabled()) | ||
91 | local_irq_enable(); | ||
88 | set_thread_flag(TIF_POLLING_NRFLAG); | 92 | set_thread_flag(TIF_POLLING_NRFLAG); |
89 | 93 | ||
90 | } else { | 94 | } else { |
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 16c002d6bdf1..ff007b59448d 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S | |||
@@ -29,43 +29,30 @@ _GLOBAL(book3e_idle) | |||
29 | wrteei 0 | 29 | wrteei 0 |
30 | 30 | ||
31 | /* Now check if an interrupt came in while we were soft disabled | 31 | /* Now check if an interrupt came in while we were soft disabled |
32 | * since we may otherwise lose it (doorbells etc...). We know | 32 | * since we may otherwise lose it (doorbells etc...). |
33 | * that since PACAHARDIRQEN will have been cleared in that case. | ||
34 | */ | 33 | */ |
35 | lbz r3,PACAHARDIRQEN(r13) | 34 | lbz r3,PACAIRQHAPPENED(r13) |
36 | cmpwi cr0,r3,0 | 35 | cmpwi cr0,r3,0 |
37 | beqlr | 36 | bnelr |
38 | 37 | ||
39 | /* Now we are going to mark ourselves as soft and hard enables in | 38 | /* Now we are going to mark ourselves as soft and hard enabled in |
40 | * order to be able to take interrupts while asleep. We inform lockdep | 39 | * order to be able to take interrupts while asleep. We inform lockdep |
41 | * of that. We don't actually turn interrupts on just yet though. | 40 | * of that. We don't actually turn interrupts on just yet though.
42 | */ | 41 | */ |
43 | #ifdef CONFIG_TRACE_IRQFLAGS | 42 | #ifdef CONFIG_TRACE_IRQFLAGS |
44 | stdu r1,-128(r1) | 43 | stdu r1,-128(r1) |
45 | bl .trace_hardirqs_on | 44 | bl .trace_hardirqs_on |
45 | addi r1,r1,128 | ||
46 | #endif | 46 | #endif |
47 | li r0,1 | 47 | li r0,1 |
48 | stb r0,PACASOFTIRQEN(r13) | 48 | stb r0,PACASOFTIRQEN(r13) |
49 | stb r0,PACAHARDIRQEN(r13) | ||
50 | 49 | ||
51 | /* Interrupts will make us return to LR, so get something we want | 50 | /* Interrupts will make us return to LR, so get something we want
52 | * in there | 51 | * in there |
53 | */ | 52 | */ |
54 | bl 1f | 53 | bl 1f |
55 | 54 | ||
56 | /* Hard disable interrupts again */ | 55 | /* And return (interrupts are on) */ |
57 | wrteei 0 | ||
58 | |||
59 | /* Mark them off again in the PACA as well */ | ||
60 | li r0,0 | ||
61 | stb r0,PACASOFTIRQEN(r13) | ||
62 | stb r0,PACAHARDIRQEN(r13) | ||
63 | |||
64 | /* Tell lockdep about it */ | ||
65 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
66 | bl .trace_hardirqs_off | ||
67 | addi r1,r1,128 | ||
68 | #endif | ||
69 | ld r0,16(r1) | 56 | ld r0,16(r1) |
70 | mtlr r0 | 57 | mtlr r0 |
71 | blr | 58 | blr |
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index ba3195478600..2c71b0fc9f91 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/thread_info.h> | 14 | #include <asm/thread_info.h> |
15 | #include <asm/ppc_asm.h> | 15 | #include <asm/ppc_asm.h> |
16 | #include <asm/asm-offsets.h> | 16 | #include <asm/asm-offsets.h> |
17 | #include <asm/irqflags.h> | ||
17 | 18 | ||
18 | #undef DEBUG | 19 | #undef DEBUG |
19 | 20 | ||
@@ -29,14 +30,31 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) | |||
29 | cmpwi 0,r4,0 | 30 | cmpwi 0,r4,0 |
30 | beqlr | 31 | beqlr |
31 | 32 | ||
32 | /* Go to NAP now */ | 33 | /* Hard disable interrupts */ |
33 | mfmsr r7 | 34 | mfmsr r7 |
34 | rldicl r0,r7,48,1 | 35 | rldicl r0,r7,48,1 |
35 | rotldi r0,r0,16 | 36 | rotldi r0,r0,16 |
36 | mtmsrd r0,1 /* hard-disable interrupts */ | 37 | mtmsrd r0,1 |
38 | |||
39 | /* Check if something happened while soft-disabled */ | ||
40 | lbz r0,PACAIRQHAPPENED(r13) | ||
41 | cmpwi cr0,r0,0 | ||
42 | bnelr | ||
43 | |||
44 | /* Soft-enable interrupts */ | ||
45 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
46 | mflr r0 | ||
47 | std r0,16(r1) | ||
48 | stdu r1,-128(r1) | ||
49 | bl .trace_hardirqs_on | ||
50 | addi r1,r1,128 | ||
51 | ld r0,16(r1) | ||
52 | mtlr r0 | ||
53 | mfmsr r7 | ||
54 | #endif /* CONFIG_TRACE_IRQFLAGS */ | ||
55 | |||
37 | li r0,1 | 56 | li r0,1 |
38 | stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ | 57 | stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ |
39 | stb r0,PACAHARDIRQEN(r13) | ||
40 | BEGIN_FTR_SECTION | 58 | BEGIN_FTR_SECTION |
41 | DSSALL | 59 | DSSALL |
42 | sync | 60 | sync |
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index fcdff198da4b..0cdc9a392839 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * This file contains the power_save function for 970-family CPUs. | 2 | * This file contains the power_save function for Power7 CPUs. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public License | 5 | * modify it under the terms of the GNU General Public License |
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/ppc_asm.h> | 15 | #include <asm/ppc_asm.h> |
16 | #include <asm/asm-offsets.h> | 16 | #include <asm/asm-offsets.h> |
17 | #include <asm/ppc-opcode.h> | 17 | #include <asm/ppc-opcode.h> |
18 | #include <asm/hw_irq.h> | ||
18 | 19 | ||
19 | #undef DEBUG | 20 | #undef DEBUG |
20 | 21 | ||
@@ -51,9 +52,25 @@ _GLOBAL(power7_idle) | |||
51 | rldicl r9,r9,48,1 | 52 | rldicl r9,r9,48,1 |
52 | rotldi r9,r9,16 | 53 | rotldi r9,r9,16 |
53 | mtmsrd r9,1 /* hard-disable interrupts */ | 54 | mtmsrd r9,1 /* hard-disable interrupts */ |
55 | |||
56 | /* Check if something happened while soft-disabled */ | ||
57 | lbz r0,PACAIRQHAPPENED(r13) | ||
58 | cmpwi cr0,r0,0 | ||
59 | beq 1f | ||
60 | addi r1,r1,INT_FRAME_SIZE | ||
61 | ld r0,16(r1) | ||
62 | mtlr r0 | ||
63 | blr | ||
64 | |||
65 | 1: /* We mark irqs hard disabled as this is the state we'll | ||
66 | * be in when returning and we need to tell arch_local_irq_restore() | ||
67 | * about it | ||
68 | */ | ||
69 | li r0,PACA_IRQ_HARD_DIS | ||
70 | stb r0,PACAIRQHAPPENED(r13) | ||
71 | |||
72 | /* We haven't lost state ... yet */ | ||
54 | li r0,0 | 73 | li r0,0 |
55 | stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ | ||
56 | stb r0,PACAHARDIRQEN(r13) | ||
57 | stb r0,PACA_NAPSTATELOST(r13) | 74 | stb r0,PACA_NAPSTATELOST(r13) |
58 | 75 | ||
59 | /* Continue saving state */ | 76 | /* Continue saving state */ |
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0cfcf98aafca..359f078571c7 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/pci-bridge.h> | 39 | #include <asm/pci-bridge.h> |
40 | #include <asm/machdep.h> | 40 | #include <asm/machdep.h> |
41 | #include <asm/kdump.h> | 41 | #include <asm/kdump.h> |
42 | #include <asm/fadump.h> | ||
42 | 43 | ||
43 | #define DBG(...) | 44 | #define DBG(...) |
44 | 45 | ||
@@ -445,7 +446,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, | |||
445 | 446 | ||
446 | static void iommu_table_clear(struct iommu_table *tbl) | 447 | static void iommu_table_clear(struct iommu_table *tbl) |
447 | { | 448 | { |
448 | if (!is_kdump_kernel()) { | 449 | /* |
450 | * In the case of firmware-assisted dump, the system goes through a | ||
451 | * clean reboot at the time of the crash, so it is safe to | ||
452 | * clear the TCE entries when firmware-assisted dump is active. | ||
453 | */ | ||
454 | if (!is_kdump_kernel() || is_fadump_active()) { | ||
449 | /* Clear the table in case firmware left allocations in it */ | 455 | /* Clear the table in case firmware left allocations in it */ |
450 | ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); | 456 | ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); |
451 | return; | 457 | return; |
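
is_fadump_active() comes from the newly included <asm/fadump.h>; its definition is not in this hunk, but it presumably reduces to a test of the dump-active flag set early in boot, along these lines (a sketch under that assumption, not the patch's actual body):

int is_fadump_active(void)
{
	/* fw_dump.dump_active is set while parsing the device tree when
	 * the kernel finds it was booted with a pending firmware dump.
	 */
	return fw_dump.dump_active;
}
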
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 01e2877e8e04..a3d128e94cff 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c | |||
@@ -93,20 +93,16 @@ extern int tau_interrupts(int); | |||
93 | 93 | ||
94 | #ifdef CONFIG_PPC64 | 94 | #ifdef CONFIG_PPC64 |
95 | 95 | ||
96 | #ifndef CONFIG_SPARSE_IRQ | ||
97 | EXPORT_SYMBOL(irq_desc); | ||
98 | #endif | ||
99 | |||
100 | int distribute_irqs = 1; | 96 | int distribute_irqs = 1; |
101 | 97 | ||
102 | static inline notrace unsigned long get_hard_enabled(void) | 98 | static inline notrace unsigned long get_irq_happened(void) |
103 | { | 99 | { |
104 | unsigned long enabled; | 100 | unsigned long happened; |
105 | 101 | ||
106 | __asm__ __volatile__("lbz %0,%1(13)" | 102 | __asm__ __volatile__("lbz %0,%1(13)" |
107 | : "=r" (enabled) : "i" (offsetof(struct paca_struct, hard_enabled))); | 103 | : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); |
108 | 104 | ||
109 | return enabled; | 105 | return happened; |
110 | } | 106 | } |
111 | 107 | ||
112 | static inline notrace void set_soft_enabled(unsigned long enable) | 108 | static inline notrace void set_soft_enabled(unsigned long enable) |
@@ -115,88 +111,162 @@ static inline notrace void set_soft_enabled(unsigned long enable) | |||
115 | : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); | 111 | : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); |
116 | } | 112 | } |
117 | 113 | ||
118 | static inline notrace void decrementer_check_overflow(void) | 114 | static inline notrace int decrementer_check_overflow(void) |
119 | { | 115 | { |
120 | u64 now = get_tb_or_rtc(); | 116 | u64 now = get_tb_or_rtc(); |
121 | u64 *next_tb; | 117 | u64 *next_tb = &__get_cpu_var(decrementers_next_tb); |
122 | 118 | ||
123 | preempt_disable(); | ||
124 | next_tb = &__get_cpu_var(decrementers_next_tb); | ||
125 | |||
126 | if (now >= *next_tb) | 119 | if (now >= *next_tb) |
127 | set_dec(1); | 120 | set_dec(1); |
128 | preempt_enable(); | 121 | return now >= *next_tb; |
129 | } | 122 | } |
130 | 123 | ||
131 | notrace void arch_local_irq_restore(unsigned long en) | 124 | /* This is called whenever we are re-enabling interrupts |
125 | * and returns either 0 (nothing to do) or 500/900 if there's | ||
126 | * either an EE or a DEC to generate. | ||
127 | * | ||
128 | * This is called in two contexts: From arch_local_irq_restore() | ||
129 | * before soft-enabling interrupts, and from the exception exit | ||
130 | * path when returning from an interrupt from a soft-disabled to | ||
131 | * a soft-enabled context. In both cases we have interrupts hard | ||
132 | * disabled. | ||
133 | * | ||
134 | * We take care of only clearing the bits we handled in the | ||
135 | * PACA irq_happened field since we can only re-emit one at a | ||
136 | * time and we don't want to "lose" one. | ||
137 | */ | ||
138 | notrace unsigned int __check_irq_replay(void) | ||
132 | { | 139 | { |
133 | /* | 140 | /* |
134 | * get_paca()->soft_enabled = en; | 141 | * We use local_paca rather than get_paca() to avoid all |
135 | * Is it ever valid to use local_irq_restore(0) when soft_enabled is 1? | 142 | * the debug_smp_processor_id() business in this low level |
136 | * That was allowed before, and in such a case we do need to take care | 143 | * function |
137 | * that gcc will set soft_enabled directly via r13, not choose to use | ||
138 | * an intermediate register, lest we're preempted to a different cpu. | ||
139 | */ | 144 | */ |
140 | set_soft_enabled(en); | 145 | unsigned char happened = local_paca->irq_happened; |
141 | if (!en) | 146 | |
142 | return; | 147 | /* Clear bit 0 which we wouldn't clear otherwise */ |
148 | local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; | ||
143 | 149 | ||
144 | #ifdef CONFIG_PPC_STD_MMU_64 | 150 | /* |
145 | if (firmware_has_feature(FW_FEATURE_ISERIES)) { | 151 | * Force the delivery of pending soft-disabled interrupts on PS3. |
146 | /* | 152 | * Any HV call will have this side effect. |
147 | * Do we need to disable preemption here? Not really: in the | 153 | */ |
148 | * unlikely event that we're preempted to a different cpu in | 154 | if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { |
149 | * between getting r13, loading its lppaca_ptr, and loading | 155 | u64 tmp, tmp2; |
150 | * its any_int, we might call iseries_handle_interrupts without | 156 | lv1_get_version_info(&tmp, &tmp2); |
151 | * an interrupt pending on the new cpu, but that's no disaster, | ||
152 | * is it? And the business of preempting us off the old cpu | ||
153 | * would itself involve a local_irq_restore which handles the | ||
154 | * interrupt to that cpu. | ||
155 | * | ||
156 | * But use "local_paca->lppaca_ptr" instead of "get_lppaca()" | ||
157 | * to avoid any preemption checking added into get_paca(). | ||
158 | */ | ||
159 | if (local_paca->lppaca_ptr->int_dword.any_int) | ||
160 | iseries_handle_interrupts(); | ||
161 | } | 157 | } |
162 | #endif /* CONFIG_PPC_STD_MMU_64 */ | ||
163 | 158 | ||
164 | /* | 159 | /* |
165 | * if (get_paca()->hard_enabled) return; | 160 | * We may have missed a decrementer interrupt. We check the |
166 | * But again we need to take care that gcc gets hard_enabled directly | 161 | * decrementer itself rather than the paca irq_happened field |
167 | * via r13, not choose to use an intermediate register, lest we're | 162 | * in case we also had a rollover while hard disabled |
168 | * preempted to a different cpu in between the two instructions. | 163 | */ |
164 | local_paca->irq_happened &= ~PACA_IRQ_DEC; | ||
165 | if (decrementer_check_overflow()) | ||
166 | return 0x900; | ||
167 | |||
168 | /* Finally check if an external interrupt happened */ | ||
169 | local_paca->irq_happened &= ~PACA_IRQ_EE; | ||
170 | if (happened & PACA_IRQ_EE) | ||
171 | return 0x500; | ||
172 | |||
173 | #ifdef CONFIG_PPC_BOOK3E | ||
174 | /* Also check if an EPR external interrupt happened; | ||
175 | * this bit is typically set if we need to handle another | ||
176 | * "edge" interrupt from within the MPIC "EPR" handler | ||
177 | */ | ||
178 | local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; | ||
179 | if (happened & PACA_IRQ_EE_EDGE) | ||
180 | return 0x500; | ||
181 | |||
182 | local_paca->irq_happened &= ~PACA_IRQ_DBELL; | ||
183 | if (happened & PACA_IRQ_DBELL) | ||
184 | return 0x280; | ||
185 | #endif /* CONFIG_PPC_BOOK3E */ | ||
186 | |||
187 | /* There should be nothing left! */ | ||
188 | BUG_ON(local_paca->irq_happened != 0); | ||
189 | |||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | notrace void arch_local_irq_restore(unsigned long en) | ||
194 | { | ||
195 | unsigned char irq_happened; | ||
196 | unsigned int replay; | ||
197 | |||
198 | /* Write the new soft-enabled value */ | ||
199 | set_soft_enabled(en); | ||
200 | if (!en) | ||
201 | return; | ||
202 | /* | ||
203 | * From this point onward, we can take interrupts, preempt, | ||
204 | * etc... unless we got hard-disabled. We check if an event | ||
205 | * happened. If none happened, we know we can just return. | ||
206 | * | ||
207 | * We may have been preempted before the check below, in which case | ||
208 | * we are checking the "new" CPU instead of the old one. This | ||
209 | * is only a problem if an event happened on the "old" CPU. | ||
210 | * | ||
211 | * External interrupt events on non-iseries will have caused | ||
212 | * interrupts to be hard-disabled, so there is no problem, we | ||
213 | * cannot have been preempted. | ||
169 | */ | 214 | */ |
170 | if (get_hard_enabled()) | 215 | irq_happened = get_irq_happened(); |
216 | if (!irq_happened) | ||
171 | return; | 217 | return; |
172 | 218 | ||
173 | /* | 219 | /* |
174 | * Need to hard-enable interrupts here. Since currently disabled, | 220 | * We need to hard disable to get a trusted value from |
175 | * no need to take further asm precautions against preemption; but | 221 | * __check_irq_replay(). We also need to soft-disable |
176 | * use local_paca instead of get_paca() to avoid preemption checking. | 222 | * again to avoid warnings in there due to the use of |
223 | * per-cpu variables. | ||
224 | * | ||
225 | * We know that if the value in irq_happened is exactly 0x01 | ||
226 | * then we are already hard disabled (there are other less | ||
227 | * common cases that we'll ignore for now), so we skip the | ||
228 | * (expensive) mtmsrd. | ||
177 | */ | 229 | */ |
178 | local_paca->hard_enabled = en; | 230 | if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) |
231 | __hard_irq_disable(); | ||
232 | set_soft_enabled(0); | ||
179 | 233 | ||
180 | /* | 234 | /* |
181 | * Trigger the decrementer if we have a pending event. Some processors | 235 | * Check if anything needs to be re-emitted. We haven't |
182 | * only trigger on edge transitions of the sign bit. We might also | 236 | * soft-enabled yet to avoid warnings in decrementer_check_overflow |
183 | * have disabled interrupts long enough that the decrementer wrapped | 237 | * accessing per-cpu variables |
184 | * to positive. | ||
185 | */ | 238 | */ |
186 | decrementer_check_overflow(); | 239 | replay = __check_irq_replay(); |
240 | |||
241 | /* We can soft-enable now */ | ||
242 | set_soft_enabled(1); | ||
187 | 243 | ||
188 | /* | 244 | /* |
189 | * Force the delivery of pending soft-disabled interrupts on PS3. | 245 | * And replay if we have to. This will return with interrupts |
190 | * Any HV call will have this side effect. | 246 | * hard-enabled. |
191 | */ | 247 | */ |
192 | if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { | 248 | if (replay) { |
193 | u64 tmp, tmp2; | 249 | __replay_interrupt(replay); |
194 | lv1_get_version_info(&tmp, &tmp2); | 250 | return; |
195 | } | 251 | } |
196 | 252 | ||
253 | /* Finally, let's ensure we are hard enabled */ | ||
197 | __hard_irq_enable(); | 254 | __hard_irq_enable(); |
198 | } | 255 | } |
199 | EXPORT_SYMBOL(arch_local_irq_restore); | 256 | EXPORT_SYMBOL(arch_local_irq_restore); |
257 | |||
258 | /* | ||
259 | * This is specifically called by assembly code to re-enable interrupts | ||
260 | * if they are currently disabled. This is typically called before | ||
261 | * schedule() or do_signal() when returning to userspace. We do it | ||
262 | * in C to avoid the burden of dealing with lockdep etc... | ||
263 | */ | ||
264 | void restore_interrupts(void) | ||
265 | { | ||
266 | if (irqs_disabled()) | ||
267 | local_irq_enable(); | ||
268 | } | ||
269 | |||
200 | #endif /* CONFIG_PPC64 */ | 270 | #endif /* CONFIG_PPC64 */ |
201 | 271 | ||
202 | int arch_show_interrupts(struct seq_file *p, int prec) | 272 | int arch_show_interrupts(struct seq_file *p, int prec) |
@@ -364,8 +434,17 @@ void do_IRQ(struct pt_regs *regs) | |||
364 | 434 | ||
365 | check_stack_overflow(); | 435 | check_stack_overflow(); |
366 | 436 | ||
437 | /* | ||
438 | * Query the platform PIC for the interrupt & ack it. | ||
439 | * | ||
440 | * This will typically lower the interrupt line to the CPU | ||
441 | */ | ||
367 | irq = ppc_md.get_irq(); | 442 | irq = ppc_md.get_irq(); |
368 | 443 | ||
444 | /* We can hard enable interrupts now */ | ||
445 | may_hard_irq_enable(); | ||
446 | |||
447 | /* And finally process it */ | ||
369 | if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) | 448 | if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) |
370 | handle_one_irq(irq); | 449 | handle_one_irq(irq); |
371 | else if (irq != NO_IRQ_IGNORE) | 450 | else if (irq != NO_IRQ_IGNORE) |
@@ -374,15 +453,6 @@ void do_IRQ(struct pt_regs *regs) | |||
374 | irq_exit(); | 453 | irq_exit(); |
375 | set_irq_regs(old_regs); | 454 | set_irq_regs(old_regs); |
376 | 455 | ||
377 | #ifdef CONFIG_PPC_ISERIES | ||
378 | if (firmware_has_feature(FW_FEATURE_ISERIES) && | ||
379 | get_lppaca()->int_dword.fields.decr_int) { | ||
380 | get_lppaca()->int_dword.fields.decr_int = 0; | ||
381 | /* Signal a fake decrementer interrupt */ | ||
382 | timer_interrupt(regs); | ||
383 | } | ||
384 | #endif | ||
385 | |||
386 | trace_irq_exit(regs); | 456 | trace_irq_exit(regs); |
387 | } | 457 | } |
388 | 458 | ||
@@ -490,409 +560,19 @@ void do_softirq(void) | |||
490 | local_irq_restore(flags); | 560 | local_irq_restore(flags); |
491 | } | 561 | } |
492 | 562 | ||
493 | |||
494 | /* | ||
495 | * IRQ controller and virtual interrupts | ||
496 | */ | ||
497 | |||
498 | /* The main irq map itself is an array of NR_IRQ entries containing the | ||
499 | * associated host and irq number. An entry with a host of NULL is free. | ||
500 | * An entry can be allocated if it's free, the allocator always then sets | ||
501 | * hwirq first to the host's invalid irq number and then fills ops. | ||
502 | */ | ||
503 | struct irq_map_entry { | ||
504 | irq_hw_number_t hwirq; | ||
505 | struct irq_host *host; | ||
506 | }; | ||
507 | |||
508 | static LIST_HEAD(irq_hosts); | ||
509 | static DEFINE_RAW_SPINLOCK(irq_big_lock); | ||
510 | static DEFINE_MUTEX(revmap_trees_mutex); | ||
511 | static struct irq_map_entry irq_map[NR_IRQS]; | ||
512 | static unsigned int irq_virq_count = NR_IRQS; | ||
513 | static struct irq_host *irq_default_host; | ||
514 | |||
515 | irq_hw_number_t irqd_to_hwirq(struct irq_data *d) | 563 | irq_hw_number_t irqd_to_hwirq(struct irq_data *d) |
516 | { | 564 | { |
517 | return irq_map[d->irq].hwirq; | 565 | return d->hwirq; |
518 | } | 566 | } |
519 | EXPORT_SYMBOL_GPL(irqd_to_hwirq); | 567 | EXPORT_SYMBOL_GPL(irqd_to_hwirq); |
520 | 568 | ||
521 | irq_hw_number_t virq_to_hw(unsigned int virq) | 569 | irq_hw_number_t virq_to_hw(unsigned int virq) |
522 | { | 570 | { |
523 | return irq_map[virq].hwirq; | 571 | struct irq_data *irq_data = irq_get_irq_data(virq); |
572 | return WARN_ON(!irq_data) ? 0 : irq_data->hwirq; | ||
524 | } | 573 | } |
525 | EXPORT_SYMBOL_GPL(virq_to_hw); | 574 | EXPORT_SYMBOL_GPL(virq_to_hw); |
526 | 575 | ||
527 | bool virq_is_host(unsigned int virq, struct irq_host *host) | ||
528 | { | ||
529 | return irq_map[virq].host == host; | ||
530 | } | ||
531 | EXPORT_SYMBOL_GPL(virq_is_host); | ||
532 | |||
533 | static int default_irq_host_match(struct irq_host *h, struct device_node *np) | ||
534 | { | ||
535 | return h->of_node != NULL && h->of_node == np; | ||
536 | } | ||
537 | |||
538 | struct irq_host *irq_alloc_host(struct device_node *of_node, | ||
539 | unsigned int revmap_type, | ||
540 | unsigned int revmap_arg, | ||
541 | struct irq_host_ops *ops, | ||
542 | irq_hw_number_t inval_irq) | ||
543 | { | ||
544 | struct irq_host *host; | ||
545 | unsigned int size = sizeof(struct irq_host); | ||
546 | unsigned int i; | ||
547 | unsigned int *rmap; | ||
548 | unsigned long flags; | ||
549 | |||
550 | /* Allocate structure and revmap table if using linear mapping */ | ||
551 | if (revmap_type == IRQ_HOST_MAP_LINEAR) | ||
552 | size += revmap_arg * sizeof(unsigned int); | ||
553 | host = kzalloc(size, GFP_KERNEL); | ||
554 | if (host == NULL) | ||
555 | return NULL; | ||
556 | |||
557 | /* Fill structure */ | ||
558 | host->revmap_type = revmap_type; | ||
559 | host->inval_irq = inval_irq; | ||
560 | host->ops = ops; | ||
561 | host->of_node = of_node_get(of_node); | ||
562 | |||
563 | if (host->ops->match == NULL) | ||
564 | host->ops->match = default_irq_host_match; | ||
565 | |||
566 | raw_spin_lock_irqsave(&irq_big_lock, flags); | ||
567 | |||
568 | /* If it's a legacy controller, check for duplicates and | ||
569 | * mark it as allocated (we use the irq 0 host pointer for that) | ||
570 | */ | ||
571 | if (revmap_type == IRQ_HOST_MAP_LEGACY) { | ||
572 | if (irq_map[0].host != NULL) { | ||
573 | raw_spin_unlock_irqrestore(&irq_big_lock, flags); | ||
574 | of_node_put(host->of_node); | ||
575 | kfree(host); | ||
576 | return NULL; | ||
577 | } | ||
578 | irq_map[0].host = host; | ||
579 | } | ||
580 | |||
581 | list_add(&host->link, &irq_hosts); | ||
582 | raw_spin_unlock_irqrestore(&irq_big_lock, flags); | ||
583 | |||
584 | /* Additional setups per revmap type */ | ||
585 | switch(revmap_type) { | ||
586 | case IRQ_HOST_MAP_LEGACY: | ||
587 | /* 0 is always the invalid number for legacy */ | ||
588 | host->inval_irq = 0; | ||
589 | /* setup us as the host for all legacy interrupts */ | ||
590 | for (i = 1; i < NUM_ISA_INTERRUPTS; i++) { | ||
591 | irq_map[i].hwirq = i; | ||
592 | smp_wmb(); | ||
593 | irq_map[i].host = host; | ||
594 | smp_wmb(); | ||
595 | |||
596 | /* Legacy flags are left to default at this point, | ||
597 | * one can then use irq_create_mapping() to | ||
598 | * explicitly change them | ||
599 | */ | ||
600 | ops->map(host, i, i); | ||
601 | |||
602 | /* Clear norequest flags */ | ||
603 | irq_clear_status_flags(i, IRQ_NOREQUEST); | ||
604 | } | ||
605 | break; | ||
606 | case IRQ_HOST_MAP_LINEAR: | ||
607 | rmap = (unsigned int *)(host + 1); | ||
608 | for (i = 0; i < revmap_arg; i++) | ||
609 | rmap[i] = NO_IRQ; | ||
610 | host->revmap_data.linear.size = revmap_arg; | ||
611 | smp_wmb(); | ||
612 | host->revmap_data.linear.revmap = rmap; | ||
613 | break; | ||
614 | case IRQ_HOST_MAP_TREE: | ||
615 | INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL); | ||
616 | break; | ||
617 | default: | ||
618 | break; | ||
619 | } | ||
620 | |||
621 | pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host); | ||
622 | |||
623 | return host; | ||
624 | } | ||
625 | |||
626 | struct irq_host *irq_find_host(struct device_node *node) | ||
627 | { | ||
628 | struct irq_host *h, *found = NULL; | ||
629 | unsigned long flags; | ||
630 | |||
631 | /* We might want to match the legacy controller last since | ||
632 | * it might potentially be set to match all interrupts in | ||
633 | * the absence of a device node. This isn't a problem | ||
634 | * yet, though... | ||
635 | */ | ||
636 | raw_spin_lock_irqsave(&irq_big_lock, flags); | ||
637 | list_for_each_entry(h, &irq_hosts, link) | ||
638 | if (h->ops->match(h, node)) { | ||
639 | found = h; | ||
640 | break; | ||
641 | } | ||
642 | raw_spin_unlock_irqrestore(&irq_big_lock, flags); | ||
643 | return found; | ||
644 | } | ||
645 | EXPORT_SYMBOL_GPL(irq_find_host); | ||
646 | |||
647 | void irq_set_default_host(struct irq_host *host) | ||
648 | { | ||
649 | pr_debug("irq: Default host set to @0x%p\n", host); | ||
650 | |||
651 | irq_default_host = host; | ||
652 | } | ||
653 | |||
654 | void irq_set_virq_count(unsigned int count) | ||
655 | { | ||
656 | pr_debug("irq: Trying to set virq count to %d\n", count); | ||
657 | |||
658 | BUG_ON(count < NUM_ISA_INTERRUPTS); | ||
659 | if (count < NR_IRQS) | ||
660 | irq_virq_count = count; | ||
661 | } | ||
662 | |||
663 | static int irq_setup_virq(struct irq_host *host, unsigned int virq, | ||
664 | irq_hw_number_t hwirq) | ||
665 | { | ||
666 | int res; | ||
667 | |||
668 | res = irq_alloc_desc_at(virq, 0); | ||
669 | if (res != virq) { | ||
670 | pr_debug("irq: -> allocating desc failed\n"); | ||
671 | goto error; | ||
672 | } | ||
673 | |||
674 | /* map it */ | ||
675 | smp_wmb(); | ||
676 | irq_map[virq].hwirq = hwirq; | ||
677 | smp_mb(); | ||
678 | |||
679 | if (host->ops->map(host, virq, hwirq)) { | ||
680 | pr_debug("irq: -> mapping failed, freeing\n"); | ||
681 | goto errdesc; | ||
682 | } | ||
683 | |||
684 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | ||
685 | |||
686 | return 0; | ||
687 | |||
688 | errdesc: | ||
689 | irq_free_descs(virq, 1); | ||
690 | error: | ||
691 | irq_free_virt(virq, 1); | ||
692 | return -1; | ||
693 | } | ||
694 | |||
695 | unsigned int irq_create_direct_mapping(struct irq_host *host) | ||
696 | { | ||
697 | unsigned int virq; | ||
698 | |||
699 | if (host == NULL) | ||
700 | host = irq_default_host; | ||
701 | |||
702 | BUG_ON(host == NULL); | ||
703 | WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP); | ||
704 | |||
705 | virq = irq_alloc_virt(host, 1, 0); | ||
706 | if (virq == NO_IRQ) { | ||
707 | pr_debug("irq: create_direct virq allocation failed\n"); | ||
708 | return NO_IRQ; | ||
709 | } | ||
710 | |||
711 | pr_debug("irq: create_direct obtained virq %d\n", virq); | ||
712 | |||
713 | if (irq_setup_virq(host, virq, virq)) | ||
714 | return NO_IRQ; | ||
715 | |||
716 | return virq; | ||
717 | } | ||
718 | |||
719 | unsigned int irq_create_mapping(struct irq_host *host, | ||
720 | irq_hw_number_t hwirq) | ||
721 | { | ||
722 | unsigned int virq, hint; | ||
723 | |||
724 | pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq); | ||
725 | |||
726 | /* Look for default host if necessary */ | ||
727 | if (host == NULL) | ||
728 | host = irq_default_host; | ||
729 | if (host == NULL) { | ||
730 | printk(KERN_WARNING "irq_create_mapping called for" | ||
731 | " NULL host, hwirq=%lx\n", hwirq); | ||
732 | WARN_ON(1); | ||
733 | return NO_IRQ; | ||
734 | } | ||
735 | pr_debug("irq: -> using host @%p\n", host); | ||
736 | |||
737 | /* Check if mapping already exists */ | ||
738 | virq = irq_find_mapping(host, hwirq); | ||
739 | if (virq != NO_IRQ) { | ||
740 | pr_debug("irq: -> existing mapping on virq %d\n", virq); | ||
741 | return virq; | ||
742 | } | ||
743 | |||
744 | /* Get a virtual interrupt number */ | ||
745 | if (host->revmap_type == IRQ_HOST_MAP_LEGACY) { | ||
746 | /* Handle legacy */ | ||
747 | virq = (unsigned int)hwirq; | ||
748 | if (virq == 0 || virq >= NUM_ISA_INTERRUPTS) | ||
749 | return NO_IRQ; | ||
750 | return virq; | ||
751 | } else { | ||
752 | /* Allocate a virtual interrupt number */ | ||
753 | hint = hwirq % irq_virq_count; | ||
754 | virq = irq_alloc_virt(host, 1, hint); | ||
755 | if (virq == NO_IRQ) { | ||
756 | pr_debug("irq: -> virq allocation failed\n"); | ||
757 | return NO_IRQ; | ||
758 | } | ||
759 | } | ||
760 | |||
761 | if (irq_setup_virq(host, virq, hwirq)) | ||
762 | return NO_IRQ; | ||
763 | |||
764 | pr_debug("irq: irq %lu on host %s mapped to virtual irq %u\n", | ||
765 | hwirq, host->of_node ? host->of_node->full_name : "null", virq); | ||
766 | |||
767 | return virq; | ||
768 | } | ||
769 | EXPORT_SYMBOL_GPL(irq_create_mapping); | ||
770 | |||
771 | unsigned int irq_create_of_mapping(struct device_node *controller, | ||
772 | const u32 *intspec, unsigned int intsize) | ||
773 | { | ||
774 | struct irq_host *host; | ||
775 | irq_hw_number_t hwirq; | ||
776 | unsigned int type = IRQ_TYPE_NONE; | ||
777 | unsigned int virq; | ||
778 | |||
779 | if (controller == NULL) | ||
780 | host = irq_default_host; | ||
781 | else | ||
782 | host = irq_find_host(controller); | ||
783 | if (host == NULL) { | ||
784 | printk(KERN_WARNING "irq: no irq host found for %s !\n", | ||
785 | controller->full_name); | ||
786 | return NO_IRQ; | ||
787 | } | ||
788 | |||
789 | /* If host has no translation, then we assume interrupt line */ | ||
790 | if (host->ops->xlate == NULL) | ||
791 | hwirq = intspec[0]; | ||
792 | else { | ||
793 | if (host->ops->xlate(host, controller, intspec, intsize, | ||
794 | &hwirq, &type)) | ||
795 | return NO_IRQ; | ||
796 | } | ||
797 | |||
798 | /* Create mapping */ | ||
799 | virq = irq_create_mapping(host, hwirq); | ||
800 | if (virq == NO_IRQ) | ||
801 | return virq; | ||
802 | |||
803 | /* Set type if specified and different than the current one */ | ||
804 | if (type != IRQ_TYPE_NONE && | ||
805 | type != (irqd_get_trigger_type(irq_get_irq_data(virq)))) | ||
806 | irq_set_irq_type(virq, type); | ||
807 | return virq; | ||
808 | } | ||
809 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | ||
810 | |||
811 | void irq_dispose_mapping(unsigned int virq) | ||
812 | { | ||
813 | struct irq_host *host; | ||
814 | irq_hw_number_t hwirq; | ||
815 | |||
816 | if (virq == NO_IRQ) | ||
817 | return; | ||
818 | |||
819 | host = irq_map[virq].host; | ||
820 | if (WARN_ON(host == NULL)) | ||
821 | return; | ||
822 | |||
823 | /* Never unmap legacy interrupts */ | ||
824 | if (host->revmap_type == IRQ_HOST_MAP_LEGACY) | ||
825 | return; | ||
826 | |||
827 | irq_set_status_flags(virq, IRQ_NOREQUEST); | ||
828 | |||
829 | /* remove chip and handler */ | ||
830 | irq_set_chip_and_handler(virq, NULL, NULL); | ||
831 | |||
832 | /* Make sure it's completed */ | ||
833 | synchronize_irq(virq); | ||
834 | |||
835 | /* Tell the PIC about it */ | ||
836 | if (host->ops->unmap) | ||
837 | host->ops->unmap(host, virq); | ||
838 | smp_mb(); | ||
839 | |||
840 | /* Clear reverse map */ | ||
841 | hwirq = irq_map[virq].hwirq; | ||
842 | switch(host->revmap_type) { | ||
843 | case IRQ_HOST_MAP_LINEAR: | ||
844 | if (hwirq < host->revmap_data.linear.size) | ||
845 | host->revmap_data.linear.revmap[hwirq] = NO_IRQ; | ||
846 | break; | ||
847 | case IRQ_HOST_MAP_TREE: | ||
848 | mutex_lock(&revmap_trees_mutex); | ||
849 | radix_tree_delete(&host->revmap_data.tree, hwirq); | ||
850 | mutex_unlock(&revmap_trees_mutex); | ||
851 | break; | ||
852 | } | ||
853 | |||
854 | /* Destroy map */ | ||
855 | smp_mb(); | ||
856 | irq_map[virq].hwirq = host->inval_irq; | ||
857 | |||
858 | irq_free_descs(virq, 1); | ||
859 | /* Free it */ | ||
860 | irq_free_virt(virq, 1); | ||
861 | } | ||
862 | EXPORT_SYMBOL_GPL(irq_dispose_mapping); | ||
863 | |||
864 | unsigned int irq_find_mapping(struct irq_host *host, | ||
865 | irq_hw_number_t hwirq) | ||
866 | { | ||
867 | unsigned int i; | ||
868 | unsigned int hint = hwirq % irq_virq_count; | ||
869 | |||
870 | /* Look for default host if necessary */ | ||
871 | if (host == NULL) | ||
872 | host = irq_default_host; | ||
873 | if (host == NULL) | ||
874 | return NO_IRQ; | ||
875 | |||
876 | /* legacy -> bail early */ | ||
877 | if (host->revmap_type == IRQ_HOST_MAP_LEGACY) | ||
878 | return hwirq; | ||
879 | |||
880 | /* Slow path does a linear search of the map */ | ||
881 | if (hint < NUM_ISA_INTERRUPTS) | ||
882 | hint = NUM_ISA_INTERRUPTS; | ||
883 | i = hint; | ||
884 | do { | ||
885 | if (irq_map[i].host == host && | ||
886 | irq_map[i].hwirq == hwirq) | ||
887 | return i; | ||
888 | i++; | ||
889 | if (i >= irq_virq_count) | ||
890 | i = NUM_ISA_INTERRUPTS; | ||
891 | } while(i != hint); | ||
892 | return NO_IRQ; | ||
893 | } | ||
894 | EXPORT_SYMBOL_GPL(irq_find_mapping); | ||
895 | |||
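The slow path in irq_find_mapping is a circular scan: the hint starts at hwirq % irq_virq_count (bumped past the ISA range), the index walks upward, wraps back to NUM_ISA_INTERRUPTS, and the loop terminates when it comes back around to the hint. A standalone model of that wrap-around loop, with invented sizes and a simplified map entry:

#include <stdio.h>

#define NUM_ISA 16
#define NVIRQS  64
#define NO_IRQ  0

struct map_entry { void *host; unsigned long hwirq; };
static struct map_entry map[NVIRQS];

/* Circular linear search, as in irq_find_mapping's slow path. */
static unsigned int find_mapping(void *host, unsigned long hwirq)
{
	unsigned int hint = hwirq % NVIRQS;
	unsigned int i;

	if (hint < NUM_ISA)
		hint = NUM_ISA;
	i = hint;
	do {
		if (map[i].host == host && map[i].hwirq == hwirq)
			return i;
		if (++i >= NVIRQS)
			i = NUM_ISA;    /* wrap, skipping the ISA range */
	} while (i != hint);
	return NO_IRQ;
}

int main(void)
{
	int host;
	map[40].host = &host;
	map[40].hwirq = 100;
	printf("virq %u\n", find_mapping(&host, 100)); /* prints 40 */
	return 0;
}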
896 | #ifdef CONFIG_SMP | 576 | #ifdef CONFIG_SMP |
897 | int irq_choose_cpu(const struct cpumask *mask) | 577 | int irq_choose_cpu(const struct cpumask *mask) |
898 | { | 578 | { |
@@ -929,232 +609,11 @@ int irq_choose_cpu(const struct cpumask *mask) | |||
929 | } | 609 | } |
930 | #endif | 610 | #endif |
931 | 611 | ||
932 | unsigned int irq_radix_revmap_lookup(struct irq_host *host, | ||
933 | irq_hw_number_t hwirq) | ||
934 | { | ||
935 | struct irq_map_entry *ptr; | ||
936 | unsigned int virq; | ||
937 | |||
938 | if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE)) | ||
939 | return irq_find_mapping(host, hwirq); | ||
940 | |||
941 | /* | ||
942 | * The ptr returned references the static global irq_map, | ||
943 | * but freeing an irq can delete nodes along the lookup | ||
944 | * path via call_rcu, so do the lookup under rcu_read_lock. | ||
945 | */ | ||
946 | rcu_read_lock(); | ||
947 | ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq); | ||
948 | rcu_read_unlock(); | ||
949 | |||
950 | /* | ||
951 | * If found in radix tree, then fine. | ||
952 | * Else fallback to linear lookup - this should not happen in practice | ||
953 | * as it means that we failed to insert the node in the radix tree. | ||
954 | */ | ||
955 | if (ptr) | ||
956 | virq = ptr - irq_map; | ||
957 | else | ||
958 | virq = irq_find_mapping(host, hwirq); | ||
959 | |||
960 | return virq; | ||
961 | } | ||
962 | |||
963 | void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq, | ||
964 | irq_hw_number_t hwirq) | ||
965 | { | ||
966 | if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE)) | ||
967 | return; | ||
968 | |||
969 | if (virq != NO_IRQ) { | ||
970 | mutex_lock(&revmap_trees_mutex); | ||
971 | radix_tree_insert(&host->revmap_data.tree, hwirq, | ||
972 | &irq_map[virq]); | ||
973 | mutex_unlock(&revmap_trees_mutex); | ||
974 | } | ||
975 | } | ||
976 | |||
977 | unsigned int irq_linear_revmap(struct irq_host *host, | ||
978 | irq_hw_number_t hwirq) | ||
979 | { | ||
980 | unsigned int *revmap; | ||
981 | |||
982 | if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR)) | ||
983 | return irq_find_mapping(host, hwirq); | ||
984 | |||
985 | /* Check revmap bounds */ | ||
986 | if (unlikely(hwirq >= host->revmap_data.linear.size)) | ||
987 | return irq_find_mapping(host, hwirq); | ||
988 | |||
989 | /* Check if revmap was allocated */ | ||
990 | revmap = host->revmap_data.linear.revmap; | ||
991 | if (unlikely(revmap == NULL)) | ||
992 | return irq_find_mapping(host, hwirq); | ||
993 | |||
994 | /* Fill up revmap with slow path if no mapping found */ | ||
995 | if (unlikely(revmap[hwirq] == NO_IRQ)) | ||
996 | revmap[hwirq] = irq_find_mapping(host, hwirq); | ||
997 | |||
998 | return revmap[hwirq]; | ||
999 | } | ||
1000 | |||
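irq_linear_revmap is a lazily filled cache: out-of-range hardware numbers and an unallocated table fall back to the linear search, and a NO_IRQ slot is resolved once via the slow path and then memoized so later lookups are O(1). The same pattern reduced to a standalone sketch, where lookup_slow stands in for irq_find_mapping:

#include <stdio.h>

#define NO_IRQ 0

/* Stand-in for irq_find_mapping(): the authoritative O(n) lookup. */
static unsigned int lookup_slow(unsigned long hwirq)
{
	return (unsigned int)(hwirq + 1); /* fake answer for the demo */
}

static unsigned int revmap_cache[32]; /* all NO_IRQ initially */

static unsigned int linear_revmap(unsigned long hwirq)
{
	if (hwirq >= 32)
		return lookup_slow(hwirq);      /* out of table bounds */
	if (revmap_cache[hwirq] == NO_IRQ)      /* miss: fill once */
		revmap_cache[hwirq] = lookup_slow(hwirq);
	return revmap_cache[hwirq];             /* later calls are O(1) */
}

int main(void)
{
	printf("%u\n", linear_revmap(5));  /* slow path, then cached */
	printf("%u\n", linear_revmap(5));  /* served from the cache */
	return 0;
}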
1001 | unsigned int irq_alloc_virt(struct irq_host *host, | ||
1002 | unsigned int count, | ||
1003 | unsigned int hint) | ||
1004 | { | ||
1005 | unsigned long flags; | ||
1006 | unsigned int i, j, found = NO_IRQ; | ||
1007 | |||
1008 | if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS)) | ||
1009 | return NO_IRQ; | ||
1010 | |||
1011 | raw_spin_lock_irqsave(&irq_big_lock, flags); | ||
1012 | |||
1013 | /* Honour the hint when allocating a single interrupt, if usable */ | ||
1014 | if (count == 1 && hint >= NUM_ISA_INTERRUPTS && | ||
1015 | hint < irq_virq_count && irq_map[hint].host == NULL) { | ||
1016 | found = hint; | ||
1017 | goto hint_found; | ||
1018 | } | ||
1019 | |||
1020 | /* Look for count consecutive numbers in the allocatable | ||
1021 | * (non-legacy) space | ||
1022 | */ | ||
1023 | for (i = NUM_ISA_INTERRUPTS, j = 0; i < irq_virq_count; i++) { | ||
1024 | if (irq_map[i].host != NULL) | ||
1025 | j = 0; | ||
1026 | else | ||
1027 | j++; | ||
1028 | |||
1029 | if (j == count) { | ||
1030 | found = i - count + 1; | ||
1031 | break; | ||
1032 | } | ||
1033 | } | ||
1034 | if (found == NO_IRQ) { | ||
1035 | raw_spin_unlock_irqrestore(&irq_big_lock, flags); | ||
1036 | return NO_IRQ; | ||
1037 | } | ||
1038 | hint_found: | ||
1039 | for (i = found; i < (found + count); i++) { | ||
1040 | irq_map[i].hwirq = host->inval_irq; | ||
1041 | smp_wmb(); | ||
1042 | irq_map[i].host = host; | ||
1043 | } | ||
1044 | raw_spin_unlock_irqrestore(&irq_big_lock, flags); | ||
1045 | return found; | ||
1046 | } | ||
1047 | |||
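The allocation loop in irq_alloc_virt finds count consecutive free entries by keeping a running length j of the current free run and resetting it whenever an occupied slot breaks the run; the run's first index is then i - count + 1. That scan in isolation, with occupancy modeled as a plain owner array:

#include <stdio.h>

#define FIRST 16   /* first allocatable slot, like NUM_ISA_INTERRUPTS */
#define TOTAL 64

static void *owner[TOTAL]; /* NULL == free */

/* Returns the first index of a run of 'count' free slots, or -1. */
static int find_run(unsigned int count)
{
	unsigned int i, j;

	for (i = FIRST, j = 0; i < TOTAL; i++) {
		if (owner[i] != NULL)
			j = 0;          /* run broken: start over */
		else
			j++;
		if (j == count)
			return (int)(i - count + 1);
	}
	return -1;
}

int main(void)
{
	int token;
	owner[18] = &token;              /* split the free space */
	printf("%d\n", find_run(3));     /* 19: first run after slot 18 */
	return 0;
}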
1048 | void irq_free_virt(unsigned int virq, unsigned int count) | ||
1049 | { | ||
1050 | unsigned long flags; | ||
1051 | unsigned int i; | ||
1052 | |||
1053 | WARN_ON (virq < NUM_ISA_INTERRUPTS); | ||
1054 | WARN_ON (count == 0 || (virq + count) > irq_virq_count); | ||
1055 | |||
1056 | if (virq < NUM_ISA_INTERRUPTS) { | ||
1057 | if (virq + count < NUM_ISA_INTERRUPTS) | ||
1058 | return; | ||
1059 | count -= NUM_ISA_INTERRUPTS - virq; | ||
1060 | virq = NUM_ISA_INTERRUPTS; | ||
1061 | } | ||
1062 | |||
1063 | if (count > irq_virq_count || virq > irq_virq_count - count) { | ||
1064 | if (virq > irq_virq_count) | ||
1065 | return; | ||
1066 | count = irq_virq_count - virq; | ||
1067 | } | ||
1068 | |||
1069 | raw_spin_lock_irqsave(&irq_big_lock, flags); | ||
1070 | for (i = virq; i < (virq + count); i++) { | ||
1071 | struct irq_host *host; | ||
1072 | |||
1073 | host = irq_map[i].host; | ||
1074 | irq_map[i].hwirq = host->inval_irq; | ||
1075 | smp_wmb(); | ||
1076 | irq_map[i].host = NULL; | ||
1077 | } | ||
1078 | raw_spin_unlock_irqrestore(&irq_big_lock, flags); | ||
1079 | } | ||
1080 | |||
1081 | int arch_early_irq_init(void) | 612 | int arch_early_irq_init(void) |
1082 | { | 613 | { |
1083 | return 0; | 614 | return 0; |
1084 | } | 615 | } |
1085 | 616 | ||
1086 | #ifdef CONFIG_VIRQ_DEBUG | ||
1087 | static int virq_debug_show(struct seq_file *m, void *private) | ||
1088 | { | ||
1089 | unsigned long flags; | ||
1090 | struct irq_desc *desc; | ||
1091 | const char *p; | ||
1092 | static const char none[] = "none"; | ||
1093 | void *data; | ||
1094 | int i; | ||
1095 | |||
1096 | seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", | ||
1097 | "chip name", "chip data", "host name"); | ||
1098 | |||
1099 | for (i = 1; i < nr_irqs; i++) { | ||
1100 | desc = irq_to_desc(i); | ||
1101 | if (!desc) | ||
1102 | continue; | ||
1103 | |||
1104 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
1105 | |||
1106 | if (desc->action && desc->action->handler) { | ||
1107 | struct irq_chip *chip; | ||
1108 | |||
1109 | seq_printf(m, "%5d ", i); | ||
1110 | seq_printf(m, "0x%05lx ", irq_map[i].hwirq); | ||
1111 | |||
1112 | chip = irq_desc_get_chip(desc); | ||
1113 | if (chip && chip->name) | ||
1114 | p = chip->name; | ||
1115 | else | ||
1116 | p = none; | ||
1117 | seq_printf(m, "%-15s ", p); | ||
1118 | |||
1119 | data = irq_desc_get_chip_data(desc); | ||
1120 | seq_printf(m, "0x%16p ", data); | ||
1121 | |||
1122 | if (irq_map[i].host && irq_map[i].host->of_node) | ||
1123 | p = irq_map[i].host->of_node->full_name; | ||
1124 | else | ||
1125 | p = none; | ||
1126 | seq_printf(m, "%s\n", p); | ||
1127 | } | ||
1128 | |||
1129 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
1130 | } | ||
1131 | |||
1132 | return 0; | ||
1133 | } | ||
1134 | |||
1135 | static int virq_debug_open(struct inode *inode, struct file *file) | ||
1136 | { | ||
1137 | return single_open(file, virq_debug_show, inode->i_private); | ||
1138 | } | ||
1139 | |||
1140 | static const struct file_operations virq_debug_fops = { | ||
1141 | .open = virq_debug_open, | ||
1142 | .read = seq_read, | ||
1143 | .llseek = seq_lseek, | ||
1144 | .release = single_release, | ||
1145 | }; | ||
1146 | |||
1147 | static int __init irq_debugfs_init(void) | ||
1148 | { | ||
1149 | if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root, | ||
1150 | NULL, &virq_debug_fops) == NULL) | ||
1151 | return -ENOMEM; | ||
1152 | |||
1153 | return 0; | ||
1154 | } | ||
1155 | __initcall(irq_debugfs_init); | ||
1156 | #endif /* CONFIG_VIRQ_DEBUG */ | ||
1157 | |||
1158 | #ifdef CONFIG_PPC64 | 617 | #ifdef CONFIG_PPC64 |
1159 | static int __init setup_noirqdistrib(char *str) | 618 | static int __init setup_noirqdistrib(char *str) |
1160 | { | 619 | { |
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 479752901ec6..d45ec58703ce 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include <asm/pci-bridge.h> | 29 | #include <asm/pci-bridge.h> |
30 | #include <asm/machdep.h> | 30 | #include <asm/machdep.h> |
31 | #include <asm/ppc-pci.h> | 31 | #include <asm/ppc-pci.h> |
32 | #include <asm/firmware.h> | ||
33 | 32 | ||
34 | unsigned long isa_io_base; /* NULL if no ISA bus */ | 33 | unsigned long isa_io_base; /* NULL if no ISA bus */ |
35 | EXPORT_SYMBOL(isa_io_base); | 34 | EXPORT_SYMBOL(isa_io_base); |
@@ -261,8 +260,6 @@ static struct notifier_block isa_bridge_notifier = { | |||
261 | */ | 260 | */ |
262 | static int __init isa_bridge_init(void) | 261 | static int __init isa_bridge_init(void) |
263 | { | 262 | { |
264 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
265 | return 0; | ||
266 | bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); | 263 | bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); |
267 | return 0; | 264 | return 0; |
268 | } | 265 | } |
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 578f35f18723..ac12bd80ad95 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/seq_file.h> | 26 | #include <linux/seq_file.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | #include <asm/iseries/hv_lp_config.h> | ||
30 | #include <asm/lppaca.h> | 29 | #include <asm/lppaca.h> |
31 | #include <asm/hvcall.h> | 30 | #include <asm/hvcall.h> |
32 | #include <asm/firmware.h> | 31 | #include <asm/firmware.h> |
@@ -55,80 +54,14 @@ static unsigned long get_purr(void) | |||
55 | int cpu; | 54 | int cpu; |
56 | 55 | ||
57 | for_each_possible_cpu(cpu) { | 56 | for_each_possible_cpu(cpu) { |
58 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | 57 | struct cpu_usage *cu; |
59 | sum_purr += lppaca_of(cpu).emulated_time_base; | ||
60 | else { | ||
61 | struct cpu_usage *cu; | ||
62 | 58 | ||
63 | cu = &per_cpu(cpu_usage_array, cpu); | 59 | cu = &per_cpu(cpu_usage_array, cpu); |
64 | sum_purr += cu->current_tb; | 60 | sum_purr += cu->current_tb; |
65 | } | ||
66 | } | 61 | } |
67 | return sum_purr; | 62 | return sum_purr; |
68 | } | 63 | } |
69 | 64 | ||
70 | #ifdef CONFIG_PPC_ISERIES | ||
71 | |||
72 | /* | ||
73 | * Methods used to fetch LPAR data when running on an iSeries platform. | ||
74 | */ | ||
75 | static int iseries_lparcfg_data(struct seq_file *m, void *v) | ||
76 | { | ||
77 | unsigned long pool_id; | ||
78 | int shared, entitled_capacity, max_entitled_capacity; | ||
79 | int processors, max_processors; | ||
80 | unsigned long purr = get_purr(); | ||
81 | |||
82 | shared = (int)(local_paca->lppaca_ptr->shared_proc); | ||
83 | |||
84 | seq_printf(m, "system_active_processors=%d\n", | ||
85 | (int)HvLpConfig_getSystemPhysicalProcessors()); | ||
86 | |||
87 | seq_printf(m, "system_potential_processors=%d\n", | ||
88 | (int)HvLpConfig_getSystemPhysicalProcessors()); | ||
89 | |||
90 | processors = (int)HvLpConfig_getPhysicalProcessors(); | ||
91 | seq_printf(m, "partition_active_processors=%d\n", processors); | ||
92 | |||
93 | max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); | ||
94 | seq_printf(m, "partition_potential_processors=%d\n", max_processors); | ||
95 | |||
96 | if (shared) { | ||
97 | entitled_capacity = HvLpConfig_getSharedProcUnits(); | ||
98 | max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits(); | ||
99 | } else { | ||
100 | entitled_capacity = processors * 100; | ||
101 | max_entitled_capacity = max_processors * 100; | ||
102 | } | ||
103 | seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity); | ||
104 | |||
105 | seq_printf(m, "partition_max_entitled_capacity=%d\n", | ||
106 | max_entitled_capacity); | ||
107 | |||
108 | if (shared) { | ||
109 | pool_id = HvLpConfig_getSharedPoolIndex(); | ||
110 | seq_printf(m, "pool=%d\n", (int)pool_id); | ||
111 | seq_printf(m, "pool_capacity=%d\n", | ||
112 | (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) * | ||
113 | 100)); | ||
114 | seq_printf(m, "purr=%ld\n", purr); | ||
115 | } | ||
116 | |||
117 | seq_printf(m, "shared_processor_mode=%d\n", shared); | ||
118 | |||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | #else /* CONFIG_PPC_ISERIES */ | ||
123 | |||
124 | static int iseries_lparcfg_data(struct seq_file *m, void *v) | ||
125 | { | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | #endif /* CONFIG_PPC_ISERIES */ | ||
130 | |||
131 | #ifdef CONFIG_PPC_PSERIES | ||
132 | /* | 65 | /* |
133 | * Methods used to fetch LPAR data when running on a pSeries platform. | 66 | * Methods used to fetch LPAR data when running on a pSeries platform. |
134 | */ | 67 | */ |
@@ -648,8 +581,7 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, | |||
648 | u8 new_weight, *new_weight_ptr = &new_weight; | 581 | u8 new_weight, *new_weight_ptr = &new_weight; |
649 | ssize_t retval; | 582 | ssize_t retval; |
650 | 583 | ||
651 | if (!firmware_has_feature(FW_FEATURE_SPLPAR) || | 584 | if (!firmware_has_feature(FW_FEATURE_SPLPAR)) |
652 | firmware_has_feature(FW_FEATURE_ISERIES)) | ||
653 | return -EINVAL; | 585 | return -EINVAL; |
654 | 586 | ||
655 | if (count > kbuf_sz) | 587 | if (count > kbuf_sz) |
@@ -709,21 +641,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, | |||
709 | return retval; | 641 | return retval; |
710 | } | 642 | } |
711 | 643 | ||
712 | #else /* CONFIG_PPC_PSERIES */ | ||
713 | |||
714 | static int pseries_lparcfg_data(struct seq_file *m, void *v) | ||
715 | { | ||
716 | return 0; | ||
717 | } | ||
718 | |||
719 | static ssize_t lparcfg_write(struct file *file, const char __user * buf, | ||
720 | size_t count, loff_t * off) | ||
721 | { | ||
722 | return -EINVAL; | ||
723 | } | ||
724 | |||
725 | #endif /* CONFIG_PPC_PSERIES */ | ||
726 | |||
727 | static int lparcfg_data(struct seq_file *m, void *v) | 644 | static int lparcfg_data(struct seq_file *m, void *v) |
728 | { | 645 | { |
729 | struct device_node *rootdn; | 646 | struct device_node *rootdn; |
@@ -738,19 +655,11 @@ static int lparcfg_data(struct seq_file *m, void *v) | |||
738 | rootdn = of_find_node_by_path("/"); | 655 | rootdn = of_find_node_by_path("/"); |
739 | if (rootdn) { | 656 | if (rootdn) { |
740 | tmp = of_get_property(rootdn, "model", NULL); | 657 | tmp = of_get_property(rootdn, "model", NULL); |
741 | if (tmp) { | 658 | if (tmp) |
742 | model = tmp; | 659 | model = tmp; |
743 | /* Skip "IBM," - see platforms/iseries/dt.c */ | ||
744 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
745 | model += 4; | ||
746 | } | ||
747 | tmp = of_get_property(rootdn, "system-id", NULL); | 660 | tmp = of_get_property(rootdn, "system-id", NULL); |
748 | if (tmp) { | 661 | if (tmp) |
749 | system_id = tmp; | 662 | system_id = tmp; |
750 | /* Skip "IBM," - see platforms/iseries/dt.c */ | ||
751 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
752 | system_id += 4; | ||
753 | } | ||
754 | lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", | 663 | lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", |
755 | NULL); | 664 | NULL); |
756 | if (lp_index_ptr) | 665 | if (lp_index_ptr) |
@@ -761,8 +670,6 @@ static int lparcfg_data(struct seq_file *m, void *v) | |||
761 | seq_printf(m, "system_type=%s\n", model); | 670 | seq_printf(m, "system_type=%s\n", model); |
762 | seq_printf(m, "partition_id=%d\n", (int)lp_index); | 671 | seq_printf(m, "partition_id=%d\n", (int)lp_index); |
763 | 672 | ||
764 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
765 | return iseries_lparcfg_data(m, v); | ||
766 | return pseries_lparcfg_data(m, v); | 673 | return pseries_lparcfg_data(m, v); |
767 | } | 674 | } |
768 | 675 | ||
@@ -786,8 +693,7 @@ static int __init lparcfg_init(void) | |||
786 | umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; | 693 | umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; |
787 | 694 | ||
788 | /* Allow writing if we have FW_FEATURE_SPLPAR */ | 695 | /* Allow writing if we have FW_FEATURE_SPLPAR */ |
789 | if (firmware_has_feature(FW_FEATURE_SPLPAR) && | 696 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) |
790 | !firmware_has_feature(FW_FEATURE_ISERIES)) | ||
791 | mode |= S_IWUSR; | 697 | mode |= S_IWUSR; |
792 | 698 | ||
793 | ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops); | 699 | ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops); |
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index b69463ec2010..ba16874fe294 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S | |||
@@ -5,7 +5,6 @@ | |||
5 | * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) | 5 | * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) |
6 | * and Paul Mackerras. | 6 | * and Paul Mackerras. |
7 | * | 7 | * |
8 | * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) | ||
9 | * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) | 8 | * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) |
10 | * | 9 | * |
11 | * setjmp/longjmp code by Paul Mackerras. | 10 | * setjmp/longjmp code by Paul Mackerras. |
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c deleted file mode 100644 index fe21b515ca44..000000000000 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ /dev/null | |||
@@ -1,422 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter support for MPC7450-family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <asm/reg.h> | ||
14 | #include <asm/cputable.h> | ||
15 | |||
16 | #define N_COUNTER 6 /* Number of hardware counters */ | ||
17 | #define MAX_ALT 3 /* Maximum number of event alternative codes */ | ||
18 | |||
19 | /* | ||
20 | * Bits in event code for MPC7450 family | ||
21 | */ | ||
22 | #define PM_THRMULT_MSKS 0x40000 | ||
23 | #define PM_THRESH_SH 12 | ||
24 | #define PM_THRESH_MSK 0x3f | ||
25 | #define PM_PMC_SH 8 | ||
26 | #define PM_PMC_MSK 7 | ||
27 | #define PM_PMCSEL_MSK 0x7f | ||
28 | |||
29 | /* | ||
30 | * Classify events according to how specific their PMC requirements are. | ||
31 | * Result is: | ||
32 | * 0: can go on any PMC | ||
33 | * 1: can go on PMCs 1-4 | ||
34 | * 2: can go on PMCs 1,2,4 | ||
35 | * 3: can go on PMCs 1 or 2 | ||
36 | * 4: can only go on one PMC | ||
37 | * -1: event code is invalid | ||
38 | */ | ||
39 | #define N_CLASSES 5 | ||
40 | |||
41 | static int mpc7450_classify_event(u32 event) | ||
42 | { | ||
43 | int pmc; | ||
44 | |||
45 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
46 | if (pmc) { | ||
47 | if (pmc > N_COUNTER) | ||
48 | return -1; | ||
49 | return 4; | ||
50 | } | ||
51 | event &= PM_PMCSEL_MSK; | ||
52 | if (event <= 1) | ||
53 | return 0; | ||
54 | if (event <= 7) | ||
55 | return 1; | ||
56 | if (event <= 13) | ||
57 | return 2; | ||
58 | if (event <= 22) | ||
59 | return 3; | ||
60 | return -1; | ||
61 | } | ||
62 | |||
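A quick worked check of the classifier above: event code 0x217 carries 2 in its PMC field (bits 8-10), so it is pinned to a specific counter and lands in class 4, while a code with an empty PMC field and selector 5 lands in class 1 (usable on PMCs 1-4). A standalone copy of the function with those two probes, constants taken from the definitions above:

#include <stdio.h>

#define N_COUNTER     6
#define PM_PMC_SH     8
#define PM_PMC_MSK    7
#define PM_PMCSEL_MSK 0x7f

static int classify_event(unsigned int event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;

	if (pmc) {
		if (pmc > N_COUNTER)
			return -1;
		return 4;               /* pinned to one PMC */
	}
	event &= PM_PMCSEL_MSK;
	if (event <= 1)  return 0;      /* any PMC */
	if (event <= 7)  return 1;      /* PMCs 1-4 */
	if (event <= 13) return 2;      /* PMCs 1,2,4 */
	if (event <= 22) return 3;      /* PMCs 1 or 2 */
	return -1;
}

int main(void)
{
	printf("0x217 -> class %d\n", classify_event(0x217)); /* 4 */
	printf("0x005 -> class %d\n", classify_event(0x005)); /* 1 */
	return 0;
}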
63 | /* | ||
64 | * Events using threshold and possible threshold scale: | ||
65 | * code scale? name | ||
66 | * 11e N PM_INSTQ_EXCEED_CYC | ||
67 | * 11f N PM_ALTV_IQ_EXCEED_CYC | ||
68 | * 128 Y PM_DTLB_SEARCH_EXCEED_CYC | ||
69 | * 12b Y PM_LD_MISS_EXCEED_L1_CYC | ||
70 | * 220 N PM_CQ_EXCEED_CYC | ||
71 | * 30c N PM_GPR_RB_EXCEED_CYC | ||
72 | * 30d ? PM_FPR_IQ_EXCEED_CYC ? | ||
73 | * 311 Y PM_ITLB_SEARCH_EXCEED | ||
74 | * 410 N PM_GPR_IQ_EXCEED_CYC | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Return use of threshold and threshold scale bits: | ||
79 | * 0 = uses neither, 1 = uses threshold, 2 = uses both | ||
80 | */ | ||
81 | static int mpc7450_threshold_use(u32 event) | ||
82 | { | ||
83 | int pmc, sel; | ||
84 | |||
85 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
86 | sel = event & PM_PMCSEL_MSK; | ||
87 | switch (pmc) { | ||
88 | case 1: | ||
89 | if (sel == 0x1e || sel == 0x1f) | ||
90 | return 1; | ||
91 | if (sel == 0x28 || sel == 0x2b) | ||
92 | return 2; | ||
93 | break; | ||
94 | case 2: | ||
95 | if (sel == 0x20) | ||
96 | return 1; | ||
97 | break; | ||
98 | case 3: | ||
99 | if (sel == 0xc || sel == 0xd) | ||
100 | return 1; | ||
101 | if (sel == 0x11) | ||
102 | return 2; | ||
103 | break; | ||
104 | case 4: | ||
105 | if (sel == 0x10) | ||
106 | return 1; | ||
107 | break; | ||
108 | } | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Layout of constraint bits: | ||
114 | * 33222222222211111111110000000000 | ||
115 | * 10987654321098765432109876543210 | ||
116 | * |< >< > < > < ><><><><><><> | ||
117 | * TS TV G4 G3 G2P6P5P4P3P2P1 | ||
118 | * | ||
119 | * P1 - P6 | ||
120 | * 0 - 11: Count of events needing PMC1 .. PMC6 | ||
121 | * | ||
122 | * G2 | ||
123 | * 12 - 14: Count of events needing PMC1 or PMC2 | ||
124 | * | ||
125 | * G3 | ||
126 | * 16 - 18: Count of events needing PMC1, PMC2 or PMC4 | ||
127 | * | ||
128 | * G4 | ||
129 | * 20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4 | ||
130 | * | ||
131 | * TV | ||
132 | * 24 - 29: Threshold value requested | ||
133 | * | ||
134 | * TS | ||
135 | * 30: Threshold scale value requested | ||
136 | */ | ||
137 | |||
138 | static u32 pmcbits[N_COUNTER][2] = { | ||
139 | { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */ | ||
140 | { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */ | ||
141 | { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */ | ||
142 | { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */ | ||
143 | { 0x00000200, 0x00000100 }, /* PMC5: P5 */ | ||
144 | { 0x00000800, 0x00000400 } /* PMC6: P6 */ | ||
145 | }; | ||
146 | |||
147 | static u32 classbits[N_CLASSES - 1][2] = { | ||
148 | { 0x00000000, 0x00000000 }, /* class 0: no constraint */ | ||
149 | { 0x00800000, 0x00100000 }, /* class 1: G4 */ | ||
150 | { 0x00040000, 0x00010000 }, /* class 2: G3 */ | ||
151 | { 0x00004000, 0x00001000 }, /* class 3: G2 */ | ||
152 | }; | ||
153 | |||
154 | static int mpc7450_get_constraint(u64 event, unsigned long *maskp, | ||
155 | unsigned long *valp) | ||
156 | { | ||
157 | int pmc, class; | ||
158 | u32 mask, value; | ||
159 | int thresh, tuse; | ||
160 | |||
161 | class = mpc7450_classify_event(event); | ||
162 | if (class < 0) | ||
163 | return -1; | ||
164 | if (class == 4) { | ||
165 | pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK; | ||
166 | mask = pmcbits[pmc - 1][0]; | ||
167 | value = pmcbits[pmc - 1][1]; | ||
168 | } else { | ||
169 | mask = classbits[class][0]; | ||
170 | value = classbits[class][1]; | ||
171 | } | ||
172 | |||
173 | tuse = mpc7450_threshold_use(event); | ||
174 | if (tuse) { | ||
175 | thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK; | ||
176 | mask |= 0x3f << 24; | ||
177 | value |= thresh << 24; | ||
178 | if (tuse == 2) { | ||
179 | mask |= 0x40000000; | ||
180 | if ((unsigned int)event & PM_THRMULT_MSKS) | ||
181 | value |= 0x40000000; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | *maskp = mask; | ||
186 | *valp = value; | ||
187 | return 0; | ||
188 | } | ||
189 | |||
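To make the constraint layout concrete, here is a worked trace of mpc7450_get_constraint for a hypothetical event assembled from the field definitions above (PMC1, selector 0x28, threshold 5, threshold-scale bit set; an illustrative encoding, not a code from a real event list). The class-4 path seeds mask/value from PMC1's pmcbits row, and the threshold handling then ORs in the TV and TS fields:

#include <stdio.h>

#define PM_THRMULT_MSKS 0x40000
#define PM_THRESH_SH    12
#define PM_THRESH_MSK   0x3f
#define PM_PMC_SH       8

int main(void)
{
	/* Event: PMC1, selector 0x28, threshold 5, scale bit set. */
	unsigned int ev = (1u << PM_PMC_SH) | 0x28 |
			  (5u << PM_THRESH_SH) | PM_THRMULT_MSKS;

	/* Class 4 (pinned to PMC1): start from PMC1's pmcbits row. */
	unsigned int mask  = 0x00844002;   /* P1, G2, G3, G4 fields */
	unsigned int value = 0x00111001;

	/* Selector 0x28 on PMC1 uses threshold and scale (tuse == 2). */
	mask  |= (unsigned int)PM_THRESH_MSK << 24;          /* TV field */
	value |= ((ev >> PM_THRESH_SH) & PM_THRESH_MSK) << 24;
	mask  |= 0x40000000;                                 /* TS field */
	if (ev & PM_THRMULT_MSKS)
		value |= 0x40000000;

	printf("mask  = 0x%08x\n", mask);   /* 0x7f844002 */
	printf("value = 0x%08x\n", value);  /* 0x45111001 */
	return 0;
}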
190 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
191 | { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS */ | ||
192 | { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */ | ||
193 | { 0x502, 0x602 }, /* PM_L2_HIT */ | ||
194 | { 0x503, 0x603 }, /* PM_L3_HIT */ | ||
195 | { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */ | ||
196 | { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */ | ||
197 | { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */ | ||
198 | { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */ | ||
199 | { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */ | ||
200 | { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */ | ||
201 | { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */ | ||
202 | { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */ | ||
203 | { 0x512, 0x612 }, /* PM_INT_LOCAL */ | ||
204 | { 0x513, 0x61d }, /* PM_L2_MISS */ | ||
205 | { 0x514, 0x61e }, /* PM_L3_MISS */ | ||
206 | }; | ||
207 | |||
208 | /* | ||
209 | * Scan the alternatives table for a match and return the | ||
210 | * index into the alternatives table if found, else -1. | ||
211 | */ | ||
212 | static int find_alternative(u32 event) | ||
213 | { | ||
214 | int i, j; | ||
215 | |||
216 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
217 | if (event < event_alternatives[i][0]) | ||
218 | break; | ||
219 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
220 | if (event == event_alternatives[i][j]) | ||
221 | return i; | ||
222 | } | ||
223 | return -1; | ||
224 | } | ||
225 | |||
226 | static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
227 | { | ||
228 | int i, j, nalt = 1; | ||
229 | u32 ae; | ||
230 | |||
231 | alt[0] = event; | ||
232 | nalt = 1; | ||
233 | i = find_alternative((u32)event); | ||
234 | if (i >= 0) { | ||
235 | for (j = 0; j < MAX_ALT; ++j) { | ||
236 | ae = event_alternatives[i][j]; | ||
237 | if (ae && ae != (u32)event) | ||
238 | alt[nalt++] = ae; | ||
239 | } | ||
240 | } | ||
241 | return nalt; | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * Bitmaps of which PMCs each class can use for classes 0 - 3. | ||
246 | * Bit i is set if PMC i+1 is usable. | ||
247 | */ | ||
248 | static const u8 classmap[N_CLASSES] = { | ||
249 | 0x3f, 0x0f, 0x0b, 0x03, 0 | ||
250 | }; | ||
251 | |||
252 | /* Bit position and width of each PMCSEL field */ | ||
253 | static const int pmcsel_shift[N_COUNTER] = { | ||
254 | 6, 0, 27, 22, 17, 11 | ||
255 | }; | ||
256 | static const u32 pmcsel_mask[N_COUNTER] = { | ||
257 | 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f | ||
258 | }; | ||
259 | |||
260 | /* | ||
261 | * Compute MMCR0/1/2 values for a set of events. | ||
262 | */ | ||
263 | static int mpc7450_compute_mmcr(u64 event[], int n_ev, | ||
264 | unsigned int hwc[], unsigned long mmcr[]) | ||
265 | { | ||
266 | u8 event_index[N_CLASSES][N_COUNTER]; | ||
267 | int n_classevent[N_CLASSES]; | ||
268 | int i, j, class, tuse; | ||
269 | u32 pmc_inuse = 0, pmc_avail; | ||
270 | u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0; | ||
271 | u32 ev, pmc, thresh; | ||
272 | |||
273 | if (n_ev > N_COUNTER) | ||
274 | return -1; | ||
275 | |||
276 | /* First pass: count usage in each class */ | ||
277 | for (i = 0; i < N_CLASSES; ++i) | ||
278 | n_classevent[i] = 0; | ||
279 | for (i = 0; i < n_ev; ++i) { | ||
280 | class = mpc7450_classify_event(event[i]); | ||
281 | if (class < 0) | ||
282 | return -1; | ||
283 | j = n_classevent[class]++; | ||
284 | event_index[class][j] = i; | ||
285 | } | ||
286 | |||
287 | /* Second pass: allocate PMCs from most specific event to least */ | ||
288 | for (class = N_CLASSES - 1; class >= 0; --class) { | ||
289 | for (i = 0; i < n_classevent[class]; ++i) { | ||
290 | ev = event[event_index[class][i]]; | ||
291 | if (class == 4) { | ||
292 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; | ||
293 | if (pmc_inuse & (1 << (pmc - 1))) | ||
294 | return -1; | ||
295 | } else { | ||
296 | /* Find a suitable PMC */ | ||
297 | pmc_avail = classmap[class] & ~pmc_inuse; | ||
298 | if (!pmc_avail) | ||
299 | return -1; | ||
300 | pmc = ffs(pmc_avail); | ||
301 | } | ||
302 | pmc_inuse |= 1 << (pmc - 1); | ||
303 | |||
304 | tuse = mpc7450_threshold_use(ev); | ||
305 | if (tuse) { | ||
306 | thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK; | ||
307 | mmcr0 |= thresh << 16; | ||
308 | if (tuse == 2 && (ev & PM_THRMULT_MSKS)) | ||
309 | mmcr2 = 0x80000000; | ||
310 | } | ||
311 | ev &= pmcsel_mask[pmc - 1]; | ||
312 | ev <<= pmcsel_shift[pmc - 1]; | ||
313 | if (pmc <= 2) | ||
314 | mmcr0 |= ev; | ||
315 | else | ||
316 | mmcr1 |= ev; | ||
317 | hwc[event_index[class][i]] = pmc - 1; | ||
318 | } | ||
319 | } | ||
320 | |||
321 | if (pmc_inuse & 1) | ||
322 | mmcr0 |= MMCR0_PMC1CE; | ||
323 | if (pmc_inuse & 0x3e) | ||
324 | mmcr0 |= MMCR0_PMCnCE; | ||
325 | |||
326 | /* Return MMCRx values */ | ||
327 | mmcr[0] = mmcr0; | ||
328 | mmcr[1] = mmcr1; | ||
329 | mmcr[2] = mmcr2; | ||
330 | return 0; | ||
331 | } | ||
332 | |||
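The second pass of mpc7450_compute_mmcr is a standard greedy heuristic: satisfy the most constrained events first (class 4, pinned to one PMC) and work down to the least constrained, picking the lowest-numbered free counter from each class's bitmap with ffs(). A stripped-down model of that allocation order, where events are represented by their class number only:

#include <stdio.h>
#include <strings.h>   /* ffs() */

/* Which PMCs each class may use; bit i set => PMC i+1 usable. */
static const unsigned char classmap[5] = { 0x3f, 0x0f, 0x0b, 0x03, 0 };

int main(void)
{
	/* One class-3 event (PMC1 or 2) and one class-1 event (PMC1-4),
	 * allocated from most constrained (highest class) down. */
	int classes[2] = { 3, 1 };
	unsigned int inuse = 0;
	int c, i;

	for (c = 4; c >= 0; c--) {
		for (i = 0; i < 2; i++) {
			unsigned int avail;
			int pmc;

			if (classes[i] != c)
				continue;
			avail = classmap[c] & ~inuse;
			if (!avail) {
				printf("no PMC left for class %d\n", c);
				return 1;
			}
			pmc = ffs(avail);        /* lowest free PMC */
			inuse |= 1u << (pmc - 1);
			printf("class %d -> PMC%d\n", c, pmc);
		}
	}
	return 0;   /* prints: class 3 -> PMC1, class 1 -> PMC2 */
}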
333 | /* | ||
334 | * Disable counting by a PMC. | ||
335 | * Note that the pmc argument is 0-based here, not 1-based. | ||
336 | */ | ||
337 | static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
338 | { | ||
339 | if (pmc <= 1) | ||
340 | mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); | ||
341 | else | ||
342 | mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); | ||
343 | } | ||
344 | |||
345 | static int mpc7450_generic_events[] = { | ||
346 | [PERF_COUNT_HW_CPU_CYCLES] = 1, | ||
347 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
348 | [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */ | ||
349 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */ | ||
350 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */ | ||
351 | }; | ||
352 | |||
353 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
354 | |||
355 | /* | ||
356 | * Table of generalized cache-related events. | ||
357 | * 0 means not supported, -1 means nonsensical, other values | ||
358 | * are event codes. | ||
359 | */ | ||
360 | static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
361 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
362 | [C(OP_READ)] = { 0, 0x225 }, | ||
363 | [C(OP_WRITE)] = { 0, 0x227 }, | ||
364 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
365 | }, | ||
366 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
367 | [C(OP_READ)] = { 0x129, 0x115 }, | ||
368 | [C(OP_WRITE)] = { -1, -1 }, | ||
369 | [C(OP_PREFETCH)] = { 0x634, 0 }, | ||
370 | }, | ||
371 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
372 | [C(OP_READ)] = { 0, 0 }, | ||
373 | [C(OP_WRITE)] = { 0, 0 }, | ||
374 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
375 | }, | ||
376 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
377 | [C(OP_READ)] = { 0, 0x312 }, | ||
378 | [C(OP_WRITE)] = { -1, -1 }, | ||
379 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
380 | }, | ||
381 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
382 | [C(OP_READ)] = { 0, 0x223 }, | ||
383 | [C(OP_WRITE)] = { -1, -1 }, | ||
384 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
385 | }, | ||
386 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
387 | [C(OP_READ)] = { 0x122, 0x41c }, | ||
388 | [C(OP_WRITE)] = { -1, -1 }, | ||
389 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
390 | }, | ||
391 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
392 | [C(OP_READ)] = { -1, -1 }, | ||
393 | [C(OP_WRITE)] = { -1, -1 }, | ||
394 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
395 | }, | ||
396 | }; | ||
397 | |||
398 | struct power_pmu mpc7450_pmu = { | ||
399 | .name = "MPC7450 family", | ||
400 | .n_counter = N_COUNTER, | ||
401 | .max_alternatives = MAX_ALT, | ||
402 | .add_fields = 0x00111555ul, | ||
403 | .test_adder = 0x00301000ul, | ||
404 | .compute_mmcr = mpc7450_compute_mmcr, | ||
405 | .get_constraint = mpc7450_get_constraint, | ||
406 | .get_alternatives = mpc7450_get_alternatives, | ||
407 | .disable_pmc = mpc7450_disable_pmc, | ||
408 | .n_generic = ARRAY_SIZE(mpc7450_generic_events), | ||
409 | .generic_events = mpc7450_generic_events, | ||
410 | .cache_events = &mpc7450_cache_events, | ||
411 | }; | ||
412 | |||
413 | static int __init init_mpc7450_pmu(void) | ||
414 | { | ||
415 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
416 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450")) | ||
417 | return -ENODEV; | ||
418 | |||
419 | return register_power_pmu(&mpc7450_pmu); | ||
420 | } | ||
421 | |||
422 | early_initcall(init_mpc7450_pmu); | ||
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index e1612dfb4a93..2049f2d00ffe 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c | |||
@@ -21,12 +21,13 @@ | |||
21 | #include <linux/of.h> | 21 | #include <linux/of.h> |
22 | #include <linux/of_device.h> | 22 | #include <linux/of_device.h> |
23 | #include <linux/of_platform.h> | 23 | #include <linux/of_platform.h> |
24 | #include <linux/atomic.h> | ||
24 | 25 | ||
25 | #include <asm/errno.h> | 26 | #include <asm/errno.h> |
26 | #include <asm/topology.h> | 27 | #include <asm/topology.h> |
27 | #include <asm/pci-bridge.h> | 28 | #include <asm/pci-bridge.h> |
28 | #include <asm/ppc-pci.h> | 29 | #include <asm/ppc-pci.h> |
29 | #include <linux/atomic.h> | 30 | #include <asm/eeh.h> |
30 | 31 | ||
31 | #ifdef CONFIG_PPC_OF_PLATFORM_PCI | 32 | #ifdef CONFIG_PPC_OF_PLATFORM_PCI |
32 | 33 | ||
@@ -66,6 +67,9 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev) | |||
66 | /* Init pci_dn data structures */ | 67 | /* Init pci_dn data structures */ |
67 | pci_devs_phb_init_dynamic(phb); | 68 | pci_devs_phb_init_dynamic(phb); |
68 | 69 | ||
70 | /* Create EEH devices for the PHB */ | ||
71 | eeh_dev_phb_init_dynamic(phb); | ||
72 | |||
69 | /* Register devices with EEH */ | 73 | /* Register devices with EEH */ |
70 | #ifdef CONFIG_EEH | 74 | #ifdef CONFIG_EEH |
71 | if (dev->dev.of_node->child) | 75 | if (dev->dev.of_node->child) |
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 41456ff55e14..0bb1f98613ba 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c | |||
@@ -11,13 +11,10 @@ | |||
11 | #include <linux/export.h> | 11 | #include <linux/export.h> |
12 | #include <linux/memblock.h> | 12 | #include <linux/memblock.h> |
13 | 13 | ||
14 | #include <asm/firmware.h> | ||
15 | #include <asm/lppaca.h> | 14 | #include <asm/lppaca.h> |
16 | #include <asm/paca.h> | 15 | #include <asm/paca.h> |
17 | #include <asm/sections.h> | 16 | #include <asm/sections.h> |
18 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
19 | #include <asm/iseries/lpar_map.h> | ||
20 | #include <asm/iseries/hv_types.h> | ||
21 | #include <asm/kexec.h> | 18 | #include <asm/kexec.h> |
22 | 19 | ||
23 | /* This symbol is provided by the linker - let it fill in the paca | 20 | /* This symbol is provided by the linker - let it fill in the paca |
@@ -30,8 +27,8 @@ extern unsigned long __toc_start; | |||
30 | * The structure which the hypervisor knows about - this structure | 27 | * The structure which the hypervisor knows about - this structure |
31 | * should not cross a page boundary. The vpa_init/register_vpa call | 28 | * should not cross a page boundary. The vpa_init/register_vpa call |
32 | * is now known to fail if the lppaca structure crosses a page | 29 | * is now known to fail if the lppaca structure crosses a page |
33 | * boundary. The lppaca is also used on legacy iSeries and POWER5 | 30 | * boundary. The lppaca is also used on POWER5 pSeries boxes. |
34 | * pSeries boxes. The lppaca is 640 bytes long, and cannot readily | 31 | * The lppaca is 640 bytes long, and cannot readily |
35 | * change since the hypervisor knows its layout, so a 1kB alignment | 32 | * change since the hypervisor knows its layout, so a 1kB alignment |
36 | * will suffice to ensure that it doesn't cross a page boundary. | 33 | * will suffice to ensure that it doesn't cross a page boundary. |
37 | */ | 34 | */ |
@@ -183,12 +180,9 @@ void __init allocate_pacas(void) | |||
183 | /* | 180 | /* |
184 | * We can't take SLB misses on the paca, and we want to access them | 181 | * We can't take SLB misses on the paca, and we want to access them |
185 | * in real mode, so allocate them within the RMA and also within | 182 | * in real mode, so allocate them within the RMA and also within |
186 | * the first segment. On iSeries they must be within the area mapped | 183 | * the first segment. |
187 | * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. | ||
188 | */ | 184 | */ |
189 | limit = min(0x10000000ULL, ppc64_rma_size); | 185 | limit = min(0x10000000ULL, ppc64_rma_size); |
190 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
191 | limit = min(limit, HvPagesToMap * HVPAGESIZE); | ||
192 | 186 | ||
193 | paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); | 187 | paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); |
194 | 188 | ||
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index cce98d76e905..8e78e93c8185 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <asm/byteorder.h> | 38 | #include <asm/byteorder.h> |
39 | #include <asm/machdep.h> | 39 | #include <asm/machdep.h> |
40 | #include <asm/ppc-pci.h> | 40 | #include <asm/ppc-pci.h> |
41 | #include <asm/firmware.h> | ||
42 | #include <asm/eeh.h> | 41 | #include <asm/eeh.h> |
43 | 42 | ||
44 | static DEFINE_SPINLOCK(hose_spinlock); | 43 | static DEFINE_SPINLOCK(hose_spinlock); |
@@ -50,9 +49,6 @@ static int global_phb_number; /* Global phb counter */ | |||
50 | /* ISA Memory physical address */ | 49 | /* ISA Memory physical address */ |
51 | resource_size_t isa_mem_base; | 50 | resource_size_t isa_mem_base; |
52 | 51 | ||
53 | /* Default PCI flags is 0 on ppc32, modified at boot on ppc64 */ | ||
54 | unsigned int pci_flags = 0; | ||
55 | |||
56 | 52 | ||
57 | static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; | 53 | static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; |
58 | 54 | ||
@@ -219,20 +215,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) | |||
219 | struct of_irq oirq; | 215 | struct of_irq oirq; |
220 | unsigned int virq; | 216 | unsigned int virq; |
221 | 217 | ||
222 | /* The current device-tree that iSeries generates from the HV | ||
223 | * PCI informations doesn't contain proper interrupt routing, | ||
224 | * and all the fallback would do is print out crap, so we | ||
225 | * don't attempt to resolve the interrupts here at all, some | ||
226 | * iSeries specific fixup does it. | ||
227 | * | ||
228 | * In the long run, we will hopefully fix the generated device-tree | ||
229 | * instead. | ||
230 | */ | ||
231 | #ifdef CONFIG_PPC_ISERIES | ||
232 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
233 | return -1; | ||
234 | #endif | ||
235 | |||
236 | pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); | 218 | pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); |
237 | 219 | ||
238 | #ifdef DEBUG | 220 | #ifdef DEBUG |
@@ -849,60 +831,6 @@ int pci_proc_domain(struct pci_bus *bus) | |||
849 | return 1; | 831 | return 1; |
850 | } | 832 | } |
851 | 833 | ||
852 | void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, | ||
853 | struct resource *res) | ||
854 | { | ||
855 | resource_size_t offset = 0, mask = (resource_size_t)-1; | ||
856 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
857 | |||
858 | if (!hose) | ||
859 | return; | ||
860 | if (res->flags & IORESOURCE_IO) { | ||
861 | offset = (unsigned long)hose->io_base_virt - _IO_BASE; | ||
862 | mask = 0xffffffffu; | ||
863 | } else if (res->flags & IORESOURCE_MEM) | ||
864 | offset = hose->pci_mem_offset; | ||
865 | |||
866 | region->start = (res->start - offset) & mask; | ||
867 | region->end = (res->end - offset) & mask; | ||
868 | } | ||
869 | EXPORT_SYMBOL(pcibios_resource_to_bus); | ||
870 | |||
871 | void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, | ||
872 | struct pci_bus_region *region) | ||
873 | { | ||
874 | resource_size_t offset = 0, mask = (resource_size_t)-1; | ||
875 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
876 | |||
877 | if (!hose) | ||
878 | return; | ||
879 | if (res->flags & IORESOURCE_IO) { | ||
880 | offset = (unsigned long)hose->io_base_virt - _IO_BASE; | ||
881 | mask = 0xffffffffu; | ||
882 | } else if (res->flags & IORESOURCE_MEM) | ||
883 | offset = hose->pci_mem_offset; | ||
884 | res->start = (region->start + offset) & mask; | ||
885 | res->end = (region->end + offset) & mask; | ||
886 | } | ||
887 | EXPORT_SYMBOL(pcibios_bus_to_resource); | ||
888 | |||
889 | /* Fixup a bus resource into a linux resource */ | ||
890 | static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) | ||
891 | { | ||
892 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
893 | resource_size_t offset = 0, mask = (resource_size_t)-1; | ||
894 | |||
895 | if (res->flags & IORESOURCE_IO) { | ||
896 | offset = (unsigned long)hose->io_base_virt - _IO_BASE; | ||
897 | mask = 0xffffffffu; | ||
898 | } else if (res->flags & IORESOURCE_MEM) | ||
899 | offset = hose->pci_mem_offset; | ||
900 | |||
901 | res->start = (res->start + offset) & mask; | ||
902 | res->end = (res->end + offset) & mask; | ||
903 | } | ||
904 | |||
905 | |||
906 | /* This header fixup will do the resource fixup for all devices as they are | 834 | /* This header fixup will do the resource fixup for all devices as they are |
907 | * probed, but not for bridge ranges | 835 | * probed, but not for bridge ranges |
908 | */ | 836 | */ |
@@ -942,18 +870,11 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) | |||
942 | continue; | 870 | continue; |
943 | } | 871 | } |
944 | 872 | ||
945 | pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] fixup...\n", | 873 | pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n", |
946 | pci_name(dev), i, | 874 | pci_name(dev), i, |
947 | (unsigned long long)res->start,\ | 875 | (unsigned long long)res->start,\ |
948 | (unsigned long long)res->end, | 876 | (unsigned long long)res->end, |
949 | (unsigned int)res->flags); | 877 | (unsigned int)res->flags); |
950 | |||
951 | fixup_resource(res, dev); | ||
952 | |||
953 | pr_debug("PCI:%s %016llx-%016llx\n", | ||
954 | pci_name(dev), | ||
955 | (unsigned long long)res->start, | ||
956 | (unsigned long long)res->end); | ||
957 | } | 878 | } |
958 | 879 | ||
959 | /* Call machine specific resource fixup */ | 880 | /* Call machine specific resource fixup */ |
@@ -1055,27 +976,18 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) | |||
1055 | continue; | 976 | continue; |
1056 | } | 977 | } |
1057 | 978 | ||
1058 | pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", | 979 | pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n", |
1059 | pci_name(dev), i, | 980 | pci_name(dev), i, |
1060 | (unsigned long long)res->start,\ | 981 | (unsigned long long)res->start,\ |
1061 | (unsigned long long)res->end, | 982 | (unsigned long long)res->end, |
1062 | (unsigned int)res->flags); | 983 | (unsigned int)res->flags); |
1063 | 984 | ||
1064 | /* Perform fixup */ | ||
1065 | fixup_resource(res, dev); | ||
1066 | |||
1067 | /* Try to detect uninitialized P2P bridge resources, | 985 | /* Try to detect uninitialized P2P bridge resources, |
1068 | * and clear them out so they get re-assigned later | 986 | * and clear them out so they get re-assigned later |
1069 | */ | 987 | */ |
1070 | if (pcibios_uninitialized_bridge_resource(bus, res)) { | 988 | if (pcibios_uninitialized_bridge_resource(bus, res)) { |
1071 | res->flags = 0; | 989 | res->flags = 0; |
1072 | pr_debug("PCI:%s (unassigned)\n", pci_name(dev)); | 990 | pr_debug("PCI:%s (unassigned)\n", pci_name(dev)); |
1073 | } else { | ||
1074 | |||
1075 | pr_debug("PCI:%s %016llx-%016llx\n", | ||
1076 | pci_name(dev), | ||
1077 | (unsigned long long)res->start, | ||
1078 | (unsigned long long)res->end); | ||
1079 | } | 991 | } |
1080 | } | 992 | } |
1081 | } | 993 | } |
@@ -1565,6 +1477,11 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) | |||
1565 | return pci_enable_resources(dev, mask); | 1477 | return pci_enable_resources(dev, mask); |
1566 | } | 1478 | } |
1567 | 1479 | ||
1480 | resource_size_t pcibios_io_space_offset(struct pci_controller *hose) | ||
1481 | { | ||
1482 | return (unsigned long) hose->io_base_virt - _IO_BASE; | ||
1483 | } | ||
1484 | |||
1568 | static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources) | 1485 | static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources) |
1569 | { | 1486 | { |
1570 | struct resource *res; | 1487 | struct resource *res; |
@@ -1589,7 +1506,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s | |||
1589 | (unsigned long long)res->start, | 1506 | (unsigned long long)res->start, |
1590 | (unsigned long long)res->end, | 1507 | (unsigned long long)res->end, |
1591 | (unsigned long)res->flags); | 1508 | (unsigned long)res->flags); |
1592 | pci_add_resource(resources, res); | 1509 | pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose)); |
1593 | 1510 | ||
1594 | /* Hookup PHB Memory resources */ | 1511 | /* Hookup PHB Memory resources */ |
1595 | for (i = 0; i < 3; ++i) { | 1512 | for (i = 0; i < 3; ++i) { |
@@ -1612,7 +1529,7 @@ static void __devinit pcibios_setup_phb_resources(struct pci_controller *hose, s | |||
1612 | (unsigned long long)res->start, | 1529 | (unsigned long long)res->start, |
1613 | (unsigned long long)res->end, | 1530 | (unsigned long long)res->end, |
1614 | (unsigned long)res->flags); | 1531 | (unsigned long)res->flags); |
1615 | pci_add_resource(resources, res); | 1532 | pci_add_resource_offset(resources, res, hose->pci_mem_offset); |
1616 | } | 1533 | } |
1617 | 1534 | ||
1618 | pr_debug("PCI: PHB MEM offset = %016llx\n", | 1535 | pr_debug("PCI: PHB MEM offset = %016llx\n", |
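The switch to pci_add_resource_offset in this hunk makes the hose's CPU-to-bus offset explicit: io_base_virt - _IO_BASE for I/O windows (now wrapped in pcibios_io_space_offset), pci_mem_offset for memory windows. Given that offset, converting between a bus-side region and a CPU-side resource is pure addition and subtraction, as in this standalone sketch with invented window values:

#include <stdio.h>

struct region { unsigned long long start, end; }; /* bus view */
struct rsrc   { unsigned long long start, end; }; /* CPU view */

/* offset = CPU address - bus address for the window in question. */
static void bus_to_resource(struct rsrc *res, const struct region *reg,
			    unsigned long long offset)
{
	res->start = reg->start + offset;
	res->end   = reg->end + offset;
}

static void resource_to_bus(struct region *reg, const struct rsrc *res,
			    unsigned long long offset)
{
	reg->start = res->start - offset;
	reg->end   = res->end - offset;
}

int main(void)
{
	struct region bar = { 0x100000, 0x10ffff }; /* invented BAR */
	struct rsrc res;
	struct region back;

	bus_to_resource(&res, &bar, 0xf8000000ULL);
	resource_to_bus(&back, &res, 0xf8000000ULL);
	printf("cpu 0x%llx-0x%llx, bus 0x%llx\n",
	       res.start, res.end, back.start);
	return 0;
}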
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index fdd1a3d951dc..4b06ec5a502e 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c | |||
@@ -219,9 +219,9 @@ void __devinit pcibios_setup_phb_io_space(struct pci_controller *hose) | |||
219 | struct resource *res = &hose->io_resource; | 219 | struct resource *res = &hose->io_resource; |
220 | 220 | ||
221 | /* Fixup IO space offset */ | 221 | /* Fixup IO space offset */ |
222 | io_offset = (unsigned long)hose->io_base_virt - isa_io_base; | 222 | io_offset = pcibios_io_space_offset(hose); |
223 | res->start = (res->start + io_offset) & 0xffffffffu; | 223 | res->start += io_offset; |
224 | res->end = (res->end + io_offset) & 0xffffffffu; | 224 | res->end += io_offset; |
225 | } | 225 | } |
226 | 226 | ||
227 | static int __init pcibios_init(void) | 227 | static int __init pcibios_init(void) |
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3318d39b7d4c..94a54f61d341 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c | |||
@@ -33,8 +33,6 @@ | |||
33 | #include <asm/machdep.h> | 33 | #include <asm/machdep.h> |
34 | #include <asm/ppc-pci.h> | 34 | #include <asm/ppc-pci.h> |
35 | 35 | ||
36 | unsigned long pci_probe_only = 1; | ||
37 | |||
38 | /* pci_io_base -- the base address from which io bars are offsets. | 36 | /* pci_io_base -- the base address from which io bars are offsets. |
39 | * This is the lowest I/O base address (so bar values are always positive), | 37 | * This is the lowest I/O base address (so bar values are always positive), |
40 | * and it *must* be the start of ISA space if an ISA bus exists because | 38 | * and it *must* be the start of ISA space if an ISA bus exists because |
@@ -55,9 +53,6 @@ static int __init pcibios_init(void) | |||
55 | */ | 53 | */ |
56 | ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; | 54 | ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; |
57 | 55 | ||
58 | if (pci_probe_only) | ||
59 | pci_add_flags(PCI_PROBE_ONLY); | ||
60 | |||
61 | /* On ppc64, we always enable PCI domains and we keep domain 0 | 56 | /* On ppc64, we always enable PCI domains and we keep domain 0 |
62 | * backward compatible in /proc for video cards | 57 | * backward compatible in /proc for video cards |
63 | */ | 58 | */ |
@@ -173,7 +168,7 @@ static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose) | |||
173 | return -ENOMEM; | 168 | return -ENOMEM; |
174 | 169 | ||
175 | /* Fixup hose IO resource */ | 170 | /* Fixup hose IO resource */ |
176 | io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE; | 171 | io_virt_offset = pcibios_io_space_offset(hose); |
177 | hose->io_resource.start += io_virt_offset; | 172 | hose->io_resource.start += io_virt_offset; |
178 | hose->io_resource.end += io_virt_offset; | 173 | hose->io_resource.end += io_virt_offset; |
179 | 174 | ||
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index b37d0b5a796e..89dde171a6fa 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c | |||
@@ -75,6 +75,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) | |||
75 | { | 75 | { |
76 | u64 base, size; | 76 | u64 base, size; |
77 | unsigned int flags; | 77 | unsigned int flags; |
78 | struct pci_bus_region region; | ||
78 | struct resource *res; | 79 | struct resource *res; |
79 | const u32 *addrs; | 80 | const u32 *addrs; |
80 | u32 i; | 81 | u32 i; |
@@ -106,10 +107,11 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) | |||
106 | printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); | 107 | printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); |
107 | continue; | 108 | continue; |
108 | } | 109 | } |
109 | res->start = base; | ||
110 | res->end = base + size - 1; | ||
111 | res->flags = flags; | 110 | res->flags = flags; |
112 | res->name = pci_name(dev); | 111 | res->name = pci_name(dev); |
112 | region.start = base; | ||
113 | region.end = base + size - 1; | ||
114 | pcibios_bus_to_resource(dev, res, ®ion); | ||
113 | } | 115 | } |
114 | } | 116 | } |
115 | 117 | ||
@@ -209,6 +211,7 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev) | |||
209 | struct pci_bus *bus; | 211 | struct pci_bus *bus; |
210 | const u32 *busrange, *ranges; | 212 | const u32 *busrange, *ranges; |
211 | int len, i, mode; | 213 | int len, i, mode; |
214 | struct pci_bus_region region; | ||
212 | struct resource *res; | 215 | struct resource *res; |
213 | unsigned int flags; | 216 | unsigned int flags; |
214 | u64 size; | 217 | u64 size; |
@@ -270,9 +273,10 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev) | |||
270 | res = bus->resource[i]; | 273 | res = bus->resource[i]; |
271 | ++i; | 274 | ++i; |
272 | } | 275 | } |
273 | res->start = of_read_number(&ranges[1], 2); | ||
274 | res->end = res->start + size - 1; | ||
275 | res->flags = flags; | 276 | res->flags = flags; |
277 | region.start = of_read_number(&ranges[1], 2); | ||
278 | region.end = region.start + size - 1; | ||
279 | pcibios_bus_to_resource(dev, res, ®ion); | ||
276 | } | 280 | } |
277 | sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), | 281 | sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), |
278 | bus->number); | 282 | bus->number); |
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c deleted file mode 100644 index 564c1d8bdb5c..000000000000 --- a/arch/powerpc/kernel/perf_callchain.c +++ /dev/null | |||
@@ -1,492 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter callchain support - powerpc architecture code | ||
3 | * | ||
4 | * Copyright © 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/perf_event.h> | ||
14 | #include <linux/percpu.h> | ||
15 | #include <linux/uaccess.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <asm/ptrace.h> | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/sigcontext.h> | ||
20 | #include <asm/ucontext.h> | ||
21 | #include <asm/vdso.h> | ||
22 | #ifdef CONFIG_PPC64 | ||
23 | #include "ppc32.h" | ||
24 | #endif | ||
25 | |||
26 | |||
27 | /* | ||
28 | * Is sp valid as the address of the next kernel stack frame after prev_sp? | ||
29 | * The next frame may be in a different stack area but should not go | ||
30 | * back down in the same stack area. | ||
31 | */ | ||
32 | static int valid_next_sp(unsigned long sp, unsigned long prev_sp) | ||
33 | { | ||
34 | if (sp & 0xf) | ||
35 | return 0; /* must be 16-byte aligned */ | ||
36 | if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) | ||
37 | return 0; | ||
38 | if (sp >= prev_sp + STACK_FRAME_OVERHEAD) | ||
39 | return 1; | ||
40 | /* | ||
41 | * sp could decrease when we jump off an interrupt stack | ||
42 | * back to the regular process stack. | ||
43 | */ | ||
44 | if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) | ||
45 | return 1; | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | void | ||
50 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | ||
51 | { | ||
52 | unsigned long sp, next_sp; | ||
53 | unsigned long next_ip; | ||
54 | unsigned long lr; | ||
55 | long level = 0; | ||
56 | unsigned long *fp; | ||
57 | |||
58 | lr = regs->link; | ||
59 | sp = regs->gpr[1]; | ||
60 | perf_callchain_store(entry, regs->nip); | ||
61 | |||
62 | if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) | ||
63 | return; | ||
64 | |||
65 | for (;;) { | ||
66 | fp = (unsigned long *) sp; | ||
67 | next_sp = fp[0]; | ||
68 | |||
69 | if (next_sp == sp + STACK_INT_FRAME_SIZE && | ||
70 | fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { | ||
71 | /* | ||
72 | * This looks like an interrupt frame for an | ||
73 | * interrupt that occurred in the kernel | ||
74 | */ | ||
75 | regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); | ||
76 | next_ip = regs->nip; | ||
77 | lr = regs->link; | ||
78 | level = 0; | ||
79 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
80 | |||
81 | } else { | ||
82 | if (level == 0) | ||
83 | next_ip = lr; | ||
84 | else | ||
85 | next_ip = fp[STACK_FRAME_LR_SAVE]; | ||
86 | |||
87 | /* | ||
88 | * We can't tell which of the first two addresses | ||
89 | * we get are valid, but we can filter out the | ||
90 | * obviously bogus ones here. We replace them | ||
91 | * with 0 rather than removing them entirely so | ||
92 | * that userspace can tell which is which. | ||
93 | */ | ||
94 | if ((level == 1 && next_ip == lr) || | ||
95 | (level <= 1 && !kernel_text_address(next_ip))) | ||
96 | next_ip = 0; | ||
97 | |||
98 | ++level; | ||
99 | } | ||
100 | |||
101 | perf_callchain_store(entry, next_ip); | ||
102 | if (!valid_next_sp(next_sp, sp)) | ||
103 | return; | ||
104 | sp = next_sp; | ||
105 | } | ||
106 | } | ||
107 | |||
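perf_callchain_kernel leans on the ppc64 stack convention: each frame starts with a back-chain pointer to the caller's frame, and the saved LR sits at a fixed slot; only the newest frame takes its return address from the register state, since it may not have saved LR yet. A loose userspace model of that back-chain walk, with the frame layout mocked as plain arrays (slot 0 = back chain, slot 2 = saved LR):

#include <stdio.h>

int main(void)
{
	/* Two mock frames chained together; addresses are invented. */
	unsigned long frame2[4] = { 0, 0, 0x3000, 0 };
	unsigned long frame1[4] = { (unsigned long)frame2, 0, 0x2000, 0 };

	unsigned long *sp = frame1;
	unsigned long regs_lr = 0x1000;  /* pretend link register */
	int level = 0;

	while (sp) {
		/* Newest frame: trust the registers, not the stack. */
		unsigned long ip = (level == 0) ? regs_lr : sp[2];

		printf("level %d: ip 0x%lx\n", level, ip);
		sp = (unsigned long *)sp[0];  /* follow the back chain */
		level++;
	}
	return 0;
}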
108 | #ifdef CONFIG_PPC64 | ||
109 | /* | ||
110 | * On 64-bit we don't want to invoke hash_page on user addresses from | ||
111 | * interrupt context, so if the access faults, we read the page tables | ||
112 | * to find which page (if any) is mapped and access it directly. | ||
113 | */ | ||
114 | static int read_user_stack_slow(void __user *ptr, void *ret, int nb) | ||
115 | { | ||
116 | pgd_t *pgdir; | ||
117 | pte_t *ptep, pte; | ||
118 | unsigned shift; | ||
119 | unsigned long addr = (unsigned long) ptr; | ||
120 | unsigned long offset; | ||
121 | unsigned long pfn; | ||
122 | void *kaddr; | ||
123 | |||
124 | pgdir = current->mm->pgd; | ||
125 | if (!pgdir) | ||
126 | return -EFAULT; | ||
127 | |||
128 | ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift); | ||
129 | if (!shift) | ||
130 | shift = PAGE_SHIFT; | ||
131 | |||
132 | /* align address to page boundary */ | ||
133 | offset = addr & ((1UL << shift) - 1); | ||
134 | addr -= offset; | ||
135 | |||
136 | if (ptep == NULL) | ||
137 | return -EFAULT; | ||
138 | pte = *ptep; | ||
139 | if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) | ||
140 | return -EFAULT; | ||
141 | pfn = pte_pfn(pte); | ||
142 | if (!page_is_ram(pfn)) | ||
143 | return -EFAULT; | ||
144 | |||
145 | /* no highmem to worry about here */ | ||
146 | kaddr = pfn_to_kaddr(pfn); | ||
147 | memcpy(ret, kaddr + offset, nb); | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) | ||
152 | { | ||
153 | if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || | ||
154 | ((unsigned long)ptr & 7)) | ||
155 | return -EFAULT; | ||
156 | |||
157 | pagefault_disable(); | ||
158 | if (!__get_user_inatomic(*ret, ptr)) { | ||
159 | pagefault_enable(); | ||
160 | return 0; | ||
161 | } | ||
162 | pagefault_enable(); | ||
163 | |||
164 | return read_user_stack_slow(ptr, ret, 8); | ||
165 | } | ||
166 | |||
167 | static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) | ||
168 | { | ||
169 | if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || | ||
170 | ((unsigned long)ptr & 3)) | ||
171 | return -EFAULT; | ||
172 | |||
173 | pagefault_disable(); | ||
174 | if (!__get_user_inatomic(*ret, ptr)) { | ||
175 | pagefault_enable(); | ||
176 | return 0; | ||
177 | } | ||
178 | pagefault_enable(); | ||
179 | |||
180 | return read_user_stack_slow(ptr, ret, 4); | ||
181 | } | ||
182 | |||
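For reference, the user-stack walk in perf_callchain_user_64() below leans on the 64-bit ELF ABI frame layout: the back chain pointer sits at sp+0 and the LR save word at sp+16, i.e. fp[2] in doublewords. A minimal sketch of one walk step using the helpers above (illustrative only, not part of the diff):

    unsigned long __user *fp = (unsigned long __user *) sp;
    unsigned long next_sp, next_ip;

    if (!read_user_stack_64(&fp[0], &next_sp) &&    /* back chain */
        !read_user_stack_64(&fp[2], &next_ip)) {    /* LR save word */
            /* next_ip is the return address recorded for this frame */
    }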
183 | static inline int valid_user_sp(unsigned long sp, int is_64) | ||
184 | { | ||
185 | if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) | ||
186 | return 0; | ||
187 | return 1; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * 64-bit user processes use the same stack frame for RT and non-RT signals. | ||
192 | */ | ||
193 | struct signal_frame_64 { | ||
194 | char dummy[__SIGNAL_FRAMESIZE]; | ||
195 | struct ucontext uc; | ||
196 | unsigned long unused[2]; | ||
197 | unsigned int tramp[6]; | ||
198 | struct siginfo *pinfo; | ||
199 | void *puc; | ||
200 | struct siginfo info; | ||
201 | char abigap[288]; | ||
202 | }; | ||
203 | |||
204 | static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) | ||
205 | { | ||
206 | if (nip == fp + offsetof(struct signal_frame_64, tramp)) | ||
207 | return 1; | ||
208 | if (vdso64_rt_sigtramp && current->mm->context.vdso_base && | ||
209 | nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) | ||
210 | return 1; | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * Do some sanity checking on the signal frame pointed to by sp. | ||
216 | * We check the pinfo and puc pointers in the frame. | ||
217 | */ | ||
218 | static int sane_signal_64_frame(unsigned long sp) | ||
219 | { | ||
220 | struct signal_frame_64 __user *sf; | ||
221 | unsigned long pinfo, puc; | ||
222 | |||
223 | sf = (struct signal_frame_64 __user *) sp; | ||
224 | if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || | ||
225 | read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) | ||
226 | return 0; | ||
227 | return pinfo == (unsigned long) &sf->info && | ||
228 | puc == (unsigned long) &sf->uc; | ||
229 | } | ||
230 | |||
231 | static void perf_callchain_user_64(struct perf_callchain_entry *entry, | ||
232 | struct pt_regs *regs) | ||
233 | { | ||
234 | unsigned long sp, next_sp; | ||
235 | unsigned long next_ip; | ||
236 | unsigned long lr; | ||
237 | long level = 0; | ||
238 | struct signal_frame_64 __user *sigframe; | ||
239 | unsigned long __user *fp, *uregs; | ||
240 | |||
241 | next_ip = regs->nip; | ||
242 | lr = regs->link; | ||
243 | sp = regs->gpr[1]; | ||
244 | perf_callchain_store(entry, next_ip); | ||
245 | |||
246 | for (;;) { | ||
247 | fp = (unsigned long __user *) sp; | ||
248 | if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) | ||
249 | return; | ||
250 | if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) | ||
251 | return; | ||
252 | |||
253 | /* | ||
254 | * Note: the next_sp - sp >= signal frame size check | ||
255 | * is true when next_sp < sp, because the unsigned | ||
256 | * subtraction wraps; this can happen when transitioning | ||
257 | * from an alternate signal stack to the normal stack. | ||
258 | */ | ||
259 | if (next_sp - sp >= sizeof(struct signal_frame_64) && | ||
260 | (is_sigreturn_64_address(next_ip, sp) || | ||
261 | (level <= 1 && is_sigreturn_64_address(lr, sp))) && | ||
262 | sane_signal_64_frame(sp)) { | ||
263 | /* | ||
264 | * This looks like a signal frame | ||
265 | */ | ||
266 | sigframe = (struct signal_frame_64 __user *) sp; | ||
267 | uregs = sigframe->uc.uc_mcontext.gp_regs; | ||
268 | if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || | ||
269 | read_user_stack_64(&uregs[PT_LNK], &lr) || | ||
270 | read_user_stack_64(&uregs[PT_R1], &sp)) | ||
271 | return; | ||
272 | level = 0; | ||
273 | perf_callchain_store(entry, PERF_CONTEXT_USER); | ||
274 | perf_callchain_store(entry, next_ip); | ||
275 | continue; | ||
276 | } | ||
277 | |||
278 | if (level == 0) | ||
279 | next_ip = lr; | ||
280 | perf_callchain_store(entry, next_ip); | ||
281 | ++level; | ||
282 | sp = next_sp; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | static inline int current_is_64bit(void) | ||
287 | { | ||
288 | /* | ||
289 | * We can't use test_thread_flag() here because we may be on an | ||
290 | * interrupt stack, and the thread flags don't get copied over | ||
291 | * from the thread_info on the main stack to the interrupt stack. | ||
292 | */ | ||
293 | return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); | ||
294 | } | ||
295 | |||
296 | #else /* CONFIG_PPC64 */ | ||
297 | /* | ||
298 | * On 32-bit we just access the address and let hash_page create a | ||
299 | * HPTE if necessary, so there is no need to fall back to reading | ||
300 | * the page tables. Since this is called at interrupt level, | ||
301 | * do_page_fault() won't treat a DSI as a page fault. | ||
302 | */ | ||
303 | static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) | ||
304 | { | ||
305 | int rc; | ||
306 | |||
307 | if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || | ||
308 | ((unsigned long)ptr & 3)) | ||
309 | return -EFAULT; | ||
310 | |||
311 | pagefault_disable(); | ||
312 | rc = __get_user_inatomic(*ret, ptr); | ||
313 | pagefault_enable(); | ||
314 | |||
315 | return rc; | ||
316 | } | ||
317 | |||
318 | static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, | ||
319 | struct pt_regs *regs) | ||
320 | { | ||
321 | } | ||
322 | |||
323 | static inline int current_is_64bit(void) | ||
324 | { | ||
325 | return 0; | ||
326 | } | ||
327 | |||
328 | static inline int valid_user_sp(unsigned long sp, int is_64) | ||
329 | { | ||
330 | if (!sp || (sp & 7) || sp > TASK_SIZE - 32) | ||
331 | return 0; | ||
332 | return 1; | ||
333 | } | ||
334 | |||
335 | #define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE | ||
336 | #define sigcontext32 sigcontext | ||
337 | #define mcontext32 mcontext | ||
338 | #define ucontext32 ucontext | ||
339 | #define compat_siginfo_t struct siginfo | ||
340 | |||
341 | #endif /* CONFIG_PPC64 */ | ||
342 | |||
343 | /* | ||
344 | * Layout for non-RT signal frames | ||
345 | */ | ||
346 | struct signal_frame_32 { | ||
347 | char dummy[__SIGNAL_FRAMESIZE32]; | ||
348 | struct sigcontext32 sctx; | ||
349 | struct mcontext32 mctx; | ||
350 | int abigap[56]; | ||
351 | }; | ||
352 | |||
353 | /* | ||
354 | * Layout for RT signal frames | ||
355 | */ | ||
356 | struct rt_signal_frame_32 { | ||
357 | char dummy[__SIGNAL_FRAMESIZE32 + 16]; | ||
358 | compat_siginfo_t info; | ||
359 | struct ucontext32 uc; | ||
360 | int abigap[56]; | ||
361 | }; | ||
362 | |||
363 | static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) | ||
364 | { | ||
365 | if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) | ||
366 | return 1; | ||
367 | if (vdso32_sigtramp && current->mm->context.vdso_base && | ||
368 | nip == current->mm->context.vdso_base + vdso32_sigtramp) | ||
369 | return 1; | ||
370 | return 0; | ||
371 | } | ||
372 | |||
373 | static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) | ||
374 | { | ||
375 | if (nip == fp + offsetof(struct rt_signal_frame_32, | ||
376 | uc.uc_mcontext.mc_pad)) | ||
377 | return 1; | ||
378 | if (vdso32_rt_sigtramp && current->mm->context.vdso_base && | ||
379 | nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) | ||
380 | return 1; | ||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | static int sane_signal_32_frame(unsigned int sp) | ||
385 | { | ||
386 | struct signal_frame_32 __user *sf; | ||
387 | unsigned int regs; | ||
388 | |||
389 | sf = (struct signal_frame_32 __user *) (unsigned long) sp; | ||
390 | if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) | ||
391 | return 0; | ||
392 | return regs == (unsigned long) &sf->mctx; | ||
393 | } | ||
394 | |||
395 | static int sane_rt_signal_32_frame(unsigned int sp) | ||
396 | { | ||
397 | struct rt_signal_frame_32 __user *sf; | ||
398 | unsigned int regs; | ||
399 | |||
400 | sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; | ||
401 | if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) | ||
402 | return 0; | ||
403 | return regs == (unsigned long) &sf->uc.uc_mcontext; | ||
404 | } | ||
405 | |||
406 | static unsigned int __user *signal_frame_32_regs(unsigned int sp, | ||
407 | unsigned int next_sp, unsigned int next_ip) | ||
408 | { | ||
409 | struct mcontext32 __user *mctx = NULL; | ||
410 | struct signal_frame_32 __user *sf; | ||
411 | struct rt_signal_frame_32 __user *rt_sf; | ||
412 | |||
413 | /* | ||
414 | * Note: the next_sp - sp >= signal frame size check | ||
415 | * is true when next_sp < sp, because the unsigned | ||
416 | * subtraction wraps; for example, when transitioning | ||
417 | * from an alternate signal stack to the normal stack. | ||
418 | */ | ||
419 | if (next_sp - sp >= sizeof(struct signal_frame_32) && | ||
420 | is_sigreturn_32_address(next_ip, sp) && | ||
421 | sane_signal_32_frame(sp)) { | ||
422 | sf = (struct signal_frame_32 __user *) (unsigned long) sp; | ||
423 | mctx = &sf->mctx; | ||
424 | } | ||
425 | |||
426 | if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && | ||
427 | is_rt_sigreturn_32_address(next_ip, sp) && | ||
428 | sane_rt_signal_32_frame(sp)) { | ||
429 | rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; | ||
430 | mctx = &rt_sf->uc.uc_mcontext; | ||
431 | } | ||
432 | |||
433 | if (!mctx) | ||
434 | return NULL; | ||
435 | return mctx->mc_gregs; | ||
436 | } | ||
437 | |||
438 | static void perf_callchain_user_32(struct perf_callchain_entry *entry, | ||
439 | struct pt_regs *regs) | ||
440 | { | ||
441 | unsigned int sp, next_sp; | ||
442 | unsigned int next_ip; | ||
443 | unsigned int lr; | ||
444 | long level = 0; | ||
445 | unsigned int __user *fp, *uregs; | ||
446 | |||
447 | next_ip = regs->nip; | ||
448 | lr = regs->link; | ||
449 | sp = regs->gpr[1]; | ||
450 | perf_callchain_store(entry, next_ip); | ||
451 | |||
452 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | ||
453 | fp = (unsigned int __user *) (unsigned long) sp; | ||
454 | if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) | ||
455 | return; | ||
456 | if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) | ||
457 | return; | ||
458 | |||
459 | uregs = signal_frame_32_regs(sp, next_sp, next_ip); | ||
460 | if (!uregs && level <= 1) | ||
461 | uregs = signal_frame_32_regs(sp, next_sp, lr); | ||
462 | if (uregs) { | ||
463 | /* | ||
464 | * This looks like a signal frame, so restart | ||
465 | * the stack trace with the values in it. | ||
466 | */ | ||
467 | if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || | ||
468 | read_user_stack_32(&uregs[PT_LNK], &lr) || | ||
469 | read_user_stack_32(&uregs[PT_R1], &sp)) | ||
470 | return; | ||
471 | level = 0; | ||
472 | perf_callchain_store(entry, PERF_CONTEXT_USER); | ||
473 | perf_callchain_store(entry, next_ip); | ||
474 | continue; | ||
475 | } | ||
476 | |||
477 | if (level == 0) | ||
478 | next_ip = lr; | ||
479 | perf_callchain_store(entry, next_ip); | ||
480 | ++level; | ||
481 | sp = next_sp; | ||
482 | } | ||
483 | } | ||
484 | |||
485 | void | ||
486 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | ||
487 | { | ||
488 | if (current_is_64bit()) | ||
489 | perf_callchain_user_64(entry, regs); | ||
490 | else | ||
491 | perf_callchain_user_32(entry, regs); | ||
492 | } | ||
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c deleted file mode 100644 index c2e27ede07ec..000000000000 --- a/arch/powerpc/kernel/perf_event.c +++ /dev/null | |||
@@ -1,1448 +0,0 @@ | |||
1 | /* | ||
2 | * Performance event support - powerpc architecture code | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/perf_event.h> | ||
14 | #include <linux/percpu.h> | ||
15 | #include <linux/hardirq.h> | ||
16 | #include <asm/reg.h> | ||
17 | #include <asm/pmc.h> | ||
18 | #include <asm/machdep.h> | ||
19 | #include <asm/firmware.h> | ||
20 | #include <asm/ptrace.h> | ||
21 | |||
22 | struct cpu_hw_events { | ||
23 | int n_events; | ||
24 | int n_percpu; | ||
25 | int disabled; | ||
26 | int n_added; | ||
27 | int n_limited; | ||
28 | u8 pmcs_enabled; | ||
29 | struct perf_event *event[MAX_HWEVENTS]; | ||
30 | u64 events[MAX_HWEVENTS]; | ||
31 | unsigned int flags[MAX_HWEVENTS]; | ||
32 | unsigned long mmcr[3]; | ||
33 | struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; | ||
34 | u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; | ||
35 | u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; | ||
36 | unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; | ||
37 | unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; | ||
38 | |||
39 | unsigned int group_flag; | ||
40 | int n_txn_start; | ||
41 | }; | ||
42 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
43 | |||
44 | struct power_pmu *ppmu; | ||
45 | |||
46 | /* | ||
47 | * Normally, to ignore kernel events we set the FCS (freeze counters | ||
48 | * in supervisor mode) bit in MMCR0, but if the kernel runs with the | ||
49 | * hypervisor bit set in the MSR, or if we are running on a processor | ||
50 | * where the hypervisor bit is forced to 1 (as on Apple G5 processors), | ||
51 | * then we need to use the FCHV bit to ignore kernel events. | ||
52 | */ | ||
53 | static unsigned int freeze_events_kernel = MMCR0_FCS; | ||
54 | |||
55 | /* | ||
56 | * 32-bit doesn't have MMCRA but does have an MMCR2, | ||
57 | * and a few other names are different. | ||
58 | */ | ||
59 | #ifdef CONFIG_PPC32 | ||
60 | |||
61 | #define MMCR0_FCHV 0 | ||
62 | #define MMCR0_PMCjCE MMCR0_PMCnCE | ||
63 | |||
64 | #define SPRN_MMCRA SPRN_MMCR2 | ||
65 | #define MMCRA_SAMPLE_ENABLE 0 | ||
66 | |||
67 | static inline unsigned long perf_ip_adjust(struct pt_regs *regs) | ||
68 | { | ||
69 | return 0; | ||
70 | } | ||
71 | static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } | ||
72 | static inline u32 perf_get_misc_flags(struct pt_regs *regs) | ||
73 | { | ||
74 | return 0; | ||
75 | } | ||
76 | static inline void perf_read_regs(struct pt_regs *regs) { } | ||
77 | static inline int perf_intr_is_nmi(struct pt_regs *regs) | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | #endif /* CONFIG_PPC32 */ | ||
83 | |||
84 | /* | ||
85 | * Things that are specific to 64-bit implementations. | ||
86 | */ | ||
87 | #ifdef CONFIG_PPC64 | ||
88 | |||
89 | static inline unsigned long perf_ip_adjust(struct pt_regs *regs) | ||
90 | { | ||
91 | unsigned long mmcra = regs->dsisr; | ||
92 | |||
93 | if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { | ||
94 | unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; | ||
95 | if (slot > 1) | ||
96 | return 4 * (slot - 1); | ||
97 | } | ||
98 | return 0; | ||
99 | } | ||
100 | |||
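A quick worked example of the slot adjustment above (illustrative values):

    /* MMCRA[SLOT] = 3  =>  perf_ip_adjust() returns 4 * (3 - 1) = 8,
     * so perf_instruction_pointer() below reports SIAR + 8, pointing
     * at the sampled instruction rather than the start of the group. */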
101 | /* | ||
102 | * The user wants a data address recorded. | ||
103 | * If we're not doing instruction sampling, give them the SDAR | ||
104 | * (sampled data address). If we are doing instruction sampling, then | ||
105 | * only give them the SDAR if it corresponds to the instruction | ||
106 | * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC | ||
107 | * bit in MMCRA. | ||
108 | */ | ||
109 | static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) | ||
110 | { | ||
111 | unsigned long mmcra = regs->dsisr; | ||
112 | unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? | ||
113 | POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; | ||
114 | |||
115 | if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) | ||
116 | *addrp = mfspr(SPRN_SDAR); | ||
117 | } | ||
118 | |||
119 | static inline u32 perf_get_misc_flags(struct pt_regs *regs) | ||
120 | { | ||
121 | unsigned long mmcra = regs->dsisr; | ||
122 | unsigned long sihv = MMCRA_SIHV; | ||
123 | unsigned long sipr = MMCRA_SIPR; | ||
124 | |||
125 | if (TRAP(regs) != 0xf00) | ||
126 | return 0; /* not a PMU interrupt */ | ||
127 | |||
128 | if (ppmu->flags & PPMU_ALT_SIPR) { | ||
129 | sihv = POWER6_MMCRA_SIHV; | ||
130 | sipr = POWER6_MMCRA_SIPR; | ||
131 | } | ||
132 | |||
133 | /* PR has priority over HV, so order below is important */ | ||
134 | if (mmcra & sipr) | ||
135 | return PERF_RECORD_MISC_USER; | ||
136 | if ((mmcra & sihv) && (freeze_events_kernel != MMCR0_FCHV)) | ||
137 | return PERF_RECORD_MISC_HYPERVISOR; | ||
138 | return PERF_RECORD_MISC_KERNEL; | ||
139 | } | ||
140 | |||
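The decision order above can be read as a small table (PR has priority over HV); descriptive summary, matching the code:

    /* MMCRA[SIPR] set                                 -> MISC_USER
     * MMCRA[SIHV] set && freeze_events_kernel != FCHV -> MISC_HYPERVISOR
     * otherwise                                       -> MISC_KERNEL  */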
141 | /* | ||
142 | * Overload regs->dsisr to store MMCRA so we only need to read it once | ||
143 | * on each interrupt. | ||
144 | */ | ||
145 | static inline void perf_read_regs(struct pt_regs *regs) | ||
146 | { | ||
147 | regs->dsisr = mfspr(SPRN_MMCRA); | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * If interrupts were soft-disabled when a PMU interrupt occurs, treat | ||
152 | * it as an NMI. | ||
153 | */ | ||
154 | static inline int perf_intr_is_nmi(struct pt_regs *regs) | ||
155 | { | ||
156 | return !regs->softe; | ||
157 | } | ||
158 | |||
159 | #endif /* CONFIG_PPC64 */ | ||
160 | |||
161 | static void perf_event_interrupt(struct pt_regs *regs); | ||
162 | |||
163 | void perf_event_print_debug(void) | ||
164 | { | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Read one performance monitor counter (PMC). | ||
169 | */ | ||
170 | static unsigned long read_pmc(int idx) | ||
171 | { | ||
172 | unsigned long val; | ||
173 | |||
174 | switch (idx) { | ||
175 | case 1: | ||
176 | val = mfspr(SPRN_PMC1); | ||
177 | break; | ||
178 | case 2: | ||
179 | val = mfspr(SPRN_PMC2); | ||
180 | break; | ||
181 | case 3: | ||
182 | val = mfspr(SPRN_PMC3); | ||
183 | break; | ||
184 | case 4: | ||
185 | val = mfspr(SPRN_PMC4); | ||
186 | break; | ||
187 | case 5: | ||
188 | val = mfspr(SPRN_PMC5); | ||
189 | break; | ||
190 | case 6: | ||
191 | val = mfspr(SPRN_PMC6); | ||
192 | break; | ||
193 | #ifdef CONFIG_PPC64 | ||
194 | case 7: | ||
195 | val = mfspr(SPRN_PMC7); | ||
196 | break; | ||
197 | case 8: | ||
198 | val = mfspr(SPRN_PMC8); | ||
199 | break; | ||
200 | #endif /* CONFIG_PPC64 */ | ||
201 | default: | ||
202 | printk(KERN_ERR "oops trying to read PMC%d\n", idx); | ||
203 | val = 0; | ||
204 | } | ||
205 | return val; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * Write one PMC. | ||
210 | */ | ||
211 | static void write_pmc(int idx, unsigned long val) | ||
212 | { | ||
213 | switch (idx) { | ||
214 | case 1: | ||
215 | mtspr(SPRN_PMC1, val); | ||
216 | break; | ||
217 | case 2: | ||
218 | mtspr(SPRN_PMC2, val); | ||
219 | break; | ||
220 | case 3: | ||
221 | mtspr(SPRN_PMC3, val); | ||
222 | break; | ||
223 | case 4: | ||
224 | mtspr(SPRN_PMC4, val); | ||
225 | break; | ||
226 | case 5: | ||
227 | mtspr(SPRN_PMC5, val); | ||
228 | break; | ||
229 | case 6: | ||
230 | mtspr(SPRN_PMC6, val); | ||
231 | break; | ||
232 | #ifdef CONFIG_PPC64 | ||
233 | case 7: | ||
234 | mtspr(SPRN_PMC7, val); | ||
235 | break; | ||
236 | case 8: | ||
237 | mtspr(SPRN_PMC8, val); | ||
238 | break; | ||
239 | #endif /* CONFIG_PPC64 */ | ||
240 | default: | ||
241 | printk(KERN_ERR "oops trying to write PMC%d\n", idx); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Check if a set of events can all go on the PMU at once. | ||
247 | * If they can't, this will look at alternative codes for the events | ||
248 | * and see if any combination of alternative codes is feasible. | ||
249 | * The feasible set is returned in event_id[]. | ||
250 | */ | ||
251 | static int power_check_constraints(struct cpu_hw_events *cpuhw, | ||
252 | u64 event_id[], unsigned int cflags[], | ||
253 | int n_ev) | ||
254 | { | ||
255 | unsigned long mask, value, nv; | ||
256 | unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; | ||
257 | int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS]; | ||
258 | int i, j; | ||
259 | unsigned long addf = ppmu->add_fields; | ||
260 | unsigned long tadd = ppmu->test_adder; | ||
261 | |||
262 | if (n_ev > ppmu->n_counter) | ||
263 | return -1; | ||
264 | |||
265 | /* First see if the events will go on as-is */ | ||
266 | for (i = 0; i < n_ev; ++i) { | ||
267 | if ((cflags[i] & PPMU_LIMITED_PMC_REQD) | ||
268 | && !ppmu->limited_pmc_event(event_id[i])) { | ||
269 | ppmu->get_alternatives(event_id[i], cflags[i], | ||
270 | cpuhw->alternatives[i]); | ||
271 | event_id[i] = cpuhw->alternatives[i][0]; | ||
272 | } | ||
273 | if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], | ||
274 | &cpuhw->avalues[i][0])) | ||
275 | return -1; | ||
276 | } | ||
277 | value = mask = 0; | ||
278 | for (i = 0; i < n_ev; ++i) { | ||
279 | nv = (value | cpuhw->avalues[i][0]) + | ||
280 | (value & cpuhw->avalues[i][0] & addf); | ||
281 | if ((((nv + tadd) ^ value) & mask) != 0 || | ||
282 | (((nv + tadd) ^ cpuhw->avalues[i][0]) & | ||
283 | cpuhw->amasks[i][0]) != 0) | ||
284 | break; | ||
285 | value = nv; | ||
286 | mask |= cpuhw->amasks[i][0]; | ||
287 | } | ||
288 | if (i == n_ev) | ||
289 | return 0; /* all OK */ | ||
290 | |||
291 | /* doesn't work, gather alternatives... */ | ||
292 | if (!ppmu->get_alternatives) | ||
293 | return -1; | ||
294 | for (i = 0; i < n_ev; ++i) { | ||
295 | choice[i] = 0; | ||
296 | n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i], | ||
297 | cpuhw->alternatives[i]); | ||
298 | for (j = 1; j < n_alt[i]; ++j) | ||
299 | ppmu->get_constraint(cpuhw->alternatives[i][j], | ||
300 | &cpuhw->amasks[i][j], | ||
301 | &cpuhw->avalues[i][j]); | ||
302 | } | ||
303 | |||
304 | /* enumerate all possibilities and see if any will work */ | ||
305 | i = 0; | ||
306 | j = -1; | ||
307 | value = mask = nv = 0; | ||
308 | while (i < n_ev) { | ||
309 | if (j >= 0) { | ||
310 | /* we're backtracking, restore context */ | ||
311 | value = svalues[i]; | ||
312 | mask = smasks[i]; | ||
313 | j = choice[i]; | ||
314 | } | ||
315 | /* | ||
316 | * See if any alternative k for event_id i, | ||
317 | * where k > j, will satisfy the constraints. | ||
318 | */ | ||
319 | while (++j < n_alt[i]) { | ||
320 | nv = (value | cpuhw->avalues[i][j]) + | ||
321 | (value & cpuhw->avalues[i][j] & addf); | ||
322 | if ((((nv + tadd) ^ value) & mask) == 0 && | ||
323 | (((nv + tadd) ^ cpuhw->avalues[i][j]) | ||
324 | & cpuhw->amasks[i][j]) == 0) | ||
325 | break; | ||
326 | } | ||
327 | if (j >= n_alt[i]) { | ||
328 | /* | ||
329 | * No feasible alternative, backtrack | ||
330 | * to event_id i-1 and continue enumerating its | ||
331 | * alternatives from where we got up to. | ||
332 | */ | ||
333 | if (--i < 0) | ||
334 | return -1; | ||
335 | } else { | ||
336 | /* | ||
337 | * Found a feasible alternative for event_id i, | ||
338 | * remember where we got up to with this event_id, | ||
339 | * go on to the next event_id, and start with | ||
340 | * the first alternative for it. | ||
341 | */ | ||
342 | choice[i] = j; | ||
343 | svalues[i] = value; | ||
344 | smasks[i] = mask; | ||
345 | value = nv; | ||
346 | mask |= cpuhw->amasks[i][j]; | ||
347 | ++i; | ||
348 | j = -1; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | /* OK, we have a feasible combination, tell the caller the solution */ | ||
353 | for (i = 0; i < n_ev; ++i) | ||
354 | event_id[i] = cpuhw->alternatives[i][choice[i]]; | ||
355 | return 0; | ||
356 | } | ||
357 | |||
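The loop above is a depth-first search with explicit save/restore arrays (svalues, smasks, choice). A recursive restatement may be easier to follow; this is a sketch only, where n_ev, n_alt[], choice[], addf and tadd stand for the locals of power_check_constraints():

    static int solve(struct cpu_hw_events *cpuhw, int i,
                     unsigned long value, unsigned long mask)
    {
            int j;

            if (i == n_ev)
                    return 0;               /* every event placed */
            for (j = 0; j < n_alt[i]; ++j) {
                    unsigned long nv = (value | cpuhw->avalues[i][j]) +
                                       (value & cpuhw->avalues[i][j] & addf);
                    /* skip alternatives that conflict with events 0..i-1 */
                    if ((((nv + tadd) ^ value) & mask) ||
                        (((nv + tadd) ^ cpuhw->avalues[i][j]) &
                         cpuhw->amasks[i][j]))
                            continue;
                    choice[i] = j;
                    if (solve(cpuhw, i + 1, nv,
                              mask | cpuhw->amasks[i][j]) == 0)
                            return 0;       /* subtree succeeded */
            }
            return -1;                      /* exhausted: backtrack */
    }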
358 | /* | ||
359 | * Check if newly-added events have consistent settings for | ||
360 | * exclude_{user,kernel,hv} with each other and any previously | ||
361 | * added events. | ||
362 | */ | ||
363 | static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], | ||
364 | int n_prev, int n_new) | ||
365 | { | ||
366 | int eu = 0, ek = 0, eh = 0; | ||
367 | int i, n, first; | ||
368 | struct perf_event *event; | ||
369 | |||
370 | n = n_prev + n_new; | ||
371 | if (n <= 1) | ||
372 | return 0; | ||
373 | |||
374 | first = 1; | ||
375 | for (i = 0; i < n; ++i) { | ||
376 | if (cflags[i] & PPMU_LIMITED_PMC_OK) { | ||
377 | cflags[i] &= ~PPMU_LIMITED_PMC_REQD; | ||
378 | continue; | ||
379 | } | ||
380 | event = ctrs[i]; | ||
381 | if (first) { | ||
382 | eu = event->attr.exclude_user; | ||
383 | ek = event->attr.exclude_kernel; | ||
384 | eh = event->attr.exclude_hv; | ||
385 | first = 0; | ||
386 | } else if (event->attr.exclude_user != eu || | ||
387 | event->attr.exclude_kernel != ek || | ||
388 | event->attr.exclude_hv != eh) { | ||
389 | return -EAGAIN; | ||
390 | } | ||
391 | } | ||
392 | |||
393 | if (eu || ek || eh) | ||
394 | for (i = 0; i < n; ++i) | ||
395 | if (cflags[i] & PPMU_LIMITED_PMC_OK) | ||
396 | cflags[i] |= PPMU_LIMITED_PMC_REQD; | ||
397 | |||
398 | return 0; | ||
399 | } | ||
400 | |||
401 | static u64 check_and_compute_delta(u64 prev, u64 val) | ||
402 | { | ||
403 | u64 delta = (val - prev) & 0xfffffffful; | ||
404 | |||
405 | /* | ||
406 | * POWER7 can roll back counter values; if the new value is smaller | ||
407 | * than the previous value, it will cause the delta and the counter to | ||
408 | * have bogus values unless we rolled a counter over. If a counter is | ||
409 | * rolled back, it will be smaller, but within 256, which is the maximum | ||
410 | * number of events to roll back at once. If we detect a rollback, | ||
411 | * return 0. This can lead to a small lack of precision in the | ||
412 | * counters. | ||
413 | */ | ||
414 | if (prev > val && (prev - val) < 256) | ||
415 | delta = 0; | ||
416 | |||
417 | return delta; | ||
418 | } | ||
419 | |||
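Two illustrative cases for check_and_compute_delta() above:

    /* Normal 32-bit wrap:  prev = 0xfffffff0, val = 0x00000010
     *   delta = (val - prev) & 0xffffffff = 0x20       -> counted
     * POWER7 rollback:     prev = 0x00001000, val = 0x00000fe0
     *   prev > val && (prev - val) = 0x20 < 256        -> delta = 0  */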
420 | static void power_pmu_read(struct perf_event *event) | ||
421 | { | ||
422 | s64 val, delta, prev; | ||
423 | |||
424 | if (event->hw.state & PERF_HES_STOPPED) | ||
425 | return; | ||
426 | |||
427 | if (!event->hw.idx) | ||
428 | return; | ||
429 | /* | ||
430 | * Performance monitor interrupts come even when interrupts | ||
431 | * are soft-disabled, as long as interrupts are hard-enabled. | ||
432 | * Therefore we treat them like NMIs. | ||
433 | */ | ||
434 | do { | ||
435 | prev = local64_read(&event->hw.prev_count); | ||
436 | barrier(); | ||
437 | val = read_pmc(event->hw.idx); | ||
438 | delta = check_and_compute_delta(prev, val); | ||
439 | if (!delta) | ||
440 | return; | ||
441 | } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); | ||
442 | |||
443 | local64_add(delta, &event->count); | ||
444 | local64_sub(delta, &event->hw.period_left); | ||
445 | } | ||
446 | |||
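A note on why power_pmu_read() loops on the cmpxchg (descriptive only):

    /* Between local64_read(&prev_count) and the cmpxchg, a PMU
     * interrupt may run record_and_restart() and move prev_count.
     * The cmpxchg then fails to match prev and the loop re-reads,
     * so the delta is always computed from a consistent snapshot. */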
447 | /* | ||
448 | * On some machines, PMC5 and PMC6 can't be written, don't respect | ||
449 | * the freeze conditions, and don't generate interrupts. This tells | ||
450 | * us if `event' is using such a PMC. | ||
451 | */ | ||
452 | static int is_limited_pmc(int pmcnum) | ||
453 | { | ||
454 | return (ppmu->flags & PPMU_LIMITED_PMC5_6) | ||
455 | && (pmcnum == 5 || pmcnum == 6); | ||
456 | } | ||
457 | |||
458 | static void freeze_limited_counters(struct cpu_hw_events *cpuhw, | ||
459 | unsigned long pmc5, unsigned long pmc6) | ||
460 | { | ||
461 | struct perf_event *event; | ||
462 | u64 val, prev, delta; | ||
463 | int i; | ||
464 | |||
465 | for (i = 0; i < cpuhw->n_limited; ++i) { | ||
466 | event = cpuhw->limited_counter[i]; | ||
467 | if (!event->hw.idx) | ||
468 | continue; | ||
469 | val = (event->hw.idx == 5) ? pmc5 : pmc6; | ||
470 | prev = local64_read(&event->hw.prev_count); | ||
471 | event->hw.idx = 0; | ||
472 | delta = check_and_compute_delta(prev, val); | ||
473 | if (delta) | ||
474 | local64_add(delta, &event->count); | ||
475 | } | ||
476 | } | ||
477 | |||
478 | static void thaw_limited_counters(struct cpu_hw_events *cpuhw, | ||
479 | unsigned long pmc5, unsigned long pmc6) | ||
480 | { | ||
481 | struct perf_event *event; | ||
482 | u64 val, prev; | ||
483 | int i; | ||
484 | |||
485 | for (i = 0; i < cpuhw->n_limited; ++i) { | ||
486 | event = cpuhw->limited_counter[i]; | ||
487 | event->hw.idx = cpuhw->limited_hwidx[i]; | ||
488 | val = (event->hw.idx == 5) ? pmc5 : pmc6; | ||
489 | prev = local64_read(&event->hw.prev_count); | ||
490 | if (check_and_compute_delta(prev, val)) | ||
491 | local64_set(&event->hw.prev_count, val); | ||
492 | perf_event_update_userpage(event); | ||
493 | } | ||
494 | } | ||
495 | |||
496 | /* | ||
497 | * Since limited events don't respect the freeze conditions, we | ||
498 | * have to read them immediately after freezing or unfreezing the | ||
499 | * other events. We try to keep the values from the limited | ||
500 | * events as consistent as possible by keeping the delay (in | ||
501 | * cycles and instructions) between freezing/unfreezing and reading | ||
502 | * the limited events as small and consistent as possible. | ||
503 | * Therefore, if any limited events are in use, we read them | ||
504 | * both, and always in the same order, to minimize variability, | ||
505 | * and do it inside the same asm that writes MMCR0. | ||
506 | */ | ||
507 | static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) | ||
508 | { | ||
509 | unsigned long pmc5, pmc6; | ||
510 | |||
511 | if (!cpuhw->n_limited) { | ||
512 | mtspr(SPRN_MMCR0, mmcr0); | ||
513 | return; | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * Write MMCR0, then read PMC5 and PMC6 immediately. | ||
518 | * To ensure we don't get a performance monitor interrupt | ||
519 | * between writing MMCR0 and freezing/thawing the limited | ||
520 | * events, we first write MMCR0 with the event overflow | ||
521 | * interrupt enable bits turned off. | ||
522 | */ | ||
523 | asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" | ||
524 | : "=&r" (pmc5), "=&r" (pmc6) | ||
525 | : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), | ||
526 | "i" (SPRN_MMCR0), | ||
527 | "i" (SPRN_PMC5), "i" (SPRN_PMC6)); | ||
528 | |||
529 | if (mmcr0 & MMCR0_FC) | ||
530 | freeze_limited_counters(cpuhw, pmc5, pmc6); | ||
531 | else | ||
532 | thaw_limited_counters(cpuhw, pmc5, pmc6); | ||
533 | |||
534 | /* | ||
535 | * Write the full MMCR0 including the event overflow interrupt | ||
536 | * enable bits, if necessary. | ||
537 | */ | ||
538 | if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) | ||
539 | mtspr(SPRN_MMCR0, mmcr0); | ||
540 | } | ||
541 | |||
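Unrolled into plain C, the asm block above performs the sequence below; the real code keeps it in a single asm statement so that no performance monitor interrupt can land between the mtspr and the two reads (sketch only):

    mtspr(SPRN_MMCR0, mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE));
    pmc5 = mfspr(SPRN_PMC5);
    pmc6 = mfspr(SPRN_PMC6);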
542 | /* | ||
543 | * Disable all events to prevent PMU interrupts and to allow | ||
544 | * events to be added or removed. | ||
545 | */ | ||
546 | static void power_pmu_disable(struct pmu *pmu) | ||
547 | { | ||
548 | struct cpu_hw_events *cpuhw; | ||
549 | unsigned long flags; | ||
550 | |||
551 | if (!ppmu) | ||
552 | return; | ||
553 | local_irq_save(flags); | ||
554 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
555 | |||
556 | if (!cpuhw->disabled) { | ||
557 | cpuhw->disabled = 1; | ||
558 | cpuhw->n_added = 0; | ||
559 | |||
560 | /* | ||
561 | * Check if we ever enabled the PMU on this cpu. | ||
562 | */ | ||
563 | if (!cpuhw->pmcs_enabled) { | ||
564 | ppc_enable_pmcs(); | ||
565 | cpuhw->pmcs_enabled = 1; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Disable instruction sampling if it was enabled | ||
570 | */ | ||
571 | if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { | ||
572 | mtspr(SPRN_MMCRA, | ||
573 | cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | ||
574 | mb(); | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * Set the 'freeze counters' bit. | ||
579 | * The barrier is to make sure the mtspr has been | ||
580 | * executed and the PMU has frozen the events | ||
581 | * before we return. | ||
582 | */ | ||
583 | write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); | ||
584 | mb(); | ||
585 | } | ||
586 | local_irq_restore(flags); | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * Re-enable all events if they were previously disabled. | ||
591 | * If we were previously disabled and events were added, then | ||
592 | * put the new config on the PMU. | ||
593 | */ | ||
594 | static void power_pmu_enable(struct pmu *pmu) | ||
595 | { | ||
596 | struct perf_event *event; | ||
597 | struct cpu_hw_events *cpuhw; | ||
598 | unsigned long flags; | ||
599 | long i; | ||
600 | unsigned long val; | ||
601 | s64 left; | ||
602 | unsigned int hwc_index[MAX_HWEVENTS]; | ||
603 | int n_lim; | ||
604 | int idx; | ||
605 | |||
606 | if (!ppmu) | ||
607 | return; | ||
608 | local_irq_save(flags); | ||
609 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
610 | if (!cpuhw->disabled) { | ||
611 | local_irq_restore(flags); | ||
612 | return; | ||
613 | } | ||
614 | cpuhw->disabled = 0; | ||
615 | |||
616 | /* | ||
617 | * If we didn't change anything, or only removed events, | ||
618 | * no need to recalculate MMCR* settings and reset the PMCs. | ||
619 | * Just reenable the PMU with the current MMCR* settings | ||
620 | * (possibly updated for removal of events). | ||
621 | */ | ||
622 | if (!cpuhw->n_added) { | ||
623 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | ||
624 | mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); | ||
625 | if (cpuhw->n_events == 0) | ||
626 | ppc_set_pmu_inuse(0); | ||
627 | goto out_enable; | ||
628 | } | ||
629 | |||
630 | /* | ||
631 | * Compute MMCR* values for the new set of events | ||
632 | */ | ||
633 | if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, | ||
634 | cpuhw->mmcr)) { | ||
635 | /* shouldn't ever get here */ | ||
636 | printk(KERN_ERR "oops compute_mmcr failed\n"); | ||
637 | goto out; | ||
638 | } | ||
639 | |||
640 | /* | ||
641 | * Add in MMCR0 freeze bits corresponding to the | ||
642 | * attr.exclude_* bits for the first event. | ||
643 | * We have already checked that all events have the | ||
644 | * same values for these bits as the first event. | ||
645 | */ | ||
646 | event = cpuhw->event[0]; | ||
647 | if (event->attr.exclude_user) | ||
648 | cpuhw->mmcr[0] |= MMCR0_FCP; | ||
649 | if (event->attr.exclude_kernel) | ||
650 | cpuhw->mmcr[0] |= freeze_events_kernel; | ||
651 | if (event->attr.exclude_hv) | ||
652 | cpuhw->mmcr[0] |= MMCR0_FCHV; | ||
653 | |||
654 | /* | ||
655 | * Write the new configuration to MMCR* with the freeze | ||
656 | * bit set and set the hardware events to their initial values. | ||
657 | * Then unfreeze the events. | ||
658 | */ | ||
659 | ppc_set_pmu_inuse(1); | ||
660 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | ||
661 | mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); | ||
662 | mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | ||
663 | | MMCR0_FC); | ||
664 | |||
665 | /* | ||
666 | * Read off any pre-existing events that need to move | ||
667 | * to another PMC. | ||
668 | */ | ||
669 | for (i = 0; i < cpuhw->n_events; ++i) { | ||
670 | event = cpuhw->event[i]; | ||
671 | if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) { | ||
672 | power_pmu_read(event); | ||
673 | write_pmc(event->hw.idx, 0); | ||
674 | event->hw.idx = 0; | ||
675 | } | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * Initialize the PMCs for all the new and moved events. | ||
680 | */ | ||
681 | cpuhw->n_limited = n_lim = 0; | ||
682 | for (i = 0; i < cpuhw->n_events; ++i) { | ||
683 | event = cpuhw->event[i]; | ||
684 | if (event->hw.idx) | ||
685 | continue; | ||
686 | idx = hwc_index[i] + 1; | ||
687 | if (is_limited_pmc(idx)) { | ||
688 | cpuhw->limited_counter[n_lim] = event; | ||
689 | cpuhw->limited_hwidx[n_lim] = idx; | ||
690 | ++n_lim; | ||
691 | continue; | ||
692 | } | ||
693 | val = 0; | ||
694 | if (event->hw.sample_period) { | ||
695 | left = local64_read(&event->hw.period_left); | ||
696 | if (left < 0x80000000L) | ||
697 | val = 0x80000000L - left; | ||
698 | } | ||
699 | local64_set(&event->hw.prev_count, val); | ||
700 | event->hw.idx = idx; | ||
701 | if (event->hw.state & PERF_HES_STOPPED) | ||
702 | val = 0; | ||
703 | write_pmc(idx, val); | ||
704 | perf_event_update_userpage(event); | ||
705 | } | ||
706 | cpuhw->n_limited = n_lim; | ||
707 | cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; | ||
708 | |||
709 | out_enable: | ||
710 | mb(); | ||
711 | write_mmcr0(cpuhw, cpuhw->mmcr[0]); | ||
712 | |||
713 | /* | ||
714 | * Enable instruction sampling if necessary | ||
715 | */ | ||
716 | if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { | ||
717 | mb(); | ||
718 | mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); | ||
719 | } | ||
720 | |||
721 | out: | ||
722 | local_irq_restore(flags); | ||
723 | } | ||
724 | |||
725 | static int collect_events(struct perf_event *group, int max_count, | ||
726 | struct perf_event *ctrs[], u64 *events, | ||
727 | unsigned int *flags) | ||
728 | { | ||
729 | int n = 0; | ||
730 | struct perf_event *event; | ||
731 | |||
732 | if (!is_software_event(group)) { | ||
733 | if (n >= max_count) | ||
734 | return -1; | ||
735 | ctrs[n] = group; | ||
736 | flags[n] = group->hw.event_base; | ||
737 | events[n++] = group->hw.config; | ||
738 | } | ||
739 | list_for_each_entry(event, &group->sibling_list, group_entry) { | ||
740 | if (!is_software_event(event) && | ||
741 | event->state != PERF_EVENT_STATE_OFF) { | ||
742 | if (n >= max_count) | ||
743 | return -1; | ||
744 | ctrs[n] = event; | ||
745 | flags[n] = event->hw.event_base; | ||
746 | events[n++] = event->hw.config; | ||
747 | } | ||
748 | } | ||
749 | return n; | ||
750 | } | ||
751 | |||
752 | /* | ||
753 | * Add an event to the PMU. | ||
754 | * If all events are not already frozen, then we disable and | ||
755 | * re-enable the PMU in order to get hw_perf_enable to do the | ||
756 | * actual work of reconfiguring the PMU. | ||
757 | */ | ||
758 | static int power_pmu_add(struct perf_event *event, int ef_flags) | ||
759 | { | ||
760 | struct cpu_hw_events *cpuhw; | ||
761 | unsigned long flags; | ||
762 | int n0; | ||
763 | int ret = -EAGAIN; | ||
764 | |||
765 | local_irq_save(flags); | ||
766 | perf_pmu_disable(event->pmu); | ||
767 | |||
768 | /* | ||
769 | * Add the event to the list (if there is room) | ||
770 | * and check whether the total set is still feasible. | ||
771 | */ | ||
772 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
773 | n0 = cpuhw->n_events; | ||
774 | if (n0 >= ppmu->n_counter) | ||
775 | goto out; | ||
776 | cpuhw->event[n0] = event; | ||
777 | cpuhw->events[n0] = event->hw.config; | ||
778 | cpuhw->flags[n0] = event->hw.event_base; | ||
779 | |||
780 | if (!(ef_flags & PERF_EF_START)) | ||
781 | event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
782 | |||
783 | /* | ||
784 | * If group events scheduling transaction was started, | ||
785 | * skip the schedulability test here, it will be performed | ||
786 | * at commit time (->commit_txn) as a whole | ||
787 | */ | ||
788 | if (cpuhw->group_flag & PERF_EVENT_TXN) | ||
789 | goto nocheck; | ||
790 | |||
791 | if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) | ||
792 | goto out; | ||
793 | if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) | ||
794 | goto out; | ||
795 | event->hw.config = cpuhw->events[n0]; | ||
796 | |||
797 | nocheck: | ||
798 | ++cpuhw->n_events; | ||
799 | ++cpuhw->n_added; | ||
800 | |||
801 | ret = 0; | ||
802 | out: | ||
803 | perf_pmu_enable(event->pmu); | ||
804 | local_irq_restore(flags); | ||
805 | return ret; | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Remove an event from the PMU. | ||
810 | */ | ||
811 | static void power_pmu_del(struct perf_event *event, int ef_flags) | ||
812 | { | ||
813 | struct cpu_hw_events *cpuhw; | ||
814 | long i; | ||
815 | unsigned long flags; | ||
816 | |||
817 | local_irq_save(flags); | ||
818 | perf_pmu_disable(event->pmu); | ||
819 | |||
820 | power_pmu_read(event); | ||
821 | |||
822 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
823 | for (i = 0; i < cpuhw->n_events; ++i) { | ||
824 | if (event == cpuhw->event[i]) { | ||
825 | while (++i < cpuhw->n_events) { | ||
826 | cpuhw->event[i-1] = cpuhw->event[i]; | ||
827 | cpuhw->events[i-1] = cpuhw->events[i]; | ||
828 | cpuhw->flags[i-1] = cpuhw->flags[i]; | ||
829 | } | ||
830 | --cpuhw->n_events; | ||
831 | ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); | ||
832 | if (event->hw.idx) { | ||
833 | write_pmc(event->hw.idx, 0); | ||
834 | event->hw.idx = 0; | ||
835 | } | ||
836 | perf_event_update_userpage(event); | ||
837 | break; | ||
838 | } | ||
839 | } | ||
840 | for (i = 0; i < cpuhw->n_limited; ++i) | ||
841 | if (event == cpuhw->limited_counter[i]) | ||
842 | break; | ||
843 | if (i < cpuhw->n_limited) { | ||
844 | while (++i < cpuhw->n_limited) { | ||
845 | cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; | ||
846 | cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; | ||
847 | } | ||
848 | --cpuhw->n_limited; | ||
849 | } | ||
850 | if (cpuhw->n_events == 0) { | ||
851 | /* disable exceptions if no events are running */ | ||
852 | cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); | ||
853 | } | ||
854 | |||
855 | perf_pmu_enable(event->pmu); | ||
856 | local_irq_restore(flags); | ||
857 | } | ||
858 | |||
859 | /* | ||
860 | * POWER-PMU does not support disabling individual counters, hence | ||
861 | * program the counters to their max value and ignore the interrupts. | ||
862 | */ | ||
863 | |||
864 | static void power_pmu_start(struct perf_event *event, int ef_flags) | ||
865 | { | ||
866 | unsigned long flags; | ||
867 | s64 left; | ||
868 | unsigned long val; | ||
869 | |||
870 | if (!event->hw.idx || !event->hw.sample_period) | ||
871 | return; | ||
872 | |||
873 | if (!(event->hw.state & PERF_HES_STOPPED)) | ||
874 | return; | ||
875 | |||
876 | if (ef_flags & PERF_EF_RELOAD) | ||
877 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
878 | |||
879 | local_irq_save(flags); | ||
880 | perf_pmu_disable(event->pmu); | ||
881 | |||
882 | event->hw.state = 0; | ||
883 | left = local64_read(&event->hw.period_left); | ||
884 | |||
885 | val = 0; | ||
886 | if (left < 0x80000000L) | ||
887 | val = 0x80000000L - left; | ||
888 | |||
889 | write_pmc(event->hw.idx, val); | ||
890 | |||
891 | perf_event_update_userpage(event); | ||
892 | perf_pmu_enable(event->pmu); | ||
893 | local_irq_restore(flags); | ||
894 | } | ||
895 | |||
896 | static void power_pmu_stop(struct perf_event *event, int ef_flags) | ||
897 | { | ||
898 | unsigned long flags; | ||
899 | |||
900 | if (!event->hw.idx || !event->hw.sample_period) | ||
901 | return; | ||
902 | |||
903 | if (event->hw.state & PERF_HES_STOPPED) | ||
904 | return; | ||
905 | |||
906 | local_irq_save(flags); | ||
907 | perf_pmu_disable(event->pmu); | ||
908 | |||
909 | power_pmu_read(event); | ||
910 | event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
911 | write_pmc(event->hw.idx, 0); | ||
912 | |||
913 | perf_event_update_userpage(event); | ||
914 | perf_pmu_enable(event->pmu); | ||
915 | local_irq_restore(flags); | ||
916 | } | ||
917 | |||
918 | /* | ||
919 | * Start group events scheduling transaction | ||
920 | * Set the flag to make pmu::enable() not perform the | ||
921 | * schedulability test, it will be performed at commit time | ||
922 | */ | ||
923 | void power_pmu_start_txn(struct pmu *pmu) | ||
924 | { | ||
925 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
926 | |||
927 | perf_pmu_disable(pmu); | ||
928 | cpuhw->group_flag |= PERF_EVENT_TXN; | ||
929 | cpuhw->n_txn_start = cpuhw->n_events; | ||
930 | } | ||
931 | |||
932 | /* | ||
933 | * Stop group events scheduling transaction | ||
934 | * Clear the flag and pmu::enable() will perform the | ||
935 | * schedulability test. | ||
936 | */ | ||
937 | void power_pmu_cancel_txn(struct pmu *pmu) | ||
938 | { | ||
939 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
940 | |||
941 | cpuhw->group_flag &= ~PERF_EVENT_TXN; | ||
942 | perf_pmu_enable(pmu); | ||
943 | } | ||
944 | |||
945 | /* | ||
946 | * Commit group events scheduling transaction | ||
947 | * Perform the group schedulability test as a whole | ||
948 | * Return 0 if success | ||
949 | */ | ||
950 | int power_pmu_commit_txn(struct pmu *pmu) | ||
951 | { | ||
952 | struct cpu_hw_events *cpuhw; | ||
953 | long i, n; | ||
954 | |||
955 | if (!ppmu) | ||
956 | return -EAGAIN; | ||
957 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
958 | n = cpuhw->n_events; | ||
959 | if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) | ||
960 | return -EAGAIN; | ||
961 | i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); | ||
962 | if (i < 0) | ||
963 | return -EAGAIN; | ||
964 | |||
965 | for (i = cpuhw->n_txn_start; i < n; ++i) | ||
966 | cpuhw->event[i]->hw.config = cpuhw->events[i]; | ||
967 | |||
968 | cpuhw->group_flag &= ~PERF_EVENT_TXN; | ||
969 | perf_pmu_enable(pmu); | ||
970 | return 0; | ||
971 | } | ||
972 | |||
973 | /* | ||
974 | * Return 1 if we might be able to put the event on a limited PMC, | ||
975 | * or 0 if not. | ||
976 | * An event can only go on a limited PMC if it counts something | ||
977 | * that a limited PMC can count, doesn't require interrupts, and | ||
978 | * doesn't exclude any processor mode. | ||
979 | */ | ||
980 | static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, | ||
981 | unsigned int flags) | ||
982 | { | ||
983 | int n; | ||
984 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
985 | |||
986 | if (event->attr.exclude_user | ||
987 | || event->attr.exclude_kernel | ||
988 | || event->attr.exclude_hv | ||
989 | || event->attr.sample_period) | ||
990 | return 0; | ||
991 | |||
992 | if (ppmu->limited_pmc_event(ev)) | ||
993 | return 1; | ||
994 | |||
995 | /* | ||
996 | * The requested event_id isn't on a limited PMC already; | ||
997 | * see if any alternative code goes on a limited PMC. | ||
998 | */ | ||
999 | if (!ppmu->get_alternatives) | ||
1000 | return 0; | ||
1001 | |||
1002 | flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; | ||
1003 | n = ppmu->get_alternatives(ev, flags, alt); | ||
1004 | |||
1005 | return n > 0; | ||
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * Find an alternative event_id that goes on a normal PMC, if possible, | ||
1010 | * and return the event_id code, or 0 if there is no such alternative. | ||
1011 | * (Note: event_id code 0 is "don't count" on all machines.) | ||
1012 | */ | ||
1013 | static u64 normal_pmc_alternative(u64 ev, unsigned long flags) | ||
1014 | { | ||
1015 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
1016 | int n; | ||
1017 | |||
1018 | flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); | ||
1019 | n = ppmu->get_alternatives(ev, flags, alt); | ||
1020 | if (!n) | ||
1021 | return 0; | ||
1022 | return alt[0]; | ||
1023 | } | ||
1024 | |||
1025 | /* Number of perf_events counting hardware events */ | ||
1026 | static atomic_t num_events; | ||
1027 | /* Used to avoid races in calling reserve/release_pmc_hardware */ | ||
1028 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
1029 | |||
1030 | /* | ||
1031 | * Release the PMU if this is the last perf_event. | ||
1032 | */ | ||
1033 | static void hw_perf_event_destroy(struct perf_event *event) | ||
1034 | { | ||
1035 | if (!atomic_add_unless(&num_events, -1, 1)) { | ||
1036 | mutex_lock(&pmc_reserve_mutex); | ||
1037 | if (atomic_dec_return(&num_events) == 0) | ||
1038 | release_pmc_hardware(); | ||
1039 | mutex_unlock(&pmc_reserve_mutex); | ||
1040 | } | ||
1041 | } | ||
1042 | |||
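Sketch of the refcount protocol this shares with power_pmu_event_init() further down (descriptive only):

    /* init:    atomic_inc_not_zero(&num_events) fast path; otherwise
     *          take pmc_reserve_mutex, reserve_pmc_hardware(), inc.
     * destroy: atomic_add_unless(&num_events, -1, 1) fast path;
     *          otherwise take the mutex so the final decrement and
     *          release_pmc_hardware() cannot race a new reservation. */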
1043 | /* | ||
1044 | * Translate a generic cache event_id config to a raw event_id code. | ||
1045 | */ | ||
1046 | static int hw_perf_cache_event(u64 config, u64 *eventp) | ||
1047 | { | ||
1048 | unsigned long type, op, result; | ||
1049 | int ev; | ||
1050 | |||
1051 | if (!ppmu->cache_events) | ||
1052 | return -EINVAL; | ||
1053 | |||
1054 | /* unpack config */ | ||
1055 | type = config & 0xff; | ||
1056 | op = (config >> 8) & 0xff; | ||
1057 | result = (config >> 16) & 0xff; | ||
1058 | |||
1059 | if (type >= PERF_COUNT_HW_CACHE_MAX || | ||
1060 | op >= PERF_COUNT_HW_CACHE_OP_MAX || | ||
1061 | result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
1062 | return -EINVAL; | ||
1063 | |||
1064 | ev = (*ppmu->cache_events)[type][op][result]; | ||
1065 | if (ev == 0) | ||
1066 | return -EOPNOTSUPP; | ||
1067 | if (ev == -1) | ||
1068 | return -EINVAL; | ||
1069 | *eventp = ev; | ||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
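The unpacking above follows the generic perf cache encoding: type in bits 0-7, op in bits 8-15, result in bits 16-23. For example, L1 data-cache read misses would be requested as (illustrative):

    u64 config = PERF_COUNT_HW_CACHE_L1D |
                 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);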
1073 | static int power_pmu_event_init(struct perf_event *event) | ||
1074 | { | ||
1075 | u64 ev; | ||
1076 | unsigned long flags; | ||
1077 | struct perf_event *ctrs[MAX_HWEVENTS]; | ||
1078 | u64 events[MAX_HWEVENTS]; | ||
1079 | unsigned int cflags[MAX_HWEVENTS]; | ||
1080 | int n; | ||
1081 | int err; | ||
1082 | struct cpu_hw_events *cpuhw; | ||
1083 | |||
1084 | if (!ppmu) | ||
1085 | return -ENOENT; | ||
1086 | |||
1087 | /* does not support taken branch sampling */ | ||
1088 | if (has_branch_stack(event)) | ||
1089 | return -EOPNOTSUPP; | ||
1090 | |||
1091 | switch (event->attr.type) { | ||
1092 | case PERF_TYPE_HARDWARE: | ||
1093 | ev = event->attr.config; | ||
1094 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | ||
1095 | return -EOPNOTSUPP; | ||
1096 | ev = ppmu->generic_events[ev]; | ||
1097 | break; | ||
1098 | case PERF_TYPE_HW_CACHE: | ||
1099 | err = hw_perf_cache_event(event->attr.config, &ev); | ||
1100 | if (err) | ||
1101 | return err; | ||
1102 | break; | ||
1103 | case PERF_TYPE_RAW: | ||
1104 | ev = event->attr.config; | ||
1105 | break; | ||
1106 | default: | ||
1107 | return -ENOENT; | ||
1108 | } | ||
1109 | |||
1110 | event->hw.config_base = ev; | ||
1111 | event->hw.idx = 0; | ||
1112 | |||
1113 | /* | ||
1114 | * If we are not running on a hypervisor, force the | ||
1115 | * exclude_hv bit to 0 so that we don't care what | ||
1116 | * the user set it to. | ||
1117 | */ | ||
1118 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | ||
1119 | event->attr.exclude_hv = 0; | ||
1120 | |||
1121 | /* | ||
1122 | * If this is a per-task event, then we can use | ||
1123 | * PM_RUN_* events interchangeably with their non-RUN_* | ||
1124 | * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. | ||
1125 | * XXX we should check if the task is an idle task. | ||
1126 | */ | ||
1127 | flags = 0; | ||
1128 | if (event->attach_state & PERF_ATTACH_TASK) | ||
1129 | flags |= PPMU_ONLY_COUNT_RUN; | ||
1130 | |||
1131 | /* | ||
1132 | * If this machine has limited events, check whether this | ||
1133 | * event_id could go on a limited event. | ||
1134 | */ | ||
1135 | if (ppmu->flags & PPMU_LIMITED_PMC5_6) { | ||
1136 | if (can_go_on_limited_pmc(event, ev, flags)) { | ||
1137 | flags |= PPMU_LIMITED_PMC_OK; | ||
1138 | } else if (ppmu->limited_pmc_event(ev)) { | ||
1139 | /* | ||
1140 | * The requested event_id is on a limited PMC, | ||
1141 | * but we can't use a limited PMC; see if any | ||
1142 | * alternative goes on a normal PMC. | ||
1143 | */ | ||
1144 | ev = normal_pmc_alternative(ev, flags); | ||
1145 | if (!ev) | ||
1146 | return -EINVAL; | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | /* | ||
1151 | * If this is in a group, check if it can go on with all the | ||
1152 | * other hardware events in the group. We assume the event | ||
1153 | * hasn't been linked into its leader's sibling list at this point. | ||
1154 | */ | ||
1155 | n = 0; | ||
1156 | if (event->group_leader != event) { | ||
1157 | n = collect_events(event->group_leader, ppmu->n_counter - 1, | ||
1158 | ctrs, events, cflags); | ||
1159 | if (n < 0) | ||
1160 | return -EINVAL; | ||
1161 | } | ||
1162 | events[n] = ev; | ||
1163 | ctrs[n] = event; | ||
1164 | cflags[n] = flags; | ||
1165 | if (check_excludes(ctrs, cflags, n, 1)) | ||
1166 | return -EINVAL; | ||
1167 | |||
1168 | cpuhw = &get_cpu_var(cpu_hw_events); | ||
1169 | err = power_check_constraints(cpuhw, events, cflags, n + 1); | ||
1170 | put_cpu_var(cpu_hw_events); | ||
1171 | if (err) | ||
1172 | return -EINVAL; | ||
1173 | |||
1174 | event->hw.config = events[n]; | ||
1175 | event->hw.event_base = cflags[n]; | ||
1176 | event->hw.last_period = event->hw.sample_period; | ||
1177 | local64_set(&event->hw.period_left, event->hw.last_period); | ||
1178 | |||
1179 | /* | ||
1180 | * See if we need to reserve the PMU. | ||
1181 | * If no events are currently in use, then we have to take a | ||
1182 | * mutex to ensure that we don't race with another task doing | ||
1183 | * reserve_pmc_hardware or release_pmc_hardware. | ||
1184 | */ | ||
1185 | err = 0; | ||
1186 | if (!atomic_inc_not_zero(&num_events)) { | ||
1187 | mutex_lock(&pmc_reserve_mutex); | ||
1188 | if (atomic_read(&num_events) == 0 && | ||
1189 | reserve_pmc_hardware(perf_event_interrupt)) | ||
1190 | err = -EBUSY; | ||
1191 | else | ||
1192 | atomic_inc(&num_events); | ||
1193 | mutex_unlock(&pmc_reserve_mutex); | ||
1194 | } | ||
1195 | event->destroy = hw_perf_event_destroy; | ||
1196 | |||
1197 | return err; | ||
1198 | } | ||
1199 | |||
1200 | static int power_pmu_event_idx(struct perf_event *event) | ||
1201 | { | ||
1202 | return event->hw.idx; | ||
1203 | } | ||
1204 | |||
1205 | struct pmu power_pmu = { | ||
1206 | .pmu_enable = power_pmu_enable, | ||
1207 | .pmu_disable = power_pmu_disable, | ||
1208 | .event_init = power_pmu_event_init, | ||
1209 | .add = power_pmu_add, | ||
1210 | .del = power_pmu_del, | ||
1211 | .start = power_pmu_start, | ||
1212 | .stop = power_pmu_stop, | ||
1213 | .read = power_pmu_read, | ||
1214 | .start_txn = power_pmu_start_txn, | ||
1215 | .cancel_txn = power_pmu_cancel_txn, | ||
1216 | .commit_txn = power_pmu_commit_txn, | ||
1217 | .event_idx = power_pmu_event_idx, | ||
1218 | }; | ||
1219 | |||
1220 | /* | ||
1221 | * A counter has overflowed; update its count and record | ||
1222 | * things if requested. Note that interrupts are hard-disabled | ||
1223 | * here so there is no possibility of being interrupted. | ||
1224 | */ | ||
1225 | static void record_and_restart(struct perf_event *event, unsigned long val, | ||
1226 | struct pt_regs *regs) | ||
1227 | { | ||
1228 | u64 period = event->hw.sample_period; | ||
1229 | s64 prev, delta, left; | ||
1230 | int record = 0; | ||
1231 | |||
1232 | if (event->hw.state & PERF_HES_STOPPED) { | ||
1233 | write_pmc(event->hw.idx, 0); | ||
1234 | return; | ||
1235 | } | ||
1236 | |||
1237 | /* we don't have to worry about interrupts here */ | ||
1238 | prev = local64_read(&event->hw.prev_count); | ||
1239 | delta = check_and_compute_delta(prev, val); | ||
1240 | local64_add(delta, &event->count); | ||
1241 | |||
1242 | /* | ||
1243 | * See if the total period for this event has expired, | ||
1244 | * and update for the next period. | ||
1245 | */ | ||
1246 | val = 0; | ||
1247 | left = local64_read(&event->hw.period_left) - delta; | ||
1248 | if (period) { | ||
1249 | if (left <= 0) { | ||
1250 | left += period; | ||
1251 | if (left <= 0) | ||
1252 | left = period; | ||
1253 | record = 1; | ||
1254 | event->hw.last_period = event->hw.sample_period; | ||
1255 | } | ||
1256 | if (left < 0x80000000LL) | ||
1257 | val = 0x80000000LL - left; | ||
1258 | } | ||
1259 | |||
1260 | write_pmc(event->hw.idx, val); | ||
1261 | local64_set(&event->hw.prev_count, val); | ||
1262 | local64_set(&event->hw.period_left, left); | ||
1263 | perf_event_update_userpage(event); | ||
1264 | |||
1265 | /* | ||
1266 | * Finally record data if requested. | ||
1267 | */ | ||
1268 | if (record) { | ||
1269 | struct perf_sample_data data; | ||
1270 | |||
1271 | perf_sample_data_init(&data, ~0ULL); | ||
1272 | data.period = event->hw.last_period; | ||
1273 | |||
1274 | if (event->attr.sample_type & PERF_SAMPLE_ADDR) | ||
1275 | perf_get_data_addr(regs, &data.addr); | ||
1276 | |||
1277 | if (perf_event_overflow(event, &data, regs)) | ||
1278 | power_pmu_stop(event, 0); | ||
1279 | } | ||
1280 | } | ||
1281 | |||
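A worked example of the reload arithmetic in record_and_restart(), with illustrative numbers:

    /* sample_period = 100000, delta since the last reload = 100040:
     *   left = period_left - delta = -40      -> period expired
     *   left += period             => 99960      (record = 1)
     *   val  = 0x80000000 - 99960             -> the PMC reaches the
     *          overflow threshold after 99960 more events            */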
1282 | /* | ||
1283 | * Called from generic code to get the misc flags (i.e. processor mode) | ||
1284 | * for an event_id. | ||
1285 | */ | ||
1286 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1287 | { | ||
1288 | u32 flags = perf_get_misc_flags(regs); | ||
1289 | |||
1290 | if (flags) | ||
1291 | return flags; | ||
1292 | return user_mode(regs) ? PERF_RECORD_MISC_USER : | ||
1293 | PERF_RECORD_MISC_KERNEL; | ||
1294 | } | ||
1295 | |||
1296 | /* | ||
1297 | * Called from generic code to get the instruction pointer | ||
1298 | * for an event_id. | ||
1299 | */ | ||
1300 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1301 | { | ||
1302 | unsigned long ip; | ||
1303 | |||
1304 | if (TRAP(regs) != 0xf00) | ||
1305 | return regs->nip; /* not a PMU interrupt */ | ||
1306 | |||
1307 | ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); | ||
1308 | return ip; | ||
1309 | } | ||
1310 | |||
1311 | static bool pmc_overflow(unsigned long val) | ||
1312 | { | ||
1313 | if ((int)val < 0) | ||
1314 | return true; | ||
1315 | |||
1316 | /* | ||
1317 | * Events on POWER7 can roll back if a speculative event doesn't | ||
1318 | * eventually complete. Unfortunately in some rare cases they will | ||
1319 | * raise a performance monitor exception. We need to catch this to | ||
1320 | * ensure we reset the PMC. In all cases the PMC will be 256 or less | ||
1321 | * cycles from overflow. | ||
1322 | * | ||
1323 | * We only do this if the first pass fails to find any overflowing | ||
1324 | * PMCs because a user might set a period of less than 256 and we | ||
1325 | * don't want to mistakenly reset them. | ||
1326 | */ | ||
1327 | if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) | ||
1328 | return true; | ||
1329 | |||
1330 | return false; | ||
1331 | } | ||
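
The POWER7 special case can be restated to make the window explicit: val counts as overflowed when it lies within 256 counts below the 2^31 overflow point, catching counters that rolled back after a speculative event. A self-contained sketch, assuming the same 32-bit counter semantics:

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace restatement of pmc_overflow(): bit 31 set, or
     * (POWER7 only) within 256 counts below the overflow point. */
    static int pmc_overflowed(uint32_t val, int is_power7)
    {
            if ((int32_t)val < 0)
                    return 1;
            return is_power7 && (0x80000000u - val) <= 256;
    }

    int main(void)
    {
            printf("%d\n", pmc_overflowed(0x7fffff80u, 1)); /* 1: 128 below */
            printf("%d\n", pmc_overflowed(0x7fffff80u, 0)); /* 0 */
            return 0;
    }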
1332 | |||
1333 | /* | ||
1334 | * Performance monitor interrupt handling | ||
1335 | */ | ||
1336 | static void perf_event_interrupt(struct pt_regs *regs) | ||
1337 | { | ||
1338 | int i; | ||
1339 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
1340 | struct perf_event *event; | ||
1341 | unsigned long val; | ||
1342 | int found = 0; | ||
1343 | int nmi; | ||
1344 | |||
1345 | if (cpuhw->n_limited) | ||
1346 | freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), | ||
1347 | mfspr(SPRN_PMC6)); | ||
1348 | |||
1349 | perf_read_regs(regs); | ||
1350 | |||
1351 | nmi = perf_intr_is_nmi(regs); | ||
1352 | if (nmi) | ||
1353 | nmi_enter(); | ||
1354 | else | ||
1355 | irq_enter(); | ||
1356 | |||
1357 | for (i = 0; i < cpuhw->n_events; ++i) { | ||
1358 | event = cpuhw->event[i]; | ||
1359 | if (!event->hw.idx || is_limited_pmc(event->hw.idx)) | ||
1360 | continue; | ||
1361 | val = read_pmc(event->hw.idx); | ||
1362 | if ((int)val < 0) { | ||
1363 | /* event has overflowed */ | ||
1364 | found = 1; | ||
1365 | record_and_restart(event, val, regs); | ||
1366 | } | ||
1367 | } | ||
1368 | |||
1369 | /* | ||
1370 | * In case we didn't find and reset the event that caused | ||
1371 | * the interrupt, scan all events and reset any that are | ||
1372 | * negative, to avoid getting continual interrupts. | ||
1373 | * Any that we processed in the previous loop will not be negative. | ||
1374 | */ | ||
1375 | if (!found) { | ||
1376 | for (i = 0; i < ppmu->n_counter; ++i) { | ||
1377 | if (is_limited_pmc(i + 1)) | ||
1378 | continue; | ||
1379 | val = read_pmc(i + 1); | ||
1380 | if (pmc_overflow(val)) | ||
1381 | write_pmc(i + 1, 0); | ||
1382 | } | ||
1383 | } | ||
1384 | |||
1385 | /* | ||
1386 | * Reset MMCR0 to its normal value. This will set PMXE and | ||
1387 | * clear FC (freeze counters) and PMAO (perf mon alert occurred) | ||
1388 | * and thus allow interrupts to occur again. | ||
1389 | * XXX might want to use MSR.PM to keep the events frozen until | ||
1390 | * we get back out of this interrupt. | ||
1391 | */ | ||
1392 | write_mmcr0(cpuhw, cpuhw->mmcr[0]); | ||
1393 | |||
1394 | if (nmi) | ||
1395 | nmi_exit(); | ||
1396 | else | ||
1397 | irq_exit(); | ||
1398 | } | ||
1399 | |||
1400 | static void power_pmu_setup(int cpu) | ||
1401 | { | ||
1402 | struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
1403 | |||
1404 | if (!ppmu) | ||
1405 | return; | ||
1406 | memset(cpuhw, 0, sizeof(*cpuhw)); | ||
1407 | cpuhw->mmcr[0] = MMCR0_FC; | ||
1408 | } | ||
1409 | |||
1410 | static int __cpuinit | ||
1411 | power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | ||
1412 | { | ||
1413 | unsigned int cpu = (long)hcpu; | ||
1414 | |||
1415 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1416 | case CPU_UP_PREPARE: | ||
1417 | power_pmu_setup(cpu); | ||
1418 | break; | ||
1419 | |||
1420 | default: | ||
1421 | break; | ||
1422 | } | ||
1423 | |||
1424 | return NOTIFY_OK; | ||
1425 | } | ||
1426 | |||
1427 | int __cpuinit register_power_pmu(struct power_pmu *pmu) | ||
1428 | { | ||
1429 | if (ppmu) | ||
1430 | return -EBUSY; /* something's already registered */ | ||
1431 | |||
1432 | ppmu = pmu; | ||
1433 | pr_info("%s performance monitor hardware support registered\n", | ||
1434 | pmu->name); | ||
1435 | |||
1436 | #ifdef MSR_HV | ||
1437 | /* | ||
1438 | * Use FCHV to ignore kernel events if MSR.HV is set. | ||
1439 | */ | ||
1440 | if (mfmsr() & MSR_HV) | ||
1441 | freeze_events_kernel = MMCR0_FCHV; | ||
1442 | #endif /* MSR_HV */ | ||
1443 | |||
1444 | perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); | ||
1445 | perf_cpu_notifier(power_pmu_notifier); | ||
1446 | |||
1447 | return 0; | ||
1448 | } | ||
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c deleted file mode 100644 index 0a6d2a9d569c..000000000000 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ /dev/null | |||
@@ -1,688 +0,0 @@ | |||
1 | /* | ||
2 | * Performance event support - Freescale Embedded Performance Monitor | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * Copyright 2010 Freescale Semiconductor, Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/perf_event.h> | ||
15 | #include <linux/percpu.h> | ||
16 | #include <linux/hardirq.h> | ||
17 | #include <asm/reg_fsl_emb.h> | ||
18 | #include <asm/pmc.h> | ||
19 | #include <asm/machdep.h> | ||
20 | #include <asm/firmware.h> | ||
21 | #include <asm/ptrace.h> | ||
22 | |||
23 | struct cpu_hw_events { | ||
24 | int n_events; | ||
25 | int disabled; | ||
26 | u8 pmcs_enabled; | ||
27 | struct perf_event *event[MAX_HWEVENTS]; | ||
28 | }; | ||
29 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
30 | |||
31 | static struct fsl_emb_pmu *ppmu; | ||
32 | |||
33 | /* Number of perf_events counting hardware events */ | ||
34 | static atomic_t num_events; | ||
35 | /* Used to avoid races in calling reserve/release_pmc_hardware */ | ||
36 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
37 | |||
38 | /* | ||
39 | * If interrupts were soft-disabled when a PMU interrupt occurred, treat | ||
40 | * it as an NMI. | ||
41 | */ | ||
42 | static inline int perf_intr_is_nmi(struct pt_regs *regs) | ||
43 | { | ||
44 | #ifdef __powerpc64__ | ||
45 | return !regs->softe; | ||
46 | #else | ||
47 | return 0; | ||
48 | #endif | ||
49 | } | ||
50 | |||
51 | static void perf_event_interrupt(struct pt_regs *regs); | ||
52 | |||
53 | /* | ||
54 | * Read one performance monitor counter (PMC). | ||
55 | */ | ||
56 | static unsigned long read_pmc(int idx) | ||
57 | { | ||
58 | unsigned long val; | ||
59 | |||
60 | switch (idx) { | ||
61 | case 0: | ||
62 | val = mfpmr(PMRN_PMC0); | ||
63 | break; | ||
64 | case 1: | ||
65 | val = mfpmr(PMRN_PMC1); | ||
66 | break; | ||
67 | case 2: | ||
68 | val = mfpmr(PMRN_PMC2); | ||
69 | break; | ||
70 | case 3: | ||
71 | val = mfpmr(PMRN_PMC3); | ||
72 | break; | ||
73 | default: | ||
74 | printk(KERN_ERR "oops trying to read PMC%d\n", idx); | ||
75 | val = 0; | ||
76 | } | ||
77 | return val; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Write one PMC. | ||
82 | */ | ||
83 | static void write_pmc(int idx, unsigned long val) | ||
84 | { | ||
85 | switch (idx) { | ||
86 | case 0: | ||
87 | mtpmr(PMRN_PMC0, val); | ||
88 | break; | ||
89 | case 1: | ||
90 | mtpmr(PMRN_PMC1, val); | ||
91 | break; | ||
92 | case 2: | ||
93 | mtpmr(PMRN_PMC2, val); | ||
94 | break; | ||
95 | case 3: | ||
96 | mtpmr(PMRN_PMC3, val); | ||
97 | break; | ||
98 | default: | ||
99 | printk(KERN_ERR "oops trying to write PMC%d\n", idx); | ||
100 | } | ||
101 | |||
102 | isync(); | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Write one local control A register | ||
107 | */ | ||
108 | static void write_pmlca(int idx, unsigned long val) | ||
109 | { | ||
110 | switch (idx) { | ||
111 | case 0: | ||
112 | mtpmr(PMRN_PMLCA0, val); | ||
113 | break; | ||
114 | case 1: | ||
115 | mtpmr(PMRN_PMLCA1, val); | ||
116 | break; | ||
117 | case 2: | ||
118 | mtpmr(PMRN_PMLCA2, val); | ||
119 | break; | ||
120 | case 3: | ||
121 | mtpmr(PMRN_PMLCA3, val); | ||
122 | break; | ||
123 | default: | ||
124 | printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); | ||
125 | } | ||
126 | |||
127 | isync(); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Write one local control B register | ||
132 | */ | ||
133 | static void write_pmlcb(int idx, unsigned long val) | ||
134 | { | ||
135 | switch (idx) { | ||
136 | case 0: | ||
137 | mtpmr(PMRN_PMLCB0, val); | ||
138 | break; | ||
139 | case 1: | ||
140 | mtpmr(PMRN_PMLCB1, val); | ||
141 | break; | ||
142 | case 2: | ||
143 | mtpmr(PMRN_PMLCB2, val); | ||
144 | break; | ||
145 | case 3: | ||
146 | mtpmr(PMRN_PMLCB3, val); | ||
147 | break; | ||
148 | default: | ||
149 | printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); | ||
150 | } | ||
151 | |||
152 | isync(); | ||
153 | } | ||
154 | |||
155 | static void fsl_emb_pmu_read(struct perf_event *event) | ||
156 | { | ||
157 | s64 val, delta, prev; | ||
158 | |||
159 | if (event->hw.state & PERF_HES_STOPPED) | ||
160 | return; | ||
161 | |||
162 | /* | ||
163 | * Performance monitor interrupts come even when interrupts | ||
164 | * are soft-disabled, as long as interrupts are hard-enabled. | ||
165 | * Therefore we treat them like NMIs. | ||
166 | */ | ||
167 | do { | ||
168 | prev = local64_read(&event->hw.prev_count); | ||
169 | barrier(); | ||
170 | val = read_pmc(event->hw.idx); | ||
171 | } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); | ||
172 | |||
173 | /* The counters are only 32 bits wide */ | ||
174 | delta = (val - prev) & 0xfffffffful; | ||
175 | local64_add(delta, &event->count); | ||
176 | local64_sub(delta, &event->hw.period_left); | ||
177 | } | ||
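
The cmpxchg loop retries until prev_count is unchanged across the hardware read, so a PMU interrupt that updates the counter concurrently cannot be lost; the masked subtraction then gives the right delta even when the 32-bit counter has wrapped. A standalone sketch of the wraparound arithmetic:

    #include <stdint.h>
    #include <assert.h>

    /* 32-bit counters: compute the delta modulo 2^32 so a counter
     * that wrapped (val < prev) still yields the correct count. */
    static uint64_t pmc_delta(uint32_t prev, uint32_t val)
    {
            return (uint32_t)(val - prev);
    }

    int main(void)
    {
            assert(pmc_delta(0xfffffff0u, 0x10u) == 0x20);  /* wrapped */
            assert(pmc_delta(0x100u, 0x200u) == 0x100);
            return 0;
    }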
178 | |||
179 | /* | ||
180 | * Disable all events to prevent PMU interrupts and to allow | ||
181 | * events to be added or removed. | ||
182 | */ | ||
183 | static void fsl_emb_pmu_disable(struct pmu *pmu) | ||
184 | { | ||
185 | struct cpu_hw_events *cpuhw; | ||
186 | unsigned long flags; | ||
187 | |||
188 | local_irq_save(flags); | ||
189 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
190 | |||
191 | if (!cpuhw->disabled) { | ||
192 | cpuhw->disabled = 1; | ||
193 | |||
194 | /* | ||
195 | * Check if we ever enabled the PMU on this cpu. | ||
196 | */ | ||
197 | if (!cpuhw->pmcs_enabled) { | ||
198 | ppc_enable_pmcs(); | ||
199 | cpuhw->pmcs_enabled = 1; | ||
200 | } | ||
201 | |||
202 | if (atomic_read(&num_events)) { | ||
203 | /* | ||
204 | * Set the 'freeze all counters' bit, and disable | ||
205 | * interrupts. The barrier is to make sure the | ||
206 | * mtpmr has been executed and the PMU has frozen | ||
207 | * the events before we return. | ||
208 | */ | ||
209 | |||
210 | mtpmr(PMRN_PMGC0, PMGC0_FAC); | ||
211 | isync(); | ||
212 | } | ||
213 | } | ||
214 | local_irq_restore(flags); | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Re-enable all events. If we were previously disabled | ||
219 | * and events were added, then put the new config on | ||
220 | * the PMU. | ||
221 | */ | ||
222 | static void fsl_emb_pmu_enable(struct pmu *pmu) | ||
223 | { | ||
224 | struct cpu_hw_events *cpuhw; | ||
225 | unsigned long flags; | ||
226 | |||
227 | local_irq_save(flags); | ||
228 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
229 | if (!cpuhw->disabled) | ||
230 | goto out; | ||
231 | |||
232 | cpuhw->disabled = 0; | ||
233 | ppc_set_pmu_inuse(cpuhw->n_events != 0); | ||
234 | |||
235 | if (cpuhw->n_events > 0) { | ||
236 | mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); | ||
237 | isync(); | ||
238 | } | ||
239 | |||
240 | out: | ||
241 | local_irq_restore(flags); | ||
242 | } | ||
243 | |||
244 | static int collect_events(struct perf_event *group, int max_count, | ||
245 | struct perf_event *ctrs[]) | ||
246 | { | ||
247 | int n = 0; | ||
248 | struct perf_event *event; | ||
249 | |||
250 | if (!is_software_event(group)) { | ||
251 | if (n >= max_count) | ||
252 | return -1; | ||
253 | ctrs[n] = group; | ||
254 | n++; | ||
255 | } | ||
256 | list_for_each_entry(event, &group->sibling_list, group_entry) { | ||
257 | if (!is_software_event(event) && | ||
258 | event->state != PERF_EVENT_STATE_OFF) { | ||
259 | if (n >= max_count) | ||
260 | return -1; | ||
261 | ctrs[n] = event; | ||
262 | n++; | ||
263 | } | ||
264 | } | ||
265 | return n; | ||
266 | } | ||
267 | |||
268 | /* context locked on entry */ | ||
269 | static int fsl_emb_pmu_add(struct perf_event *event, int flags) | ||
270 | { | ||
271 | struct cpu_hw_events *cpuhw; | ||
272 | int ret = -EAGAIN; | ||
273 | int num_counters = ppmu->n_counter; | ||
274 | u64 val; | ||
275 | int i; | ||
276 | |||
277 | perf_pmu_disable(event->pmu); | ||
278 | cpuhw = &get_cpu_var(cpu_hw_events); | ||
279 | |||
280 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) | ||
281 | num_counters = ppmu->n_restricted; | ||
282 | |||
283 | /* | ||
284 | * Allocate counters from top-down, so that restricted-capable | ||
285 | * counters are kept free as long as possible. | ||
286 | */ | ||
287 | for (i = num_counters - 1; i >= 0; i--) { | ||
288 | if (cpuhw->event[i]) | ||
289 | continue; | ||
290 | |||
291 | break; | ||
292 | } | ||
293 | |||
294 | if (i < 0) | ||
295 | goto out; | ||
296 | |||
297 | event->hw.idx = i; | ||
298 | cpuhw->event[i] = event; | ||
299 | ++cpuhw->n_events; | ||
300 | |||
301 | val = 0; | ||
302 | if (event->hw.sample_period) { | ||
303 | s64 left = local64_read(&event->hw.period_left); | ||
304 | if (left < 0x80000000L) | ||
305 | val = 0x80000000L - left; | ||
306 | } | ||
307 | local64_set(&event->hw.prev_count, val); | ||
308 | |||
309 | if (!(flags & PERF_EF_START)) { | ||
310 | event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
311 | val = 0; | ||
312 | } | ||
313 | |||
314 | write_pmc(i, val); | ||
315 | perf_event_update_userpage(event); | ||
316 | |||
317 | write_pmlcb(i, event->hw.config >> 32); | ||
318 | write_pmlca(i, event->hw.config_base); | ||
319 | |||
320 | ret = 0; | ||
321 | out: | ||
322 | put_cpu_var(cpu_hw_events); | ||
323 | perf_pmu_enable(event->pmu); | ||
324 | return ret; | ||
325 | } | ||
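
Restricted events may only run on the first n_restricted counters, so the scan above allocates from the top down to keep those low, restricted-capable slots free for as long as possible. The same policy in isolation (a minimal sketch, not the kernel's data structures):

    /* Return the highest free slot below `limit` (n_restricted for
     * restricted events, n_counter otherwise), or -1 if none is free. */
    static int pick_counter(void *slots[], int limit)
    {
            int i;

            for (i = limit - 1; i >= 0; i--)
                    if (!slots[i])
                            return i;
            return -1;
    }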
326 | |||
327 | /* context locked on entry */ | ||
328 | static void fsl_emb_pmu_del(struct perf_event *event, int flags) | ||
329 | { | ||
330 | struct cpu_hw_events *cpuhw; | ||
331 | int i = event->hw.idx; | ||
332 | |||
333 | perf_pmu_disable(event->pmu); | ||
334 | if (i < 0) | ||
335 | goto out; | ||
336 | |||
337 | fsl_emb_pmu_read(event); | ||
338 | |||
339 | cpuhw = &get_cpu_var(cpu_hw_events); | ||
340 | |||
341 | WARN_ON(event != cpuhw->event[event->hw.idx]); | ||
342 | |||
343 | write_pmlca(i, 0); | ||
344 | write_pmlcb(i, 0); | ||
345 | write_pmc(i, 0); | ||
346 | |||
347 | cpuhw->event[i] = NULL; | ||
348 | event->hw.idx = -1; | ||
349 | |||
350 | /* | ||
351 | * TODO: if at least one restricted event exists, and we | ||
352 | * just freed up a non-restricted-capable counter, and | ||
353 | * there is a restricted-capable counter occupied by | ||
354 | * a non-restricted event, migrate that event to the | ||
355 | * vacated counter. | ||
356 | */ | ||
357 | |||
358 | cpuhw->n_events--; | ||
359 | |||
360 | out: | ||
361 | perf_pmu_enable(event->pmu); | ||
362 | put_cpu_var(cpu_hw_events); | ||
363 | } | ||
364 | |||
365 | static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) | ||
366 | { | ||
367 | unsigned long flags; | ||
368 | s64 left; | ||
369 | |||
370 | if (event->hw.idx < 0 || !event->hw.sample_period) | ||
371 | return; | ||
372 | |||
373 | if (!(event->hw.state & PERF_HES_STOPPED)) | ||
374 | return; | ||
375 | |||
376 | if (ef_flags & PERF_EF_RELOAD) | ||
377 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
378 | |||
379 | local_irq_save(flags); | ||
380 | perf_pmu_disable(event->pmu); | ||
381 | |||
382 | event->hw.state = 0; | ||
383 | left = local64_read(&event->hw.period_left); | ||
384 | write_pmc(event->hw.idx, left); | ||
385 | |||
386 | perf_event_update_userpage(event); | ||
387 | perf_pmu_enable(event->pmu); | ||
388 | local_irq_restore(flags); | ||
389 | } | ||
390 | |||
391 | static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) | ||
392 | { | ||
393 | unsigned long flags; | ||
394 | |||
395 | if (event->hw.idx < 0 || !event->hw.sample_period) | ||
396 | return; | ||
397 | |||
398 | if (event->hw.state & PERF_HES_STOPPED) | ||
399 | return; | ||
400 | |||
401 | local_irq_save(flags); | ||
402 | perf_pmu_disable(event->pmu); | ||
403 | |||
404 | fsl_emb_pmu_read(event); | ||
405 | event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
406 | write_pmc(event->hw.idx, 0); | ||
407 | |||
408 | perf_event_update_userpage(event); | ||
409 | perf_pmu_enable(event->pmu); | ||
410 | local_irq_restore(flags); | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Release the PMU if this is the last perf_event. | ||
415 | */ | ||
416 | static void hw_perf_event_destroy(struct perf_event *event) | ||
417 | { | ||
418 | if (!atomic_add_unless(&num_events, -1, 1)) { | ||
419 | mutex_lock(&pmc_reserve_mutex); | ||
420 | if (atomic_dec_return(&num_events) == 0) | ||
421 | release_pmc_hardware(); | ||
422 | mutex_unlock(&pmc_reserve_mutex); | ||
423 | } | ||
424 | } | ||
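
atomic_add_unless(&num_events, -1, 1) decrements only while the count is above 1, so the mutex is taken just on the possibly-last-user path, where the release must not race with a concurrent reserve. A userspace sketch of the same pattern with C11 atomics (release_hw is a hypothetical stand-in for release_pmc_hardware):

    #include <stdatomic.h>
    #include <pthread.h>

    static atomic_int num_events;
    static pthread_mutex_t reserve_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void release_hw(void) { /* stand-in for release_pmc_hardware */ }

    static void put_event(void)
    {
            int cur = atomic_load(&num_events);

            /* Fast path: lock-free decrement unless we might be last. */
            while (cur > 1)
                    if (atomic_compare_exchange_weak(&num_events, &cur, cur - 1))
                            return;

            pthread_mutex_lock(&reserve_mutex);
            if (atomic_fetch_sub(&num_events, 1) == 1)
                    release_hw();
            pthread_mutex_unlock(&reserve_mutex);
    }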
425 | |||
426 | /* | ||
427 | * Translate a generic cache event_id config to a raw event_id code. | ||
428 | */ | ||
429 | static int hw_perf_cache_event(u64 config, u64 *eventp) | ||
430 | { | ||
431 | unsigned long type, op, result; | ||
432 | int ev; | ||
433 | |||
434 | if (!ppmu->cache_events) | ||
435 | return -EINVAL; | ||
436 | |||
437 | /* unpack config */ | ||
438 | type = config & 0xff; | ||
439 | op = (config >> 8) & 0xff; | ||
440 | result = (config >> 16) & 0xff; | ||
441 | |||
442 | if (type >= PERF_COUNT_HW_CACHE_MAX || | ||
443 | op >= PERF_COUNT_HW_CACHE_OP_MAX || | ||
444 | result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
445 | return -EINVAL; | ||
446 | |||
447 | ev = (*ppmu->cache_events)[type][op][result]; | ||
448 | if (ev == 0) | ||
449 | return -EOPNOTSUPP; | ||
450 | if (ev == -1) | ||
451 | return -EINVAL; | ||
452 | *eventp = ev; | ||
453 | return 0; | ||
454 | } | ||
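
The unpacking above is the inverse of how the generic perf ABI packs cache events: one byte each for the cache, the operation, and the result. A sketch of the encoder (the helper name is hypothetical):

    #include <stdint.h>

    /* Pack (type, op, result) the way hw_perf_cache_event unpacks it. */
    static uint64_t make_cache_config(unsigned type, unsigned op,
                                      unsigned result)
    {
            return (uint64_t)type | ((uint64_t)op << 8) |
                   ((uint64_t)result << 16);
    }

    /* e.g. L1D read misses: make_cache_config(0, 0, 1) == 0x10000
     * (PERF_COUNT_HW_CACHE_L1D, _OP_READ, _RESULT_MISS) */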
455 | |||
456 | static int fsl_emb_pmu_event_init(struct perf_event *event) | ||
457 | { | ||
458 | u64 ev; | ||
459 | struct perf_event *events[MAX_HWEVENTS]; | ||
460 | int n; | ||
461 | int err; | ||
462 | int num_restricted; | ||
463 | int i; | ||
464 | |||
465 | switch (event->attr.type) { | ||
466 | case PERF_TYPE_HARDWARE: | ||
467 | ev = event->attr.config; | ||
468 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | ||
469 | return -EOPNOTSUPP; | ||
470 | ev = ppmu->generic_events[ev]; | ||
471 | break; | ||
472 | |||
473 | case PERF_TYPE_HW_CACHE: | ||
474 | err = hw_perf_cache_event(event->attr.config, &ev); | ||
475 | if (err) | ||
476 | return err; | ||
477 | break; | ||
478 | |||
479 | case PERF_TYPE_RAW: | ||
480 | ev = event->attr.config; | ||
481 | break; | ||
482 | |||
483 | default: | ||
484 | return -ENOENT; | ||
485 | } | ||
486 | |||
487 | event->hw.config = ppmu->xlate_event(ev); | ||
488 | if (!(event->hw.config & FSL_EMB_EVENT_VALID)) | ||
489 | return -EINVAL; | ||
490 | |||
491 | /* | ||
492 | * If this is in a group, check if it can go on with all the | ||
493 | * other hardware events in the group. We assume the event | ||
494 | * hasn't been linked into its leader's sibling list at this point. | ||
495 | */ | ||
496 | n = 0; | ||
497 | if (event->group_leader != event) { | ||
498 | n = collect_events(event->group_leader, | ||
499 | ppmu->n_counter - 1, events); | ||
500 | if (n < 0) | ||
501 | return -EINVAL; | ||
502 | } | ||
503 | |||
504 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { | ||
505 | num_restricted = 0; | ||
506 | for (i = 0; i < n; i++) { | ||
507 | if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED) | ||
508 | num_restricted++; | ||
509 | } | ||
510 | |||
511 | if (num_restricted >= ppmu->n_restricted) | ||
512 | return -EINVAL; | ||
513 | } | ||
514 | |||
515 | event->hw.idx = -1; | ||
516 | |||
517 | event->hw.config_base = PMLCA_CE | PMLCA_FCM1 | | ||
518 | (u32)((ev << 16) & PMLCA_EVENT_MASK); | ||
519 | |||
520 | if (event->attr.exclude_user) | ||
521 | event->hw.config_base |= PMLCA_FCU; | ||
522 | if (event->attr.exclude_kernel) | ||
523 | event->hw.config_base |= PMLCA_FCS; | ||
524 | if (event->attr.exclude_idle) | ||
525 | return -ENOTSUPP; | ||
526 | |||
527 | event->hw.last_period = event->hw.sample_period; | ||
528 | local64_set(&event->hw.period_left, event->hw.last_period); | ||
529 | |||
530 | /* | ||
531 | * See if we need to reserve the PMU. | ||
532 | * If no events are currently in use, then we have to take a | ||
533 | * mutex to ensure that we don't race with another task doing | ||
534 | * reserve_pmc_hardware or release_pmc_hardware. | ||
535 | */ | ||
536 | err = 0; | ||
537 | if (!atomic_inc_not_zero(&num_events)) { | ||
538 | mutex_lock(&pmc_reserve_mutex); | ||
539 | if (atomic_read(&num_events) == 0 && | ||
540 | reserve_pmc_hardware(perf_event_interrupt)) | ||
541 | err = -EBUSY; | ||
542 | else | ||
543 | atomic_inc(&num_events); | ||
544 | mutex_unlock(&pmc_reserve_mutex); | ||
545 | |||
546 | mtpmr(PMRN_PMGC0, PMGC0_FAC); | ||
547 | isync(); | ||
548 | } | ||
549 | event->destroy = hw_perf_event_destroy; | ||
550 | |||
551 | return err; | ||
552 | } | ||
553 | |||
554 | static struct pmu fsl_emb_pmu = { | ||
555 | .pmu_enable = fsl_emb_pmu_enable, | ||
556 | .pmu_disable = fsl_emb_pmu_disable, | ||
557 | .event_init = fsl_emb_pmu_event_init, | ||
558 | .add = fsl_emb_pmu_add, | ||
559 | .del = fsl_emb_pmu_del, | ||
560 | .start = fsl_emb_pmu_start, | ||
561 | .stop = fsl_emb_pmu_stop, | ||
562 | .read = fsl_emb_pmu_read, | ||
563 | }; | ||
564 | |||
565 | /* | ||
566 | * A counter has overflowed; update its count and record | ||
567 | * things if requested. Note that interrupts are hard-disabled | ||
568 | * here so there is no possibility of being interrupted. | ||
569 | */ | ||
570 | static void record_and_restart(struct perf_event *event, unsigned long val, | ||
571 | struct pt_regs *regs) | ||
572 | { | ||
573 | u64 period = event->hw.sample_period; | ||
574 | s64 prev, delta, left; | ||
575 | int record = 0; | ||
576 | |||
577 | if (event->hw.state & PERF_HES_STOPPED) { | ||
578 | write_pmc(event->hw.idx, 0); | ||
579 | return; | ||
580 | } | ||
581 | |||
582 | /* we don't have to worry about interrupts here */ | ||
583 | prev = local64_read(&event->hw.prev_count); | ||
584 | delta = (val - prev) & 0xfffffffful; | ||
585 | local64_add(delta, &event->count); | ||
586 | |||
587 | /* | ||
588 | * See if the total period for this event has expired, | ||
589 | * and update for the next period. | ||
590 | */ | ||
591 | val = 0; | ||
592 | left = local64_read(&event->hw.period_left) - delta; | ||
593 | if (period) { | ||
594 | if (left <= 0) { | ||
595 | left += period; | ||
596 | if (left <= 0) | ||
597 | left = period; | ||
598 | record = 1; | ||
599 | event->hw.last_period = event->hw.sample_period; | ||
600 | } | ||
601 | if (left < 0x80000000LL) | ||
602 | val = 0x80000000LL - left; | ||
603 | } | ||
604 | |||
605 | write_pmc(event->hw.idx, val); | ||
606 | local64_set(&event->hw.prev_count, val); | ||
607 | local64_set(&event->hw.period_left, left); | ||
608 | perf_event_update_userpage(event); | ||
609 | |||
610 | /* | ||
611 | * Finally record data if requested. | ||
612 | */ | ||
613 | if (record) { | ||
614 | struct perf_sample_data data; | ||
615 | |||
616 | perf_sample_data_init(&data, 0); | ||
617 | data.period = event->hw.last_period; | ||
618 | |||
619 | if (perf_event_overflow(event, &data, regs)) | ||
620 | fsl_emb_pmu_stop(event, 0); | ||
621 | } | ||
622 | } | ||
623 | |||
624 | static void perf_event_interrupt(struct pt_regs *regs) | ||
625 | { | ||
626 | int i; | ||
627 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
628 | struct perf_event *event; | ||
629 | unsigned long val; | ||
630 | int found = 0; | ||
631 | int nmi; | ||
632 | |||
633 | nmi = perf_intr_is_nmi(regs); | ||
634 | if (nmi) | ||
635 | nmi_enter(); | ||
636 | else | ||
637 | irq_enter(); | ||
638 | |||
639 | for (i = 0; i < ppmu->n_counter; ++i) { | ||
640 | event = cpuhw->event[i]; | ||
641 | |||
642 | val = read_pmc(i); | ||
643 | if ((int)val < 0) { | ||
644 | if (event) { | ||
645 | /* event has overflowed */ | ||
646 | found = 1; | ||
647 | record_and_restart(event, val, regs); | ||
648 | } else { | ||
649 | /* | ||
650 | * Disabled counter is negative, | ||
651 | * reset it just in case. | ||
652 | */ | ||
653 | write_pmc(i, 0); | ||
654 | } | ||
655 | } | ||
656 | } | ||
657 | |||
658 | /* PMM will keep counters frozen until we return from the interrupt. */ | ||
659 | mtmsr(mfmsr() | MSR_PMM); | ||
660 | mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); | ||
661 | isync(); | ||
662 | |||
663 | if (nmi) | ||
664 | nmi_exit(); | ||
665 | else | ||
666 | irq_exit(); | ||
667 | } | ||
668 | |||
669 | void hw_perf_event_setup(int cpu) | ||
670 | { | ||
671 | struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
672 | |||
673 | memset(cpuhw, 0, sizeof(*cpuhw)); | ||
674 | } | ||
675 | |||
676 | int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) | ||
677 | { | ||
678 | if (ppmu) | ||
679 | return -EBUSY; /* something's already registered */ | ||
680 | |||
681 | ppmu = pmu; | ||
682 | pr_info("%s performance monitor hardware support registered\n", | ||
683 | pmu->name); | ||
684 | |||
685 | perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW); | ||
686 | |||
687 | return 0; | ||
688 | } | ||
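
A back-end driver plugs in by filling a struct fsl_emb_pmu and calling register_fsl_emb_pmu() from an initcall, as the in-tree e500 driver does. A hedged sketch with hypothetical names (a real driver also supplies generic_events and cache_events tables, omitted here):

    static u64 my_xlate_event(u64 event_low)        /* hypothetical */
    {
            return event_low | FSL_EMB_EVENT_VALID;
    }

    static struct fsl_emb_pmu my_emb_pmu = {
            .name           = "my embedded core",   /* hypothetical */
            .n_counter      = 4,
            .n_restricted   = 2,
            .xlate_event    = my_xlate_event,
    };

    static int __init init_my_emb_pmu(void)
    {
            return register_fsl_emb_pmu(&my_emb_pmu);
    }
    early_initcall(init_my_emb_pmu);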
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index a841a9d136a2..58eaa3ddf7b9 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c | |||
@@ -13,6 +13,7 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/bug.h> | ||
16 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
17 | #include <linux/export.h> | 18 | #include <linux/export.h> |
18 | 19 | ||
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c deleted file mode 100644 index b4f1dda4d089..000000000000 --- a/arch/powerpc/kernel/power4-pmu.c +++ /dev/null | |||
@@ -1,621 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER4 | ||
19 | */ | ||
20 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
23 | #define PM_UNIT_MSK 0xf | ||
24 | #define PM_LOWER_SH 6 | ||
25 | #define PM_LOWER_MSK 1 | ||
26 | #define PM_LOWER_MSKS 0x40 | ||
27 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
28 | #define PM_BYTE_MSK 3 | ||
29 | #define PM_PMCSEL_MSK 7 | ||
30 | |||
31 | /* | ||
32 | * Unit code values | ||
33 | */ | ||
34 | #define PM_FPU 1 | ||
35 | #define PM_ISU1 2 | ||
36 | #define PM_IFU 3 | ||
37 | #define PM_IDU0 4 | ||
38 | #define PM_ISU1_ALT 6 | ||
39 | #define PM_ISU2 7 | ||
40 | #define PM_IFU_ALT 8 | ||
41 | #define PM_LSU0 9 | ||
42 | #define PM_LSU1 0xc | ||
43 | #define PM_GPS 0xf | ||
44 | |||
45 | /* | ||
46 | * Bits in MMCR0 for POWER4 | ||
47 | */ | ||
48 | #define MMCR0_PMC1SEL_SH 8 | ||
49 | #define MMCR0_PMC2SEL_SH 1 | ||
50 | #define MMCR_PMCSEL_MSK 0x1f | ||
51 | |||
52 | /* | ||
53 | * Bits in MMCR1 for POWER4 | ||
54 | */ | ||
55 | #define MMCR1_TTM0SEL_SH 62 | ||
56 | #define MMCR1_TTC0SEL_SH 61 | ||
57 | #define MMCR1_TTM1SEL_SH 59 | ||
58 | #define MMCR1_TTC1SEL_SH 58 | ||
59 | #define MMCR1_TTM2SEL_SH 56 | ||
60 | #define MMCR1_TTC2SEL_SH 55 | ||
61 | #define MMCR1_TTM3SEL_SH 53 | ||
62 | #define MMCR1_TTC3SEL_SH 52 | ||
63 | #define MMCR1_TTMSEL_MSK 3 | ||
64 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
65 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
66 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
67 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
68 | #define MMCR1_DEBUG0SEL_SH 43 | ||
69 | #define MMCR1_DEBUG1SEL_SH 42 | ||
70 | #define MMCR1_DEBUG2SEL_SH 41 | ||
71 | #define MMCR1_DEBUG3SEL_SH 40 | ||
72 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
73 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
74 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
75 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
76 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
77 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
78 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
79 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
80 | #define MMCR1_PMC3SEL_SH 27 | ||
81 | #define MMCR1_PMC4SEL_SH 22 | ||
82 | #define MMCR1_PMC5SEL_SH 17 | ||
83 | #define MMCR1_PMC6SEL_SH 12 | ||
84 | #define MMCR1_PMC7SEL_SH 7 | ||
85 | #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ | ||
86 | |||
87 | static short mmcr1_adder_bits[8] = { | ||
88 | MMCR1_PMC1_ADDER_SEL_SH, | ||
89 | MMCR1_PMC2_ADDER_SEL_SH, | ||
90 | MMCR1_PMC3_ADDER_SEL_SH, | ||
91 | MMCR1_PMC4_ADDER_SEL_SH, | ||
92 | MMCR1_PMC5_ADDER_SEL_SH, | ||
93 | MMCR1_PMC6_ADDER_SEL_SH, | ||
94 | MMCR1_PMC7_ADDER_SEL_SH, | ||
95 | MMCR1_PMC8_ADDER_SEL_SH | ||
96 | }; | ||
97 | |||
98 | /* | ||
99 | * Bits in MMCRA | ||
100 | */ | ||
101 | #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ | ||
102 | |||
103 | /* | ||
104 | * Layout of constraint bits: | ||
105 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
106 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
107 | * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> | ||
108 | * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
109 | * \SMPL ||\TTC3SEL | ||
110 | * |\TTC_IFU_SEL | ||
111 | * \TTM2SEL0 | ||
112 | * | ||
113 | * SMPL - SAMPLE_ENABLE constraint | ||
114 | * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 | ||
115 | * | ||
116 | * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 | ||
117 | * 55: UC1 error 0x0080_0000_0000_0000 | ||
118 | * 54: FPU events needed 0x0040_0000_0000_0000 | ||
119 | * 53: ISU1 events needed 0x0020_0000_0000_0000 | ||
120 | * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 | ||
121 | * | ||
122 | * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 | ||
123 | * 51: UC2 error 0x0008_0000_0000_0000 | ||
124 | * 50: FPU events needed 0x0004_0000_0000_0000 | ||
125 | * 49: IFU events needed 0x0002_0000_0000_0000 | ||
126 | * 48: LSU0 events needed 0x0001_0000_0000_0000 | ||
127 | * | ||
128 | * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 | ||
129 | * 47: UC3 error 0x8000_0000_0000 | ||
130 | * 46: LSU0 events needed 0x4000_0000_0000 | ||
131 | * 45: IFU events needed 0x2000_0000_0000 | ||
132 | * 44: IDU0|ISU2 events needed 0x1000_0000_0000 | ||
133 | * 43: ISU1 events needed 0x0800_0000_0000 | ||
134 | * | ||
135 | * TTM2SEL0 | ||
136 | * 42: 0 = IDU0 events needed | ||
137 | * 1 = ISU2 events needed 0x0400_0000_0000 | ||
138 | * | ||
139 | * TTC_IFU_SEL | ||
140 | * 41: 0 = IFU.U events needed | ||
141 | * 1 = IFU.L events needed 0x0200_0000_0000 | ||
142 | * | ||
143 | * TTC3SEL | ||
144 | * 40: 0 = LSU1.U events needed | ||
145 | * 1 = LSU1.L events needed 0x0100_0000_0000 | ||
146 | * | ||
147 | * PS1 | ||
148 | * 39: PS1 error 0x0080_0000_0000 | ||
149 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
150 | * | ||
151 | * PS2 | ||
152 | * 35: PS2 error 0x0008_0000_0000 | ||
153 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
154 | * | ||
155 | * B0 | ||
156 | * 28-31: Byte 0 event source 0xf000_0000 | ||
157 | * 1 = FPU | ||
158 | * 2 = ISU1 | ||
159 | * 3 = IFU | ||
160 | * 4 = IDU0 | ||
161 | * 7 = ISU2 | ||
162 | * 9 = LSU0 | ||
163 | * c = LSU1 | ||
164 | * f = GPS | ||
165 | * | ||
166 | * B1, B2, B3 | ||
167 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
168 | * | ||
169 | * P8 | ||
170 | * 15: P8 error 0x8000 | ||
171 | * 14-15: Count of events needing PMC8 | ||
172 | * | ||
173 | * P1..P7 | ||
174 | * 0-13: Count of events needing PMC1..PMC7 | ||
175 | * | ||
176 | * Note: this doesn't allow events using IFU.U to be combined with events | ||
177 | * using IFU.L, though that is feasible (using TTM0 and TTM2). However | ||
178 | * there are no listed events for IFU.L (they are debug events not | ||
179 | * verified for performance monitoring) so this shouldn't cause a | ||
180 | * problem. | ||
181 | */ | ||
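
Each event thus reduces to a (mask, value) pair, and the core scheduler in perf_event.c accumulates these pairs, rejecting a candidate set as soon as two events disagree on a shared select field (the adder fields, driven by add_fields/test_adder, additionally count events per PMC group). A greatly simplified sketch of just the select-field check:

    /* Simplified: two events can share the PMU only if their select
     * bits agree wherever both masks cover them.  The real code also
     * sums the counter fields with add_fields/test_adder. */
    static int selects_compatible(unsigned long m1, unsigned long v1,
                                  unsigned long m2, unsigned long v2)
    {
            return ((v1 ^ v2) & (m1 & m2)) == 0;
    }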
182 | |||
183 | static struct unitinfo { | ||
184 | unsigned long value, mask; | ||
185 | int unit; | ||
186 | int lowerbit; | ||
187 | } p4_unitinfo[16] = { | ||
188 | [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 }, | ||
189 | [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, | ||
190 | [PM_ISU1_ALT] = | ||
191 | { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, | ||
192 | [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, | ||
193 | [PM_IFU_ALT] = | ||
194 | { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, | ||
195 | [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 }, | ||
196 | [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 }, | ||
197 | [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 }, | ||
198 | [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 }, | ||
199 | [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 } | ||
200 | }; | ||
201 | |||
202 | static unsigned char direct_marked_event[8] = { | ||
203 | (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ | ||
204 | (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ | ||
205 | (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ | ||
206 | (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ | ||
207 | (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ | ||
208 | (1<<3) | (1<<4) | (1<<5), | ||
209 | /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ | ||
210 | (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ | ||
211 | (1<<4), /* PMC8: PM_MRK_LSU_FIN */ | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Returns 1 if event counts things relating to marked instructions | ||
216 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
217 | */ | ||
218 | static int p4_marked_instr_event(u64 event) | ||
219 | { | ||
220 | int pmc, psel, unit, byte, bit; | ||
221 | unsigned int mask; | ||
222 | |||
223 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
224 | psel = event & PM_PMCSEL_MSK; | ||
225 | if (pmc) { | ||
226 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
227 | return 1; | ||
228 | if (psel == 0) /* add events */ | ||
229 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
230 | else if (psel == 6) /* decode events */ | ||
231 | bit = 4; | ||
232 | else | ||
233 | return 0; | ||
234 | } else | ||
235 | bit = psel; | ||
236 | |||
237 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
238 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
239 | mask = 0; | ||
240 | switch (unit) { | ||
241 | case PM_LSU1: | ||
242 | if (event & PM_LOWER_MSKS) | ||
243 | mask = 1 << 28; /* byte 7 bit 4 */ | ||
244 | else | ||
245 | mask = 6 << 24; /* byte 3 bits 1 and 2 */ | ||
246 | break; | ||
247 | case PM_LSU0: | ||
248 | /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ | ||
249 | mask = 0x083dff00; | ||
250 | } | ||
251 | return (mask >> (byte * 8 + bit)) & 1; | ||
252 | } | ||
253 | |||
254 | static int p4_get_constraint(u64 event, unsigned long *maskp, | ||
255 | unsigned long *valp) | ||
256 | { | ||
257 | int pmc, byte, unit, lower, sh; | ||
258 | unsigned long mask = 0, value = 0; | ||
259 | int grp = -1; | ||
260 | |||
261 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
262 | if (pmc) { | ||
263 | if (pmc > 8) | ||
264 | return -1; | ||
265 | sh = (pmc - 1) * 2; | ||
266 | mask |= 2 << sh; | ||
267 | value |= 1 << sh; | ||
268 | grp = ((pmc - 1) >> 1) & 1; | ||
269 | } | ||
270 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
271 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
272 | if (unit) { | ||
273 | lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
274 | |||
275 | /* | ||
276 | * Bus events on bytes 0 and 2 can be counted | ||
277 | * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. | ||
278 | */ | ||
279 | if (!pmc) | ||
280 | grp = byte & 1; | ||
281 | |||
282 | if (!p4_unitinfo[unit].unit) | ||
283 | return -1; | ||
284 | mask |= p4_unitinfo[unit].mask; | ||
285 | value |= p4_unitinfo[unit].value; | ||
286 | sh = p4_unitinfo[unit].lowerbit; | ||
287 | if (sh > 1) | ||
288 | value |= (unsigned long)lower << sh; | ||
289 | else if (lower != sh) | ||
290 | return -1; | ||
291 | unit = p4_unitinfo[unit].unit; | ||
292 | |||
293 | /* Set byte lane select field */ | ||
294 | mask |= 0xfULL << (28 - 4 * byte); | ||
295 | value |= (unsigned long)unit << (28 - 4 * byte); | ||
296 | } | ||
297 | if (grp == 0) { | ||
298 | /* increment PMC1/2/5/6 field */ | ||
299 | mask |= 0x8000000000ull; | ||
300 | value |= 0x1000000000ull; | ||
301 | } else { | ||
302 | /* increment PMC3/4/7/8 field */ | ||
303 | mask |= 0x800000000ull; | ||
304 | value |= 0x100000000ull; | ||
305 | } | ||
306 | |||
307 | /* Marked instruction events need sample_enable set */ | ||
308 | if (p4_marked_instr_event(event)) { | ||
309 | mask |= 1ull << 56; | ||
310 | value |= 1ull << 56; | ||
311 | } | ||
312 | |||
313 | /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ | ||
314 | if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) | ||
315 | mask |= 1ull << 56; | ||
316 | |||
317 | *maskp = mask; | ||
318 | *valp = value; | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static unsigned int ppc_inst_cmpl[] = { | ||
323 | 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 | ||
324 | }; | ||
325 | |||
326 | static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
327 | { | ||
328 | int i, j, na; | ||
329 | |||
330 | alt[0] = event; | ||
331 | na = 1; | ||
332 | |||
333 | /* 2 possibilities for PM_GRP_DISP_REJECT */ | ||
334 | if (event == 0x8003 || event == 0x0224) { | ||
335 | alt[1] = event ^ (0x8003 ^ 0x0224); | ||
336 | return 2; | ||
337 | } | ||
338 | |||
339 | /* 2 possibilities for PM_ST_MISS_L1 */ | ||
340 | if (event == 0x0c13 || event == 0x0c23) { | ||
341 | alt[1] = event ^ (0x0c13 ^ 0x0c23); | ||
342 | return 2; | ||
343 | } | ||
344 | |||
345 | /* several possibilities for PM_INST_CMPL */ | ||
346 | for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { | ||
347 | if (event == ppc_inst_cmpl[i]) { | ||
348 | for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) | ||
349 | if (j != i) | ||
350 | alt[na++] = ppc_inst_cmpl[j]; | ||
351 | break; | ||
352 | } | ||
353 | } | ||
354 | |||
355 | return na; | ||
356 | } | ||
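
The core tries every encoding returned here while searching for a conflict-free PMC assignment. For example, asking for the alternatives of PM_INST_CMPL yields all five of its encodings (an illustrative fragment using the static function above):

    u64 alt[5];
    int n = p4_get_alternatives(0x1001 /* PM_INST_CMPL */, 0, alt);
    /* n == 5; alt[] == { 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 } */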
357 | |||
358 | static int p4_compute_mmcr(u64 event[], int n_ev, | ||
359 | unsigned int hwc[], unsigned long mmcr[]) | ||
360 | { | ||
361 | unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
362 | unsigned int pmc, unit, byte, psel, lower; | ||
363 | unsigned int ttm, grp; | ||
364 | unsigned int pmc_inuse = 0; | ||
365 | unsigned int pmc_grp_use[2]; | ||
366 | unsigned char busbyte[4]; | ||
367 | unsigned char unituse[16]; | ||
368 | unsigned int unitlower = 0; | ||
369 | int i; | ||
370 | |||
371 | if (n_ev > 8) | ||
372 | return -1; | ||
373 | |||
374 | /* First pass to count resource use */ | ||
375 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
376 | memset(busbyte, 0, sizeof(busbyte)); | ||
377 | memset(unituse, 0, sizeof(unituse)); | ||
378 | for (i = 0; i < n_ev; ++i) { | ||
379 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
380 | if (pmc) { | ||
381 | if (pmc_inuse & (1 << (pmc - 1))) | ||
382 | return -1; | ||
383 | pmc_inuse |= 1 << (pmc - 1); | ||
384 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
385 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
386 | } | ||
387 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
388 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
389 | lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
390 | if (unit) { | ||
391 | if (!pmc) | ||
392 | ++pmc_grp_use[byte & 1]; | ||
393 | if (unit == 6 || unit == 8) | ||
394 | /* map alt ISU1/IFU codes: 6->2, 8->3 */ | ||
395 | unit = (unit >> 1) - 1; | ||
396 | if (busbyte[byte] && busbyte[byte] != unit) | ||
397 | return -1; | ||
398 | busbyte[byte] = unit; | ||
399 | lower <<= unit; | ||
400 | if (unituse[unit] && lower != (unitlower & lower)) | ||
401 | return -1; | ||
402 | unituse[unit] = 1; | ||
403 | unitlower |= lower; | ||
404 | } | ||
405 | } | ||
406 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
407 | return -1; | ||
408 | |||
409 | /* | ||
410 | * Assign resources and set multiplexer selects. | ||
411 | * | ||
412 | * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. | ||
413 | * Each TTMx can only select one unit, but since | ||
414 | * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, | ||
415 | * we have some choices. | ||
416 | */ | ||
417 | if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { | ||
418 | unituse[6] = 1; /* Move 2 to 6 */ | ||
419 | unituse[2] = 0; | ||
420 | } | ||
421 | if (unituse[3] & (unituse[1] | unituse[2])) { | ||
422 | unituse[8] = 1; /* Move 3 to 8 */ | ||
423 | unituse[3] = 0; | ||
424 | unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); | ||
425 | } | ||
426 | /* Check only one unit per TTMx */ | ||
427 | if (unituse[1] + unituse[2] + unituse[3] > 1 || | ||
428 | unituse[4] + unituse[6] + unituse[7] > 1 || | ||
429 | unituse[8] + unituse[9] > 1 || | ||
430 | (unituse[5] | unituse[10] | unituse[11] | | ||
431 | unituse[13] | unituse[14])) | ||
432 | return -1; | ||
433 | |||
434 | /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ | ||
435 | mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2]) | ||
436 | << MMCR1_TTM0SEL_SH; | ||
437 | mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2) | ||
438 | << MMCR1_TTM1SEL_SH; | ||
439 | mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH; | ||
440 | |||
441 | /* Set TTCxSEL fields. */ | ||
442 | if (unitlower & 0xe) | ||
443 | mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; | ||
444 | if (unitlower & 0xf0) | ||
445 | mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; | ||
446 | if (unitlower & 0xf00) | ||
447 | mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; | ||
448 | if (unitlower & 0x7000) | ||
449 | mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; | ||
450 | |||
451 | /* Set byte lane select fields. */ | ||
452 | for (byte = 0; byte < 4; ++byte) { | ||
453 | unit = busbyte[byte]; | ||
454 | if (!unit) | ||
455 | continue; | ||
456 | if (unit == 0xf) { | ||
457 | /* special case for GPS */ | ||
458 | mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); | ||
459 | } else { | ||
460 | if (!unituse[unit]) | ||
461 | ttm = unit - 1; /* 2->1, 3->2 */ | ||
462 | else | ||
463 | ttm = unit >> 2; | ||
464 | mmcr1 |= (unsigned long)ttm | ||
465 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
466 | } | ||
467 | } | ||
468 | |||
469 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
470 | for (i = 0; i < n_ev; ++i) { | ||
471 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
472 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
473 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
474 | psel = event[i] & PM_PMCSEL_MSK; | ||
475 | if (!pmc) { | ||
476 | /* Bus event or 00xxx direct event (off or cycles) */ | ||
477 | if (unit) | ||
478 | psel |= 0x10 | ((byte & 2) << 2); | ||
479 | for (pmc = 0; pmc < 8; ++pmc) { | ||
480 | if (pmc_inuse & (1 << pmc)) | ||
481 | continue; | ||
482 | grp = (pmc >> 1) & 1; | ||
483 | if (unit) { | ||
484 | if (grp == (byte & 1)) | ||
485 | break; | ||
486 | } else if (pmc_grp_use[grp] < 4) { | ||
487 | ++pmc_grp_use[grp]; | ||
488 | break; | ||
489 | } | ||
490 | } | ||
491 | pmc_inuse |= 1 << pmc; | ||
492 | } else { | ||
493 | /* Direct event */ | ||
494 | --pmc; | ||
495 | if (psel == 0 && (byte & 2)) | ||
496 | /* add events on higher-numbered bus */ | ||
497 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
498 | else if (psel == 6 && byte == 3) | ||
499 | /* seem to need to set sample_enable here */ | ||
500 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
501 | psel |= 8; | ||
502 | } | ||
503 | if (pmc <= 1) | ||
504 | mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
505 | else | ||
506 | mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
507 | if (pmc == 7) /* PMC8 */ | ||
508 | mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; | ||
509 | hwc[i] = pmc; | ||
510 | if (p4_marked_instr_event(event[i])) | ||
511 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
512 | } | ||
513 | |||
514 | if (pmc_inuse & 1) | ||
515 | mmcr0 |= MMCR0_PMC1CE; | ||
516 | if (pmc_inuse & 0xfe) | ||
517 | mmcr0 |= MMCR0_PMCjCE; | ||
518 | |||
519 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
520 | |||
521 | /* Return MMCRx values */ | ||
522 | mmcr[0] = mmcr0; | ||
523 | mmcr[1] = mmcr1; | ||
524 | mmcr[2] = mmcra; | ||
525 | return 0; | ||
526 | } | ||
527 | |||
528 | static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
529 | { | ||
530 | /* | ||
531 | * Setting the PMCxSEL field to 0 disables PMC x. | ||
532 | * (Note that pmc is 0-based here, not 1-based.) | ||
533 | */ | ||
534 | if (pmc <= 1) { | ||
535 | mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); | ||
536 | } else { | ||
537 | mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); | ||
538 | if (pmc == 7) | ||
539 | mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); | ||
540 | } | ||
541 | } | ||
542 | |||
543 | static int p4_generic_events[] = { | ||
544 | [PERF_COUNT_HW_CPU_CYCLES] = 7, | ||
545 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, | ||
546 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ | ||
547 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ | ||
548 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ | ||
549 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ | ||
550 | }; | ||
551 | |||
552 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
553 | |||
554 | /* | ||
555 | * Table of generalized cache-related events. | ||
556 | * 0 means not supported, -1 means nonsensical, other values | ||
557 | * are event codes. | ||
558 | */ | ||
559 | static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
560 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
561 | [C(OP_READ)] = { 0x8c10, 0x3c10 }, | ||
562 | [C(OP_WRITE)] = { 0x7c10, 0xc13 }, | ||
563 | [C(OP_PREFETCH)] = { 0xc35, 0 }, | ||
564 | }, | ||
565 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
566 | [C(OP_READ)] = { 0, 0 }, | ||
567 | [C(OP_WRITE)] = { -1, -1 }, | ||
568 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
569 | }, | ||
570 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
571 | [C(OP_READ)] = { 0, 0 }, | ||
572 | [C(OP_WRITE)] = { 0, 0 }, | ||
573 | [C(OP_PREFETCH)] = { 0xc34, 0 }, | ||
574 | }, | ||
575 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
576 | [C(OP_READ)] = { 0, 0x904 }, | ||
577 | [C(OP_WRITE)] = { -1, -1 }, | ||
578 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
579 | }, | ||
580 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
581 | [C(OP_READ)] = { 0, 0x900 }, | ||
582 | [C(OP_WRITE)] = { -1, -1 }, | ||
583 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
584 | }, | ||
585 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
586 | [C(OP_READ)] = { 0x330, 0x331 }, | ||
587 | [C(OP_WRITE)] = { -1, -1 }, | ||
588 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
589 | }, | ||
590 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
591 | [C(OP_READ)] = { -1, -1 }, | ||
592 | [C(OP_WRITE)] = { -1, -1 }, | ||
593 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
594 | }, | ||
595 | }; | ||
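
Reading the table: the core unpacks a PERF_TYPE_HW_CACHE config one byte per axis, exactly as hw_perf_cache_event did earlier, then indexes here. A worked example:

    int ev = power4_cache_events[0][0][1];  /* L1D / OP_READ / MISS */
    /* ev == 0x3c10 (PM_LD_MISS_L1), the same code the generic
     * PERF_COUNT_HW_CACHE_MISSES event maps to above. */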
596 | |||
597 | static struct power_pmu power4_pmu = { | ||
598 | .name = "POWER4/4+", | ||
599 | .n_counter = 8, | ||
600 | .max_alternatives = 5, | ||
601 | .add_fields = 0x0000001100005555ul, | ||
602 | .test_adder = 0x0011083300000000ul, | ||
603 | .compute_mmcr = p4_compute_mmcr, | ||
604 | .get_constraint = p4_get_constraint, | ||
605 | .get_alternatives = p4_get_alternatives, | ||
606 | .disable_pmc = p4_disable_pmc, | ||
607 | .n_generic = ARRAY_SIZE(p4_generic_events), | ||
608 | .generic_events = p4_generic_events, | ||
609 | .cache_events = &power4_cache_events, | ||
610 | }; | ||
611 | |||
612 | static int __init init_power4_pmu(void) | ||
613 | { | ||
614 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
615 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4")) | ||
616 | return -ENODEV; | ||
617 | |||
618 | return register_power_pmu(&power4_pmu); | ||
619 | } | ||
620 | |||
621 | early_initcall(init_power4_pmu); | ||
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c deleted file mode 100644 index a8757baa28f3..000000000000 --- a/arch/powerpc/kernel/power5+-pmu.c +++ /dev/null | |||
@@ -1,690 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5+/++ (not POWER5) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) | ||
19 | */ | ||
20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 7 | ||
27 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
28 | #define PM_GRS_MSK 7 | ||
29 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
30 | #define PM_PMCSEL_MSK 0x7f | ||
31 | |||
32 | /* Values in PM_UNIT field */ | ||
33 | #define PM_FPU 0 | ||
34 | #define PM_ISU0 1 | ||
35 | #define PM_IFU 2 | ||
36 | #define PM_ISU1 3 | ||
37 | #define PM_IDU 4 | ||
38 | #define PM_ISU0_ALT 6 | ||
39 | #define PM_GRS 7 | ||
40 | #define PM_LSU0 8 | ||
41 | #define PM_LSU1 0xc | ||
42 | #define PM_LASTUNIT 0xc | ||
43 | |||
44 | /* | ||
45 | * Bits in MMCR1 for POWER5+ | ||
46 | */ | ||
47 | #define MMCR1_TTM0SEL_SH 62 | ||
48 | #define MMCR1_TTM1SEL_SH 60 | ||
49 | #define MMCR1_TTM2SEL_SH 58 | ||
50 | #define MMCR1_TTM3SEL_SH 56 | ||
51 | #define MMCR1_TTMSEL_MSK 3 | ||
52 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
53 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
54 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
55 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
56 | #define MMCR1_GRS_L2SEL_SH 46 | ||
57 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
58 | #define MMCR1_GRS_L3SEL_SH 44 | ||
59 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
60 | #define MMCR1_GRS_MCSEL_SH 41 | ||
61 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
62 | #define MMCR1_GRS_FABSEL_SH 39 | ||
63 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
64 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
65 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
66 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
67 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
68 | #define MMCR1_PMC1SEL_SH 25 | ||
69 | #define MMCR1_PMC2SEL_SH 17 | ||
70 | #define MMCR1_PMC3SEL_SH 9 | ||
71 | #define MMCR1_PMC4SEL_SH 1 | ||
72 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
73 | #define MMCR1_PMCSEL_MSK 0x7f | ||
74 | |||
75 | /* | ||
76 | * Layout of constraint bits: | ||
77 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
78 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
79 | * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> | ||
80 | * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 | ||
81 | * | ||
82 | * NC - number of counters | ||
83 | * 51: NC error 0x0008_0000_0000_0000 | ||
84 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
85 | * | ||
86 | * G0..G3 - GRS mux constraints | ||
87 | * 46-47: GRS_L2SEL value | ||
88 | * 44-45: GRS_L3SEL value | ||
89 | * 41-43: GRS_MCSEL value | ||
90 | * 39-40: GRS_FABSEL value | ||
91 | * Note that these match up with their bit positions in MMCR1 | ||
92 | * | ||
93 | * T0 - TTM0 constraint | ||
94 | * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 | ||
95 | * | ||
96 | * T1 - TTM1 constraint | ||
97 | * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 | ||
98 | * | ||
99 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
100 | * 33: UC3 error 0x02_0000_0000 | ||
101 | * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 | ||
102 | * 31: ISU0 events needed 0x00_8000_0000 | ||
103 | * 30: IDU|GRS events needed 0x00_4000_0000 | ||
104 | * | ||
105 | * B0 | ||
106 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
107 | * Encoding as for the event code | ||
108 | * | ||
109 | * B1, B2, B3 | ||
110 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
111 | * | ||
112 | * P6 | ||
113 | * 11: P6 error 0x800 | ||
114 | * 10-11: Count of events needing PMC6 | ||
115 | * | ||
116 | * P1..P5 | ||
117 | * 0-9: Count of events needing PMC1..PMC5 | ||
118 | */ | ||
119 | |||
120 | static const int grsel_shift[8] = { | ||
121 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
122 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
123 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
124 | }; | ||
125 | |||
126 | /* Masks and values for using events from the various units */ | ||
127 | static unsigned long unit_cons[PM_LASTUNIT+1][2] = { | ||
128 | [PM_FPU] = { 0x3200000000ul, 0x0100000000ul }, | ||
129 | [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul }, | ||
130 | [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul }, | ||
131 | [PM_IFU] = { 0x3200000000ul, 0x2100000000ul }, | ||
132 | [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul }, | ||
133 | [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul }, | ||
134 | }; | ||
135 | |||
136 | static int power5p_get_constraint(u64 event, unsigned long *maskp, | ||
137 | unsigned long *valp) | ||
138 | { | ||
139 | int pmc, byte, unit, sh; | ||
140 | int bit, fmask; | ||
141 | unsigned long mask = 0, value = 0; | ||
142 | |||
143 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
144 | if (pmc) { | ||
145 | if (pmc > 6) | ||
146 | return -1; | ||
147 | sh = (pmc - 1) * 2; | ||
148 | mask |= 2 << sh; | ||
149 | value |= 1 << sh; | ||
150 | if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) | ||
151 | return -1; | ||
152 | } | ||
153 | if (event & PM_BUSEVENT_MSK) { | ||
154 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
155 | if (unit > PM_LASTUNIT) | ||
156 | return -1; | ||
157 | if (unit == PM_ISU0_ALT) | ||
158 | unit = PM_ISU0; | ||
159 | mask |= unit_cons[unit][0]; | ||
160 | value |= unit_cons[unit][1]; | ||
161 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
162 | if (byte >= 4) { | ||
163 | if (unit != PM_LSU1) | ||
164 | return -1; | ||
165 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
166 | ++unit; | ||
167 | byte &= 3; | ||
168 | } | ||
169 | if (unit == PM_GRS) { | ||
170 | bit = event & 7; | ||
171 | fmask = (bit == 6)? 7: 3; | ||
172 | sh = grsel_shift[bit]; | ||
173 | mask |= (unsigned long)fmask << sh; | ||
174 | value |= (unsigned long)((event >> PM_GRS_SH) & fmask) | ||
175 | << sh; | ||
176 | } | ||
177 | /* Set byte lane select field */ | ||
178 | mask |= 0xfUL << (24 - 4 * byte); | ||
179 | value |= (unsigned long)unit << (24 - 4 * byte); | ||
180 | } | ||
181 | if (pmc < 5) { | ||
182 | /* need a counter from PMC1-4 set */ | ||
183 | mask |= 0x8000000000000ul; | ||
184 | value |= 0x1000000000000ul; | ||
185 | } | ||
186 | *maskp = mask; | ||
187 | *valp = value; | ||
188 | return 0; | ||
189 | } | ||
190 | |||
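These mask/value pairs are consumed by the generic powerpc perf code, not by this file. The sketch below is a simplified pairwise version of that check (the real code scans all n events and uses the PMU's add_fields/test_adder, defined for this PMU near the bottom of the file); it is shown only to illustrate what the encoding buys:

/*
 * Simplified sketch, not the kernel's actual scheduler loop:
 * select fields OR together, fields under add_fields add, and
 * priming the sum with test_adder makes an over-committed count
 * carry into an error bit that some event's mask watches.
 */
static int constraints_compatible(unsigned long m1, unsigned long v1,
				  unsigned long m2, unsigned long v2,
				  unsigned long add_fields,
				  unsigned long test_adder)
{
	unsigned long nv = (v1 | v2) + (v1 & v2 & add_fields);

	if (((nv + test_adder) ^ v1) & m1)
		return 0;		/* conflict visible to event 1 */
	if (((nv + test_adder) ^ v2) & m2)
		return 0;		/* conflict visible to event 2 */
	return 1;
}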
191 | static int power5p_limited_pmc_event(u64 event) | ||
192 | { | ||
193 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
194 | |||
195 | return pmc == 5 || pmc == 6; | ||
196 | } | ||
197 | |||
198 | #define MAX_ALT 3 /* at most 3 alternatives for any event */ | ||
199 | |||
200 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
201 | { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ | ||
202 | { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ | ||
203 | { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ | ||
204 | { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ | ||
205 | { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ | ||
206 | { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ | ||
207 | { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ | ||
208 | { 0x100005, 0x600005 }, /* PM_RUN_CYC */ | ||
209 | { 0x100009, 0x200009 }, /* PM_INST_CMPL */ | ||
210 | { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ | ||
211 | { 0x300009, 0x400009 }, /* PM_INST_DISP */ | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Scan the alternatives table for a match and return the | ||
216 | * index into the alternatives table if found, else -1. | ||
217 | */ | ||
218 | static int find_alternative(unsigned int event) | ||
219 | { | ||
220 | int i, j; | ||
221 | |||
222 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
223 | if (event < event_alternatives[i][0]) | ||
224 | break; | ||
225 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
226 | if (event == event_alternatives[i][j]) | ||
227 | return i; | ||
228 | } | ||
229 | return -1; | ||
230 | } | ||
231 | |||
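Two concrete lookups against the table above; note that the early break relies on event_alternatives[] being sorted by its first column:

/*
 * find_alternative(0x230e3) == 3    row { 0x230e3, 0x223087, 0x3230a0 }
 * find_alternative(0x12345) == -1   no alternatives known
 */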
232 | static const unsigned char bytedecode_alternatives[4][4] = { | ||
233 | /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, | ||
234 | /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, | ||
235 | /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, | ||
236 | /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } | ||
237 | }; | ||
238 | |||
239 | /* | ||
240 | * Some direct events for decodes of event bus byte 3 have alternative | ||
241 | * PMCSEL values on other counters. This returns the alternative | ||
242 | * event code for those that do, or -1 otherwise. This also handles | ||
243 | * alternative PMCSEL values for add events. | ||
244 | */ | ||
245 | static s64 find_alternative_bdecode(u64 event) | ||
246 | { | ||
247 | int pmc, altpmc, pp, j; | ||
248 | |||
249 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
250 | if (pmc == 0 || pmc > 4) | ||
251 | return -1; | ||
252 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
253 | pp = event & PM_PMCSEL_MSK; | ||
254 | for (j = 0; j < 4; ++j) { | ||
255 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
256 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
257 | (altpmc << PM_PMC_SH) | | ||
258 | bytedecode_alternatives[altpmc - 1][j]; | ||
259 | } | ||
260 | } | ||
261 | |||
262 | /* new decode alternatives for power5+ */ | ||
263 | if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) | ||
264 | return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); | ||
265 | if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) | ||
266 | return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); | ||
267 | |||
268 | /* alternative add event encodings */ | ||
269 | if (pp == 0x10 || pp == 0x28) | ||
270 | return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | | ||
271 | (altpmc << PM_PMC_SH); | ||
272 | |||
273 | return -1; | ||
274 | } | ||
275 | |||
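Two worked instances of the remapping above (the event codes are hypothetical but follow the field encoding at the top of the file):

/*
 * Byte-decode swap: PMC1/PMCSEL 0x23 pairs with PMC4/PMCSEL 0x17, so
 *   find_alternative_bdecode(0x100023) == 0x400017
 * POWER5+-only decode alternative: PMC1/PMCSEL 0x0d moves two PMCs up
 * with the PMCSEL rebased from 0x0d to 0x2e, so
 *   find_alternative_bdecode(0x10000d) == 0x30002e
 */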
276 | static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
277 | { | ||
278 | int i, j, nalt = 1; | ||
279 | int nlim; | ||
280 | s64 ae; | ||
281 | |||
282 | alt[0] = event; | ||
283 | nalt = 1; | ||
284 | nlim = power5p_limited_pmc_event(event); | ||
285 | i = find_alternative(event); | ||
286 | if (i >= 0) { | ||
287 | for (j = 0; j < MAX_ALT; ++j) { | ||
288 | ae = event_alternatives[i][j]; | ||
289 | if (ae && ae != event) | ||
290 | alt[nalt++] = ae; | ||
291 | nlim += power5p_limited_pmc_event(ae); | ||
292 | } | ||
293 | } else { | ||
294 | ae = find_alternative_bdecode(event); | ||
295 | if (ae > 0) | ||
296 | alt[nalt++] = ae; | ||
297 | } | ||
298 | |||
299 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
300 | /* | ||
301 | * We're only counting in RUN state, | ||
302 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
303 | * and PM_INST_CMPL === PM_RUN_INST_CMPL. | ||
304 | * This doesn't include alternatives that don't provide | ||
305 | * any extra flexibility in assigning PMCs (e.g. | ||
306 | * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). | ||
307 | * Note that even with these additional alternatives | ||
308 | * we never end up with more than 3 alternatives for any event. | ||
309 | */ | ||
310 | j = nalt; | ||
311 | for (i = 0; i < nalt; ++i) { | ||
312 | switch (alt[i]) { | ||
313 | case 0xf: /* PM_CYC */ | ||
314 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
315 | ++nlim; | ||
316 | break; | ||
317 | case 0x600005: /* PM_RUN_CYC */ | ||
318 | alt[j++] = 0xf; | ||
319 | break; | ||
320 | case 0x100009: /* PM_INST_CMPL */ | ||
321 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
322 | ++nlim; | ||
323 | break; | ||
324 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
325 | alt[j++] = 0x100009; /* PM_INST_CMPL */ | ||
326 | alt[j++] = 0x200009; | ||
327 | break; | ||
328 | } | ||
329 | } | ||
330 | nalt = j; | ||
331 | } | ||
332 | |||
333 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
334 | /* remove the limited PMC events */ | ||
335 | j = 0; | ||
336 | for (i = 0; i < nalt; ++i) { | ||
337 | if (!power5p_limited_pmc_event(alt[i])) { | ||
338 | alt[j] = alt[i]; | ||
339 | ++j; | ||
340 | } | ||
341 | } | ||
342 | nalt = j; | ||
343 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
344 | /* remove all but the limited PMC events */ | ||
345 | j = 0; | ||
346 | for (i = 0; i < nalt; ++i) { | ||
347 | if (power5p_limited_pmc_event(alt[i])) { | ||
348 | alt[j] = alt[i]; | ||
349 | ++j; | ||
350 | } | ||
351 | } | ||
352 | nalt = j; | ||
353 | } | ||
354 | |||
355 | return nalt; | ||
356 | } | ||
357 | |||
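For example (a hypothetical call; the flag combinations are supplied by the generic powerpc perf code):

/*
 * event = 0xf (PM_CYC):
 * flags = PPMU_ONLY_COUNT_RUN | PPMU_LIMITED_PMC_OK
 *   -> alt[] = { 0xf, 0x600005 }, returns 2
 * flags = PPMU_ONLY_COUNT_RUN | PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD
 *   -> alt[] = { 0x600005 }, returns 1
 * flags = PPMU_ONLY_COUNT_RUN alone
 *   -> the limited encoding is stripped again: alt[] = { 0xf }, returns 1
 */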
358 | /* | ||
359 | * Map of which direct events on which PMCs are marked instruction events. | ||
360 | * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. | ||
361 | * Bit 0 is set if it is marked for all PMCs. | ||
362 | * The 0x80 bit indicates a byte decode PMCSEL value. | ||
363 | */ | ||
364 | static unsigned char direct_event_is_marked[0x28] = { | ||
365 | 0, /* 00 */ | ||
366 | 0x1f, /* 01 PM_IOPS_CMPL */ | ||
367 | 0x2, /* 02 PM_MRK_GRP_DISP */ | ||
368 | 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ | ||
369 | 0, /* 04 */ | ||
370 | 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ | ||
371 | 0x80, /* 06 */ | ||
372 | 0x80, /* 07 */ | ||
373 | 0, 0, 0,/* 08 - 0a */ | ||
374 | 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ | ||
375 | 0, /* 0c */ | ||
376 | 0x80, /* 0d */ | ||
377 | 0x80, /* 0e */ | ||
378 | 0, /* 0f */ | ||
379 | 0, /* 10 */ | ||
380 | 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ | ||
381 | 0, /* 12 */ | ||
382 | 0x10, /* 13 PM_MRK_GRP_CMPL */ | ||
383 | 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ | ||
384 | 0x2, /* 15 PM_MRK_GRP_ISSUED */ | ||
385 | 0x80, /* 16 */ | ||
386 | 0x80, /* 17 */ | ||
387 | 0, 0, 0, 0, 0, | ||
388 | 0x80, /* 1d */ | ||
389 | 0x80, /* 1e */ | ||
390 | 0, /* 1f */ | ||
391 | 0x80, /* 20 */ | ||
392 | 0x80, /* 21 */ | ||
393 | 0x80, /* 22 */ | ||
394 | 0x80, /* 23 */ | ||
395 | 0x80, /* 24 */ | ||
396 | 0x80, /* 25 */ | ||
397 | 0x80, /* 26 */ | ||
398 | 0x80, /* 27 */ | ||
399 | }; | ||
400 | |||
401 | /* | ||
402 | * Returns 1 if event counts things relating to marked instructions | ||
403 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
404 | */ | ||
405 | static int power5p_marked_instr_event(u64 event) | ||
406 | { | ||
407 | int pmc, psel; | ||
408 | int bit, byte, unit; | ||
409 | u32 mask; | ||
410 | |||
411 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
412 | psel = event & PM_PMCSEL_MSK; | ||
413 | if (pmc >= 5) | ||
414 | return 0; | ||
415 | |||
416 | bit = -1; | ||
417 | if (psel < sizeof(direct_event_is_marked)) { | ||
418 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
419 | return 1; | ||
420 | if (direct_event_is_marked[psel] & 0x80) | ||
421 | bit = 4; | ||
422 | else if (psel == 0x08) | ||
423 | bit = pmc - 1; | ||
424 | else if (psel == 0x10) | ||
425 | bit = 4 - pmc; | ||
426 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
427 | bit = 4; | ||
428 | } else if ((psel & 0x48) == 0x40) { | ||
429 | bit = psel & 7; | ||
430 | } else if (psel == 0x28) { | ||
431 | bit = pmc - 1; | ||
432 | } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { | ||
433 | bit = 4; | ||
434 | } | ||
435 | |||
436 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
437 | return 0; | ||
438 | |||
439 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
440 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
441 | if (unit == PM_LSU0) { | ||
442 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
443 | mask = 0x5dff00; | ||
444 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
445 | byte -= 4; | ||
446 | /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ | ||
447 | mask = 0x5f11c000; | ||
448 | } else | ||
449 | return 0; | ||
450 | |||
451 | return (mask >> (byte * 8 + bit)) & 1; | ||
452 | } | ||
453 | |||
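An example of the final mask test, using the LSU0 mask from above:

/*
 * mask = 0x5dff00, byte = 1, bit = 4:
 *   (0x5dff00 >> (1 * 8 + 4)) & 1 == (0x5dff00 >> 12) & 1 == 1
 * so the event is marked; byte = 2, bit = 1 selects a clear bit
 * (byte 2 carries only bits 0, 2-4 and 6) and returns 0.
 */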
454 | static int power5p_compute_mmcr(u64 event[], int n_ev, | ||
455 | unsigned int hwc[], unsigned long mmcr[]) | ||
456 | { | ||
457 | unsigned long mmcr1 = 0; | ||
458 | unsigned long mmcra = 0; | ||
459 | unsigned int pmc, unit, byte, psel; | ||
460 | unsigned int ttm; | ||
461 | int i, isbus, bit, grsel; | ||
462 | unsigned int pmc_inuse = 0; | ||
463 | unsigned char busbyte[4]; | ||
464 | unsigned char unituse[16]; | ||
465 | int ttmuse; | ||
466 | |||
467 | if (n_ev > 6) | ||
468 | return -1; | ||
469 | |||
470 | /* First pass to count resource use */ | ||
471 | memset(busbyte, 0, sizeof(busbyte)); | ||
472 | memset(unituse, 0, sizeof(unituse)); | ||
473 | for (i = 0; i < n_ev; ++i) { | ||
474 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
475 | if (pmc) { | ||
476 | if (pmc > 6) | ||
477 | return -1; | ||
478 | if (pmc_inuse & (1 << (pmc - 1))) | ||
479 | return -1; | ||
480 | pmc_inuse |= 1 << (pmc - 1); | ||
481 | } | ||
482 | if (event[i] & PM_BUSEVENT_MSK) { | ||
483 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
484 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
485 | if (unit > PM_LASTUNIT) | ||
486 | return -1; | ||
487 | if (unit == PM_ISU0_ALT) | ||
488 | unit = PM_ISU0; | ||
489 | if (byte >= 4) { | ||
490 | if (unit != PM_LSU1) | ||
491 | return -1; | ||
492 | ++unit; | ||
493 | byte &= 3; | ||
494 | } | ||
495 | if (busbyte[byte] && busbyte[byte] != unit) | ||
496 | return -1; | ||
497 | busbyte[byte] = unit; | ||
498 | unituse[unit] = 1; | ||
499 | } | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Assign resources and set multiplexer selects. | ||
504 | * | ||
505 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
506 | * choice we have to deal with. | ||
507 | */ | ||
508 | if (unituse[PM_ISU0] & | ||
509 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
510 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
511 | unituse[PM_ISU0] = 0; | ||
512 | } | ||
513 | /* Set TTM[01]SEL fields. */ | ||
514 | ttmuse = 0; | ||
515 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
516 | if (!unituse[i]) | ||
517 | continue; | ||
518 | if (ttmuse++) | ||
519 | return -1; | ||
520 | mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; | ||
521 | } | ||
522 | ttmuse = 0; | ||
523 | for (; i <= PM_GRS; ++i) { | ||
524 | if (!unituse[i]) | ||
525 | continue; | ||
526 | if (ttmuse++) | ||
527 | return -1; | ||
528 | mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; | ||
529 | } | ||
530 | if (ttmuse > 1) | ||
531 | return -1; | ||
532 | |||
533 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
534 | for (byte = 0; byte < 4; ++byte) { | ||
535 | unit = busbyte[byte]; | ||
536 | if (!unit) | ||
537 | continue; | ||
538 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
539 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
540 | unit = PM_ISU0_ALT; | ||
541 | } else if (unit == PM_LSU1 + 1) { | ||
542 | /* select lower word of LSU1 for this byte */ | ||
543 | mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
544 | } | ||
545 | ttm = unit >> 2; | ||
546 | mmcr1 |= (unsigned long)ttm | ||
547 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
548 | } | ||
549 | |||
550 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
551 | for (i = 0; i < n_ev; ++i) { | ||
552 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
553 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
554 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
555 | psel = event[i] & PM_PMCSEL_MSK; | ||
556 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
557 | if (!pmc) { | ||
558 | /* Bus event or any-PMC direct event */ | ||
559 | for (pmc = 0; pmc < 4; ++pmc) { | ||
560 | if (!(pmc_inuse & (1 << pmc))) | ||
561 | break; | ||
562 | } | ||
563 | if (pmc >= 4) | ||
564 | return -1; | ||
565 | pmc_inuse |= 1 << pmc; | ||
566 | } else if (pmc <= 4) { | ||
567 | /* Direct event */ | ||
568 | --pmc; | ||
569 | if (isbus && (byte & 2) && | ||
570 | (psel == 8 || psel == 0x10 || psel == 0x28)) | ||
571 | /* add events on higher-numbered bus */ | ||
572 | mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
573 | } else { | ||
574 | /* Instructions or run cycles on PMC5/6 */ | ||
575 | --pmc; | ||
576 | } | ||
577 | if (isbus && unit == PM_GRS) { | ||
578 | bit = psel & 7; | ||
579 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
580 | mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; | ||
581 | } | ||
582 | if (power5p_marked_instr_event(event[i])) | ||
583 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
584 | if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) | ||
585 | /* select alternate byte lane */ | ||
586 | psel |= 0x10; | ||
587 | if (pmc <= 3) | ||
588 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
589 | hwc[i] = pmc; | ||
590 | } | ||
591 | |||
592 | /* Return MMCRx values */ | ||
593 | mmcr[0] = 0; | ||
594 | if (pmc_inuse & 1) | ||
595 | mmcr[0] = MMCR0_PMC1CE; | ||
596 | if (pmc_inuse & 0x3e) | ||
597 | mmcr[0] |= MMCR0_PMCjCE; | ||
598 | mmcr[1] = mmcr1; | ||
599 | mmcr[2] = mmcra; | ||
600 | return 0; | ||
601 | } | ||
602 | |||
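A sketch of how this is driven (simplified and for illustration only; in the kernel the generic powerpc perf code makes this call when scheduling events onto the PMU):

/* Illustrative only: schedule PM_INST_CMPL and PM_CYC together. */
static void example_schedule(void)
{
	u64 ev[2] = { 0x100009, 0xf };	/* PM_INST_CMPL (PMC1), PM_CYC (any PMC) */
	unsigned int hwc[2];
	unsigned long mmcr[3];

	if (power5p_compute_mmcr(ev, 2, hwc, mmcr) == 0) {
		/* hwc[0] == 0 (PMC1), hwc[1] == 1 (first free PMC) */
		/* mmcr[0..2] hold the MMCR0/MMCR1/MMCRA images to program */
	}
}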
603 | static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
604 | { | ||
605 | if (pmc <= 3) | ||
606 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
607 | } | ||
608 | |||
609 | static int power5p_generic_events[] = { | ||
610 | [PERF_COUNT_HW_CPU_CYCLES] = 0xf, | ||
611 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, | ||
612 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ | ||
613 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ | ||
614 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ | ||
615 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ | ||
616 | }; | ||
617 | |||
618 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
619 | |||
620 | /* | ||
621 | * Table of generalized cache-related events. | ||
622 | * 0 means not supported, -1 means nonsensical, other values | ||
623 | * are event codes. | ||
624 | */ | ||
625 | static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
626 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
627 | [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, | ||
628 | [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, | ||
629 | [C(OP_PREFETCH)] = { 0xc70e7, -1 }, | ||
630 | }, | ||
631 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
632 | [C(OP_READ)] = { 0, 0 }, | ||
633 | [C(OP_WRITE)] = { -1, -1 }, | ||
634 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
635 | }, | ||
636 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
637 | [C(OP_READ)] = { 0, 0 }, | ||
638 | [C(OP_WRITE)] = { 0, 0 }, | ||
639 | [C(OP_PREFETCH)] = { 0xc50c3, 0 }, | ||
640 | }, | ||
641 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
642 | [C(OP_READ)] = { 0xc20e4, 0x800c4 }, | ||
643 | [C(OP_WRITE)] = { -1, -1 }, | ||
644 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
645 | }, | ||
646 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
647 | [C(OP_READ)] = { 0, 0x800c0 }, | ||
648 | [C(OP_WRITE)] = { -1, -1 }, | ||
649 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
650 | }, | ||
651 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
652 | [C(OP_READ)] = { 0x230e4, 0x230e5 }, | ||
653 | [C(OP_WRITE)] = { -1, -1 }, | ||
654 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
655 | }, | ||
656 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
657 | [C(OP_READ)] = { -1, -1 }, | ||
658 | [C(OP_WRITE)] = { -1, -1 }, | ||
659 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
660 | }, | ||
661 | }; | ||
662 | |||
663 | static struct power_pmu power5p_pmu = { | ||
664 | .name = "POWER5+/++", | ||
665 | .n_counter = 6, | ||
666 | .max_alternatives = MAX_ALT, | ||
667 | .add_fields = 0x7000000000055ul, | ||
668 | .test_adder = 0x3000040000000ul, | ||
669 | .compute_mmcr = power5p_compute_mmcr, | ||
670 | .get_constraint = power5p_get_constraint, | ||
671 | .get_alternatives = power5p_get_alternatives, | ||
672 | .disable_pmc = power5p_disable_pmc, | ||
673 | .limited_pmc_event = power5p_limited_pmc_event, | ||
674 | .flags = PPMU_LIMITED_PMC5_6, | ||
675 | .n_generic = ARRAY_SIZE(power5p_generic_events), | ||
676 | .generic_events = power5p_generic_events, | ||
677 | .cache_events = &power5p_cache_events, | ||
678 | }; | ||
679 | |||
680 | static int __init init_power5p_pmu(void) | ||
681 | { | ||
682 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
683 | (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") | ||
684 | && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))) | ||
685 | return -ENODEV; | ||
686 | |||
687 | return register_power_pmu(&power5p_pmu); | ||
688 | } | ||
689 | |||
690 | early_initcall(init_power5p_pmu); | ||
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c deleted file mode 100644 index e7f06eb7a861..000000000000 --- a/arch/powerpc/kernel/power5-pmu.c +++ /dev/null | |||
@@ -1,629 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5 (not POWER5++) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER5 (not POWER5++) | ||
19 | */ | ||
20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 7 | ||
27 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
28 | #define PM_GRS_MSK 7 | ||
29 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
30 | #define PM_PMCSEL_MSK 0x7f | ||
31 | |||
32 | /* Values in PM_UNIT field */ | ||
33 | #define PM_FPU 0 | ||
34 | #define PM_ISU0 1 | ||
35 | #define PM_IFU 2 | ||
36 | #define PM_ISU1 3 | ||
37 | #define PM_IDU 4 | ||
38 | #define PM_ISU0_ALT 6 | ||
39 | #define PM_GRS 7 | ||
40 | #define PM_LSU0 8 | ||
41 | #define PM_LSU1 0xc | ||
42 | #define PM_LASTUNIT 0xc | ||
43 | |||
44 | /* | ||
45 | * Bits in MMCR1 for POWER5 | ||
46 | */ | ||
47 | #define MMCR1_TTM0SEL_SH 62 | ||
48 | #define MMCR1_TTM1SEL_SH 60 | ||
49 | #define MMCR1_TTM2SEL_SH 58 | ||
50 | #define MMCR1_TTM3SEL_SH 56 | ||
51 | #define MMCR1_TTMSEL_MSK 3 | ||
52 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
53 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
54 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
55 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
56 | #define MMCR1_GRS_L2SEL_SH 46 | ||
57 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
58 | #define MMCR1_GRS_L3SEL_SH 44 | ||
59 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
60 | #define MMCR1_GRS_MCSEL_SH 41 | ||
61 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
62 | #define MMCR1_GRS_FABSEL_SH 39 | ||
63 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
64 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
65 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
66 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
67 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
68 | #define MMCR1_PMC1SEL_SH 25 | ||
69 | #define MMCR1_PMC2SEL_SH 17 | ||
70 | #define MMCR1_PMC3SEL_SH 9 | ||
71 | #define MMCR1_PMC4SEL_SH 1 | ||
72 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
73 | #define MMCR1_PMCSEL_MSK 0x7f | ||
74 | |||
75 | /* | ||
76 | * Layout of constraint bits: | ||
77 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
78 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
79 | * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> | ||
80 | * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 | ||
81 | * | ||
82 | * T0 - TTM0 constraint | ||
83 | * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 | ||
84 | * | ||
85 | * T1 - TTM1 constraint | ||
86 | * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 | ||
87 | * | ||
88 | * NC - number of counters | ||
89 | * 51: NC error 0x0008_0000_0000_0000 | ||
90 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
91 | * | ||
92 | * G0..G3 - GRS mux constraints | ||
93 | * 46-47: GRS_L2SEL value | ||
94 | * 44-45: GRS_L3SEL value | ||
95 | * 41-43: GRS_MCSEL value | ||
96 | * 39-40: GRS_FABSEL value | ||
97 | * Note that these match up with their bit positions in MMCR1 | ||
98 | * | ||
99 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
100 | * 37: UC3 error 0x20_0000_0000 | ||
101 | * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 | ||
102 | * 35: ISU0 events needed 0x08_0000_0000 | ||
103 | * 34: IDU|GRS events needed 0x04_0000_0000 | ||
104 | * | ||
105 | * PS1 | ||
106 | * 33: PS1 error 0x2_0000_0000 | ||
107 | * 31-32: count of events needing PMC1/2 0x1_8000_0000 | ||
108 | * | ||
109 | * PS2 | ||
110 | * 30: PS2 error 0x4000_0000 | ||
111 | * 28-29: count of events needing PMC3/4 0x3000_0000 | ||
112 | * | ||
113 | * B0 | ||
114 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
115 | * Encoding as for the event code | ||
116 | * | ||
117 | * B1, B2, B3 | ||
118 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
119 | * | ||
120 | * P1..P6 | ||
121 | * 0-11: Count of events needing PMC1..PMC6 | ||
122 | */ | ||
123 | |||
124 | static const int grsel_shift[8] = { | ||
125 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
126 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
127 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
128 | }; | ||
129 | |||
130 | /* Masks and values for using events from the various units */ | ||
131 | static unsigned long unit_cons[PM_LASTUNIT+1][2] = { | ||
132 | [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul }, | ||
133 | [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul }, | ||
134 | [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul }, | ||
135 | [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul }, | ||
136 | [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul }, | ||
137 | [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul }, | ||
138 | }; | ||
139 | |||
140 | static int power5_get_constraint(u64 event, unsigned long *maskp, | ||
141 | unsigned long *valp) | ||
142 | { | ||
143 | int pmc, byte, unit, sh; | ||
144 | int bit, fmask; | ||
145 | unsigned long mask = 0, value = 0; | ||
146 | int grp = -1; | ||
147 | |||
148 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
149 | if (pmc) { | ||
150 | if (pmc > 6) | ||
151 | return -1; | ||
152 | sh = (pmc - 1) * 2; | ||
153 | mask |= 2 << sh; | ||
154 | value |= 1 << sh; | ||
155 | if (pmc <= 4) | ||
156 | grp = (pmc - 1) >> 1; | ||
157 | else if (event != 0x500009 && event != 0x600005) | ||
158 | return -1; | ||
159 | } | ||
160 | if (event & PM_BUSEVENT_MSK) { | ||
161 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
162 | if (unit > PM_LASTUNIT) | ||
163 | return -1; | ||
164 | if (unit == PM_ISU0_ALT) | ||
165 | unit = PM_ISU0; | ||
166 | mask |= unit_cons[unit][0]; | ||
167 | value |= unit_cons[unit][1]; | ||
168 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
169 | if (byte >= 4) { | ||
170 | if (unit != PM_LSU1) | ||
171 | return -1; | ||
172 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
173 | ++unit; | ||
174 | byte &= 3; | ||
175 | } | ||
176 | if (unit == PM_GRS) { | ||
177 | bit = event & 7; | ||
178 | fmask = (bit == 6)? 7: 3; | ||
179 | sh = grsel_shift[bit]; | ||
180 | mask |= (unsigned long)fmask << sh; | ||
181 | value |= (unsigned long)((event >> PM_GRS_SH) & fmask) | ||
182 | << sh; | ||
183 | } | ||
184 | /* | ||
185 | * Bus events on bytes 0 and 2 can be counted | ||
186 | * on PMC1/2; bytes 1 and 3 on PMC3/4. | ||
187 | */ | ||
188 | if (!pmc) | ||
189 | grp = byte & 1; | ||
190 | /* Set byte lane select field */ | ||
191 | mask |= 0xfUL << (24 - 4 * byte); | ||
192 | value |= (unsigned long)unit << (24 - 4 * byte); | ||
193 | } | ||
194 | if (grp == 0) { | ||
195 | /* increment PMC1/2 field */ | ||
196 | mask |= 0x200000000ul; | ||
197 | value |= 0x080000000ul; | ||
198 | } else if (grp == 1) { | ||
199 | /* increment PMC3/4 field */ | ||
200 | mask |= 0x40000000ul; | ||
201 | value |= 0x10000000ul; | ||
202 | } | ||
203 | if (pmc < 5) { | ||
204 | /* need a counter from PMC1-4 set */ | ||
205 | mask |= 0x8000000000000ul; | ||
206 | value |= 0x1000000000000ul; | ||
207 | } | ||
208 | *maskp = mask; | ||
209 | *valp = value; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
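The PS1/PS2 fields are the POWER5-specific part of this encoding (POWER5+ dropped them); a quick check of the arithmetic with hypothetical events:

/*
 * Each event needing PMC1/2 (grp == 0) contributes
 *   mask  |= 0x200000000   (PS1 error, bit 33)
 *   value |= 0x080000000   (one PMC1/2 user, bit 31)
 * Two such events sum to 0x100000000, still inside the 31-32 count
 * field.  With three, the test adder's bit 31 (test_adder below is
 * 0x3000490000000) pushes the sum to 0x200000000, the PS1 error bit,
 * so at most two events can share the two physical counters PMC1/2.
 */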
213 | #define MAX_ALT 3 /* at most 3 alternatives for any event */ | ||
214 | |||
215 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
216 | { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ | ||
217 | { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ | ||
218 | { 0x100005, 0x600005 }, /* PM_RUN_CYC */ | ||
219 | { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */ | ||
220 | { 0x300009, 0x400009 }, /* PM_INST_DISP */ | ||
221 | }; | ||
222 | |||
223 | /* | ||
224 | * Scan the alternatives table for a match and return the | ||
225 | * index into the alternatives table if found, else -1. | ||
226 | */ | ||
227 | static int find_alternative(u64 event) | ||
228 | { | ||
229 | int i, j; | ||
230 | |||
231 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
232 | if (event < event_alternatives[i][0]) | ||
233 | break; | ||
234 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
235 | if (event == event_alternatives[i][j]) | ||
236 | return i; | ||
237 | } | ||
238 | return -1; | ||
239 | } | ||
240 | |||
241 | static const unsigned char bytedecode_alternatives[4][4] = { | ||
242 | /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, | ||
243 | /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, | ||
244 | /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, | ||
245 | /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } | ||
246 | }; | ||
247 | |||
248 | /* | ||
249 | * Some direct events for decodes of event bus byte 3 have alternative | ||
250 | * PMCSEL values on other counters. This returns the alternative | ||
251 | * event code for those that do, or -1 otherwise. | ||
252 | */ | ||
253 | static s64 find_alternative_bdecode(u64 event) | ||
254 | { | ||
255 | int pmc, altpmc, pp, j; | ||
256 | |||
257 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
258 | if (pmc == 0 || pmc > 4) | ||
259 | return -1; | ||
260 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
261 | pp = event & PM_PMCSEL_MSK; | ||
262 | for (j = 0; j < 4; ++j) { | ||
263 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
264 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
265 | (altpmc << PM_PMC_SH) | | ||
266 | bytedecode_alternatives[altpmc - 1][j]; | ||
267 | } | ||
268 | } | ||
269 | return -1; | ||
270 | } | ||
271 | |||
272 | static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
273 | { | ||
274 | int i, j, nalt = 1; | ||
275 | s64 ae; | ||
276 | |||
277 | alt[0] = event; | ||
278 | nalt = 1; | ||
279 | i = find_alternative(event); | ||
280 | if (i >= 0) { | ||
281 | for (j = 0; j < MAX_ALT; ++j) { | ||
282 | ae = event_alternatives[i][j]; | ||
283 | if (ae && ae != event) | ||
284 | alt[nalt++] = ae; | ||
285 | } | ||
286 | } else { | ||
287 | ae = find_alternative_bdecode(event); | ||
288 | if (ae > 0) | ||
289 | alt[nalt++] = ae; | ||
290 | } | ||
291 | return nalt; | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Map of which direct events on which PMCs are marked instruction events. | ||
296 | * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. | ||
297 | * Bit 0 is set if it is marked for all PMCs. | ||
298 | * The 0x80 bit indicates a byte decode PMCSEL value. | ||
299 | */ | ||
300 | static unsigned char direct_event_is_marked[0x28] = { | ||
301 | 0, /* 00 */ | ||
302 | 0x1f, /* 01 PM_IOPS_CMPL */ | ||
303 | 0x2, /* 02 PM_MRK_GRP_DISP */ | ||
304 | 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ | ||
305 | 0, /* 04 */ | ||
306 | 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ | ||
307 | 0x80, /* 06 */ | ||
308 | 0x80, /* 07 */ | ||
309 | 0, 0, 0,/* 08 - 0a */ | ||
310 | 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ | ||
311 | 0, /* 0c */ | ||
312 | 0x80, /* 0d */ | ||
313 | 0x80, /* 0e */ | ||
314 | 0, /* 0f */ | ||
315 | 0, /* 10 */ | ||
316 | 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ | ||
317 | 0, /* 12 */ | ||
318 | 0x10, /* 13 PM_MRK_GRP_CMPL */ | ||
319 | 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ | ||
320 | 0x2, /* 15 PM_MRK_GRP_ISSUED */ | ||
321 | 0x80, /* 16 */ | ||
322 | 0x80, /* 17 */ | ||
323 | 0, 0, 0, 0, 0, | ||
324 | 0x80, /* 1d */ | ||
325 | 0x80, /* 1e */ | ||
326 | 0, /* 1f */ | ||
327 | 0x80, /* 20 */ | ||
328 | 0x80, /* 21 */ | ||
329 | 0x80, /* 22 */ | ||
330 | 0x80, /* 23 */ | ||
331 | 0x80, /* 24 */ | ||
332 | 0x80, /* 25 */ | ||
333 | 0x80, /* 26 */ | ||
334 | 0x80, /* 27 */ | ||
335 | }; | ||
336 | |||
337 | /* | ||
338 | * Returns 1 if event counts things relating to marked instructions | ||
339 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
340 | */ | ||
341 | static int power5_marked_instr_event(u64 event) | ||
342 | { | ||
343 | int pmc, psel; | ||
344 | int bit, byte, unit; | ||
345 | u32 mask; | ||
346 | |||
347 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
348 | psel = event & PM_PMCSEL_MSK; | ||
349 | if (pmc >= 5) | ||
350 | return 0; | ||
351 | |||
352 | bit = -1; | ||
353 | if (psel < sizeof(direct_event_is_marked)) { | ||
354 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
355 | return 1; | ||
356 | if (direct_event_is_marked[psel] & 0x80) | ||
357 | bit = 4; | ||
358 | else if (psel == 0x08) | ||
359 | bit = pmc - 1; | ||
360 | else if (psel == 0x10) | ||
361 | bit = 4 - pmc; | ||
362 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
363 | bit = 4; | ||
364 | } else if ((psel & 0x58) == 0x40) | ||
365 | bit = psel & 7; | ||
366 | |||
367 | if (!(event & PM_BUSEVENT_MSK)) | ||
368 | return 0; | ||
369 | |||
370 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
371 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
372 | if (unit == PM_LSU0) { | ||
373 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
374 | mask = 0x5dff00; | ||
375 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
376 | byte -= 4; | ||
377 | /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ | ||
378 | mask = 0x5f00c0aa; | ||
379 | } else | ||
380 | return 0; | ||
381 | |||
382 | return (mask >> (byte * 8 + bit)) & 1; | ||
383 | } | ||
384 | |||
385 | static int power5_compute_mmcr(u64 event[], int n_ev, | ||
386 | unsigned int hwc[], unsigned long mmcr[]) | ||
387 | { | ||
388 | unsigned long mmcr1 = 0; | ||
389 | unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | ||
390 | unsigned int pmc, unit, byte, psel; | ||
391 | unsigned int ttm, grp; | ||
392 | int i, isbus, bit, grsel; | ||
393 | unsigned int pmc_inuse = 0; | ||
394 | unsigned int pmc_grp_use[2]; | ||
395 | unsigned char busbyte[4]; | ||
396 | unsigned char unituse[16]; | ||
397 | int ttmuse; | ||
398 | |||
399 | if (n_ev > 6) | ||
400 | return -1; | ||
401 | |||
402 | /* First pass to count resource use */ | ||
403 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
404 | memset(busbyte, 0, sizeof(busbyte)); | ||
405 | memset(unituse, 0, sizeof(unituse)); | ||
406 | for (i = 0; i < n_ev; ++i) { | ||
407 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
408 | if (pmc) { | ||
409 | if (pmc > 6) | ||
410 | return -1; | ||
411 | if (pmc_inuse & (1 << (pmc - 1))) | ||
412 | return -1; | ||
413 | pmc_inuse |= 1 << (pmc - 1); | ||
414 | /* count 1/2 vs 3/4 use */ | ||
415 | if (pmc <= 4) | ||
416 | ++pmc_grp_use[(pmc - 1) >> 1]; | ||
417 | } | ||
418 | if (event[i] & PM_BUSEVENT_MSK) { | ||
419 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
420 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
421 | if (unit > PM_LASTUNIT) | ||
422 | return -1; | ||
423 | if (unit == PM_ISU0_ALT) | ||
424 | unit = PM_ISU0; | ||
425 | if (byte >= 4) { | ||
426 | if (unit != PM_LSU1) | ||
427 | return -1; | ||
428 | ++unit; | ||
429 | byte &= 3; | ||
430 | } | ||
431 | if (!pmc) | ||
432 | ++pmc_grp_use[byte & 1]; | ||
433 | if (busbyte[byte] && busbyte[byte] != unit) | ||
434 | return -1; | ||
435 | busbyte[byte] = unit; | ||
436 | unituse[unit] = 1; | ||
437 | } | ||
438 | } | ||
439 | if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) | ||
440 | return -1; | ||
441 | |||
442 | /* | ||
443 | * Assign resources and set multiplexer selects. | ||
444 | * | ||
445 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
446 | * choice we have to deal with. | ||
447 | */ | ||
448 | if (unituse[PM_ISU0] & | ||
449 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
450 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
451 | unituse[PM_ISU0] = 0; | ||
452 | } | ||
453 | /* Set TTM[01]SEL fields. */ | ||
454 | ttmuse = 0; | ||
455 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
456 | if (!unituse[i]) | ||
457 | continue; | ||
458 | if (ttmuse++) | ||
459 | return -1; | ||
460 | mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; | ||
461 | } | ||
462 | ttmuse = 0; | ||
463 | for (; i <= PM_GRS; ++i) { | ||
464 | if (!unituse[i]) | ||
465 | continue; | ||
466 | if (ttmuse++) | ||
467 | return -1; | ||
468 | mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; | ||
469 | } | ||
470 | if (ttmuse > 1) | ||
471 | return -1; | ||
472 | |||
473 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
474 | for (byte = 0; byte < 4; ++byte) { | ||
475 | unit = busbyte[byte]; | ||
476 | if (!unit) | ||
477 | continue; | ||
478 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
479 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
480 | unit = PM_ISU0_ALT; | ||
481 | } else if (unit == PM_LSU1 + 1) { | ||
482 | /* select lower word of LSU1 for this byte */ | ||
483 | mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
484 | } | ||
485 | ttm = unit >> 2; | ||
486 | mmcr1 |= (unsigned long)ttm | ||
487 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
488 | } | ||
489 | |||
490 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
491 | for (i = 0; i < n_ev; ++i) { | ||
492 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
493 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
494 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
495 | psel = event[i] & PM_PMCSEL_MSK; | ||
496 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
497 | if (!pmc) { | ||
498 | /* Bus event or any-PMC direct event */ | ||
499 | for (pmc = 0; pmc < 4; ++pmc) { | ||
500 | if (pmc_inuse & (1 << pmc)) | ||
501 | continue; | ||
502 | grp = (pmc >> 1) & 1; | ||
503 | if (isbus) { | ||
504 | if (grp == (byte & 1)) | ||
505 | break; | ||
506 | } else if (pmc_grp_use[grp] < 2) { | ||
507 | ++pmc_grp_use[grp]; | ||
508 | break; | ||
509 | } | ||
510 | } | ||
511 | pmc_inuse |= 1 << pmc; | ||
512 | } else if (pmc <= 4) { | ||
513 | /* Direct event */ | ||
514 | --pmc; | ||
515 | if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) | ||
516 | /* add events on higher-numbered bus */ | ||
517 | mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
518 | } else { | ||
519 | /* Instructions or run cycles on PMC5/6 */ | ||
520 | --pmc; | ||
521 | } | ||
522 | if (isbus && unit == PM_GRS) { | ||
523 | bit = psel & 7; | ||
524 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
525 | mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; | ||
526 | } | ||
527 | if (power5_marked_instr_event(event[i])) | ||
528 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
529 | if (pmc <= 3) | ||
530 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
531 | hwc[i] = pmc; | ||
532 | } | ||
533 | |||
534 | /* Return MMCRx values */ | ||
535 | mmcr[0] = 0; | ||
536 | if (pmc_inuse & 1) | ||
537 | mmcr[0] = MMCR0_PMC1CE; | ||
538 | if (pmc_inuse & 0x3e) | ||
539 | mmcr[0] |= MMCR0_PMCjCE; | ||
540 | mmcr[1] = mmcr1; | ||
541 | mmcr[2] = mmcra; | ||
542 | return 0; | ||
543 | } | ||
544 | |||
545 | static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
546 | { | ||
547 | if (pmc <= 3) | ||
548 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
549 | } | ||
550 | |||
551 | static int power5_generic_events[] = { | ||
552 | [PERF_COUNT_HW_CPU_CYCLES] = 0xf, | ||
553 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, | ||
554 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ | ||
555 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ | ||
556 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ | ||
557 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ | ||
558 | }; | ||
559 | |||
560 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
561 | |||
562 | /* | ||
563 | * Table of generalized cache-related events. | ||
564 | * 0 means not supported, -1 means nonsensical, other values | ||
565 | * are event codes. | ||
566 | */ | ||
567 | static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
568 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
569 | [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, | ||
570 | [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, | ||
571 | [C(OP_PREFETCH)] = { 0xc70e7, 0 }, | ||
572 | }, | ||
573 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
574 | [C(OP_READ)] = { 0, 0 }, | ||
575 | [C(OP_WRITE)] = { -1, -1 }, | ||
576 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
577 | }, | ||
578 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
579 | [C(OP_READ)] = { 0, 0x3c309b }, | ||
580 | [C(OP_WRITE)] = { 0, 0 }, | ||
581 | [C(OP_PREFETCH)] = { 0xc50c3, 0 }, | ||
582 | }, | ||
583 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
584 | [C(OP_READ)] = { 0x2c4090, 0x800c4 }, | ||
585 | [C(OP_WRITE)] = { -1, -1 }, | ||
586 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
587 | }, | ||
588 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
589 | [C(OP_READ)] = { 0, 0x800c0 }, | ||
590 | [C(OP_WRITE)] = { -1, -1 }, | ||
591 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
592 | }, | ||
593 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
594 | [C(OP_READ)] = { 0x230e4, 0x230e5 }, | ||
595 | [C(OP_WRITE)] = { -1, -1 }, | ||
596 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
597 | }, | ||
598 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
599 | [C(OP_READ)] = { -1, -1 }, | ||
600 | [C(OP_WRITE)] = { -1, -1 }, | ||
601 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
602 | }, | ||
603 | }; | ||
604 | |||
605 | static struct power_pmu power5_pmu = { | ||
606 | .name = "POWER5", | ||
607 | .n_counter = 6, | ||
608 | .max_alternatives = MAX_ALT, | ||
609 | .add_fields = 0x7000090000555ul, | ||
610 | .test_adder = 0x3000490000000ul, | ||
611 | .compute_mmcr = power5_compute_mmcr, | ||
612 | .get_constraint = power5_get_constraint, | ||
613 | .get_alternatives = power5_get_alternatives, | ||
614 | .disable_pmc = power5_disable_pmc, | ||
615 | .n_generic = ARRAY_SIZE(power5_generic_events), | ||
616 | .generic_events = power5_generic_events, | ||
617 | .cache_events = &power5_cache_events, | ||
618 | }; | ||
619 | |||
620 | static int __init init_power5_pmu(void) | ||
621 | { | ||
622 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
623 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) | ||
624 | return -ENODEV; | ||
625 | |||
626 | return register_power_pmu(&power5_pmu); | ||
627 | } | ||
628 | |||
629 | early_initcall(init_power5_pmu); | ||
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c deleted file mode 100644 index 0bbc901e7efc..000000000000 --- a/arch/powerpc/kernel/power6-pmu.c +++ /dev/null | |||
@@ -1,552 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER6 processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER6 | ||
19 | */ | ||
20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0x7 | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 16 /* Unit the event comes from (TTMxSEL encoding) */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH) | ||
26 | #define PM_LLAV 0x8000 /* Load lookahead match value */ | ||
27 | #define PM_LLA 0x4000 /* Load lookahead match enable */ | ||
28 | #define PM_BYTE_SH 12 /* Byte of event bus to use */ | ||
29 | #define PM_BYTE_MSK 3 | ||
30 | #define PM_SUBUNIT_SH 8 /* Subunit the event comes from (NEST_SEL enc.) */ | ||
31 | #define PM_SUBUNIT_MSK 7 | ||
32 | #define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH) | ||
33 | #define PM_PMCSEL_MSK 0xff /* PMCxSEL value */ | ||
34 | #define PM_BUSEVENT_MSK 0xf3700 | ||
35 | |||
36 | /* | ||
37 | * Bits in MMCR1 for POWER6 | ||
38 | */ | ||
39 | #define MMCR1_TTM0SEL_SH 60 | ||
40 | #define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4) | ||
41 | #define MMCR1_TTMSEL_MSK 0xf | ||
42 | #define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK) | ||
43 | #define MMCR1_NESTSEL_SH 45 | ||
44 | #define MMCR1_NESTSEL_MSK 0x7 | ||
45 | #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) | ||
46 | #define MMCR1_PMC1_LLA (1ul << 44) | ||
47 | #define MMCR1_PMC1_LLA_VALUE (1ul << 39) | ||
48 | #define MMCR1_PMC1_ADDR_SEL (1ul << 35) | ||
49 | #define MMCR1_PMC1SEL_SH 24 | ||
50 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
51 | #define MMCR1_PMCSEL_MSK 0xff | ||
52 | |||
53 | /* | ||
54 | * Map of which direct events on which PMCs are marked instruction events. | ||
55 | * Indexed by PMCSEL value >> 1. | ||
56 | * Bottom 4 bits are a map of which PMCs are interesting, | ||
57 | * top 4 bits say what sort of event: | ||
58 | * 0 = direct marked event, | ||
59 | * 1 = byte decode event, | ||
60 | * 4 = add/and event (PMC1 -> bits 0 & 4), | ||
61 | * 5 = add/and event (PMC1 -> bits 1 & 5), | ||
62 | * 6 = add/and event (PMC1 -> bits 2 & 6), | ||
63 | * 7 = add/and event (PMC1 -> bits 3 & 7). | ||
64 | */ | ||
65 | static unsigned char direct_event_is_marked[0x60 >> 1] = { | ||
66 | 0, /* 00 */ | ||
67 | 0, /* 02 */ | ||
68 | 0, /* 04 */ | ||
69 | 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ | ||
70 | 0x04, /* 08 PM_MRK_DFU_FIN */ | ||
71 | 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */ | ||
72 | 0, /* 0c */ | ||
73 | 0, /* 0e */ | ||
74 | 0x02, /* 10 PM_MRK_INST_DISP */ | ||
75 | 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */ | ||
76 | 0, /* 14 */ | ||
77 | 0, /* 16 */ | ||
78 | 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */ | ||
79 | 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */ | ||
80 | 0x01, /* 1c PM_MRK_INST_ISSUED */ | ||
81 | 0, /* 1e */ | ||
82 | 0, /* 20 */ | ||
83 | 0, /* 22 */ | ||
84 | 0, /* 24 */ | ||
85 | 0, /* 26 */ | ||
86 | 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */ | ||
87 | 0, /* 2a */ | ||
88 | 0, /* 2c */ | ||
89 | 0, /* 2e */ | ||
90 | 0x4f, /* 30 */ | ||
91 | 0x7f, /* 32 */ | ||
92 | 0x4f, /* 34 */ | ||
93 | 0x5f, /* 36 */ | ||
94 | 0x6f, /* 38 */ | ||
95 | 0x4f, /* 3a */ | ||
96 | 0, /* 3c */ | ||
97 | 0x08, /* 3e PM_MRK_INST_TIMEO */ | ||
98 | 0x1f, /* 40 */ | ||
99 | 0x1f, /* 42 */ | ||
100 | 0x1f, /* 44 */ | ||
101 | 0x1f, /* 46 */ | ||
102 | 0x1f, /* 48 */ | ||
103 | 0x1f, /* 4a */ | ||
104 | 0x1f, /* 4c */ | ||
105 | 0x1f, /* 4e */ | ||
106 | 0, /* 50 */ | ||
107 | 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */ | ||
108 | 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */ | ||
109 | 0x02, /* 56 PM_MRK_LD_MISS_L1 */ | ||
110 | 0, /* 58 */ | ||
111 | 0, /* 5a */ | ||
112 | 0, /* 5c */ | ||
113 | 0, /* 5e */ | ||
114 | }; | ||
115 | |||
116 | /* | ||
117 | * Masks showing for each unit which bits are marked events. | ||
118 | * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0. | ||
119 | */ | ||
120 | static u32 marked_bus_events[16] = { | ||
121 | 0x01000000, /* direct events set 1: byte 3 bit 0 */ | ||
122 | 0x00010000, /* direct events set 2: byte 2 bit 0 */ | ||
123 | 0, 0, 0, 0, /* IDU, IFU, nest: nothing */ | ||
124 | 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */ | ||
125 | 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */ | ||
126 | 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */ | ||
127 | 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */ | ||
128 | 0, /* LSU set 3 */ | ||
129 | 0x00000010, /* VMX set 3: byte 0 bit 4 */ | ||
130 | 0, /* BFP set 1 */ | ||
131 | 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */ | ||
132 | 0, 0 | ||
133 | }; | ||
134 | |||
135 | /* | ||
136 | * Returns 1 if event counts things relating to marked instructions | ||
137 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
138 | */ | ||
139 | static int power6_marked_instr_event(u64 event) | ||
140 | { | ||
141 | int pmc, psel, ptype; | ||
142 | int bit, byte, unit; | ||
143 | u32 mask; | ||
144 | |||
145 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
146 | psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ | ||
147 | if (pmc >= 5) | ||
148 | return 0; | ||
149 | |||
150 | bit = -1; | ||
151 | if (psel < sizeof(direct_event_is_marked)) { | ||
152 | ptype = direct_event_is_marked[psel]; | ||
153 | if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) | ||
154 | return 0; | ||
155 | ptype >>= 4; | ||
156 | if (ptype == 0) | ||
157 | return 1; | ||
158 | if (ptype == 1) | ||
159 | bit = 0; | ||
160 | else | ||
161 | bit = ptype ^ (pmc - 1); | ||
162 | } else if ((psel & 0x48) == 0x40) | ||
163 | bit = psel & 7; | ||
164 | |||
165 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
166 | return 0; | ||
167 | |||
168 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
169 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
170 | mask = marked_bus_events[unit]; | ||
171 | return (mask >> (byte * 8 + bit)) & 1; | ||
172 | } | ||
173 | |||
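A decode walk-through using the two tables above (hypothetical event):

/*
 * psel = 0x30  ->  direct_event_is_marked[0x30 >> 1] == 0x4f:
 *   low nibble 0xf: PMCs 1-4 all qualify;
 *   high nibble 4:  add/and event, so for PMC2 the tested bus bit is
 *                   4 ^ (2 - 1) == 5.
 * The result is then bit (byte * 8 + 5) of the event's unit entry in
 * marked_bus_events[].
 */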
174 | /* | ||
175 | * Assign PMC numbers and compute MMCR1 value for a set of events | ||
176 | */ | ||
177 | static int p6_compute_mmcr(u64 event[], int n_ev, | ||
178 | unsigned int hwc[], unsigned long mmcr[]) | ||
179 | { | ||
180 | unsigned long mmcr1 = 0; | ||
181 | unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | ||
182 | int i; | ||
183 | unsigned int pmc, ev, b, u, s, psel; | ||
184 | unsigned int ttmset = 0; | ||
185 | unsigned int pmc_inuse = 0; | ||
186 | |||
187 | if (n_ev > 6) | ||
188 | return -1; | ||
189 | for (i = 0; i < n_ev; ++i) { | ||
190 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
191 | if (pmc) { | ||
192 | if (pmc_inuse & (1 << (pmc - 1))) | ||
193 | return -1; /* collision! */ | ||
194 | pmc_inuse |= 1 << (pmc - 1); | ||
195 | } | ||
196 | } | ||
197 | for (i = 0; i < n_ev; ++i) { | ||
198 | ev = event[i]; | ||
199 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; | ||
200 | if (pmc) { | ||
201 | --pmc; | ||
202 | } else { | ||
203 | /* can go on any PMC; find a free one */ | ||
204 | for (pmc = 0; pmc < 4; ++pmc) | ||
205 | if (!(pmc_inuse & (1 << pmc))) | ||
206 | break; | ||
207 | if (pmc >= 4) | ||
208 | return -1; | ||
209 | pmc_inuse |= 1 << pmc; | ||
210 | } | ||
211 | hwc[i] = pmc; | ||
212 | psel = ev & PM_PMCSEL_MSK; | ||
213 | if (ev & PM_BUSEVENT_MSK) { | ||
214 | /* this event uses the event bus */ | ||
215 | b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
216 | u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
217 | /* check for conflict on this byte of event bus */ | ||
218 | if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) | ||
219 | return -1; | ||
220 | mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b); | ||
221 | ttmset |= 1 << b; | ||
222 | if (u == 5) { | ||
223 | /* Nest events have a further mux */ | ||
224 | s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
225 | if ((ttmset & 0x10) && | ||
226 | MMCR1_NESTSEL(mmcr1) != s) | ||
227 | return -1; | ||
228 | ttmset |= 0x10; | ||
229 | mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH; | ||
230 | } | ||
231 | if (0x30 <= psel && psel <= 0x3d) { | ||
232 | /* these need the PMCx_ADDR_SEL bits */ | ||
233 | if (b >= 2) | ||
234 | mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; | ||
235 | } | ||
236 | /* bus select values are different for PMC3/4 */ | ||
237 | if (pmc >= 2 && (psel & 0x90) == 0x80) | ||
238 | psel ^= 0x20; | ||
239 | } | ||
240 | if (ev & PM_LLA) { | ||
241 | mmcr1 |= MMCR1_PMC1_LLA >> pmc; | ||
242 | if (ev & PM_LLAV) | ||
243 | mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; | ||
244 | } | ||
245 | if (power6_marked_instr_event(event[i])) | ||
246 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
247 | if (pmc < 4) | ||
248 | mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); | ||
249 | } | ||
250 | mmcr[0] = 0; | ||
251 | if (pmc_inuse & 1) | ||
252 | mmcr[0] = MMCR0_PMC1CE; | ||
253 | if (pmc_inuse & 0xe) | ||
254 | mmcr[0] |= MMCR0_PMCjCE; | ||
255 | mmcr[1] = mmcr1; | ||
256 | mmcr[2] = mmcra; | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Layout of constraint bits: | ||
262 | * | ||
263 | * 0-1 add field: number of uses of PMC1 (max 1) | ||
264 | * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 | ||
265 | * 12-15 add field: number of uses of PMC1-4 (max 4) | ||
266 | * 16-19 select field: unit on byte 0 of event bus | ||
267 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 | ||
268 | * 32-34 select field: nest (subunit) event selector | ||
269 | */ | ||
270 | static int p6_get_constraint(u64 event, unsigned long *maskp, | ||
271 | unsigned long *valp) | ||
272 | { | ||
273 | int pmc, byte, sh, subunit; | ||
274 | unsigned long mask = 0, value = 0; | ||
275 | |||
276 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
277 | if (pmc) { | ||
278 | if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) | ||
279 | return -1; | ||
280 | sh = (pmc - 1) * 2; | ||
281 | mask |= 2 << sh; | ||
282 | value |= 1 << sh; | ||
283 | } | ||
284 | if (event & PM_BUSEVENT_MSK) { | ||
285 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
286 | sh = byte * 4 + (16 - PM_UNIT_SH); | ||
287 | mask |= PM_UNIT_MSKS << sh; | ||
288 | value |= (unsigned long)(event & PM_UNIT_MSKS) << sh; | ||
289 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { | ||
290 | subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
291 | mask |= (unsigned long)PM_SUBUNIT_MSK << 32; | ||
292 | value |= (unsigned long)subunit << 32; | ||
293 | } | ||
294 | } | ||
295 | if (pmc <= 4) { | ||
296 | mask |= 0x8000; /* add field for count of PMC1-4 uses */ | ||
297 | value |= 0x1000; | ||
298 | } | ||
299 | *maskp = mask; | ||
300 | *valp = value; | ||
301 | return 0; | ||
302 | } | ||
303 | |||
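A worked example of this encoding (hypothetical bus event: PMC1, nest unit 5 on byte 2, subunit 3):

/*
 *   mask  == 2 | 0xf000000 | (7UL << 32) | 0x8000
 *   value == 1 | 0x5000000 | (3UL << 32) | 0x1000
 * The unit and subunit parts are select fields, so two events sharing
 * byte 2 must agree on them; the low fields count PMC uses and add.
 */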
304 | static int p6_limited_pmc_event(u64 event) | ||
305 | { | ||
306 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
307 | |||
308 | return pmc == 5 || pmc == 6; | ||
309 | } | ||
310 | |||
311 | #define MAX_ALT 4 /* at most 4 alternatives for any event */ | ||
312 | |||
313 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
314 | { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ | ||
315 | { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ | ||
316 | { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ | ||
317 | { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */ | ||
318 | { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ | ||
319 | { 0x10000e, 0x400010 }, /* PM_PURR */ | ||
320 | { 0x100010, 0x4000f8 }, /* PM_FLUSH */ | ||
321 | { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */ | ||
322 | { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */ | ||
323 | { 0x100054, 0x2000f0 }, /* PM_ST_FIN */ | ||
324 | { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */ | ||
325 | { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */ | ||
326 | { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */ | ||
327 | { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */ | ||
328 | { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */ | ||
329 | { 0x200012, 0x300012 }, /* PM_INST_DISP */ | ||
330 | { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */ | ||
331 | { 0x2000f8, 0x300010 }, /* PM_EXT_INT */ | ||
332 | { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */ | ||
333 | { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */ | ||
334 | { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */ | ||
335 | { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */ | ||
336 | { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ | ||
337 | }; | ||
338 | |||
339 | /* | ||
340 | * This could be made more efficient with a binary search on | ||
341 | * a presorted list, if necessary | ||
342 | */ | ||
343 | static int find_alternatives_list(u64 event) | ||
344 | { | ||
345 | int i, j; | ||
346 | unsigned int alt; | ||
347 | |||
348 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
349 | if (event < event_alternatives[i][0]) | ||
350 | return -1; | ||
351 | for (j = 0; j < MAX_ALT; ++j) { | ||
352 | alt = event_alternatives[i][j]; | ||
353 | if (!alt || event < alt) | ||
354 | break; | ||
355 | if (event == alt) | ||
356 | return i; | ||
357 | } | ||
358 | } | ||
359 | return -1; | ||
360 | } | ||
361 | |||
362 | static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
363 | { | ||
364 | int i, j, nlim; | ||
365 | unsigned int psel, pmc; | ||
366 | unsigned int nalt = 1; | ||
367 | u64 aevent; | ||
368 | |||
369 | alt[0] = event; | ||
370 | nlim = p6_limited_pmc_event(event); | ||
371 | |||
372 | /* check the alternatives table */ | ||
373 | i = find_alternatives_list(event); | ||
374 | if (i >= 0) { | ||
375 | /* copy out alternatives from list */ | ||
376 | for (j = 0; j < MAX_ALT; ++j) { | ||
377 | aevent = event_alternatives[i][j]; | ||
378 | if (!aevent) | ||
379 | break; | ||
380 | if (aevent != event) | ||
381 | alt[nalt++] = aevent; | ||
382 | nlim += p6_limited_pmc_event(aevent); | ||
383 | } | ||
384 | |||
385 | } else { | ||
386 | /* Check for alternative ways of computing sum events */ | ||
387 | /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ | ||
388 | psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ | ||
389 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
390 | if (pmc && (psel == 0x32 || psel == 0x34)) | ||
391 | alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | | ||
392 | ((5 - pmc) << PM_PMC_SH); | ||
393 | |||
394 | /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ | ||
395 | if (pmc && (psel == 0x38 || psel == 0x3a)) | ||
396 | alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | | ||
397 | ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); | ||
398 | } | ||
399 | |||
400 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
401 | /* | ||
402 | * We're only counting in RUN state, | ||
403 | * so PM_CYC is equivalent to PM_RUN_CYC, | ||
404 | * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. | ||
405 | * This doesn't include alternatives that don't provide | ||
406 | * any extra flexibility in assigning PMCs (e.g. | ||
407 | * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). | ||
408 | * Note that even with these additional alternatives | ||
409 | * we never end up with more than 4 alternatives for any event. | ||
410 | */ | ||
411 | j = nalt; | ||
412 | for (i = 0; i < nalt; ++i) { | ||
413 | switch (alt[i]) { | ||
414 | case 0x1e: /* PM_CYC */ | ||
415 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
416 | ++nlim; | ||
417 | break; | ||
418 | case 0x10000a: /* PM_RUN_CYC */ | ||
419 | alt[j++] = 0x1e; /* PM_CYC */ | ||
420 | break; | ||
421 | case 2: /* PM_INST_CMPL */ | ||
422 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
423 | ++nlim; | ||
424 | break; | ||
425 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
426 | alt[j++] = 2; /* PM_INST_CMPL */ | ||
427 | break; | ||
428 | case 0x10000e: /* PM_PURR */ | ||
429 | alt[j++] = 0x4000f4; /* PM_RUN_PURR */ | ||
430 | break; | ||
431 | case 0x4000f4: /* PM_RUN_PURR */ | ||
432 | alt[j++] = 0x10000e; /* PM_PURR */ | ||
433 | break; | ||
434 | } | ||
435 | } | ||
436 | nalt = j; | ||
437 | } | ||
438 | |||
439 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
440 | /* remove the limited PMC events */ | ||
441 | j = 0; | ||
442 | for (i = 0; i < nalt; ++i) { | ||
443 | if (!p6_limited_pmc_event(alt[i])) { | ||
444 | alt[j] = alt[i]; | ||
445 | ++j; | ||
446 | } | ||
447 | } | ||
448 | nalt = j; | ||
449 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
450 | /* remove all but the limited PMC events */ | ||
451 | j = 0; | ||
452 | for (i = 0; i < nalt; ++i) { | ||
453 | if (p6_limited_pmc_event(alt[i])) { | ||
454 | alt[j] = alt[i]; | ||
455 | ++j; | ||
456 | } | ||
457 | } | ||
458 | nalt = j; | ||
459 | } | ||
460 | |||
461 | return nalt; | ||
462 | } | ||
463 | |||
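The two rewrite rules in the else-branch above encode a POWER6 bus symmetry: an event with PMCSEL 0x32 on counter N counts the same thing as PMCSEL 0x34 on counter 5-N, and PMCSEL 0x38 on N matches 0x3a on N+/-2. The first rule is an involution, which a few lines of userspace C can check, assuming the field layout defined earlier in this file (PM_PMC_SH = 20, PM_PMC_MSK = 0x7); mirror_32_34() is a hypothetical name:

#include <assert.h>
#include <stdio.h>

/* Field definitions as in power6-pmu.c (earlier in this file) */
#define PM_PMC_SH	20
#define PM_PMC_MSK	0x7
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)

/* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
static unsigned int mirror_32_34(unsigned int ev)
{
	unsigned int pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
	return ((ev ^ 0x6) & ~PM_PMC_MSKS) | ((5 - pmc) << PM_PMC_SH);
}

int main(void)
{
	unsigned int ev = (2 << PM_PMC_SH) | 0x32;	/* PMC2, PMCSEL 0x32 */
	unsigned int alt = mirror_32_34(ev);

	printf("0x%x <-> 0x%x\n", ev, alt);	/* 0x200032 <-> 0x300034 */
	assert(mirror_32_34(alt) == ev);	/* the rule is an involution */
	return 0;
}

Applying the rule twice returns the original encoding, so expanding alternatives can never loop.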
464 | static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
465 | { | ||
466 | /* Set PMCxSEL to 0 to disable PMCx */ | ||
467 | if (pmc <= 3) | ||
468 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
469 | } | ||
470 | |||
471 | static int power6_generic_events[] = { | ||
472 | [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, | ||
473 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
474 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ | ||
475 | [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ | ||
476 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ | ||
477 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ | ||
478 | }; | ||
479 | |||
480 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
481 | |||
482 | /* | ||
483 | * Table of generalized cache-related events. | ||
484 | * 0 means not supported, -1 means nonsensical, other values | ||
485 | * are event codes. | ||
486 | * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. | ||
487 | */ | ||
488 | static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
489 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
490 | [C(OP_READ)] = { 0x280030, 0x80080 }, | ||
491 | [C(OP_WRITE)] = { 0x180032, 0x80088 }, | ||
492 | [C(OP_PREFETCH)] = { 0x810a4, 0 }, | ||
493 | }, | ||
494 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
495 | [C(OP_READ)] = { 0, 0x100056 }, | ||
496 | [C(OP_WRITE)] = { -1, -1 }, | ||
497 | [C(OP_PREFETCH)] = { 0x4008c, 0 }, | ||
498 | }, | ||
499 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
500 | [C(OP_READ)] = { 0x150730, 0x250532 }, | ||
501 | [C(OP_WRITE)] = { 0x250432, 0x150432 }, | ||
502 | [C(OP_PREFETCH)] = { 0x810a6, 0 }, | ||
503 | }, | ||
504 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
505 | [C(OP_READ)] = { 0, 0x20000e }, | ||
506 | [C(OP_WRITE)] = { -1, -1 }, | ||
507 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
508 | }, | ||
509 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
510 | [C(OP_READ)] = { 0, 0x420ce }, | ||
511 | [C(OP_WRITE)] = { -1, -1 }, | ||
512 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
513 | }, | ||
514 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
515 | [C(OP_READ)] = { 0x430e6, 0x400052 }, | ||
516 | [C(OP_WRITE)] = { -1, -1 }, | ||
517 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
518 | }, | ||
519 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
520 | [C(OP_READ)] = { -1, -1 }, | ||
521 | [C(OP_WRITE)] = { -1, -1 }, | ||
522 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
523 | }, | ||
524 | }; | ||
525 | |||
526 | static struct power_pmu power6_pmu = { | ||
527 | .name = "POWER6", | ||
528 | .n_counter = 6, | ||
529 | .max_alternatives = MAX_ALT, | ||
530 | .add_fields = 0x1555, | ||
531 | .test_adder = 0x3000, | ||
532 | .compute_mmcr = p6_compute_mmcr, | ||
533 | .get_constraint = p6_get_constraint, | ||
534 | .get_alternatives = p6_get_alternatives, | ||
535 | .disable_pmc = p6_disable_pmc, | ||
536 | .limited_pmc_event = p6_limited_pmc_event, | ||
537 | .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, | ||
538 | .n_generic = ARRAY_SIZE(power6_generic_events), | ||
539 | .generic_events = power6_generic_events, | ||
540 | .cache_events = &power6_cache_events, | ||
541 | }; | ||
542 | |||
543 | static int __init init_power6_pmu(void) | ||
544 | { | ||
545 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
546 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) | ||
547 | return -ENODEV; | ||
548 | |||
549 | return register_power_pmu(&power6_pmu); | ||
550 | } | ||
551 | |||
552 | early_initcall(init_power6_pmu); | ||
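power6_generic_events[] is what lets portable tooling stay ignorant of these raw codes: a user asks for PERF_COUNT_HW_CPU_CYCLES and the PMU driver substitutes 0x1e. A minimal self-profiling consumer, offered as a sketch with minimal error handling (perf_event_open() has no glibc wrapper, so it goes through syscall(2)):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* -> 0x1e on POWER6 */
	attr.disabled = 1;

	/* pid 0 = this task, cpu -1 = any cpu, no group, no flags */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) { perror("perf_event_open"); return 1; }

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	for (volatile int i = 0; i < 1000000; i++)
		;				/* something to measure */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	read(fd, &count, sizeof(count));
	printf("cycles: %lld\n", count);
	close(fd);
	return 0;
}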
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c deleted file mode 100644 index 1251e4d7e262..000000000000 --- a/arch/powerpc/kernel/power7-pmu.c +++ /dev/null | |||
@@ -1,379 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER7 processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER7 | ||
19 | */ | ||
20 | #define PM_PMC_SH 16 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_COMBINE_SH 11 /* Combined event bit */ | ||
26 | #define PM_COMBINE_MSK 1 | ||
27 | #define PM_COMBINE_MSKS 0x800 | ||
28 | #define PM_L2SEL_SH 8 /* L2 event select */ | ||
29 | #define PM_L2SEL_MSK 7 | ||
30 | #define PM_PMCSEL_MSK 0xff | ||
31 | |||
32 | /* | ||
33 | * Bits in MMCR1 for POWER7 | ||
34 | */ | ||
35 | #define MMCR1_TTM0SEL_SH 60 | ||
36 | #define MMCR1_TTM1SEL_SH 56 | ||
37 | #define MMCR1_TTM2SEL_SH 52 | ||
38 | #define MMCR1_TTM3SEL_SH 48 | ||
39 | #define MMCR1_TTMSEL_MSK 0xf | ||
40 | #define MMCR1_L2SEL_SH 45 | ||
41 | #define MMCR1_L2SEL_MSK 7 | ||
42 | #define MMCR1_PMC1_COMBINE_SH 35 | ||
43 | #define MMCR1_PMC2_COMBINE_SH 34 | ||
44 | #define MMCR1_PMC3_COMBINE_SH 33 | ||
45 | #define MMCR1_PMC4_COMBINE_SH 32 | ||
46 | #define MMCR1_PMC1SEL_SH 24 | ||
47 | #define MMCR1_PMC2SEL_SH 16 | ||
48 | #define MMCR1_PMC3SEL_SH 8 | ||
49 | #define MMCR1_PMC4SEL_SH 0 | ||
50 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
51 | #define MMCR1_PMCSEL_MSK 0xff | ||
52 | |||
53 | /* | ||
54 | * Layout of constraint bits: | ||
55 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
56 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
57 | * [ ><><><><><><> | ||
58 | * NC P6P5P4P3P2P1 | ||
59 | * | ||
60 | * NC - number of counters | ||
61 | * 15: NC error 0x8000 | ||
62 | * 12-14: number of events needing PMC1-4 0x7000 | ||
63 | * | ||
64 | * P6 | ||
65 | * 11: P6 error 0x800 | ||
66 | * 10-11: Count of events needing PMC6 | ||
67 | * | ||
68 | * P1..P5 | ||
69 | * 0-9: Count of events needing PMC1..PMC5 | ||
70 | */ | ||
71 | |||
72 | static int power7_get_constraint(u64 event, unsigned long *maskp, | ||
73 | unsigned long *valp) | ||
74 | { | ||
75 | int pmc, sh; | ||
76 | unsigned long mask = 0, value = 0; | ||
77 | |||
78 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
79 | if (pmc) { | ||
80 | if (pmc > 6) | ||
81 | return -1; | ||
82 | sh = (pmc - 1) * 2; | ||
83 | mask |= 2 << sh; | ||
84 | value |= 1 << sh; | ||
85 | if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4)) | ||
86 | return -1; | ||
87 | } | ||
88 | if (pmc < 5) { | ||
89 | /* need a counter from PMC1-4 set */ | ||
90 | mask |= 0x8000; | ||
91 | value |= 0x1000; | ||
92 | } | ||
93 | *maskp = mask; | ||
94 | *valp = value; | ||
95 | return 0; | ||
96 | } | ||
97 | |||
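The mask/value pair built above is the whole constraint story for a direct event: each PMC owns a 2-bit field, an event that must run on PMC n contributes 1 to that field, and the upper bit of the field doubles as an error flag. When the core scheduler sums the value words of a candidate event set, two claims on the same PMC carry into the flagged bit. A simplified userspace model of that carry trick (the real check in the powerpc perf core also handles floating events and alternatives):

#include <stdio.h>

/* An event pinned to PMC n contributes 1 << (2*(n-1)); the bit
 * 2 << (2*(n-1)) is that field's error flag. Summing the values
 * sets an error bit iff two events claim the same PMC. */
static int conflicts(const int *pmcs, int n)
{
	unsigned long sum = 0, err = 0;

	for (int i = 0; i < n; i++) {
		int sh = (pmcs[i] - 1) * 2;
		sum += 1UL << sh;
		err |= 2UL << sh;
	}
	return (sum & err) != 0;
}

int main(void)
{
	int ok[] = { 1, 2, 3 }, bad[] = { 1, 2, 2 };

	printf("%d %d\n", conflicts(ok, 3), conflicts(bad, 3)); /* 0 1 */
	return 0;
}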
98 | #define MAX_ALT 2 /* at most 2 alternatives for any event */ | ||
99 | |||
100 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
101 | { 0x200f2, 0x300f2 }, /* PM_INST_DISP */ | ||
102 | { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */ | ||
103 | { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */ | ||
104 | }; | ||
105 | |||
106 | /* | ||
107 | * Scan the alternatives table for a match and return the | ||
108 | * index into the alternatives table if found, else -1. | ||
109 | */ | ||
110 | static int find_alternative(u64 event) | ||
111 | { | ||
112 | int i, j; | ||
113 | |||
114 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
115 | if (event < event_alternatives[i][0]) | ||
116 | break; | ||
117 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
118 | if (event == event_alternatives[i][j]) | ||
119 | return i; | ||
120 | } | ||
121 | return -1; | ||
122 | } | ||
123 | |||
124 | static s64 find_alternative_decode(u64 event) | ||
125 | { | ||
126 | int pmc, psel; | ||
127 | |||
128 | /* this only handles the 4x decode events */ | ||
129 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
130 | psel = event & PM_PMCSEL_MSK; | ||
131 | if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40) | ||
132 | return event - (1 << PM_PMC_SH) + 8; | ||
133 | if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48) | ||
134 | return event + (1 << PM_PMC_SH) - 8; | ||
135 | return -1; | ||
136 | } | ||
137 | |||
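find_alternative_decode() exploits a pairing in the POWER7 decode events: PMCSEL 0x40-0x47 on PMC2/4 counts the same thing as PMCSEL 0x48-0x4f on the counter one lower. The mapping round-trips, which is easy to confirm standalone using the field #defines from the top of this file:

#include <assert.h>
#include <stdio.h>

#define PM_PMC_SH	16	/* as defined for POWER7 above */
#define PM_PMC_MSK	0xf
#define PM_PMCSEL_MSK	0xff

/* Same logic as find_alternative_decode() above, restated as a
 * standalone function for testing. */
static long long decode_alt(unsigned long long event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	int psel = event & PM_PMCSEL_MSK;

	if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
		return event - (1 << PM_PMC_SH) + 8;
	if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
		return event + (1 << PM_PMC_SH) - 8;
	return -1;
}

int main(void)
{
	unsigned long long ev = (2ULL << PM_PMC_SH) | 0x40; /* PMC2, 0x40 */
	long long alt = decode_alt(ev);

	printf("0x%llx <-> 0x%llx\n", ev, (unsigned long long)alt);
	assert(decode_alt(alt) == (long long)ev);	/* round-trips */
	return 0;
}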
138 | static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
139 | { | ||
140 | int i, j, nalt = 1; | ||
141 | s64 ae; | ||
142 | |||
143 | alt[0] = event; | ||
144 | nalt = 1; | ||
145 | i = find_alternative(event); | ||
146 | if (i >= 0) { | ||
147 | for (j = 0; j < MAX_ALT; ++j) { | ||
148 | ae = event_alternatives[i][j]; | ||
149 | if (ae && ae != event) | ||
150 | alt[nalt++] = ae; | ||
151 | } | ||
152 | } else { | ||
153 | ae = find_alternative_decode(event); | ||
154 | if (ae > 0) | ||
155 | alt[nalt++] = ae; | ||
156 | } | ||
157 | |||
158 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
159 | /* | ||
160 | * We're only counting in RUN state, | ||
161 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
162 | * and PM_INST_CMPL === PM_RUN_INST_CMPL. | ||
163 | * This doesn't include alternatives that don't provide | ||
164 | * any extra flexibility in assigning PMCs. | ||
165 | */ | ||
166 | j = nalt; | ||
167 | for (i = 0; i < nalt; ++i) { | ||
168 | switch (alt[i]) { | ||
169 | case 0x1e: /* PM_CYC */ | ||
170 | alt[j++] = 0x600f4; /* PM_RUN_CYC */ | ||
171 | break; | ||
172 | case 0x600f4: /* PM_RUN_CYC */ | ||
173 | alt[j++] = 0x1e; | ||
174 | break; | ||
175 | case 0x2: /* PM_PPC_CMPL */ | ||
176 | alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */ | ||
177 | break; | ||
178 | case 0x500fa: /* PM_RUN_INST_CMPL */ | ||
179 | alt[j++] = 0x2; /* PM_PPC_CMPL */ | ||
180 | break; | ||
181 | } | ||
182 | } | ||
183 | nalt = j; | ||
184 | } | ||
185 | |||
186 | return nalt; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Returns 1 if event counts things relating to marked instructions | ||
191 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
192 | */ | ||
193 | static int power7_marked_instr_event(u64 event) | ||
194 | { | ||
195 | int pmc, psel; | ||
196 | int unit; | ||
197 | |||
198 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
199 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
200 | psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */ | ||
201 | if (pmc >= 5) | ||
202 | return 0; | ||
203 | |||
204 | switch (psel >> 4) { | ||
205 | case 2: | ||
206 | return pmc == 2 || pmc == 4; | ||
207 | case 3: | ||
208 | if (psel == 0x3c) | ||
209 | return pmc == 1; | ||
210 | if (psel == 0x3e) | ||
211 | return pmc != 2; | ||
212 | return 1; | ||
213 | case 4: | ||
214 | case 5: | ||
215 | return unit == 0xd; | ||
216 | case 6: | ||
217 | if (psel == 0x64) | ||
218 | return pmc >= 3; | ||
219 | case 8: | ||
220 | return unit == 0xd; | ||
221 | } | ||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | static int power7_compute_mmcr(u64 event[], int n_ev, | ||
226 | unsigned int hwc[], unsigned long mmcr[]) | ||
227 | { | ||
228 | unsigned long mmcr1 = 0; | ||
229 | unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | ||
230 | unsigned int pmc, unit, combine, l2sel, psel; | ||
231 | unsigned int pmc_inuse = 0; | ||
232 | int i; | ||
233 | |||
234 | /* First pass to count resource use */ | ||
235 | for (i = 0; i < n_ev; ++i) { | ||
236 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
237 | if (pmc) { | ||
238 | if (pmc > 6) | ||
239 | return -1; | ||
240 | if (pmc_inuse & (1 << (pmc - 1))) | ||
241 | return -1; | ||
242 | pmc_inuse |= 1 << (pmc - 1); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | /* Second pass: assign PMCs, set all MMCR1 fields */ | ||
247 | for (i = 0; i < n_ev; ++i) { | ||
248 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
249 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
250 | combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK; | ||
251 | l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK; | ||
252 | psel = event[i] & PM_PMCSEL_MSK; | ||
253 | if (!pmc) { | ||
254 | /* Bus event or any-PMC direct event */ | ||
255 | for (pmc = 0; pmc < 4; ++pmc) { | ||
256 | if (!(pmc_inuse & (1 << pmc))) | ||
257 | break; | ||
258 | } | ||
259 | if (pmc >= 4) | ||
260 | return -1; | ||
261 | pmc_inuse |= 1 << pmc; | ||
262 | } else { | ||
263 | /* Direct or decoded event */ | ||
264 | --pmc; | ||
265 | } | ||
266 | if (pmc <= 3) { | ||
267 | mmcr1 |= (unsigned long) unit | ||
268 | << (MMCR1_TTM0SEL_SH - 4 * pmc); | ||
269 | mmcr1 |= (unsigned long) combine | ||
270 | << (MMCR1_PMC1_COMBINE_SH - pmc); | ||
271 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
272 | if (unit == 6) /* L2 events */ | ||
273 | mmcr1 |= (unsigned long) l2sel | ||
274 | << MMCR1_L2SEL_SH; | ||
275 | } | ||
276 | if (power7_marked_instr_event(event[i])) | ||
277 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
278 | hwc[i] = pmc; | ||
279 | } | ||
280 | |||
281 | /* Return MMCRx values */ | ||
282 | mmcr[0] = 0; | ||
283 | if (pmc_inuse & 1) | ||
284 | mmcr[0] = MMCR0_PMC1CE; | ||
285 | if (pmc_inuse & 0x3e) | ||
286 | mmcr[0] |= MMCR0_PMCjCE; | ||
287 | mmcr[1] = mmcr1; | ||
288 | mmcr[2] = mmcra; | ||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
293 | { | ||
294 | if (pmc <= 3) | ||
295 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
296 | } | ||
297 | |||
298 | static int power7_generic_events[] = { | ||
299 | [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, | ||
300 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */ | ||
301 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a, /* CMPLU_STALL */ | ||
302 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
303 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/ | ||
304 | [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ | ||
305 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ | ||
306 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ | ||
307 | }; | ||
308 | |||
309 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
310 | |||
311 | /* | ||
312 | * Table of generalized cache-related events. | ||
313 | * 0 means not supported, -1 means nonsensical, other values | ||
314 | * are event codes. | ||
315 | */ | ||
316 | static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
317 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
318 | [C(OP_READ)] = { 0xc880, 0x400f0 }, | ||
319 | [C(OP_WRITE)] = { 0, 0x300f0 }, | ||
320 | [C(OP_PREFETCH)] = { 0xd8b8, 0 }, | ||
321 | }, | ||
322 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
323 | [C(OP_READ)] = { 0, 0x200fc }, | ||
324 | [C(OP_WRITE)] = { -1, -1 }, | ||
325 | [C(OP_PREFETCH)] = { 0x408a, 0 }, | ||
326 | }, | ||
327 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
328 | [C(OP_READ)] = { 0x16080, 0x26080 }, | ||
329 | [C(OP_WRITE)] = { 0x16082, 0x26082 }, | ||
330 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
331 | }, | ||
332 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
333 | [C(OP_READ)] = { 0, 0x300fc }, | ||
334 | [C(OP_WRITE)] = { -1, -1 }, | ||
335 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
336 | }, | ||
337 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
338 | [C(OP_READ)] = { 0, 0x400fc }, | ||
339 | [C(OP_WRITE)] = { -1, -1 }, | ||
340 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
341 | }, | ||
342 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
343 | [C(OP_READ)] = { 0x10068, 0x400f6 }, | ||
344 | [C(OP_WRITE)] = { -1, -1 }, | ||
345 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
346 | }, | ||
347 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
348 | [C(OP_READ)] = { -1, -1 }, | ||
349 | [C(OP_WRITE)] = { -1, -1 }, | ||
350 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
351 | }, | ||
352 | }; | ||
353 | |||
354 | static struct power_pmu power7_pmu = { | ||
355 | .name = "POWER7", | ||
356 | .n_counter = 6, | ||
357 | .max_alternatives = MAX_ALT + 1, | ||
358 | .add_fields = 0x1555ul, | ||
359 | .test_adder = 0x3000ul, | ||
360 | .compute_mmcr = power7_compute_mmcr, | ||
361 | .get_constraint = power7_get_constraint, | ||
362 | .get_alternatives = power7_get_alternatives, | ||
363 | .disable_pmc = power7_disable_pmc, | ||
364 | .flags = PPMU_ALT_SIPR, | ||
365 | .n_generic = ARRAY_SIZE(power7_generic_events), | ||
366 | .generic_events = power7_generic_events, | ||
367 | .cache_events = &power7_cache_events, | ||
368 | }; | ||
369 | |||
370 | static int __init init_power7_pmu(void) | ||
371 | { | ||
372 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
373 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) | ||
374 | return -ENODEV; | ||
375 | |||
376 | return register_power_pmu(&power7_pmu); | ||
377 | } | ||
378 | |||
379 | early_initcall(init_power7_pmu); | ||
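For reference when reading tables like power7_cache_events[], a raw POWER7 event code packs five fields, per the #defines at the top of this file. A small standalone decoder, offered as a reading aid rather than anything authoritative:

#include <stdio.h>

/* POWER7 event-code fields, as #defined at the top of the file */
#define PM_PMC_SH	16
#define PM_PMC_MSK	0xf
#define PM_UNIT_SH	12
#define PM_UNIT_MSK	0xf
#define PM_COMBINE_SH	11
#define PM_COMBINE_MSK	1
#define PM_L2SEL_SH	8
#define PM_L2SEL_MSK	7
#define PM_PMCSEL_MSK	0xff

static void decode(unsigned long long ev)
{
	printf("event 0x%llx: pmc=%llu unit=0x%llx combine=%llu "
	       "l2sel=%llu psel=0x%llx\n", ev,
	       (ev >> PM_PMC_SH) & PM_PMC_MSK,
	       (ev >> PM_UNIT_SH) & PM_UNIT_MSK,
	       (ev >> PM_COMBINE_SH) & PM_COMBINE_MSK,
	       (ev >> PM_L2SEL_SH) & PM_L2SEL_MSK,
	       ev & PM_PMCSEL_MSK);
}

int main(void)
{
	decode(0x600f4);	/* PM_RUN_CYC: pmc=6, psel=0xf4 */
	decode(0x26080);	/* LL read access from the table above */
	return 0;
}

Decoding 0x26080 this way shows unit 6, which is why the "unit == 6" branch in power7_compute_mmcr() routes the L2SEL bits into MMCR1 for that event.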
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c deleted file mode 100644 index 8c2190206964..000000000000 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ /dev/null | |||
@@ -1,502 +0,0 @@ | |||
1 | /* | ||
2 | * Performance counter support for PPC970-family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <asm/reg.h> | ||
14 | #include <asm/cputable.h> | ||
15 | |||
16 | /* | ||
17 | * Bits in event code for PPC970 | ||
18 | */ | ||
19 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
20 | #define PM_PMC_MSK 0xf | ||
21 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_SPCSEL_SH 6 | ||
24 | #define PM_SPCSEL_MSK 3 | ||
25 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 3 | ||
27 | #define PM_PMCSEL_MSK 0xf | ||
28 | |||
29 | /* Values in PM_UNIT field */ | ||
30 | #define PM_NONE 0 | ||
31 | #define PM_FPU 1 | ||
32 | #define PM_VPU 2 | ||
33 | #define PM_ISU 3 | ||
34 | #define PM_IFU 4 | ||
35 | #define PM_IDU 5 | ||
36 | #define PM_STS 6 | ||
37 | #define PM_LSU0 7 | ||
38 | #define PM_LSU1U 8 | ||
39 | #define PM_LSU1L 9 | ||
40 | #define PM_LASTUNIT 9 | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR0 for PPC970 | ||
44 | */ | ||
45 | #define MMCR0_PMC1SEL_SH 8 | ||
46 | #define MMCR0_PMC2SEL_SH 1 | ||
47 | #define MMCR_PMCSEL_MSK 0x1f | ||
48 | |||
49 | /* | ||
50 | * Bits in MMCR1 for PPC970 | ||
51 | */ | ||
52 | #define MMCR1_TTM0SEL_SH 62 | ||
53 | #define MMCR1_TTM1SEL_SH 59 | ||
54 | #define MMCR1_TTM3SEL_SH 53 | ||
55 | #define MMCR1_TTMSEL_MSK 3 | ||
56 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
57 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
58 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
59 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
60 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
61 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
62 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
63 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
64 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
65 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
66 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
67 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
68 | #define MMCR1_PMC3SEL_SH 27 | ||
69 | #define MMCR1_PMC4SEL_SH 22 | ||
70 | #define MMCR1_PMC5SEL_SH 17 | ||
71 | #define MMCR1_PMC6SEL_SH 12 | ||
72 | #define MMCR1_PMC7SEL_SH 7 | ||
73 | #define MMCR1_PMC8SEL_SH 2 | ||
74 | |||
75 | static short mmcr1_adder_bits[8] = { | ||
76 | MMCR1_PMC1_ADDER_SEL_SH, | ||
77 | MMCR1_PMC2_ADDER_SEL_SH, | ||
78 | MMCR1_PMC3_ADDER_SEL_SH, | ||
79 | MMCR1_PMC4_ADDER_SEL_SH, | ||
80 | MMCR1_PMC5_ADDER_SEL_SH, | ||
81 | MMCR1_PMC6_ADDER_SEL_SH, | ||
82 | MMCR1_PMC7_ADDER_SEL_SH, | ||
83 | MMCR1_PMC8_ADDER_SEL_SH | ||
84 | }; | ||
85 | |||
86 | /* | ||
87 | * Layout of constraint bits: | ||
88 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
89 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
90 | * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><> | ||
91 | * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
92 | * | ||
93 | * SP - SPCSEL constraint | ||
94 | * 48-49: SPCSEL value 0x3_0000_0000_0000 | ||
95 | * | ||
96 | * T0 - TTM0 constraint | ||
97 | * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 | ||
98 | * | ||
99 | * T1 - TTM1 constraint | ||
100 | * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 | ||
101 | * | ||
102 | * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS | ||
103 | * 43: UC3 error 0x0800_0000_0000 | ||
104 | * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 | ||
105 | * 41: ISU events needed 0x0200_0000_0000 | ||
106 | * 40: IDU|STS events needed 0x0100_0000_0000 | ||
107 | * | ||
108 | * PS1 | ||
109 | * 39: PS1 error 0x0080_0000_0000 | ||
110 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
111 | * | ||
112 | * PS2 | ||
113 | * 35: PS2 error 0x0008_0000_0000 | ||
114 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
115 | * | ||
116 | * B0 | ||
117 | * 28-31: Byte 0 event source 0xf000_0000 | ||
118 | * Encoding as for the event code | ||
119 | * | ||
120 | * B1, B2, B3 | ||
121 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
122 | * | ||
123 | * P1 | ||
124 | * 15: P1 error 0x8000 | ||
125 | * 14-15: Count of events needing PMC1 | ||
126 | * | ||
127 | * P2..P8 | ||
128 | * 0-13: Count of events needing PMC2..PMC8 | ||
129 | */ | ||
130 | |||
131 | static unsigned char direct_marked_event[8] = { | ||
132 | (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ | ||
133 | (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ | ||
134 | (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */ | ||
135 | (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ | ||
136 | (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */ | ||
137 | (1<<3) | (1<<4) | (1<<5), | ||
138 | /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ | ||
139 | (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ | ||
140 | (1<<4) /* PMC8: PM_MRK_LSU_FIN */ | ||
141 | }; | ||
142 | |||
143 | /* | ||
144 | * Returns 1 if event counts things relating to marked instructions | ||
145 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
146 | */ | ||
147 | static int p970_marked_instr_event(u64 event) | ||
148 | { | ||
149 | int pmc, psel, unit, byte, bit; | ||
150 | unsigned int mask; | ||
151 | |||
152 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
153 | psel = event & PM_PMCSEL_MSK; | ||
154 | if (pmc) { | ||
155 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
156 | return 1; | ||
157 | if (psel == 0) /* add events */ | ||
158 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
159 | else if (psel == 7 || psel == 13) /* decode events */ | ||
160 | bit = 4; | ||
161 | else | ||
162 | return 0; | ||
163 | } else | ||
164 | bit = psel; | ||
165 | |||
166 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
167 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
168 | mask = 0; | ||
169 | switch (unit) { | ||
170 | case PM_VPU: | ||
171 | mask = 0x4c; /* byte 0 bits 2,3,6 */ | ||
172 | break; | ||
173 | case PM_LSU0: | ||
174 | /* byte 2 bits 0,2,3,4,6; all of byte 1 */ | ||
175 | mask = 0x085dff00; | ||
176 | break; | ||
177 | case PM_LSU1L: | ||
178 | mask = 0x50 << 24; /* byte 3 bits 4,6 */ | ||
179 | break; | ||
180 | } | ||
181 | return (mask >> (byte * 8 + bit)) & 1; | ||
182 | } | ||
183 | |||
184 | /* Masks and values for using events from the various units */ | ||
185 | static unsigned long unit_cons[PM_LASTUNIT+1][2] = { | ||
186 | [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, | ||
187 | [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, | ||
188 | [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, | ||
189 | [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull }, | ||
190 | [PM_IDU] = { 0x380000000000ull, 0x010000000000ull }, | ||
191 | [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, | ||
192 | }; | ||
193 | |||
194 | static int p970_get_constraint(u64 event, unsigned long *maskp, | ||
195 | unsigned long *valp) | ||
196 | { | ||
197 | int pmc, byte, unit, sh, spcsel; | ||
198 | unsigned long mask = 0, value = 0; | ||
199 | int grp = -1; | ||
200 | |||
201 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
202 | if (pmc) { | ||
203 | if (pmc > 8) | ||
204 | return -1; | ||
205 | sh = (pmc - 1) * 2; | ||
206 | mask |= 2 << sh; | ||
207 | value |= 1 << sh; | ||
208 | grp = ((pmc - 1) >> 1) & 1; | ||
209 | } | ||
210 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
211 | if (unit) { | ||
212 | if (unit > PM_LASTUNIT) | ||
213 | return -1; | ||
214 | mask |= unit_cons[unit][0]; | ||
215 | value |= unit_cons[unit][1]; | ||
216 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
217 | /* | ||
218 | * Bus events on bytes 0 and 2 can be counted | ||
219 | * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. | ||
220 | */ | ||
221 | if (!pmc) | ||
222 | grp = byte & 1; | ||
223 | /* Set byte lane select field */ | ||
224 | mask |= 0xfULL << (28 - 4 * byte); | ||
225 | value |= (unsigned long)unit << (28 - 4 * byte); | ||
226 | } | ||
227 | if (grp == 0) { | ||
228 | /* increment PMC1/2/5/6 field */ | ||
229 | mask |= 0x8000000000ull; | ||
230 | value |= 0x1000000000ull; | ||
231 | } else if (grp == 1) { | ||
232 | /* increment PMC3/4/7/8 field */ | ||
233 | mask |= 0x800000000ull; | ||
234 | value |= 0x100000000ull; | ||
235 | } | ||
236 | spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; | ||
237 | if (spcsel) { | ||
238 | mask |= 3ull << 48; | ||
239 | value |= (unsigned long)spcsel << 48; | ||
240 | } | ||
241 | *maskp = mask; | ||
242 | *valp = value; | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
247 | { | ||
248 | alt[0] = event; | ||
249 | |||
250 | /* 2 alternatives for LSU empty */ | ||
251 | if (event == 0x2002 || event == 0x3002) { | ||
252 | alt[1] = event ^ 0x1000; | ||
253 | return 2; | ||
254 | } | ||
255 | |||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | static int p970_compute_mmcr(u64 event[], int n_ev, | ||
260 | unsigned int hwc[], unsigned long mmcr[]) | ||
261 | { | ||
262 | unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
263 | unsigned int pmc, unit, byte, psel; | ||
264 | unsigned int ttm, grp; | ||
265 | unsigned int pmc_inuse = 0; | ||
266 | unsigned int pmc_grp_use[2]; | ||
267 | unsigned char busbyte[4]; | ||
268 | unsigned char unituse[16]; | ||
269 | unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 }; | ||
270 | unsigned char ttmuse[2]; | ||
271 | unsigned char pmcsel[8]; | ||
272 | int i; | ||
273 | int spcsel; | ||
274 | |||
275 | if (n_ev > 8) | ||
276 | return -1; | ||
277 | |||
278 | /* First pass to count resource use */ | ||
279 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
280 | memset(busbyte, 0, sizeof(busbyte)); | ||
281 | memset(unituse, 0, sizeof(unituse)); | ||
282 | for (i = 0; i < n_ev; ++i) { | ||
283 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
284 | if (pmc) { | ||
285 | if (pmc_inuse & (1 << (pmc - 1))) | ||
286 | return -1; | ||
287 | pmc_inuse |= 1 << (pmc - 1); | ||
288 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
289 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
290 | } | ||
291 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
292 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
293 | if (unit) { | ||
294 | if (unit > PM_LASTUNIT) | ||
295 | return -1; | ||
296 | if (!pmc) | ||
297 | ++pmc_grp_use[byte & 1]; | ||
298 | if (busbyte[byte] && busbyte[byte] != unit) | ||
299 | return -1; | ||
300 | busbyte[byte] = unit; | ||
301 | unituse[unit] = 1; | ||
302 | } | ||
303 | } | ||
304 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
305 | return -1; | ||
306 | |||
307 | /* | ||
308 | * Assign resources and set multiplexer selects. | ||
309 | * | ||
310 | * PM_ISU can go on either TTM0 or TTM1, but that's the only | ||
311 | * choice we have to deal with. | ||
312 | */ | ||
313 | if (unituse[PM_ISU] & | ||
314 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU])) | ||
315 | unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */ | ||
316 | /* Set TTM[01]SEL fields. */ | ||
317 | ttmuse[0] = ttmuse[1] = 0; | ||
318 | for (i = PM_FPU; i <= PM_STS; ++i) { | ||
319 | if (!unituse[i]) | ||
320 | continue; | ||
321 | ttm = unitmap[i]; | ||
322 | ++ttmuse[(ttm >> 2) & 1]; | ||
323 | mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH; | ||
324 | } | ||
325 | /* Check only one unit per TTMx */ | ||
326 | if (ttmuse[0] > 1 || ttmuse[1] > 1) | ||
327 | return -1; | ||
328 | |||
329 | /* Set byte lane select fields and TTM3SEL. */ | ||
330 | for (byte = 0; byte < 4; ++byte) { | ||
331 | unit = busbyte[byte]; | ||
332 | if (!unit) | ||
333 | continue; | ||
334 | if (unit <= PM_STS) | ||
335 | ttm = (unitmap[unit] >> 2) & 1; | ||
336 | else if (unit == PM_LSU0) | ||
337 | ttm = 2; | ||
338 | else { | ||
339 | ttm = 3; | ||
340 | if (unit == PM_LSU1L && byte >= 2) | ||
341 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
342 | } | ||
343 | mmcr1 |= (unsigned long)ttm | ||
344 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
345 | } | ||
346 | |||
347 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
348 | memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */ | ||
349 | for (i = 0; i < n_ev; ++i) { | ||
350 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
351 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
352 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
353 | psel = event[i] & PM_PMCSEL_MSK; | ||
354 | if (!pmc) { | ||
355 | /* Bus event or any-PMC direct event */ | ||
356 | if (unit) | ||
357 | psel |= 0x10 | ((byte & 2) << 2); | ||
358 | else | ||
359 | psel |= 8; | ||
360 | for (pmc = 0; pmc < 8; ++pmc) { | ||
361 | if (pmc_inuse & (1 << pmc)) | ||
362 | continue; | ||
363 | grp = (pmc >> 1) & 1; | ||
364 | if (unit) { | ||
365 | if (grp == (byte & 1)) | ||
366 | break; | ||
367 | } else if (pmc_grp_use[grp] < 4) { | ||
368 | ++pmc_grp_use[grp]; | ||
369 | break; | ||
370 | } | ||
371 | } | ||
372 | pmc_inuse |= 1 << pmc; | ||
373 | } else { | ||
374 | /* Direct event */ | ||
375 | --pmc; | ||
376 | if (psel == 0 && (byte & 2)) | ||
377 | /* add events on higher-numbered bus */ | ||
378 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
379 | } | ||
380 | pmcsel[pmc] = psel; | ||
381 | hwc[i] = pmc; | ||
382 | spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; | ||
383 | mmcr1 |= spcsel; | ||
384 | if (p970_marked_instr_event(event[i])) | ||
385 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
386 | } | ||
387 | for (pmc = 0; pmc < 2; ++pmc) | ||
388 | mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
389 | for (; pmc < 8; ++pmc) | ||
390 | mmcr1 |= (unsigned long)pmcsel[pmc] | ||
391 | << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
392 | if (pmc_inuse & 1) | ||
393 | mmcr0 |= MMCR0_PMC1CE; | ||
394 | if (pmc_inuse & 0xfe) | ||
395 | mmcr0 |= MMCR0_PMCjCE; | ||
396 | |||
397 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
398 | |||
399 | /* Return MMCRx values */ | ||
400 | mmcr[0] = mmcr0; | ||
401 | mmcr[1] = mmcr1; | ||
402 | mmcr[2] = mmcra; | ||
403 | return 0; | ||
404 | } | ||
405 | |||
406 | static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
407 | { | ||
408 | int shift, i; | ||
409 | |||
410 | if (pmc <= 1) { | ||
411 | shift = MMCR0_PMC1SEL_SH - 7 * pmc; | ||
412 | i = 0; | ||
413 | } else { | ||
414 | shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); | ||
415 | i = 1; | ||
416 | } | ||
417 | /* | ||
418 | * Setting the PMCxSEL field to 0x08 disables PMC x. | ||
419 | */ | ||
420 | mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); | ||
421 | } | ||
422 | |||
423 | static int ppc970_generic_events[] = { | ||
424 | [PERF_COUNT_HW_CPU_CYCLES] = 7, | ||
425 | [PERF_COUNT_HW_INSTRUCTIONS] = 1, | ||
426 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ | ||
427 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ | ||
428 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ | ||
429 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ | ||
430 | }; | ||
431 | |||
432 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
433 | |||
434 | /* | ||
435 | * Table of generalized cache-related events. | ||
436 | * 0 means not supported, -1 means nonsensical, other values | ||
437 | * are event codes. | ||
438 | */ | ||
439 | static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
440 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
441 | [C(OP_READ)] = { 0x8810, 0x3810 }, | ||
442 | [C(OP_WRITE)] = { 0x7810, 0x813 }, | ||
443 | [C(OP_PREFETCH)] = { 0x731, 0 }, | ||
444 | }, | ||
445 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
446 | [C(OP_READ)] = { 0, 0 }, | ||
447 | [C(OP_WRITE)] = { -1, -1 }, | ||
448 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
449 | }, | ||
450 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
451 | [C(OP_READ)] = { 0, 0 }, | ||
452 | [C(OP_WRITE)] = { 0, 0 }, | ||
453 | [C(OP_PREFETCH)] = { 0x733, 0 }, | ||
454 | }, | ||
455 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
456 | [C(OP_READ)] = { 0, 0x704 }, | ||
457 | [C(OP_WRITE)] = { -1, -1 }, | ||
458 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
459 | }, | ||
460 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
461 | [C(OP_READ)] = { 0, 0x700 }, | ||
462 | [C(OP_WRITE)] = { -1, -1 }, | ||
463 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
464 | }, | ||
465 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
466 | [C(OP_READ)] = { 0x431, 0x327 }, | ||
467 | [C(OP_WRITE)] = { -1, -1 }, | ||
468 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
469 | }, | ||
470 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
471 | [C(OP_READ)] = { -1, -1 }, | ||
472 | [C(OP_WRITE)] = { -1, -1 }, | ||
473 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
474 | }, | ||
475 | }; | ||
476 | |||
477 | static struct power_pmu ppc970_pmu = { | ||
478 | .name = "PPC970/FX/MP", | ||
479 | .n_counter = 8, | ||
480 | .max_alternatives = 2, | ||
481 | .add_fields = 0x001100005555ull, | ||
482 | .test_adder = 0x013300000000ull, | ||
483 | .compute_mmcr = p970_compute_mmcr, | ||
484 | .get_constraint = p970_get_constraint, | ||
485 | .get_alternatives = p970_get_alternatives, | ||
486 | .disable_pmc = p970_disable_pmc, | ||
487 | .n_generic = ARRAY_SIZE(ppc970_generic_events), | ||
488 | .generic_events = ppc970_generic_events, | ||
489 | .cache_events = &ppc970_cache_events, | ||
490 | }; | ||
491 | |||
492 | static int __init init_ppc970_pmu(void) | ||
493 | { | ||
494 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
495 | (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") | ||
496 | && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP"))) | ||
497 | return -ENODEV; | ||
498 | |||
499 | return register_power_pmu(&ppc970_pmu); | ||
500 | } | ||
501 | |||
502 | early_initcall(init_ppc970_pmu); | ||
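The first pass of p970_compute_mmcr() is pure feasibility checking: no PMC may be requested twice, each of the four event-bus bytes may be driven by only one unit, and each PMC group (1/2/5/6 vs 3/4/7/8) holds at most four events. That logic extracts cleanly into a userspace model for experimentation; the struct and function names below are hypothetical:

#include <stdio.h>
#include <string.h>

struct ev { int pmc, unit, byte; };

/* Model of the first-pass resource check in p970_compute_mmcr(). */
static int first_pass_ok(const struct ev *e, int n)
{
	int grp_use[2] = { 0, 0 };
	int busbyte[4];
	unsigned int pmc_inuse = 0;

	memset(busbyte, 0, sizeof(busbyte));
	for (int i = 0; i < n; i++) {
		if (e[i].pmc) {
			if (pmc_inuse & (1u << (e[i].pmc - 1)))
				return 0;	/* PMC double-booked */
			pmc_inuse |= 1u << (e[i].pmc - 1);
			++grp_use[((e[i].pmc - 1) >> 1) & 1];
		}
		if (e[i].unit) {
			if (!e[i].pmc)
				++grp_use[e[i].byte & 1];
			if (busbyte[e[i].byte] &&
			    busbyte[e[i].byte] != e[i].unit)
				return 0;	/* byte lane contended */
			busbyte[e[i].byte] = e[i].unit;
		}
	}
	return grp_use[0] <= 4 && grp_use[1] <= 4;
}

int main(void)
{
	struct ev ok[]  = { { 1, 0, 0 }, { 0, 7, 1 } };
	struct ev bad[] = { { 0, 7, 1 }, { 0, 3, 1 } }; /* both want byte 1 */

	printf("%d %d\n", first_pass_ok(ok, 2), first_pass_ok(bad, 2));
	return 0;
}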
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d817ab018486..e40707032ac3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c | |||
@@ -647,6 +647,9 @@ void show_regs(struct pt_regs * regs) | |||
647 | printk("MSR: "REG" ", regs->msr); | 647 | printk("MSR: "REG" ", regs->msr); |
648 | printbits(regs->msr, msr_bits); | 648 | printbits(regs->msr, msr_bits); |
649 | printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); | 649 | printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); |
650 | #ifdef CONFIG_PPC64 | ||
651 | printk("SOFTE: %ld\n", regs->softe); | ||
652 | #endif | ||
650 | trap = TRAP(regs); | 653 | trap = TRAP(regs); |
651 | if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) | 654 | if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) |
652 | printk("CFAR: "REG"\n", regs->orig_gpr3); | 655 | printk("CFAR: "REG"\n", regs->orig_gpr3); |
@@ -1220,34 +1223,32 @@ void dump_stack(void) | |||
1220 | EXPORT_SYMBOL(dump_stack); | 1223 | EXPORT_SYMBOL(dump_stack); |
1221 | 1224 | ||
1222 | #ifdef CONFIG_PPC64 | 1225 | #ifdef CONFIG_PPC64 |
1223 | void ppc64_runlatch_on(void) | 1226 | /* Called with hard IRQs off */ |
1227 | void __ppc64_runlatch_on(void) | ||
1224 | { | 1228 | { |
1229 | struct thread_info *ti = current_thread_info(); | ||
1225 | unsigned long ctrl; | 1230 | unsigned long ctrl; |
1226 | 1231 | ||
1227 | if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) { | 1232 | ctrl = mfspr(SPRN_CTRLF); |
1228 | HMT_medium(); | 1233 | ctrl |= CTRL_RUNLATCH; |
1229 | 1234 | mtspr(SPRN_CTRLT, ctrl); | |
1230 | ctrl = mfspr(SPRN_CTRLF); | ||
1231 | ctrl |= CTRL_RUNLATCH; | ||
1232 | mtspr(SPRN_CTRLT, ctrl); | ||
1233 | 1235 | ||
1234 | set_thread_flag(TIF_RUNLATCH); | 1236 | ti->local_flags |= TLF_RUNLATCH; |
1235 | } | ||
1236 | } | 1237 | } |
1237 | 1238 | ||
1239 | /* Called with hard IRQs off */ | ||
1238 | void __ppc64_runlatch_off(void) | 1240 | void __ppc64_runlatch_off(void) |
1239 | { | 1241 | { |
1242 | struct thread_info *ti = current_thread_info(); | ||
1240 | unsigned long ctrl; | 1243 | unsigned long ctrl; |
1241 | 1244 | ||
1242 | HMT_medium(); | 1245 | ti->local_flags &= ~TLF_RUNLATCH; |
1243 | |||
1244 | clear_thread_flag(TIF_RUNLATCH); | ||
1245 | 1246 | ||
1246 | ctrl = mfspr(SPRN_CTRLF); | 1247 | ctrl = mfspr(SPRN_CTRLF); |
1247 | ctrl &= ~CTRL_RUNLATCH; | 1248 | ctrl &= ~CTRL_RUNLATCH; |
1248 | mtspr(SPRN_CTRLT, ctrl); | 1249 | mtspr(SPRN_CTRLT, ctrl); |
1249 | } | 1250 | } |
1250 | #endif | 1251 | #endif /* CONFIG_PPC64 */ |
1251 | 1252 | ||
1252 | #if THREAD_SHIFT < PAGE_SHIFT | 1253 | #if THREAD_SHIFT < PAGE_SHIFT |
1253 | 1254 | ||
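The rewritten runlatch helpers above reduce to a read-modify-write of the CTRL special-purpose register, with the latch state now cached in thread_info local_flags so the inline wrappers (not shown in this hunk) can skip the SPR traffic when nothing changed. A freestanding sketch of the SPR access itself, assuming a powerpc64 target; per the architecture, CTRL reads through SPR 136 (CTRLF), writes through SPR 152 (CTRLT), and the run latch is the low bit:

/* powerpc64 only: set the CTRL run latch, as __ppc64_runlatch_on()
 * does above. SPR 136 is the read port (CTRLF), SPR 152 the write
 * port (CTRLT). */
static inline void runlatch_set(void)
{
	unsigned long ctrl;

	__asm__ __volatile__("mfspr %0, 136" : "=r" (ctrl));
	ctrl |= 1UL;				/* CTRL_RUNLATCH */
	__asm__ __volatile__("mtspr 152, %0" : : "r" (ctrl));
}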
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index abe405dab34d..89e850af3dd6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c | |||
@@ -52,9 +52,9 @@ | |||
52 | #include <asm/machdep.h> | 52 | #include <asm/machdep.h> |
53 | #include <asm/pSeries_reconfig.h> | 53 | #include <asm/pSeries_reconfig.h> |
54 | #include <asm/pci-bridge.h> | 54 | #include <asm/pci-bridge.h> |
55 | #include <asm/phyp_dump.h> | ||
56 | #include <asm/kexec.h> | 55 | #include <asm/kexec.h> |
57 | #include <asm/opal.h> | 56 | #include <asm/opal.h> |
57 | #include <asm/fadump.h> | ||
58 | 58 | ||
59 | #include <mm/mmu_decl.h> | 59 | #include <mm/mmu_decl.h> |
60 | 60 | ||
@@ -615,86 +615,6 @@ static void __init early_reserve_mem(void) | |||
615 | } | 615 | } |
616 | } | 616 | } |
617 | 617 | ||
618 | #ifdef CONFIG_PHYP_DUMP | ||
619 | /** | ||
620 | * phyp_dump_calculate_reserve_size() - reserve variable boot area 5% or arg | ||
621 | * | ||
622 | * Function to find the largest size we need to reserve | ||
623 | * during early boot process. | ||
624 | * | ||
625 | * It either looks for boot param and returns that OR | ||
626 | * returns larger of 256 or 5% rounded down to multiples of 256MB. | ||
627 | * | ||
628 | */ | ||
629 | static inline unsigned long phyp_dump_calculate_reserve_size(void) | ||
630 | { | ||
631 | unsigned long tmp; | ||
632 | |||
633 | if (phyp_dump_info->reserve_bootvar) | ||
634 | return phyp_dump_info->reserve_bootvar; | ||
635 | |||
636 | /* divide by 20 to get 5% of value */ | ||
637 | tmp = memblock_end_of_DRAM(); | ||
638 | do_div(tmp, 20); | ||
639 | |||
640 | /* round it down in multiples of 256 */ | ||
641 | tmp = tmp & ~0x0FFFFFFFUL; | ||
642 | |||
643 | return (tmp > PHYP_DUMP_RMR_END ? tmp : PHYP_DUMP_RMR_END); | ||
644 | } | ||
645 | |||
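The arithmetic in the deleted helper is worth a worked example: with no boot-time override it reserves 5% of RAM rounded down to a 256MB multiple, with a 256MB floor. Rerun in userspace (assuming LP64 longs; PHYP_DUMP_RMR_END's 256MB value is an assumption taken from the old phyp_dump headers):

#include <stdio.h>

#define PHYP_DUMP_RMR_END	(1UL << 28)	/* 256MB, assumed value */

/* Userspace rerun of the calculation above (no boot override). */
static unsigned long reserve_size(unsigned long dram_end)
{
	unsigned long tmp = dram_end / 20;	/* 5% */

	tmp &= ~0x0FFFFFFFUL;			/* round down to 256MB */
	return tmp > PHYP_DUMP_RMR_END ? tmp : PHYP_DUMP_RMR_END;
}

int main(void)
{
	/* 32GB machine: 5% = 1.6GB, rounds down to 1.5GB */
	printf("%lu MB\n", reserve_size(32UL << 30) >> 20);	/* 1536 */
	/* 2GB machine: 5% is ~102MB, so the 256MB floor applies */
	printf("%lu MB\n", reserve_size(2UL << 30) >> 20);	/* 256 */
	return 0;
}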
646 | /** | ||
647 | * phyp_dump_reserve_mem() - reserve all not-yet-dumped memory | ||
648 | * | ||
649 | * This routine may reserve memory regions in the kernel only | ||
650 | * if the system is supported and a dump was taken in last | ||
651 | * boot instance or if the hardware is supported and the | ||
652 | * scratch area needs to be set up. In other instances it returns | ||
653 | * without reserving anything. When a dump is active, the reserved | ||
654 | * memory is freed once the dump has been collected (by userland tools). | ||
655 | */ | ||
656 | static void __init phyp_dump_reserve_mem(void) | ||
657 | { | ||
658 | unsigned long base, size; | ||
659 | unsigned long variable_reserve_size; | ||
660 | |||
661 | if (!phyp_dump_info->phyp_dump_configured) { | ||
662 | printk(KERN_ERR "Phyp-dump not supported on this hardware\n"); | ||
663 | return; | ||
664 | } | ||
665 | |||
666 | if (!phyp_dump_info->phyp_dump_at_boot) { | ||
667 | printk(KERN_INFO "Phyp-dump disabled at boot time\n"); | ||
668 | return; | ||
669 | } | ||
670 | |||
671 | variable_reserve_size = phyp_dump_calculate_reserve_size(); | ||
672 | |||
673 | if (phyp_dump_info->phyp_dump_is_active) { | ||
674 | /* Reserve *everything* above RMR. Area freed by userland tools. */ | ||
675 | base = variable_reserve_size; | ||
676 | size = memblock_end_of_DRAM() - base; | ||
677 | |||
678 | /* XXX crashed_ram_end is wrong, since it may be beyond | ||
679 | * the memory_limit; it will need to be adjusted. */ | ||
680 | memblock_reserve(base, size); | ||
681 | |||
682 | phyp_dump_info->init_reserve_start = base; | ||
683 | phyp_dump_info->init_reserve_size = size; | ||
684 | } else { | ||
685 | size = phyp_dump_info->cpu_state_size + | ||
686 | phyp_dump_info->hpte_region_size + | ||
687 | variable_reserve_size; | ||
688 | base = memblock_end_of_DRAM() - size; | ||
689 | memblock_reserve(base, size); | ||
690 | phyp_dump_info->init_reserve_start = base; | ||
691 | phyp_dump_info->init_reserve_size = size; | ||
692 | } | ||
693 | } | ||
694 | #else | ||
695 | static inline void __init phyp_dump_reserve_mem(void) {} | ||
696 | #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ | ||
697 | |||
698 | void __init early_init_devtree(void *params) | 618 | void __init early_init_devtree(void *params) |
699 | { | 619 | { |
700 | phys_addr_t limit; | 620 | phys_addr_t limit; |
@@ -714,9 +634,9 @@ void __init early_init_devtree(void *params) | |||
714 | of_scan_flat_dt(early_init_dt_scan_opal, NULL); | 634 | of_scan_flat_dt(early_init_dt_scan_opal, NULL); |
715 | #endif | 635 | #endif |
716 | 636 | ||
717 | #ifdef CONFIG_PHYP_DUMP | 637 | #ifdef CONFIG_FA_DUMP |
718 | /* scan tree to see if dump occurred during last boot */ | 638 | /* scan tree to see if dump was active during last boot */ |
719 | of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); | 639 | of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); |
720 | #endif | 640 | #endif |
721 | 641 | ||
722 | /* Pre-initialize the cmd_line with the content of boot_command_line, | 642 | /* Pre-initialize the cmd_line with the content of boot_command_line, |
@@ -750,9 +670,15 @@ void __init early_init_devtree(void *params) | |||
750 | if (PHYSICAL_START > MEMORY_START) | 670 | if (PHYSICAL_START > MEMORY_START) |
751 | memblock_reserve(MEMORY_START, 0x8000); | 671 | memblock_reserve(MEMORY_START, 0x8000); |
752 | reserve_kdump_trampoline(); | 672 | reserve_kdump_trampoline(); |
753 | reserve_crashkernel(); | 673 | #ifdef CONFIG_FA_DUMP |
674 | /* | ||
675 | * If we fail to reserve memory for firmware-assisted dump then | ||
676 | * fall back to kexec-based kdump. | ||
677 | */ | ||
678 | if (fadump_reserve_mem() == 0) | ||
679 | #endif | ||
680 | reserve_crashkernel(); | ||
754 | early_reserve_mem(); | 681 | early_reserve_mem(); |
755 | phyp_dump_reserve_mem(); | ||
756 | 682 | ||
757 | /* | 683 | /* |
758 | * Ensure that total memory size is page-aligned, because otherwise | 684 | * Ensure that total memory size is page-aligned, because otherwise |
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index eca626ea3f23..e2d599048142 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c | |||
@@ -48,14 +48,6 @@ | |||
48 | #include <linux/linux_logo.h> | 48 | #include <linux/linux_logo.h> |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * Properties whose value is longer than this get excluded from our | ||
52 | * copy of the device tree. This value does need to be big enough to | ||
53 | * ensure that we don't lose things like the interrupt-map property | ||
54 | * on a PCI-PCI bridge. | ||
55 | */ | ||
56 | #define MAX_PROPERTY_LENGTH (1UL * 1024 * 1024) | ||
57 | |||
58 | /* | ||
59 | * Eventually bump that one up | 51 | * Eventually bump that one up |
60 | */ | 52 | */ |
61 | #define DEVTREE_CHUNK_SIZE 0x100000 | 53 | #define DEVTREE_CHUNK_SIZE 0x100000 |
@@ -2273,13 +2265,6 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, | |||
2273 | /* sanity checks */ | 2265 | /* sanity checks */ |
2274 | if (l == PROM_ERROR) | 2266 | if (l == PROM_ERROR) |
2275 | continue; | 2267 | continue; |
2276 | if (l > MAX_PROPERTY_LENGTH) { | ||
2277 | prom_printf("WARNING: ignoring large property "); | ||
2278 | /* It seems OF doesn't null-terminate the path :-( */ | ||
2279 | prom_printf("[%s] ", path); | ||
2280 | prom_printf("%s length 0x%x\n", RELOC(pname), l); | ||
2281 | continue; | ||
2282 | } | ||
2283 | 2268 | ||
2284 | /* push property head */ | 2269 | /* push property head */ |
2285 | dt_push_token(OF_DT_PROP, mem_start, mem_end); | 2270 | dt_push_token(OF_DT_PROP, mem_start, mem_end); |
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 6cd8f0196b6d..179af906dcda 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c | |||
@@ -275,8 +275,11 @@ void __init find_and_init_phbs(void) | |||
275 | of_node_put(root); | 275 | of_node_put(root); |
276 | pci_devs_phb_init(); | 276 | pci_devs_phb_init(); |
277 | 277 | ||
278 | /* Create EEH devices for all PHBs */ | ||
279 | eeh_dev_phb_init(); | ||
280 | |||
278 | /* | 281 | /* |
279 | * pci_probe_only and pci_assign_all_buses can be set via properties | 282 | * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties |
280 | * in chosen. | 283 | * in chosen. |
281 | */ | 284 | */ |
282 | if (of_chosen) { | 285 | if (of_chosen) { |
@@ -284,8 +287,12 @@ void __init find_and_init_phbs(void) | |||
284 | 287 | ||
285 | prop = of_get_property(of_chosen, | 288 | prop = of_get_property(of_chosen, |
286 | "linux,pci-probe-only", NULL); | 289 | "linux,pci-probe-only", NULL); |
287 | if (prop) | 290 | if (prop) { |
288 | pci_probe_only = *prop; | 291 | if (*prop) |
292 | pci_add_flags(PCI_PROBE_ONLY); | ||
293 | else | ||
294 | pci_clear_flags(PCI_PROBE_ONLY); | ||
295 | } | ||
289 | 296 | ||
290 | #ifdef CONFIG_PPC32 /* Will be made generic soon */ | 297 | #ifdef CONFIG_PPC32 /* Will be made generic soon */ |
291 | prop = of_get_property(of_chosen, | 298 | prop = of_get_property(of_chosen, |
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 77bb77da05c1..b0ebdeab9494 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c | |||
@@ -61,6 +61,7 @@ | |||
61 | #include <asm/xmon.h> | 61 | #include <asm/xmon.h> |
62 | #include <asm/cputhreads.h> | 62 | #include <asm/cputhreads.h> |
63 | #include <mm/mmu_decl.h> | 63 | #include <mm/mmu_decl.h> |
64 | #include <asm/fadump.h> | ||
64 | 65 | ||
65 | #include "setup.h" | 66 | #include "setup.h" |
66 | 67 | ||
@@ -109,6 +110,14 @@ EXPORT_SYMBOL(ppc_do_canonicalize_irqs); | |||
109 | /* also used by kexec */ | 110 | /* also used by kexec */ |
110 | void machine_shutdown(void) | 111 | void machine_shutdown(void) |
111 | { | 112 | { |
113 | #ifdef CONFIG_FA_DUMP | ||
114 | /* | ||
115 | * if fadump is active, cleanup the fadump registration before we | ||
116 | * shutdown. | ||
117 | */ | ||
118 | fadump_cleanup(); | ||
119 | #endif | ||
120 | |||
112 | if (ppc_md.machine_shutdown) | 121 | if (ppc_md.machine_shutdown) |
113 | ppc_md.machine_shutdown(); | 122 | ppc_md.machine_shutdown(); |
114 | } | 123 | } |
@@ -639,6 +648,11 @@ EXPORT_SYMBOL(check_legacy_ioport); | |||
639 | static int ppc_panic_event(struct notifier_block *this, | 648 | static int ppc_panic_event(struct notifier_block *this, |
640 | unsigned long event, void *ptr) | 649 | unsigned long event, void *ptr) |
641 | { | 650 | { |
651 | /* | ||
652 | * If firmware-assisted dump has been registered then trigger | ||
653 | * firmware-assisted dump and let firmware handle everything else. | ||
654 | */ | ||
655 | crash_fadump(NULL, ptr); | ||
642 | ppc_md.panic(ptr); /* May not return */ | 656 | ppc_md.panic(ptr); /* May not return */ |
643 | return NOTIFY_DONE; | 657 | return NOTIFY_DONE; |
644 | } | 658 | } |
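ppc_panic_event() runs off the generic panic notifier chain, so with this change a registered fadump is triggered before ppc_md.panic() ever runs. The hookup follows the standard notifier pattern; a sketch reconstructed for illustration (the actual registration lives elsewhere in setup-common.c, outside this hunk):

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/init.h>

static struct notifier_block ppc_panic_block = {
	.notifier_call = ppc_panic_event,
	.priority = INT_MIN,	/* may not return; must run last */
};

static void __init setup_panic(void)
{
	atomic_notifier_chain_register(&panic_notifier_list,
				       &ppc_panic_block);
}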
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index ac6e437b1021..7006b7f4267a 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c | |||
@@ -57,10 +57,7 @@ void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, | |||
57 | void restore_sigmask(sigset_t *set) | 57 | void restore_sigmask(sigset_t *set) |
58 | { | 58 | { |
59 | sigdelsetmask(set, ~_BLOCKABLE); | 59 | sigdelsetmask(set, ~_BLOCKABLE); |
60 | spin_lock_irq(¤t->sighand->siglock); | 60 | set_current_blocked(set); |
61 | current->blocked = *set; | ||
62 | recalc_sigpending(); | ||
63 | spin_unlock_irq(¤t->sighand->siglock); | ||
64 | } | 61 | } |
65 | 62 | ||
66 | static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, | 63 | static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, |
@@ -169,13 +166,7 @@ static int do_signal(struct pt_regs *regs) | |||
169 | 166 | ||
170 | regs->trap = 0; | 167 | regs->trap = 0; |
171 | if (ret) { | 168 | if (ret) { |
172 | spin_lock_irq(¤t->sighand->siglock); | 169 | block_sigmask(&ka, signr); |
173 | sigorsets(¤t->blocked, ¤t->blocked, | ||
174 | &ka.sa.sa_mask); | ||
175 | if (!(ka.sa.sa_flags & SA_NODEFER)) | ||
176 | sigaddset(¤t->blocked, signr); | ||
177 | recalc_sigpending(); | ||
178 | spin_unlock_irq(¤t->sighand->siglock); | ||
179 | 170 | ||
180 | /* | 171 | /* |
181 | * A signal was successfully delivered; the saved sigmask is in | 172 | * A signal was successfully delivered; the saved sigmask is in |
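The open-coded siglock dance deleted above is exactly what block_sigmask() now encapsulates: block the handler's sa_mask plus, unless SA_NODEFER is set, the signal just delivered. For intuition, the userspace analogue of that post-delivery masking (done explicitly here; the C library normally applies it when invoking the handler):

#include <signal.h>

/* Block the signals a handler wants masked while it runs:
 * its sa_mask, plus the delivered signal unless SA_NODEFER. */
static void block_for_handler(const struct sigaction *ka, int signr)
{
	sigset_t blocked = ka->sa_mask;

	if (!(ka->sa_flags & SA_NODEFER))
		sigaddset(&blocked, signr);
	sigprocmask(SIG_BLOCK, &blocked, NULL);
}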
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 836a5a19eb2c..e061ef5dd449 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c | |||
@@ -242,12 +242,13 @@ static inline int restore_general_regs(struct pt_regs *regs, | |||
242 | */ | 242 | */ |
243 | long sys_sigsuspend(old_sigset_t mask) | 243 | long sys_sigsuspend(old_sigset_t mask) |
244 | { | 244 | { |
245 | mask &= _BLOCKABLE; | 245 | sigset_t blocked; |
246 | spin_lock_irq(¤t->sighand->siglock); | 246 | |
247 | current->saved_sigmask = current->blocked; | 247 | current->saved_sigmask = current->blocked; |
248 | siginitset(¤t->blocked, mask); | 248 | |
249 | recalc_sigpending(); | 249 | mask &= _BLOCKABLE; |
250 | spin_unlock_irq(¤t->sighand->siglock); | 250 | siginitset(&blocked, mask); |
251 | set_current_blocked(&blocked); | ||
251 | 252 | ||
252 | current->state = TASK_INTERRUPTIBLE; | 253 | current->state = TASK_INTERRUPTIBLE; |
253 | schedule(); | 254 | schedule(); |
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 883e74c0d1b3..0c683d376b1c 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <asm/current.h> | 12 | #include <asm/current.h> |
13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
14 | #include <asm/cputable.h> | 14 | #include <asm/cputable.h> |
15 | #include <asm/firmware.h> | ||
16 | #include <asm/hvcall.h> | 15 | #include <asm/hvcall.h> |
17 | #include <asm/prom.h> | 16 | #include <asm/prom.h> |
18 | #include <asm/machdep.h> | 17 | #include <asm/machdep.h> |
@@ -341,8 +340,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu) | |||
341 | int i, nattrs; | 340 | int i, nattrs; |
342 | 341 | ||
343 | #ifdef CONFIG_PPC64 | 342 | #ifdef CONFIG_PPC64 |
344 | if (!firmware_has_feature(FW_FEATURE_ISERIES) && | 343 | if (cpu_has_feature(CPU_FTR_SMT)) |
345 | cpu_has_feature(CPU_FTR_SMT)) | ||
346 | device_create_file(s, &dev_attr_smt_snooze_delay); | 344 | device_create_file(s, &dev_attr_smt_snooze_delay); |
347 | #endif | 345 | #endif |
348 | 346 | ||
@@ -414,8 +412,7 @@ static void unregister_cpu_online(unsigned int cpu) | |||
414 | BUG_ON(!c->hotpluggable); | 412 | BUG_ON(!c->hotpluggable); |
415 | 413 | ||
416 | #ifdef CONFIG_PPC64 | 414 | #ifdef CONFIG_PPC64 |
417 | if (!firmware_has_feature(FW_FEATURE_ISERIES) && | 415 | if (cpu_has_feature(CPU_FTR_SMT)) |
418 | cpu_has_feature(CPU_FTR_SMT)) | ||
419 | device_remove_file(s, &dev_attr_smt_snooze_delay); | 416 | device_remove_file(s, &dev_attr_smt_snooze_delay); |
420 | #endif | 417 | #endif |
421 | 418 | ||
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 567dd7c3ac2a..2c42cd72d0f5 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -17,8 +17,7 @@ | |||
17 | * | 17 | * |
18 | * TODO (not necessarily in this file): | 18 | * TODO (not necessarily in this file): |
19 | * - improve precision and reproducibility of timebase frequency | 19 | * - improve precision and reproducibility of timebase frequency |
20 | * measurement at boot time. (for iSeries, we calibrate the timebase | 20 | * measurement at boot time. |
21 | * against the Titan chip's clock.) | ||
22 | * - for astronomical applications: add a new function to get | 21 | * - for astronomical applications: add a new function to get |
23 | * non ambiguous timestamps even around leap seconds. This needs | 22 | * non ambiguous timestamps even around leap seconds. This needs |
24 | * a new timestamp format and a good name. | 23 | * a new timestamp format and a good name. |
@@ -70,10 +69,6 @@ | |||
70 | #include <asm/vdso_datapage.h> | 69 | #include <asm/vdso_datapage.h> |
71 | #include <asm/firmware.h> | 70 | #include <asm/firmware.h> |
72 | #include <asm/cputime.h> | 71 | #include <asm/cputime.h> |
73 | #ifdef CONFIG_PPC_ISERIES | ||
74 | #include <asm/iseries/it_lp_queue.h> | ||
75 | #include <asm/iseries/hv_call_xm.h> | ||
76 | #endif | ||
77 | 72 | ||
78 | /* powerpc clocksource/clockevent code */ | 73 | /* powerpc clocksource/clockevent code */ |
79 | 74 | ||
@@ -117,14 +112,6 @@ static struct clock_event_device decrementer_clockevent = { | |||
117 | DEFINE_PER_CPU(u64, decrementers_next_tb); | 112 | DEFINE_PER_CPU(u64, decrementers_next_tb); |
118 | static DEFINE_PER_CPU(struct clock_event_device, decrementers); | 113 | static DEFINE_PER_CPU(struct clock_event_device, decrementers); |
119 | 114 | ||
120 | #ifdef CONFIG_PPC_ISERIES | ||
121 | static unsigned long __initdata iSeries_recal_titan; | ||
122 | static signed long __initdata iSeries_recal_tb; | ||
123 | |||
124 | /* Forward declaration is only needed for iSeries compiles */ | ||
125 | static void __init clocksource_init(void); | ||
126 | #endif | ||
127 | |||
128 | #define XSEC_PER_SEC (1024*1024) | 115 | #define XSEC_PER_SEC (1024*1024) |
129 | 116 | ||
130 | #ifdef CONFIG_PPC64 | 117 | #ifdef CONFIG_PPC64 |
@@ -259,7 +246,6 @@ void accumulate_stolen_time(void) | |||
259 | u64 sst, ust; | 246 | u64 sst, ust; |
260 | 247 | ||
261 | u8 save_soft_enabled = local_paca->soft_enabled; | 248 | u8 save_soft_enabled = local_paca->soft_enabled; |
262 | u8 save_hard_enabled = local_paca->hard_enabled; | ||
263 | 249 | ||
264 | /* We are called early in the exception entry, before | 250 | /* We are called early in the exception entry, before |
265 | * soft/hard_enabled are sync'ed to the expected state | 251 | * soft/hard_enabled are sync'ed to the expected state |
@@ -268,7 +254,6 @@ void accumulate_stolen_time(void) | |||
268 | * complain | 254 | * complain |
269 | */ | 255 | */ |
270 | local_paca->soft_enabled = 0; | 256 | local_paca->soft_enabled = 0; |
271 | local_paca->hard_enabled = 0; | ||
272 | 257 | ||
273 | sst = scan_dispatch_log(local_paca->starttime_user); | 258 | sst = scan_dispatch_log(local_paca->starttime_user); |
274 | ust = scan_dispatch_log(local_paca->starttime); | 259 | ust = scan_dispatch_log(local_paca->starttime); |
@@ -277,7 +262,6 @@ void accumulate_stolen_time(void) | |||
277 | local_paca->stolen_time += ust + sst; | 262 | local_paca->stolen_time += ust + sst; |
278 | 263 | ||
279 | local_paca->soft_enabled = save_soft_enabled; | 264 | local_paca->soft_enabled = save_soft_enabled; |
280 | local_paca->hard_enabled = save_hard_enabled; | ||
281 | } | 265 | } |
282 | 266 | ||
283 | static inline u64 calculate_stolen_time(u64 stop_tb) | 267 | static inline u64 calculate_stolen_time(u64 stop_tb) |
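Editor's note: the three hunks above shrink accumulate_stolen_time() to tracking only the soft-enable state, since the paca no longer carries hard_enabled. A minimal sketch of the resulting function, reassembled from the context lines here plus recollection of the surrounding file (the system_time/user_time updates are an assumption, not shown in these hunks):

    void accumulate_stolen_time(void)
    {
        u64 sst, ust;
        u8 save_soft_enabled = local_paca->soft_enabled;

        /* Called early in exception entry, before soft_enabled is
         * synced to the expected state for common code; mask it so
         * the lockdep checks do not complain.
         */
        local_paca->soft_enabled = 0;

        sst = scan_dispatch_log(local_paca->starttime_user);
        ust = scan_dispatch_log(local_paca->starttime);
        local_paca->system_time -= sst;   /* assumed field updates */
        local_paca->user_time -= ust;
        local_paca->stolen_time += ust + sst;

        local_paca->soft_enabled = save_soft_enabled;
    }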
@@ -426,74 +410,6 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
426 | EXPORT_SYMBOL(profile_pc); | 410 | EXPORT_SYMBOL(profile_pc); |
427 | #endif | 411 | #endif |
428 | 412 | ||
429 | #ifdef CONFIG_PPC_ISERIES | ||
430 | |||
431 | /* | ||
432 | * This function recalibrates the timebase based on the 49-bit time-of-day | ||
433 | * value in the Titan chip. The Titan is much more accurate than the value | ||
434 | * returned by the service processor for the timebase frequency. | ||
435 | */ | ||
436 | |||
437 | static int __init iSeries_tb_recal(void) | ||
438 | { | ||
439 | unsigned long titan, tb; | ||
440 | |||
441 | /* Make sure we only run on iSeries */ | ||
442 | if (!firmware_has_feature(FW_FEATURE_ISERIES)) | ||
443 | return -ENODEV; | ||
444 | |||
445 | tb = get_tb(); | ||
446 | titan = HvCallXm_loadTod(); | ||
447 | if ( iSeries_recal_titan ) { | ||
448 | unsigned long tb_ticks = tb - iSeries_recal_tb; | ||
449 | unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12; | ||
450 | unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec; | ||
451 | unsigned long new_tb_ticks_per_jiffy = | ||
452 | DIV_ROUND_CLOSEST(new_tb_ticks_per_sec, HZ); | ||
453 | long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy; | ||
454 | char sign = '+'; | ||
455 | /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */ | ||
456 | new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ; | ||
457 | |||
458 | if ( tick_diff < 0 ) { | ||
459 | tick_diff = -tick_diff; | ||
460 | sign = '-'; | ||
461 | } | ||
462 | if ( tick_diff ) { | ||
463 | if ( tick_diff < tb_ticks_per_jiffy/25 ) { | ||
464 | printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n", | ||
465 | new_tb_ticks_per_jiffy, sign, tick_diff ); | ||
466 | tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; | ||
467 | tb_ticks_per_sec = new_tb_ticks_per_sec; | ||
468 | calc_cputime_factors(); | ||
469 | vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; | ||
470 | setup_cputime_one_jiffy(); | ||
471 | } | ||
472 | else { | ||
473 | printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" | ||
474 | " new tb_ticks_per_jiffy = %lu\n" | ||
475 | " old tb_ticks_per_jiffy = %lu\n", | ||
476 | new_tb_ticks_per_jiffy, tb_ticks_per_jiffy ); | ||
477 | } | ||
478 | } | ||
479 | } | ||
480 | iSeries_recal_titan = titan; | ||
481 | iSeries_recal_tb = tb; | ||
482 | |||
483 | /* Called here as now we know accurate values for the timebase */ | ||
484 | clocksource_init(); | ||
485 | return 0; | ||
486 | } | ||
487 | late_initcall(iSeries_tb_recal); | ||
488 | |||
489 | /* Called from platform early init */ | ||
490 | void __init iSeries_time_init_early(void) | ||
491 | { | ||
492 | iSeries_recal_tb = get_tb(); | ||
493 | iSeries_recal_titan = HvCallXm_loadTod(); | ||
494 | } | ||
495 | #endif /* CONFIG_PPC_ISERIES */ | ||
496 | |||
497 | #ifdef CONFIG_IRQ_WORK | 413 | #ifdef CONFIG_IRQ_WORK |
498 | 414 | ||
499 | /* | 415 | /* |
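Editor's note: for reference, the arithmetic the removed iSeries_tb_recal() performed: the Titan time-of-day delta shifted right by 12 yields microseconds, the timebase delta over that interval gives a refined tb_ticks_per_sec, and the new value is applied only while the per-jiffy change stays under tb_ticks_per_jiffy/25, i.e. 4%. A freestanding rendering of that math with illustrative numbers (not measured values):

    #include <stdio.h>

    #define USEC_PER_SEC 1000000UL
    #define HZ 100    /* assumed config value */
    #define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

    int main(void)
    {
        unsigned long tb_ticks = 512001234;          /* timebase delta      */
        unsigned long titan_usec = 1000000;          /* (titan delta) >> 12 */
        unsigned long tb_ticks_per_jiffy = 5120000;  /* current calibration */

        unsigned long new_per_sec = (tb_ticks * USEC_PER_SEC) / titan_usec;
        unsigned long new_per_jiffy = DIV_ROUND_CLOSEST(new_per_sec, HZ);
        long diff = (long)new_per_jiffy - (long)tb_ticks_per_jiffy;

        if (diff < 0)
            diff = -diff;
        /* accepted only below the 4% drift limit, tb_ticks_per_jiffy / 25 */
        printf("new=%lu diff=%ld limit=%lu accept=%d\n", new_per_jiffy,
               diff, tb_ticks_per_jiffy / 25,
               diff < (long)(tb_ticks_per_jiffy / 25));
        return 0;
    }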
@@ -550,16 +466,6 @@ void arch_irq_work_raise(void) | |||
550 | #endif /* CONFIG_IRQ_WORK */ | 466 | #endif /* CONFIG_IRQ_WORK */ |
551 | 467 | ||
552 | /* | 468 | /* |
553 | * For iSeries shared processors, we have to let the hypervisor | ||
554 | * set the hardware decrementer. We set a virtual decrementer | ||
555 | * in the lppaca and call the hypervisor if the virtual | ||
556 | * decrementer is less than the current value in the hardware | ||
557 | * decrementer. (almost always the new decrementer value will | ||
558 | * be greater than the current hardware decrementer so the hypervisor ||
559 | * call will not be needed) | ||
560 | */ | ||
561 | |||
562 | /* | ||
563 | * timer_interrupt - gets called when the decrementer overflows, | 469 | * timer_interrupt - gets called when the decrementer overflows, |
564 | * with interrupts disabled. | 470 | * with interrupts disabled. |
565 | */ | 471 | */ |
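Editor's note: the comment deleted above described the shared-processor decrementer protocol; since the code itself is long gone, here is only a schematic restatement. Every identifier below is a placeholder standing in for the removed iSeries API, not a real call:

    static void set_decrementer_shared_proc(u64 new_val)
    {
        /* The hypervisor owns the hardware DEC on shared processors:
         * publish the deadline in the lppaca, and only trap into the
         * hypervisor when the new deadline is nearer than whatever is
         * already programmed -- which the old comment notes is rare.
         */
        write_virtual_decr_in_lppaca(new_val);   /* placeholder */
        if (new_val < read_hardware_decr())      /* placeholder */
            hvcall_set_virtual_decr();           /* placeholder */
    }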
@@ -580,6 +486,11 @@ void timer_interrupt(struct pt_regs * regs) | |||
580 | if (!cpu_online(smp_processor_id())) | 486 | if (!cpu_online(smp_processor_id())) |
581 | return; | 487 | return; |
582 | 488 | ||
489 | /* Conditionally hard-enable interrupts now that the DEC has been | ||
490 | * bumped to its maximum value | ||
491 | */ | ||
492 | may_hard_irq_enable(); | ||
493 | |||
583 | trace_timer_interrupt_entry(regs); | 494 | trace_timer_interrupt_entry(regs); |
584 | 495 | ||
585 | __get_cpu_var(irq_stat).timer_irqs++; | 496 | __get_cpu_var(irq_stat).timer_irqs++; |
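Editor's note: the added call lets timer_interrupt() turn hard interrupts back on as soon as the decrementer has been re-armed to its maximum, instead of running the whole handler hard-disabled. may_hard_irq_enable() is defined in the lazy-interrupt rework merged alongside this; from memory it looks roughly like the sketch below (a recollection, not part of this diff):

    static inline void may_hard_irq_enable(void)
    {
        /* The source of this interrupt is cleared, so the hard-disable
         * no longer needs to stick; re-enable EE unless an external
         * interrupt was already latched while we were disabled.
         */
        get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
        if (!(get_paca()->irq_happened & PACA_IRQ_EE))
            __hard_irq_enable();
    }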
@@ -597,20 +508,10 @@ void timer_interrupt(struct pt_regs * regs) | |||
597 | irq_work_run(); | 508 | irq_work_run(); |
598 | } | 509 | } |
599 | 510 | ||
600 | #ifdef CONFIG_PPC_ISERIES | ||
601 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
602 | get_lppaca()->int_dword.fields.decr_int = 0; | ||
603 | #endif | ||
604 | |||
605 | *next_tb = ~(u64)0; | 511 | *next_tb = ~(u64)0; |
606 | if (evt->event_handler) | 512 | if (evt->event_handler) |
607 | evt->event_handler(evt); | 513 | evt->event_handler(evt); |
608 | 514 | ||
609 | #ifdef CONFIG_PPC_ISERIES | ||
610 | if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) | ||
611 | process_hvlpevents(); | ||
612 | #endif | ||
613 | |||
614 | #ifdef CONFIG_PPC64 | 515 | #ifdef CONFIG_PPC64 |
615 | /* collect purr register values often, for accurate calculations */ | 516 | /* collect purr register values often, for accurate calculations */ |
616 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { | 517 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { |
@@ -982,9 +883,8 @@ void __init time_init(void) | |||
982 | */ | 883 | */ |
983 | start_cpu_decrementer(); | 884 | start_cpu_decrementer(); |
984 | 885 | ||
985 | /* Register the clocksource, if we're not running on iSeries */ | 886 | /* Register the clocksource */ |
986 | if (!firmware_has_feature(FW_FEATURE_ISERIES)) | 887 | clocksource_init(); |
987 | clocksource_init(); | ||
988 | 888 | ||
989 | init_decrementer_clockevent(); | 889 | init_decrementer_clockevent(); |
990 | } | 890 | } |
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c091527efd89..a750409ccc4e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <asm/kexec.h> | 57 | #include <asm/kexec.h> |
58 | #include <asm/ppc-opcode.h> | 58 | #include <asm/ppc-opcode.h> |
59 | #include <asm/rio.h> | 59 | #include <asm/rio.h> |
60 | #include <asm/fadump.h> | ||
60 | 61 | ||
61 | #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) | 62 | #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) |
62 | int (*__debugger)(struct pt_regs *regs) __read_mostly; | 63 | int (*__debugger)(struct pt_regs *regs) __read_mostly; |
@@ -145,6 +146,8 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, | |||
145 | arch_spin_unlock(&die_lock); | 146 | arch_spin_unlock(&die_lock); |
146 | raw_local_irq_restore(flags); | 147 | raw_local_irq_restore(flags); |
147 | 148 | ||
149 | crash_fadump(regs, "die oops"); | ||
150 | |||
148 | /* | 151 | /* |
149 | * A system reset (0x100) is a request to dump, so we always send | 152 | * A system reset (0x100) is a request to dump, so we always send |
150 | * it through the crashdump code. | 153 | * it through the crashdump code. |
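Editor's note: oops_end() now gives firmware-assisted dump a chance to capture the crash before the kexec/kdump path is considered. A sketch of the hook's shape, reconstructed from recollection of the fadump code added in this merge window (field and helper names are that recollection, not shown in this diff):

    void crash_fadump(struct pt_regs *regs, const char *str)
    {
        struct fadump_crash_info_header *fdh;

        /* no-op unless a dump was actually registered with firmware */
        if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
            return;

        fdh = __va(fw_dump.fadumphdr_addr);
        fdh->crashing_cpu = smp_processor_id();
        if (regs)
            fdh->regs = *regs;          /* save the faulting context */
        else
            ppc_save_regs(&fdh->regs);  /* or capture it ourselves   */

        fdh->cpu_online_mask = *cpu_online_mask;
        /* ibm,os-term: firmware reboots while preserving memory */
        rtas_os_term((char *)str);
    }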
@@ -244,6 +247,9 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) | |||
244 | addr, regs->nip, regs->link, code); | 247 | addr, regs->nip, regs->link, code); |
245 | } | 248 | } |
246 | 249 | ||
250 | if (!arch_irq_disabled_regs(regs)) | ||
251 | local_irq_enable(); | ||
252 | |||
247 | memset(&info, 0, sizeof(info)); | 253 | memset(&info, 0, sizeof(info)); |
248 | info.si_signo = signr; | 254 | info.si_signo = signr; |
249 | info.si_code = code; | 255 | info.si_code = code; |
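Editor's note: the added guard re-enables interrupts before the signal is generated, but only when the interrupted context had them on; under lazy interrupt disabling the signal path should not run soft-disabled. On ppc64, arch_irq_disabled_regs() inspects the saved soft-enable word rather than MSR[EE]; a one-line sketch of that intent (an assumption from the lazy-IRQ design, not code shown in this diff):

    /* true when the interrupted context had interrupts soft-disabled */
    static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
    {
        return regs->softe == 0;   /* softe: saved soft-enable state */
    }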
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 7d14bb697d40..d36ee1055f88 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c | |||
@@ -263,17 +263,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
263 | * the "data" page of the vDSO or you'll stop getting kernel updates | 263 | * the "data" page of the vDSO or you'll stop getting kernel updates |
264 | * and your nice userland gettimeofday will be totally dead. | 264 | * and your nice userland gettimeofday will be totally dead. |
265 | * It's fine to use that for setting breakpoints in the vDSO code | 265 | * It's fine to use that for setting breakpoints in the vDSO code |
266 | * pages though | 266 | * pages though. |
267 | * | ||
268 | * Make sure the vDSO gets into every core dump. | ||
269 | * Dumping its contents makes post-mortem fully interpretable later | ||
270 | * without matching up the same kernel and hardware config to see | ||
271 | * what PC values meant. | ||
272 | */ | 267 | */ |
273 | rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, | 268 | rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, |
274 | VM_READ|VM_EXEC| | 269 | VM_READ|VM_EXEC| |
275 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 270 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
276 | VM_ALWAYSDUMP, | ||
277 | vdso_pagelist); | 271 | vdso_pagelist); |
278 | if (rc) { | 272 | if (rc) { |
279 | current->mm->context.vdso_base = 0; | 273 | current->mm->context.vdso_base = 0; |
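Editor's note: the deleted comment block and the VM_ALWAYSDUMP flag go together: the flag was removed treewide in this window, and the core dump code now picks up named special mappings such as the vDSO on its own. The surviving call, with the flag groups annotated (a restatement of the hunk, not new behavior):

    /* Read/execute for userland; VM_MAYWRITE stays so a debugger can
     * patch breakpoints into the code pages (never the data page, per
     * the warning above). No dump flag -- dumping is the core's call.
     */
    rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
                                 VM_READ | VM_EXEC |
                                 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
                                 vdso_pagelist);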
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 8b086299ba25..bca3fc427b45 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c | |||
@@ -34,11 +34,6 @@ | |||
34 | #include <asm/abs_addr.h> | 34 | #include <asm/abs_addr.h> |
35 | #include <asm/page.h> | 35 | #include <asm/page.h> |
36 | #include <asm/hvcall.h> | 36 | #include <asm/hvcall.h> |
37 | #include <asm/iseries/vio.h> | ||
38 | #include <asm/iseries/hv_types.h> | ||
39 | #include <asm/iseries/hv_lp_config.h> | ||
40 | #include <asm/iseries/hv_call_xm.h> | ||
41 | #include <asm/iseries/iommu.h> | ||
42 | 37 | ||
43 | static struct bus_type vio_bus_type; | 38 | static struct bus_type vio_bus_type; |
44 | 39 | ||
@@ -1042,7 +1037,6 @@ static void vio_cmo_sysfs_init(void) | |||
1042 | vio_bus_type.bus_attrs = vio_cmo_bus_attrs; | 1037 | vio_bus_type.bus_attrs = vio_cmo_bus_attrs; |
1043 | } | 1038 | } |
1044 | #else /* CONFIG_PPC_SMLPAR */ | 1039 | #else /* CONFIG_PPC_SMLPAR */ |
1045 | /* Dummy functions for iSeries platform */ | ||
1046 | int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } | 1040 | int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } |
1047 | void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} | 1041 | void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} |
1048 | static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } | 1042 | static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } |
@@ -1060,9 +1054,6 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) | |||
1060 | struct iommu_table *tbl; | 1054 | struct iommu_table *tbl; |
1061 | unsigned long offset, size; | 1055 | unsigned long offset, size; |
1062 | 1056 | ||
1063 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | ||
1064 | return vio_build_iommu_table_iseries(dev); | ||
1065 | |||
1066 | dma_window = of_get_property(dev->dev.of_node, | 1057 | dma_window = of_get_property(dev->dev.of_node, |
1067 | "ibm,my-dma-window", NULL); | 1058 | "ibm,my-dma-window", NULL); |
1068 | if (!dma_window) | 1059 | if (!dma_window) |
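Editor's note: with the iSeries detour removed, vio_build_iommu_table() always parses the device-tree window. A sketch of the parse that follows the lookup shown above, assuming the powerpc of_parse_dma_window() helper and the usual LIOBN/offset/size encoding of the property:

    const void *dma_window;
    struct iommu_table *tbl;
    unsigned long offset, size;

    dma_window = of_get_property(dev->dev.of_node,
                                 "ibm,my-dma-window", NULL);
    if (!dma_window)
        return NULL;                /* no window: device cannot do DMA */

    tbl = kzalloc_node(sizeof(*tbl), GFP_KERNEL, -1);
    if (!tbl)
        return NULL;

    /* fills the table's LIOBN plus the window's offset and size */
    of_parse_dma_window(dev->dev.of_node, dma_window,
                        &tbl->it_index, &offset, &size);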
@@ -1195,8 +1186,7 @@ static void __devinit vio_dev_release(struct device *dev) | |||
1195 | { | 1186 | { |
1196 | struct iommu_table *tbl = get_iommu_table_base(dev); | 1187 | struct iommu_table *tbl = get_iommu_table_base(dev); |
1197 | 1188 | ||
1198 | /* iSeries uses a common table for all vio devices */ | 1189 | if (tbl) |
1199 | if (!firmware_has_feature(FW_FEATURE_ISERIES) && tbl) | ||
1200 | iommu_free_table(tbl, dev->of_node ? | 1190 | iommu_free_table(tbl, dev->of_node ? |
1201 | dev->of_node->full_name : dev_name(dev)); | 1191 | dev->of_node->full_name : dev_name(dev)); |
1202 | of_node_put(dev->of_node); | 1192 | of_node_put(dev->of_node); |
@@ -1244,12 +1234,6 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) | |||
1244 | viodev->name = of_node->name; | 1234 | viodev->name = of_node->name; |
1245 | viodev->type = of_node->type; | 1235 | viodev->type = of_node->type; |
1246 | viodev->unit_address = *unit_address; | 1236 | viodev->unit_address = *unit_address; |
1247 | if (firmware_has_feature(FW_FEATURE_ISERIES)) { | ||
1248 | unit_address = of_get_property(of_node, | ||
1249 | "linux,unit_address", NULL); | ||
1250 | if (unit_address != NULL) | ||
1251 | viodev->unit_address = *unit_address; | ||
1252 | } | ||
1253 | viodev->dev.of_node = of_node_get(of_node); | 1237 | viodev->dev.of_node = of_node_get(of_node); |
1254 | 1238 | ||
1255 | if (firmware_has_feature(FW_FEATURE_CMO)) | 1239 | if (firmware_has_feature(FW_FEATURE_CMO)) |
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 710a54005dfb..65d1c08cf09e 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S | |||
@@ -109,11 +109,6 @@ SECTIONS | |||
109 | __ptov_table_begin = .; | 109 | __ptov_table_begin = .; |
110 | *(.ptov_fixup); | 110 | *(.ptov_fixup); |
111 | __ptov_table_end = .; | 111 | __ptov_table_end = .; |
112 | #ifdef CONFIG_PPC_ISERIES | ||
113 | __dt_strings_start = .; | ||
114 | *(.dt_strings); | ||
115 | __dt_strings_end = .; | ||
116 | #endif | ||
117 | } | 112 | } |
118 | 113 | ||
119 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { | 114 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { |