path: root/arch/powerpc/oprofile
author     Maynard Johnson <maynardj@us.ibm.com>    2006-11-20 12:45:16 -0500
committer  Paul Mackerras <paulus@samba.org>        2006-12-04 04:40:14 -0500
commit     18f2190d796198fbb5d4bc4c87511acf3ced7d47 (patch)
tree       621afac81fc83728a41fa5ff9ee3381a1b0f5921 /arch/powerpc/oprofile
parent     0443bbd3d8496f9c2bc3e8c9d1833c6638722743 (diff)
[POWERPC] cell: Add oprofile support
Add PPU event-based and cycle-based profiling support to OProfile for Cell.

OProfile is expected to collect data on all CPUs simultaneously. However, there is one set of performance counters per node, and each node has two hardware threads (virtual CPUs). Hence, OProfile must time-multiplex the performance counter collection between the two virtual CPUs. The multiplexing is done by a virtual counter routine. Initially, the counters are configured to collect data on the even CPUs in the system, one CPU per node. In order to capture the PC for a virtual CPU when the performance counter interrupt occurs (that is, when the specified number of events between samples has elapsed), the even processors are configured to handle the performance counter interrupts for their node. The virtual counter routine is called via a kernel timer after the virtual sample time. The routine stops the counters, saves the current counts, loads the last counts for the other virtual CPU on the node, sets interrupts to be handled by the other virtual CPU, restarts the counters, and then reschedules itself. The virtual sample time is kept relatively small to make sure sampling occurs on both CPUs of the node with relatively fine granularity. Whenever the counters overflow, the performance counter interrupt handler collects the PC for the CPU on which data is currently being collected.

The OProfile driver relies on a firmware RTAS call to set up the debug bus so that the desired signals are routed to the performance counter hardware to be counted. The RTAS call must set the routing registers appropriately in each of the islands to pass the signals down the debug bus, as well as route the signals from a particular island onto the bus. A second firmware RTAS call resets the debug bus to the non-pass-thru state when the counters are not in use.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
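[Editorial note, not part of the patch] For orientation, the stand-alone C sketch below illustrates the save/restore swap at the heart of the multiplexing scheme described above: one physical counter set is time-shared between two hardware threads, and each virtual-counter tick saves the outgoing thread's counts and restores the incoming one's. The names here (phys_ctr, saved, virtual_counter_tick) are invented for the illustration; the real implementation is cell_virtual_cntr() in op_model_cell.c in the diff below.

/*
 * Illustrative simulation only -- not part of the patch.  One physical
 * counter set is time-shared between two hardware threads; each tick
 * saves the outgoing thread's counts and restores the incoming one's,
 * mirroring what cell_virtual_cntr() does with the real PMU registers.
 */
#include <stdio.h>
#include <stdint.h>

#define NUM_THREADS 2
#define NUM_CTRS    4

static uint32_t phys_ctr[NUM_CTRS];            /* the single physical counter set */
static uint32_t saved[NUM_THREADS][NUM_CTRS];  /* per-thread saved counter values */
static int active;                             /* hardware thread currently counted */

static void virtual_counter_tick(void)
{
        int next = active ^ 1;
        int i;

        for (i = 0; i < NUM_CTRS; i++) {
                saved[active][i] = phys_ctr[i];    /* save outgoing thread */
                phys_ctr[i] = saved[next][i];      /* restore incoming thread */
        }
        active = next;    /* overflow interrupts would now be routed here */
}

int main(void)
{
        int tick, i, t;

        for (tick = 0; tick < 4; tick++) {
                /* pretend the active thread generated some events */
                for (i = 0; i < NUM_CTRS; i++)
                        phys_ctr[i] += (active + 1) * 10;
                virtual_counter_tick();
        }
        for (t = 0; t < NUM_THREADS; t++)
                printf("thread %d accumulated %u events on counter 0\n",
                       t, saved[t][0]);
        return 0;
}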
Diffstat (limited to 'arch/powerpc/oprofile')
-rw-r--r--  arch/powerpc/oprofile/Makefile           1
-rw-r--r--  arch/powerpc/oprofile/common.c          15
-rw-r--r--  arch/powerpc/oprofile/op_model_cell.c  724
3 files changed, 738 insertions, 2 deletions
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 0b5df9c96ae0..51c510fed7f7 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -11,6 +11,7 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
 		timer_int.o )
 
 oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
+oprofile-$(CONFIG_PPC_CELL) += op_model_cell.o
 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o
 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
 oprofile-$(CONFIG_6xx) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 63bbef3b63f1..7a423437977c 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -69,7 +69,10 @@ static void op_powerpc_cpu_start(void *dummy)
 
 static int op_powerpc_start(void)
 {
-        on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
+        if (model->global_start)
+                model->global_start(ctr);
+        if (model->start)
+                on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
         return 0;
 }
 
@@ -80,7 +83,10 @@ static inline void op_powerpc_cpu_stop(void *dummy)
 
 static void op_powerpc_stop(void)
 {
-        on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1);
+        if (model->stop)
+                on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1);
+        if (model->global_stop)
+                model->global_stop();
 }
 
 static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
@@ -141,6 +147,11 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
         switch (cur_cpu_spec->oprofile_type) {
 #ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_CELL
+        case PPC_OPROFILE_CELL:
+                model = &op_model_cell;
+                break;
+#endif
         case PPC_OPROFILE_RS64:
                 model = &op_model_rs64;
                 break;
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
new file mode 100644
index 000000000000..2eb15f388103
--- /dev/null
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -0,0 +1,724 @@
/*
 * Cell Broadband Engine OProfile Support
 *
 * (C) Copyright IBM Corporation 2006
 *
 * Author: David Erb (djerb@us.ibm.com)
 * Modifications:
 *         Carl Love <carll@us.ibm.com>
 *         Maynard Johnson <maynardj@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/oprofile.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <asm/cell-pmu.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/oprofile_impl.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/rtas.h>
#include <asm/system.h>

#include "../platforms/cell/interrupt.h"

#define PPU_CYCLES_EVENT_NUM 1          /* event number for CYCLES */
#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */

#define NUM_THREADS 2
#define VIRT_CNTR_SW_TIME_NS 100000000  /* 0.1 seconds */

struct pmc_cntrl_data {
        unsigned long vcntr;
        unsigned long evnts;
        unsigned long masks;
        unsigned long enabled;
};

/*
 * ibm,cbe-perftools rtas parameters
 */

struct pm_signal {
        u16 cpu;                /* Processor to modify */
        u16 sub_unit;           /* hw subunit this applies to (if applicable) */
        u16 signal_group;       /* Signal Group to Enable/Disable */
        u8 bus_word;            /* Enable/Disable on this Trace/Trigger/Event
                                 * Bus Word(s) (bitmask)
                                 */
        u8 bit;                 /* Trigger/Event bit (if applicable) */
};

/*
 * rtas call arguments
 */
enum {
        SUBFUNC_RESET = 1,
        SUBFUNC_ACTIVATE = 2,
        SUBFUNC_DEACTIVATE = 3,

        PASSTHRU_IGNORE = 0,
        PASSTHRU_ENABLE = 1,
        PASSTHRU_DISABLE = 2,
};

struct pm_cntrl {
        u16 enable;
        u16 stop_at_max;
        u16 trace_mode;
        u16 freeze;
        u16 count_mode;
};

static struct {
        u32 group_control;
        u32 debug_bus_control;
        struct pm_cntrl pm_cntrl;
        u32 pm07_cntrl[NR_PHYS_CTRS];
} pm_regs;


#define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12)
#define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4)
#define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8)
#define GET_POLARITY(x) ((x & 0x00000002) >> 1)
#define GET_COUNT_CYCLES(x) (x & 0x00000001)
#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)


static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);

static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];

/* Interpretation of hdw_thread:
 * 0 - even virtual cpus 0, 2, 4, ...
 * 1 - odd virtual cpus 1, 3, 5, ...
 */
static u32 hdw_thread;

static u32 virt_cntr_inter_mask;
static struct timer_list timer_virt_cntr;

/* pm_signal needs to be global since it is initialized in
 * cell_reg_setup at the time when the necessary information
 * is available.
 */
static struct pm_signal pm_signal[NR_PHYS_CTRS];
static int pm_rtas_token;

static u32 reset_value[NR_PHYS_CTRS];
static int num_counters;
static int oprofile_running;
static spinlock_t virt_cntr_lock = SPIN_LOCK_UNLOCKED;

static u32 ctr_enabled;

static unsigned char trace_bus[4];
static unsigned char input_bus[2];

/*
 * Firmware interface functions
 */
static int
rtas_ibm_cbe_perftools(int subfunc, int passthru,
                       void *address, unsigned long length)
{
        u64 paddr = __pa(address);

        return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
                         paddr >> 32, paddr & 0xffffffff, length);
}

static void pm_rtas_reset_signals(u32 node)
{
        int ret;
        struct pm_signal pm_signal_local;

        /* The debug bus is being set to the passthru disable state.
         * However, the FW still expects at least one legal signal routing
         * entry or it will return an error on the arguments. If we don't
         * supply a valid entry, we must ignore all return values. Ignoring
         * all return values means we might miss an error we should be
         * concerned about.
         */

        /* fw expects physical cpu #. */
        pm_signal_local.cpu = node;
        pm_signal_local.signal_group = 21;
        pm_signal_local.bus_word = 1;
        pm_signal_local.sub_unit = 0;
        pm_signal_local.bit = 0;

        ret = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE,
                                     &pm_signal_local,
                                     sizeof(struct pm_signal));

        if (ret)
                printk(KERN_WARNING "%s: rtas returned: %d\n",
                       __FUNCTION__, ret);
}

static void pm_rtas_activate_signals(u32 node, u32 count)
{
        int ret;
        int j;
        struct pm_signal pm_signal_local[NR_PHYS_CTRS];

        for (j = 0; j < count; j++) {
                /* fw expects physical cpu # */
                pm_signal_local[j].cpu = node;
                pm_signal_local[j].signal_group = pm_signal[j].signal_group;
                pm_signal_local[j].bus_word = pm_signal[j].bus_word;
                pm_signal_local[j].sub_unit = pm_signal[j].sub_unit;
                pm_signal_local[j].bit = pm_signal[j].bit;
        }

        ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE,
                                     pm_signal_local,
                                     count * sizeof(struct pm_signal));

        if (ret)
                printk(KERN_WARNING "%s: rtas returned: %d\n",
                       __FUNCTION__, ret);
}

/*
 * PM Signal functions
 */
static void set_pm_event(u32 ctr, int event, u32 unit_mask)
{
        struct pm_signal *p;
        u32 signal_bit;
        u32 bus_word, bus_type, count_cycles, polarity, input_control;
        int j, i;

        if (event == PPU_CYCLES_EVENT_NUM) {
                /* Special Event: Count all cpu cycles */
                pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES;
                p = &(pm_signal[ctr]);
                p->signal_group = 21;
                p->bus_word = 1;
                p->sub_unit = 0;
                p->bit = 0;
                goto out;
        } else {
                pm_regs.pm07_cntrl[ctr] = 0;
        }

        bus_word = GET_BUS_WORD(unit_mask);
        bus_type = GET_BUS_TYPE(unit_mask);
        count_cycles = GET_COUNT_CYCLES(unit_mask);
        polarity = GET_POLARITY(unit_mask);
        input_control = GET_INPUT_CONTROL(unit_mask);
        signal_bit = (event % 100);

        p = &(pm_signal[ctr]);

        p->signal_group = event / 100;
        p->bus_word = bus_word;
        p->sub_unit = unit_mask & 0x0000f000;

        pm_regs.pm07_cntrl[ctr] = 0;
        pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
        pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
        pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);

        if (input_control == 0) {
                if (signal_bit > 31) {
                        signal_bit -= 32;
                        if (bus_word == 0x3)
                                bus_word = 0x2;
                        else if (bus_word == 0xc)
                                bus_word = 0x8;
                }

                if ((bus_type == 0) && p->signal_group >= 60)
                        bus_type = 2;
                if ((bus_type == 1) && p->signal_group >= 50)
                        bus_type = 0;

                pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit);
        } else {
                pm_regs.pm07_cntrl[ctr] = 0;
                p->bit = signal_bit;
        }

        for (i = 0; i < 4; i++) {
                if (bus_word & (1 << i)) {
                        pm_regs.debug_bus_control |=
                                (bus_type << (31 - (2 * i) + 1));

                        for (j = 0; j < 2; j++) {
                                if (input_bus[j] == 0xff) {
                                        input_bus[j] = i;
                                        pm_regs.group_control |=
                                                (i << (31 - i));
                                        break;
                                }
                        }
                }
        }
out:
        ;
}

static void write_pm_cntrl(int cpu, struct pm_cntrl *pm_cntrl)
{
        /* Oprofile will use 32 bit counters, set bits 7:10 to 0 */
        u32 val = 0;
        if (pm_cntrl->enable == 1)
                val |= CBE_PM_ENABLE_PERF_MON;

        if (pm_cntrl->stop_at_max == 1)
                val |= CBE_PM_STOP_AT_MAX;

        if (pm_cntrl->trace_mode == 1)
                val |= CBE_PM_TRACE_MODE_SET(pm_cntrl->trace_mode);

        if (pm_cntrl->freeze == 1)
                val |= CBE_PM_FREEZE_ALL_CTRS;

        /* Routine set_count_mode must be called previously to set
         * the count mode based on the user selection of user and kernel.
         */
        val |= CBE_PM_COUNT_MODE_SET(pm_cntrl->count_mode);
        cbe_write_pm(cpu, pm_control, val);
}

static inline void
set_count_mode(u32 kernel, u32 user, struct pm_cntrl *pm_cntrl)
{
        /* The user must specify user and kernel if they want them. If
         * neither is specified, OProfile will count in hypervisor mode
         */
        if (kernel) {
                if (user)
                        pm_cntrl->count_mode = CBE_COUNT_ALL_MODES;
                else
                        pm_cntrl->count_mode = CBE_COUNT_SUPERVISOR_MODE;
        } else {
                if (user)
                        pm_cntrl->count_mode = CBE_COUNT_PROBLEM_MODE;
                else
                        pm_cntrl->count_mode = CBE_COUNT_HYPERVISOR_MODE;
        }
}

static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
{

        pm07_cntrl[ctr] |= PM07_CTR_ENABLE(1);
        cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]);
}

/*
 * Oprofile is expected to collect data on all CPUs simultaneously.
 * However, there is one set of performance counters per node. There are
 * two hardware threads or virtual CPUs on each node. Hence, OProfile must
 * multiplex in time the performance counter collection on the two virtual
 * CPUs. The multiplexing of the performance counters is done by this
 * virtual counter routine.
 *
 * The pmc_values used below is defined as 'per-cpu' but its use is
 * more akin to 'per-node'. We need to store two sets of counter
 * values per node -- one for the previous run and one for the next.
 * The per-cpu[NR_PHYS_CTRS] gives us the storage we need. Each odd/even
 * pair of per-cpu arrays is used for storing the previous and next
 * pmc values for a given node.
 * NOTE: We use the per-cpu variable to improve cache performance.
 */
static void cell_virtual_cntr(unsigned long data)
{
        /* This routine will alternate loading the virtual counters for
         * virtual CPUs
         */
        int i, prev_hdw_thread, next_hdw_thread;
        u32 cpu;
        unsigned long flags;

        /* Make sure that the interrupt_handler and
         * the virt counter are not both playing with
         * the counters on the same node.
         */

        spin_lock_irqsave(&virt_cntr_lock, flags);

        prev_hdw_thread = hdw_thread;

        /* switch the cpu handling the interrupts */
        hdw_thread = 1 ^ hdw_thread;
        next_hdw_thread = hdw_thread;

        /* The following is done only once per node, but
         * we need cpu #, not node #, to pass to the cbe_xxx functions.
         */
        for_each_online_cpu(cpu) {
                if (cbe_get_hw_thread_id(cpu))
                        continue;

                /* stop counters, save counter values, restore counts
                 * for previous thread
                 */
                cbe_disable_pm(cpu);
                cbe_disable_pm_interrupts(cpu);
                for (i = 0; i < num_counters; i++) {
                        per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
                                = cbe_read_ctr(cpu, i);

                        if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
                            == 0xFFFFFFFF)
                                /* If the cntr value is 0xffffffff, we must
                                 * reset that to 0xfffffff0 when the current
                                 * thread is restarted. This will generate a new
                                 * interrupt and make sure that we never restore
                                 * the counters to the max value. If the counters
                                 * were restored to the max value, they do not
                                 * increment and no interrupts are generated. Hence
                                 * no more samples will be collected on that cpu.
                                 */
                                cbe_write_ctr(cpu, i, 0xFFFFFFF0);
                        else
                                cbe_write_ctr(cpu, i,
                                              per_cpu(pmc_values,
                                                      cpu +
                                                      next_hdw_thread)[i]);
                }

                /* Switch to the other thread. Change the interrupt
                 * and control regs to be scheduled on the CPU
                 * corresponding to the thread to execute.
                 */
                for (i = 0; i < num_counters; i++) {
                        if (pmc_cntrl[next_hdw_thread][i].enabled) {
                                /* There are some per thread events.
                                 * Must do the set event, enable_cntr
                                 * for each cpu.
                                 */
                                set_pm_event(i,
                                             pmc_cntrl[next_hdw_thread][i].evnts,
                                             pmc_cntrl[next_hdw_thread][i].masks);
                                enable_ctr(cpu, i,
                                           pm_regs.pm07_cntrl);
                        } else {
                                cbe_write_pm07_control(cpu, i, 0);
                        }
                }

                /* Enable interrupts on the CPU thread that is starting */
                cbe_enable_pm_interrupts(cpu, next_hdw_thread,
                                         virt_cntr_inter_mask);
                cbe_enable_pm(cpu);
        }

        spin_unlock_irqrestore(&virt_cntr_lock, flags);

        mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
}

static void start_virt_cntrs(void)
{
        init_timer(&timer_virt_cntr);
        timer_virt_cntr.function = cell_virtual_cntr;
        timer_virt_cntr.data = 0UL;
        timer_virt_cntr.expires = jiffies + HZ / 10;
        add_timer(&timer_virt_cntr);
}

/* This function is called once for all cpus combined */
static void
cell_reg_setup(struct op_counter_config *ctr,
               struct op_system_config *sys, int num_ctrs)
{
        int i, j, cpu;

        pm_rtas_token = rtas_token("ibm,cbe-perftools");
        if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
                printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
                       __FUNCTION__);
                goto out;
        }

        num_counters = num_ctrs;

        pm_regs.group_control = 0;
        pm_regs.debug_bus_control = 0;

        /* setup the pm_control register */
        memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl));
        pm_regs.pm_cntrl.stop_at_max = 1;
        pm_regs.pm_cntrl.trace_mode = 0;
        pm_regs.pm_cntrl.freeze = 1;

        set_count_mode(sys->enable_kernel, sys->enable_user,
                       &pm_regs.pm_cntrl);

        /* Setup the thread 0 events */
        for (i = 0; i < num_ctrs; ++i) {

                pmc_cntrl[0][i].evnts = ctr[i].event;
                pmc_cntrl[0][i].masks = ctr[i].unit_mask;
                pmc_cntrl[0][i].enabled = ctr[i].enabled;
                pmc_cntrl[0][i].vcntr = i;

                for_each_possible_cpu(j)
                        per_cpu(pmc_values, j)[i] = 0;
        }

        /* Setup the thread 1 events, map the thread 0 event to the
         * equivalent thread 1 event.
         */
        for (i = 0; i < num_ctrs; ++i) {
                if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111))
                        pmc_cntrl[1][i].evnts = ctr[i].event + 19;
                else if (ctr[i].event == 2203)
                        pmc_cntrl[1][i].evnts = ctr[i].event;
                else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215))
                        pmc_cntrl[1][i].evnts = ctr[i].event + 16;
                else
                        pmc_cntrl[1][i].evnts = ctr[i].event;

                pmc_cntrl[1][i].masks = ctr[i].unit_mask;
                pmc_cntrl[1][i].enabled = ctr[i].enabled;
                pmc_cntrl[1][i].vcntr = i;
        }

        for (i = 0; i < 4; i++)
                trace_bus[i] = 0xff;

        for (i = 0; i < 2; i++)
                input_bus[i] = 0xff;

        /* Our counters count up, and "count" refers to how many events
         * occur before the next interrupt, and we interrupt on overflow.
         * So we calculate the starting value which will give us "count"
         * until overflow. Then we set the events on the enabled counters.
         */
        for (i = 0; i < num_counters; ++i) {
                /* start with virtual counter set 0 */
                if (pmc_cntrl[0][i].enabled) {
                        /* Using 32bit counters, reset max - count */
                        reset_value[i] = 0xFFFFFFFF - ctr[i].count;
                        set_pm_event(i,
                                     pmc_cntrl[0][i].evnts,
                                     pmc_cntrl[0][i].masks);

                        /* global, used by cell_cpu_setup */
                        ctr_enabled |= (1 << i);
                }
        }

        /* initialize the previous counts for the virtual cntrs */
        for_each_online_cpu(cpu)
                for (i = 0; i < num_counters; ++i) {
                        per_cpu(pmc_values, cpu)[i] = reset_value[i];
                }
out:
        ;
}

/* This function is called once for each cpu */
static void cell_cpu_setup(struct op_counter_config *cntr)
{
        u32 cpu = smp_processor_id();
        u32 num_enabled = 0;
        int i;

        /* There is one performance monitor per processor chip (i.e. node),
         * so we only need to perform this function once per node.
         */
        if (cbe_get_hw_thread_id(cpu))
                goto out;

        if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
                printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
                       __FUNCTION__);
                goto out;
        }

        /* Stop all counters */
        cbe_disable_pm(cpu);
        cbe_disable_pm_interrupts(cpu);

        cbe_write_pm(cpu, pm_interval, 0);
        cbe_write_pm(cpu, pm_start_stop, 0);
        cbe_write_pm(cpu, group_control, pm_regs.group_control);
        cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
        write_pm_cntrl(cpu, &pm_regs.pm_cntrl);

        for (i = 0; i < num_counters; ++i) {
                if (ctr_enabled & (1 << i)) {
                        pm_signal[num_enabled].cpu = cbe_cpu_to_node(cpu);
                        num_enabled++;
                }
        }

        pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
out:
        ;
}

static void cell_global_start(struct op_counter_config *ctr)
{
        u32 cpu;
        u32 interrupt_mask = 0;
        u32 i;

        /* This routine gets called once for the system.
         * There is one performance monitor per node, so we
         * only need to perform this function once per node.
         */
        for_each_online_cpu(cpu) {
                if (cbe_get_hw_thread_id(cpu))
                        continue;

                interrupt_mask = 0;

                for (i = 0; i < num_counters; ++i) {
                        if (ctr_enabled & (1 << i)) {
                                cbe_write_ctr(cpu, i, reset_value[i]);
                                enable_ctr(cpu, i, pm_regs.pm07_cntrl);
                                interrupt_mask |=
                                        CBE_PM_CTR_OVERFLOW_INTR(i);
                        } else {
                                /* Disable counter */
                                cbe_write_pm07_control(cpu, i, 0);
                        }
                }

                cbe_clear_pm_interrupts(cpu);
                cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
                cbe_enable_pm(cpu);
        }

        virt_cntr_inter_mask = interrupt_mask;
        oprofile_running = 1;
        smp_wmb();

        /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
         * executed which manipulates the PMU. We start the "virtual counter"
         * here so that we do not need to synchronize access to the PMU in
         * the above for-loop.
         */
        start_virt_cntrs();
}

static void cell_global_stop(void)
{
        int cpu;

        /* This routine will be called once for the system.
         * There is one performance monitor per node, so we
         * only need to perform this function once per node.
         */
        del_timer_sync(&timer_virt_cntr);
        oprofile_running = 0;
        smp_wmb();

        for_each_online_cpu(cpu) {
                if (cbe_get_hw_thread_id(cpu))
                        continue;

                cbe_sync_irq(cbe_cpu_to_node(cpu));
                /* Stop the counters */
                cbe_disable_pm(cpu);

                /* Deactivate the signals */
                pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

                /* Deactivate interrupts */
                cbe_disable_pm_interrupts(cpu);
        }
}

static void
cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
{
        u32 cpu;
        u64 pc;
        int is_kernel;
        unsigned long flags = 0;
        u32 interrupt_mask;
        int i;

        cpu = smp_processor_id();

        /* Need to make sure the interrupt handler and the virt counter
         * routine are not running at the same time. See the
         * cell_virtual_cntr() routine for additional comments.
         */
        spin_lock_irqsave(&virt_cntr_lock, flags);

        /* Need to disable and reenable the performance counters
         * to get the desired behavior from the hardware. This
         * is hardware specific.
         */

        cbe_disable_pm(cpu);

        interrupt_mask = cbe_clear_pm_interrupts(cpu);

        /* If the interrupt mask has been cleared, then the virt cntr
         * has cleared the interrupt. When the thread that generated
         * the interrupt is restored, the data count will be restored to
         * 0xfffffff0 to cause the interrupt to be regenerated.
         */

        if ((oprofile_running == 1) && (interrupt_mask != 0)) {
                pc = regs->nip;
                is_kernel = is_kernel_addr(pc);

                for (i = 0; i < num_counters; ++i) {
                        if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(i))
                            && ctr[i].enabled) {
                                oprofile_add_pc(pc, is_kernel, i);
                                cbe_write_ctr(cpu, i, reset_value[i]);
                        }
                }

                /* The counters were frozen by the interrupt.
                 * Reenable the interrupt and restart the counters.
                 * If there was a race between the interrupt handler and
                 * the virtual counter routine, the virtual counter
                 * routine may have cleared the interrupts. Hence we must
                 * use the virt_cntr_inter_mask to re-enable the interrupts.
                 */
                cbe_enable_pm_interrupts(cpu, hdw_thread,
                                         virt_cntr_inter_mask);

                /* The writes to the various performance counters only write
                 * to a latch. The new values (interrupt setting bits, reset
                 * counter value etc.) are not copied to the actual registers
                 * until the performance monitor is enabled. In order to get
                 * this to work as desired, the performance monitor needs to
                 * be disabled while writing to the latches. This is a
                 * HW design issue.
                 */
                cbe_enable_pm(cpu);
        }
        spin_unlock_irqrestore(&virt_cntr_lock, flags);
}

struct op_powerpc_model op_model_cell = {
        .reg_setup = cell_reg_setup,
        .cpu_setup = cell_cpu_setup,
        .global_start = cell_global_start,
        .global_stop = cell_global_stop,
        .handle_interrupt = cell_handle_interrupt,
};