aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/oprofile/op_model_cell.c
diff options
context:
space:
mode:
authorBob Nelson <rrnelson@linux.vnet.ibm.com>2007-07-20 15:39:53 -0400
committerArnd Bergmann <arnd@klappe.arndb.de>2007-07-20 15:42:24 -0400
commit1474855d0878cced6f39f51f3c2bd7428b44cb1e (patch)
treecbad42404bfc0f7222d0a88e4ed9b0e9e0d0cb50 /arch/powerpc/oprofile/op_model_cell.c
parent36aaccc1e96481e8310b1d13600096da0f24ff43 (diff)
[CELL] oprofile: add support to OProfile for profiling CELL BE SPUs
From: Maynard Johnson <mpjohn@us.ibm.com> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Exports spu_set_profile_private_kref and spu_get_profile_private_kref which are used by OProfile to store private profile information in spufs data structures. Also incorporated several fixes from other patches (rrn). Check pointer returned from kzalloc. Eliminated unnecessary cast. Better error handling and cleanup in the related area. 64-bit unsigned long parameter was being demoted to 32-bit unsigned int and eventually promoted back to unsigned long. Signed-off-by: Carl Love <carll@us.ibm.com> Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com> Signed-off-by: Bob Nelson <rrnelson@us.ibm.com> Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com> Acked-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/oprofile/op_model_cell.c')
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c607
1 files changed, 527 insertions, 80 deletions
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index c29293befba9..d928b54f3a0f 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -5,8 +5,8 @@
5 * 5 *
6 * Author: David Erb (djerb@us.ibm.com) 6 * Author: David Erb (djerb@us.ibm.com)
7 * Modifications: 7 * Modifications:
8 * Carl Love <carll@us.ibm.com> 8 * Carl Love <carll@us.ibm.com>
9 * Maynard Johnson <maynardj@us.ibm.com> 9 * Maynard Johnson <maynardj@us.ibm.com>
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -38,12 +38,25 @@
38 38
39#include "../platforms/cell/interrupt.h" 39#include "../platforms/cell/interrupt.h"
40#include "../platforms/cell/cbe_regs.h" 40#include "../platforms/cell/cbe_regs.h"
41#include "cell/pr_util.h"
42
43static void cell_global_stop_spu(void);
44
45/*
46 * spu_cycle_reset is the number of cycles between samples.
47 * This variable is used for SPU profiling and should ONLY be set
48 * at the beginning of cell_reg_setup; otherwise, it's read-only.
49 */
50static unsigned int spu_cycle_reset;
51
52#define NUM_SPUS_PER_NODE 8
53#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
41 54
42#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ 55#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
43#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying 56#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
44 * PPU_CYCLES event 57 * PPU_CYCLES event
45 */ 58 */
46#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ 59#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
47 60
48#define NUM_THREADS 2 /* number of physical threads in 61#define NUM_THREADS 2 /* number of physical threads in
49 * physical processor 62 * physical processor
@@ -51,6 +64,7 @@
51#define NUM_TRACE_BUS_WORDS 4 64#define NUM_TRACE_BUS_WORDS 4
52#define NUM_INPUT_BUS_WORDS 2 65#define NUM_INPUT_BUS_WORDS 2
53 66
67#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
54 68
55struct pmc_cntrl_data { 69struct pmc_cntrl_data {
56 unsigned long vcntr; 70 unsigned long vcntr;
@@ -62,11 +76,10 @@ struct pmc_cntrl_data {
62/* 76/*
63 * ibm,cbe-perftools rtas parameters 77 * ibm,cbe-perftools rtas parameters
64 */ 78 */
65
66struct pm_signal { 79struct pm_signal {
67 u16 cpu; /* Processor to modify */ 80 u16 cpu; /* Processor to modify */
68 u16 sub_unit; /* hw subunit this applies to (if applicable) */ 81 u16 sub_unit; /* hw subunit this applies to (if applicable)*/
69 short int signal_group; /* Signal Group to Enable/Disable */ 82 short int signal_group; /* Signal Group to Enable/Disable */
70 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event 83 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
71 * Bus Word(s) (bitmask) 84 * Bus Word(s) (bitmask)
72 */ 85 */
@@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
112 125
113static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 126static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
114 127
115/* Interpetation of hdw_thread: 128/*
129 * The CELL profiling code makes rtas calls to setup the debug bus to
130 * route the performance signals. Additionally, SPU profiling requires
131 * a second rtas call to setup the hardware to capture the SPU PCs.
132 * The EIO error value is returned if the token lookups or the rtas
133 * call fail. The EIO error number is the best choice of the existing
134 * error numbers. The probability of rtas related error is very low. But
135 * by returning EIO and printing additional information to dmsg the user
136 * will know that OProfile did not start and dmesg will tell them why.
137 * OProfile does not support returning errors on Stop. Not a huge issue
138 * since failure to reset the debug bus or stop the SPU PC collection is
139 * not a fatel issue. Chances are if the Stop failed, Start doesn't work
140 * either.
141 */
142
143/*
144 * Interpetation of hdw_thread:
116 * 0 - even virtual cpus 0, 2, 4,... 145 * 0 - even virtual cpus 0, 2, 4,...
117 * 1 - odd virtual cpus 1, 3, 5, ... 146 * 1 - odd virtual cpus 1, 3, 5, ...
147 *
148 * FIXME: this is strictly wrong, we need to clean this up in a number
149 * of places. It works for now. -arnd
118 */ 150 */
119static u32 hdw_thread; 151static u32 hdw_thread;
120 152
121static u32 virt_cntr_inter_mask; 153static u32 virt_cntr_inter_mask;
122static struct timer_list timer_virt_cntr; 154static struct timer_list timer_virt_cntr;
123 155
124/* pm_signal needs to be global since it is initialized in 156/*
157 * pm_signal needs to be global since it is initialized in
125 * cell_reg_setup at the time when the necessary information 158 * cell_reg_setup at the time when the necessary information
126 * is available. 159 * is available.
127 */ 160 */
128static struct pm_signal pm_signal[NR_PHYS_CTRS]; 161static struct pm_signal pm_signal[NR_PHYS_CTRS];
129static int pm_rtas_token; 162static int pm_rtas_token; /* token for debug bus setup call */
163static int spu_rtas_token; /* token for SPU cycle profiling */
130 164
131static u32 reset_value[NR_PHYS_CTRS]; 165static u32 reset_value[NR_PHYS_CTRS];
132static int num_counters; 166static int num_counters;
@@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru,
147{ 181{
148 u64 paddr = __pa(address); 182 u64 paddr = __pa(address);
149 183
150 return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, 184 return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
151 paddr >> 32, paddr & 0xffffffff, length); 185 passthru, paddr >> 32, paddr & 0xffffffff, length);
152} 186}
153 187
154static void pm_rtas_reset_signals(u32 node) 188static void pm_rtas_reset_signals(u32 node)
@@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node)
156 int ret; 190 int ret;
157 struct pm_signal pm_signal_local; 191 struct pm_signal pm_signal_local;
158 192
159 /* The debug bus is being set to the passthru disable state. 193 /*
160 * However, the FW still expects atleast one legal signal routing 194 * The debug bus is being set to the passthru disable state.
161 * entry or it will return an error on the arguments. If we don't 195 * However, the FW still expects atleast one legal signal routing
162 * supply a valid entry, we must ignore all return values. Ignoring 196 * entry or it will return an error on the arguments. If we don't
163 * all return values means we might miss an error we should be 197 * supply a valid entry, we must ignore all return values. Ignoring
164 * concerned about. 198 * all return values means we might miss an error we should be
199 * concerned about.
165 */ 200 */
166 201
167 /* fw expects physical cpu #. */ 202 /* fw expects physical cpu #. */
@@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node)
175 &pm_signal_local, 210 &pm_signal_local,
176 sizeof(struct pm_signal)); 211 sizeof(struct pm_signal));
177 212
178 if (ret) 213 if (unlikely(ret))
214 /*
215 * Not a fatal error. For Oprofile stop, the oprofile
216 * functions do not support returning an error for
217 * failure to stop OProfile.
218 */
179 printk(KERN_WARNING "%s: rtas returned: %d\n", 219 printk(KERN_WARNING "%s: rtas returned: %d\n",
180 __FUNCTION__, ret); 220 __FUNCTION__, ret);
181} 221}
182 222
183static void pm_rtas_activate_signals(u32 node, u32 count) 223static int pm_rtas_activate_signals(u32 node, u32 count)
184{ 224{
185 int ret; 225 int ret;
186 int i, j; 226 int i, j;
187 struct pm_signal pm_signal_local[NR_PHYS_CTRS]; 227 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
188 228
189 /* There is no debug setup required for the cycles event. 229 /*
230 * There is no debug setup required for the cycles event.
190 * Note that only events in the same group can be used. 231 * Note that only events in the same group can be used.
191 * Otherwise, there will be conflicts in correctly routing 232 * Otherwise, there will be conflicts in correctly routing
192 * the signals on the debug bus. It is the responsiblity 233 * the signals on the debug bus. It is the responsiblity
@@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count)
213 pm_signal_local, 254 pm_signal_local,
214 i * sizeof(struct pm_signal)); 255 i * sizeof(struct pm_signal));
215 256
216 if (ret) 257 if (unlikely(ret)) {
217 printk(KERN_WARNING "%s: rtas returned: %d\n", 258 printk(KERN_WARNING "%s: rtas returned: %d\n",
218 __FUNCTION__, ret); 259 __FUNCTION__, ret);
260 return -EIO;
261 }
219 } 262 }
263
264 return 0;
220} 265}
221 266
222/* 267/*
@@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
260 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); 305 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
261 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); 306 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
262 307
263 /* Some of the islands signal selection is based on 64 bit words. 308 /*
309 * Some of the islands signal selection is based on 64 bit words.
264 * The debug bus words are 32 bits, the input words to the performance 310 * The debug bus words are 32 bits, the input words to the performance
265 * counters are defined as 32 bits. Need to convert the 64 bit island 311 * counters are defined as 32 bits. Need to convert the 64 bit island
266 * specification to the appropriate 32 input bit and bus word for the 312 * specification to the appropriate 32 input bit and bus word for the
267 * performance counter event selection. See the CELL Performance 313 * performance counter event selection. See the CELL Performance
268 * monitoring signals manual and the Perf cntr hardware descriptions 314 * monitoring signals manual and the Perf cntr hardware descriptions
269 * for the details. 315 * for the details.
270 */ 316 */
@@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
298 input_bus[j] = i; 344 input_bus[j] = i;
299 pm_regs.group_control |= 345 pm_regs.group_control |=
300 (i << (31 - i)); 346 (i << (31 - i));
347
301 break; 348 break;
302 } 349 }
303 } 350 }
@@ -309,7 +356,8 @@ out:
309 356
310static void write_pm_cntrl(int cpu) 357static void write_pm_cntrl(int cpu)
311{ 358{
312 /* Oprofile will use 32 bit counters, set bits 7:10 to 0 359 /*
360 * Oprofile will use 32 bit counters, set bits 7:10 to 0
313 * pmregs.pm_cntrl is a global 361 * pmregs.pm_cntrl is a global
314 */ 362 */
315 363
@@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu)
326 if (pm_regs.pm_cntrl.freeze == 1) 374 if (pm_regs.pm_cntrl.freeze == 1)
327 val |= CBE_PM_FREEZE_ALL_CTRS; 375 val |= CBE_PM_FREEZE_ALL_CTRS;
328 376
329 /* Routine set_count_mode must be called previously to set 377 /*
378 * Routine set_count_mode must be called previously to set
330 * the count mode based on the user selection of user and kernel. 379 * the count mode based on the user selection of user and kernel.
331 */ 380 */
332 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); 381 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
@@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu)
336static inline void 385static inline void
337set_count_mode(u32 kernel, u32 user) 386set_count_mode(u32 kernel, u32 user)
338{ 387{
339 /* The user must specify user and kernel if they want them. If 388 /*
389 * The user must specify user and kernel if they want them. If
340 * neither is specified, OProfile will count in hypervisor mode. 390 * neither is specified, OProfile will count in hypervisor mode.
341 * pm_regs.pm_cntrl is a global 391 * pm_regs.pm_cntrl is a global
342 */ 392 */
@@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
364 414
365/* 415/*
366 * Oprofile is expected to collect data on all CPUs simultaneously. 416 * Oprofile is expected to collect data on all CPUs simultaneously.
367 * However, there is one set of performance counters per node. There are 417 * However, there is one set of performance counters per node. There are
368 * two hardware threads or virtual CPUs on each node. Hence, OProfile must 418 * two hardware threads or virtual CPUs on each node. Hence, OProfile must
369 * multiplex in time the performance counter collection on the two virtual 419 * multiplex in time the performance counter collection on the two virtual
370 * CPUs. The multiplexing of the performance counters is done by this 420 * CPUs. The multiplexing of the performance counters is done by this
@@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
377 * pair of per-cpu arrays is used for storing the previous and next 427 * pair of per-cpu arrays is used for storing the previous and next
378 * pmc values for a given node. 428 * pmc values for a given node.
379 * NOTE: We use the per-cpu variable to improve cache performance. 429 * NOTE: We use the per-cpu variable to improve cache performance.
430 *
431 * This routine will alternate loading the virtual counters for
432 * virtual CPUs
380 */ 433 */
381static void cell_virtual_cntr(unsigned long data) 434static void cell_virtual_cntr(unsigned long data)
382{ 435{
383 /* This routine will alternate loading the virtual counters for
384 * virtual CPUs
385 */
386 int i, prev_hdw_thread, next_hdw_thread; 436 int i, prev_hdw_thread, next_hdw_thread;
387 u32 cpu; 437 u32 cpu;
388 unsigned long flags; 438 unsigned long flags;
389 439
390 /* Make sure that the interrupt_hander and 440 /*
391 * the virt counter are not both playing with 441 * Make sure that the interrupt_hander and the virt counter are
392 * the counters on the same node. 442 * not both playing with the counters on the same node.
393 */ 443 */
394 444
395 spin_lock_irqsave(&virt_cntr_lock, flags); 445 spin_lock_irqsave(&virt_cntr_lock, flags);
@@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data)
400 hdw_thread = 1 ^ hdw_thread; 450 hdw_thread = 1 ^ hdw_thread;
401 next_hdw_thread = hdw_thread; 451 next_hdw_thread = hdw_thread;
402 452
403 for (i = 0; i < num_counters; i++) 453 /*
404 /* There are some per thread events. Must do the 454 * There are some per thread events. Must do the
405 * set event, for the thread that is being started 455 * set event, for the thread that is being started
406 */ 456 */
457 for (i = 0; i < num_counters; i++)
407 set_pm_event(i, 458 set_pm_event(i,
408 pmc_cntrl[next_hdw_thread][i].evnts, 459 pmc_cntrl[next_hdw_thread][i].evnts,
409 pmc_cntrl[next_hdw_thread][i].masks); 460 pmc_cntrl[next_hdw_thread][i].masks);
410 461
411 /* The following is done only once per each node, but 462 /*
463 * The following is done only once per each node, but
412 * we need cpu #, not node #, to pass to the cbe_xxx functions. 464 * we need cpu #, not node #, to pass to the cbe_xxx functions.
413 */ 465 */
414 for_each_online_cpu(cpu) { 466 for_each_online_cpu(cpu) {
415 if (cbe_get_hw_thread_id(cpu)) 467 if (cbe_get_hw_thread_id(cpu))
416 continue; 468 continue;
417 469
418 /* stop counters, save counter values, restore counts 470 /*
471 * stop counters, save counter values, restore counts
419 * for previous thread 472 * for previous thread
420 */ 473 */
421 cbe_disable_pm(cpu); 474 cbe_disable_pm(cpu);
@@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data)
428 == 0xFFFFFFFF) 481 == 0xFFFFFFFF)
429 /* If the cntr value is 0xffffffff, we must 482 /* If the cntr value is 0xffffffff, we must
430 * reset that to 0xfffffff0 when the current 483 * reset that to 0xfffffff0 when the current
431 * thread is restarted. This will generate a 484 * thread is restarted. This will generate a
432 * new interrupt and make sure that we never 485 * new interrupt and make sure that we never
433 * restore the counters to the max value. If 486 * restore the counters to the max value. If
434 * the counters were restored to the max value, 487 * the counters were restored to the max value,
@@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data)
444 next_hdw_thread)[i]); 497 next_hdw_thread)[i]);
445 } 498 }
446 499
447 /* Switch to the other thread. Change the interrupt 500 /*
501 * Switch to the other thread. Change the interrupt
448 * and control regs to be scheduled on the CPU 502 * and control regs to be scheduled on the CPU
449 * corresponding to the thread to execute. 503 * corresponding to the thread to execute.
450 */ 504 */
451 for (i = 0; i < num_counters; i++) { 505 for (i = 0; i < num_counters; i++) {
452 if (pmc_cntrl[next_hdw_thread][i].enabled) { 506 if (pmc_cntrl[next_hdw_thread][i].enabled) {
453 /* There are some per thread events. 507 /*
508 * There are some per thread events.
454 * Must do the set event, enable_cntr 509 * Must do the set event, enable_cntr
455 * for each cpu. 510 * for each cpu.
456 */ 511 */
@@ -482,17 +537,42 @@ static void start_virt_cntrs(void)
482} 537}
483 538
484/* This function is called once for all cpus combined */ 539/* This function is called once for all cpus combined */
485static void 540static int cell_reg_setup(struct op_counter_config *ctr,
486cell_reg_setup(struct op_counter_config *ctr, 541 struct op_system_config *sys, int num_ctrs)
487 struct op_system_config *sys, int num_ctrs)
488{ 542{
489 int i, j, cpu; 543 int i, j, cpu;
544 spu_cycle_reset = 0;
545
546 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
547 spu_cycle_reset = ctr[0].count;
548
549 /*
550 * Each node will need to make the rtas call to start
551 * and stop SPU profiling. Get the token once and store it.
552 */
553 spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
554
555 if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
556 printk(KERN_ERR
557 "%s: rtas token ibm,cbe-spu-perftools unknown\n",
558 __FUNCTION__);
559 return -EIO;
560 }
561 }
490 562
491 pm_rtas_token = rtas_token("ibm,cbe-perftools"); 563 pm_rtas_token = rtas_token("ibm,cbe-perftools");
492 if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { 564
493 printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", 565 /*
566 * For all events excetp PPU CYCLEs, each node will need to make
567 * the rtas cbe-perftools call to setup and reset the debug bus.
568 * Make the token lookup call once and store it in the global
569 * variable pm_rtas_token.
570 */
571 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
572 printk(KERN_ERR
573 "%s: rtas token ibm,cbe-perftools unknown\n",
494 __FUNCTION__); 574 __FUNCTION__);
495 goto out; 575 return -EIO;
496 } 576 }
497 577
498 num_counters = num_ctrs; 578 num_counters = num_ctrs;
@@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr,
520 per_cpu(pmc_values, j)[i] = 0; 600 per_cpu(pmc_values, j)[i] = 0;
521 } 601 }
522 602
523 /* Setup the thread 1 events, map the thread 0 event to the 603 /*
604 * Setup the thread 1 events, map the thread 0 event to the
524 * equivalent thread 1 event. 605 * equivalent thread 1 event.
525 */ 606 */
526 for (i = 0; i < num_ctrs; ++i) { 607 for (i = 0; i < num_ctrs; ++i) {
@@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr,
544 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) 625 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
545 input_bus[i] = 0xff; 626 input_bus[i] = 0xff;
546 627
547 /* Our counters count up, and "count" refers to 628 /*
629 * Our counters count up, and "count" refers to
548 * how much before the next interrupt, and we interrupt 630 * how much before the next interrupt, and we interrupt
549 * on overflow. So we calculate the starting value 631 * on overflow. So we calculate the starting value
550 * which will give us "count" until overflow. 632 * which will give us "count" until overflow.
551 * Then we set the events on the enabled counters. 633 * Then we set the events on the enabled counters.
552 */ 634 */
@@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr,
569 for (i = 0; i < num_counters; ++i) { 651 for (i = 0; i < num_counters; ++i) {
570 per_cpu(pmc_values, cpu)[i] = reset_value[i]; 652 per_cpu(pmc_values, cpu)[i] = reset_value[i];
571 } 653 }
572out: 654
573 ; 655 return 0;
574} 656}
575 657
658
659
576/* This function is called once for each cpu */ 660/* This function is called once for each cpu */
577static void cell_cpu_setup(struct op_counter_config *cntr) 661static int cell_cpu_setup(struct op_counter_config *cntr)
578{ 662{
579 u32 cpu = smp_processor_id(); 663 u32 cpu = smp_processor_id();
580 u32 num_enabled = 0; 664 u32 num_enabled = 0;
581 int i; 665 int i;
582 666
667 if (spu_cycle_reset)
668 return 0;
669
583 /* There is one performance monitor per processor chip (i.e. node), 670 /* There is one performance monitor per processor chip (i.e. node),
584 * so we only need to perform this function once per node. 671 * so we only need to perform this function once per node.
585 */ 672 */
586 if (cbe_get_hw_thread_id(cpu)) 673 if (cbe_get_hw_thread_id(cpu))
587 goto out; 674 return 0;
588
589 if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
590 printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
591 __FUNCTION__);
592 goto out;
593 }
594 675
595 /* Stop all counters */ 676 /* Stop all counters */
596 cbe_disable_pm(cpu); 677 cbe_disable_pm(cpu);
@@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
609 } 690 }
610 } 691 }
611 692
612 pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 693 /*
694 * The pm_rtas_activate_signals will return -EIO if the FW
695 * call failed.
696 */
697 return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
698}
699
700#define ENTRIES 303
701#define MAXLFSR 0xFFFFFF
702
703/* precomputed table of 24 bit LFSR values */
704static int initial_lfsr[] = {
705 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
706 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
707 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
708 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
709 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
710 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
711 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
712 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
713 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
714 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
715 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
716 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
717 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
718 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
719 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
720 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
721 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
722 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
723 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
724 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
725 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
726 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
727 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
728 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
729 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
730 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
731 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
732 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
733 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
734 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
735 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
736 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
737 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
738 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
739 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
740 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
741 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
742 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
743};
744
745/*
746 * The hardware uses an LFSR counting sequence to determine when to capture
747 * the SPU PCs. An LFSR sequence is like a puesdo random number sequence
748 * where each number occurs once in the sequence but the sequence is not in
749 * numerical order. The SPU PC capture is done when the LFSR sequence reaches
750 * the last value in the sequence. Hence the user specified value N
751 * corresponds to the LFSR number that is N from the end of the sequence.
752 *
753 * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
754 * LFSR sequence is broken into four ranges. The spacing of the precomputed
755 * values is adjusted in each range so the error between the user specifed
756 * number (N) of events between samples and the actual number of events based
757 * on the precomputed value will be les then about 6.2%. Note, if the user
758 * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
759 * This is to prevent the loss of samples because the trace buffer is full.
760 *
761 * User specified N Step between Index in
762 * precomputed values precomputed
763 * table
764 * 0 to 2^16-1 ---- 0
765 * 2^16 to 2^16+2^19-1 2^12 1 to 128
766 * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256
767 * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302
768 *
769 *
770 * For example, the LFSR values in the second range are computed for 2^16,
771 * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indicies
772 * 1, 2,..., 127, 128.
773 *
774 * The 24 bit LFSR value for the nth number in the sequence can be
775 * calculated using the following code:
776 *
777 * #define size 24
778 * int calculate_lfsr(int n)
779 * {
780 * int i;
781 * unsigned int newlfsr0;
782 * unsigned int lfsr = 0xFFFFFF;
783 * unsigned int howmany = n;
784 *
785 * for (i = 2; i < howmany + 2; i++) {
786 * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
787 * ((lfsr >> (size - 1 - 1)) & 1) ^
788 * (((lfsr >> (size - 1 - 6)) & 1) ^
789 * ((lfsr >> (size - 1 - 23)) & 1)));
790 *
791 * lfsr >>= 1;
792 * lfsr = lfsr | (newlfsr0 << (size - 1));
793 * }
794 * return lfsr;
795 * }
796 */
797
798#define V2_16 (0x1 << 16)
799#define V2_19 (0x1 << 19)
800#define V2_22 (0x1 << 22)
801
802static int calculate_lfsr(int n)
803{
804 /*
805 * The ranges and steps are in powers of 2 so the calculations
806 * can be done using shifts rather then divide.
807 */
808 int index;
809
810 if ((n >> 16) == 0)
811 index = 0;
812 else if (((n - V2_16) >> 19) == 0)
813 index = ((n - V2_16) >> 12) + 1;
814 else if (((n - V2_16 - V2_19) >> 22) == 0)
815 index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
816 else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
817 index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
818 else
819 index = ENTRIES-1;
820
821 /* make sure index is valid */
822 if ((index > ENTRIES) || (index < 0))
823 index = ENTRIES-1;
824
825 return initial_lfsr[index];
826}
827
828static int pm_rtas_activate_spu_profiling(u32 node)
829{
830 int ret, i;
831 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
832
833 /*
834 * Set up the rtas call to configure the debug bus to
835 * route the SPU PCs. Setup the pm_signal for each SPU
836 */
837 for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
838 pm_signal_local[i].cpu = node;
839 pm_signal_local[i].signal_group = 41;
840 /* spu i on word (i/2) */
841 pm_signal_local[i].bus_word = 1 << i / 2;
842 /* spu i */
843 pm_signal_local[i].sub_unit = i;
844 pm_signal_local[i].bit = 63;
845 }
846
847 ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
848 PASSTHRU_ENABLE, pm_signal_local,
849 (NUM_SPUS_PER_NODE
850 * sizeof(struct pm_signal)));
851
852 if (unlikely(ret)) {
853 printk(KERN_WARNING "%s: rtas returned: %d\n",
854 __FUNCTION__, ret);
855 return -EIO;
856 }
857
858 return 0;
859}
860
861#ifdef CONFIG_CPU_FREQ
862static int
863oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
864{
865 int ret = 0;
866 struct cpufreq_freqs *frq = data;
867 if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
868 (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
869 (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
870 set_spu_profiling_frequency(frq->new, spu_cycle_reset);
871 return ret;
872}
873
874static struct notifier_block cpu_freq_notifier_block = {
875 .notifier_call = oprof_cpufreq_notify
876};
877#endif
878
879static int cell_global_start_spu(struct op_counter_config *ctr)
880{
881 int subfunc;
882 unsigned int lfsr_value;
883 int cpu;
884 int ret;
885 int rtas_error;
886 unsigned int cpu_khzfreq = 0;
887
888 /* The SPU profiling uses time-based profiling based on
889 * cpu frequency, so if configured with the CPU_FREQ
890 * option, we should detect frequency changes and react
891 * accordingly.
892 */
893#ifdef CONFIG_CPU_FREQ
894 ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
895 CPUFREQ_TRANSITION_NOTIFIER);
896 if (ret < 0)
897 /* this is not a fatal error */
898 printk(KERN_ERR "CPU freq change registration failed: %d\n",
899 ret);
900
901 else
902 cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
903#endif
904
905 set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
906
907 for_each_online_cpu(cpu) {
908 if (cbe_get_hw_thread_id(cpu))
909 continue;
910
911 /*
912 * Setup SPU cycle-based profiling.
913 * Set perf_mon_control bit 0 to a zero before
914 * enabling spu collection hardware.
915 */
916 cbe_write_pm(cpu, pm_control, 0);
917
918 if (spu_cycle_reset > MAX_SPU_COUNT)
919 /* use largest possible value */
920 lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
921 else
922 lfsr_value = calculate_lfsr(spu_cycle_reset);
923
924 /* must use a non zero value. Zero disables data collection. */
925 if (lfsr_value == 0)
926 lfsr_value = calculate_lfsr(1);
927
928 lfsr_value = lfsr_value << 8; /* shift lfsr to correct
929 * register location
930 */
931
932 /* debug bus setup */
933 ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
934
935 if (unlikely(ret)) {
936 rtas_error = ret;
937 goto out;
938 }
939
940
941 subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
942
943 /* start profiling */
944 ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
945 cbe_cpu_to_node(cpu), lfsr_value);
946
947 if (unlikely(ret != 0)) {
948 printk(KERN_ERR
949 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
950 __FUNCTION__, ret);
951 rtas_error = -EIO;
952 goto out;
953 }
954 }
955
956 rtas_error = start_spu_profiling(spu_cycle_reset);
957 if (rtas_error)
958 goto out_stop;
959
960 oprofile_running = 1;
961 return 0;
962
963out_stop:
964 cell_global_stop_spu(); /* clean up the PMU/debug bus */
613out: 965out:
614 ; 966 return rtas_error;
615} 967}
616 968
617static void cell_global_start(struct op_counter_config *ctr) 969static int cell_global_start_ppu(struct op_counter_config *ctr)
618{ 970{
619 u32 cpu; 971 u32 cpu, i;
620 u32 interrupt_mask = 0; 972 u32 interrupt_mask = 0;
621 u32 i;
622 973
623 /* This routine gets called once for the system. 974 /* This routine gets called once for the system.
624 * There is one performance monitor per node, so we 975 * There is one performance monitor per node, so we
@@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr)
651 oprofile_running = 1; 1002 oprofile_running = 1;
652 smp_wmb(); 1003 smp_wmb();
653 1004
654 /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being 1005 /*
655 * executed which manipulates the PMU. We start the "virtual counter" 1006 * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
1007 * executed which manipulates the PMU. We start the "virtual counter"
656 * here so that we do not need to synchronize access to the PMU in 1008 * here so that we do not need to synchronize access to the PMU in
657 * the above for-loop. 1009 * the above for-loop.
658 */ 1010 */
659 start_virt_cntrs(); 1011 start_virt_cntrs();
1012
1013 return 0;
660} 1014}
661 1015
662static void cell_global_stop(void) 1016static int cell_global_start(struct op_counter_config *ctr)
1017{
1018 if (spu_cycle_reset)
1019 return cell_global_start_spu(ctr);
1020 else
1021 return cell_global_start_ppu(ctr);
1022}
1023
1024/*
1025 * Note the generic OProfile stop calls do not support returning
1026 * an error on stop. Hence, will not return an error if the FW
1027 * calls fail on stop. Failure to reset the debug bus is not an issue.
1028 * Failure to disable the SPU profiling is not an issue. The FW calls
1029 * to enable the performance counters and debug bus will work even if
1030 * the hardware was not cleanly reset.
1031 */
1032static void cell_global_stop_spu(void)
1033{
1034 int subfunc, rtn_value;
1035 unsigned int lfsr_value;
1036 int cpu;
1037
1038 oprofile_running = 0;
1039
1040#ifdef CONFIG_CPU_FREQ
1041 cpufreq_unregister_notifier(&cpu_freq_notifier_block,
1042 CPUFREQ_TRANSITION_NOTIFIER);
1043#endif
1044
1045 for_each_online_cpu(cpu) {
1046 if (cbe_get_hw_thread_id(cpu))
1047 continue;
1048
1049 subfunc = 3; /*
1050 * 2 - activate SPU tracing,
1051 * 3 - deactivate
1052 */
1053 lfsr_value = 0x8f100000;
1054
1055 rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
1056 subfunc, cbe_cpu_to_node(cpu),
1057 lfsr_value);
1058
1059 if (unlikely(rtn_value != 0)) {
1060 printk(KERN_ERR
1061 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
1062 __FUNCTION__, rtn_value);
1063 }
1064
1065 /* Deactivate the signals */
1066 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1067 }
1068
1069 stop_spu_profiling();
1070}
1071
1072static void cell_global_stop_ppu(void)
663{ 1073{
664 int cpu; 1074 int cpu;
665 1075
666 /* This routine will be called once for the system. 1076 /*
1077 * This routine will be called once for the system.
667 * There is one performance monitor per node, so we 1078 * There is one performance monitor per node, so we
668 * only need to perform this function once per node. 1079 * only need to perform this function once per node.
669 */ 1080 */
@@ -687,8 +1098,16 @@ static void cell_global_stop(void)
687 } 1098 }
688} 1099}
689 1100
690static void 1101static void cell_global_stop(void)
691cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) 1102{
1103 if (spu_cycle_reset)
1104 cell_global_stop_spu();
1105 else
1106 cell_global_stop_ppu();
1107}
1108
1109static void cell_handle_interrupt(struct pt_regs *regs,
1110 struct op_counter_config *ctr)
692{ 1111{
693 u32 cpu; 1112 u32 cpu;
694 u64 pc; 1113 u64 pc;
@@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
699 1118
700 cpu = smp_processor_id(); 1119 cpu = smp_processor_id();
701 1120
702 /* Need to make sure the interrupt handler and the virt counter 1121 /*
1122 * Need to make sure the interrupt handler and the virt counter
703 * routine are not running at the same time. See the 1123 * routine are not running at the same time. See the
704 * cell_virtual_cntr() routine for additional comments. 1124 * cell_virtual_cntr() routine for additional comments.
705 */ 1125 */
706 spin_lock_irqsave(&virt_cntr_lock, flags); 1126 spin_lock_irqsave(&virt_cntr_lock, flags);
707 1127
708 /* Need to disable and reenable the performance counters 1128 /*
1129 * Need to disable and reenable the performance counters
709 * to get the desired behavior from the hardware. This 1130 * to get the desired behavior from the hardware. This
710 * is hardware specific. 1131 * is hardware specific.
711 */ 1132 */
@@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
714 1135
715 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 1136 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
716 1137
717 /* If the interrupt mask has been cleared, then the virt cntr 1138 /*
1139 * If the interrupt mask has been cleared, then the virt cntr
718 * has cleared the interrupt. When the thread that generated 1140 * has cleared the interrupt. When the thread that generated
719 * the interrupt is restored, the data count will be restored to 1141 * the interrupt is restored, the data count will be restored to
720 * 0xffffff0 to cause the interrupt to be regenerated. 1142 * 0xffffff0 to cause the interrupt to be regenerated.
@@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
732 } 1154 }
733 } 1155 }
734 1156
735 /* The counters were frozen by the interrupt. 1157 /*
1158 * The counters were frozen by the interrupt.
736 * Reenable the interrupt and restart the counters. 1159 * Reenable the interrupt and restart the counters.
737 * If there was a race between the interrupt handler and 1160 * If there was a race between the interrupt handler and
738 * the virtual counter routine. The virutal counter 1161 * the virtual counter routine. The virutal counter
739 * routine may have cleared the interrupts. Hence must 1162 * routine may have cleared the interrupts. Hence must
740 * use the virt_cntr_inter_mask to re-enable the interrupts. 1163 * use the virt_cntr_inter_mask to re-enable the interrupts.
741 */ 1164 */
742 cbe_enable_pm_interrupts(cpu, hdw_thread, 1165 cbe_enable_pm_interrupts(cpu, hdw_thread,
743 virt_cntr_inter_mask); 1166 virt_cntr_inter_mask);
744 1167
745 /* The writes to the various performance counters only writes 1168 /*
746 * to a latch. The new values (interrupt setting bits, reset 1169 * The writes to the various performance counters only writes
1170 * to a latch. The new values (interrupt setting bits, reset
747 * counter value etc.) are not copied to the actual registers 1171 * counter value etc.) are not copied to the actual registers
748 * until the performance monitor is enabled. In order to get 1172 * until the performance monitor is enabled. In order to get
749 * this to work as desired, the permormance monitor needs to 1173 * this to work as desired, the permormance monitor needs to
@@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
755 spin_unlock_irqrestore(&virt_cntr_lock, flags); 1179 spin_unlock_irqrestore(&virt_cntr_lock, flags);
756} 1180}
757 1181
1182/*
1183 * This function is called from the generic OProfile
1184 * driver. When profiling PPUs, we need to do the
1185 * generic sync start; otherwise, do spu_sync_start.
1186 */
1187static int cell_sync_start(void)
1188{
1189 if (spu_cycle_reset)
1190 return spu_sync_start();
1191 else
1192 return DO_GENERIC_SYNC;
1193}
1194
1195static int cell_sync_stop(void)
1196{
1197 if (spu_cycle_reset)
1198 return spu_sync_stop();
1199 else
1200 return 1;
1201}
1202
758struct op_powerpc_model op_model_cell = { 1203struct op_powerpc_model op_model_cell = {
759 .reg_setup = cell_reg_setup, 1204 .reg_setup = cell_reg_setup,
760 .cpu_setup = cell_cpu_setup, 1205 .cpu_setup = cell_cpu_setup,
761 .global_start = cell_global_start, 1206 .global_start = cell_global_start,
762 .global_stop = cell_global_stop, 1207 .global_stop = cell_global_stop,
1208 .sync_start = cell_sync_start,
1209 .sync_stop = cell_sync_stop,
763 .handle_interrupt = cell_handle_interrupt, 1210 .handle_interrupt = cell_handle_interrupt,
764}; 1211};