diff options
Diffstat (limited to 'arch/powerpc/oprofile/op_model_cell.c')
-rw-r--r-- | arch/powerpc/oprofile/op_model_cell.c | 607 |
1 files changed, 527 insertions, 80 deletions
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index c29293befba9..d928b54f3a0f 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c | |||
@@ -5,8 +5,8 @@ | |||
5 | * | 5 | * |
6 | * Author: David Erb (djerb@us.ibm.com) | 6 | * Author: David Erb (djerb@us.ibm.com) |
7 | * Modifications: | 7 | * Modifications: |
8 | * Carl Love <carll@us.ibm.com> | 8 | * Carl Love <carll@us.ibm.com> |
9 | * Maynard Johnson <maynardj@us.ibm.com> | 9 | * Maynard Johnson <maynardj@us.ibm.com> |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or | 11 | * This program is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU General Public License | 12 | * modify it under the terms of the GNU General Public License |
@@ -38,12 +38,25 @@ | |||
38 | 38 | ||
39 | #include "../platforms/cell/interrupt.h" | 39 | #include "../platforms/cell/interrupt.h" |
40 | #include "../platforms/cell/cbe_regs.h" | 40 | #include "../platforms/cell/cbe_regs.h" |
41 | #include "cell/pr_util.h" | ||
42 | |||
43 | static void cell_global_stop_spu(void); | ||
44 | |||
45 | /* | ||
46 | * spu_cycle_reset is the number of cycles between samples. | ||
47 | * This variable is used for SPU profiling and should ONLY be set | ||
48 | * at the beginning of cell_reg_setup; otherwise, it's read-only. | ||
49 | */ | ||
50 | static unsigned int spu_cycle_reset; | ||
51 | |||
52 | #define NUM_SPUS_PER_NODE 8 | ||
53 | #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ | ||
41 | 54 | ||
42 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ | 55 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ |
43 | #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying | 56 | #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying |
44 | * PPU_CYCLES event | 57 | * PPU_CYCLES event |
45 | */ | 58 | */ |
46 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ | 59 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ |
47 | 60 | ||
48 | #define NUM_THREADS 2 /* number of physical threads in | 61 | #define NUM_THREADS 2 /* number of physical threads in |
49 | * physical processor | 62 | * physical processor |
@@ -51,6 +64,7 @@ | |||
51 | #define NUM_TRACE_BUS_WORDS 4 | 64 | #define NUM_TRACE_BUS_WORDS 4 |
52 | #define NUM_INPUT_BUS_WORDS 2 | 65 | #define NUM_INPUT_BUS_WORDS 2 |
53 | 66 | ||
67 | #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ | ||
54 | 68 | ||
55 | struct pmc_cntrl_data { | 69 | struct pmc_cntrl_data { |
56 | unsigned long vcntr; | 70 | unsigned long vcntr; |
@@ -62,11 +76,10 @@ struct pmc_cntrl_data { | |||
62 | /* | 76 | /* |
63 | * ibm,cbe-perftools rtas parameters | 77 | * ibm,cbe-perftools rtas parameters |
64 | */ | 78 | */ |
65 | |||
66 | struct pm_signal { | 79 | struct pm_signal { |
67 | u16 cpu; /* Processor to modify */ | 80 | u16 cpu; /* Processor to modify */ |
68 | u16 sub_unit; /* hw subunit this applies to (if applicable) */ | 81 | u16 sub_unit; /* hw subunit this applies to (if applicable)*/ |
69 | short int signal_group; /* Signal Group to Enable/Disable */ | 82 | short int signal_group; /* Signal Group to Enable/Disable */ |
70 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event | 83 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event |
71 | * Bus Word(s) (bitmask) | 84 | * Bus Word(s) (bitmask) |
72 | */ | 85 | */ |
@@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); | |||
112 | 125 | ||
113 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; | 126 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; |
114 | 127 | ||
115 | /* Interpetation of hdw_thread: | 128 | /* |
129 | * The CELL profiling code makes rtas calls to setup the debug bus to | ||
130 | * route the performance signals. Additionally, SPU profiling requires | ||
131 | * a second rtas call to setup the hardware to capture the SPU PCs. | ||
132 | * The EIO error value is returned if the token lookups or the rtas | ||
133 | * call fail. The EIO error number is the best choice of the existing | ||
134 | * error numbers. The probability of rtas related error is very low. But | ||
135 | * by returning EIO and printing additional information to dmsg the user | ||
136 | * will know that OProfile did not start and dmesg will tell them why. | ||
137 | * OProfile does not support returning errors on Stop. Not a huge issue | ||
138 | * since failure to reset the debug bus or stop the SPU PC collection is | ||
139 | * not a fatel issue. Chances are if the Stop failed, Start doesn't work | ||
140 | * either. | ||
141 | */ | ||
142 | |||
143 | /* | ||
144 | * Interpetation of hdw_thread: | ||
116 | * 0 - even virtual cpus 0, 2, 4,... | 145 | * 0 - even virtual cpus 0, 2, 4,... |
117 | * 1 - odd virtual cpus 1, 3, 5, ... | 146 | * 1 - odd virtual cpus 1, 3, 5, ... |
147 | * | ||
148 | * FIXME: this is strictly wrong, we need to clean this up in a number | ||
149 | * of places. It works for now. -arnd | ||
118 | */ | 150 | */ |
119 | static u32 hdw_thread; | 151 | static u32 hdw_thread; |
120 | 152 | ||
121 | static u32 virt_cntr_inter_mask; | 153 | static u32 virt_cntr_inter_mask; |
122 | static struct timer_list timer_virt_cntr; | 154 | static struct timer_list timer_virt_cntr; |
123 | 155 | ||
124 | /* pm_signal needs to be global since it is initialized in | 156 | /* |
157 | * pm_signal needs to be global since it is initialized in | ||
125 | * cell_reg_setup at the time when the necessary information | 158 | * cell_reg_setup at the time when the necessary information |
126 | * is available. | 159 | * is available. |
127 | */ | 160 | */ |
128 | static struct pm_signal pm_signal[NR_PHYS_CTRS]; | 161 | static struct pm_signal pm_signal[NR_PHYS_CTRS]; |
129 | static int pm_rtas_token; | 162 | static int pm_rtas_token; /* token for debug bus setup call */ |
163 | static int spu_rtas_token; /* token for SPU cycle profiling */ | ||
130 | 164 | ||
131 | static u32 reset_value[NR_PHYS_CTRS]; | 165 | static u32 reset_value[NR_PHYS_CTRS]; |
132 | static int num_counters; | 166 | static int num_counters; |
@@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru, | |||
147 | { | 181 | { |
148 | u64 paddr = __pa(address); | 182 | u64 paddr = __pa(address); |
149 | 183 | ||
150 | return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, | 184 | return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, |
151 | paddr >> 32, paddr & 0xffffffff, length); | 185 | passthru, paddr >> 32, paddr & 0xffffffff, length); |
152 | } | 186 | } |
153 | 187 | ||
154 | static void pm_rtas_reset_signals(u32 node) | 188 | static void pm_rtas_reset_signals(u32 node) |
@@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node) | |||
156 | int ret; | 190 | int ret; |
157 | struct pm_signal pm_signal_local; | 191 | struct pm_signal pm_signal_local; |
158 | 192 | ||
159 | /* The debug bus is being set to the passthru disable state. | 193 | /* |
160 | * However, the FW still expects atleast one legal signal routing | 194 | * The debug bus is being set to the passthru disable state. |
161 | * entry or it will return an error on the arguments. If we don't | 195 | * However, the FW still expects atleast one legal signal routing |
162 | * supply a valid entry, we must ignore all return values. Ignoring | 196 | * entry or it will return an error on the arguments. If we don't |
163 | * all return values means we might miss an error we should be | 197 | * supply a valid entry, we must ignore all return values. Ignoring |
164 | * concerned about. | 198 | * all return values means we might miss an error we should be |
199 | * concerned about. | ||
165 | */ | 200 | */ |
166 | 201 | ||
167 | /* fw expects physical cpu #. */ | 202 | /* fw expects physical cpu #. */ |
@@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node) | |||
175 | &pm_signal_local, | 210 | &pm_signal_local, |
176 | sizeof(struct pm_signal)); | 211 | sizeof(struct pm_signal)); |
177 | 212 | ||
178 | if (ret) | 213 | if (unlikely(ret)) |
214 | /* | ||
215 | * Not a fatal error. For Oprofile stop, the oprofile | ||
216 | * functions do not support returning an error for | ||
217 | * failure to stop OProfile. | ||
218 | */ | ||
179 | printk(KERN_WARNING "%s: rtas returned: %d\n", | 219 | printk(KERN_WARNING "%s: rtas returned: %d\n", |
180 | __FUNCTION__, ret); | 220 | __FUNCTION__, ret); |
181 | } | 221 | } |
182 | 222 | ||
183 | static void pm_rtas_activate_signals(u32 node, u32 count) | 223 | static int pm_rtas_activate_signals(u32 node, u32 count) |
184 | { | 224 | { |
185 | int ret; | 225 | int ret; |
186 | int i, j; | 226 | int i, j; |
187 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; | 227 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; |
188 | 228 | ||
189 | /* There is no debug setup required for the cycles event. | 229 | /* |
230 | * There is no debug setup required for the cycles event. | ||
190 | * Note that only events in the same group can be used. | 231 | * Note that only events in the same group can be used. |
191 | * Otherwise, there will be conflicts in correctly routing | 232 | * Otherwise, there will be conflicts in correctly routing |
192 | * the signals on the debug bus. It is the responsiblity | 233 | * the signals on the debug bus. It is the responsiblity |
@@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count) | |||
213 | pm_signal_local, | 254 | pm_signal_local, |
214 | i * sizeof(struct pm_signal)); | 255 | i * sizeof(struct pm_signal)); |
215 | 256 | ||
216 | if (ret) | 257 | if (unlikely(ret)) { |
217 | printk(KERN_WARNING "%s: rtas returned: %d\n", | 258 | printk(KERN_WARNING "%s: rtas returned: %d\n", |
218 | __FUNCTION__, ret); | 259 | __FUNCTION__, ret); |
260 | return -EIO; | ||
261 | } | ||
219 | } | 262 | } |
263 | |||
264 | return 0; | ||
220 | } | 265 | } |
221 | 266 | ||
222 | /* | 267 | /* |
@@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
260 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); | 305 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); |
261 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); | 306 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); |
262 | 307 | ||
263 | /* Some of the islands signal selection is based on 64 bit words. | 308 | /* |
309 | * Some of the islands signal selection is based on 64 bit words. | ||
264 | * The debug bus words are 32 bits, the input words to the performance | 310 | * The debug bus words are 32 bits, the input words to the performance |
265 | * counters are defined as 32 bits. Need to convert the 64 bit island | 311 | * counters are defined as 32 bits. Need to convert the 64 bit island |
266 | * specification to the appropriate 32 input bit and bus word for the | 312 | * specification to the appropriate 32 input bit and bus word for the |
267 | * performance counter event selection. See the CELL Performance | 313 | * performance counter event selection. See the CELL Performance |
268 | * monitoring signals manual and the Perf cntr hardware descriptions | 314 | * monitoring signals manual and the Perf cntr hardware descriptions |
269 | * for the details. | 315 | * for the details. |
270 | */ | 316 | */ |
@@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
298 | input_bus[j] = i; | 344 | input_bus[j] = i; |
299 | pm_regs.group_control |= | 345 | pm_regs.group_control |= |
300 | (i << (31 - i)); | 346 | (i << (31 - i)); |
347 | |||
301 | break; | 348 | break; |
302 | } | 349 | } |
303 | } | 350 | } |
@@ -309,7 +356,8 @@ out: | |||
309 | 356 | ||
310 | static void write_pm_cntrl(int cpu) | 357 | static void write_pm_cntrl(int cpu) |
311 | { | 358 | { |
312 | /* Oprofile will use 32 bit counters, set bits 7:10 to 0 | 359 | /* |
360 | * Oprofile will use 32 bit counters, set bits 7:10 to 0 | ||
313 | * pmregs.pm_cntrl is a global | 361 | * pmregs.pm_cntrl is a global |
314 | */ | 362 | */ |
315 | 363 | ||
@@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu) | |||
326 | if (pm_regs.pm_cntrl.freeze == 1) | 374 | if (pm_regs.pm_cntrl.freeze == 1) |
327 | val |= CBE_PM_FREEZE_ALL_CTRS; | 375 | val |= CBE_PM_FREEZE_ALL_CTRS; |
328 | 376 | ||
329 | /* Routine set_count_mode must be called previously to set | 377 | /* |
378 | * Routine set_count_mode must be called previously to set | ||
330 | * the count mode based on the user selection of user and kernel. | 379 | * the count mode based on the user selection of user and kernel. |
331 | */ | 380 | */ |
332 | val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); | 381 | val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); |
@@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu) | |||
336 | static inline void | 385 | static inline void |
337 | set_count_mode(u32 kernel, u32 user) | 386 | set_count_mode(u32 kernel, u32 user) |
338 | { | 387 | { |
339 | /* The user must specify user and kernel if they want them. If | 388 | /* |
389 | * The user must specify user and kernel if they want them. If | ||
340 | * neither is specified, OProfile will count in hypervisor mode. | 390 | * neither is specified, OProfile will count in hypervisor mode. |
341 | * pm_regs.pm_cntrl is a global | 391 | * pm_regs.pm_cntrl is a global |
342 | */ | 392 | */ |
@@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) | |||
364 | 414 | ||
365 | /* | 415 | /* |
366 | * Oprofile is expected to collect data on all CPUs simultaneously. | 416 | * Oprofile is expected to collect data on all CPUs simultaneously. |
367 | * However, there is one set of performance counters per node. There are | 417 | * However, there is one set of performance counters per node. There are |
368 | * two hardware threads or virtual CPUs on each node. Hence, OProfile must | 418 | * two hardware threads or virtual CPUs on each node. Hence, OProfile must |
369 | * multiplex in time the performance counter collection on the two virtual | 419 | * multiplex in time the performance counter collection on the two virtual |
370 | * CPUs. The multiplexing of the performance counters is done by this | 420 | * CPUs. The multiplexing of the performance counters is done by this |
@@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) | |||
377 | * pair of per-cpu arrays is used for storing the previous and next | 427 | * pair of per-cpu arrays is used for storing the previous and next |
378 | * pmc values for a given node. | 428 | * pmc values for a given node. |
379 | * NOTE: We use the per-cpu variable to improve cache performance. | 429 | * NOTE: We use the per-cpu variable to improve cache performance. |
430 | * | ||
431 | * This routine will alternate loading the virtual counters for | ||
432 | * virtual CPUs | ||
380 | */ | 433 | */ |
381 | static void cell_virtual_cntr(unsigned long data) | 434 | static void cell_virtual_cntr(unsigned long data) |
382 | { | 435 | { |
383 | /* This routine will alternate loading the virtual counters for | ||
384 | * virtual CPUs | ||
385 | */ | ||
386 | int i, prev_hdw_thread, next_hdw_thread; | 436 | int i, prev_hdw_thread, next_hdw_thread; |
387 | u32 cpu; | 437 | u32 cpu; |
388 | unsigned long flags; | 438 | unsigned long flags; |
389 | 439 | ||
390 | /* Make sure that the interrupt_hander and | 440 | /* |
391 | * the virt counter are not both playing with | 441 | * Make sure that the interrupt_hander and the virt counter are |
392 | * the counters on the same node. | 442 | * not both playing with the counters on the same node. |
393 | */ | 443 | */ |
394 | 444 | ||
395 | spin_lock_irqsave(&virt_cntr_lock, flags); | 445 | spin_lock_irqsave(&virt_cntr_lock, flags); |
@@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data) | |||
400 | hdw_thread = 1 ^ hdw_thread; | 450 | hdw_thread = 1 ^ hdw_thread; |
401 | next_hdw_thread = hdw_thread; | 451 | next_hdw_thread = hdw_thread; |
402 | 452 | ||
403 | for (i = 0; i < num_counters; i++) | 453 | /* |
404 | /* There are some per thread events. Must do the | 454 | * There are some per thread events. Must do the |
405 | * set event, for the thread that is being started | 455 | * set event, for the thread that is being started |
406 | */ | 456 | */ |
457 | for (i = 0; i < num_counters; i++) | ||
407 | set_pm_event(i, | 458 | set_pm_event(i, |
408 | pmc_cntrl[next_hdw_thread][i].evnts, | 459 | pmc_cntrl[next_hdw_thread][i].evnts, |
409 | pmc_cntrl[next_hdw_thread][i].masks); | 460 | pmc_cntrl[next_hdw_thread][i].masks); |
410 | 461 | ||
411 | /* The following is done only once per each node, but | 462 | /* |
463 | * The following is done only once per each node, but | ||
412 | * we need cpu #, not node #, to pass to the cbe_xxx functions. | 464 | * we need cpu #, not node #, to pass to the cbe_xxx functions. |
413 | */ | 465 | */ |
414 | for_each_online_cpu(cpu) { | 466 | for_each_online_cpu(cpu) { |
415 | if (cbe_get_hw_thread_id(cpu)) | 467 | if (cbe_get_hw_thread_id(cpu)) |
416 | continue; | 468 | continue; |
417 | 469 | ||
418 | /* stop counters, save counter values, restore counts | 470 | /* |
471 | * stop counters, save counter values, restore counts | ||
419 | * for previous thread | 472 | * for previous thread |
420 | */ | 473 | */ |
421 | cbe_disable_pm(cpu); | 474 | cbe_disable_pm(cpu); |
@@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data) | |||
428 | == 0xFFFFFFFF) | 481 | == 0xFFFFFFFF) |
429 | /* If the cntr value is 0xffffffff, we must | 482 | /* If the cntr value is 0xffffffff, we must |
430 | * reset that to 0xfffffff0 when the current | 483 | * reset that to 0xfffffff0 when the current |
431 | * thread is restarted. This will generate a | 484 | * thread is restarted. This will generate a |
432 | * new interrupt and make sure that we never | 485 | * new interrupt and make sure that we never |
433 | * restore the counters to the max value. If | 486 | * restore the counters to the max value. If |
434 | * the counters were restored to the max value, | 487 | * the counters were restored to the max value, |
@@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data) | |||
444 | next_hdw_thread)[i]); | 497 | next_hdw_thread)[i]); |
445 | } | 498 | } |
446 | 499 | ||
447 | /* Switch to the other thread. Change the interrupt | 500 | /* |
501 | * Switch to the other thread. Change the interrupt | ||
448 | * and control regs to be scheduled on the CPU | 502 | * and control regs to be scheduled on the CPU |
449 | * corresponding to the thread to execute. | 503 | * corresponding to the thread to execute. |
450 | */ | 504 | */ |
451 | for (i = 0; i < num_counters; i++) { | 505 | for (i = 0; i < num_counters; i++) { |
452 | if (pmc_cntrl[next_hdw_thread][i].enabled) { | 506 | if (pmc_cntrl[next_hdw_thread][i].enabled) { |
453 | /* There are some per thread events. | 507 | /* |
508 | * There are some per thread events. | ||
454 | * Must do the set event, enable_cntr | 509 | * Must do the set event, enable_cntr |
455 | * for each cpu. | 510 | * for each cpu. |
456 | */ | 511 | */ |
@@ -482,17 +537,42 @@ static void start_virt_cntrs(void) | |||
482 | } | 537 | } |
483 | 538 | ||
484 | /* This function is called once for all cpus combined */ | 539 | /* This function is called once for all cpus combined */ |
485 | static void | 540 | static int cell_reg_setup(struct op_counter_config *ctr, |
486 | cell_reg_setup(struct op_counter_config *ctr, | 541 | struct op_system_config *sys, int num_ctrs) |
487 | struct op_system_config *sys, int num_ctrs) | ||
488 | { | 542 | { |
489 | int i, j, cpu; | 543 | int i, j, cpu; |
544 | spu_cycle_reset = 0; | ||
545 | |||
546 | if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { | ||
547 | spu_cycle_reset = ctr[0].count; | ||
548 | |||
549 | /* | ||
550 | * Each node will need to make the rtas call to start | ||
551 | * and stop SPU profiling. Get the token once and store it. | ||
552 | */ | ||
553 | spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); | ||
554 | |||
555 | if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { | ||
556 | printk(KERN_ERR | ||
557 | "%s: rtas token ibm,cbe-spu-perftools unknown\n", | ||
558 | __FUNCTION__); | ||
559 | return -EIO; | ||
560 | } | ||
561 | } | ||
490 | 562 | ||
491 | pm_rtas_token = rtas_token("ibm,cbe-perftools"); | 563 | pm_rtas_token = rtas_token("ibm,cbe-perftools"); |
492 | if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { | 564 | |
493 | printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", | 565 | /* |
566 | * For all events excetp PPU CYCLEs, each node will need to make | ||
567 | * the rtas cbe-perftools call to setup and reset the debug bus. | ||
568 | * Make the token lookup call once and store it in the global | ||
569 | * variable pm_rtas_token. | ||
570 | */ | ||
571 | if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { | ||
572 | printk(KERN_ERR | ||
573 | "%s: rtas token ibm,cbe-perftools unknown\n", | ||
494 | __FUNCTION__); | 574 | __FUNCTION__); |
495 | goto out; | 575 | return -EIO; |
496 | } | 576 | } |
497 | 577 | ||
498 | num_counters = num_ctrs; | 578 | num_counters = num_ctrs; |
@@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
520 | per_cpu(pmc_values, j)[i] = 0; | 600 | per_cpu(pmc_values, j)[i] = 0; |
521 | } | 601 | } |
522 | 602 | ||
523 | /* Setup the thread 1 events, map the thread 0 event to the | 603 | /* |
604 | * Setup the thread 1 events, map the thread 0 event to the | ||
524 | * equivalent thread 1 event. | 605 | * equivalent thread 1 event. |
525 | */ | 606 | */ |
526 | for (i = 0; i < num_ctrs; ++i) { | 607 | for (i = 0; i < num_ctrs; ++i) { |
@@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
544 | for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) | 625 | for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) |
545 | input_bus[i] = 0xff; | 626 | input_bus[i] = 0xff; |
546 | 627 | ||
547 | /* Our counters count up, and "count" refers to | 628 | /* |
629 | * Our counters count up, and "count" refers to | ||
548 | * how much before the next interrupt, and we interrupt | 630 | * how much before the next interrupt, and we interrupt |
549 | * on overflow. So we calculate the starting value | 631 | * on overflow. So we calculate the starting value |
550 | * which will give us "count" until overflow. | 632 | * which will give us "count" until overflow. |
551 | * Then we set the events on the enabled counters. | 633 | * Then we set the events on the enabled counters. |
552 | */ | 634 | */ |
@@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
569 | for (i = 0; i < num_counters; ++i) { | 651 | for (i = 0; i < num_counters; ++i) { |
570 | per_cpu(pmc_values, cpu)[i] = reset_value[i]; | 652 | per_cpu(pmc_values, cpu)[i] = reset_value[i]; |
571 | } | 653 | } |
572 | out: | 654 | |
573 | ; | 655 | return 0; |
574 | } | 656 | } |
575 | 657 | ||
658 | |||
659 | |||
576 | /* This function is called once for each cpu */ | 660 | /* This function is called once for each cpu */ |
577 | static void cell_cpu_setup(struct op_counter_config *cntr) | 661 | static int cell_cpu_setup(struct op_counter_config *cntr) |
578 | { | 662 | { |
579 | u32 cpu = smp_processor_id(); | 663 | u32 cpu = smp_processor_id(); |
580 | u32 num_enabled = 0; | 664 | u32 num_enabled = 0; |
581 | int i; | 665 | int i; |
582 | 666 | ||
667 | if (spu_cycle_reset) | ||
668 | return 0; | ||
669 | |||
583 | /* There is one performance monitor per processor chip (i.e. node), | 670 | /* There is one performance monitor per processor chip (i.e. node), |
584 | * so we only need to perform this function once per node. | 671 | * so we only need to perform this function once per node. |
585 | */ | 672 | */ |
586 | if (cbe_get_hw_thread_id(cpu)) | 673 | if (cbe_get_hw_thread_id(cpu)) |
587 | goto out; | 674 | return 0; |
588 | |||
589 | if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { | ||
590 | printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", | ||
591 | __FUNCTION__); | ||
592 | goto out; | ||
593 | } | ||
594 | 675 | ||
595 | /* Stop all counters */ | 676 | /* Stop all counters */ |
596 | cbe_disable_pm(cpu); | 677 | cbe_disable_pm(cpu); |
@@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr) | |||
609 | } | 690 | } |
610 | } | 691 | } |
611 | 692 | ||
612 | pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); | 693 | /* |
694 | * The pm_rtas_activate_signals will return -EIO if the FW | ||
695 | * call failed. | ||
696 | */ | ||
697 | return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); | ||
698 | } | ||
699 | |||
700 | #define ENTRIES 303 | ||
701 | #define MAXLFSR 0xFFFFFF | ||
702 | |||
703 | /* precomputed table of 24 bit LFSR values */ | ||
704 | static int initial_lfsr[] = { | ||
705 | 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424, | ||
706 | 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716, | ||
707 | 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547, | ||
708 | 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392, | ||
709 | 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026, | ||
710 | 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556, | ||
711 | 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769, | ||
712 | 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893, | ||
713 | 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017, | ||
714 | 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756, | ||
715 | 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558, | ||
716 | 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401, | ||
717 | 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720, | ||
718 | 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042, | ||
719 | 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955, | ||
720 | 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934, | ||
721 | 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783, | ||
722 | 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278, | ||
723 | 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051, | ||
724 | 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741, | ||
725 | 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972, | ||
726 | 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302, | ||
727 | 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384, | ||
728 | 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469, | ||
729 | 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697, | ||
730 | 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398, | ||
731 | 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140, | ||
732 | 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214, | ||
733 | 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386, | ||
734 | 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087, | ||
735 | 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130, | ||
736 | 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300, | ||
737 | 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475, | ||
738 | 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950, | ||
739 | 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003, | ||
740 | 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375, | ||
741 | 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426, | ||
742 | 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607 | ||
743 | }; | ||
744 | |||
745 | /* | ||
746 | * The hardware uses an LFSR counting sequence to determine when to capture | ||
747 | * the SPU PCs. An LFSR sequence is like a puesdo random number sequence | ||
748 | * where each number occurs once in the sequence but the sequence is not in | ||
749 | * numerical order. The SPU PC capture is done when the LFSR sequence reaches | ||
750 | * the last value in the sequence. Hence the user specified value N | ||
751 | * corresponds to the LFSR number that is N from the end of the sequence. | ||
752 | * | ||
753 | * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit | ||
754 | * LFSR sequence is broken into four ranges. The spacing of the precomputed | ||
755 | * values is adjusted in each range so the error between the user specifed | ||
756 | * number (N) of events between samples and the actual number of events based | ||
757 | * on the precomputed value will be les then about 6.2%. Note, if the user | ||
758 | * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. | ||
759 | * This is to prevent the loss of samples because the trace buffer is full. | ||
760 | * | ||
761 | * User specified N Step between Index in | ||
762 | * precomputed values precomputed | ||
763 | * table | ||
764 | * 0 to 2^16-1 ---- 0 | ||
765 | * 2^16 to 2^16+2^19-1 2^12 1 to 128 | ||
766 | * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256 | ||
767 | * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302 | ||
768 | * | ||
769 | * | ||
770 | * For example, the LFSR values in the second range are computed for 2^16, | ||
771 | * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indicies | ||
772 | * 1, 2,..., 127, 128. | ||
773 | * | ||
774 | * The 24 bit LFSR value for the nth number in the sequence can be | ||
775 | * calculated using the following code: | ||
776 | * | ||
777 | * #define size 24 | ||
778 | * int calculate_lfsr(int n) | ||
779 | * { | ||
780 | * int i; | ||
781 | * unsigned int newlfsr0; | ||
782 | * unsigned int lfsr = 0xFFFFFF; | ||
783 | * unsigned int howmany = n; | ||
784 | * | ||
785 | * for (i = 2; i < howmany + 2; i++) { | ||
786 | * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^ | ||
787 | * ((lfsr >> (size - 1 - 1)) & 1) ^ | ||
788 | * (((lfsr >> (size - 1 - 6)) & 1) ^ | ||
789 | * ((lfsr >> (size - 1 - 23)) & 1))); | ||
790 | * | ||
791 | * lfsr >>= 1; | ||
792 | * lfsr = lfsr | (newlfsr0 << (size - 1)); | ||
793 | * } | ||
794 | * return lfsr; | ||
795 | * } | ||
796 | */ | ||
797 | |||
798 | #define V2_16 (0x1 << 16) | ||
799 | #define V2_19 (0x1 << 19) | ||
800 | #define V2_22 (0x1 << 22) | ||
801 | |||
802 | static int calculate_lfsr(int n) | ||
803 | { | ||
804 | /* | ||
805 | * The ranges and steps are in powers of 2 so the calculations | ||
806 | * can be done using shifts rather then divide. | ||
807 | */ | ||
808 | int index; | ||
809 | |||
810 | if ((n >> 16) == 0) | ||
811 | index = 0; | ||
812 | else if (((n - V2_16) >> 19) == 0) | ||
813 | index = ((n - V2_16) >> 12) + 1; | ||
814 | else if (((n - V2_16 - V2_19) >> 22) == 0) | ||
815 | index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128; | ||
816 | else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0) | ||
817 | index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256; | ||
818 | else | ||
819 | index = ENTRIES-1; | ||
820 | |||
821 | /* make sure index is valid */ | ||
822 | if ((index > ENTRIES) || (index < 0)) | ||
823 | index = ENTRIES-1; | ||
824 | |||
825 | return initial_lfsr[index]; | ||
826 | } | ||
827 | |||
828 | static int pm_rtas_activate_spu_profiling(u32 node) | ||
829 | { | ||
830 | int ret, i; | ||
831 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; | ||
832 | |||
833 | /* | ||
834 | * Set up the rtas call to configure the debug bus to | ||
835 | * route the SPU PCs. Setup the pm_signal for each SPU | ||
836 | */ | ||
837 | for (i = 0; i < NUM_SPUS_PER_NODE; i++) { | ||
838 | pm_signal_local[i].cpu = node; | ||
839 | pm_signal_local[i].signal_group = 41; | ||
840 | /* spu i on word (i/2) */ | ||
841 | pm_signal_local[i].bus_word = 1 << i / 2; | ||
842 | /* spu i */ | ||
843 | pm_signal_local[i].sub_unit = i; | ||
844 | pm_signal_local[i].bit = 63; | ||
845 | } | ||
846 | |||
847 | ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, | ||
848 | PASSTHRU_ENABLE, pm_signal_local, | ||
849 | (NUM_SPUS_PER_NODE | ||
850 | * sizeof(struct pm_signal))); | ||
851 | |||
852 | if (unlikely(ret)) { | ||
853 | printk(KERN_WARNING "%s: rtas returned: %d\n", | ||
854 | __FUNCTION__, ret); | ||
855 | return -EIO; | ||
856 | } | ||
857 | |||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | #ifdef CONFIG_CPU_FREQ | ||
862 | static int | ||
863 | oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data) | ||
864 | { | ||
865 | int ret = 0; | ||
866 | struct cpufreq_freqs *frq = data; | ||
867 | if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) || | ||
868 | (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) || | ||
869 | (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) | ||
870 | set_spu_profiling_frequency(frq->new, spu_cycle_reset); | ||
871 | return ret; | ||
872 | } | ||
873 | |||
874 | static struct notifier_block cpu_freq_notifier_block = { | ||
875 | .notifier_call = oprof_cpufreq_notify | ||
876 | }; | ||
877 | #endif | ||
878 | |||
879 | static int cell_global_start_spu(struct op_counter_config *ctr) | ||
880 | { | ||
881 | int subfunc; | ||
882 | unsigned int lfsr_value; | ||
883 | int cpu; | ||
884 | int ret; | ||
885 | int rtas_error; | ||
886 | unsigned int cpu_khzfreq = 0; | ||
887 | |||
888 | /* The SPU profiling uses time-based profiling based on | ||
889 | * cpu frequency, so if configured with the CPU_FREQ | ||
890 | * option, we should detect frequency changes and react | ||
891 | * accordingly. | ||
892 | */ | ||
893 | #ifdef CONFIG_CPU_FREQ | ||
894 | ret = cpufreq_register_notifier(&cpu_freq_notifier_block, | ||
895 | CPUFREQ_TRANSITION_NOTIFIER); | ||
896 | if (ret < 0) | ||
897 | /* this is not a fatal error */ | ||
898 | printk(KERN_ERR "CPU freq change registration failed: %d\n", | ||
899 | ret); | ||
900 | |||
901 | else | ||
902 | cpu_khzfreq = cpufreq_quick_get(smp_processor_id()); | ||
903 | #endif | ||
904 | |||
905 | set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset); | ||
906 | |||
907 | for_each_online_cpu(cpu) { | ||
908 | if (cbe_get_hw_thread_id(cpu)) | ||
909 | continue; | ||
910 | |||
911 | /* | ||
912 | * Setup SPU cycle-based profiling. | ||
913 | * Set perf_mon_control bit 0 to a zero before | ||
914 | * enabling spu collection hardware. | ||
915 | */ | ||
916 | cbe_write_pm(cpu, pm_control, 0); | ||
917 | |||
918 | if (spu_cycle_reset > MAX_SPU_COUNT) | ||
919 | /* use largest possible value */ | ||
920 | lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1); | ||
921 | else | ||
922 | lfsr_value = calculate_lfsr(spu_cycle_reset); | ||
923 | |||
924 | /* must use a non zero value. Zero disables data collection. */ | ||
925 | if (lfsr_value == 0) | ||
926 | lfsr_value = calculate_lfsr(1); | ||
927 | |||
928 | lfsr_value = lfsr_value << 8; /* shift lfsr to correct | ||
929 | * register location | ||
930 | */ | ||
931 | |||
932 | /* debug bus setup */ | ||
933 | ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu)); | ||
934 | |||
935 | if (unlikely(ret)) { | ||
936 | rtas_error = ret; | ||
937 | goto out; | ||
938 | } | ||
939 | |||
940 | |||
941 | subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */ | ||
942 | |||
943 | /* start profiling */ | ||
944 | ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, | ||
945 | cbe_cpu_to_node(cpu), lfsr_value); | ||
946 | |||
947 | if (unlikely(ret != 0)) { | ||
948 | printk(KERN_ERR | ||
949 | "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", | ||
950 | __FUNCTION__, ret); | ||
951 | rtas_error = -EIO; | ||
952 | goto out; | ||
953 | } | ||
954 | } | ||
955 | |||
956 | rtas_error = start_spu_profiling(spu_cycle_reset); | ||
957 | if (rtas_error) | ||
958 | goto out_stop; | ||
959 | |||
960 | oprofile_running = 1; | ||
961 | return 0; | ||
962 | |||
963 | out_stop: | ||
964 | cell_global_stop_spu(); /* clean up the PMU/debug bus */ | ||
613 | out: | 965 | out: |
614 | ; | 966 | return rtas_error; |
615 | } | 967 | } |
616 | 968 | ||
617 | static void cell_global_start(struct op_counter_config *ctr) | 969 | static int cell_global_start_ppu(struct op_counter_config *ctr) |
618 | { | 970 | { |
619 | u32 cpu; | 971 | u32 cpu, i; |
620 | u32 interrupt_mask = 0; | 972 | u32 interrupt_mask = 0; |
621 | u32 i; | ||
622 | 973 | ||
623 | /* This routine gets called once for the system. | 974 | /* This routine gets called once for the system. |
624 | * There is one performance monitor per node, so we | 975 | * There is one performance monitor per node, so we |
@@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr) | |||
651 | oprofile_running = 1; | 1002 | oprofile_running = 1; |
652 | smp_wmb(); | 1003 | smp_wmb(); |
653 | 1004 | ||
654 | /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being | 1005 | /* |
655 | * executed which manipulates the PMU. We start the "virtual counter" | 1006 | * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being |
1007 | * executed which manipulates the PMU. We start the "virtual counter" | ||
656 | * here so that we do not need to synchronize access to the PMU in | 1008 | * here so that we do not need to synchronize access to the PMU in |
657 | * the above for-loop. | 1009 | * the above for-loop. |
658 | */ | 1010 | */ |
659 | start_virt_cntrs(); | 1011 | start_virt_cntrs(); |
1012 | |||
1013 | return 0; | ||
660 | } | 1014 | } |
661 | 1015 | ||
662 | static void cell_global_stop(void) | 1016 | static int cell_global_start(struct op_counter_config *ctr) |
1017 | { | ||
1018 | if (spu_cycle_reset) | ||
1019 | return cell_global_start_spu(ctr); | ||
1020 | else | ||
1021 | return cell_global_start_ppu(ctr); | ||
1022 | } | ||
1023 | |||
1024 | /* | ||
1025 | * Note the generic OProfile stop calls do not support returning | ||
1026 | * an error on stop. Hence, will not return an error if the FW | ||
1027 | * calls fail on stop. Failure to reset the debug bus is not an issue. | ||
1028 | * Failure to disable the SPU profiling is not an issue. The FW calls | ||
1029 | * to enable the performance counters and debug bus will work even if | ||
1030 | * the hardware was not cleanly reset. | ||
1031 | */ | ||
1032 | static void cell_global_stop_spu(void) | ||
1033 | { | ||
1034 | int subfunc, rtn_value; | ||
1035 | unsigned int lfsr_value; | ||
1036 | int cpu; | ||
1037 | |||
1038 | oprofile_running = 0; | ||
1039 | |||
1040 | #ifdef CONFIG_CPU_FREQ | ||
1041 | cpufreq_unregister_notifier(&cpu_freq_notifier_block, | ||
1042 | CPUFREQ_TRANSITION_NOTIFIER); | ||
1043 | #endif | ||
1044 | |||
1045 | for_each_online_cpu(cpu) { | ||
1046 | if (cbe_get_hw_thread_id(cpu)) | ||
1047 | continue; | ||
1048 | |||
1049 | subfunc = 3; /* | ||
1050 | * 2 - activate SPU tracing, | ||
1051 | * 3 - deactivate | ||
1052 | */ | ||
1053 | lfsr_value = 0x8f100000; | ||
1054 | |||
1055 | rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, | ||
1056 | subfunc, cbe_cpu_to_node(cpu), | ||
1057 | lfsr_value); | ||
1058 | |||
1059 | if (unlikely(rtn_value != 0)) { | ||
1060 | printk(KERN_ERR | ||
1061 | "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", | ||
1062 | __FUNCTION__, rtn_value); | ||
1063 | } | ||
1064 | |||
1065 | /* Deactivate the signals */ | ||
1066 | pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); | ||
1067 | } | ||
1068 | |||
1069 | stop_spu_profiling(); | ||
1070 | } | ||
1071 | |||
1072 | static void cell_global_stop_ppu(void) | ||
663 | { | 1073 | { |
664 | int cpu; | 1074 | int cpu; |
665 | 1075 | ||
666 | /* This routine will be called once for the system. | 1076 | /* |
1077 | * This routine will be called once for the system. | ||
667 | * There is one performance monitor per node, so we | 1078 | * There is one performance monitor per node, so we |
668 | * only need to perform this function once per node. | 1079 | * only need to perform this function once per node. |
669 | */ | 1080 | */ |
@@ -687,8 +1098,16 @@ static void cell_global_stop(void) | |||
687 | } | 1098 | } |
688 | } | 1099 | } |
689 | 1100 | ||
690 | static void | 1101 | static void cell_global_stop(void) |
691 | cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | 1102 | { |
1103 | if (spu_cycle_reset) | ||
1104 | cell_global_stop_spu(); | ||
1105 | else | ||
1106 | cell_global_stop_ppu(); | ||
1107 | } | ||
1108 | |||
1109 | static void cell_handle_interrupt(struct pt_regs *regs, | ||
1110 | struct op_counter_config *ctr) | ||
692 | { | 1111 | { |
693 | u32 cpu; | 1112 | u32 cpu; |
694 | u64 pc; | 1113 | u64 pc; |
@@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
699 | 1118 | ||
700 | cpu = smp_processor_id(); | 1119 | cpu = smp_processor_id(); |
701 | 1120 | ||
702 | /* Need to make sure the interrupt handler and the virt counter | 1121 | /* |
1122 | * Need to make sure the interrupt handler and the virt counter | ||
703 | * routine are not running at the same time. See the | 1123 | * routine are not running at the same time. See the |
704 | * cell_virtual_cntr() routine for additional comments. | 1124 | * cell_virtual_cntr() routine for additional comments. |
705 | */ | 1125 | */ |
706 | spin_lock_irqsave(&virt_cntr_lock, flags); | 1126 | spin_lock_irqsave(&virt_cntr_lock, flags); |
707 | 1127 | ||
708 | /* Need to disable and reenable the performance counters | 1128 | /* |
1129 | * Need to disable and reenable the performance counters | ||
709 | * to get the desired behavior from the hardware. This | 1130 | * to get the desired behavior from the hardware. This |
710 | * is hardware specific. | 1131 | * is hardware specific. |
711 | */ | 1132 | */ |
@@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
714 | 1135 | ||
715 | interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); | 1136 | interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); |
716 | 1137 | ||
717 | /* If the interrupt mask has been cleared, then the virt cntr | 1138 | /* |
1139 | * If the interrupt mask has been cleared, then the virt cntr | ||
718 | * has cleared the interrupt. When the thread that generated | 1140 | * has cleared the interrupt. When the thread that generated |
719 | * the interrupt is restored, the data count will be restored to | 1141 | * the interrupt is restored, the data count will be restored to |
720 | * 0xffffff0 to cause the interrupt to be regenerated. | 1142 | * 0xffffff0 to cause the interrupt to be regenerated. |
@@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
732 | } | 1154 | } |
733 | } | 1155 | } |
734 | 1156 | ||
735 | /* The counters were frozen by the interrupt. | 1157 | /* |
1158 | * The counters were frozen by the interrupt. | ||
736 | * Reenable the interrupt and restart the counters. | 1159 | * Reenable the interrupt and restart the counters. |
737 | * If there was a race between the interrupt handler and | 1160 | * If there was a race between the interrupt handler and |
738 | * the virtual counter routine. The virutal counter | 1161 | * the virtual counter routine. The virutal counter |
739 | * routine may have cleared the interrupts. Hence must | 1162 | * routine may have cleared the interrupts. Hence must |
740 | * use the virt_cntr_inter_mask to re-enable the interrupts. | 1163 | * use the virt_cntr_inter_mask to re-enable the interrupts. |
741 | */ | 1164 | */ |
742 | cbe_enable_pm_interrupts(cpu, hdw_thread, | 1165 | cbe_enable_pm_interrupts(cpu, hdw_thread, |
743 | virt_cntr_inter_mask); | 1166 | virt_cntr_inter_mask); |
744 | 1167 | ||
745 | /* The writes to the various performance counters only writes | 1168 | /* |
746 | * to a latch. The new values (interrupt setting bits, reset | 1169 | * The writes to the various performance counters only writes |
1170 | * to a latch. The new values (interrupt setting bits, reset | ||
747 | * counter value etc.) are not copied to the actual registers | 1171 | * counter value etc.) are not copied to the actual registers |
748 | * until the performance monitor is enabled. In order to get | 1172 | * until the performance monitor is enabled. In order to get |
749 | * this to work as desired, the permormance monitor needs to | 1173 | * this to work as desired, the permormance monitor needs to |
@@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
755 | spin_unlock_irqrestore(&virt_cntr_lock, flags); | 1179 | spin_unlock_irqrestore(&virt_cntr_lock, flags); |
756 | } | 1180 | } |
757 | 1181 | ||
1182 | /* | ||
1183 | * This function is called from the generic OProfile | ||
1184 | * driver. When profiling PPUs, we need to do the | ||
1185 | * generic sync start; otherwise, do spu_sync_start. | ||
1186 | */ | ||
1187 | static int cell_sync_start(void) | ||
1188 | { | ||
1189 | if (spu_cycle_reset) | ||
1190 | return spu_sync_start(); | ||
1191 | else | ||
1192 | return DO_GENERIC_SYNC; | ||
1193 | } | ||
1194 | |||
1195 | static int cell_sync_stop(void) | ||
1196 | { | ||
1197 | if (spu_cycle_reset) | ||
1198 | return spu_sync_stop(); | ||
1199 | else | ||
1200 | return 1; | ||
1201 | } | ||
1202 | |||
758 | struct op_powerpc_model op_model_cell = { | 1203 | struct op_powerpc_model op_model_cell = { |
759 | .reg_setup = cell_reg_setup, | 1204 | .reg_setup = cell_reg_setup, |
760 | .cpu_setup = cell_cpu_setup, | 1205 | .cpu_setup = cell_cpu_setup, |
761 | .global_start = cell_global_start, | 1206 | .global_start = cell_global_start, |
762 | .global_stop = cell_global_stop, | 1207 | .global_stop = cell_global_stop, |
1208 | .sync_start = cell_sync_start, | ||
1209 | .sync_stop = cell_sync_stop, | ||
763 | .handle_interrupt = cell_handle_interrupt, | 1210 | .handle_interrupt = cell_handle_interrupt, |
764 | }; | 1211 | }; |