diff options
Diffstat (limited to 'arch/powerpc/oprofile/op_model_cell.c')
-rw-r--r-- | arch/powerpc/oprofile/op_model_cell.c | 149 |
1 files changed, 94 insertions, 55 deletions
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 2eb15f388103..e08e1d7b3dc5 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c | |||
@@ -39,10 +39,17 @@ | |||
39 | #include "../platforms/cell/interrupt.h" | 39 | #include "../platforms/cell/interrupt.h" |
40 | 40 | ||
41 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ | 41 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ |
42 | #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying | ||
43 | * PPU_CYCLES event | ||
44 | */ | ||
42 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ | 45 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ |
43 | 46 | ||
44 | #define NUM_THREADS 2 | 47 | #define NUM_THREADS 2 /* number of physical threads in |
45 | #define VIRT_CNTR_SW_TIME_NS 100000000 // 0.5 seconds | 48 | * physical processor |
49 | */ | ||
50 | #define NUM_TRACE_BUS_WORDS 4 | ||
51 | #define NUM_INPUT_BUS_WORDS 2 | ||
52 | |||
46 | 53 | ||
47 | struct pmc_cntrl_data { | 54 | struct pmc_cntrl_data { |
48 | unsigned long vcntr; | 55 | unsigned long vcntr; |
@@ -58,7 +65,7 @@ struct pmc_cntrl_data { | |||
58 | struct pm_signal { | 65 | struct pm_signal { |
59 | u16 cpu; /* Processor to modify */ | 66 | u16 cpu; /* Processor to modify */ |
60 | u16 sub_unit; /* hw subunit this applies to (if applicable) */ | 67 | u16 sub_unit; /* hw subunit this applies to (if applicable) */ |
61 | u16 signal_group; /* Signal Group to Enable/Disable */ | 68 | short int signal_group; /* Signal Group to Enable/Disable */ |
62 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event | 69 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event |
63 | * Bus Word(s) (bitmask) | 70 | * Bus Word(s) (bitmask) |
64 | */ | 71 | */ |
@@ -93,7 +100,6 @@ static struct { | |||
93 | u32 pm07_cntrl[NR_PHYS_CTRS]; | 100 | u32 pm07_cntrl[NR_PHYS_CTRS]; |
94 | } pm_regs; | 101 | } pm_regs; |
95 | 102 | ||
96 | |||
97 | #define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12) | 103 | #define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12) |
98 | #define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4) | 104 | #define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4) |
99 | #define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8) | 105 | #define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8) |
@@ -101,7 +107,6 @@ static struct { | |||
101 | #define GET_COUNT_CYCLES(x) (x & 0x00000001) | 107 | #define GET_COUNT_CYCLES(x) (x & 0x00000001) |
102 | #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) | 108 | #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) |
103 | 109 | ||
104 | |||
105 | static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); | 110 | static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); |
106 | 111 | ||
107 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; | 112 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; |
@@ -129,8 +134,8 @@ static spinlock_t virt_cntr_lock = SPIN_LOCK_UNLOCKED; | |||
129 | 134 | ||
130 | static u32 ctr_enabled; | 135 | static u32 ctr_enabled; |
131 | 136 | ||
132 | static unsigned char trace_bus[4]; | 137 | static unsigned char trace_bus[NUM_TRACE_BUS_WORDS]; |
133 | static unsigned char input_bus[2]; | 138 | static unsigned char input_bus[NUM_INPUT_BUS_WORDS]; |
134 | 139 | ||
135 | /* | 140 | /* |
136 | * Firmware interface functions | 141 | * Firmware interface functions |
@@ -177,25 +182,40 @@ static void pm_rtas_reset_signals(u32 node) | |||
177 | static void pm_rtas_activate_signals(u32 node, u32 count) | 182 | static void pm_rtas_activate_signals(u32 node, u32 count) |
178 | { | 183 | { |
179 | int ret; | 184 | int ret; |
180 | int j; | 185 | int i, j; |
181 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; | 186 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; |
182 | 187 | ||
188 | /* There is no debug setup required for the cycles event. | ||
189 | * Note that only events in the same group can be used. | ||
190 | * Otherwise, there will be conflicts in correctly routing | ||
191 | * the signals on the debug bus. It is the responsibility | ||
192 | * of the OProfile user tool to check the events are in | ||
193 | * the same group. | ||
194 | */ | ||
195 | i = 0; | ||
183 | for (j = 0; j < count; j++) { | 196 | for (j = 0; j < count; j++) { |
184 | /* fw expects physical cpu # */ | 197 | if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) { |
185 | pm_signal_local[j].cpu = node; | 198 | |
186 | pm_signal_local[j].signal_group = pm_signal[j].signal_group; | 199 | /* fw expects physical cpu # */ |
187 | pm_signal_local[j].bus_word = pm_signal[j].bus_word; | 200 | pm_signal_local[i].cpu = node; |
188 | pm_signal_local[j].sub_unit = pm_signal[j].sub_unit; | 201 | pm_signal_local[i].signal_group |
189 | pm_signal_local[j].bit = pm_signal[j].bit; | 202 | = pm_signal[j].signal_group; |
203 | pm_signal_local[i].bus_word = pm_signal[j].bus_word; | ||
204 | pm_signal_local[i].sub_unit = pm_signal[j].sub_unit; | ||
205 | pm_signal_local[i].bit = pm_signal[j].bit; | ||
206 | i++; | ||
207 | } | ||
190 | } | 208 | } |
191 | 209 | ||
192 | ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, | 210 | if (i != 0) { |
193 | pm_signal_local, | 211 | ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, |
194 | count * sizeof(struct pm_signal)); | 212 | pm_signal_local, |
213 | i * sizeof(struct pm_signal)); | ||
195 | 214 | ||
196 | if (ret) | 215 | if (ret) |
197 | printk(KERN_WARNING "%s: rtas returned: %d\n", | 216 | printk(KERN_WARNING "%s: rtas returned: %d\n", |
198 | __FUNCTION__, ret); | 217 | __FUNCTION__, ret); |
218 | } | ||
199 | } | 219 | } |
200 | 220 | ||
201 | /* | 221 | /* |
@@ -212,7 +232,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
212 | /* Special Event: Count all cpu cycles */ | 232 | /* Special Event: Count all cpu cycles */ |
213 | pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES; | 233 | pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES; |
214 | p = &(pm_signal[ctr]); | 234 | p = &(pm_signal[ctr]); |
215 | p->signal_group = 21; | 235 | p->signal_group = PPU_CYCLES_GRP_NUM; |
216 | p->bus_word = 1; | 236 | p->bus_word = 1; |
217 | p->sub_unit = 0; | 237 | p->sub_unit = 0; |
218 | p->bit = 0; | 238 | p->bit = 0; |
@@ -232,13 +252,21 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
232 | 252 | ||
233 | p->signal_group = event / 100; | 253 | p->signal_group = event / 100; |
234 | p->bus_word = bus_word; | 254 | p->bus_word = bus_word; |
235 | p->sub_unit = unit_mask & 0x0000f000; | 255 | p->sub_unit = (unit_mask & 0x0000f000) >> 12; |
236 | 256 | ||
237 | pm_regs.pm07_cntrl[ctr] = 0; | 257 | pm_regs.pm07_cntrl[ctr] = 0; |
238 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); | 258 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); |
239 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); | 259 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); |
240 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); | 260 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); |
241 | 261 | ||
262 | /* Some of the islands' signal selection is based on 64 bit words. | ||
263 | * The debug bus words are 32 bits, the input words to the performance | ||
264 | * counters are defined as 32 bits. Need to convert the 64 bit island | ||
265 | * specification to the appropriate 32 input bit and bus word for the | ||
266 | * performance counter event selection. See the CELL Performance | ||
267 | * monitoring signals manual and the Perf cntr hardware descriptions | ||
268 | * for the details. | ||
269 | */ | ||
242 | if (input_control == 0) { | 270 | if (input_control == 0) { |
243 | if (signal_bit > 31) { | 271 | if (signal_bit > 31) { |
244 | signal_bit -= 32; | 272 | signal_bit -= 32; |
@@ -259,12 +287,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
259 | p->bit = signal_bit; | 287 | p->bit = signal_bit; |
260 | } | 288 | } |
261 | 289 | ||
262 | for (i = 0; i < 4; i++) { | 290 | for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) { |
263 | if (bus_word & (1 << i)) { | 291 | if (bus_word & (1 << i)) { |
264 | pm_regs.debug_bus_control |= | 292 | pm_regs.debug_bus_control |= |
265 | (bus_type << (31 - (2 * i) + 1)); | 293 | (bus_type << (31 - (2 * i) + 1)); |
266 | 294 | ||
267 | for (j = 0; j < 2; j++) { | 295 | for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { |
268 | if (input_bus[j] == 0xff) { | 296 | if (input_bus[j] == 0xff) { |
269 | input_bus[j] = i; | 297 | input_bus[j] = i; |
270 | pm_regs.group_control |= | 298 | pm_regs.group_control |= |
@@ -278,52 +306,58 @@ out: | |||
278 | ; | 306 | ; |
279 | } | 307 | } |
280 | 308 | ||
281 | static void write_pm_cntrl(int cpu, struct pm_cntrl *pm_cntrl) | 309 | static void write_pm_cntrl(int cpu) |
282 | { | 310 | { |
283 | /* Oprofile will use 32 bit counters, set bits 7:10 to 0 */ | 311 | /* Oprofile will use 32 bit counters, set bits 7:10 to 0 |
312 | * pm_regs.pm_cntrl is a global | ||
313 | */ | ||
314 | |||
284 | u32 val = 0; | 315 | u32 val = 0; |
285 | if (pm_cntrl->enable == 1) | 316 | if (pm_regs.pm_cntrl.enable == 1) |
286 | val |= CBE_PM_ENABLE_PERF_MON; | 317 | val |= CBE_PM_ENABLE_PERF_MON; |
287 | 318 | ||
288 | if (pm_cntrl->stop_at_max == 1) | 319 | if (pm_regs.pm_cntrl.stop_at_max == 1) |
289 | val |= CBE_PM_STOP_AT_MAX; | 320 | val |= CBE_PM_STOP_AT_MAX; |
290 | 321 | ||
291 | if (pm_cntrl->trace_mode == 1) | 322 | if (pm_regs.pm_cntrl.trace_mode == 1) |
292 | val |= CBE_PM_TRACE_MODE_SET(pm_cntrl->trace_mode); | 323 | val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); |
293 | 324 | ||
294 | if (pm_cntrl->freeze == 1) | 325 | if (pm_regs.pm_cntrl.freeze == 1) |
295 | val |= CBE_PM_FREEZE_ALL_CTRS; | 326 | val |= CBE_PM_FREEZE_ALL_CTRS; |
296 | 327 | ||
297 | /* Routine set_count_mode must be called previously to set | 328 | /* Routine set_count_mode must be called previously to set |
298 | * the count mode based on the user selection of user and kernel. | 329 | * the count mode based on the user selection of user and kernel. |
299 | */ | 330 | */ |
300 | val |= CBE_PM_COUNT_MODE_SET(pm_cntrl->count_mode); | 331 | val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); |
301 | cbe_write_pm(cpu, pm_control, val); | 332 | cbe_write_pm(cpu, pm_control, val); |
302 | } | 333 | } |
303 | 334 | ||
304 | static inline void | 335 | static inline void |
305 | set_count_mode(u32 kernel, u32 user, struct pm_cntrl *pm_cntrl) | 336 | set_count_mode(u32 kernel, u32 user) |
306 | { | 337 | { |
307 | /* The user must specify user and kernel if they want them. If | 338 | /* The user must specify user and kernel if they want them. If |
308 | * neither is specified, OProfile will count in hypervisor mode | 339 | * neither is specified, OProfile will count in hypervisor mode. |
340 | * pm_regs.pm_cntrl is a global | ||
309 | */ | 341 | */ |
310 | if (kernel) { | 342 | if (kernel) { |
311 | if (user) | 343 | if (user) |
312 | pm_cntrl->count_mode = CBE_COUNT_ALL_MODES; | 344 | pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES; |
313 | else | 345 | else |
314 | pm_cntrl->count_mode = CBE_COUNT_SUPERVISOR_MODE; | 346 | pm_regs.pm_cntrl.count_mode = |
347 | CBE_COUNT_SUPERVISOR_MODE; | ||
315 | } else { | 348 | } else { |
316 | if (user) | 349 | if (user) |
317 | pm_cntrl->count_mode = CBE_COUNT_PROBLEM_MODE; | 350 | pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE; |
318 | else | 351 | else |
319 | pm_cntrl->count_mode = CBE_COUNT_HYPERVISOR_MODE; | 352 | pm_regs.pm_cntrl.count_mode = |
353 | CBE_COUNT_HYPERVISOR_MODE; | ||
320 | } | 354 | } |
321 | } | 355 | } |
322 | 356 | ||
323 | static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) | 357 | static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) |
324 | { | 358 | { |
325 | 359 | ||
326 | pm07_cntrl[ctr] |= PM07_CTR_ENABLE(1); | 360 | pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE; |
327 | cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]); | 361 | cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]); |
328 | } | 362 | } |
329 | 363 | ||
@@ -365,6 +399,14 @@ static void cell_virtual_cntr(unsigned long data) | |||
365 | hdw_thread = 1 ^ hdw_thread; | 399 | hdw_thread = 1 ^ hdw_thread; |
366 | next_hdw_thread = hdw_thread; | 400 | next_hdw_thread = hdw_thread; |
367 | 401 | ||
402 | for (i = 0; i < num_counters; i++) | ||
403 | /* There are some per thread events. Must do the | ||
404 | * set event, for the thread that is being started | ||
405 | */ | ||
406 | set_pm_event(i, | ||
407 | pmc_cntrl[next_hdw_thread][i].evnts, | ||
408 | pmc_cntrl[next_hdw_thread][i].masks); | ||
409 | |||
368 | /* The following is done only once per each node, but | 410 | /* The following is done only once per each node, but |
369 | * we need cpu #, not node #, to pass to the cbe_xxx functions. | 411 | * we need cpu #, not node #, to pass to the cbe_xxx functions. |
370 | */ | 412 | */ |
@@ -385,12 +427,13 @@ static void cell_virtual_cntr(unsigned long data) | |||
385 | == 0xFFFFFFFF) | 427 | == 0xFFFFFFFF) |
386 | /* If the cntr value is 0xffffffff, we must | 428 | /* If the cntr value is 0xffffffff, we must |
387 | * reset that to 0xfffffff0 when the current | 429 | * reset that to 0xfffffff0 when the current |
388 | * thread is restarted. This will generate a new | 430 | * thread is restarted. This will generate a |
389 | * interrupt and make sure that we never restore | 431 | * new interrupt and make sure that we never |
390 | * the counters to the max value. If the counters | 432 | * restore the counters to the max value. If |
391 | * were restored to the max value, they do not | 433 | * the counters were restored to the max value, |
392 | * increment and no interrupts are generated. Hence | 434 | * they do not increment and no interrupts are |
393 | * no more samples will be collected on that cpu. | 435 | * generated. Hence no more samples will be |
436 | * collected on that cpu. | ||
394 | */ | 437 | */ |
395 | cbe_write_ctr(cpu, i, 0xFFFFFFF0); | 438 | cbe_write_ctr(cpu, i, 0xFFFFFFF0); |
396 | else | 439 | else |
@@ -410,9 +453,6 @@ static void cell_virtual_cntr(unsigned long data) | |||
410 | * Must do the set event, enable_cntr | 453 | * Must do the set event, enable_cntr |
411 | * for each cpu. | 454 | * for each cpu. |
412 | */ | 455 | */ |
413 | set_pm_event(i, | ||
414 | pmc_cntrl[next_hdw_thread][i].evnts, | ||
415 | pmc_cntrl[next_hdw_thread][i].masks); | ||
416 | enable_ctr(cpu, i, | 456 | enable_ctr(cpu, i, |
417 | pm_regs.pm07_cntrl); | 457 | pm_regs.pm07_cntrl); |
418 | } else { | 458 | } else { |
@@ -465,8 +505,7 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
465 | pm_regs.pm_cntrl.trace_mode = 0; | 505 | pm_regs.pm_cntrl.trace_mode = 0; |
466 | pm_regs.pm_cntrl.freeze = 1; | 506 | pm_regs.pm_cntrl.freeze = 1; |
467 | 507 | ||
468 | set_count_mode(sys->enable_kernel, sys->enable_user, | 508 | set_count_mode(sys->enable_kernel, sys->enable_user); |
469 | &pm_regs.pm_cntrl); | ||
470 | 509 | ||
471 | /* Setup the thread 0 events */ | 510 | /* Setup the thread 0 events */ |
472 | for (i = 0; i < num_ctrs; ++i) { | 511 | for (i = 0; i < num_ctrs; ++i) { |
@@ -498,10 +537,10 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
498 | pmc_cntrl[1][i].vcntr = i; | 537 | pmc_cntrl[1][i].vcntr = i; |
499 | } | 538 | } |
500 | 539 | ||
501 | for (i = 0; i < 4; i++) | 540 | for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) |
502 | trace_bus[i] = 0xff; | 541 | trace_bus[i] = 0xff; |
503 | 542 | ||
504 | for (i = 0; i < 2; i++) | 543 | for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) |
505 | input_bus[i] = 0xff; | 544 | input_bus[i] = 0xff; |
506 | 545 | ||
507 | /* Our counters count up, and "count" refers to | 546 | /* Our counters count up, and "count" refers to |
@@ -560,7 +599,7 @@ static void cell_cpu_setup(struct op_counter_config *cntr) | |||
560 | cbe_write_pm(cpu, pm_start_stop, 0); | 599 | cbe_write_pm(cpu, pm_start_stop, 0); |
561 | cbe_write_pm(cpu, group_control, pm_regs.group_control); | 600 | cbe_write_pm(cpu, group_control, pm_regs.group_control); |
562 | cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); | 601 | cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); |
563 | write_pm_cntrl(cpu, &pm_regs.pm_cntrl); | 602 | write_pm_cntrl(cpu); |
564 | 603 | ||
565 | for (i = 0; i < num_counters; ++i) { | 604 | for (i = 0; i < num_counters; ++i) { |
566 | if (ctr_enabled & (1 << i)) { | 605 | if (ctr_enabled & (1 << i)) { |
@@ -602,7 +641,7 @@ static void cell_global_start(struct op_counter_config *ctr) | |||
602 | } | 641 | } |
603 | } | 642 | } |
604 | 643 | ||
605 | cbe_clear_pm_interrupts(cpu); | 644 | cbe_get_and_clear_pm_interrupts(cpu); |
606 | cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); | 645 | cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); |
607 | cbe_enable_pm(cpu); | 646 | cbe_enable_pm(cpu); |
608 | } | 647 | } |
@@ -672,7 +711,7 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
672 | 711 | ||
673 | cbe_disable_pm(cpu); | 712 | cbe_disable_pm(cpu); |
674 | 713 | ||
675 | interrupt_mask = cbe_clear_pm_interrupts(cpu); | 714 | interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); |
676 | 715 | ||
677 | /* If the interrupt mask has been cleared, then the virt cntr | 716 | /* If the interrupt mask has been cleared, then the virt cntr |
678 | * has cleared the interrupt. When the thread that generated | 717 | * has cleared the interrupt. When the thread that generated |