author     Carl Love <carll@us.ibm.com>            2007-02-13 16:02:02 -0500
committer  Arnd Bergmann <arnd@klappe.arndb.de>    2007-02-13 16:03:06 -0500
commit     bcb63e25ed3c56ee40cca4d18fbaac1d2a40c1d6 (patch)
tree       d82fcdbfa5b262747e0b9548d526305a3fe0d8a0 /arch/powerpc/oprofile
parent     128b8546a83a9e37448bc126e1045dc1db291165 (diff)
[POWERPC] cell: PPU Oprofile cleanup patch

This is a cleanup patch that includes the following changes:

- Some comments were added to clarify the code, based on feedback
  from the community.
- The write_pm_cntrl() and set_count_mode() functions were passed a
  structure element from a global variable.  The argument was removed,
  so the functions now operate on the global directly (a condensed
  sketch of this change follows the sign-offs below).
- The set_pm_event() call in the cell_virtual_cntr() routine was moved
  to a for-loop that runs before the for_each_cpu loop.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
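
The second change above boils down to dropping the struct pm_cntrl * argument
and letting write_pm_cntrl() and set_count_mode() read pm_regs.pm_cntrl
directly.  A minimal, standalone before/after sketch follows; the pm_cntrl
fields and the pm_regs global mirror the diff below, while the _old/_new name
suffixes, the stand-in CBE_COUNT_* values, and the main() harness exist only
so the snippet compiles and runs outside the kernel.

/*
 * Standalone sketch (not the kernel source) of the calling-convention
 * change described in the commit message above.
 */
#include <stdio.h>

struct pm_cntrl {
	unsigned int enable;
	unsigned int stop_at_max;
	unsigned int trace_mode;
	unsigned int freeze;
	unsigned int count_mode;
};

static struct {
	struct pm_cntrl pm_cntrl;	/* subset of the real pm_regs */
} pm_regs;

enum {	/* stand-ins for the real CBE_COUNT_* macros */
	CBE_COUNT_ALL_MODES = 0,
	CBE_COUNT_SUPERVISOR_MODE,
	CBE_COUNT_PROBLEM_MODE,
	CBE_COUNT_HYPERVISOR_MODE,
};

/* Before the patch: callers had to pass &pm_regs.pm_cntrl explicitly. */
static void set_count_mode_old(unsigned int kernel, unsigned int user,
			       struct pm_cntrl *pm_cntrl)
{
	if (kernel)
		pm_cntrl->count_mode = user ? CBE_COUNT_ALL_MODES
					    : CBE_COUNT_SUPERVISOR_MODE;
	else
		pm_cntrl->count_mode = user ? CBE_COUNT_PROBLEM_MODE
					    : CBE_COUNT_HYPERVISOR_MODE;
}

/* After the patch: the function reads and writes the global directly, so
 * the argument (and the chance of handing it the wrong pointer) goes away;
 * write_pm_cntrl(cpu) is simplified the same way.
 */
static void set_count_mode_new(unsigned int kernel, unsigned int user)
{
	if (kernel)
		pm_regs.pm_cntrl.count_mode = user ? CBE_COUNT_ALL_MODES
						   : CBE_COUNT_SUPERVISOR_MODE;
	else
		pm_regs.pm_cntrl.count_mode = user ? CBE_COUNT_PROBLEM_MODE
						   : CBE_COUNT_HYPERVISOR_MODE;
}

int main(void)
{
	set_count_mode_old(1, 0, &pm_regs.pm_cntrl);
	printf("old-style call: count_mode = %u\n", pm_regs.pm_cntrl.count_mode);

	set_count_mode_new(0, 1);
	printf("new-style call: count_mode = %u\n", pm_regs.pm_cntrl.count_mode);
	return 0;
}

With the argument gone, every caller is forced onto the single global control
block this driver actually uses, which is what cell_reg_setup() and
cell_cpu_setup() rely on in the diff below.
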
Diffstat (limited to 'arch/powerpc/oprofile')
-rw-r--r--  arch/powerpc/oprofile/op_model_cell.c | 104
1 file changed, 63 insertions, 41 deletions
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 2eb15f388103..8d4a9586464d 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -41,8 +41,12 @@
 #define PPU_CYCLES_EVENT_NUM 1	/* event number for CYCLES */
 #define CBE_COUNT_ALL_CYCLES 0x42800000	/* PPU cycle event specifier */
 
-#define NUM_THREADS 2
-#define VIRT_CNTR_SW_TIME_NS 100000000	// 0.5 seconds
+#define NUM_THREADS 2         /* number of physical threads in
+			       * physical processor
+			       */
+#define NUM_TRACE_BUS_WORDS 4
+#define NUM_INPUT_BUS_WORDS 2
+
 
 struct pmc_cntrl_data {
 	unsigned long vcntr;
@@ -93,7 +97,6 @@ static struct {
 	u32 pm07_cntrl[NR_PHYS_CTRS];
 } pm_regs;
 
-
 #define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12)
 #define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4)
 #define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8)
@@ -101,7 +104,6 @@ static struct {
 #define GET_COUNT_CYCLES(x) (x & 0x00000001)
 #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
 
-
 static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
 
 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
@@ -129,8 +131,8 @@ static spinlock_t virt_cntr_lock = SPIN_LOCK_UNLOCKED;
 
 static u32 ctr_enabled;
 
-static unsigned char trace_bus[4];
-static unsigned char input_bus[2];
+static unsigned char trace_bus[NUM_TRACE_BUS_WORDS];
+static unsigned char input_bus[NUM_INPUT_BUS_WORDS];
 
 /*
  * Firmware interface functions
@@ -183,7 +185,8 @@ static void pm_rtas_activate_signals(u32 node, u32 count)
 	for (j = 0; j < count; j++) {
 		/* fw expects physical cpu # */
 		pm_signal_local[j].cpu = node;
-		pm_signal_local[j].signal_group = pm_signal[j].signal_group;
+		pm_signal_local[j].signal_group
+			= pm_signal[j].signal_group;
 		pm_signal_local[j].bus_word = pm_signal[j].bus_word;
 		pm_signal_local[j].sub_unit = pm_signal[j].sub_unit;
 		pm_signal_local[j].bit = pm_signal[j].bit;
@@ -232,13 +235,21 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
 
 	p->signal_group = event / 100;
 	p->bus_word = bus_word;
-	p->sub_unit = unit_mask & 0x0000f000;
+	p->sub_unit = (unit_mask & 0x0000f000) >> 12;
 
 	pm_regs.pm07_cntrl[ctr] = 0;
 	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
 	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
 	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
 
+	/* Some of the islands signal selection is based on 64 bit words.
+	 * The debug bus words are 32 bits, the input words to the performance
+	 * counters are defined as 32 bits.  Need to convert the 64 bit island
+	 * specification to the appropriate 32 input bit and bus word for the
+	 * performance counter event selection.  See the CELL Performance
+	 * monitoring signals manual and the Perf cntr hardware descriptions
+	 * for the details.
+	 */
 	if (input_control == 0) {
 		if (signal_bit > 31) {
 			signal_bit -= 32;
@@ -259,12 +270,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
 		p->bit = signal_bit;
 	}
 
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) {
 		if (bus_word & (1 << i)) {
 			pm_regs.debug_bus_control |=
 			    (bus_type << (31 - (2 * i) + 1));
 
-			for (j = 0; j < 2; j++) {
+			for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
 				if (input_bus[j] == 0xff) {
 					input_bus[j] = i;
 					pm_regs.group_control |=
@@ -278,52 +289,58 @@ out:
 	;
 }
 
-static void write_pm_cntrl(int cpu, struct pm_cntrl *pm_cntrl)
+static void write_pm_cntrl(int cpu)
 {
-	/* Oprofile will use 32 bit counters, set bits 7:10 to 0 */
+	/* Oprofile will use 32 bit counters, set bits 7:10 to 0
+	 * pmregs.pm_cntrl is a global
+	 */
+
 	u32 val = 0;
-	if (pm_cntrl->enable == 1)
+	if (pm_regs.pm_cntrl.enable == 1)
 		val |= CBE_PM_ENABLE_PERF_MON;
 
-	if (pm_cntrl->stop_at_max == 1)
+	if (pm_regs.pm_cntrl.stop_at_max == 1)
 		val |= CBE_PM_STOP_AT_MAX;
 
-	if (pm_cntrl->trace_mode == 1)
-		val |= CBE_PM_TRACE_MODE_SET(pm_cntrl->trace_mode);
+	if (pm_regs.pm_cntrl.trace_mode == 1)
+		val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
 
-	if (pm_cntrl->freeze == 1)
+	if (pm_regs.pm_cntrl.freeze == 1)
 		val |= CBE_PM_FREEZE_ALL_CTRS;
 
 	/* Routine set_count_mode must be called previously to set
 	 * the count mode based on the user selection of user and kernel.
 	 */
-	val |= CBE_PM_COUNT_MODE_SET(pm_cntrl->count_mode);
+	val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
 	cbe_write_pm(cpu, pm_control, val);
 }
 
 static inline void
-set_count_mode(u32 kernel, u32 user, struct pm_cntrl *pm_cntrl)
+set_count_mode(u32 kernel, u32 user)
 {
 	/* The user must specify user and kernel if they want them. If
-	 * neither is specified, OProfile will count in hypervisor mode
+	 * neither is specified, OProfile will count in hypervisor mode.
+	 * pm_regs.pm_cntrl is a global
 	 */
 	if (kernel) {
 		if (user)
-			pm_cntrl->count_mode = CBE_COUNT_ALL_MODES;
+			pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES;
 		else
-			pm_cntrl->count_mode = CBE_COUNT_SUPERVISOR_MODE;
+			pm_regs.pm_cntrl.count_mode =
+				CBE_COUNT_SUPERVISOR_MODE;
 	} else {
 		if (user)
-			pm_cntrl->count_mode = CBE_COUNT_PROBLEM_MODE;
+			pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE;
 		else
-			pm_cntrl->count_mode = CBE_COUNT_HYPERVISOR_MODE;
+			pm_regs.pm_cntrl.count_mode =
+				CBE_COUNT_HYPERVISOR_MODE;
 	}
 }
 
 static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
 {
 
-	pm07_cntrl[ctr] |= PM07_CTR_ENABLE(1);
+	pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE;
 	cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]);
 }
 
@@ -365,6 +382,14 @@ static void cell_virtual_cntr(unsigned long data)
 	hdw_thread = 1 ^ hdw_thread;
 	next_hdw_thread = hdw_thread;
 
+	for (i = 0; i < num_counters; i++)
+	/* There are some per thread events.  Must do the
+	 * set event, for the thread that is being started
+	 */
+		set_pm_event(i,
+			pmc_cntrl[next_hdw_thread][i].evnts,
+			pmc_cntrl[next_hdw_thread][i].masks);
+
 	/* The following is done only once per each node, but
 	 * we need cpu #, not node #, to pass to the cbe_xxx functions.
 	 */
@@ -385,12 +410,13 @@ static void cell_virtual_cntr(unsigned long data)
 				    == 0xFFFFFFFF)
 					/* If the cntr value is 0xffffffff, we must
 					 * reset that to 0xfffffff0 when the current
-					 * thread is restarted. This will generate a new
-					 * interrupt and make sure that we never restore
-					 * the counters to the max value. If the counters
-					 * were restored to the max value, they do not
-					 * increment and no interrupts are generated. Hence
-					 * no more samples will be collected on that cpu.
+					 * thread is restarted. This will generate a
+					 * new interrupt and make sure that we never
+					 * restore the counters to the max value. If
+					 * the counters were restored to the max value,
+					 * they do not increment and no interrupts are
+					 * generated. Hence no more samples will be
+					 * collected on that cpu.
 					 */
 					cbe_write_ctr(cpu, i, 0xFFFFFFF0);
 				else
@@ -410,9 +436,6 @@ static void cell_virtual_cntr(unsigned long data)
 				 * Must do the set event, enable_cntr
 				 * for each cpu.
 				 */
-				set_pm_event(i,
-				    pmc_cntrl[next_hdw_thread][i].evnts,
-				    pmc_cntrl[next_hdw_thread][i].masks);
 				enable_ctr(cpu, i,
 					   pm_regs.pm07_cntrl);
 			} else {
@@ -465,8 +488,7 @@ cell_reg_setup(struct op_counter_config *ctr,
 	pm_regs.pm_cntrl.trace_mode = 0;
 	pm_regs.pm_cntrl.freeze = 1;
 
-	set_count_mode(sys->enable_kernel, sys->enable_user,
-		       &pm_regs.pm_cntrl);
+	set_count_mode(sys->enable_kernel, sys->enable_user);
 
 	/* Setup the thread 0 events */
 	for (i = 0; i < num_ctrs; ++i) {
@@ -498,10 +520,10 @@ cell_reg_setup(struct op_counter_config *ctr,
 		pmc_cntrl[1][i].vcntr = i;
 	}
 
-	for (i = 0; i < 4; i++)
+	for (i = 0; i < NUM_TRACE_BUS_WORDS; i++)
 		trace_bus[i] = 0xff;
 
-	for (i = 0; i < 2; i++)
+	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
 		input_bus[i] = 0xff;
 
 	/* Our counters count up, and "count" refers to
@@ -560,7 +582,7 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
 	cbe_write_pm(cpu, pm_start_stop, 0);
 	cbe_write_pm(cpu, group_control, pm_regs.group_control);
 	cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
-	write_pm_cntrl(cpu, &pm_regs.pm_cntrl);
+	write_pm_cntrl(cpu);
 
 	for (i = 0; i < num_counters; ++i) {
 		if (ctr_enabled & (1 << i)) {
@@ -602,7 +624,7 @@ static void cell_global_start(struct op_counter_config *ctr)
 		}
 	}
 
-	cbe_clear_pm_interrupts(cpu);
+	cbe_get_and_clear_pm_interrupts(cpu);
 	cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
 	cbe_enable_pm(cpu);
 }
@@ -672,7 +694,7 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
 
 	cbe_disable_pm(cpu);
 
-	interrupt_mask = cbe_clear_pm_interrupts(cpu);
+	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
 
 	/* If the interrupt mask has been cleared, then the virt cntr
 	 * has cleared the interrupt. When the thread that generated