| author | Carl Love <cel@us.ibm.com> | 2008-12-01 19:18:34 -0500 |
|---|---|---|
| committer | Robert Richter <robert.richter@amd.com> | 2009-01-08 09:49:39 -0500 |
| commit | 9b93418e7ee59dbc96d44cfde7f65f886e54dba9 | |
| tree | 39f2e913f17b3a9dc50b6af39a32489a735ce3a6 | |
| parent | 4a6908a3a050aacc9c3a2f36b276b46c0629ad91 | |
powerpc/oprofile: IBM CELL: cleanup and restructuring
This patch restructures and cleans up the code a bit to make it
easier to add new functionality later. Specifically, it introduces a
profiling_mode selector (PPU_PROFILING, SPU_PROFILING_CYCLES, and a
not-yet-used SPU_PROFILING_EVENTS) and splits the register setup,
global start/stop, and interrupt paths into per-mode helpers. The
patch makes no functional changes to the existing code.
Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
```
-rw-r--r--  arch/powerpc/oprofile/cell/spu_profiler.c |  24
-rw-r--r--  arch/powerpc/oprofile/op_model_cell.c     | 320
2 files changed, 191 insertions(+), 153 deletions(-)
```
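The core of the restructuring is easiest to see in isolation before reading the diffs: a profiling_mode selector chosen once in cell_reg_setup() and consulted by every later entry point. The sketch below is a compilable distillation of that pattern, not the kernel source; the stub op_counter_config/op_system_config types and the main() driver are hypothetical stand-ins for the real OProfile structures.

```c
#include <stdio.h>

/* Hypothetical stand-ins for the real OProfile structures; only the
 * fields the dispatch logic touches are included. */
struct op_counter_config { unsigned long event; unsigned long count; };
struct op_system_config { int unused; };

#define SPU_CYCLES_EVENT_NUM    2   /* event number for SPU_CYCLES */

#define PPU_PROFILING           0
#define SPU_PROFILING_CYCLES    1
#define SPU_PROFILING_EVENTS    2   /* defined now, wired up by a later patch */

static unsigned int profiling_mode;
static unsigned int spu_cycle_reset;

static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
                                     struct op_system_config *sys, int num_ctrs)
{
    (void)sys; (void)num_ctrs;
    spu_cycle_reset = ctr[0].count;  /* cycles between SPU samples */
    return 0;
}

static int cell_reg_setup_ppu(struct op_counter_config *ctr,
                              struct op_system_config *sys, int num_ctrs)
{
    (void)ctr; (void)sys; (void)num_ctrs;
    return 0;                        /* per-counter PPU setup elided */
}

/* Called once for all cpus combined: pick the mode once, then let every
 * later entry point (start, stop, interrupt) dispatch on it. */
static int cell_reg_setup(struct op_counter_config *ctr,
                          struct op_system_config *sys, int num_ctrs)
{
    if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
        profiling_mode = SPU_PROFILING_CYCLES;
        return cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
    }
    profiling_mode = PPU_PROFILING;
    return cell_reg_setup_ppu(ctr, sys, num_ctrs);
}

int main(void)
{
    struct op_counter_config ctr[1] = { { SPU_CYCLES_EVENT_NUM, 100000 } };
    struct op_system_config sys = { 0 };

    cell_reg_setup(ctr, &sys, 1);
    printf("mode=%u spu_cycle_reset=%u\n", profiling_mode, spu_cycle_reset);
    return 0;
}
```

With the mode chosen in one place, the follow-on SPU event-profiling patch only has to add a SPU_PROFILING_EVENTS branch at the dispatch points instead of re-testing spu_cycle_reset throughout the file.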
```diff
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index dd499c3e9da..8b1b9ccaff9 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -31,8 +31,8 @@ static unsigned int profiling_interval;
 
 #define SPU_PC_MASK 0xFFFF
 
-static DEFINE_SPINLOCK(sample_array_lock);
-unsigned long sample_array_lock_flags;
+static DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
+unsigned long oprof_spu_smpl_arry_lck_flags;
 
 void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
 {
@@ -145,13 +145,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
                 * sample array must be loaded and then processed for a given
                 * cpu. The sample array is not per cpu.
                 */
-               spin_lock_irqsave(&sample_array_lock,
-                                 sample_array_lock_flags);
+               spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+                                 oprof_spu_smpl_arry_lck_flags);
                num_samples = cell_spu_pc_collection(cpu);
 
                if (num_samples == 0) {
-                       spin_unlock_irqrestore(&sample_array_lock,
-                                              sample_array_lock_flags);
+                       spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+                                              oprof_spu_smpl_arry_lck_flags);
                        continue;
                }
 
@@ -162,8 +162,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
                                       num_samples);
                }
 
-               spin_unlock_irqrestore(&sample_array_lock,
-                                      sample_array_lock_flags);
+               spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+                                      oprof_spu_smpl_arry_lck_flags);
 
        }
        smp_wmb();      /* insure spu event buffer updates are written */
@@ -182,13 +182,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
 
 static struct hrtimer timer;
 /*
- * Entry point for SPU profiling.
+ * Entry point for SPU cycle profiling.
  * NOTE: SPU profiling is done system-wide, not per-CPU.
  *
  * cycles_reset is the count value specified by the user when
  * setting up OProfile to count SPU_CYCLES.
  */
-int start_spu_profiling(unsigned int cycles_reset)
+int start_spu_profiling_cycles(unsigned int cycles_reset)
 {
        ktime_t kt;
 
@@ -212,10 +212,10 @@ int start_spu_profiling(unsigned int cycles_reset)
        return 0;
 }
 
-void stop_spu_profiling(void)
+void stop_spu_profiling_cycles(void)
 {
        spu_prof_running = 0;
        hrtimer_cancel(&timer);
        kfree(samples);
-       pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+       pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
 }
```
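Reassembled from the hunks above, the renamed entry points bracket a single hrtimer that drives the system-wide sampling. The following is a kernel-context sketch, not the verbatim file: the extern declarations stand in for definitions that live elsewhere in spu_profiler.c (or its pr_util.h interface), and the sample-array allocation done by the real start path is elided.

```c
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>

/* Assumed to be defined elsewhere in spu_profiler.c / pr_util.h: */
extern unsigned int profiling_interval;         /* ns between samples */
extern u32 *samples;                            /* shared, NOT per-cpu */
extern enum hrtimer_restart profile_spus(struct hrtimer *timer);

static struct hrtimer timer;
static int spu_prof_running;

int start_spu_profiling_cycles(unsigned int cycles_reset)
{
        ktime_t kt;

        /* profiling_interval was derived from the user's cycles_reset
         * value by set_spu_profiling_frequency(). */
        kt = ktime_set(0, profiling_interval);
        hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        timer.function = profile_spus;  /* periodically drains the trace array */

        spu_prof_running = 1;
        hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
        return 0;
}

void stop_spu_profiling_cycles(void)
{
        spu_prof_running = 0;   /* profile_spus() stops re-arming the timer */
        hrtimer_cancel(&timer);
        kfree(samples);
        pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
}
```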
```diff
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 25a4ec2514a..ad7f32c848f 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -40,14 +40,9 @@
 #include "../platforms/cell/interrupt.h"
 #include "cell/pr_util.h"
 
-static void cell_global_stop_spu(void);
-
-/*
- * spu_cycle_reset is the number of cycles between samples.
- * This variable is used for SPU profiling and should ONLY be set
- * at the beginning of cell_reg_setup; otherwise, it's read-only.
- */
-static unsigned int spu_cycle_reset;
+#define PPU_PROFILING           0
+#define SPU_PROFILING_CYCLES    1
+#define SPU_PROFILING_EVENTS    2
 
 #define NUM_SPUS_PER_NODE 8
 #define SPU_CYCLES_EVENT_NUM 2  /* event number for SPU_CYCLES */
@@ -66,6 +61,14 @@ static unsigned int spu_cycle_reset;
 
 #define MAX_SPU_COUNT 0xFFFFFF  /* maximum 24 bit LFSR value */
 
+/*
+ * spu_cycle_reset is the number of cycles between samples.
+ * This variable is used for SPU profiling and should ONLY be set
+ * at the beginning of cell_reg_setup; otherwise, it's read-only.
+ */
+static unsigned int spu_cycle_reset;
+static unsigned int profiling_mode;
+
 struct pmc_cntrl_data {
        unsigned long vcntr;
        unsigned long evnts;
@@ -122,7 +125,6 @@ static struct {
 #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
 
 static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
-
 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
 
 /*
@@ -165,7 +167,7 @@ static int spu_rtas_token;  /* token for SPU cycle profiling */
 static u32 reset_value[NR_PHYS_CTRS];
 static int num_counters;
 static int oprofile_running;
-static DEFINE_SPINLOCK(virt_cntr_lock);
+static DEFINE_SPINLOCK(cntr_lock);
 
 static u32 ctr_enabled;
 
@@ -367,7 +369,7 @@ static void write_pm_cntrl(int cpu)
        if (pm_regs.pm_cntrl.stop_at_max == 1)
                val |= CBE_PM_STOP_AT_MAX;
 
-       if (pm_regs.pm_cntrl.trace_mode == 1)
+       if (pm_regs.pm_cntrl.trace_mode != 0)
                val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
 
        if (pm_regs.pm_cntrl.freeze == 1)
@@ -441,7 +443,7 @@ static void cell_virtual_cntr(unsigned long data)
         * not both playing with the counters on the same node.
         */
 
-       spin_lock_irqsave(&virt_cntr_lock, flags);
+       spin_lock_irqsave(&cntr_lock, flags);
 
        prev_hdw_thread = hdw_thread;
 
@@ -527,7 +529,7 @@ static void cell_virtual_cntr(unsigned long data)
                cbe_enable_pm(cpu);
        }
 
-       spin_unlock_irqrestore(&virt_cntr_lock, flags);
+       spin_unlock_irqrestore(&cntr_lock, flags);
 
        mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
 }
@@ -541,44 +543,30 @@ static void start_virt_cntrs(void)
        add_timer(&timer_virt_cntr);
 }
 
-/* This function is called once for all cpus combined */
-static int cell_reg_setup(struct op_counter_config *ctr,
+static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
                          struct op_system_config *sys, int num_ctrs)
 {
-       int i, j, cpu;
-       spu_cycle_reset = 0;
-
-       if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
-               spu_cycle_reset = ctr[0].count;
-
-               /*
-                * Each node will need to make the rtas call to start
-                * and stop SPU profiling.  Get the token once and store it.
-                */
-               spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
-
-               if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
-                       printk(KERN_ERR
-                              "%s: rtas token ibm,cbe-spu-perftools unknown\n",
-                              __func__);
-                       return -EIO;
-               }
-       }
-
-       pm_rtas_token = rtas_token("ibm,cbe-perftools");
+       spu_cycle_reset = ctr[0].count;
 
        /*
-        * For all events excetp PPU CYCLEs, each node will need to make
-        * the rtas cbe-perftools call to setup and reset the debug bus.
-        * Make the token lookup call once and store it in the global
-        * variable pm_rtas_token.
+        * Each node will need to make the rtas call to start
+        * and stop SPU profiling.  Get the token once and store it.
         */
-       if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+       spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
+
+       if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
                printk(KERN_ERR
-                      "%s: rtas token ibm,cbe-perftools unknown\n",
+                      "%s: rtas token ibm,cbe-spu-perftools unknown\n",
                       __func__);
                return -EIO;
        }
+       return 0;
+}
+
+static int cell_reg_setup_ppu(struct op_counter_config *ctr,
+                       struct op_system_config *sys, int num_ctrs)
+{
+       int i, j, cpu;
 
        num_counters = num_ctrs;
 
@@ -665,6 +653,41 @@ static int cell_reg_setup(struct op_counter_config *ctr,
 }
 
 
+/* This function is called once for all cpus combined */
+static int cell_reg_setup(struct op_counter_config *ctr,
+                       struct op_system_config *sys, int num_ctrs)
+{
+       int ret;
+
+       spu_cycle_reset = 0;
+
+       /*
+        * For all events except PPU CYCLEs, each node will need to make
+        * the rtas cbe-perftools call to setup and reset the debug bus.
+        * Make the token lookup call once and store it in the global
+        * variable pm_rtas_token.
+        */
+       pm_rtas_token = rtas_token("ibm,cbe-perftools");
+
+       if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+               printk(KERN_ERR
+                      "%s: rtas token ibm,cbe-perftools unknown\n",
+                      __func__);
+               return -EIO;
+       }
+
+       if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
+               profiling_mode = SPU_PROFILING_CYCLES;
+               ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
+       } else {
+               profiling_mode = PPU_PROFILING;
+               ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
+       }
+
+       return ret;
+}
+
+
 
 /* This function is called once for each cpu */
 static int cell_cpu_setup(struct op_counter_config *cntr)
@@ -673,7 +696,11 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
        u32 num_enabled = 0;
        int i;
 
-       if (spu_cycle_reset)
+       /* Cycle based SPU profiling does not use the performance
+        * counters.  The trace array is configured to collect
+        * the data.
+        */
+       if (profiling_mode == SPU_PROFILING_CYCLES)
                return 0;
 
        /* There is one performance monitor per processor chip (i.e. node),
@@ -686,7 +713,6 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
        cbe_disable_pm(cpu);
        cbe_disable_pm_interrupts(cpu);
 
-       cbe_write_pm(cpu, pm_interval, 0);
        cbe_write_pm(cpu, pm_start_stop, 0);
        cbe_write_pm(cpu, group_control, pm_regs.group_control);
        cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
@@ -885,7 +911,94 @@ static struct notifier_block cpu_freq_notifier_block = {
 };
 #endif
 
-static int cell_global_start_spu(struct op_counter_config *ctr)
+/*
+ * Note the generic OProfile stop calls do not support returning
+ * an error on stop.  Hence, will not return an error if the FW
+ * calls fail on stop.  Failure to reset the debug bus is not an issue.
+ * Failure to disable the SPU profiling is not an issue.  The FW calls
+ * to enable the performance counters and debug bus will work even if
+ * the hardware was not cleanly reset.
+ */
+static void cell_global_stop_spu_cycles(void)
+{
+       int subfunc, rtn_value;
+       unsigned int lfsr_value;
+       int cpu;
+
+       oprofile_running = 0;
+
+#ifdef CONFIG_CPU_FREQ
+       cpufreq_unregister_notifier(&cpu_freq_notifier_block,
+                                   CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+
+       for_each_online_cpu(cpu) {
+               if (cbe_get_hw_thread_id(cpu))
+                       continue;
+
+               subfunc = 3;    /*
+                                * 2 - activate SPU tracing,
+                                * 3 - deactivate
+                                */
+               lfsr_value = 0x8f100000;
+
+               rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
+                                     subfunc, cbe_cpu_to_node(cpu),
+                                     lfsr_value);
+
+               if (unlikely(rtn_value != 0)) {
+                       printk(KERN_ERR
+                              "%s: rtas call ibm,cbe-spu-perftools " \
+                              "failed, return = %d\n",
+                              __func__, rtn_value);
+               }
+
+               /* Deactivate the signals */
+               pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+       }
+
+       if (profiling_mode == SPU_PROFILING_CYCLES)
+               stop_spu_profiling_cycles();
+}
+
+static void cell_global_stop_ppu(void)
+{
+       int cpu;
+
+       /*
+        * This routine will be called once for the system.
+        * There is one performance monitor per node, so we
+        * only need to perform this function once per node.
+        */
+       del_timer_sync(&timer_virt_cntr);
+       oprofile_running = 0;
+       smp_wmb();
+
+       for_each_online_cpu(cpu) {
+               if (cbe_get_hw_thread_id(cpu))
+                       continue;
+
+               cbe_sync_irq(cbe_cpu_to_node(cpu));
+               /* Stop the counters */
+               cbe_disable_pm(cpu);
+
+               /* Deactivate the signals */
+               pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+
+               /* Deactivate interrupts */
+               cbe_disable_pm_interrupts(cpu);
+       }
+}
+
+static void cell_global_stop(void)
+{
+       if (profiling_mode == PPU_PROFILING)
+               cell_global_stop_ppu();
+       else
+               cell_global_stop_spu_cycles();
+}
+
+static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
 {
        int subfunc;
        unsigned int lfsr_value;
@@ -955,14 +1068,14 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
 
                if (unlikely(ret != 0)) {
                        printk(KERN_ERR
-                              "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
-                              __func__, ret);
+                              "%s: rtas call ibm,cbe-spu-perftools failed, " \
+                              "return = %d\n", __func__, ret);
                        rtas_error = -EIO;
                        goto out;
                }
        }
 
-       rtas_error = start_spu_profiling(spu_cycle_reset);
+       rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
        if (rtas_error)
                goto out_stop;
 
@@ -970,7 +1083,7 @@ static int cell_global_start_spu(struct op_counter_config *ctr)
        return 0;
 
 out_stop:
-       cell_global_stop_spu();         /* clean up the PMU/debug bus */
+       cell_global_stop_spu_cycles();  /* clean up the PMU/debug bus */
 out:
        return rtas_error;
 }
@@ -1024,99 +1137,15 @@ static int cell_global_start_ppu(struct op_counter_config *ctr)
 
 static int cell_global_start(struct op_counter_config *ctr)
 {
-       if (spu_cycle_reset)
-               return cell_global_start_spu(ctr);
+       if (profiling_mode == SPU_PROFILING_CYCLES)
+               return cell_global_start_spu_cycles(ctr);
        else
                return cell_global_start_ppu(ctr);
 }
 
-/*
- * Note the generic OProfile stop calls do not support returning
- * an error on stop.  Hence, will not return an error if the FW
- * calls fail on stop.  Failure to reset the debug bus is not an issue.
- * Failure to disable the SPU profiling is not an issue.  The FW calls
- * to enable the performance counters and debug bus will work even if
- * the hardware was not cleanly reset.
- */
-static void cell_global_stop_spu(void)
-{
-       int subfunc, rtn_value;
-       unsigned int lfsr_value;
-       int cpu;
-
-       oprofile_running = 0;
 
-#ifdef CONFIG_CPU_FREQ
-       cpufreq_unregister_notifier(&cpu_freq_notifier_block,
-                                   CPUFREQ_TRANSITION_NOTIFIER);
-#endif
-
-       for_each_online_cpu(cpu) {
-               if (cbe_get_hw_thread_id(cpu))
-                       continue;
-
-               subfunc = 3;    /*
-                                * 2 - activate SPU tracing,
-                                * 3 - deactivate
-                                */
-               lfsr_value = 0x8f100000;
-
-               rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
-                                     subfunc, cbe_cpu_to_node(cpu),
-                                     lfsr_value);
-
-               if (unlikely(rtn_value != 0)) {
-                       printk(KERN_ERR
-                              "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
-                              __func__, rtn_value);
-               }
-
-               /* Deactivate the signals */
-               pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-       }
-
-       stop_spu_profiling();
-}
-
-static void cell_global_stop_ppu(void)
-{
-       int cpu;
-
-       /*
-        * This routine will be called once for the system.
-        * There is one performance monitor per node, so we
-        * only need to perform this function once per node.
-        */
-       del_timer_sync(&timer_virt_cntr);
-       oprofile_running = 0;
-       smp_wmb();
-
-       for_each_online_cpu(cpu) {
-               if (cbe_get_hw_thread_id(cpu))
-                       continue;
-
-               cbe_sync_irq(cbe_cpu_to_node(cpu));
-               /* Stop the counters */
-               cbe_disable_pm(cpu);
-
-               /* Deactivate the signals */
-               pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-
-               /* Deactivate interrupts */
-               cbe_disable_pm_interrupts(cpu);
-       }
-}
-
-static void cell_global_stop(void)
-{
-       if (spu_cycle_reset)
-               cell_global_stop_spu();
-       else
-               cell_global_stop_ppu();
-}
-
-static void cell_handle_interrupt(struct pt_regs *regs,
-                                 struct op_counter_config *ctr)
+static void cell_handle_interrupt_ppu(struct pt_regs *regs,
+                                     struct op_counter_config *ctr)
 {
        u32 cpu;
        u64 pc;
@@ -1132,7 +1161,7 @@ static void cell_handle_interrupt(struct pt_regs *regs,
         * routine are not running at the same time. See the
         * cell_virtual_cntr() routine for additional comments.
         */
-       spin_lock_irqsave(&virt_cntr_lock, flags);
+       spin_lock_irqsave(&cntr_lock, flags);
 
        /*
         * Need to disable and reenable the performance counters
@@ -1185,7 +1214,14 @@ static void cell_handle_interrupt(struct pt_regs *regs,
                 */
                cbe_enable_pm(cpu);
        }
-       spin_unlock_irqrestore(&virt_cntr_lock, flags);
+       spin_unlock_irqrestore(&cntr_lock, flags);
+}
+
+static void cell_handle_interrupt(struct pt_regs *regs,
+                                 struct op_counter_config *ctr)
+{
+       if (profiling_mode == PPU_PROFILING)
+               cell_handle_interrupt_ppu(regs, ctr);
 }
 
 /*
@@ -1195,7 +1231,8 @@ static void cell_handle_interrupt(struct pt_regs *regs,
  */
 static int cell_sync_start(void)
 {
-       if (spu_cycle_reset)
+       if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+           (profiling_mode == SPU_PROFILING_EVENTS))
                return spu_sync_start();
        else
                return DO_GENERIC_SYNC;
@@ -1203,7 +1240,8 @@ static int cell_sync_start(void)
 
 static int cell_sync_stop(void)
 {
-       if (spu_cycle_reset)
+       if ((profiling_mode == SPU_PROFILING_CYCLES) ||
+           (profiling_mode == SPU_PROFILING_EVENTS))
                return spu_sync_stop();
        else
                return 1;
```
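For reference, here is the firmware handshake that cell_global_stop_spu_cycles() performs on each node, pulled out of the diff into a standalone kernel-context sketch. The spu_tracing_deactivate() wrapper name is hypothetical; the rtas_call() arguments, the subfunc encoding (2 activates SPU tracing, 3 deactivates), and the 0x8f100000 LFSR seed are taken directly from the patch.

```c
#include <linux/kernel.h>
#include <linux/smp.h>
#include <asm/rtas.h>
/* cbe_get_hw_thread_id(), cbe_cpu_to_node() and pm_rtas_reset_signals()
 * come from the Cell platform headers and helpers already used by
 * op_model_cell.c. */

static void spu_tracing_deactivate(int spu_rtas_token)
{
        int subfunc = 3;        /* 2 - activate SPU tracing, 3 - deactivate */
        unsigned int lfsr_value = 0x8f100000;
        int cpu, ret;

        for_each_online_cpu(cpu) {
                /* One performance monitor per node: skip the second
                 * hardware thread of each chip. */
                if (cbe_get_hw_thread_id(cpu))
                        continue;

                ret = rtas_call(spu_rtas_token, 3, 1, NULL,
                                subfunc, cbe_cpu_to_node(cpu), lfsr_value);
                if (unlikely(ret != 0))
                        printk(KERN_ERR
                               "%s: rtas call ibm,cbe-spu-perftools failed, "
                               "return = %d\n", __func__, ret);

                /* Put the debug bus signals back in their reset state. */
                pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
        }
}
```

As the comment block in the patch notes, failures here are logged but not propagated, since the generic OProfile stop path cannot return an error.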
