diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 13:15:31 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 13:15:31 -0500 |
| commit | d25e8dbdab203ed8b4fd0a174bb5259e35ecd87c (patch) | |
| tree | 46aadc89806daebe509f35daa962114b7a730090 | |
| parent | 642c4c75a765d7a3244ab39c8e6fb09be21eca5b (diff) | |
| parent | cfc9c0b450176a077205ef39092f0dc1a04e020a (diff) | |
Merge branch 'oprofile-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'oprofile-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
oprofile/x86: fix msr access to reserved counters
oprofile/x86: use kzalloc() instead of kmalloc()
oprofile/x86: fix perfctr nmi reservation for multiplexing
oprofile/x86: add comment to counter-in-use warning
oprofile/x86: warn user if a counter is already active
oprofile/x86: implement randomization for IBS periodic op counter
oprofile/x86: implement lfsr pseudo-random number generator for IBS
oprofile/x86: implement IBS cpuid feature detection
oprofile/x86: remove node check in AMD IBS initialization
oprofile/x86: remove OPROFILE_IBS config option
oprofile: remove EXPERIMENTAL from the config option description
oprofile: remove tracing build dependency
| -rw-r--r-- | arch/Kconfig | 18 | ||||
| -rw-r--r-- | arch/x86/oprofile/nmi_int.c | 17 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_model_amd.c | 244 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_model_p4.c | 6 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_model_ppro.c | 17 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_x86_model.h | 20 | ||||
| -rw-r--r-- | init/Kconfig | 2 |
7 files changed, 200 insertions, 124 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 9d055b4f0585..06a13729c8df 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
| @@ -3,11 +3,9 @@ | |||
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | config OPROFILE | 5 | config OPROFILE |
| 6 | tristate "OProfile system profiling (EXPERIMENTAL)" | 6 | tristate "OProfile system profiling" |
| 7 | depends on PROFILING | 7 | depends on PROFILING |
| 8 | depends on HAVE_OPROFILE | 8 | depends on HAVE_OPROFILE |
| 9 | depends on TRACING_SUPPORT | ||
| 10 | select TRACING | ||
| 11 | select RING_BUFFER | 9 | select RING_BUFFER |
| 12 | select RING_BUFFER_ALLOW_SWAP | 10 | select RING_BUFFER_ALLOW_SWAP |
| 13 | help | 11 | help |
| @@ -17,20 +15,6 @@ config OPROFILE | |||
| 17 | 15 | ||
| 18 | If unsure, say N. | 16 | If unsure, say N. |
| 19 | 17 | ||
| 20 | config OPROFILE_IBS | ||
| 21 | bool "OProfile AMD IBS support (EXPERIMENTAL)" | ||
| 22 | default n | ||
| 23 | depends on OPROFILE && SMP && X86 | ||
| 24 | help | ||
| 25 | Instruction-Based Sampling (IBS) is a new profiling | ||
| 26 | technique that provides rich, precise program performance | ||
| 27 | information. IBS is introduced by AMD Family10h processors | ||
| 28 | (AMD Opteron Quad-Core processor "Barcelona") to overcome | ||
| 29 | the limitations of conventional performance counter | ||
| 30 | sampling. | ||
| 31 | |||
| 32 | If unsure, say N. | ||
| 33 | |||
| 34 | config OPROFILE_EVENT_MULTIPLEX | 18 | config OPROFILE_EVENT_MULTIPLEX |
| 35 | bool "OProfile multiplexing support (EXPERIMENTAL)" | 19 | bool "OProfile multiplexing support (EXPERIMENTAL)" |
| 36 | default n | 20 | default n |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3347f696edc7..2c505ee71014 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
| @@ -159,7 +159,7 @@ static int nmi_setup_mux(void) | |||
| 159 | 159 | ||
| 160 | for_each_possible_cpu(i) { | 160 | for_each_possible_cpu(i) { |
| 161 | per_cpu(cpu_msrs, i).multiplex = | 161 | per_cpu(cpu_msrs, i).multiplex = |
| 162 | kmalloc(multiplex_size, GFP_KERNEL); | 162 | kzalloc(multiplex_size, GFP_KERNEL); |
| 163 | if (!per_cpu(cpu_msrs, i).multiplex) | 163 | if (!per_cpu(cpu_msrs, i).multiplex) |
| 164 | return 0; | 164 | return 0; |
| 165 | } | 165 | } |
| @@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | |||
| 179 | if (counter_config[i].enabled) { | 179 | if (counter_config[i].enabled) { |
| 180 | multiplex[i].saved = -(u64)counter_config[i].count; | 180 | multiplex[i].saved = -(u64)counter_config[i].count; |
| 181 | } else { | 181 | } else { |
| 182 | multiplex[i].addr = 0; | ||
| 183 | multiplex[i].saved = 0; | 182 | multiplex[i].saved = 0; |
| 184 | } | 183 | } |
| 185 | } | 184 | } |
| @@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | |||
| 189 | 188 | ||
| 190 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) | 189 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) |
| 191 | { | 190 | { |
| 191 | struct op_msr *counters = msrs->counters; | ||
| 192 | struct op_msr *multiplex = msrs->multiplex; | 192 | struct op_msr *multiplex = msrs->multiplex; |
| 193 | int i; | 193 | int i; |
| 194 | 194 | ||
| 195 | for (i = 0; i < model->num_counters; ++i) { | 195 | for (i = 0; i < model->num_counters; ++i) { |
| 196 | int virt = op_x86_phys_to_virt(i); | 196 | int virt = op_x86_phys_to_virt(i); |
| 197 | if (multiplex[virt].addr) | 197 | if (counters[i].addr) |
| 198 | rdmsrl(multiplex[virt].addr, multiplex[virt].saved); | 198 | rdmsrl(counters[i].addr, multiplex[virt].saved); |
| 199 | } | 199 | } |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) | 202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) |
| 203 | { | 203 | { |
| 204 | struct op_msr *counters = msrs->counters; | ||
| 204 | struct op_msr *multiplex = msrs->multiplex; | 205 | struct op_msr *multiplex = msrs->multiplex; |
| 205 | int i; | 206 | int i; |
| 206 | 207 | ||
| 207 | for (i = 0; i < model->num_counters; ++i) { | 208 | for (i = 0; i < model->num_counters; ++i) { |
| 208 | int virt = op_x86_phys_to_virt(i); | 209 | int virt = op_x86_phys_to_virt(i); |
| 209 | if (multiplex[virt].addr) | 210 | if (counters[i].addr) |
| 210 | wrmsrl(multiplex[virt].addr, multiplex[virt].saved); | 211 | wrmsrl(counters[i].addr, multiplex[virt].saved); |
| 211 | } | 212 | } |
| 212 | } | 213 | } |
| 213 | 214 | ||
| @@ -303,11 +304,11 @@ static int allocate_msrs(void) | |||
| 303 | 304 | ||
| 304 | int i; | 305 | int i; |
| 305 | for_each_possible_cpu(i) { | 306 | for_each_possible_cpu(i) { |
| 306 | per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, | 307 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, |
| 307 | GFP_KERNEL); | 308 | GFP_KERNEL); |
| 308 | if (!per_cpu(cpu_msrs, i).counters) | 309 | if (!per_cpu(cpu_msrs, i).counters) |
| 309 | return 0; | 310 | return 0; |
| 310 | per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, | 311 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, |
| 311 | GFP_KERNEL); | 312 | GFP_KERNEL); |
| 312 | if (!per_cpu(cpu_msrs, i).controls) | 313 | if (!per_cpu(cpu_msrs, i).controls) |
| 313 | return 0; | 314 | return 0; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 39686c29f03a..6a58256dce9f 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
| @@ -22,6 +22,9 @@ | |||
| 22 | #include <asm/ptrace.h> | 22 | #include <asm/ptrace.h> |
| 23 | #include <asm/msr.h> | 23 | #include <asm/msr.h> |
| 24 | #include <asm/nmi.h> | 24 | #include <asm/nmi.h> |
| 25 | #include <asm/apic.h> | ||
| 26 | #include <asm/processor.h> | ||
| 27 | #include <asm/cpufeature.h> | ||
| 25 | 28 | ||
| 26 | #include "op_x86_model.h" | 29 | #include "op_x86_model.h" |
| 27 | #include "op_counter.h" | 30 | #include "op_counter.h" |
| @@ -43,15 +46,13 @@ | |||
| 43 | 46 | ||
| 44 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; | 47 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; |
| 45 | 48 | ||
| 46 | #ifdef CONFIG_OPROFILE_IBS | ||
| 47 | |||
| 48 | /* IbsFetchCtl bits/masks */ | 49 | /* IbsFetchCtl bits/masks */ |
| 49 | #define IBS_FETCH_RAND_EN (1ULL<<57) | 50 | #define IBS_FETCH_RAND_EN (1ULL<<57) |
| 50 | #define IBS_FETCH_VAL (1ULL<<49) | 51 | #define IBS_FETCH_VAL (1ULL<<49) |
| 51 | #define IBS_FETCH_ENABLE (1ULL<<48) | 52 | #define IBS_FETCH_ENABLE (1ULL<<48) |
| 52 | #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL | 53 | #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL |
| 53 | 54 | ||
| 54 | /*IbsOpCtl bits */ | 55 | /* IbsOpCtl bits */ |
| 55 | #define IBS_OP_CNT_CTL (1ULL<<19) | 56 | #define IBS_OP_CNT_CTL (1ULL<<19) |
| 56 | #define IBS_OP_VAL (1ULL<<18) | 57 | #define IBS_OP_VAL (1ULL<<18) |
| 57 | #define IBS_OP_ENABLE (1ULL<<17) | 58 | #define IBS_OP_ENABLE (1ULL<<17) |
| @@ -59,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; | |||
| 59 | #define IBS_FETCH_SIZE 6 | 60 | #define IBS_FETCH_SIZE 6 |
| 60 | #define IBS_OP_SIZE 12 | 61 | #define IBS_OP_SIZE 12 |
| 61 | 62 | ||
| 62 | static int has_ibs; /* AMD Family10h and later */ | 63 | static u32 ibs_caps; |
| 63 | 64 | ||
| 64 | struct op_ibs_config { | 65 | struct op_ibs_config { |
| 65 | unsigned long op_enabled; | 66 | unsigned long op_enabled; |
| @@ -71,24 +72,52 @@ struct op_ibs_config { | |||
| 71 | }; | 72 | }; |
| 72 | 73 | ||
| 73 | static struct op_ibs_config ibs_config; | 74 | static struct op_ibs_config ibs_config; |
| 75 | static u64 ibs_op_ctl; | ||
| 74 | 76 | ||
| 75 | #endif | 77 | /* |
| 78 | * IBS cpuid feature detection | ||
| 79 | */ | ||
| 76 | 80 | ||
| 77 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 81 | #define IBS_CPUID_FEATURES 0x8000001b |
| 82 | |||
| 83 | /* | ||
| 84 | * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but | ||
| 85 | * bit 0 is used to indicate the existence of IBS. | ||
| 86 | */ | ||
| 87 | #define IBS_CAPS_AVAIL (1LL<<0) | ||
| 88 | #define IBS_CAPS_RDWROPCNT (1LL<<3) | ||
| 89 | #define IBS_CAPS_OPCNT (1LL<<4) | ||
| 90 | |||
| 91 | /* | ||
| 92 | * IBS randomization macros | ||
| 93 | */ | ||
| 94 | #define IBS_RANDOM_BITS 12 | ||
| 95 | #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) | ||
| 96 | #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) | ||
| 78 | 97 | ||
| 79 | static void op_mux_fill_in_addresses(struct op_msrs * const msrs) | 98 | static u32 get_ibs_caps(void) |
| 80 | { | 99 | { |
| 81 | int i; | 100 | u32 ibs_caps; |
| 101 | unsigned int max_level; | ||
| 82 | 102 | ||
| 83 | for (i = 0; i < NUM_VIRT_COUNTERS; i++) { | 103 | if (!boot_cpu_has(X86_FEATURE_IBS)) |
| 84 | int hw_counter = op_x86_virt_to_phys(i); | 104 | return 0; |
| 85 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 105 | |
| 86 | msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; | 106 | /* check IBS cpuid feature flags */ |
| 87 | else | 107 | max_level = cpuid_eax(0x80000000); |
| 88 | msrs->multiplex[i].addr = 0; | 108 | if (max_level < IBS_CPUID_FEATURES) |
| 89 | } | 109 | return IBS_CAPS_AVAIL; |
| 110 | |||
| 111 | ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); | ||
| 112 | if (!(ibs_caps & IBS_CAPS_AVAIL)) | ||
| 113 | /* cpuid flags not valid */ | ||
| 114 | return IBS_CAPS_AVAIL; | ||
| 115 | |||
| 116 | return ibs_caps; | ||
| 90 | } | 117 | } |
| 91 | 118 | ||
| 119 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 120 | |||
| 92 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | 121 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, |
| 93 | struct op_msrs const * const msrs) | 122 | struct op_msrs const * const msrs) |
| 94 | { | 123 | { |
| @@ -98,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
| 98 | /* enable active counters */ | 127 | /* enable active counters */ |
| 99 | for (i = 0; i < NUM_COUNTERS; ++i) { | 128 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 100 | int virt = op_x86_phys_to_virt(i); | 129 | int virt = op_x86_phys_to_virt(i); |
| 101 | if (!counter_config[virt].enabled) | 130 | if (!reset_value[virt]) |
| 102 | continue; | 131 | continue; |
| 103 | rdmsrl(msrs->controls[i].addr, val); | 132 | rdmsrl(msrs->controls[i].addr, val); |
| 104 | val &= model->reserved; | 133 | val &= model->reserved; |
| @@ -107,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
| 107 | } | 136 | } |
| 108 | } | 137 | } |
| 109 | 138 | ||
| 110 | #else | ||
| 111 | |||
| 112 | static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } | ||
| 113 | |||
| 114 | #endif | 139 | #endif |
| 115 | 140 | ||
| 116 | /* functions for op_amd_spec */ | 141 | /* functions for op_amd_spec */ |
| @@ -122,18 +147,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
| 122 | for (i = 0; i < NUM_COUNTERS; i++) { | 147 | for (i = 0; i < NUM_COUNTERS; i++) { |
| 123 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 148 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) |
| 124 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | 149 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; |
| 125 | else | ||
| 126 | msrs->counters[i].addr = 0; | ||
| 127 | } | 150 | } |
| 128 | 151 | ||
| 129 | for (i = 0; i < NUM_CONTROLS; i++) { | 152 | for (i = 0; i < NUM_CONTROLS; i++) { |
| 130 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | 153 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) |
| 131 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | 154 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; |
| 132 | else | ||
| 133 | msrs->controls[i].addr = 0; | ||
| 134 | } | 155 | } |
| 135 | |||
| 136 | op_mux_fill_in_addresses(msrs); | ||
| 137 | } | 156 | } |
| 138 | 157 | ||
| 139 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | 158 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, |
| @@ -144,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 144 | 163 | ||
| 145 | /* setup reset_value */ | 164 | /* setup reset_value */ |
| 146 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | 165 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { |
| 147 | if (counter_config[i].enabled) | 166 | if (counter_config[i].enabled |
| 167 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
| 148 | reset_value[i] = counter_config[i].count; | 168 | reset_value[i] = counter_config[i].count; |
| 149 | else | 169 | else |
| 150 | reset_value[i] = 0; | 170 | reset_value[i] = 0; |
| @@ -152,9 +172,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 152 | 172 | ||
| 153 | /* clear all counters */ | 173 | /* clear all counters */ |
| 154 | for (i = 0; i < NUM_CONTROLS; ++i) { | 174 | for (i = 0; i < NUM_CONTROLS; ++i) { |
| 155 | if (unlikely(!msrs->controls[i].addr)) | 175 | if (unlikely(!msrs->controls[i].addr)) { |
| 176 | if (counter_config[i].enabled && !smp_processor_id()) | ||
| 177 | /* | ||
| 178 | * counter is reserved, this is on all | ||
| 179 | * cpus, so report only for cpu #0 | ||
| 180 | */ | ||
| 181 | op_x86_warn_reserved(i); | ||
| 156 | continue; | 182 | continue; |
| 183 | } | ||
| 157 | rdmsrl(msrs->controls[i].addr, val); | 184 | rdmsrl(msrs->controls[i].addr, val); |
| 185 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | ||
| 186 | op_x86_warn_in_use(i); | ||
| 158 | val &= model->reserved; | 187 | val &= model->reserved; |
| 159 | wrmsrl(msrs->controls[i].addr, val); | 188 | wrmsrl(msrs->controls[i].addr, val); |
| 160 | } | 189 | } |
| @@ -169,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 169 | /* enable active counters */ | 198 | /* enable active counters */ |
| 170 | for (i = 0; i < NUM_COUNTERS; ++i) { | 199 | for (i = 0; i < NUM_COUNTERS; ++i) { |
| 171 | int virt = op_x86_phys_to_virt(i); | 200 | int virt = op_x86_phys_to_virt(i); |
| 172 | if (!counter_config[virt].enabled) | 201 | if (!reset_value[virt]) |
| 173 | continue; | ||
| 174 | if (!msrs->counters[i].addr) | ||
| 175 | continue; | 202 | continue; |
| 176 | 203 | ||
| 177 | /* setup counter registers */ | 204 | /* setup counter registers */ |
| @@ -185,7 +212,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 185 | } | 212 | } |
| 186 | } | 213 | } |
| 187 | 214 | ||
| 188 | #ifdef CONFIG_OPROFILE_IBS | 215 | /* |
| 216 | * 16-bit Linear Feedback Shift Register (LFSR) | ||
| 217 | * | ||
| 218 | * 16 14 13 11 | ||
| 219 | * Feedback polynomial = X + X + X + X + 1 | ||
| 220 | */ | ||
| 221 | static unsigned int lfsr_random(void) | ||
| 222 | { | ||
| 223 | static unsigned int lfsr_value = 0xF00D; | ||
| 224 | unsigned int bit; | ||
| 225 | |||
| 226 | /* Compute next bit to shift in */ | ||
| 227 | bit = ((lfsr_value >> 0) ^ | ||
| 228 | (lfsr_value >> 2) ^ | ||
| 229 | (lfsr_value >> 3) ^ | ||
| 230 | (lfsr_value >> 5)) & 0x0001; | ||
| 231 | |||
| 232 | /* Advance to next register value */ | ||
| 233 | lfsr_value = (lfsr_value >> 1) | (bit << 15); | ||
| 234 | |||
| 235 | return lfsr_value; | ||
| 236 | } | ||
| 237 | |||
| 238 | /* | ||
| 239 | * IBS software randomization | ||
| 240 | * | ||
| 241 | * The IBS periodic op counter is randomized in software. The lower 12 | ||
| 242 | * bits of the 20 bit counter are randomized. IbsOpCurCnt is | ||
| 243 | * initialized with a 12 bit random value. | ||
| 244 | */ | ||
| 245 | static inline u64 op_amd_randomize_ibs_op(u64 val) | ||
| 246 | { | ||
| 247 | unsigned int random = lfsr_random(); | ||
| 248 | |||
| 249 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) | ||
| 250 | /* | ||
| 251 | * Work around if the hw can not write to IbsOpCurCnt | ||
| 252 | * | ||
| 253 | * Randomize the lower 8 bits of the 16 bit | ||
| 254 | * IbsOpMaxCnt [15:0] value in the range of -128 to | ||
| 255 | * +127 by adding/subtracting an offset to the | ||
| 256 | * maximum count (IbsOpMaxCnt). | ||
| 257 | * | ||
| 258 | * To avoid over or underflows and protect upper bits | ||
| 259 | * starting at bit 16, the initial value for | ||
| 260 | * IbsOpMaxCnt must fit in the range from 0x0081 to | ||
| 261 | * 0xff80. | ||
| 262 | */ | ||
| 263 | val += (s8)(random >> 4); | ||
| 264 | else | ||
| 265 | val |= (u64)(random & IBS_RANDOM_MASK) << 32; | ||
| 266 | |||
| 267 | return val; | ||
| 268 | } | ||
| 189 | 269 | ||
| 190 | static inline void | 270 | static inline void |
| 191 | op_amd_handle_ibs(struct pt_regs * const regs, | 271 | op_amd_handle_ibs(struct pt_regs * const regs, |
| @@ -194,7 +274,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
| 194 | u64 val, ctl; | 274 | u64 val, ctl; |
| 195 | struct op_entry entry; | 275 | struct op_entry entry; |
| 196 | 276 | ||
| 197 | if (!has_ibs) | 277 | if (!ibs_caps) |
| 198 | return; | 278 | return; |
| 199 | 279 | ||
| 200 | if (ibs_config.fetch_enabled) { | 280 | if (ibs_config.fetch_enabled) { |
| @@ -236,8 +316,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
| 236 | oprofile_write_commit(&entry); | 316 | oprofile_write_commit(&entry); |
| 237 | 317 | ||
| 238 | /* reenable the IRQ */ | 318 | /* reenable the IRQ */ |
| 239 | ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; | 319 | ctl = op_amd_randomize_ibs_op(ibs_op_ctl); |
| 240 | ctl |= IBS_OP_ENABLE; | ||
| 241 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); | 320 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); |
| 242 | } | 321 | } |
| 243 | } | 322 | } |
| @@ -246,41 +325,57 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
| 246 | static inline void op_amd_start_ibs(void) | 325 | static inline void op_amd_start_ibs(void) |
| 247 | { | 326 | { |
| 248 | u64 val; | 327 | u64 val; |
| 249 | if (has_ibs && ibs_config.fetch_enabled) { | 328 | |
| 329 | if (!ibs_caps) | ||
| 330 | return; | ||
| 331 | |||
| 332 | if (ibs_config.fetch_enabled) { | ||
| 250 | val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | 333 | val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; |
| 251 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; | 334 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; |
| 252 | val |= IBS_FETCH_ENABLE; | 335 | val |= IBS_FETCH_ENABLE; |
| 253 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); | 336 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); |
| 254 | } | 337 | } |
| 255 | 338 | ||
| 256 | if (has_ibs && ibs_config.op_enabled) { | 339 | if (ibs_config.op_enabled) { |
| 257 | val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; | 340 | ibs_op_ctl = ibs_config.max_cnt_op >> 4; |
| 258 | val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; | 341 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { |
| 259 | val |= IBS_OP_ENABLE; | 342 | /* |
| 343 | * IbsOpCurCnt not supported. See | ||
| 344 | * op_amd_randomize_ibs_op() for details. | ||
| 345 | */ | ||
| 346 | ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); | ||
| 347 | } else { | ||
| 348 | /* | ||
| 349 | * The start value is randomized with a | ||
| 350 | * positive offset, we need to compensate it | ||
| 351 | * with the half of the randomized range. Also | ||
| 352 | * avoid underflows. | ||
| 353 | */ | ||
| 354 | ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, | ||
| 355 | 0xFFFFULL); | ||
| 356 | } | ||
| 357 | if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) | ||
| 358 | ibs_op_ctl |= IBS_OP_CNT_CTL; | ||
| 359 | ibs_op_ctl |= IBS_OP_ENABLE; | ||
| 360 | val = op_amd_randomize_ibs_op(ibs_op_ctl); | ||
| 260 | wrmsrl(MSR_AMD64_IBSOPCTL, val); | 361 | wrmsrl(MSR_AMD64_IBSOPCTL, val); |
| 261 | } | 362 | } |
| 262 | } | 363 | } |
| 263 | 364 | ||
| 264 | static void op_amd_stop_ibs(void) | 365 | static void op_amd_stop_ibs(void) |
| 265 | { | 366 | { |
| 266 | if (has_ibs && ibs_config.fetch_enabled) | 367 | if (!ibs_caps) |
| 368 | return; | ||
| 369 | |||
| 370 | if (ibs_config.fetch_enabled) | ||
| 267 | /* clear max count and enable */ | 371 | /* clear max count and enable */ |
| 268 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); | 372 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); |
| 269 | 373 | ||
| 270 | if (has_ibs && ibs_config.op_enabled) | 374 | if (ibs_config.op_enabled) |
| 271 | /* clear max count and enable */ | 375 | /* clear max count and enable */ |
| 272 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | 376 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); |
| 273 | } | 377 | } |
| 274 | 378 | ||
| 275 | #else | ||
| 276 | |||
| 277 | static inline void op_amd_handle_ibs(struct pt_regs * const regs, | ||
| 278 | struct op_msrs const * const msrs) { } | ||
| 279 | static inline void op_amd_start_ibs(void) { } | ||
| 280 | static inline void op_amd_stop_ibs(void) { } | ||
| 281 | |||
| 282 | #endif | ||
| 283 | |||
| 284 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 379 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
| 285 | struct op_msrs const * const msrs) | 380 | struct op_msrs const * const msrs) |
| 286 | { | 381 | { |
| @@ -355,8 +450,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
| 355 | } | 450 | } |
| 356 | } | 451 | } |
| 357 | 452 | ||
| 358 | #ifdef CONFIG_OPROFILE_IBS | ||
| 359 | |||
| 360 | static u8 ibs_eilvt_off; | 453 | static u8 ibs_eilvt_off; |
| 361 | 454 | ||
| 362 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) | 455 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) |
| @@ -405,45 +498,36 @@ static int init_ibs_nmi(void) | |||
| 405 | return 1; | 498 | return 1; |
| 406 | } | 499 | } |
| 407 | 500 | ||
| 408 | #ifdef CONFIG_NUMA | ||
| 409 | /* Sanity check */ | ||
| 410 | /* Works only for 64bit with proper numa implementation. */ | ||
| 411 | if (nodes != num_possible_nodes()) { | ||
| 412 | printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " | ||
| 413 | "found: %d, expected %d", | ||
| 414 | nodes, num_possible_nodes()); | ||
| 415 | return 1; | ||
| 416 | } | ||
| 417 | #endif | ||
| 418 | return 0; | 501 | return 0; |
| 419 | } | 502 | } |
| 420 | 503 | ||
| 421 | /* uninitialize the APIC for the IBS interrupts if needed */ | 504 | /* uninitialize the APIC for the IBS interrupts if needed */ |
| 422 | static void clear_ibs_nmi(void) | 505 | static void clear_ibs_nmi(void) |
| 423 | { | 506 | { |
| 424 | if (has_ibs) | 507 | if (ibs_caps) |
| 425 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | 508 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); |
| 426 | } | 509 | } |
| 427 | 510 | ||
| 428 | /* initialize the APIC for the IBS interrupts if available */ | 511 | /* initialize the APIC for the IBS interrupts if available */ |
| 429 | static void ibs_init(void) | 512 | static void ibs_init(void) |
| 430 | { | 513 | { |
| 431 | has_ibs = boot_cpu_has(X86_FEATURE_IBS); | 514 | ibs_caps = get_ibs_caps(); |
| 432 | 515 | ||
| 433 | if (!has_ibs) | 516 | if (!ibs_caps) |
| 434 | return; | 517 | return; |
| 435 | 518 | ||
| 436 | if (init_ibs_nmi()) { | 519 | if (init_ibs_nmi()) { |
| 437 | has_ibs = 0; | 520 | ibs_caps = 0; |
| 438 | return; | 521 | return; |
| 439 | } | 522 | } |
| 440 | 523 | ||
| 441 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); | 524 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", |
| 525 | (unsigned)ibs_caps); | ||
| 442 | } | 526 | } |
| 443 | 527 | ||
| 444 | static void ibs_exit(void) | 528 | static void ibs_exit(void) |
| 445 | { | 529 | { |
| 446 | if (!has_ibs) | 530 | if (!ibs_caps) |
| 447 | return; | 531 | return; |
| 448 | 532 | ||
| 449 | clear_ibs_nmi(); | 533 | clear_ibs_nmi(); |
| @@ -463,7 +547,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
| 463 | if (ret) | 547 | if (ret) |
| 464 | return ret; | 548 | return ret; |
| 465 | 549 | ||
| 466 | if (!has_ibs) | 550 | if (!ibs_caps) |
| 467 | return ret; | 551 | return ret; |
| 468 | 552 | ||
| 469 | /* model specific files */ | 553 | /* model specific files */ |
| @@ -473,7 +557,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
| 473 | ibs_config.fetch_enabled = 0; | 557 | ibs_config.fetch_enabled = 0; |
| 474 | ibs_config.max_cnt_op = 250000; | 558 | ibs_config.max_cnt_op = 250000; |
| 475 | ibs_config.op_enabled = 0; | 559 | ibs_config.op_enabled = 0; |
| 476 | ibs_config.dispatched_ops = 1; | 560 | ibs_config.dispatched_ops = 0; |
| 477 | 561 | ||
| 478 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); | 562 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); |
| 479 | oprofilefs_create_ulong(sb, dir, "enable", | 563 | oprofilefs_create_ulong(sb, dir, "enable", |
| @@ -488,8 +572,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
| 488 | &ibs_config.op_enabled); | 572 | &ibs_config.op_enabled); |
| 489 | oprofilefs_create_ulong(sb, dir, "max_count", | 573 | oprofilefs_create_ulong(sb, dir, "max_count", |
| 490 | &ibs_config.max_cnt_op); | 574 | &ibs_config.max_cnt_op); |
| 491 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | 575 | if (ibs_caps & IBS_CAPS_OPCNT) |
| 492 | &ibs_config.dispatched_ops); | 576 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", |
| 577 | &ibs_config.dispatched_ops); | ||
| 493 | 578 | ||
| 494 | return 0; | 579 | return 0; |
| 495 | } | 580 | } |
| @@ -507,19 +592,6 @@ static void op_amd_exit(void) | |||
| 507 | ibs_exit(); | 592 | ibs_exit(); |
| 508 | } | 593 | } |
| 509 | 594 | ||
| 510 | #else | ||
| 511 | |||
| 512 | /* no IBS support */ | ||
| 513 | |||
| 514 | static int op_amd_init(struct oprofile_operations *ops) | ||
| 515 | { | ||
| 516 | return 0; | ||
| 517 | } | ||
| 518 | |||
| 519 | static void op_amd_exit(void) {} | ||
| 520 | |||
| 521 | #endif /* CONFIG_OPROFILE_IBS */ | ||
| 522 | |||
| 523 | struct op_x86_model_spec op_amd_spec = { | 595 | struct op_x86_model_spec op_amd_spec = { |
| 524 | .num_counters = NUM_COUNTERS, | 596 | .num_counters = NUM_COUNTERS, |
| 525 | .num_controls = NUM_CONTROLS, | 597 | .num_controls = NUM_CONTROLS, |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index ac6b354becdf..e6a160a4684a 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
| @@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
| 394 | setup_num_counters(); | 394 | setup_num_counters(); |
| 395 | stag = get_stagger(); | 395 | stag = get_stagger(); |
| 396 | 396 | ||
| 397 | /* initialize some registers */ | ||
| 398 | for (i = 0; i < num_counters; ++i) | ||
| 399 | msrs->counters[i].addr = 0; | ||
| 400 | for (i = 0; i < num_controls; ++i) | ||
| 401 | msrs->controls[i].addr = 0; | ||
| 402 | |||
| 403 | /* the counter & cccr registers we pay attention to */ | 397 | /* the counter & cccr registers we pay attention to */ |
| 404 | for (i = 0; i < num_counters; ++i) { | 398 | for (i = 0; i < num_counters; ++i) { |
| 405 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; | 399 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 8eb05878554c..5d1727ba409e 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
| @@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) | |||
| 37 | for (i = 0; i < num_counters; i++) { | 37 | for (i = 0; i < num_counters; i++) { |
| 38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
| 39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
| 40 | else | ||
| 41 | msrs->counters[i].addr = 0; | ||
| 42 | } | 40 | } |
| 43 | 41 | ||
| 44 | for (i = 0; i < num_counters; i++) { | 42 | for (i = 0; i < num_counters; i++) { |
| 45 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 43 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
| 46 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 44 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
| 47 | else | ||
| 48 | msrs->controls[i].addr = 0; | ||
| 49 | } | 45 | } |
| 50 | } | 46 | } |
| 51 | 47 | ||
| @@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 57 | int i; | 53 | int i; |
| 58 | 54 | ||
| 59 | if (!reset_value) { | 55 | if (!reset_value) { |
| 60 | reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, | 56 | reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, |
| 61 | GFP_ATOMIC); | 57 | GFP_ATOMIC); |
| 62 | if (!reset_value) | 58 | if (!reset_value) |
| 63 | return; | 59 | return; |
| @@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 82 | 78 | ||
| 83 | /* clear all counters */ | 79 | /* clear all counters */ |
| 84 | for (i = 0; i < num_counters; ++i) { | 80 | for (i = 0; i < num_counters; ++i) { |
| 85 | if (unlikely(!msrs->controls[i].addr)) | 81 | if (unlikely(!msrs->controls[i].addr)) { |
| 82 | if (counter_config[i].enabled && !smp_processor_id()) | ||
| 83 | /* | ||
| 84 | * counter is reserved, this is on all | ||
| 85 | * cpus, so report only for cpu #0 | ||
| 86 | */ | ||
| 87 | op_x86_warn_reserved(i); | ||
| 86 | continue; | 88 | continue; |
| 89 | } | ||
| 87 | rdmsrl(msrs->controls[i].addr, val); | 90 | rdmsrl(msrs->controls[i].addr, val); |
| 91 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | ||
| 92 | op_x86_warn_in_use(i); | ||
| 88 | val &= model->reserved; | 93 | val &= model->reserved; |
| 89 | wrmsrl(msrs->controls[i].addr, val); | 94 | wrmsrl(msrs->controls[i].addr, val); |
| 90 | } | 95 | } |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 7b8e75d16081..ff82a755edd4 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
| @@ -57,6 +57,26 @@ struct op_x86_model_spec { | |||
| 57 | 57 | ||
| 58 | struct op_counter_config; | 58 | struct op_counter_config; |
| 59 | 59 | ||
| 60 | static inline void op_x86_warn_in_use(int counter) | ||
| 61 | { | ||
| 62 | /* | ||
| 63 | * The warning indicates an already running counter. If | ||
| 64 | * oprofile doesn't collect data, then try using a different | ||
| 65 | * performance counter on your platform to monitor the desired | ||
| 66 | * event. Delete counter #%d from the desired event by editing | ||
| 67 | * the /usr/share/oprofile/%s/<cpu>/events file. If the event | ||
| 68 | * cannot be monitored by any other counter, contact your | ||
| 69 | * hardware or BIOS vendor. | ||
| 70 | */ | ||
| 71 | pr_warning("oprofile: counter #%d on cpu #%d may already be used\n", | ||
| 72 | counter, smp_processor_id()); | ||
| 73 | } | ||
| 74 | |||
| 75 | static inline void op_x86_warn_reserved(int counter) | ||
| 76 | { | ||
| 77 | pr_warning("oprofile: counter #%d is already reserved\n", counter); | ||
| 78 | } | ||
| 79 | |||
| 60 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, | 80 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, |
| 61 | struct op_counter_config *counter_config); | 81 | struct op_counter_config *counter_config); |
| 62 | extern int op_x86_phys_to_virt(int phys); | 82 | extern int op_x86_phys_to_virt(int phys); |
diff --git a/init/Kconfig b/init/Kconfig index d038a57004a2..207ae29354a3 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -1128,7 +1128,7 @@ config MMAP_ALLOW_UNINITIALIZED | |||
| 1128 | See Documentation/nommu-mmap.txt for more information. | 1128 | See Documentation/nommu-mmap.txt for more information. |
| 1129 | 1129 | ||
| 1130 | config PROFILING | 1130 | config PROFILING |
| 1131 | bool "Profiling support (EXPERIMENTAL)" | 1131 | bool "Profiling support" |
| 1132 | help | 1132 | help |
| 1133 | Say Y here to enable the extended profiling support mechanisms used | 1133 | Say Y here to enable the extended profiling support mechanisms used |
| 1134 | by profilers such as OProfile. | 1134 | by profilers such as OProfile. |
