aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/oprofile/nmi_int.c101
-rw-r--r--arch/x86/oprofile/op_model_amd.c272
-rw-r--r--arch/x86/oprofile/op_model_p4.c60
-rw-r--r--arch/x86/oprofile/op_model_ppro.c95
-rw-r--r--arch/x86/oprofile/op_x86_model.h47
-rw-r--r--drivers/oprofile/cpu_buffer.c16
-rw-r--r--include/linux/oprofile.h2
7 files changed, 287 insertions, 306 deletions
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 89b9a5cd63da..93df76dd60f4 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -31,6 +31,26 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
31/* 0 == registered but off, 1 == registered and on */ 31/* 0 == registered but off, 1 == registered and on */
32static int nmi_enabled = 0; 32static int nmi_enabled = 0;
33 33
34/* common functions */
35
36u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
37 struct op_counter_config *counter_config)
38{
39 u64 val = 0;
40 u16 event = (u16)counter_config->event;
41
42 val |= ARCH_PERFMON_EVENTSEL_INT;
43 val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
44 val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
45 val |= (counter_config->unit_mask & 0xFF) << 8;
46 event &= model->event_mask ? model->event_mask : 0xFF;
47 val |= event & 0xFF;
48 val |= (event & 0x0F00) << 24;
49
50 return val;
51}
52
53
34static int profile_exceptions_notify(struct notifier_block *self, 54static int profile_exceptions_notify(struct notifier_block *self,
35 unsigned long val, void *data) 55 unsigned long val, void *data)
36{ 56{
@@ -52,26 +72,18 @@ static int profile_exceptions_notify(struct notifier_block *self,
52 72
53static void nmi_cpu_save_registers(struct op_msrs *msrs) 73static void nmi_cpu_save_registers(struct op_msrs *msrs)
54{ 74{
55 unsigned int const nr_ctrs = model->num_counters;
56 unsigned int const nr_ctrls = model->num_controls;
57 struct op_msr *counters = msrs->counters; 75 struct op_msr *counters = msrs->counters;
58 struct op_msr *controls = msrs->controls; 76 struct op_msr *controls = msrs->controls;
59 unsigned int i; 77 unsigned int i;
60 78
61 for (i = 0; i < nr_ctrs; ++i) { 79 for (i = 0; i < model->num_counters; ++i) {
62 if (counters[i].addr) { 80 if (counters[i].addr)
63 rdmsr(counters[i].addr, 81 rdmsrl(counters[i].addr, counters[i].saved);
64 counters[i].saved.low,
65 counters[i].saved.high);
66 }
67 } 82 }
68 83
69 for (i = 0; i < nr_ctrls; ++i) { 84 for (i = 0; i < model->num_controls; ++i) {
70 if (controls[i].addr) { 85 if (controls[i].addr)
71 rdmsr(controls[i].addr, 86 rdmsrl(controls[i].addr, controls[i].saved);
72 controls[i].saved.low,
73 controls[i].saved.high);
74 }
75 } 87 }
76} 88}
77 89
@@ -126,7 +138,7 @@ static void nmi_cpu_setup(void *dummy)
126 int cpu = smp_processor_id(); 138 int cpu = smp_processor_id();
127 struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); 139 struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
128 spin_lock(&oprofilefs_lock); 140 spin_lock(&oprofilefs_lock);
129 model->setup_ctrs(msrs); 141 model->setup_ctrs(model, msrs);
130 spin_unlock(&oprofilefs_lock); 142 spin_unlock(&oprofilefs_lock);
131 per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); 143 per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
132 apic_write(APIC_LVTPC, APIC_DM_NMI); 144 apic_write(APIC_LVTPC, APIC_DM_NMI);
@@ -178,26 +190,18 @@ static int nmi_setup(void)
178 190
179static void nmi_restore_registers(struct op_msrs *msrs) 191static void nmi_restore_registers(struct op_msrs *msrs)
180{ 192{
181 unsigned int const nr_ctrs = model->num_counters;
182 unsigned int const nr_ctrls = model->num_controls;
183 struct op_msr *counters = msrs->counters; 193 struct op_msr *counters = msrs->counters;
184 struct op_msr *controls = msrs->controls; 194 struct op_msr *controls = msrs->controls;
185 unsigned int i; 195 unsigned int i;
186 196
187 for (i = 0; i < nr_ctrls; ++i) { 197 for (i = 0; i < model->num_controls; ++i) {
188 if (controls[i].addr) { 198 if (controls[i].addr)
189 wrmsr(controls[i].addr, 199 wrmsrl(controls[i].addr, controls[i].saved);
190 controls[i].saved.low,
191 controls[i].saved.high);
192 }
193 } 200 }
194 201
195 for (i = 0; i < nr_ctrs; ++i) { 202 for (i = 0; i < model->num_counters; ++i) {
196 if (counters[i].addr) { 203 if (counters[i].addr)
197 wrmsr(counters[i].addr, 204 wrmsrl(counters[i].addr, counters[i].saved);
198 counters[i].saved.low,
199 counters[i].saved.high);
200 }
201 } 205 }
202} 206}
203 207
@@ -402,6 +406,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
402static int __init ppro_init(char **cpu_type) 406static int __init ppro_init(char **cpu_type)
403{ 407{
404 __u8 cpu_model = boot_cpu_data.x86_model; 408 __u8 cpu_model = boot_cpu_data.x86_model;
409 struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */
405 410
406 if (force_arch_perfmon && cpu_has_arch_perfmon) 411 if (force_arch_perfmon && cpu_has_arch_perfmon)
407 return 0; 412 return 0;
@@ -428,7 +433,7 @@ static int __init ppro_init(char **cpu_type)
428 *cpu_type = "i386/core_2"; 433 *cpu_type = "i386/core_2";
429 break; 434 break;
430 case 26: 435 case 26:
431 arch_perfmon_setup_counters(); 436 spec = &op_arch_perfmon_spec;
432 *cpu_type = "i386/core_i7"; 437 *cpu_type = "i386/core_i7";
433 break; 438 break;
434 case 28: 439 case 28:
@@ -439,17 +444,7 @@ static int __init ppro_init(char **cpu_type)
439 return 0; 444 return 0;
440 } 445 }
441 446
442 model = &op_ppro_spec; 447 model = spec;
443 return 1;
444}
445
446static int __init arch_perfmon_init(char **cpu_type)
447{
448 if (!cpu_has_arch_perfmon)
449 return 0;
450 *cpu_type = "i386/arch_perfmon";
451 model = &op_arch_perfmon_spec;
452 arch_perfmon_setup_counters();
453 return 1; 448 return 1;
454} 449}
455 450
@@ -471,27 +466,26 @@ int __init op_nmi_init(struct oprofile_operations *ops)
471 /* Needs to be at least an Athlon (or hammer in 32bit mode) */ 466 /* Needs to be at least an Athlon (or hammer in 32bit mode) */
472 467
473 switch (family) { 468 switch (family) {
474 default:
475 return -ENODEV;
476 case 6: 469 case 6:
477 model = &op_amd_spec;
478 cpu_type = "i386/athlon"; 470 cpu_type = "i386/athlon";
479 break; 471 break;
480 case 0xf: 472 case 0xf:
481 model = &op_amd_spec; 473 /*
482 /* Actually it could be i386/hammer too, but give 474 * Actually it could be i386/hammer too, but
483 user space an consistent name. */ 475 * give user space an consistent name.
476 */
484 cpu_type = "x86-64/hammer"; 477 cpu_type = "x86-64/hammer";
485 break; 478 break;
486 case 0x10: 479 case 0x10:
487 model = &op_amd_spec;
488 cpu_type = "x86-64/family10"; 480 cpu_type = "x86-64/family10";
489 break; 481 break;
490 case 0x11: 482 case 0x11:
491 model = &op_amd_spec;
492 cpu_type = "x86-64/family11h"; 483 cpu_type = "x86-64/family11h";
493 break; 484 break;
485 default:
486 return -ENODEV;
494 } 487 }
488 model = &op_amd_spec;
495 break; 489 break;
496 490
497 case X86_VENDOR_INTEL: 491 case X86_VENDOR_INTEL:
@@ -510,8 +504,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
510 break; 504 break;
511 } 505 }
512 506
513 if (!cpu_type && !arch_perfmon_init(&cpu_type)) 507 if (cpu_type)
508 break;
509
510 if (!cpu_has_arch_perfmon)
514 return -ENODEV; 511 return -ENODEV;
512
513 /* use arch perfmon as fallback */
514 cpu_type = "i386/arch_perfmon";
515 model = &op_arch_perfmon_spec;
515 break; 516 break;
516 517
517 default: 518 default:
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 8fdf06e4edf9..e95268eb9220 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -25,43 +25,28 @@
25 25
26#define NUM_COUNTERS 4 26#define NUM_COUNTERS 4
27#define NUM_CONTROLS 4 27#define NUM_CONTROLS 4
28#define OP_EVENT_MASK 0x0FFF
29#define OP_CTR_OVERFLOW (1ULL<<31)
28 30
29#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) 31#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
30#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
31#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
32#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
33
34#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
35#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
36#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
37#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
38#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
39#define CTRL_CLEAR_LO(x) (x &= (1<<21))
40#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
41#define CTRL_SET_ENABLE(val) (val |= 1<<20)
42#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
43#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
44#define CTRL_SET_UM(val, m) (val |= (m << 8))
45#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
46#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
47#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
48#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
49 32
50static unsigned long reset_value[NUM_COUNTERS]; 33static unsigned long reset_value[NUM_COUNTERS];
51 34
52#ifdef CONFIG_OPROFILE_IBS 35#ifdef CONFIG_OPROFILE_IBS
53 36
54/* IbsFetchCtl bits/masks */ 37/* IbsFetchCtl bits/masks */
55#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ 38#define IBS_FETCH_RAND_EN (1ULL<<57)
56#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ 39#define IBS_FETCH_VAL (1ULL<<49)
57#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ 40#define IBS_FETCH_ENABLE (1ULL<<48)
41#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL
58 42
59/*IbsOpCtl bits */ 43/*IbsOpCtl bits */
60#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ 44#define IBS_OP_CNT_CTL (1ULL<<19)
61#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ 45#define IBS_OP_VAL (1ULL<<18)
46#define IBS_OP_ENABLE (1ULL<<17)
62 47
63#define IBS_FETCH_SIZE 6 48#define IBS_FETCH_SIZE 6
64#define IBS_OP_SIZE 12 49#define IBS_OP_SIZE 12
65 50
66static int has_ibs; /* AMD Family10h and later */ 51static int has_ibs; /* AMD Family10h and later */
67 52
@@ -99,49 +84,38 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
99 } 84 }
100} 85}
101 86
102 87static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
103static void op_amd_setup_ctrs(struct op_msrs const * const msrs) 88 struct op_msrs const * const msrs)
104{ 89{
105 unsigned int low, high; 90 u64 val;
106 int i; 91 int i;
107 92
108 /* clear all counters */ 93 /* clear all counters */
109 for (i = 0 ; i < NUM_CONTROLS; ++i) { 94 for (i = 0 ; i < NUM_CONTROLS; ++i) {
110 if (unlikely(!CTRL_IS_RESERVED(msrs, i))) 95 if (unlikely(!msrs->controls[i].addr))
111 continue; 96 continue;
112 CTRL_READ(low, high, msrs, i); 97 rdmsrl(msrs->controls[i].addr, val);
113 CTRL_CLEAR_LO(low); 98 val &= model->reserved;
114 CTRL_CLEAR_HI(high); 99 wrmsrl(msrs->controls[i].addr, val);
115 CTRL_WRITE(low, high, msrs, i);
116 } 100 }
117 101
118 /* avoid a false detection of ctr overflows in NMI handler */ 102 /* avoid a false detection of ctr overflows in NMI handler */
119 for (i = 0; i < NUM_COUNTERS; ++i) { 103 for (i = 0; i < NUM_COUNTERS; ++i) {
120 if (unlikely(!CTR_IS_RESERVED(msrs, i))) 104 if (unlikely(!msrs->counters[i].addr))
121 continue; 105 continue;
122 CTR_WRITE(1, msrs, i); 106 wrmsrl(msrs->counters[i].addr, -1LL);
123 } 107 }
124 108
125 /* enable active counters */ 109 /* enable active counters */
126 for (i = 0; i < NUM_COUNTERS; ++i) { 110 for (i = 0; i < NUM_COUNTERS; ++i) {
127 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { 111 if (counter_config[i].enabled && msrs->counters[i].addr) {
128 reset_value[i] = counter_config[i].count; 112 reset_value[i] = counter_config[i].count;
129 113 wrmsrl(msrs->counters[i].addr,
130 CTR_WRITE(counter_config[i].count, msrs, i); 114 -(s64)counter_config[i].count);
131 115 rdmsrl(msrs->controls[i].addr, val);
132 CTRL_READ(low, high, msrs, i); 116 val &= model->reserved;
133 CTRL_CLEAR_LO(low); 117 val |= op_x86_get_ctrl(model, &counter_config[i]);
134 CTRL_CLEAR_HI(high); 118 wrmsrl(msrs->controls[i].addr, val);
135 CTRL_SET_ENABLE(low);
136 CTRL_SET_USR(low, counter_config[i].user);
137 CTRL_SET_KERN(low, counter_config[i].kernel);
138 CTRL_SET_UM(low, counter_config[i].unit_mask);
139 CTRL_SET_EVENT_LOW(low, counter_config[i].event);
140 CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
141 CTRL_SET_HOST_ONLY(high, 0);
142 CTRL_SET_GUEST_ONLY(high, 0);
143
144 CTRL_WRITE(low, high, msrs, i);
145 } else { 119 } else {
146 reset_value[i] = 0; 120 reset_value[i] = 0;
147 } 121 }
@@ -154,93 +128,119 @@ static inline int
154op_amd_handle_ibs(struct pt_regs * const regs, 128op_amd_handle_ibs(struct pt_regs * const regs,
155 struct op_msrs const * const msrs) 129 struct op_msrs const * const msrs)
156{ 130{
157 u32 low, high; 131 u64 val, ctl;
158 u64 msr;
159 struct op_entry entry; 132 struct op_entry entry;
160 133
161 if (!has_ibs) 134 if (!has_ibs)
162 return 1; 135 return 0;
163 136
164 if (ibs_config.fetch_enabled) { 137 if (ibs_config.fetch_enabled) {
165 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 138 rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
166 if (high & IBS_FETCH_HIGH_VALID_BIT) { 139 if (ctl & IBS_FETCH_VAL) {
167 rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); 140 rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
168 oprofile_write_reserve(&entry, regs, msr, 141 oprofile_write_reserve(&entry, regs, val,
169 IBS_FETCH_CODE, IBS_FETCH_SIZE); 142 IBS_FETCH_CODE, IBS_FETCH_SIZE);
170 oprofile_add_data(&entry, (u32)msr); 143 oprofile_add_data64(&entry, val);
171 oprofile_add_data(&entry, (u32)(msr >> 32)); 144 oprofile_add_data64(&entry, ctl);
172 oprofile_add_data(&entry, low); 145 rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
173 oprofile_add_data(&entry, high); 146 oprofile_add_data64(&entry, val);
174 rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
175 oprofile_add_data(&entry, (u32)msr);
176 oprofile_add_data(&entry, (u32)(msr >> 32));
177 oprofile_write_commit(&entry); 147 oprofile_write_commit(&entry);
178 148
179 /* reenable the IRQ */ 149 /* reenable the IRQ */
180 high &= ~IBS_FETCH_HIGH_VALID_BIT; 150 ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK);
181 high |= IBS_FETCH_HIGH_ENABLE; 151 ctl |= IBS_FETCH_ENABLE;
182 low &= IBS_FETCH_LOW_MAX_CNT_MASK; 152 wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
183 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
184 } 153 }
185 } 154 }
186 155
187 if (ibs_config.op_enabled) { 156 if (ibs_config.op_enabled) {
188 rdmsr(MSR_AMD64_IBSOPCTL, low, high); 157 rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
189 if (low & IBS_OP_LOW_VALID_BIT) { 158 if (ctl & IBS_OP_VAL) {
190 rdmsrl(MSR_AMD64_IBSOPRIP, msr); 159 rdmsrl(MSR_AMD64_IBSOPRIP, val);
191 oprofile_write_reserve(&entry, regs, msr, 160 oprofile_write_reserve(&entry, regs, val,
192 IBS_OP_CODE, IBS_OP_SIZE); 161 IBS_OP_CODE, IBS_OP_SIZE);
193 oprofile_add_data(&entry, (u32)msr); 162 oprofile_add_data64(&entry, val);
194 oprofile_add_data(&entry, (u32)(msr >> 32)); 163 rdmsrl(MSR_AMD64_IBSOPDATA, val);
195 rdmsrl(MSR_AMD64_IBSOPDATA, msr); 164 oprofile_add_data64(&entry, val);
196 oprofile_add_data(&entry, (u32)msr); 165 rdmsrl(MSR_AMD64_IBSOPDATA2, val);
197 oprofile_add_data(&entry, (u32)(msr >> 32)); 166 oprofile_add_data64(&entry, val);
198 rdmsrl(MSR_AMD64_IBSOPDATA2, msr); 167 rdmsrl(MSR_AMD64_IBSOPDATA3, val);
199 oprofile_add_data(&entry, (u32)msr); 168 oprofile_add_data64(&entry, val);
200 oprofile_add_data(&entry, (u32)(msr >> 32)); 169 rdmsrl(MSR_AMD64_IBSDCLINAD, val);
201 rdmsrl(MSR_AMD64_IBSOPDATA3, msr); 170 oprofile_add_data64(&entry, val);
202 oprofile_add_data(&entry, (u32)msr); 171 rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
203 oprofile_add_data(&entry, (u32)(msr >> 32)); 172 oprofile_add_data64(&entry, val);
204 rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
205 oprofile_add_data(&entry, (u32)msr);
206 oprofile_add_data(&entry, (u32)(msr >> 32));
207 rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
208 oprofile_add_data(&entry, (u32)msr);
209 oprofile_add_data(&entry, (u32)(msr >> 32));
210 oprofile_write_commit(&entry); 173 oprofile_write_commit(&entry);
211 174
212 /* reenable the IRQ */ 175 /* reenable the IRQ */
213 high = 0; 176 ctl &= ~IBS_OP_VAL & 0xFFFFFFFF;
214 low &= ~IBS_OP_LOW_VALID_BIT; 177 ctl |= IBS_OP_ENABLE;
215 low |= IBS_OP_LOW_ENABLE; 178 wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
216 wrmsr(MSR_AMD64_IBSOPCTL, low, high);
217 } 179 }
218 } 180 }
219 181
220 return 1; 182 return 1;
221} 183}
222 184
185static inline void op_amd_start_ibs(void)
186{
187 u64 val;
188 if (has_ibs && ibs_config.fetch_enabled) {
189 val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
190 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
191 val |= IBS_FETCH_ENABLE;
192 wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
193 }
194
195 if (has_ibs && ibs_config.op_enabled) {
196 val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
197 val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
198 val |= IBS_OP_ENABLE;
199 wrmsrl(MSR_AMD64_IBSOPCTL, val);
200 }
201}
202
203static void op_amd_stop_ibs(void)
204{
205 if (has_ibs && ibs_config.fetch_enabled)
206 /* clear max count and enable */
207 wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
208
209 if (has_ibs && ibs_config.op_enabled)
210 /* clear max count and enable */
211 wrmsrl(MSR_AMD64_IBSOPCTL, 0);
212}
213
214#else
215
216static inline int op_amd_handle_ibs(struct pt_regs * const regs,
217 struct op_msrs const * const msrs)
218{
219 return 0;
220}
221static inline void op_amd_start_ibs(void) { }
222static inline void op_amd_stop_ibs(void) { }
223
223#endif 224#endif
224 225
225static int op_amd_check_ctrs(struct pt_regs * const regs, 226static int op_amd_check_ctrs(struct pt_regs * const regs,
226 struct op_msrs const * const msrs) 227 struct op_msrs const * const msrs)
227{ 228{
228 unsigned int low, high; 229 u64 val;
229 int i; 230 int i;
230 231
231 for (i = 0 ; i < NUM_COUNTERS; ++i) { 232 for (i = 0 ; i < NUM_COUNTERS; ++i) {
232 if (!reset_value[i]) 233 if (!reset_value[i])
233 continue; 234 continue;
234 CTR_READ(low, high, msrs, i); 235 rdmsrl(msrs->counters[i].addr, val);
235 if (CTR_OVERFLOWED(low)) { 236 /* bit is clear if overflowed: */
236 oprofile_add_sample(regs, i); 237 if (val & OP_CTR_OVERFLOW)
237 CTR_WRITE(reset_value[i], msrs, i); 238 continue;
238 } 239 oprofile_add_sample(regs, i);
240 wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]);
239 } 241 }
240 242
241#ifdef CONFIG_OPROFILE_IBS
242 op_amd_handle_ibs(regs, msrs); 243 op_amd_handle_ibs(regs, msrs);
243#endif
244 244
245 /* See op_model_ppro.c */ 245 /* See op_model_ppro.c */
246 return 1; 246 return 1;
@@ -248,38 +248,22 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
248 248
249static void op_amd_start(struct op_msrs const * const msrs) 249static void op_amd_start(struct op_msrs const * const msrs)
250{ 250{
251 unsigned int low, high; 251 u64 val;
252 int i; 252 int i;
253 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 253 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
254 if (reset_value[i]) { 254 if (reset_value[i]) {
255 CTRL_READ(low, high, msrs, i); 255 rdmsrl(msrs->controls[i].addr, val);
256 CTRL_SET_ACTIVE(low); 256 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
257 CTRL_WRITE(low, high, msrs, i); 257 wrmsrl(msrs->controls[i].addr, val);
258 } 258 }
259 } 259 }
260 260
261#ifdef CONFIG_OPROFILE_IBS 261 op_amd_start_ibs();
262 if (has_ibs && ibs_config.fetch_enabled) {
263 low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
264 high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
265 + IBS_FETCH_HIGH_ENABLE;
266 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
267 }
268
269 if (has_ibs && ibs_config.op_enabled) {
270 low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
271 + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
272 + IBS_OP_LOW_ENABLE;
273 high = 0;
274 wrmsr(MSR_AMD64_IBSOPCTL, low, high);
275 }
276#endif
277} 262}
278 263
279
280static void op_amd_stop(struct op_msrs const * const msrs) 264static void op_amd_stop(struct op_msrs const * const msrs)
281{ 265{
282 unsigned int low, high; 266 u64 val;
283 int i; 267 int i;
284 268
285 /* 269 /*
@@ -289,26 +273,12 @@ static void op_amd_stop(struct op_msrs const * const msrs)
289 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 273 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
290 if (!reset_value[i]) 274 if (!reset_value[i])
291 continue; 275 continue;
292 CTRL_READ(low, high, msrs, i); 276 rdmsrl(msrs->controls[i].addr, val);
293 CTRL_SET_INACTIVE(low); 277 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
294 CTRL_WRITE(low, high, msrs, i); 278 wrmsrl(msrs->controls[i].addr, val);
295 }
296
297#ifdef CONFIG_OPROFILE_IBS
298 if (has_ibs && ibs_config.fetch_enabled) {
299 /* clear max count and enable */
300 low = 0;
301 high = 0;
302 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
303 } 279 }
304 280
305 if (has_ibs && ibs_config.op_enabled) { 281 op_amd_stop_ibs();
306 /* clear max count and enable */
307 low = 0;
308 high = 0;
309 wrmsr(MSR_AMD64_IBSOPCTL, low, high);
310 }
311#endif
312} 282}
313 283
314static void op_amd_shutdown(struct op_msrs const * const msrs) 284static void op_amd_shutdown(struct op_msrs const * const msrs)
@@ -316,11 +286,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
316 int i; 286 int i;
317 287
318 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 288 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
319 if (CTR_IS_RESERVED(msrs, i)) 289 if (msrs->counters[i].addr)
320 release_perfctr_nmi(MSR_K7_PERFCTR0 + i); 290 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
321 } 291 }
322 for (i = 0 ; i < NUM_CONTROLS ; ++i) { 292 for (i = 0 ; i < NUM_CONTROLS ; ++i) {
323 if (CTRL_IS_RESERVED(msrs, i)) 293 if (msrs->controls[i].addr)
324 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); 294 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
325 } 295 }
326} 296}
@@ -491,14 +461,16 @@ static void op_amd_exit(void) {}
491#endif /* CONFIG_OPROFILE_IBS */ 461#endif /* CONFIG_OPROFILE_IBS */
492 462
493struct op_x86_model_spec const op_amd_spec = { 463struct op_x86_model_spec const op_amd_spec = {
494 .init = op_amd_init,
495 .exit = op_amd_exit,
496 .num_counters = NUM_COUNTERS, 464 .num_counters = NUM_COUNTERS,
497 .num_controls = NUM_CONTROLS, 465 .num_controls = NUM_CONTROLS,
466 .reserved = MSR_AMD_EVENTSEL_RESERVED,
467 .event_mask = OP_EVENT_MASK,
468 .init = op_amd_init,
469 .exit = op_amd_exit,
498 .fill_in_addresses = &op_amd_fill_in_addresses, 470 .fill_in_addresses = &op_amd_fill_in_addresses,
499 .setup_ctrs = &op_amd_setup_ctrs, 471 .setup_ctrs = &op_amd_setup_ctrs,
500 .check_ctrs = &op_amd_check_ctrs, 472 .check_ctrs = &op_amd_check_ctrs,
501 .start = &op_amd_start, 473 .start = &op_amd_start,
502 .stop = &op_amd_stop, 474 .stop = &op_amd_stop,
503 .shutdown = &op_amd_shutdown 475 .shutdown = &op_amd_shutdown,
504}; 476};
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 819b131fd752..f01e53b118fa 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -32,6 +32,8 @@
32#define NUM_CCCRS_HT2 9 32#define NUM_CCCRS_HT2 9
33#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) 33#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
34 34
35#define OP_CTR_OVERFLOW (1ULL<<31)
36
35static unsigned int num_counters = NUM_COUNTERS_NON_HT; 37static unsigned int num_counters = NUM_COUNTERS_NON_HT;
36static unsigned int num_controls = NUM_CONTROLS_NON_HT; 38static unsigned int num_controls = NUM_CONTROLS_NON_HT;
37 39
@@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
350#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) 352#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
351#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) 353#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
352#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) 354#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
353#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
354#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
355 355
356#define CCCR_RESERVED_BITS 0x38030FFF 356#define CCCR_RESERVED_BITS 0x38030FFF
357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) 357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) 361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) 362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) 363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
364#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
365#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
366#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) 364#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
367#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) 365#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
368 366
369#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
370#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
371#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
372#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
373#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
374
375 367
376/* this assigns a "stagger" to the current CPU, which is used throughout 368/* this assigns a "stagger" to the current CPU, which is used throughout
377 the code in this module as an extra array offset, to select the "even" 369 the code in this module as an extra array offset, to select the "even"
@@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
515 if (ev->bindings[i].virt_counter & counter_bit) { 507 if (ev->bindings[i].virt_counter & counter_bit) {
516 508
517 /* modify ESCR */ 509 /* modify ESCR */
518 ESCR_READ(escr, high, ev, i); 510 rdmsr(ev->bindings[i].escr_address, escr, high);
519 ESCR_CLEAR(escr); 511 ESCR_CLEAR(escr);
520 if (stag == 0) { 512 if (stag == 0) {
521 ESCR_SET_USR_0(escr, counter_config[ctr].user); 513 ESCR_SET_USR_0(escr, counter_config[ctr].user);
@@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
526 } 518 }
527 ESCR_SET_EVENT_SELECT(escr, ev->event_select); 519 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
528 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); 520 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
529 ESCR_WRITE(escr, high, ev, i); 521 wrmsr(ev->bindings[i].escr_address, escr, high);
530 522
531 /* modify CCCR */ 523 /* modify CCCR */
532 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); 524 rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
525 cccr, high);
533 CCCR_CLEAR(cccr); 526 CCCR_CLEAR(cccr);
534 CCCR_SET_REQUIRED_BITS(cccr); 527 CCCR_SET_REQUIRED_BITS(cccr);
535 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); 528 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
@@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
537 CCCR_SET_PMI_OVF_0(cccr); 530 CCCR_SET_PMI_OVF_0(cccr);
538 else 531 else
539 CCCR_SET_PMI_OVF_1(cccr); 532 CCCR_SET_PMI_OVF_1(cccr);
540 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); 533 wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
534 cccr, high);
541 return; 535 return;
542 } 536 }
543 } 537 }
@@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
548} 542}
549 543
550 544
551static void p4_setup_ctrs(struct op_msrs const * const msrs) 545static void p4_setup_ctrs(struct op_x86_model_spec const *model,
546 struct op_msrs const * const msrs)
552{ 547{
553 unsigned int i; 548 unsigned int i;
554 unsigned int low, high; 549 unsigned int low, high;
@@ -564,7 +559,7 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
564 559
565 /* clear the cccrs we will use */ 560 /* clear the cccrs we will use */
566 for (i = 0 ; i < num_counters ; i++) { 561 for (i = 0 ; i < num_counters ; i++) {
567 if (unlikely(!CTRL_IS_RESERVED(msrs, i))) 562 if (unlikely(!msrs->controls[i].addr))
568 continue; 563 continue;
569 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 564 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
570 CCCR_CLEAR(low); 565 CCCR_CLEAR(low);
@@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
574 569
575 /* clear all escrs (including those outside our concern) */ 570 /* clear all escrs (including those outside our concern) */
576 for (i = num_counters; i < num_controls; i++) { 571 for (i = num_counters; i < num_controls; i++) {
577 if (unlikely(!CTRL_IS_RESERVED(msrs, i))) 572 if (unlikely(!msrs->controls[i].addr))
578 continue; 573 continue;
579 wrmsr(msrs->controls[i].addr, 0, 0); 574 wrmsr(msrs->controls[i].addr, 0, 0);
580 } 575 }
581 576
582 /* setup all counters */ 577 /* setup all counters */
583 for (i = 0 ; i < num_counters ; ++i) { 578 for (i = 0 ; i < num_counters ; ++i) {
584 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { 579 if (counter_config[i].enabled && msrs->controls[i].addr) {
585 reset_value[i] = counter_config[i].count; 580 reset_value[i] = counter_config[i].count;
586 pmc_setup_one_p4_counter(i); 581 pmc_setup_one_p4_counter(i);
587 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); 582 wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
583 -(s64)counter_config[i].count);
588 } else { 584 } else {
589 reset_value[i] = 0; 585 reset_value[i] = 0;
590 } 586 }
@@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs,
624 620
625 real = VIRT_CTR(stag, i); 621 real = VIRT_CTR(stag, i);
626 622
627 CCCR_READ(low, high, real); 623 rdmsr(p4_counters[real].cccr_address, low, high);
628 CTR_READ(ctr, high, real); 624 rdmsr(p4_counters[real].counter_address, ctr, high);
629 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { 625 if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
630 oprofile_add_sample(regs, i); 626 oprofile_add_sample(regs, i);
631 CTR_WRITE(reset_value[i], real); 627 wrmsrl(p4_counters[real].counter_address,
628 -(s64)reset_value[i]);
632 CCCR_CLEAR_OVF(low); 629 CCCR_CLEAR_OVF(low);
633 CCCR_WRITE(low, high, real); 630 wrmsr(p4_counters[real].cccr_address, low, high);
634 CTR_WRITE(reset_value[i], real); 631 wrmsrl(p4_counters[real].counter_address,
632 -(s64)reset_value[i]);
635 } 633 }
636 } 634 }
637 635
@@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs)
653 for (i = 0; i < num_counters; ++i) { 651 for (i = 0; i < num_counters; ++i) {
654 if (!reset_value[i]) 652 if (!reset_value[i])
655 continue; 653 continue;
656 CCCR_READ(low, high, VIRT_CTR(stag, i)); 654 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
657 CCCR_SET_ENABLE(low); 655 CCCR_SET_ENABLE(low);
658 CCCR_WRITE(low, high, VIRT_CTR(stag, i)); 656 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
659 } 657 }
660} 658}
661 659
@@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs)
670 for (i = 0; i < num_counters; ++i) { 668 for (i = 0; i < num_counters; ++i) {
671 if (!reset_value[i]) 669 if (!reset_value[i])
672 continue; 670 continue;
673 CCCR_READ(low, high, VIRT_CTR(stag, i)); 671 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
674 CCCR_SET_DISABLE(low); 672 CCCR_SET_DISABLE(low);
675 CCCR_WRITE(low, high, VIRT_CTR(stag, i)); 673 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
676 } 674 }
677} 675}
678 676
@@ -681,7 +679,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
681 int i; 679 int i;
682 680
683 for (i = 0 ; i < num_counters ; ++i) { 681 for (i = 0 ; i < num_counters ; ++i) {
684 if (CTR_IS_RESERVED(msrs, i)) 682 if (msrs->counters[i].addr)
685 release_perfctr_nmi(msrs->counters[i].addr); 683 release_perfctr_nmi(msrs->counters[i].addr);
686 } 684 }
687 /* 685 /*
@@ -690,7 +688,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
690 * This saves a few bits. 688 * This saves a few bits.
691 */ 689 */
692 for (i = num_counters ; i < num_controls ; ++i) { 690 for (i = num_counters ; i < num_controls ; ++i) {
693 if (CTRL_IS_RESERVED(msrs, i)) 691 if (msrs->controls[i].addr)
694 release_evntsel_nmi(msrs->controls[i].addr); 692 release_evntsel_nmi(msrs->controls[i].addr);
695 } 693 }
696} 694}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 4da7230b3d17..cd72d5c73b49 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -10,6 +10,7 @@
10 * @author Philippe Elie 10 * @author Philippe Elie
11 * @author Graydon Hoare 11 * @author Graydon Hoare
12 * @author Andi Kleen 12 * @author Andi Kleen
13 * @author Robert Richter <robert.richter@amd.com>
13 */ 14 */
14 15
15#include <linux/oprofile.h> 16#include <linux/oprofile.h>
@@ -18,7 +19,6 @@
18#include <asm/msr.h> 19#include <asm/msr.h>
19#include <asm/apic.h> 20#include <asm/apic.h>
20#include <asm/nmi.h> 21#include <asm/nmi.h>
21#include <asm/perf_counter.h>
22 22
23#include "op_x86_model.h" 23#include "op_x86_model.h"
24#include "op_counter.h" 24#include "op_counter.h"
@@ -26,20 +26,7 @@
26static int num_counters = 2; 26static int num_counters = 2;
27static int counter_width = 32; 27static int counter_width = 32;
28 28
29#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) 29#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))
30#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
31
32#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
33#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
34#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
35#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
36#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
37#define CTRL_CLEAR(x) (x &= (1<<21))
38#define CTRL_SET_ENABLE(val) (val |= 1<<20)
39#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
40#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
41#define CTRL_SET_UM(val, m) (val |= (m << 8))
42#define CTRL_SET_EVENT(val, e) (val |= e)
43 30
44static u64 *reset_value; 31static u64 *reset_value;
45 32
@@ -63,9 +50,10 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
63} 50}
64 51
65 52
66static void ppro_setup_ctrs(struct op_msrs const * const msrs) 53static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
54 struct op_msrs const * const msrs)
67{ 55{
68 unsigned int low, high; 56 u64 val;
69 int i; 57 int i;
70 58
71 if (!reset_value) { 59 if (!reset_value) {
@@ -94,35 +82,29 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
94 82
95 /* clear all counters */ 83 /* clear all counters */
96 for (i = 0 ; i < num_counters; ++i) { 84 for (i = 0 ; i < num_counters; ++i) {
97 if (unlikely(!CTRL_IS_RESERVED(msrs, i))) 85 if (unlikely(!msrs->controls[i].addr))
98 continue; 86 continue;
99 CTRL_READ(low, high, msrs, i); 87 rdmsrl(msrs->controls[i].addr, val);
100 CTRL_CLEAR(low); 88 val &= model->reserved;
101 CTRL_WRITE(low, high, msrs, i); 89 wrmsrl(msrs->controls[i].addr, val);
102 } 90 }
103 91
104 /* avoid a false detection of ctr overflows in NMI handler */ 92 /* avoid a false detection of ctr overflows in NMI handler */
105 for (i = 0; i < num_counters; ++i) { 93 for (i = 0; i < num_counters; ++i) {
106 if (unlikely(!CTR_IS_RESERVED(msrs, i))) 94 if (unlikely(!msrs->counters[i].addr))
107 continue; 95 continue;
108 wrmsrl(msrs->counters[i].addr, -1LL); 96 wrmsrl(msrs->counters[i].addr, -1LL);
109 } 97 }
110 98
111 /* enable active counters */ 99 /* enable active counters */
112 for (i = 0; i < num_counters; ++i) { 100 for (i = 0; i < num_counters; ++i) {
113 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { 101 if (counter_config[i].enabled && msrs->counters[i].addr) {
114 reset_value[i] = counter_config[i].count; 102 reset_value[i] = counter_config[i].count;
115
116 wrmsrl(msrs->counters[i].addr, -reset_value[i]); 103 wrmsrl(msrs->counters[i].addr, -reset_value[i]);
117 104 rdmsrl(msrs->controls[i].addr, val);
118 CTRL_READ(low, high, msrs, i); 105 val &= model->reserved;
119 CTRL_CLEAR(low); 106 val |= op_x86_get_ctrl(model, &counter_config[i]);
120 CTRL_SET_ENABLE(low); 107 wrmsrl(msrs->controls[i].addr, val);
121 CTRL_SET_USR(low, counter_config[i].user);
122 CTRL_SET_KERN(low, counter_config[i].kernel);
123 CTRL_SET_UM(low, counter_config[i].unit_mask);
124 CTRL_SET_EVENT(low, counter_config[i].event);
125 CTRL_WRITE(low, high, msrs, i);
126 } else { 108 } else {
127 reset_value[i] = 0; 109 reset_value[i] = 0;
128 } 110 }
@@ -147,10 +129,10 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
147 if (!reset_value[i]) 129 if (!reset_value[i])
148 continue; 130 continue;
149 rdmsrl(msrs->counters[i].addr, val); 131 rdmsrl(msrs->counters[i].addr, val);
150 if (CTR_OVERFLOWED(val)) { 132 if (val & (1ULL << (counter_width - 1)))
151 oprofile_add_sample(regs, i); 133 continue;
152 wrmsrl(msrs->counters[i].addr, -reset_value[i]); 134 oprofile_add_sample(regs, i);
153 } 135 wrmsrl(msrs->counters[i].addr, -reset_value[i]);
154 } 136 }
155 137
156out: 138out:
@@ -171,16 +153,16 @@ out:
171 153
172static void ppro_start(struct op_msrs const * const msrs) 154static void ppro_start(struct op_msrs const * const msrs)
173{ 155{
174 unsigned int low, high; 156 u64 val;
175 int i; 157 int i;
176 158
177 if (!reset_value) 159 if (!reset_value)
178 return; 160 return;
179 for (i = 0; i < num_counters; ++i) { 161 for (i = 0; i < num_counters; ++i) {
180 if (reset_value[i]) { 162 if (reset_value[i]) {
181 CTRL_READ(low, high, msrs, i); 163 rdmsrl(msrs->controls[i].addr, val);
182 CTRL_SET_ACTIVE(low); 164 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
183 CTRL_WRITE(low, high, msrs, i); 165 wrmsrl(msrs->controls[i].addr, val);
184 } 166 }
185 } 167 }
186} 168}
@@ -188,7 +170,7 @@ static void ppro_start(struct op_msrs const * const msrs)
188 170
189static void ppro_stop(struct op_msrs const * const msrs) 171static void ppro_stop(struct op_msrs const * const msrs)
190{ 172{
191 unsigned int low, high; 173 u64 val;
192 int i; 174 int i;
193 175
194 if (!reset_value) 176 if (!reset_value)
@@ -196,9 +178,9 @@ static void ppro_stop(struct op_msrs const * const msrs)
196 for (i = 0; i < num_counters; ++i) { 178 for (i = 0; i < num_counters; ++i) {
197 if (!reset_value[i]) 179 if (!reset_value[i])
198 continue; 180 continue;
199 CTRL_READ(low, high, msrs, i); 181 rdmsrl(msrs->controls[i].addr, val);
200 CTRL_SET_INACTIVE(low); 182 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
201 CTRL_WRITE(low, high, msrs, i); 183 wrmsrl(msrs->controls[i].addr, val);
202 } 184 }
203} 185}
204 186
@@ -207,11 +189,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
207 int i; 189 int i;
208 190
209 for (i = 0 ; i < num_counters ; ++i) { 191 for (i = 0 ; i < num_counters ; ++i) {
210 if (CTR_IS_RESERVED(msrs, i)) 192 if (msrs->counters[i].addr)
211 release_perfctr_nmi(MSR_P6_PERFCTR0 + i); 193 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
212 } 194 }
213 for (i = 0 ; i < num_counters ; ++i) { 195 for (i = 0 ; i < num_counters ; ++i) {
214 if (CTRL_IS_RESERVED(msrs, i)) 196 if (msrs->controls[i].addr)
215 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); 197 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
216 } 198 }
217 if (reset_value) { 199 if (reset_value) {
@@ -221,9 +203,10 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
221} 203}
222 204
223 205
224struct op_x86_model_spec op_ppro_spec = { 206struct op_x86_model_spec const op_ppro_spec = {
225 .num_counters = 2, /* can be overriden */ 207 .num_counters = 2,
226 .num_controls = 2, /* dito */ 208 .num_controls = 2,
209 .reserved = MSR_PPRO_EVENTSEL_RESERVED,
227 .fill_in_addresses = &ppro_fill_in_addresses, 210 .fill_in_addresses = &ppro_fill_in_addresses,
228 .setup_ctrs = &ppro_setup_ctrs, 211 .setup_ctrs = &ppro_setup_ctrs,
229 .check_ctrs = &ppro_check_ctrs, 212 .check_ctrs = &ppro_check_ctrs,
@@ -241,7 +224,7 @@ struct op_x86_model_spec op_ppro_spec = {
241 * the specific CPU. 224 * the specific CPU.
242 */ 225 */
243 226
244void arch_perfmon_setup_counters(void) 227static void arch_perfmon_setup_counters(void)
245{ 228{
246 union cpuid10_eax eax; 229 union cpuid10_eax eax;
247 230
@@ -259,11 +242,17 @@ void arch_perfmon_setup_counters(void)
259 242
260 op_arch_perfmon_spec.num_counters = num_counters; 243 op_arch_perfmon_spec.num_counters = num_counters;
261 op_arch_perfmon_spec.num_controls = num_counters; 244 op_arch_perfmon_spec.num_controls = num_counters;
262 op_ppro_spec.num_counters = num_counters; 245}
263 op_ppro_spec.num_controls = num_counters; 246
247static int arch_perfmon_init(struct oprofile_operations *ignore)
248{
249 arch_perfmon_setup_counters();
250 return 0;
264} 251}
265 252
266struct op_x86_model_spec op_arch_perfmon_spec = { 253struct op_x86_model_spec op_arch_perfmon_spec = {
254 .reserved = MSR_PPRO_EVENTSEL_RESERVED,
255 .init = &arch_perfmon_init,
267 /* num_counters/num_controls filled in at runtime */ 256 /* num_counters/num_controls filled in at runtime */
268 .fill_in_addresses = &ppro_fill_in_addresses, 257 .fill_in_addresses = &ppro_fill_in_addresses,
269 /* user space does the cpuid check for available events */ 258 /* user space does the cpuid check for available events */
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 825e79064d64..505489873b9d 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -6,19 +6,18 @@
6 * @remark Read the file COPYING 6 * @remark Read the file COPYING
7 * 7 *
8 * @author Graydon Hoare 8 * @author Graydon Hoare
9 * @author Robert Richter <robert.richter@amd.com>
9 */ 10 */
10 11
11#ifndef OP_X86_MODEL_H 12#ifndef OP_X86_MODEL_H
12#define OP_X86_MODEL_H 13#define OP_X86_MODEL_H
13 14
14struct op_saved_msr { 15#include <asm/types.h>
15 unsigned int high; 16#include <asm/perf_counter.h>
16 unsigned int low;
17};
18 17
19struct op_msr { 18struct op_msr {
20 unsigned long addr; 19 unsigned long addr;
21 struct op_saved_msr saved; 20 u64 saved;
22}; 21};
23 22
24struct op_msrs { 23struct op_msrs {
@@ -28,29 +27,37 @@ struct op_msrs {
28 27
29struct pt_regs; 28struct pt_regs;
30 29
30struct oprofile_operations;
31
31/* The model vtable abstracts the differences between 32/* The model vtable abstracts the differences between
32 * various x86 CPU models' perfctr support. 33 * various x86 CPU models' perfctr support.
33 */ 34 */
34struct op_x86_model_spec { 35struct op_x86_model_spec {
35 int (*init)(struct oprofile_operations *ops); 36 unsigned int num_counters;
36 void (*exit)(void); 37 unsigned int num_controls;
37 unsigned int num_counters; 38 u64 reserved;
38 unsigned int num_controls; 39 u16 event_mask;
39 void (*fill_in_addresses)(struct op_msrs * const msrs); 40 int (*init)(struct oprofile_operations *ops);
40 void (*setup_ctrs)(struct op_msrs const * const msrs); 41 void (*exit)(void);
41 int (*check_ctrs)(struct pt_regs * const regs, 42 void (*fill_in_addresses)(struct op_msrs * const msrs);
42 struct op_msrs const * const msrs); 43 void (*setup_ctrs)(struct op_x86_model_spec const *model,
43 void (*start)(struct op_msrs const * const msrs); 44 struct op_msrs const * const msrs);
44 void (*stop)(struct op_msrs const * const msrs); 45 int (*check_ctrs)(struct pt_regs * const regs,
45 void (*shutdown)(struct op_msrs const * const msrs); 46 struct op_msrs const * const msrs);
47 void (*start)(struct op_msrs const * const msrs);
48 void (*stop)(struct op_msrs const * const msrs);
49 void (*shutdown)(struct op_msrs const * const msrs);
46}; 50};
47 51
48extern struct op_x86_model_spec op_ppro_spec; 52struct op_counter_config;
53
54extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
55 struct op_counter_config *counter_config);
56
57extern struct op_x86_model_spec const op_ppro_spec;
49extern struct op_x86_model_spec const op_p4_spec; 58extern struct op_x86_model_spec const op_p4_spec;
50extern struct op_x86_model_spec const op_p4_ht2_spec; 59extern struct op_x86_model_spec const op_p4_ht2_spec;
51extern struct op_x86_model_spec const op_amd_spec; 60extern struct op_x86_model_spec const op_amd_spec;
52extern struct op_x86_model_spec op_arch_perfmon_spec; 61extern struct op_x86_model_spec op_arch_perfmon_spec;
53 62
54extern void arch_perfmon_setup_counters(void);
55
56#endif /* OP_X86_MODEL_H */ 63#endif /* OP_X86_MODEL_H */
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 242257b19441..a7aae24f2889 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -21,7 +21,6 @@
21 21
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/oprofile.h> 23#include <linux/oprofile.h>
24#include <linux/vmalloc.h>
25#include <linux/errno.h> 24#include <linux/errno.h>
26 25
27#include "event_buffer.h" 26#include "event_buffer.h"
@@ -407,6 +406,21 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val)
407 return op_cpu_buffer_add_data(entry, val); 406 return op_cpu_buffer_add_data(entry, val);
408} 407}
409 408
409int oprofile_add_data64(struct op_entry *entry, u64 val)
410{
411 if (!entry->event)
412 return 0;
413 if (op_cpu_buffer_get_size(entry) < 2)
414 /*
415 * the function returns 0 to indicate a too small
416 * buffer, even if there is some space left
417 */
418 return 0;
419 if (!op_cpu_buffer_add_data(entry, (u32)val))
420 return 0;
421 return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
422}
423
410int oprofile_write_commit(struct op_entry *entry) 424int oprofile_write_commit(struct op_entry *entry)
411{ 425{
412 if (!entry->event) 426 if (!entry->event)
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 1d9518bc4c58..d68d2ed94f15 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -171,7 +171,6 @@ struct op_sample;
171struct op_entry { 171struct op_entry {
172 struct ring_buffer_event *event; 172 struct ring_buffer_event *event;
173 struct op_sample *sample; 173 struct op_sample *sample;
174 unsigned long irq_flags;
175 unsigned long size; 174 unsigned long size;
176 unsigned long *data; 175 unsigned long *data;
177}; 176};
@@ -180,6 +179,7 @@ void oprofile_write_reserve(struct op_entry *entry,
180 struct pt_regs * const regs, 179 struct pt_regs * const regs,
181 unsigned long pc, int code, int size); 180 unsigned long pc, int code, int size);
182int oprofile_add_data(struct op_entry *entry, unsigned long val); 181int oprofile_add_data(struct op_entry *entry, unsigned long val);
182int oprofile_add_data64(struct op_entry *entry, u64 val);
183int oprofile_write_commit(struct op_entry *entry); 183int oprofile_write_commit(struct op_entry *entry);
184 184
185#endif /* OPROFILE_H */ 185#endif /* OPROFILE_H */