author		Don Zickus <dzickus@redhat.com>	2006-09-26 04:52:26 -0400
committer	Andi Kleen <andi@basil.nowhere.org>	2006-09-26 04:52:26 -0400
commit		cb9c448c661d40ce2efbce8e9c19cc4d420d8ccc
tree		2aa5614f06e57e8f5266f91ccfff1a92fa9f3792
parent		828f0afda123a96ff4e8078f057a302f4b4232ae
[PATCH] i386: Utilize performance counter reservation framework in oprofile
Incorporates the new performance counter reservation system in oprofile.
Also cleans up a lot of the initialization code.  The code originally
zeroed out every register associated with performance counters, regardless
of whether those registers were used or not, which caused issues with the
NMI watchdog.  Now oprofile tries to reserve registers and gives up if it
can't get them.

Cc: levon@movementarian.org
Cc: oprofile-list@lists.sf.net
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r--	arch/i386/oprofile/nmi_int.c		41
-rw-r--r--	arch/i386/oprofile/op_model_athlon.c	54
-rw-r--r--	arch/i386/oprofile/op_model_p4.c	152
-rw-r--r--	arch/i386/oprofile/op_model_ppro.c	65
-rw-r--r--	arch/i386/oprofile/op_x86_model.h	1
5 files changed, 199 insertions, 114 deletions
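Every model file below follows the same reserve-or-mark-unused convention: fill_in_addresses() asks the reservation framework for each MSR, records the address on success, and records 0 on failure so every later loop can skip registers owned by someone else (typically the NMI watchdog); shutdown() then releases only what was actually reserved. The sketch below is for orientation only and is not part of the patch: NUM_CTRS and MSR_BASE are illustrative placeholders for the per-model constants (e.g. NUM_COUNTERS and MSR_K7_PERFCTR0 on Athlon), while reserve_perfctr_nmi()/release_perfctr_nmi() are the framework calls the patch itself uses.

	static void example_fill_in_addresses(struct op_msrs * const msrs)
	{
		int i;

		for (i = 0; i < NUM_CTRS; i++) {
			if (reserve_perfctr_nmi(MSR_BASE + i))
				msrs->counters[i].addr = MSR_BASE + i;	/* ours */
			else
				msrs->counters[i].addr = 0;	/* taken, e.g. by the watchdog: skip later */
		}
	}

	static void example_shutdown(struct op_msrs const * const msrs)
	{
		int i;

		/* release only the counters fill_in_addresses() reserved (addr != 0) */
		for (i = 0; i < NUM_CTRS; i++) {
			if (msrs->counters[i].addr)
				release_perfctr_nmi(MSR_BASE + i);
		}
	}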
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 5f8dc8a21bd7..8710ca081b1e 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -98,15 +98,19 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs)
 	unsigned int i;
 
 	for (i = 0; i < nr_ctrs; ++i) {
-		rdmsr(counters[i].addr,
-			counters[i].saved.low,
-			counters[i].saved.high);
+		if (counters[i].addr){
+			rdmsr(counters[i].addr,
+				counters[i].saved.low,
+				counters[i].saved.high);
+		}
 	}
 
 	for (i = 0; i < nr_ctrls; ++i) {
-		rdmsr(controls[i].addr,
-			controls[i].saved.low,
-			controls[i].saved.high);
+		if (controls[i].addr){
+			rdmsr(controls[i].addr,
+				controls[i].saved.low,
+				controls[i].saved.high);
+		}
 	}
 }
 
@@ -205,15 +209,19 @@ static void nmi_restore_registers(struct op_msrs * msrs)
 	unsigned int i;
 
 	for (i = 0; i < nr_ctrls; ++i) {
-		wrmsr(controls[i].addr,
-			controls[i].saved.low,
-			controls[i].saved.high);
+		if (controls[i].addr){
+			wrmsr(controls[i].addr,
+				controls[i].saved.low,
+				controls[i].saved.high);
+		}
 	}
 
 	for (i = 0; i < nr_ctrs; ++i) {
-		wrmsr(counters[i].addr,
-			counters[i].saved.low,
-			counters[i].saved.high);
+		if (counters[i].addr){
+			wrmsr(counters[i].addr,
+				counters[i].saved.low,
+				counters[i].saved.high);
+		}
 	}
 }
 
@@ -234,6 +242,7 @@ static void nmi_cpu_shutdown(void * dummy)
 	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
 	apic_write(APIC_LVTERR, v);
 	nmi_restore_registers(msrs);
+	model->shutdown(msrs);
 }
 
 
@@ -284,6 +293,14 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
 		struct dentry * dir;
 		char buf[4];
 
+		/* quick little hack to _not_ expose a counter if it is not
+		 * available for use. This should protect userspace app.
+		 * NOTE: assumes 1:1 mapping here (that counters are organized
+		 *       sequentially in their struct assignment).
+		 */
+		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
+			continue;
+
 		snprintf(buf, sizeof(buf), "%d", i);
 		dir = oprofilefs_mkdir(sb, root, buf);
 		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 693bdea4a52b..3057a19e4641 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -21,10 +21,12 @@
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
 
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
 #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COUNTERS];
 
 static void athlon_fill_in_addresses(struct op_msrs * const msrs)
 {
-	msrs->counters[0].addr = MSR_K7_PERFCTR0;
-	msrs->counters[1].addr = MSR_K7_PERFCTR1;
-	msrs->counters[2].addr = MSR_K7_PERFCTR2;
-	msrs->counters[3].addr = MSR_K7_PERFCTR3;
-
-	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
-	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
-	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
-	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+	int i;
+
+	for (i=0; i < NUM_COUNTERS; i++) {
+		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+		else
+			msrs->counters[i].addr = 0;
+	}
+
+	for (i=0; i < NUM_CONTROLS; i++) {
+		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
+			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+		else
+			msrs->controls[i].addr = 0;
+	}
 }
 
 
@@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* clear all counters */
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
 		CTRL_READ(low, high, msrs, i);
 		CTRL_CLEAR(low);
 		CTRL_WRITE(low, high, msrs, i);
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+			continue;
 		CTR_WRITE(1, msrs, i);
 	}
 
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if (counter_config[i].enabled) {
+		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 
 			CTR_WRITE(counter_config[i].count, msrs, i);
@@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
 	int i;
 
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		if (!reset_value[i])
+			continue;
 		CTR_READ(low, high, msrs, i);
 		if (CTR_OVERFLOWED(low)) {
 			oprofile_add_sample(regs, i);
@@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs const * const msrs)
 	/* Subtle: stop on all counters to avoid race with
 	 * setting our pm callback */
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (!reset_value[i])
+			continue;
 		CTRL_READ(low, high, msrs, i);
 		CTRL_SET_INACTIVE(low);
 		CTRL_WRITE(low, high, msrs, i);
 	}
 }
 
+static void athlon_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (CTR_IS_RESERVED(msrs,i))
+			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+	}
+	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+		if (CTRL_IS_RESERVED(msrs,i))
+			release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+	}
+}
 
 struct op_x86_model_spec const op_athlon_spec = {
 	.num_counters = NUM_COUNTERS,
@@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon_spec = {
 	.setup_ctrs = &athlon_setup_ctrs,
 	.check_ctrs = &athlon_check_ctrs,
 	.start = &athlon_start,
-	.stop = &athlon_stop
+	.stop = &athlon_stop,
+	.shutdown = &athlon_shutdown
 };
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index 7c61d357b82b..47925927b12f 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -32,7 +32,7 @@
 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
 
 static unsigned int num_counters = NUM_COUNTERS_NON_HT;
-
+static unsigned int num_controls = NUM_CONTROLS_NON_HT;
 
 /* this has to be checked dynamically since the
    hyper-threadedness of a chip is discovered at
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT;
 static inline void setup_num_counters(void)
 {
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2)
+	if (smp_num_siblings == 2){
 		num_counters = NUM_COUNTERS_HT2;
+		num_controls = NUM_CONTROLS_HT2;
+	}
 #endif
 }
 
@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
 
 #define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
 
-/* All cccr we don't use. */
-static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
-	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
-	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
-	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
-	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
-	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
-};
-
 /* p4 event codes in libop/op_event.h are indices into this table. */
 
 static struct p4_event_binding p4_events[NUM_EVENTS] = {
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
 #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 {
 	unsigned int i;
-	unsigned int addr, stag;
+	unsigned int addr, cccraddr, stag;
 
 	setup_num_counters();
 	stag = get_stagger();
 
-	/* the counter registers we pay attention to */
+	/* initialize some registers */
 	for (i = 0; i < num_counters; ++i) {
-		msrs->counters[i].addr =
-			p4_counters[VIRT_CTR(stag, i)].counter_address;
+		msrs->counters[i].addr = 0;
 	}
-
-	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
-
-	/* 18 CCCR registers */
-	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
-	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+	for (i = 0; i < num_controls; ++i) {
+		msrs->controls[i].addr = 0;
 	}
 
+	/* the counter & cccr registers we pay attention to */
+	for (i = 0; i < num_counters; ++i) {
+		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
+		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
+		if (reserve_perfctr_nmi(addr)){
+			msrs->counters[i].addr = addr;
+			msrs->controls[i].addr = cccraddr;
+		}
+	}
+
 	/* 43 ESCR registers in three or four discontiguous group */
 	for (addr = MSR_P4_BSU_ESCR0 + stag;
 	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
 	if (boot_cpu_data.x86_model >= 0x3) {
 		for (addr = MSR_P4_BSU_ESCR0 + stag;
 		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
-			msrs->controls[i].addr = addr;
+			if (reserve_evntsel_nmi(addr))
+				msrs->controls[i].addr = addr;
 		}
 	} else {
 		for (addr = MSR_P4_IQ_ESCR0 + stag;
 		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
-			msrs->controls[i].addr = addr;
+			if (reserve_evntsel_nmi(addr))
+				msrs->controls[i].addr = addr;
 		}
 	}
 
 	for (addr = MSR_P4_RAT_ESCR0 + stag;
 	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	for (addr = MSR_P4_MS_ESCR0 + stag;
 	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	for (addr = MSR_P4_IX_ESCR0 + stag;
 	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	/* there are 2 remaining non-contiguously located ESCRs */
 
 	if (num_counters == NUM_COUNTERS_NON_HT) {
 		/* standard non-HT CPUs handle both remaining ESCRs*/
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
 	} else if (stag == 0) {
 		/* HT CPUs give the first remainder to the even thread, as
 		   the 32nd control register */
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
 	} else {
 		/* and two copies of the second to the odd thread,
 		   for the 22st and 23nd control registers */
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		}
 	}
 }
 
@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 {
 	unsigned int i;
 	unsigned int low, high;
-	unsigned int addr;
 	unsigned int stag;
 
 	stag = get_stagger();
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
 		CCCR_SET_REQUIRED_BITS(low);
 		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 	}
 
-	/* clear cccrs outside our concern */
-	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
-		rdmsr(p4_unused_cccr[i], low, high);
-		CCCR_CLEAR(low);
-		CCCR_SET_REQUIRED_BITS(low);
-		wrmsr(p4_unused_cccr[i], low, high);
-	}
-
 	/* clear all escrs (including those outside our concern) */
-	for (addr = MSR_P4_BSU_ESCR0 + stag;
-	     addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
-		wrmsr(addr, 0, 0);
-	}
-
-	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
-	if (boot_cpu_data.x86_model < 0x3) {
-		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
-		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
-	}
-
-	for (addr = MSR_P4_RAT_ESCR0 + stag;
-	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
-		wrmsr(addr, 0, 0);
-	}
-
-	for (addr = MSR_P4_MS_ESCR0 + stag;
-	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){
-		wrmsr(addr, 0, 0);
-	}
-
-	for (addr = MSR_P4_IX_ESCR0 + stag;
-	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){
-		wrmsr(addr, 0, 0);
-	}
-
-	if (num_counters == NUM_COUNTERS_NON_HT) {
-		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
-		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
-	} else if (stag == 0) {
-		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
-	} else {
-		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
-	}
+	for (i = num_counters; i < num_controls; i++) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
+		wrmsr(msrs->controls[i].addr, 0, 0);
+	}
 
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (counter_config[i].enabled) {
+		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
 			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs)
 	stag = get_stagger();
 
 	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i])
+			continue;
 		CCCR_READ(low, high, VIRT_CTR(stag, i));
 		CCCR_SET_DISABLE(low);
 		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
 	}
 }
 
+static void p4_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < num_counters ; ++i) {
+		if (CTR_IS_RESERVED(msrs,i))
+			release_perfctr_nmi(msrs->counters[i].addr);
+	}
+	/* some of the control registers are specially reserved in
+	 * conjunction with the counter registers (hence the starting offset).
+	 * This saves a few bits.
+	 */
+	for (i = num_counters ; i < num_controls ; ++i) {
+		if (CTRL_IS_RESERVED(msrs,i))
+			release_evntsel_nmi(msrs->controls[i].addr);
+	}
+}
+
 
 #ifdef CONFIG_SMP
 struct op_x86_model_spec const op_p4_ht2_spec = {
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
 	.setup_ctrs = &p4_setup_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.start = &p4_start,
-	.stop = &p4_stop
+	.stop = &p4_stop,
+	.shutdown = &p4_shutdown
 };
 #endif
 
@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = {
 	.setup_ctrs = &p4_setup_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.start = &p4_start,
-	.stop = &p4_stop
+	.stop = &p4_stop,
+	.shutdown = &p4_shutdown
 };
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index 5c3ab4b027ad..f88e05ba8eb3 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -22,10 +22,12 @@
 #define NUM_COUNTERS 2
 #define NUM_CONTROLS 2
 
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
 #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COUNTERS];
 
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
-	msrs->counters[0].addr = MSR_P6_PERFCTR0;
-	msrs->counters[1].addr = MSR_P6_PERFCTR1;
+	int i;
+
+	for (i=0; i < NUM_COUNTERS; i++) {
+		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
+			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
+		else
+			msrs->counters[i].addr = 0;
+	}
 
-	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
-	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+	for (i=0; i < NUM_CONTROLS; i++) {
+		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
+			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
+		else
+			msrs->controls[i].addr = 0;
+	}
 }
 
 
@@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* clear all counters */
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
 		CTRL_READ(low, high, msrs, i);
 		CTRL_CLEAR(low);
 		CTRL_WRITE(low, high, msrs, i);
@@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+			continue;
 		CTR_WRITE(1, msrs, i);
 	}
 
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if (counter_config[i].enabled) {
+		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 
 			CTR_WRITE(counter_config[i].count, msrs, i);
@@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 			CTRL_SET_UM(low, counter_config[i].unit_mask);
 			CTRL_SET_EVENT(low, counter_config[i].event);
 			CTRL_WRITE(low, high, msrs, i);
+		} else {
+			reset_value[i] = 0;
 		}
 	}
 }
@@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 	int i;
 
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		if (!reset_value[i])
+			continue;
 		CTR_READ(low, high, msrs, i);
 		if (CTR_OVERFLOWED(low)) {
 			oprofile_add_sample(regs, i);
@@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 static void ppro_start(struct op_msrs const * const msrs)
 {
 	unsigned int low,high;
-	CTRL_READ(low, high, msrs, 0);
-	CTRL_SET_ACTIVE(low);
-	CTRL_WRITE(low, high, msrs, 0);
+
+	if (reset_value[0]) {
+		CTRL_READ(low, high, msrs, 0);
+		CTRL_SET_ACTIVE(low);
+		CTRL_WRITE(low, high, msrs, 0);
+	}
 }
 
 
 static void ppro_stop(struct op_msrs const * const msrs)
 {
 	unsigned int low,high;
-	CTRL_READ(low, high, msrs, 0);
-	CTRL_SET_INACTIVE(low);
-	CTRL_WRITE(low, high, msrs, 0);
+
+	if (reset_value[0]) {
+		CTRL_READ(low, high, msrs, 0);
+		CTRL_SET_INACTIVE(low);
+		CTRL_WRITE(low, high, msrs, 0);
+	}
+}
+
+static void ppro_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (CTR_IS_RESERVED(msrs,i))
+			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
+	}
+	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+		if (CTRL_IS_RESERVED(msrs,i))
+			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
+	}
 }
 
 
@@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_spec = {
 	.setup_ctrs = &ppro_setup_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
 	.start = &ppro_start,
-	.stop = &ppro_stop
+	.stop = &ppro_stop,
+	.shutdown = &ppro_shutdown
 };
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h
index 123b7e90a9ee..abb1aa95b979 100644
--- a/arch/i386/oprofile/op_x86_model.h
+++ b/arch/i386/oprofile/op_x86_model.h
@@ -40,6 +40,7 @@ struct op_x86_model_spec {
 		      struct op_msrs const * const msrs);
 	void (*start)(struct op_msrs const * const msrs);
 	void (*stop)(struct op_msrs const * const msrs);
+	void (*shutdown)(struct op_msrs const * const msrs);
 };
 
 extern struct op_x86_model_spec const op_ppro_spec;