diff options
author | Don Zickus <dzickus@redhat.com> | 2006-09-26 04:52:26 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:26 -0400 |
commit | cb9c448c661d40ce2efbce8e9c19cc4d420d8ccc (patch) | |
tree | 2aa5614f06e57e8f5266f91ccfff1a92fa9f3792 | |
parent | 828f0afda123a96ff4e8078f057a302f4b4232ae (diff) |
[PATCH] i386: Utilize performance counter reservation framework in oprofile
Incorporates the new performance counter reservation system in oprofile.
Also cleans up a lot of the initialization code. The code originally zeroed
out every register associated with performance counters, regardless of whether
those registers were used or not. This caused issues with the NMI watchdog.
Now oprofile tries to reserve registers and gives up if it can't get them.
Cc: levon@movementarian.org
Cc: oprofile-list@lists.sf.net
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r-- | arch/i386/oprofile/nmi_int.c | 41 | ||||
-rw-r--r-- | arch/i386/oprofile/op_model_athlon.c | 54 | ||||
-rw-r--r-- | arch/i386/oprofile/op_model_p4.c | 152 | ||||
-rw-r--r-- | arch/i386/oprofile/op_model_ppro.c | 65 | ||||
-rw-r--r-- | arch/i386/oprofile/op_x86_model.h | 1 |
5 files changed, 199 insertions, 114 deletions
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 5f8dc8a21bd7..8710ca081b1e 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c | |||
@@ -98,15 +98,19 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs) | |||
98 | unsigned int i; | 98 | unsigned int i; |
99 | 99 | ||
100 | for (i = 0; i < nr_ctrs; ++i) { | 100 | for (i = 0; i < nr_ctrs; ++i) { |
101 | rdmsr(counters[i].addr, | 101 | if (counters[i].addr){ |
102 | counters[i].saved.low, | 102 | rdmsr(counters[i].addr, |
103 | counters[i].saved.high); | 103 | counters[i].saved.low, |
104 | counters[i].saved.high); | ||
105 | } | ||
104 | } | 106 | } |
105 | 107 | ||
106 | for (i = 0; i < nr_ctrls; ++i) { | 108 | for (i = 0; i < nr_ctrls; ++i) { |
107 | rdmsr(controls[i].addr, | 109 | if (controls[i].addr){ |
108 | controls[i].saved.low, | 110 | rdmsr(controls[i].addr, |
109 | controls[i].saved.high); | 111 | controls[i].saved.low, |
112 | controls[i].saved.high); | ||
113 | } | ||
110 | } | 114 | } |
111 | } | 115 | } |
112 | 116 | ||
@@ -205,15 +209,19 @@ static void nmi_restore_registers(struct op_msrs * msrs) | |||
205 | unsigned int i; | 209 | unsigned int i; |
206 | 210 | ||
207 | for (i = 0; i < nr_ctrls; ++i) { | 211 | for (i = 0; i < nr_ctrls; ++i) { |
208 | wrmsr(controls[i].addr, | 212 | if (controls[i].addr){ |
209 | controls[i].saved.low, | 213 | wrmsr(controls[i].addr, |
210 | controls[i].saved.high); | 214 | controls[i].saved.low, |
215 | controls[i].saved.high); | ||
216 | } | ||
211 | } | 217 | } |
212 | 218 | ||
213 | for (i = 0; i < nr_ctrs; ++i) { | 219 | for (i = 0; i < nr_ctrs; ++i) { |
214 | wrmsr(counters[i].addr, | 220 | if (counters[i].addr){ |
215 | counters[i].saved.low, | 221 | wrmsr(counters[i].addr, |
216 | counters[i].saved.high); | 222 | counters[i].saved.low, |
223 | counters[i].saved.high); | ||
224 | } | ||
217 | } | 225 | } |
218 | } | 226 | } |
219 | 227 | ||
@@ -234,6 +242,7 @@ static void nmi_cpu_shutdown(void * dummy) | |||
234 | apic_write(APIC_LVTPC, saved_lvtpc[cpu]); | 242 | apic_write(APIC_LVTPC, saved_lvtpc[cpu]); |
235 | apic_write(APIC_LVTERR, v); | 243 | apic_write(APIC_LVTERR, v); |
236 | nmi_restore_registers(msrs); | 244 | nmi_restore_registers(msrs); |
245 | model->shutdown(msrs); | ||
237 | } | 246 | } |
238 | 247 | ||
239 | 248 | ||
@@ -284,6 +293,14 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root) | |||
284 | struct dentry * dir; | 293 | struct dentry * dir; |
285 | char buf[4]; | 294 | char buf[4]; |
286 | 295 | ||
296 | /* quick little hack to _not_ expose a counter if it is not | ||
297 | * available for use. This should protect userspace app. | ||
298 | * NOTE: assumes 1:1 mapping here (that counters are organized | ||
299 | * sequentially in their struct assignment). | ||
300 | */ | ||
301 | if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) | ||
302 | continue; | ||
303 | |||
287 | snprintf(buf, sizeof(buf), "%d", i); | 304 | snprintf(buf, sizeof(buf), "%d", i); |
288 | dir = oprofilefs_mkdir(sb, root, buf); | 305 | dir = oprofilefs_mkdir(sb, root, buf); |
289 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); | 306 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); |
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c index 693bdea4a52b..3057a19e4641 100644 --- a/arch/i386/oprofile/op_model_athlon.c +++ b/arch/i386/oprofile/op_model_athlon.c | |||
@@ -21,10 +21,12 @@ | |||
21 | #define NUM_COUNTERS 4 | 21 | #define NUM_COUNTERS 4 |
22 | #define NUM_CONTROLS 4 | 22 | #define NUM_CONTROLS 4 |
23 | 23 | ||
24 | #define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) | ||
24 | #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) | 25 | #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) |
25 | #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) | 26 | #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) |
26 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | 27 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) |
27 | 28 | ||
29 | #define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) | ||
28 | #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) | 30 | #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) |
29 | #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) | 31 | #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) |
30 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | 32 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) |
@@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COUNTERS]; | |||
40 | 42 | ||
41 | static void athlon_fill_in_addresses(struct op_msrs * const msrs) | 43 | static void athlon_fill_in_addresses(struct op_msrs * const msrs) |
42 | { | 44 | { |
43 | msrs->counters[0].addr = MSR_K7_PERFCTR0; | 45 | int i; |
44 | msrs->counters[1].addr = MSR_K7_PERFCTR1; | 46 | |
45 | msrs->counters[2].addr = MSR_K7_PERFCTR2; | 47 | for (i=0; i < NUM_COUNTERS; i++) { |
46 | msrs->counters[3].addr = MSR_K7_PERFCTR3; | 48 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) |
47 | 49 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | |
48 | msrs->controls[0].addr = MSR_K7_EVNTSEL0; | 50 | else |
49 | msrs->controls[1].addr = MSR_K7_EVNTSEL1; | 51 | msrs->counters[i].addr = 0; |
50 | msrs->controls[2].addr = MSR_K7_EVNTSEL2; | 52 | } |
51 | msrs->controls[3].addr = MSR_K7_EVNTSEL3; | 53 | |
54 | for (i=0; i < NUM_CONTROLS; i++) { | ||
55 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | ||
56 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
57 | else | ||
58 | msrs->controls[i].addr = 0; | ||
59 | } | ||
52 | } | 60 | } |
53 | 61 | ||
54 | 62 | ||
@@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) | |||
59 | 67 | ||
60 | /* clear all counters */ | 68 | /* clear all counters */ |
61 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 69 | for (i = 0 ; i < NUM_CONTROLS; ++i) { |
70 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) | ||
71 | continue; | ||
62 | CTRL_READ(low, high, msrs, i); | 72 | CTRL_READ(low, high, msrs, i); |
63 | CTRL_CLEAR(low); | 73 | CTRL_CLEAR(low); |
64 | CTRL_WRITE(low, high, msrs, i); | 74 | CTRL_WRITE(low, high, msrs, i); |
65 | } | 75 | } |
66 | 76 | ||
67 | /* avoid a false detection of ctr overflows in NMI handler */ | 77 | /* avoid a false detection of ctr overflows in NMI handler */ |
68 | for (i = 0; i < NUM_COUNTERS; ++i) { | 78 | for (i = 0; i < NUM_COUNTERS; ++i) { |
79 | if (unlikely(!CTR_IS_RESERVED(msrs,i))) | ||
80 | continue; | ||
69 | CTR_WRITE(1, msrs, i); | 81 | CTR_WRITE(1, msrs, i); |
70 | } | 82 | } |
71 | 83 | ||
72 | /* enable active counters */ | 84 | /* enable active counters */ |
73 | for (i = 0; i < NUM_COUNTERS; ++i) { | 85 | for (i = 0; i < NUM_COUNTERS; ++i) { |
74 | if (counter_config[i].enabled) { | 86 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { |
75 | reset_value[i] = counter_config[i].count; | 87 | reset_value[i] = counter_config[i].count; |
76 | 88 | ||
77 | CTR_WRITE(counter_config[i].count, msrs, i); | 89 | CTR_WRITE(counter_config[i].count, msrs, i); |
@@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs, | |||
98 | int i; | 110 | int i; |
99 | 111 | ||
100 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 112 | for (i = 0 ; i < NUM_COUNTERS; ++i) { |
113 | if (!reset_value[i]) | ||
114 | continue; | ||
101 | CTR_READ(low, high, msrs, i); | 115 | CTR_READ(low, high, msrs, i); |
102 | if (CTR_OVERFLOWED(low)) { | 116 | if (CTR_OVERFLOWED(low)) { |
103 | oprofile_add_sample(regs, i); | 117 | oprofile_add_sample(regs, i); |
@@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs const * const msrs) | |||
132 | /* Subtle: stop on all counters to avoid race with | 146 | /* Subtle: stop on all counters to avoid race with |
133 | * setting our pm callback */ | 147 | * setting our pm callback */ |
134 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 148 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { |
149 | if (!reset_value[i]) | ||
150 | continue; | ||
135 | CTRL_READ(low, high, msrs, i); | 151 | CTRL_READ(low, high, msrs, i); |
136 | CTRL_SET_INACTIVE(low); | 152 | CTRL_SET_INACTIVE(low); |
137 | CTRL_WRITE(low, high, msrs, i); | 153 | CTRL_WRITE(low, high, msrs, i); |
138 | } | 154 | } |
139 | } | 155 | } |
140 | 156 | ||
157 | static void athlon_shutdown(struct op_msrs const * const msrs) | ||
158 | { | ||
159 | int i; | ||
160 | |||
161 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
162 | if (CTR_IS_RESERVED(msrs,i)) | ||
163 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
164 | } | ||
165 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | ||
166 | if (CTRL_IS_RESERVED(msrs,i)) | ||
167 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
168 | } | ||
169 | } | ||
141 | 170 | ||
142 | struct op_x86_model_spec const op_athlon_spec = { | 171 | struct op_x86_model_spec const op_athlon_spec = { |
143 | .num_counters = NUM_COUNTERS, | 172 | .num_counters = NUM_COUNTERS, |
@@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon_spec = { | |||
146 | .setup_ctrs = &athlon_setup_ctrs, | 175 | .setup_ctrs = &athlon_setup_ctrs, |
147 | .check_ctrs = &athlon_check_ctrs, | 176 | .check_ctrs = &athlon_check_ctrs, |
148 | .start = &athlon_start, | 177 | .start = &athlon_start, |
149 | .stop = &athlon_stop | 178 | .stop = &athlon_stop, |
179 | .shutdown = &athlon_shutdown | ||
150 | }; | 180 | }; |
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c index 7c61d357b82b..47925927b12f 100644 --- a/arch/i386/oprofile/op_model_p4.c +++ b/arch/i386/oprofile/op_model_p4.c | |||
@@ -32,7 +32,7 @@ | |||
32 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) | 32 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) |
33 | 33 | ||
34 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; | 34 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; |
35 | 35 | static unsigned int num_controls = NUM_CONTROLS_NON_HT; | |
36 | 36 | ||
37 | /* this has to be checked dynamically since the | 37 | /* this has to be checked dynamically since the |
38 | hyper-threadedness of a chip is discovered at | 38 | hyper-threadedness of a chip is discovered at |
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT; | |||
40 | static inline void setup_num_counters(void) | 40 | static inline void setup_num_counters(void) |
41 | { | 41 | { |
42 | #ifdef CONFIG_SMP | 42 | #ifdef CONFIG_SMP |
43 | if (smp_num_siblings == 2) | 43 | if (smp_num_siblings == 2){ |
44 | num_counters = NUM_COUNTERS_HT2; | 44 | num_counters = NUM_COUNTERS_HT2; |
45 | num_controls = NUM_CONTROLS_HT2; | ||
46 | } | ||
45 | #endif | 47 | #endif |
46 | } | 48 | } |
47 | 49 | ||
@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { | |||
97 | 99 | ||
98 | #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT | 100 | #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT |
99 | 101 | ||
100 | /* All cccr we don't use. */ | ||
101 | static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { | ||
102 | MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, | ||
103 | MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, | ||
104 | MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, | ||
105 | MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, | ||
106 | MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 | ||
107 | }; | ||
108 | |||
109 | /* p4 event codes in libop/op_event.h are indices into this table. */ | 102 | /* p4 event codes in libop/op_event.h are indices into this table. */ |
110 | 103 | ||
111 | static struct p4_event_binding p4_events[NUM_EVENTS] = { | 104 | static struct p4_event_binding p4_events[NUM_EVENTS] = { |
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { | |||
372 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) | 365 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) |
373 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) | 366 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) |
374 | 367 | ||
368 | #define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) | ||
369 | #define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) | ||
375 | #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) | 370 | #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) |
376 | #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) | 371 | #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) |
377 | #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) | 372 | #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) |
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT]; | |||
401 | static void p4_fill_in_addresses(struct op_msrs * const msrs) | 396 | static void p4_fill_in_addresses(struct op_msrs * const msrs) |
402 | { | 397 | { |
403 | unsigned int i; | 398 | unsigned int i; |
404 | unsigned int addr, stag; | 399 | unsigned int addr, cccraddr, stag; |
405 | 400 | ||
406 | setup_num_counters(); | 401 | setup_num_counters(); |
407 | stag = get_stagger(); | 402 | stag = get_stagger(); |
408 | 403 | ||
409 | /* the counter registers we pay attention to */ | 404 | /* initialize some registers */ |
410 | for (i = 0; i < num_counters; ++i) { | 405 | for (i = 0; i < num_counters; ++i) { |
411 | msrs->counters[i].addr = | 406 | msrs->counters[i].addr = 0; |
412 | p4_counters[VIRT_CTR(stag, i)].counter_address; | ||
413 | } | 407 | } |
414 | 408 | for (i = 0; i < num_controls; ++i) { | |
415 | /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ | 409 | msrs->controls[i].addr = 0; |
416 | |||
417 | /* 18 CCCR registers */ | ||
418 | for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; | ||
419 | addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { | ||
420 | msrs->controls[i].addr = addr; | ||
421 | } | 410 | } |
422 | 411 | ||
412 | /* the counter & cccr registers we pay attention to */ | ||
413 | for (i = 0; i < num_counters; ++i) { | ||
414 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; | ||
415 | cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; | ||
416 | if (reserve_perfctr_nmi(addr)){ | ||
417 | msrs->counters[i].addr = addr; | ||
418 | msrs->controls[i].addr = cccraddr; | ||
419 | } | ||
420 | } | ||
421 | |||
423 | /* 43 ESCR registers in three or four discontiguous group */ | 422 | /* 43 ESCR registers in three or four discontiguous group */ |
424 | for (addr = MSR_P4_BSU_ESCR0 + stag; | 423 | for (addr = MSR_P4_BSU_ESCR0 + stag; |
425 | addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { | 424 | addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { |
426 | msrs->controls[i].addr = addr; | 425 | if (reserve_evntsel_nmi(addr)) |
426 | msrs->controls[i].addr = addr; | ||
427 | } | 427 | } |
428 | 428 | ||
429 | /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 | 429 | /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 |
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
431 | if (boot_cpu_data.x86_model >= 0x3) { | 431 | if (boot_cpu_data.x86_model >= 0x3) { |
432 | for (addr = MSR_P4_BSU_ESCR0 + stag; | 432 | for (addr = MSR_P4_BSU_ESCR0 + stag; |
433 | addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { | 433 | addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { |
434 | msrs->controls[i].addr = addr; | 434 | if (reserve_evntsel_nmi(addr)) |
435 | msrs->controls[i].addr = addr; | ||
435 | } | 436 | } |
436 | } else { | 437 | } else { |
437 | for (addr = MSR_P4_IQ_ESCR0 + stag; | 438 | for (addr = MSR_P4_IQ_ESCR0 + stag; |
438 | addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { | 439 | addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { |
439 | msrs->controls[i].addr = addr; | 440 | if (reserve_evntsel_nmi(addr)) |
441 | msrs->controls[i].addr = addr; | ||
440 | } | 442 | } |
441 | } | 443 | } |
442 | 444 | ||
443 | for (addr = MSR_P4_RAT_ESCR0 + stag; | 445 | for (addr = MSR_P4_RAT_ESCR0 + stag; |
444 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { | 446 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { |
445 | msrs->controls[i].addr = addr; | 447 | if (reserve_evntsel_nmi(addr)) |
448 | msrs->controls[i].addr = addr; | ||
446 | } | 449 | } |
447 | 450 | ||
448 | for (addr = MSR_P4_MS_ESCR0 + stag; | 451 | for (addr = MSR_P4_MS_ESCR0 + stag; |
449 | addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { | 452 | addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { |
450 | msrs->controls[i].addr = addr; | 453 | if (reserve_evntsel_nmi(addr)) |
454 | msrs->controls[i].addr = addr; | ||
451 | } | 455 | } |
452 | 456 | ||
453 | for (addr = MSR_P4_IX_ESCR0 + stag; | 457 | for (addr = MSR_P4_IX_ESCR0 + stag; |
454 | addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { | 458 | addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { |
455 | msrs->controls[i].addr = addr; | 459 | if (reserve_evntsel_nmi(addr)) |
460 | msrs->controls[i].addr = addr; | ||
456 | } | 461 | } |
457 | 462 | ||
458 | /* there are 2 remaining non-contiguously located ESCRs */ | 463 | /* there are 2 remaining non-contiguously located ESCRs */ |
459 | 464 | ||
460 | if (num_counters == NUM_COUNTERS_NON_HT) { | 465 | if (num_counters == NUM_COUNTERS_NON_HT) { |
461 | /* standard non-HT CPUs handle both remaining ESCRs*/ | 466 | /* standard non-HT CPUs handle both remaining ESCRs*/ |
462 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 467 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) |
463 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | 468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; |
469 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) | ||
470 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
464 | 471 | ||
465 | } else if (stag == 0) { | 472 | } else if (stag == 0) { |
466 | /* HT CPUs give the first remainder to the even thread, as | 473 | /* HT CPUs give the first remainder to the even thread, as |
467 | the 32nd control register */ | 474 | the 32nd control register */ |
468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | 475 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) |
476 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
469 | 477 | ||
470 | } else { | 478 | } else { |
471 | /* and two copies of the second to the odd thread, | 479 | /* and two copies of the second to the odd thread, |
472 | for the 22st and 23nd control registers */ | 480 | for the 22st and 23nd control registers */ |
473 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 481 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { |
474 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 482 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; |
483 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
484 | } | ||
475 | } | 485 | } |
476 | } | 486 | } |
477 | 487 | ||
@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
544 | { | 554 | { |
545 | unsigned int i; | 555 | unsigned int i; |
546 | unsigned int low, high; | 556 | unsigned int low, high; |
547 | unsigned int addr; | ||
548 | unsigned int stag; | 557 | unsigned int stag; |
549 | 558 | ||
550 | stag = get_stagger(); | 559 | stag = get_stagger(); |
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
557 | 566 | ||
558 | /* clear the cccrs we will use */ | 567 | /* clear the cccrs we will use */ |
559 | for (i = 0 ; i < num_counters ; i++) { | 568 | for (i = 0 ; i < num_counters ; i++) { |
569 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) | ||
570 | continue; | ||
560 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | 571 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
561 | CCCR_CLEAR(low); | 572 | CCCR_CLEAR(low); |
562 | CCCR_SET_REQUIRED_BITS(low); | 573 | CCCR_SET_REQUIRED_BITS(low); |
563 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | 574 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
564 | } | 575 | } |
565 | 576 | ||
566 | /* clear cccrs outside our concern */ | ||
567 | for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { | ||
568 | rdmsr(p4_unused_cccr[i], low, high); | ||
569 | CCCR_CLEAR(low); | ||
570 | CCCR_SET_REQUIRED_BITS(low); | ||
571 | wrmsr(p4_unused_cccr[i], low, high); | ||
572 | } | ||
573 | |||
574 | /* clear all escrs (including those outside our concern) */ | 577 | /* clear all escrs (including those outside our concern) */ |
575 | for (addr = MSR_P4_BSU_ESCR0 + stag; | 578 | for (i = num_counters; i < num_controls; i++) { |
576 | addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { | 579 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) |
577 | wrmsr(addr, 0, 0); | 580 | continue; |
578 | } | 581 | wrmsr(msrs->controls[i].addr, 0, 0); |
579 | |||
580 | /* On older models clear also MSR_P4_IQ_ESCR0/1 */ | ||
581 | if (boot_cpu_data.x86_model < 0x3) { | ||
582 | wrmsr(MSR_P4_IQ_ESCR0, 0, 0); | ||
583 | wrmsr(MSR_P4_IQ_ESCR1, 0, 0); | ||
584 | } | ||
585 | |||
586 | for (addr = MSR_P4_RAT_ESCR0 + stag; | ||
587 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { | ||
588 | wrmsr(addr, 0, 0); | ||
589 | } | ||
590 | |||
591 | for (addr = MSR_P4_MS_ESCR0 + stag; | ||
592 | addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ | ||
593 | wrmsr(addr, 0, 0); | ||
594 | } | ||
595 | |||
596 | for (addr = MSR_P4_IX_ESCR0 + stag; | ||
597 | addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ | ||
598 | wrmsr(addr, 0, 0); | ||
599 | } | 582 | } |
600 | 583 | ||
601 | if (num_counters == NUM_COUNTERS_NON_HT) { | ||
602 | wrmsr(MSR_P4_CRU_ESCR4, 0, 0); | ||
603 | wrmsr(MSR_P4_CRU_ESCR5, 0, 0); | ||
604 | } else if (stag == 0) { | ||
605 | wrmsr(MSR_P4_CRU_ESCR4, 0, 0); | ||
606 | } else { | ||
607 | wrmsr(MSR_P4_CRU_ESCR5, 0, 0); | ||
608 | } | ||
609 | |||
610 | /* setup all counters */ | 584 | /* setup all counters */ |
611 | for (i = 0 ; i < num_counters ; ++i) { | 585 | for (i = 0 ; i < num_counters ; ++i) { |
612 | if (counter_config[i].enabled) { | 586 | if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { |
613 | reset_value[i] = counter_config[i].count; | 587 | reset_value[i] = counter_config[i].count; |
614 | pmc_setup_one_p4_counter(i); | 588 | pmc_setup_one_p4_counter(i); |
615 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); | 589 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); |
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs) | |||
696 | stag = get_stagger(); | 670 | stag = get_stagger(); |
697 | 671 | ||
698 | for (i = 0; i < num_counters; ++i) { | 672 | for (i = 0; i < num_counters; ++i) { |
673 | if (!reset_value[i]) | ||
674 | continue; | ||
699 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | 675 | CCCR_READ(low, high, VIRT_CTR(stag, i)); |
700 | CCCR_SET_DISABLE(low); | 676 | CCCR_SET_DISABLE(low); |
701 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | 677 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); |
702 | } | 678 | } |
703 | } | 679 | } |
704 | 680 | ||
681 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
682 | { | ||
683 | int i; | ||
684 | |||
685 | for (i = 0 ; i < num_counters ; ++i) { | ||
686 | if (CTR_IS_RESERVED(msrs,i)) | ||
687 | release_perfctr_nmi(msrs->counters[i].addr); | ||
688 | } | ||
689 | /* some of the control registers are specially reserved in | ||
690 | * conjunction with the counter registers (hence the starting offset). | ||
691 | * This saves a few bits. | ||
692 | */ | ||
693 | for (i = num_counters ; i < num_controls ; ++i) { | ||
694 | if (CTRL_IS_RESERVED(msrs,i)) | ||
695 | release_evntsel_nmi(msrs->controls[i].addr); | ||
696 | } | ||
697 | } | ||
698 | |||
705 | 699 | ||
706 | #ifdef CONFIG_SMP | 700 | #ifdef CONFIG_SMP |
707 | struct op_x86_model_spec const op_p4_ht2_spec = { | 701 | struct op_x86_model_spec const op_p4_ht2_spec = { |
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { | |||
711 | .setup_ctrs = &p4_setup_ctrs, | 705 | .setup_ctrs = &p4_setup_ctrs, |
712 | .check_ctrs = &p4_check_ctrs, | 706 | .check_ctrs = &p4_check_ctrs, |
713 | .start = &p4_start, | 707 | .start = &p4_start, |
714 | .stop = &p4_stop | 708 | .stop = &p4_stop, |
709 | .shutdown = &p4_shutdown | ||
715 | }; | 710 | }; |
716 | #endif | 711 | #endif |
717 | 712 | ||
@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = { | |||
722 | .setup_ctrs = &p4_setup_ctrs, | 717 | .setup_ctrs = &p4_setup_ctrs, |
723 | .check_ctrs = &p4_check_ctrs, | 718 | .check_ctrs = &p4_check_ctrs, |
724 | .start = &p4_start, | 719 | .start = &p4_start, |
725 | .stop = &p4_stop | 720 | .stop = &p4_stop, |
721 | .shutdown = &p4_shutdown | ||
726 | }; | 722 | }; |
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c index 5c3ab4b027ad..f88e05ba8eb3 100644 --- a/arch/i386/oprofile/op_model_ppro.c +++ b/arch/i386/oprofile/op_model_ppro.c | |||
@@ -22,10 +22,12 @@ | |||
22 | #define NUM_COUNTERS 2 | 22 | #define NUM_COUNTERS 2 |
23 | #define NUM_CONTROLS 2 | 23 | #define NUM_CONTROLS 2 |
24 | 24 | ||
25 | #define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) | ||
25 | #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) | 26 | #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) |
26 | #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) | 27 | #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) |
27 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | 28 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) |
28 | 29 | ||
30 | #define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) | ||
29 | #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) | 31 | #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) |
30 | #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) | 32 | #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) |
31 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) | 33 | #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) |
@@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COUNTERS]; | |||
41 | 43 | ||
42 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 44 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
43 | { | 45 | { |
44 | msrs->counters[0].addr = MSR_P6_PERFCTR0; | 46 | int i; |
45 | msrs->counters[1].addr = MSR_P6_PERFCTR1; | 47 | |
48 | for (i=0; i < NUM_COUNTERS; i++) { | ||
49 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | ||
50 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | ||
51 | else | ||
52 | msrs->counters[i].addr = 0; | ||
53 | } | ||
46 | 54 | ||
47 | msrs->controls[0].addr = MSR_P6_EVNTSEL0; | 55 | for (i=0; i < NUM_CONTROLS; i++) { |
48 | msrs->controls[1].addr = MSR_P6_EVNTSEL1; | 56 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
57 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | ||
58 | else | ||
59 | msrs->controls[i].addr = 0; | ||
60 | } | ||
49 | } | 61 | } |
50 | 62 | ||
51 | 63 | ||
@@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
56 | 68 | ||
57 | /* clear all counters */ | 69 | /* clear all counters */ |
58 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 70 | for (i = 0 ; i < NUM_CONTROLS; ++i) { |
71 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) | ||
72 | continue; | ||
59 | CTRL_READ(low, high, msrs, i); | 73 | CTRL_READ(low, high, msrs, i); |
60 | CTRL_CLEAR(low); | 74 | CTRL_CLEAR(low); |
61 | CTRL_WRITE(low, high, msrs, i); | 75 | CTRL_WRITE(low, high, msrs, i); |
@@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
63 | 77 | ||
64 | /* avoid a false detection of ctr overflows in NMI handler */ | 78 | /* avoid a false detection of ctr overflows in NMI handler */ |
65 | for (i = 0; i < NUM_COUNTERS; ++i) { | 79 | for (i = 0; i < NUM_COUNTERS; ++i) { |
80 | if (unlikely(!CTR_IS_RESERVED(msrs,i))) | ||
81 | continue; | ||
66 | CTR_WRITE(1, msrs, i); | 82 | CTR_WRITE(1, msrs, i); |
67 | } | 83 | } |
68 | 84 | ||
69 | /* enable active counters */ | 85 | /* enable active counters */ |
70 | for (i = 0; i < NUM_COUNTERS; ++i) { | 86 | for (i = 0; i < NUM_COUNTERS; ++i) { |
71 | if (counter_config[i].enabled) { | 87 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { |
72 | reset_value[i] = counter_config[i].count; | 88 | reset_value[i] = counter_config[i].count; |
73 | 89 | ||
74 | CTR_WRITE(counter_config[i].count, msrs, i); | 90 | CTR_WRITE(counter_config[i].count, msrs, i); |
@@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
81 | CTRL_SET_UM(low, counter_config[i].unit_mask); | 97 | CTRL_SET_UM(low, counter_config[i].unit_mask); |
82 | CTRL_SET_EVENT(low, counter_config[i].event); | 98 | CTRL_SET_EVENT(low, counter_config[i].event); |
83 | CTRL_WRITE(low, high, msrs, i); | 99 | CTRL_WRITE(low, high, msrs, i); |
100 | } else { | ||
101 | reset_value[i] = 0; | ||
84 | } | 102 | } |
85 | } | 103 | } |
86 | } | 104 | } |
@@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
93 | int i; | 111 | int i; |
94 | 112 | ||
95 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 113 | for (i = 0 ; i < NUM_COUNTERS; ++i) { |
114 | if (!reset_value[i]) | ||
115 | continue; | ||
96 | CTR_READ(low, high, msrs, i); | 116 | CTR_READ(low, high, msrs, i); |
97 | if (CTR_OVERFLOWED(low)) { | 117 | if (CTR_OVERFLOWED(low)) { |
98 | oprofile_add_sample(regs, i); | 118 | oprofile_add_sample(regs, i); |
@@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
118 | static void ppro_start(struct op_msrs const * const msrs) | 138 | static void ppro_start(struct op_msrs const * const msrs) |
119 | { | 139 | { |
120 | unsigned int low,high; | 140 | unsigned int low,high; |
121 | CTRL_READ(low, high, msrs, 0); | 141 | |
122 | CTRL_SET_ACTIVE(low); | 142 | if (reset_value[0]) { |
123 | CTRL_WRITE(low, high, msrs, 0); | 143 | CTRL_READ(low, high, msrs, 0); |
144 | CTRL_SET_ACTIVE(low); | ||
145 | CTRL_WRITE(low, high, msrs, 0); | ||
146 | } | ||
124 | } | 147 | } |
125 | 148 | ||
126 | 149 | ||
127 | static void ppro_stop(struct op_msrs const * const msrs) | 150 | static void ppro_stop(struct op_msrs const * const msrs) |
128 | { | 151 | { |
129 | unsigned int low,high; | 152 | unsigned int low,high; |
130 | CTRL_READ(low, high, msrs, 0); | 153 | |
131 | CTRL_SET_INACTIVE(low); | 154 | if (reset_value[0]) { |
132 | CTRL_WRITE(low, high, msrs, 0); | 155 | CTRL_READ(low, high, msrs, 0); |
156 | CTRL_SET_INACTIVE(low); | ||
157 | CTRL_WRITE(low, high, msrs, 0); | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static void ppro_shutdown(struct op_msrs const * const msrs) | ||
162 | { | ||
163 | int i; | ||
164 | |||
165 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
166 | if (CTR_IS_RESERVED(msrs,i)) | ||
167 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
168 | } | ||
169 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | ||
170 | if (CTRL_IS_RESERVED(msrs,i)) | ||
171 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | ||
172 | } | ||
133 | } | 173 | } |
134 | 174 | ||
135 | 175 | ||
@@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_spec = { | |||
140 | .setup_ctrs = &ppro_setup_ctrs, | 180 | .setup_ctrs = &ppro_setup_ctrs, |
141 | .check_ctrs = &ppro_check_ctrs, | 181 | .check_ctrs = &ppro_check_ctrs, |
142 | .start = &ppro_start, | 182 | .start = &ppro_start, |
143 | .stop = &ppro_stop | 183 | .stop = &ppro_stop, |
184 | .shutdown = &ppro_shutdown | ||
144 | }; | 185 | }; |
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h index 123b7e90a9ee..abb1aa95b979 100644 --- a/arch/i386/oprofile/op_x86_model.h +++ b/arch/i386/oprofile/op_x86_model.h | |||
@@ -40,6 +40,7 @@ struct op_x86_model_spec { | |||
40 | struct op_msrs const * const msrs); | 40 | struct op_msrs const * const msrs); |
41 | void (*start)(struct op_msrs const * const msrs); | 41 | void (*start)(struct op_msrs const * const msrs); |
42 | void (*stop)(struct op_msrs const * const msrs); | 42 | void (*stop)(struct op_msrs const * const msrs); |
43 | void (*shutdown)(struct op_msrs const * const msrs); | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | extern struct op_x86_model_spec const op_ppro_spec; | 46 | extern struct op_x86_model_spec const op_ppro_spec; |