 arch/i386/oprofile/op_model_p4.c | 152 ++++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 78 deletions(-)
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index 7c61d357b82b..47925927b12f 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -32,7 +32,7 @@
 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
 
 static unsigned int num_counters = NUM_COUNTERS_NON_HT;
-
+static unsigned int num_controls = NUM_CONTROLS_NON_HT;
 
 /* this has to be checked dynamically since the
    hyper-threadedness of a chip is discovered at
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT;
 static inline void setup_num_counters(void)
 {
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2)
+	if (smp_num_siblings == 2){
 		num_counters = NUM_COUNTERS_HT2;
+		num_controls = NUM_CONTROLS_HT2;
+	}
 #endif
 }
 
@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
 
 #define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
 
-/* All cccr we don't use. */
-static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
-	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
-	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
-	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
-	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
-	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
-};
-
 /* p4 event codes in libop/op_event.h are indices into this table. */
 
 static struct p4_event_binding p4_events[NUM_EVENTS] = {
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
 #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 {
 	unsigned int i;
-	unsigned int addr, stag;
+	unsigned int addr, cccraddr, stag;
 
 	setup_num_counters();
 	stag = get_stagger();
 
-	/* the counter registers we pay attention to */
+	/* initialize some registers */
 	for (i = 0; i < num_counters; ++i) {
-		msrs->counters[i].addr =
-			p4_counters[VIRT_CTR(stag, i)].counter_address;
+		msrs->counters[i].addr = 0;
 	}
-
-	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
-
-	/* 18 CCCR registers */
-	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
-	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+	for (i = 0; i < num_controls; ++i) {
+		msrs->controls[i].addr = 0;
 	}
 
+	/* the counter & cccr registers we pay attention to */
+	for (i = 0; i < num_counters; ++i) {
+		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
+		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
+		if (reserve_perfctr_nmi(addr)){
+			msrs->counters[i].addr = addr;
+			msrs->controls[i].addr = cccraddr;
+		}
+	}
+
 	/* 43 ESCR registers in three or four discontiguous group */
 	for (addr = MSR_P4_BSU_ESCR0 + stag;
 	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
 	if (boot_cpu_data.x86_model >= 0x3) {
 		for (addr = MSR_P4_BSU_ESCR0 + stag;
 		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
-			msrs->controls[i].addr = addr;
+			if (reserve_evntsel_nmi(addr))
+				msrs->controls[i].addr = addr;
 		}
 	} else {
 		for (addr = MSR_P4_IQ_ESCR0 + stag;
 		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
-			msrs->controls[i].addr = addr;
+			if (reserve_evntsel_nmi(addr))
+				msrs->controls[i].addr = addr;
 		}
 	}
 
 	for (addr = MSR_P4_RAT_ESCR0 + stag;
 	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	for (addr = MSR_P4_MS_ESCR0 + stag;
 	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	for (addr = MSR_P4_IX_ESCR0 + stag;
 	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 
 	/* there are 2 remaining non-contiguously located ESCRs */
 
 	if (num_counters == NUM_COUNTERS_NON_HT) {
 		/* standard non-HT CPUs handle both remaining ESCRs*/
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
 	} else if (stag == 0) {
 		/* HT CPUs give the first remainder to the even thread, as
 		   the 32nd control register */
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
 	} else {
 		/* and two copies of the second to the odd thread,
 		   for the 22st and 23nd control registers */
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		}
 	}
 }
 
@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 {
 	unsigned int i;
 	unsigned int low, high;
-	unsigned int addr;
 	unsigned int stag;
 
 	stag = get_stagger();
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
 		CCCR_SET_REQUIRED_BITS(low);
 		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 	}
 
-	/* clear cccrs outside our concern */
-	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
-		rdmsr(p4_unused_cccr[i], low, high);
-		CCCR_CLEAR(low);
-		CCCR_SET_REQUIRED_BITS(low);
-		wrmsr(p4_unused_cccr[i], low, high);
-	}
-
 	/* clear all escrs (including those outside our concern) */
-	for (addr = MSR_P4_BSU_ESCR0 + stag;
-	     addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
-		wrmsr(addr, 0, 0);
-	}
-
-	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
-	if (boot_cpu_data.x86_model < 0x3) {
-		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
-		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
-	}
-
-	for (addr = MSR_P4_RAT_ESCR0 + stag;
-	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
-		wrmsr(addr, 0, 0);
-	}
-
-	for (addr = MSR_P4_MS_ESCR0 + stag;
-	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){
-		wrmsr(addr, 0, 0);
-	}
-
-	for (addr = MSR_P4_IX_ESCR0 + stag;
-	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){
-		wrmsr(addr, 0, 0);
+	for (i = num_counters; i < num_controls; i++) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
+		wrmsr(msrs->controls[i].addr, 0, 0);
 	}
 
-	if (num_counters == NUM_COUNTERS_NON_HT) {
-		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
-		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
-	} else if (stag == 0) {
-		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
-	} else {
-		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
-	}
-
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (counter_config[i].enabled) {
+		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
 			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs)
 	stag = get_stagger();
 
 	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i])
+			continue;
 		CCCR_READ(low, high, VIRT_CTR(stag, i));
 		CCCR_SET_DISABLE(low);
 		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
 	}
 }
 
+static void p4_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < num_counters ; ++i) {
+		if (CTR_IS_RESERVED(msrs,i))
+			release_perfctr_nmi(msrs->counters[i].addr);
+	}
+	/* some of the control registers are specially reserved in
+	 * conjunction with the counter registers (hence the starting offset).
+	 * This saves a few bits.
+	 */
+	for (i = num_counters ; i < num_controls ; ++i) {
+		if (CTRL_IS_RESERVED(msrs,i))
+			release_evntsel_nmi(msrs->controls[i].addr);
+	}
+}
+
 
 #ifdef CONFIG_SMP
 struct op_x86_model_spec const op_p4_ht2_spec = {
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
 	.setup_ctrs = &p4_setup_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.start = &p4_start,
-	.stop = &p4_stop
+	.stop = &p4_stop,
+	.shutdown = &p4_shutdown
 };
 #endif
 
@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = {
 	.setup_ctrs = &p4_setup_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.start = &p4_start,
-	.stop = &p4_stop
+	.stop = &p4_stop,
+	.shutdown = &p4_shutdown
 };
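
The patch converts every unconditional MSR write into a reserve -> guard -> release lifecycle: try to reserve each counter/event-select MSR in p4_fill_in_addresses(), record only the ones obtained (a zero .addr means "not ours, never touch"), gate all later accesses with CTR_IS_RESERVED()/CTRL_IS_RESERVED(), and hand everything back in the new p4_shutdown(). Below is a minimal sketch of that pattern, assuming the i386 perfctr reservation API of this kernel era (<asm/nmi.h>). Only reserve_perfctr_nmi()/release_perfctr_nmi() and the zero-addr convention come from the diff; the demo_* names and the single counter/CCCR pair are hypothetical scaffolding for illustration.

/*
 * Illustrative sketch only: the reserve -> guard -> release pattern
 * this patch adopts.  The demo_* names are hypothetical; the
 * reserve/release helpers are the real ones the diff calls.
 */
#include <asm/msr.h>
#include <asm/nmi.h>

struct demo_msr {
	unsigned int addr;		/* 0 => not reserved, never touch */
};

static struct demo_msr demo_counter, demo_cccr;

static void demo_fill_in_addresses(unsigned int ctr_msr, unsigned int cccr_msr)
{
	/* start from zero so a failed reservation leaves addr == 0 */
	demo_counter.addr = 0;
	demo_cccr.addr = 0;

	/* record only the MSRs we actually own; on the P4, reserving the
	 * counter covers its paired CCCR as well (the "specially reserved
	 * in conjunction" case noted in p4_shutdown()) */
	if (reserve_perfctr_nmi(ctr_msr)) {
		demo_counter.addr = ctr_msr;
		demo_cccr.addr = cccr_msr;
	}
}

static void demo_setup_ctrs(void)
{
	/* mirror of CTRL_IS_RESERVED(): nonzero addr means we hold it */
	if (!demo_cccr.addr)
		return;			/* someone else owns this counter */
	wrmsr(demo_cccr.addr, 0, 0);	/* safe: reservation held */
}

static void demo_shutdown(void)
{
	/* release exactly once per successful reserve, as p4_shutdown() does */
	if (demo_counter.addr)
		release_perfctr_nmi(demo_counter.addr);
}

The guard is the point of the exercise: another NMI consumer (typically the NMI watchdog in this era) may already hold some counters, and the old code would have clobbered them; with reservations, unobtained MSRs simply stay at addr == 0 and are skipped everywhere.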