diff options
Diffstat (limited to 'arch/i386/oprofile/op_model_p4.c')
-rw-r--r-- | arch/i386/oprofile/op_model_p4.c | 152 |
1 files changed, 74 insertions, 78 deletions
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c index 7c61d357b82b..47925927b12f 100644 --- a/arch/i386/oprofile/op_model_p4.c +++ b/arch/i386/oprofile/op_model_p4.c | |||
@@ -32,7 +32,7 @@ | |||
32 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) | 32 | #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) |
33 | 33 | ||
34 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; | 34 | static unsigned int num_counters = NUM_COUNTERS_NON_HT; |
35 | 35 | static unsigned int num_controls = NUM_CONTROLS_NON_HT; | |
36 | 36 | ||
37 | /* this has to be checked dynamically since the | 37 | /* this has to be checked dynamically since the |
38 | hyper-threadedness of a chip is discovered at | 38 | hyper-threadedness of a chip is discovered at |
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT; | |||
40 | static inline void setup_num_counters(void) | 40 | static inline void setup_num_counters(void) |
41 | { | 41 | { |
42 | #ifdef CONFIG_SMP | 42 | #ifdef CONFIG_SMP |
43 | if (smp_num_siblings == 2) | 43 | if (smp_num_siblings == 2){ |
44 | num_counters = NUM_COUNTERS_HT2; | 44 | num_counters = NUM_COUNTERS_HT2; |
45 | num_controls = NUM_CONTROLS_HT2; | ||
46 | } | ||
45 | #endif | 47 | #endif |
46 | } | 48 | } |
47 | 49 | ||
@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { | |||
97 | 99 | ||
98 | #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT | 100 | #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT |
99 | 101 | ||
100 | /* All cccr we don't use. */ | ||
101 | static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { | ||
102 | MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, | ||
103 | MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, | ||
104 | MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, | ||
105 | MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, | ||
106 | MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 | ||
107 | }; | ||
108 | |||
109 | /* p4 event codes in libop/op_event.h are indices into this table. */ | 102 | /* p4 event codes in libop/op_event.h are indices into this table. */ |
110 | 103 | ||
111 | static struct p4_event_binding p4_events[NUM_EVENTS] = { | 104 | static struct p4_event_binding p4_events[NUM_EVENTS] = { |
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { | |||
372 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) | 365 | #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) |
373 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) | 366 | #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) |
374 | 367 | ||
368 | #define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) | ||
369 | #define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) | ||
375 | #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) | 370 | #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) |
376 | #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) | 371 | #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) |
377 | #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) | 372 | #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) |
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT]; | |||
401 | static void p4_fill_in_addresses(struct op_msrs * const msrs) | 396 | static void p4_fill_in_addresses(struct op_msrs * const msrs) |
402 | { | 397 | { |
403 | unsigned int i; | 398 | unsigned int i; |
404 | unsigned int addr, stag; | 399 | unsigned int addr, cccraddr, stag; |
405 | 400 | ||
406 | setup_num_counters(); | 401 | setup_num_counters(); |
407 | stag = get_stagger(); | 402 | stag = get_stagger(); |
408 | 403 | ||
409 | /* the counter registers we pay attention to */ | 404 | /* initialize some registers */ |
410 | for (i = 0; i < num_counters; ++i) { | 405 | for (i = 0; i < num_counters; ++i) { |
411 | msrs->counters[i].addr = | 406 | msrs->counters[i].addr = 0; |
412 | p4_counters[VIRT_CTR(stag, i)].counter_address; | ||
413 | } | 407 | } |
414 | 408 | for (i = 0; i < num_controls; ++i) { | |
415 | /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ | 409 | msrs->controls[i].addr = 0; |
416 | |||
417 | /* 18 CCCR registers */ | ||
418 | for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; | ||
419 | addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { | ||
420 | msrs->controls[i].addr = addr; | ||
421 | } | 410 | } |
422 | 411 | ||
412 | /* the counter & cccr registers we pay attention to */ | ||
413 | for (i = 0; i < num_counters; ++i) { | ||
414 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; | ||
415 | cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; | ||
416 | if (reserve_perfctr_nmi(addr)){ | ||
417 | msrs->counters[i].addr = addr; | ||
418 | msrs->controls[i].addr = cccraddr; | ||
419 | } | ||
420 | } | ||
421 | |||
423 | /* 43 ESCR registers in three or four discontiguous group */ | 422 | /* 43 ESCR registers in three or four discontiguous group */ |
424 | for (addr = MSR_P4_BSU_ESCR0 + stag; | 423 | for (addr = MSR_P4_BSU_ESCR0 + stag; |
425 | addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { | 424 | addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { |
426 | msrs->controls[i].addr = addr; | 425 | if (reserve_evntsel_nmi(addr)) |
426 | msrs->controls[i].addr = addr; | ||
427 | } | 427 | } |
428 | 428 | ||
429 | /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 | 429 | /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 |
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
431 | if (boot_cpu_data.x86_model >= 0x3) { | 431 | if (boot_cpu_data.x86_model >= 0x3) { |
432 | for (addr = MSR_P4_BSU_ESCR0 + stag; | 432 | for (addr = MSR_P4_BSU_ESCR0 + stag; |
433 | addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { | 433 | addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { |
434 | msrs->controls[i].addr = addr; | 434 | if (reserve_evntsel_nmi(addr)) |
435 | msrs->controls[i].addr = addr; | ||
435 | } | 436 | } |
436 | } else { | 437 | } else { |
437 | for (addr = MSR_P4_IQ_ESCR0 + stag; | 438 | for (addr = MSR_P4_IQ_ESCR0 + stag; |
438 | addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { | 439 | addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { |
439 | msrs->controls[i].addr = addr; | 440 | if (reserve_evntsel_nmi(addr)) |
441 | msrs->controls[i].addr = addr; | ||
440 | } | 442 | } |
441 | } | 443 | } |
442 | 444 | ||
443 | for (addr = MSR_P4_RAT_ESCR0 + stag; | 445 | for (addr = MSR_P4_RAT_ESCR0 + stag; |
444 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { | 446 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { |
445 | msrs->controls[i].addr = addr; | 447 | if (reserve_evntsel_nmi(addr)) |
448 | msrs->controls[i].addr = addr; | ||
446 | } | 449 | } |
447 | 450 | ||
448 | for (addr = MSR_P4_MS_ESCR0 + stag; | 451 | for (addr = MSR_P4_MS_ESCR0 + stag; |
449 | addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { | 452 | addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { |
450 | msrs->controls[i].addr = addr; | 453 | if (reserve_evntsel_nmi(addr)) |
454 | msrs->controls[i].addr = addr; | ||
451 | } | 455 | } |
452 | 456 | ||
453 | for (addr = MSR_P4_IX_ESCR0 + stag; | 457 | for (addr = MSR_P4_IX_ESCR0 + stag; |
454 | addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { | 458 | addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { |
455 | msrs->controls[i].addr = addr; | 459 | if (reserve_evntsel_nmi(addr)) |
460 | msrs->controls[i].addr = addr; | ||
456 | } | 461 | } |
457 | 462 | ||
458 | /* there are 2 remaining non-contiguously located ESCRs */ | 463 | /* there are 2 remaining non-contiguously located ESCRs */ |
459 | 464 | ||
460 | if (num_counters == NUM_COUNTERS_NON_HT) { | 465 | if (num_counters == NUM_COUNTERS_NON_HT) { |
461 | /* standard non-HT CPUs handle both remaining ESCRs*/ | 466 | /* standard non-HT CPUs handle both remaining ESCRs*/ |
462 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 467 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) |
463 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | 468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; |
469 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) | ||
470 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
464 | 471 | ||
465 | } else if (stag == 0) { | 472 | } else if (stag == 0) { |
466 | /* HT CPUs give the first remainder to the even thread, as | 473 | /* HT CPUs give the first remainder to the even thread, as |
467 | the 32nd control register */ | 474 | the 32nd control register */ |
468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | 475 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) |
476 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
469 | 477 | ||
470 | } else { | 478 | } else { |
471 | /* and two copies of the second to the odd thread, | 479 | /* and two copies of the second to the odd thread, |
472 | for the 22st and 23nd control registers */ | 480 | for the 22st and 23nd control registers */ |
473 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 481 | if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { |
474 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 482 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; |
483 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
484 | } | ||
475 | } | 485 | } |
476 | } | 486 | } |
477 | 487 | ||
@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
544 | { | 554 | { |
545 | unsigned int i; | 555 | unsigned int i; |
546 | unsigned int low, high; | 556 | unsigned int low, high; |
547 | unsigned int addr; | ||
548 | unsigned int stag; | 557 | unsigned int stag; |
549 | 558 | ||
550 | stag = get_stagger(); | 559 | stag = get_stagger(); |
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) | |||
557 | 566 | ||
558 | /* clear the cccrs we will use */ | 567 | /* clear the cccrs we will use */ |
559 | for (i = 0 ; i < num_counters ; i++) { | 568 | for (i = 0 ; i < num_counters ; i++) { |
569 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) | ||
570 | continue; | ||
560 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | 571 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
561 | CCCR_CLEAR(low); | 572 | CCCR_CLEAR(low); |
562 | CCCR_SET_REQUIRED_BITS(low); | 573 | CCCR_SET_REQUIRED_BITS(low); |
563 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | 574 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); |
564 | } | 575 | } |
565 | 576 | ||
566 | /* clear cccrs outside our concern */ | ||
567 | for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { | ||
568 | rdmsr(p4_unused_cccr[i], low, high); | ||
569 | CCCR_CLEAR(low); | ||
570 | CCCR_SET_REQUIRED_BITS(low); | ||
571 | wrmsr(p4_unused_cccr[i], low, high); | ||
572 | } | ||
573 | |||
574 | /* clear all escrs (including those outside our concern) */ | 577 | /* clear all escrs (including those outside our concern) */ |
575 | for (addr = MSR_P4_BSU_ESCR0 + stag; | 578 | for (i = num_counters; i < num_controls; i++) { |
576 | addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { | 579 | if (unlikely(!CTRL_IS_RESERVED(msrs,i))) |
577 | wrmsr(addr, 0, 0); | 580 | continue; |
578 | } | 581 | wrmsr(msrs->controls[i].addr, 0, 0); |
579 | |||
580 | /* On older models clear also MSR_P4_IQ_ESCR0/1 */ | ||
581 | if (boot_cpu_data.x86_model < 0x3) { | ||
582 | wrmsr(MSR_P4_IQ_ESCR0, 0, 0); | ||
583 | wrmsr(MSR_P4_IQ_ESCR1, 0, 0); | ||
584 | } | ||
585 | |||
586 | for (addr = MSR_P4_RAT_ESCR0 + stag; | ||
587 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { | ||
588 | wrmsr(addr, 0, 0); | ||
589 | } | ||
590 | |||
591 | for (addr = MSR_P4_MS_ESCR0 + stag; | ||
592 | addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ | ||
593 | wrmsr(addr, 0, 0); | ||
594 | } | ||
595 | |||
596 | for (addr = MSR_P4_IX_ESCR0 + stag; | ||
597 | addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ | ||
598 | wrmsr(addr, 0, 0); | ||
599 | } | 582 | } |
600 | 583 | ||
601 | if (num_counters == NUM_COUNTERS_NON_HT) { | ||
602 | wrmsr(MSR_P4_CRU_ESCR4, 0, 0); | ||
603 | wrmsr(MSR_P4_CRU_ESCR5, 0, 0); | ||
604 | } else if (stag == 0) { | ||
605 | wrmsr(MSR_P4_CRU_ESCR4, 0, 0); | ||
606 | } else { | ||
607 | wrmsr(MSR_P4_CRU_ESCR5, 0, 0); | ||
608 | } | ||
609 | |||
610 | /* setup all counters */ | 584 | /* setup all counters */ |
611 | for (i = 0 ; i < num_counters ; ++i) { | 585 | for (i = 0 ; i < num_counters ; ++i) { |
612 | if (counter_config[i].enabled) { | 586 | if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { |
613 | reset_value[i] = counter_config[i].count; | 587 | reset_value[i] = counter_config[i].count; |
614 | pmc_setup_one_p4_counter(i); | 588 | pmc_setup_one_p4_counter(i); |
615 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); | 589 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); |
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs) | |||
696 | stag = get_stagger(); | 670 | stag = get_stagger(); |
697 | 671 | ||
698 | for (i = 0; i < num_counters; ++i) { | 672 | for (i = 0; i < num_counters; ++i) { |
673 | if (!reset_value[i]) | ||
674 | continue; | ||
699 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | 675 | CCCR_READ(low, high, VIRT_CTR(stag, i)); |
700 | CCCR_SET_DISABLE(low); | 676 | CCCR_SET_DISABLE(low); |
701 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | 677 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); |
702 | } | 678 | } |
703 | } | 679 | } |
704 | 680 | ||
681 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
682 | { | ||
683 | int i; | ||
684 | |||
685 | for (i = 0 ; i < num_counters ; ++i) { | ||
686 | if (CTR_IS_RESERVED(msrs,i)) | ||
687 | release_perfctr_nmi(msrs->counters[i].addr); | ||
688 | } | ||
689 | /* some of the control registers are specially reserved in | ||
690 | * conjunction with the counter registers (hence the starting offset). | ||
691 | * This saves a few bits. | ||
692 | */ | ||
693 | for (i = num_counters ; i < num_controls ; ++i) { | ||
694 | if (CTRL_IS_RESERVED(msrs,i)) | ||
695 | release_evntsel_nmi(msrs->controls[i].addr); | ||
696 | } | ||
697 | } | ||
698 | |||
705 | 699 | ||
706 | #ifdef CONFIG_SMP | 700 | #ifdef CONFIG_SMP |
707 | struct op_x86_model_spec const op_p4_ht2_spec = { | 701 | struct op_x86_model_spec const op_p4_ht2_spec = { |
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { | |||
711 | .setup_ctrs = &p4_setup_ctrs, | 705 | .setup_ctrs = &p4_setup_ctrs, |
712 | .check_ctrs = &p4_check_ctrs, | 706 | .check_ctrs = &p4_check_ctrs, |
713 | .start = &p4_start, | 707 | .start = &p4_start, |
714 | .stop = &p4_stop | 708 | .stop = &p4_stop, |
709 | .shutdown = &p4_shutdown | ||
715 | }; | 710 | }; |
716 | #endif | 711 | #endif |
717 | 712 | ||
@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = { | |||
722 | .setup_ctrs = &p4_setup_ctrs, | 717 | .setup_ctrs = &p4_setup_ctrs, |
723 | .check_ctrs = &p4_check_ctrs, | 718 | .check_ctrs = &p4_check_ctrs, |
724 | .start = &p4_start, | 719 | .start = &p4_start, |
725 | .stop = &p4_stop | 720 | .stop = &p4_stop, |
721 | .shutdown = &p4_shutdown | ||
726 | }; | 722 | }; |