aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorJacob Pan <jacob.jun.pan@linux.intel.com>2015-03-26 17:28:45 -0400
committerIngo Molnar <mingo@kernel.org>2015-04-17 03:58:56 -0400
commit645523960102fa0ac0578d070630e49ab05f06d1 (patch)
tree8832fe2feca350486fede2f7a9e0cb35b69f7040 /arch/x86
parent517e6341fa123ec3a2f9ea78ad547be910529881 (diff)
perf/x86/intel/rapl: Fix energy counter measurements by supporting per-domain energy units
The RAPL energy hardware unit can vary within a single CPU package: e.g. HSW server DRAM has a fixed energy unit of 15.3 uJ (2^-16 J), whereas the unit for the other domains is enumerated from the power unit MSR. There might be other variations in the future, so this patch adds a per-CPU-model quirk to allow special handling of certain CPUs. hw_unit is also removed from the per-CPU data, since it is not per-CPU and the sampling rate for the energy counter is typically not high. Without this patch, the DRAM domain on HSW servers is counted 4x higher than the real energy counter value. Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Stephane Eranian <eranian@google.com> Cc: Andi Kleen <andi.kleen@intel.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/1427405325-780-1-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c94
1 files changed, 73 insertions, 21 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index c4bb8b8e5017..999289b94025 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -62,6 +62,14 @@
62#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ 62#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
63#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ 63#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
64 64
65#define NR_RAPL_DOMAINS 0x4
66static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
67 "pp0-core",
68 "package",
69 "dram",
70 "pp1-gpu",
71};
72
65/* Clients have PP0, PKG */ 73/* Clients have PP0, PKG */
66#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ 74#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
67 1<<RAPL_IDX_PKG_NRG_STAT|\ 75 1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \
112 120
113struct rapl_pmu { 121struct rapl_pmu {
114 spinlock_t lock; 122 spinlock_t lock;
115 int hw_unit; /* 1/2^hw_unit Joule */
116 int n_active; /* number of active events */ 123 int n_active; /* number of active events */
117 struct list_head active_list; 124 struct list_head active_list;
118 struct pmu *pmu; /* pointer to rapl_pmu_class */ 125 struct pmu *pmu; /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
120 struct hrtimer hrtimer; 127 struct hrtimer hrtimer;
121}; 128};
122 129
130static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
123static struct pmu rapl_pmu_class; 131static struct pmu rapl_pmu_class;
124static cpumask_t rapl_cpu_mask; 132static cpumask_t rapl_cpu_mask;
125static int rapl_cntr_mask; 133static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
127static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); 135static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
128static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); 136static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
129 137
138static struct x86_pmu_quirk *rapl_quirks;
130static inline u64 rapl_read_counter(struct perf_event *event) 139static inline u64 rapl_read_counter(struct perf_event *event)
131{ 140{
132 u64 raw; 141 u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
134 return raw; 143 return raw;
135} 144}
136 145
137static inline u64 rapl_scale(u64 v) 146#define rapl_add_quirk(func_) \
147do { \
148 static struct x86_pmu_quirk __quirk __initdata = { \
149 .func = func_, \
150 }; \
151 __quirk.next = rapl_quirks; \
152 rapl_quirks = &__quirk; \
153} while (0)
154
155static inline u64 rapl_scale(u64 v, int cfg)
138{ 156{
157 if (cfg > NR_RAPL_DOMAINS) {
158 pr_warn("invalid domain %d, failed to scale data\n", cfg);
159 return v;
160 }
139 /* 161 /*
140 * scale delta to smallest unit (1/2^32) 162 * scale delta to smallest unit (1/2^32)
141 * users must then scale back: count * 1/(1e9*2^32) to get Joules 163 * users must then scale back: count * 1/(1e9*2^32) to get Joules
142 * or use ldexp(count, -32). 164 * or use ldexp(count, -32).
143 * Watts = Joules/Time delta 165 * Watts = Joules/Time delta
144 */ 166 */
145 return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); 167 return v << (32 - rapl_hw_unit[cfg - 1]);
146} 168}
147 169
148static u64 rapl_event_update(struct perf_event *event) 170static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ again:
173 delta = (new_raw_count << shift) - (prev_raw_count << shift); 195 delta = (new_raw_count << shift) - (prev_raw_count << shift);
174 delta >>= shift; 196 delta >>= shift;
175 197
176 sdelta = rapl_scale(delta); 198 sdelta = rapl_scale(delta, event->hw.config);
177 199
178 local64_add(sdelta, &event->count); 200 local64_add(sdelta, &event->count);
179 201
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
546 cpumask_set_cpu(cpu, &rapl_cpu_mask); 568 cpumask_set_cpu(cpu, &rapl_cpu_mask);
547} 569}
548 570
571static __init void rapl_hsw_server_quirk(void)
572{
573 /*
574 * DRAM domain on HSW server has fixed energy unit which can be
575 * different than the unit from power unit MSR.
576 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
577 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
578 */
579 rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
580}
581
549static int rapl_cpu_prepare(int cpu) 582static int rapl_cpu_prepare(int cpu)
550{ 583{
551 struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); 584 struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
552 int phys_id = topology_physical_package_id(cpu); 585 int phys_id = topology_physical_package_id(cpu);
553 u64 ms; 586 u64 ms;
554 u64 msr_rapl_power_unit_bits;
555 587
556 if (pmu) 588 if (pmu)
557 return 0; 589 return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
559 if (phys_id < 0) 591 if (phys_id < 0)
560 return -1; 592 return -1;
561 593
562 /* protect rdmsrl() to handle virtualization */
563 if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
564 return -1;
565
566 pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); 594 pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
567 if (!pmu) 595 if (!pmu)
568 return -1; 596 return -1;
569
570 spin_lock_init(&pmu->lock); 597 spin_lock_init(&pmu->lock);
571 598
572 INIT_LIST_HEAD(&pmu->active_list); 599 INIT_LIST_HEAD(&pmu->active_list);
573 600
574 /*
575 * grab power unit as: 1/2^unit Joules
576 *
577 * we cache in local PMU instance
578 */
579 pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
580 pmu->pmu = &rapl_pmu_class; 601 pmu->pmu = &rapl_pmu_class;
581 602
582 /* 603 /*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
586 * divide interval by 2 to avoid lockstep (2 * 100) 607 * divide interval by 2 to avoid lockstep (2 * 100)
587 * if hw unit is 32, then we use 2 ms 1/200/2 608 * if hw unit is 32, then we use 2 ms 1/200/2
588 */ 609 */
589 if (pmu->hw_unit < 32) 610 if (rapl_hw_unit[0] < 32)
590 ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); 611 ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
591 else 612 else
592 ms = 2; 613 ms = 2;
593 614
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
655 return NOTIFY_OK; 676 return NOTIFY_OK;
656} 677}
657 678
679static int rapl_check_hw_unit(void)
680{
681 u64 msr_rapl_power_unit_bits;
682 int i;
683
684 /* protect rdmsrl() to handle virtualization */
685 if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
686 return -1;
687 for (i = 0; i < NR_RAPL_DOMAINS; i++)
688 rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
689
690 return 0;
691}
692
658static const struct x86_cpu_id rapl_cpu_match[] = { 693static const struct x86_cpu_id rapl_cpu_match[] = {
659 [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, 694 [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
660 [1] = {}, 695 [1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
664{ 699{
665 struct rapl_pmu *pmu; 700 struct rapl_pmu *pmu;
666 int cpu, ret; 701 int cpu, ret;
702 struct x86_pmu_quirk *quirk;
703 int i;
667 704
668 /* 705 /*
669 * check for Intel processor family 6 706 * check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
678 rapl_cntr_mask = RAPL_IDX_CLN; 715 rapl_cntr_mask = RAPL_IDX_CLN;
679 rapl_pmu_events_group.attrs = rapl_events_cln_attr; 716 rapl_pmu_events_group.attrs = rapl_events_cln_attr;
680 break; 717 break;
718 case 63: /* Haswell-Server */
719 rapl_add_quirk(rapl_hsw_server_quirk);
720 rapl_cntr_mask = RAPL_IDX_SRV;
721 rapl_pmu_events_group.attrs = rapl_events_srv_attr;
722 break;
681 case 60: /* Haswell */ 723 case 60: /* Haswell */
682 case 69: /* Haswell-Celeron */ 724 case 69: /* Haswell-Celeron */
683 rapl_cntr_mask = RAPL_IDX_HSW; 725 rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
693 /* unsupported */ 735 /* unsupported */
694 return 0; 736 return 0;
695 } 737 }
738 ret = rapl_check_hw_unit();
739 if (ret)
740 return ret;
696 741
742 /* run cpu model quirks */
743 for (quirk = rapl_quirks; quirk; quirk = quirk->next)
744 quirk->func();
697 cpu_notifier_register_begin(); 745 cpu_notifier_register_begin();
698 746
699 for_each_online_cpu(cpu) { 747 for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)
714 762
715 pmu = __this_cpu_read(rapl_pmu); 763 pmu = __this_cpu_read(rapl_pmu);
716 764
717 pr_info("RAPL PMU detected, hw unit 2^-%d Joules," 765 pr_info("RAPL PMU detected,"
718 " API unit is 2^-32 Joules," 766 " API unit is 2^-32 Joules,"
719 " %d fixed counters" 767 " %d fixed counters"
720 " %llu ms ovfl timer\n", 768 " %llu ms ovfl timer\n",
721 pmu->hw_unit,
722 hweight32(rapl_cntr_mask), 769 hweight32(rapl_cntr_mask),
723 ktime_to_ms(pmu->timer_interval)); 770 ktime_to_ms(pmu->timer_interval));
724 771 for (i = 0; i < NR_RAPL_DOMAINS; i++) {
772 if (rapl_cntr_mask & (1 << i)) {
773 pr_info("hw unit of domain %s 2^-%d Joules\n",
774 rapl_domain_names[i], rapl_hw_unit[i]);
775 }
776 }
725out: 777out:
726 cpu_notifier_register_done(); 778 cpu_notifier_register_done();
727 779