diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-18 11:26:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-18 11:26:46 -0400 |
commit | 96b90f27bcf22f1d06cc16d9475cefa6ea4c4718 (patch) | |
tree | a886ad5f611dea36c6d4b615dfdcdbbcf5bd3135 /arch/x86 | |
parent | 396c9df2231865ef55aa031e3f5df9d99e036869 (diff) | |
parent | 0c99241c93b8060441f3c8434848e54b5338f922 (diff) |
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"This update has mostly fixes, but also other bits:
- perf tooling fixes
- PMU driver fixes
- Intel Broadwell PMU driver HW-enablement for LBR callstacks
- a late-coming 'perf kmem' tool update that enables it to also
analyze page allocation data. Note, this comes with MM tracepoint
changes that we believe do not break anything: they change
the formerly opaque 'struct page *' field that uniquely identifies
pages to 'pfn', which identifies pages uniquely too, but isn't as
opaque and can be used for other purposes as well"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel/pt: Fix and clean up error handling in pt_event_add()
perf/x86/intel: Add Broadwell support for the LBR callstack
perf/x86/intel/rapl: Fix energy counter measurements by supporting per domain energy units
perf/x86/intel: Fix Core2,Atom,NHM,WSM cycles:pp events
perf/x86: Fix hw_perf_event::flags collision
perf probe: Fix segfault when probe with lazy_line to file
perf probe: Find compilation directory path for lazy matching
perf probe: Set retprobe flag when probe in address-based alternative mode
perf kmem: Analyze page allocator events also
tracing, mm: Record pfn instead of pointer to struct page
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 18 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_pt.c | 33 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_rapl.c | 94 |
5 files changed, 106 insertions, 49 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 329f0356ad4a..6ac5cb7a9e14 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -65,15 +65,15 @@ struct event_constraint { | |||
65 | /* | 65 | /* |
66 | * struct hw_perf_event.flags flags | 66 | * struct hw_perf_event.flags flags |
67 | */ | 67 | */ |
68 | #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ | 68 | #define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */ |
69 | #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ | 69 | #define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */ |
70 | #define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style datala, store */ | 70 | #define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */ |
71 | #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ | 71 | #define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */ |
72 | #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ | 72 | #define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */ |
73 | #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ | 73 | #define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */ |
74 | #define PERF_X86_EVENT_EXCL 0x40 /* HT exclusivity on counter */ | 74 | #define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ |
75 | #define PERF_X86_EVENT_DYNAMIC 0x80 /* dynamic alloc'd constraint */ | 75 | #define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ |
76 | #define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */ | 76 | #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ |
77 | 77 | ||
78 | 78 | ||
79 | struct amd_nb { | 79 | struct amd_nb { |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9da2400c2ec3..219d3fb423a1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -3275,7 +3275,7 @@ __init int intel_pmu_init(void) | |||
3275 | hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| | 3275 | hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| |
3276 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; | 3276 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; |
3277 | 3277 | ||
3278 | intel_pmu_lbr_init_snb(); | 3278 | intel_pmu_lbr_init_hsw(); |
3279 | 3279 | ||
3280 | x86_pmu.event_constraints = intel_bdw_event_constraints; | 3280 | x86_pmu.event_constraints = intel_bdw_event_constraints; |
3281 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; | 3281 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index ca69ea56c712..813f75d71175 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -558,6 +558,8 @@ struct event_constraint intel_core2_pebs_event_constraints[] = { | |||
558 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | 558 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ |
559 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ | 559 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ |
560 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ | 560 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ |
561 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
562 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), | ||
561 | EVENT_CONSTRAINT_END | 563 | EVENT_CONSTRAINT_END |
562 | }; | 564 | }; |
563 | 565 | ||
@@ -565,6 +567,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { | |||
565 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ | 567 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ |
566 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ | 568 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ |
567 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ | 569 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ |
570 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
571 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), | ||
568 | EVENT_CONSTRAINT_END | 572 | EVENT_CONSTRAINT_END |
569 | }; | 573 | }; |
570 | 574 | ||
@@ -588,6 +592,8 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = { | |||
588 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ | 592 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ |
589 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ | 593 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ |
590 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ | 594 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ |
595 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
596 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), | ||
591 | EVENT_CONSTRAINT_END | 597 | EVENT_CONSTRAINT_END |
592 | }; | 598 | }; |
593 | 599 | ||
@@ -603,6 +609,8 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = { | |||
603 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ | 609 | INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ |
604 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ | 610 | INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ |
605 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ | 611 | INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ |
612 | /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ | ||
613 | INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), | ||
606 | EVENT_CONSTRAINT_END | 614 | EVENT_CONSTRAINT_END |
607 | }; | 615 | }; |
608 | 616 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index f2770641c0fd..ffe666c2c6b5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c | |||
@@ -988,39 +988,36 @@ static int pt_event_add(struct perf_event *event, int mode) | |||
988 | int ret = -EBUSY; | 988 | int ret = -EBUSY; |
989 | 989 | ||
990 | if (pt->handle.event) | 990 | if (pt->handle.event) |
991 | goto out; | 991 | goto fail; |
992 | 992 | ||
993 | buf = perf_aux_output_begin(&pt->handle, event); | 993 | buf = perf_aux_output_begin(&pt->handle, event); |
994 | if (!buf) { | 994 | ret = -EINVAL; |
995 | ret = -EINVAL; | 995 | if (!buf) |
996 | goto out; | 996 | goto fail_stop; |
997 | } | ||
998 | 997 | ||
999 | pt_buffer_reset_offsets(buf, pt->handle.head); | 998 | pt_buffer_reset_offsets(buf, pt->handle.head); |
1000 | if (!buf->snapshot) { | 999 | if (!buf->snapshot) { |
1001 | ret = pt_buffer_reset_markers(buf, &pt->handle); | 1000 | ret = pt_buffer_reset_markers(buf, &pt->handle); |
1002 | if (ret) { | 1001 | if (ret) |
1003 | perf_aux_output_end(&pt->handle, 0, true); | 1002 | goto fail_end_stop; |
1004 | goto out; | ||
1005 | } | ||
1006 | } | 1003 | } |
1007 | 1004 | ||
1008 | if (mode & PERF_EF_START) { | 1005 | if (mode & PERF_EF_START) { |
1009 | pt_event_start(event, 0); | 1006 | pt_event_start(event, 0); |
1010 | if (hwc->state == PERF_HES_STOPPED) { | 1007 | ret = -EBUSY; |
1011 | pt_event_del(event, 0); | 1008 | if (hwc->state == PERF_HES_STOPPED) |
1012 | ret = -EBUSY; | 1009 | goto fail_end_stop; |
1013 | } | ||
1014 | } else { | 1010 | } else { |
1015 | hwc->state = PERF_HES_STOPPED; | 1011 | hwc->state = PERF_HES_STOPPED; |
1016 | } | 1012 | } |
1017 | 1013 | ||
1018 | ret = 0; | 1014 | return 0; |
1019 | out: | ||
1020 | |||
1021 | if (ret) | ||
1022 | hwc->state = PERF_HES_STOPPED; | ||
1023 | 1015 | ||
1016 | fail_end_stop: | ||
1017 | perf_aux_output_end(&pt->handle, 0, true); | ||
1018 | fail_stop: | ||
1019 | hwc->state = PERF_HES_STOPPED; | ||
1020 | fail: | ||
1024 | return ret; | 1021 | return ret; |
1025 | } | 1022 | } |
1026 | 1023 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index c4bb8b8e5017..999289b94025 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |||
@@ -62,6 +62,14 @@ | |||
62 | #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ | 62 | #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ |
63 | #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ | 63 | #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ |
64 | 64 | ||
65 | #define NR_RAPL_DOMAINS 0x4 | ||
66 | static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { | ||
67 | "pp0-core", | ||
68 | "package", | ||
69 | "dram", | ||
70 | "pp1-gpu", | ||
71 | }; | ||
72 | |||
65 | /* Clients have PP0, PKG */ | 73 | /* Clients have PP0, PKG */ |
66 | #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ | 74 | #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ |
67 | 1<<RAPL_IDX_PKG_NRG_STAT|\ | 75 | 1<<RAPL_IDX_PKG_NRG_STAT|\ |
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \ | |||
112 | 120 | ||
113 | struct rapl_pmu { | 121 | struct rapl_pmu { |
114 | spinlock_t lock; | 122 | spinlock_t lock; |
115 | int hw_unit; /* 1/2^hw_unit Joule */ | ||
116 | int n_active; /* number of active events */ | 123 | int n_active; /* number of active events */ |
117 | struct list_head active_list; | 124 | struct list_head active_list; |
118 | struct pmu *pmu; /* pointer to rapl_pmu_class */ | 125 | struct pmu *pmu; /* pointer to rapl_pmu_class */ |
@@ -120,6 +127,7 @@ struct rapl_pmu { | |||
120 | struct hrtimer hrtimer; | 127 | struct hrtimer hrtimer; |
121 | }; | 128 | }; |
122 | 129 | ||
130 | static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */ | ||
123 | static struct pmu rapl_pmu_class; | 131 | static struct pmu rapl_pmu_class; |
124 | static cpumask_t rapl_cpu_mask; | 132 | static cpumask_t rapl_cpu_mask; |
125 | static int rapl_cntr_mask; | 133 | static int rapl_cntr_mask; |
@@ -127,6 +135,7 @@ static int rapl_cntr_mask; | |||
127 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); | 135 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); |
128 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); | 136 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); |
129 | 137 | ||
138 | static struct x86_pmu_quirk *rapl_quirks; | ||
130 | static inline u64 rapl_read_counter(struct perf_event *event) | 139 | static inline u64 rapl_read_counter(struct perf_event *event) |
131 | { | 140 | { |
132 | u64 raw; | 141 | u64 raw; |
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event) | |||
134 | return raw; | 143 | return raw; |
135 | } | 144 | } |
136 | 145 | ||
137 | static inline u64 rapl_scale(u64 v) | 146 | #define rapl_add_quirk(func_) \ |
147 | do { \ | ||
148 | static struct x86_pmu_quirk __quirk __initdata = { \ | ||
149 | .func = func_, \ | ||
150 | }; \ | ||
151 | __quirk.next = rapl_quirks; \ | ||
152 | rapl_quirks = &__quirk; \ | ||
153 | } while (0) | ||
154 | |||
155 | static inline u64 rapl_scale(u64 v, int cfg) | ||
138 | { | 156 | { |
157 | if (cfg > NR_RAPL_DOMAINS) { | ||
158 | pr_warn("invalid domain %d, failed to scale data\n", cfg); | ||
159 | return v; | ||
160 | } | ||
139 | /* | 161 | /* |
140 | * scale delta to smallest unit (1/2^32) | 162 | * scale delta to smallest unit (1/2^32) |
141 | * users must then scale back: count * 1/(1e9*2^32) to get Joules | 163 | * users must then scale back: count * 1/(1e9*2^32) to get Joules |
142 | * or use ldexp(count, -32). | 164 | * or use ldexp(count, -32). |
143 | * Watts = Joules/Time delta | 165 | * Watts = Joules/Time delta |
144 | */ | 166 | */ |
145 | return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); | 167 | return v << (32 - rapl_hw_unit[cfg - 1]); |
146 | } | 168 | } |
147 | 169 | ||
148 | static u64 rapl_event_update(struct perf_event *event) | 170 | static u64 rapl_event_update(struct perf_event *event) |
@@ -173,7 +195,7 @@ again: | |||
173 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 195 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
174 | delta >>= shift; | 196 | delta >>= shift; |
175 | 197 | ||
176 | sdelta = rapl_scale(delta); | 198 | sdelta = rapl_scale(delta, event->hw.config); |
177 | 199 | ||
178 | local64_add(sdelta, &event->count); | 200 | local64_add(sdelta, &event->count); |
179 | 201 | ||
@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu) | |||
546 | cpumask_set_cpu(cpu, &rapl_cpu_mask); | 568 | cpumask_set_cpu(cpu, &rapl_cpu_mask); |
547 | } | 569 | } |
548 | 570 | ||
571 | static __init void rapl_hsw_server_quirk(void) | ||
572 | { | ||
573 | /* | ||
574 | * DRAM domain on HSW server has fixed energy unit which can be | ||
575 | * different than the unit from power unit MSR. | ||
576 | * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 | ||
577 | * of 2. Datasheet, September 2014, Reference Number: 330784-001 " | ||
578 | */ | ||
579 | rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; | ||
580 | } | ||
581 | |||
549 | static int rapl_cpu_prepare(int cpu) | 582 | static int rapl_cpu_prepare(int cpu) |
550 | { | 583 | { |
551 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | 584 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); |
552 | int phys_id = topology_physical_package_id(cpu); | 585 | int phys_id = topology_physical_package_id(cpu); |
553 | u64 ms; | 586 | u64 ms; |
554 | u64 msr_rapl_power_unit_bits; | ||
555 | 587 | ||
556 | if (pmu) | 588 | if (pmu) |
557 | return 0; | 589 | return 0; |
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu) | |||
559 | if (phys_id < 0) | 591 | if (phys_id < 0) |
560 | return -1; | 592 | return -1; |
561 | 593 | ||
562 | /* protect rdmsrl() to handle virtualization */ | ||
563 | if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) | ||
564 | return -1; | ||
565 | |||
566 | pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); | 594 | pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); |
567 | if (!pmu) | 595 | if (!pmu) |
568 | return -1; | 596 | return -1; |
569 | |||
570 | spin_lock_init(&pmu->lock); | 597 | spin_lock_init(&pmu->lock); |
571 | 598 | ||
572 | INIT_LIST_HEAD(&pmu->active_list); | 599 | INIT_LIST_HEAD(&pmu->active_list); |
573 | 600 | ||
574 | /* | ||
575 | * grab power unit as: 1/2^unit Joules | ||
576 | * | ||
577 | * we cache in local PMU instance | ||
578 | */ | ||
579 | pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; | ||
580 | pmu->pmu = &rapl_pmu_class; | 601 | pmu->pmu = &rapl_pmu_class; |
581 | 602 | ||
582 | /* | 603 | /* |
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu) | |||
586 | * divide interval by 2 to avoid lockstep (2 * 100) | 607 | * divide interval by 2 to avoid lockstep (2 * 100) |
587 | * if hw unit is 32, then we use 2 ms 1/200/2 | 608 | * if hw unit is 32, then we use 2 ms 1/200/2 |
588 | */ | 609 | */ |
589 | if (pmu->hw_unit < 32) | 610 | if (rapl_hw_unit[0] < 32) |
590 | ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); | 611 | ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1)); |
591 | else | 612 | else |
592 | ms = 2; | 613 | ms = 2; |
593 | 614 | ||
@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self, | |||
655 | return NOTIFY_OK; | 676 | return NOTIFY_OK; |
656 | } | 677 | } |
657 | 678 | ||
679 | static int rapl_check_hw_unit(void) | ||
680 | { | ||
681 | u64 msr_rapl_power_unit_bits; | ||
682 | int i; | ||
683 | |||
684 | /* protect rdmsrl() to handle virtualization */ | ||
685 | if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) | ||
686 | return -1; | ||
687 | for (i = 0; i < NR_RAPL_DOMAINS; i++) | ||
688 | rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; | ||
689 | |||
690 | return 0; | ||
691 | } | ||
692 | |||
658 | static const struct x86_cpu_id rapl_cpu_match[] = { | 693 | static const struct x86_cpu_id rapl_cpu_match[] = { |
659 | [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, | 694 | [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, |
660 | [1] = {}, | 695 | [1] = {}, |
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void) | |||
664 | { | 699 | { |
665 | struct rapl_pmu *pmu; | 700 | struct rapl_pmu *pmu; |
666 | int cpu, ret; | 701 | int cpu, ret; |
702 | struct x86_pmu_quirk *quirk; | ||
703 | int i; | ||
667 | 704 | ||
668 | /* | 705 | /* |
669 | * check for Intel processor family 6 | 706 | * check for Intel processor family 6 |
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void) | |||
678 | rapl_cntr_mask = RAPL_IDX_CLN; | 715 | rapl_cntr_mask = RAPL_IDX_CLN; |
679 | rapl_pmu_events_group.attrs = rapl_events_cln_attr; | 716 | rapl_pmu_events_group.attrs = rapl_events_cln_attr; |
680 | break; | 717 | break; |
718 | case 63: /* Haswell-Server */ | ||
719 | rapl_add_quirk(rapl_hsw_server_quirk); | ||
720 | rapl_cntr_mask = RAPL_IDX_SRV; | ||
721 | rapl_pmu_events_group.attrs = rapl_events_srv_attr; | ||
722 | break; | ||
681 | case 60: /* Haswell */ | 723 | case 60: /* Haswell */ |
682 | case 69: /* Haswell-Celeron */ | 724 | case 69: /* Haswell-Celeron */ |
683 | rapl_cntr_mask = RAPL_IDX_HSW; | 725 | rapl_cntr_mask = RAPL_IDX_HSW; |
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void) | |||
693 | /* unsupported */ | 735 | /* unsupported */ |
694 | return 0; | 736 | return 0; |
695 | } | 737 | } |
738 | ret = rapl_check_hw_unit(); | ||
739 | if (ret) | ||
740 | return ret; | ||
696 | 741 | ||
742 | /* run cpu model quirks */ | ||
743 | for (quirk = rapl_quirks; quirk; quirk = quirk->next) | ||
744 | quirk->func(); | ||
697 | cpu_notifier_register_begin(); | 745 | cpu_notifier_register_begin(); |
698 | 746 | ||
699 | for_each_online_cpu(cpu) { | 747 | for_each_online_cpu(cpu) { |
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void) | |||
714 | 762 | ||
715 | pmu = __this_cpu_read(rapl_pmu); | 763 | pmu = __this_cpu_read(rapl_pmu); |
716 | 764 | ||
717 | pr_info("RAPL PMU detected, hw unit 2^-%d Joules," | 765 | pr_info("RAPL PMU detected," |
718 | " API unit is 2^-32 Joules," | 766 | " API unit is 2^-32 Joules," |
719 | " %d fixed counters" | 767 | " %d fixed counters" |
720 | " %llu ms ovfl timer\n", | 768 | " %llu ms ovfl timer\n", |
721 | pmu->hw_unit, | ||
722 | hweight32(rapl_cntr_mask), | 769 | hweight32(rapl_cntr_mask), |
723 | ktime_to_ms(pmu->timer_interval)); | 770 | ktime_to_ms(pmu->timer_interval)); |
724 | 771 | for (i = 0; i < NR_RAPL_DOMAINS; i++) { | |
772 | if (rapl_cntr_mask & (1 << i)) { | ||
773 | pr_info("hw unit of domain %s 2^-%d Joules\n", | ||
774 | rapl_domain_names[i], rapl_hw_unit[i]); | ||
775 | } | ||
776 | } | ||
725 | out: | 777 | out: |
726 | cpu_notifier_register_done(); | 778 | cpu_notifier_register_done(); |
727 | 779 | ||