author     Ingo Molnar <mingo@elte.hu>    2010-03-12 15:06:35 -0500
committer  Ingo Molnar <mingo@elte.hu>    2010-03-12 15:06:37 -0500
commit     0308635917273030db6121d67c41ef2279b30340 (patch)
tree       f65e386905199f7a1060119c53a51eb15c32b8e6 /arch/x86/kernel
parent     3997d3776a6e89586e76a0ef355bfbbd8a76966c (diff)
parent     0b861225a5890f22445f08ca9cc7a87cff276ff7 (diff)

Merge branch 'perf/x86' into perf/core

Merge reason: The new P4 driver is stable and ready now for more testing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')

-rw-r--r--  arch/x86/kernel/cpu/perf_event.c        |  49
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c    |   2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c  |  15
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c     | 607
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c     |   2

5 files changed, 653 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a6d92c34135..978d297170a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -190,6 +190,8 @@ struct x86_pmu {
 	void		(*enable_all)(void);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
@@ -415,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
+static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+{
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to
+	 */
+	if (!attr->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!attr->exclude_kernel)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -446,23 +467,13 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 	event->destroy = hw_perf_event_destroy;
 
-	/*
-	 * Generate PMC IRQs:
-	 * (keep 'enabled' bit clear for now)
-	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
-
 	hwc->idx = -1;
 	hwc->last_cpu = -1;
 	hwc->last_tag = ~0ULL;
 
-	/*
-	 * Count user and OS events unless requested not to.
-	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+	/* Processor specifics */
+	if (x86_pmu.hw_config(attr, hwc))
+		return -EOPNOTSUPP;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
@@ -517,7 +528,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
-		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
 	}
 
@@ -925,7 +936,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 		return ret;
 	/*
@@ -1252,12 +1263,15 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	int assign[X86_PMC_IDX_MAX];
 	int n0, n1, ret;
 
+	if (!x86_pmu_initialized())
+		return 0;
+
 	/* n0 = total number of events */
 	n0 = collect_events(cpuc, leader, true);
 	if (n0 < 0)
 		return n0;
 
-	ret = x86_schedule_events(cpuc, n0, assign);
+	ret = x86_pmu.schedule_events(cpuc, n0, assign);
 	if (ret)
 		return ret;
 
@@ -1307,6 +1321,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_p4.c"
 #include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
@@ -1509,7 +1524,7 @@ static int validate_group(struct perf_event *event)
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
 out_free:
 	kfree(fake_cpuc);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 573458f1caf..358a8e3d05f 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -363,6 +363,8 @@ static __initconst struct x86_pmu amd_pmu = {
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
+	.hw_config		= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 971dc6e7d54..044b8436b19 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -749,6 +749,8 @@ static __initconst struct x86_pmu core_pmu = {
 	.enable_all		= x86_pmu_enable_all,
 	.enable			= x86_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
+	.hw_config		= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
@@ -786,6 +788,8 @@ static __initconst struct x86_pmu intel_pmu = {
 	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_event,
 	.disable		= intel_pmu_disable_event,
+	.hw_config		= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
@@ -839,12 +843,13 @@ static __init int intel_pmu_init(void)
 	int version;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-		/* check for P6 processor family */
-		if (boot_cpu_data.x86 == 6) {
+		switch (boot_cpu_data.x86) {
+		case 0x6:
 			return p6_pmu_init();
-		} else {
+		case 0xf:
+			return p4_pmu_init();
+		}
 			return -ENODEV;
-		}
 	}
 
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
new file mode 100644
index 00000000000..381f593e829
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -0,0 +1,607 @@
+/*
+ * Netburst Performance Events (P4, old Xeon)
+ *
+ * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
+ * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
+ *
+ * For licensing details see kernel-base/COPYING
+ */
+
+#ifdef CONFIG_CPU_SUP_INTEL
+
+#include <asm/perf_event_p4.h>
+
+/*
+ * array indices: 0,1 - HT threads, used with HT enabled cpu
+ */
+struct p4_event_template {
+	u32 opcode;			/* ESCR event + CCCR selector */
+	u64 config;			/* packed predefined bits */
+	int dep;			/* upstream dependency event index */
+	unsigned int emask;		/* ESCR EventMask */
+	unsigned int escr_msr[2];	/* ESCR MSR for this event */
+	unsigned int cntr[2];		/* counter index (offset) */
+};
+
+struct p4_pmu_res {
+	/* maps hw_conf::idx into template for ESCR sake */
+	struct p4_event_template *tpl[ARCH_P4_MAX_CCCR];
+};
+
+static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
+
+/*
+ * WARN: CCCR1 doesn't have a working enable bit so try to not
+ * use it if possible
+ *
+ * Also, as soon as we start to support raw events we will need to
+ * append _all_ P4_EVENT_PACK'ed events here
+ */
+struct p4_event_template p4_templates[] = {
+	[0] = {
+		.opcode	= P4_UOP_TYPE,
+		.config	= 0,
+		.dep	= -1,
+		.emask	=
+			P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS)	|
+			P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
+		.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+		.cntr	= { 16, 17 },
+	},
+	[1] = {
+		.opcode	= P4_GLOBAL_POWER_EVENTS,
+		.config	= 0,
+		.dep	= -1,
+		.emask	=
+			P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING),
+		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.cntr	= { 0, 2 },
+	},
+	[2] = {
+		.opcode	= P4_INSTR_RETIRED,
+		.config	= 0,
+		.dep	= 0, /* needs front-end tagging */
+		.emask	=
+			P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG)	|
+			P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG)	|
+			P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG)	|
+			P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG),
+		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+		.cntr	= { 12, 14 },
+	},
+	[3] = {
+		.opcode	= P4_BSQ_CACHE_REFERENCE,
+		.config	= 0,
+		.dep	= -1,
+		.emask	=
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)	|
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)	|
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)	|
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM),
+		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+		.cntr	= { 0, 2 },
+	},
+	[4] = {
+		.opcode	= P4_BSQ_CACHE_REFERENCE,
+		.config	= 0,
+		.dep	= -1,
+		.emask	=
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
+			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
+		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+		.cntr	= { 0, 3 },
+	},
+	[5] = {
+		.opcode	= P4_RETIRED_BRANCH_TYPE,
+		.config	= 0,
+		.dep	= -1,
+		.emask	=
+			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
+			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL)		|
+			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN)		|
+			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT),
+		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
+		.cntr	= { 4, 6 },
+	},
+	[6] = {
+		.opcode	= P4_MISPRED_BRANCH_RETIRED,
+		.config	= 0,
+		.dep	= -1,
+		.emask	=
+			P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS),
+		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+		.cntr	= { 12, 14 },
+	},
+	[7] = {
+		.opcode	= P4_FSB_DATA_ACTIVITY,
+		.config	= p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
+		.dep	= -1,
+		.emask	=
+			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV)	|
+			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN),
+		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+		.cntr	= { 0, 2 },
+	},
+};
+
+static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = {
+	/* non-halted CPU clocks */
+	[PERF_COUNT_HW_CPU_CYCLES]		= &p4_templates[1],
+
+	/* retired instructions: dep on tagging the FSB */
+	[PERF_COUNT_HW_INSTRUCTIONS]		= &p4_templates[2],
+
+	/* cache hits */
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= &p4_templates[3],
+
+	/* cache misses */
+	[PERF_COUNT_HW_CACHE_MISSES]		= &p4_templates[4],
+
+	/* branch instructions retired */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= &p4_templates[5],
+
+	/* mispredicted branches retired */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= &p4_templates[6],
+
+	/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
+	[PERF_COUNT_HW_BUS_CYCLES]		= &p4_templates[7],
+};
+
+static u64 p4_pmu_event_map(int hw_event)
+{
+	struct p4_event_template *tpl;
+	u64 config;
+
+	if (hw_event >= ARRAY_SIZE(p4_event_map)) {
+		printk_once(KERN_ERR "PMU: Incorrect event index\n");
+		return 0;
+	}
+	tpl = p4_event_map[hw_event];
+
+	/*
+	 * fill config up according to
+	 * a predefined event template
+	 */
+	config = tpl->config;
+	config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT);
+	config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT);
+	config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
+
+	/* on HT machine we need a special bit */
+	if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
+		config = p4_set_ht_bit(config);
+
+	return config;
+}
+
+/*
+ * Note that we still have 5 events (from the global events SDM list)
+ * intersected in opcode+emask bits, so we will need another
+ * scheme there to distinguish templates.
+ */
+static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
+{
+	return dst & src;
+}
+
+static struct p4_event_template *p4_pmu_template_lookup(u64 config)
+{
+	u32 opcode = p4_config_unpack_opcode(config);
+	unsigned int emask = p4_config_unpack_emask(config);
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(p4_templates); i++) {
+		if (opcode == p4_templates[i].opcode &&
+			p4_pmu_emask_match(emask, p4_templates[i].emask))
+			return &p4_templates[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * We don't control raw events so it's up to the caller
+ * to pass sane values (and we don't count the thread number
+ * on an HT machine but allow HT-compatible specifics to be
+ * passed on)
+ */
+static u64 p4_pmu_raw_event(u64 hw_event)
+{
+	return hw_event &
+		(p4_config_pack_escr(P4_EVNTSEL_MASK_HT) |
+		 p4_config_pack_cccr(P4_CCCR_MASK_HT));
+}
+
+static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
+{
+	int cpu = raw_smp_processor_id();
+
+	/*
+	 * the reason we use the cpu this early is that if we get scheduled
+	 * for the first time on the same cpu we will not need to swap
+	 * thread-specific flags in the config (and will save some cpu cycles)
+	 */
+
+	/* CCCR by default */
+	hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu));
+
+	/* Count user and OS events unless requested not to */
+	hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel,
+							attr->exclude_user));
+	return 0;
+}
+
+static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
+{
+	unsigned long dummy;
+
+	rdmsrl(hwc->config_base + hwc->idx, dummy);
+	if (dummy & P4_CCCR_OVF) {
+		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+			((u64)dummy) & ~P4_CCCR_OVF);
+	}
+}
+
+static inline void p4_pmu_disable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * If the event gets disabled while the counter is in an overflowed
+	 * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
+	 * asserted again and again
+	 */
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+		(u64)(p4_config_unpack_cccr(hwc->config)) &
+			~P4_CCCR_ENABLE & ~P4_CCCR_OVF);
+}
+
+static void p4_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx;
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		p4_pmu_disable_event(event);
+	}
+}
+
+static void p4_pmu_enable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int thread = p4_ht_config_thread(hwc->config);
+	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
+	u64 escr_base;
+	struct p4_event_template *tpl;
+	struct p4_pmu_res *c;
+
+	/*
+	 * some preparation work from per-cpu private fields
+	 * since we need to find out which ESCR to use
+	 */
+	c = &__get_cpu_var(p4_pmu_config);
+	tpl = c->tpl[hwc->idx];
+	if (!tpl) {
+		pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx);
+		return;
+	}
+	escr_base = (u64)tpl->escr_msr[thread];
+
+	/*
+	 * - we don't support cascaded counters yet
+	 * - and counter 1 is broken (erratum)
+	 */
+	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
+	WARN_ON_ONCE(hwc->idx == 1);
+
+	(void)checking_wrmsrl(escr_base, escr_conf);
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+		(u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE);
+}
+
+static void p4_pmu_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx;
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		p4_pmu_enable_event(event);
+	}
+}
+
+static int p4_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	int idx, handled = 0;
+	u64 val;
+
+	data.addr = 0;
+	data.raw = NULL;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		event = cpuc->events[idx];
+		hwc = &event->hw;
+
+		WARN_ON_ONCE(hwc->idx != idx);
+
+		/*
+		 * FIXME: Redundant call, actually not needed
+		 * but just to check if we're screwed
+		 */
+		p4_pmu_clear_cccr_ovf(hwc);
+
+		val = x86_perf_event_update(event);
+		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+			continue;
+
+		/*
+		 * event overflow
+		 */
+		handled = 1;
+		data.period = event->hw.last_period;
+
+		if (!x86_perf_event_set_period(event))
+			continue;
+		if (perf_event_overflow(event, 1, &data, regs))
+			p4_pmu_disable_event(event);
+	}
+
+	if (handled) {
+		/* p4 quirk: unmask it again */
+		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+		inc_irq_stat(apic_perf_irqs);
+	}
+
+	return handled;
+}
+
+/*
+ * swap thread-specific fields according to the thread
+ * we are going to run on
+ */
+static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
+{
+	u32 escr, cccr;
+
+	/*
+	 * either we are lucky and continue on the same cpu, or there is no HT support
+	 */
+	if (!p4_should_swap_ts(hwc->config, cpu))
+		return;
+
+	/*
+	 * the event is migrated from another logical
+	 * cpu, so we need to swap thread-specific flags
+	 */
+
+	escr = p4_config_unpack_escr(hwc->config);
+	cccr = p4_config_unpack_cccr(hwc->config);
+
+	if (p4_ht_thread(cpu)) {
+		cccr &= ~P4_CCCR_OVF_PMI_T0;
+		cccr |= P4_CCCR_OVF_PMI_T1;
+		if (escr & P4_EVNTSEL_T0_OS) {
+			escr &= ~P4_EVNTSEL_T0_OS;
+			escr |= P4_EVNTSEL_T1_OS;
+		}
+		if (escr & P4_EVNTSEL_T0_USR) {
+			escr &= ~P4_EVNTSEL_T0_USR;
+			escr |= P4_EVNTSEL_T1_USR;
+		}
+		hwc->config = p4_config_pack_escr(escr);
+		hwc->config |= p4_config_pack_cccr(cccr);
+		hwc->config |= P4_CONFIG_HT;
+	} else {
+		cccr &= ~P4_CCCR_OVF_PMI_T1;
+		cccr |= P4_CCCR_OVF_PMI_T0;
+		if (escr & P4_EVNTSEL_T1_OS) {
+			escr &= ~P4_EVNTSEL_T1_OS;
+			escr |= P4_EVNTSEL_T0_OS;
+		}
+		if (escr & P4_EVNTSEL_T1_USR) {
+			escr &= ~P4_EVNTSEL_T1_USR;
+			escr |= P4_EVNTSEL_T0_USR;
+		}
+		hwc->config = p4_config_pack_escr(escr);
+		hwc->config |= p4_config_pack_cccr(cccr);
+		hwc->config &= ~P4_CONFIG_HT;
+	}
+}
+
+/* ESCRs are not sequential in memory so we need a map */
+static unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = {
+	MSR_P4_ALF_ESCR0,	/*  0 */
+	MSR_P4_ALF_ESCR1,	/*  1 */
+	MSR_P4_BPU_ESCR0,	/*  2 */
+	MSR_P4_BPU_ESCR1,	/*  3 */
+	MSR_P4_BSU_ESCR0,	/*  4 */
+	MSR_P4_BSU_ESCR1,	/*  5 */
+	MSR_P4_CRU_ESCR0,	/*  6 */
+	MSR_P4_CRU_ESCR1,	/*  7 */
+	MSR_P4_CRU_ESCR2,	/*  8 */
+	MSR_P4_CRU_ESCR3,	/*  9 */
+	MSR_P4_CRU_ESCR4,	/* 10 */
+	MSR_P4_CRU_ESCR5,	/* 11 */
+	MSR_P4_DAC_ESCR0,	/* 12 */
+	MSR_P4_DAC_ESCR1,	/* 13 */
+	MSR_P4_FIRM_ESCR0,	/* 14 */
+	MSR_P4_FIRM_ESCR1,	/* 15 */
+	MSR_P4_FLAME_ESCR0,	/* 16 */
+	MSR_P4_FLAME_ESCR1,	/* 17 */
+	MSR_P4_FSB_ESCR0,	/* 18 */
+	MSR_P4_FSB_ESCR1,	/* 19 */
+	MSR_P4_IQ_ESCR0,	/* 20 */
+	MSR_P4_IQ_ESCR1,	/* 21 */
+	MSR_P4_IS_ESCR0,	/* 22 */
+	MSR_P4_IS_ESCR1,	/* 23 */
+	MSR_P4_ITLB_ESCR0,	/* 24 */
+	MSR_P4_ITLB_ESCR1,	/* 25 */
+	MSR_P4_IX_ESCR0,	/* 26 */
+	MSR_P4_IX_ESCR1,	/* 27 */
+	MSR_P4_MOB_ESCR0,	/* 28 */
+	MSR_P4_MOB_ESCR1,	/* 29 */
+	MSR_P4_MS_ESCR0,	/* 30 */
+	MSR_P4_MS_ESCR1,	/* 31 */
+	MSR_P4_PMH_ESCR0,	/* 32 */
+	MSR_P4_PMH_ESCR1,	/* 33 */
+	MSR_P4_RAT_ESCR0,	/* 34 */
+	MSR_P4_RAT_ESCR1,	/* 35 */
+	MSR_P4_SAAT_ESCR0,	/* 36 */
+	MSR_P4_SAAT_ESCR1,	/* 37 */
+	MSR_P4_SSU_ESCR0,	/* 38 */
+	MSR_P4_SSU_ESCR1,	/* 39 */
+	MSR_P4_TBPU_ESCR0,	/* 40 */
+	MSR_P4_TBPU_ESCR1,	/* 41 */
+	MSR_P4_TC_ESCR0,	/* 42 */
+	MSR_P4_TC_ESCR1,	/* 43 */
+	MSR_P4_U2L_ESCR0,	/* 44 */
+	MSR_P4_U2L_ESCR1,	/* 45 */
+};
+
+static int p4_get_escr_idx(unsigned int addr)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) {
+		if (addr == p4_escr_map[i])
+			return i;
+	}
+
+	return -1;
+}
+
+static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];
+
+	struct hw_perf_event *hwc;
+	struct p4_event_template *tpl;
+	struct p4_pmu_res *c;
+	int cpu = raw_smp_processor_id();
+	int escr_idx, thread, i, num;
+
+	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+	bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);
+
+	c = &__get_cpu_var(p4_pmu_config);
+	/*
+	 * First find out which resources the events are going
+	 * to use; if an ESCR+CCCR tuple is already borrowed
+	 * then get out of here
+	 */
+	for (i = 0, num = n; i < n; i++, num--) {
+		hwc = &cpuc->event_list[i]->hw;
+		tpl = p4_pmu_template_lookup(hwc->config);
+		if (!tpl)
+			goto done;
+		thread = p4_ht_thread(cpu);
+		escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]);
+		if (escr_idx == -1)
+			goto done;
+
+		/* already allocated and remains on the same cpu */
+		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
+			if (assign)
+				assign[i] = hwc->idx;
+			/* upstream dependent event */
+			if (unlikely(tpl->dep != -1))
+				printk_once(KERN_WARNING "PMU: Dep events are "
+					"not implemented yet\n");
+			goto reserve;
+		}
+
+		/* it may be already borrowed */
+		if (test_bit(tpl->cntr[thread], used_mask) ||
+			test_bit(escr_idx, escr_mask))
+			goto done;
+
+		/*
+		 * ESCR+CCCR+COUNTERs are available to use, so let's swap
+		 * thread-specific bits, push the assigned bits
+		 * back and save the template into the per-cpu
+		 * area (which will allow us to find out the ESCR
+		 * to be used at the moment of "enable event via real MSR")
+		 */
+		p4_pmu_swap_config_ts(hwc, cpu);
+		if (assign) {
+			assign[i] = tpl->cntr[thread];
+			c->tpl[assign[i]] = tpl;
+		}
+reserve:
+		set_bit(tpl->cntr[thread], used_mask);
+		set_bit(escr_idx, escr_mask);
+	}
+
+done:
+	return num ? -ENOSPC : 0;
+}
+
+static __initconst struct x86_pmu p4_pmu = {
+	.name			= "Netburst P4/Xeon",
+	.handle_irq		= p4_pmu_handle_irq,
+	.disable_all		= p4_pmu_disable_all,
+	.enable_all		= p4_pmu_enable_all,
+	.enable			= p4_pmu_enable_event,
+	.disable		= p4_pmu_disable_event,
+	.eventsel		= MSR_P4_BPU_CCCR0,
+	.perfctr		= MSR_P4_BPU_PERFCTR0,
+	.event_map		= p4_pmu_event_map,
+	.raw_event		= p4_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(p4_event_map),
+	.get_event_constraints	= x86_get_event_constraints,
+	/*
+	 * If HT is disabled we may need to use all the
+	 * ARCH_P4_MAX_CCCR counters simultaneously, though
+	 * leave it restricted for the moment assuming
+	 * HT is on
+	 */
+	.num_events		= ARCH_P4_MAX_CCCR,
+	.apic			= 1,
+	.event_bits		= 40,
+	.event_mask		= (1ULL << 40) - 1,
+	.max_period		= (1ULL << 39) - 1,
+	.hw_config		= p4_hw_config,
+	.schedule_events	= p4_pmu_schedule_events,
+};
+
+static __init int p4_pmu_init(void)
+{
+	unsigned int low, high;
+
+	/* If we get stripped -- indexing fails */
+	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
+
+	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+	if (!(low & (1 << 7))) {
+		pr_cont("unsupported Netburst CPU model %d ",
+			boot_cpu_data.x86_model);
+		return -ENODEV;
+	}
+
+	pr_cont("Netburst events, ");
+
+	x86_pmu = p4_pmu;
+
+	return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index a330485d14d..6ff4d01d880 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -109,6 +109,8 @@ static __initconst struct x86_pmu p6_pmu = {
 	.enable_all		= p6_pmu_enable_all,
 	.enable			= p6_pmu_enable_event,
 	.disable		= p6_pmu_disable_event,
+	.hw_config		= x86_hw_config,
+	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_P6_EVNTSEL0,
 	.perfctr		= MSR_P6_PERFCTR0,
 	.event_map		= p6_pmu_event_map,