diff options
| author | Ingo Molnar <mingo@kernel.org> | 2012-12-08 09:25:06 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2012-12-08 09:25:06 -0500 |
| commit | f0b9abfb044649bc452fb2fb975ff2fd599cc6a3 (patch) | |
| tree | 7800081c5cb16a4dfee1e57a70f3be90f7b50d9a /arch/x86/kernel/cpu | |
| parent | adc1ef1e37358d3c17d1a74a58b2e104fc0bda15 (diff) | |
| parent | 1b3c393cd43f22ead8a6a2f839efc6df8ebd7465 (diff) | |
Merge branch 'linus' into perf/core
Conflicts:
tools/perf/Makefile
tools/perf/builtin-test.c
tools/perf/perf.h
tools/perf/tests/parse-events.c
tools/perf/util/evsel.h
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel/cpu')
| -rw-r--r-- | arch/x86/kernel/cpu/amd.c | 14 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 31 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 45 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_knc.c | 93 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_p6.c | 127 |
6 files changed, 255 insertions(+), 57 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f7e98a2c0d1..1b7d1656a04 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -631,6 +631,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
| 631 | } | 631 | } |
| 632 | } | 632 | } |
| 633 | 633 | ||
| 634 | /* | ||
| 635 | * The way access filter has a performance penalty on some workloads. | ||
| 636 | * Disable it on the affected CPUs. | ||
| 637 | */ | ||
| 638 | if ((c->x86 == 0x15) && | ||
| 639 | (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { | ||
| 640 | u64 val; | ||
| 641 | |||
| 642 | if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { | ||
| 643 | val |= 0x1E; | ||
| 644 | wrmsrl_safe(0xc0011021, val); | ||
| 645 | } | ||
| 646 | } | ||
| 647 | |||
| 634 | cpu_detect_cache_sizes(c); | 648 | cpu_detect_cache_sizes(c); |
| 635 | 649 | ||
| 636 | /* Multi core CPU? */ | 650 | /* Multi core CPU? */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 698b6ec12e0..1ac581f38df 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | * | 6 | * |
| 7 | * Written by Jacob Shin - AMD, Inc. | 7 | * Written by Jacob Shin - AMD, Inc. |
| 8 | * | 8 | * |
| 9 | * Support: borislav.petkov@amd.com | 9 | * Maintained by: Borislav Petkov <bp@alien8.de> |
| 10 | * | 10 | * |
| 11 | * April 2006 | 11 | * April 2006 |
| 12 | * - added support for AMD Family 0x10 processors | 12 | * - added support for AMD Family 0x10 processors |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 5f88abf07e9..4f9a3cbfc4a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
| @@ -285,34 +285,39 @@ void cmci_clear(void) | |||
| 285 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 285 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
| 286 | } | 286 | } |
| 287 | 287 | ||
| 288 | static long cmci_rediscover_work_func(void *arg) | ||
| 289 | { | ||
| 290 | int banks; | ||
| 291 | |||
| 292 | /* Recheck banks in case CPUs don't all have the same */ | ||
| 293 | if (cmci_supported(&banks)) | ||
| 294 | cmci_discover(banks); | ||
| 295 | |||
| 296 | return 0; | ||
| 297 | } | ||
| 298 | |||
| 288 | /* | 299 | /* |
| 289 | * After a CPU went down cycle through all the others and rediscover | 300 | * After a CPU went down cycle through all the others and rediscover |
| 290 | * Must run in process context. | 301 | * Must run in process context. |
| 291 | */ | 302 | */ |
| 292 | void cmci_rediscover(int dying) | 303 | void cmci_rediscover(int dying) |
| 293 | { | 304 | { |
| 294 | int banks; | 305 | int cpu, banks; |
| 295 | int cpu; | ||
| 296 | cpumask_var_t old; | ||
| 297 | 306 | ||
| 298 | if (!cmci_supported(&banks)) | 307 | if (!cmci_supported(&banks)) |
| 299 | return; | 308 | return; |
| 300 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
| 301 | return; | ||
| 302 | cpumask_copy(old, ¤t->cpus_allowed); | ||
| 303 | 309 | ||
| 304 | for_each_online_cpu(cpu) { | 310 | for_each_online_cpu(cpu) { |
| 305 | if (cpu == dying) | 311 | if (cpu == dying) |
| 306 | continue; | 312 | continue; |
| 307 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | 313 | |
| 314 | if (cpu == smp_processor_id()) { | ||
| 315 | cmci_rediscover_work_func(NULL); | ||
| 308 | continue; | 316 | continue; |
| 309 | /* Recheck banks in case CPUs don't all have the same */ | 317 | } |
| 310 | if (cmci_supported(&banks)) | ||
| 311 | cmci_discover(banks); | ||
| 312 | } | ||
| 313 | 318 | ||
| 314 | set_cpus_allowed_ptr(current, old); | 319 | work_on_cpu(cpu, cmci_rediscover_work_func, NULL); |
| 315 | free_cpumask_var(old); | 320 | } |
| 316 | } | 321 | } |
| 317 | 322 | ||
| 318 | /* | 323 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 5df8d32ba91..3cf3d97cce3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
| @@ -118,22 +118,24 @@ static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) | |||
| 118 | { | 118 | { |
| 119 | struct pci_dev *pdev = box->pci_dev; | 119 | struct pci_dev *pdev = box->pci_dev; |
| 120 | int box_ctl = uncore_pci_box_ctl(box); | 120 | int box_ctl = uncore_pci_box_ctl(box); |
| 121 | u32 config; | 121 | u32 config = 0; |
| 122 | 122 | ||
| 123 | pci_read_config_dword(pdev, box_ctl, &config); | 123 | if (!pci_read_config_dword(pdev, box_ctl, &config)) { |
| 124 | config |= SNBEP_PMON_BOX_CTL_FRZ; | 124 | config |= SNBEP_PMON_BOX_CTL_FRZ; |
| 125 | pci_write_config_dword(pdev, box_ctl, config); | 125 | pci_write_config_dword(pdev, box_ctl, config); |
| 126 | } | ||
| 126 | } | 127 | } |
| 127 | 128 | ||
| 128 | static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) | 129 | static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) |
| 129 | { | 130 | { |
| 130 | struct pci_dev *pdev = box->pci_dev; | 131 | struct pci_dev *pdev = box->pci_dev; |
| 131 | int box_ctl = uncore_pci_box_ctl(box); | 132 | int box_ctl = uncore_pci_box_ctl(box); |
| 132 | u32 config; | 133 | u32 config = 0; |
| 133 | 134 | ||
| 134 | pci_read_config_dword(pdev, box_ctl, &config); | 135 | if (!pci_read_config_dword(pdev, box_ctl, &config)) { |
| 135 | config &= ~SNBEP_PMON_BOX_CTL_FRZ; | 136 | config &= ~SNBEP_PMON_BOX_CTL_FRZ; |
| 136 | pci_write_config_dword(pdev, box_ctl, config); | 137 | pci_write_config_dword(pdev, box_ctl, config); |
| 138 | } | ||
| 137 | } | 139 | } |
| 138 | 140 | ||
| 139 | static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) | 141 | static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) |
| @@ -156,7 +158,7 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe | |||
| 156 | { | 158 | { |
| 157 | struct pci_dev *pdev = box->pci_dev; | 159 | struct pci_dev *pdev = box->pci_dev; |
| 158 | struct hw_perf_event *hwc = &event->hw; | 160 | struct hw_perf_event *hwc = &event->hw; |
| 159 | u64 count; | 161 | u64 count = 0; |
| 160 | 162 | ||
| 161 | pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); | 163 | pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); |
| 162 | pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); | 164 | pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); |
| @@ -603,11 +605,12 @@ static struct pci_driver snbep_uncore_pci_driver = { | |||
| 603 | /* | 605 | /* |
| 604 | * build pci bus to socket mapping | 606 | * build pci bus to socket mapping |
| 605 | */ | 607 | */ |
| 606 | static void snbep_pci2phy_map_init(void) | 608 | static int snbep_pci2phy_map_init(void) |
| 607 | { | 609 | { |
| 608 | struct pci_dev *ubox_dev = NULL; | 610 | struct pci_dev *ubox_dev = NULL; |
| 609 | int i, bus, nodeid; | 611 | int i, bus, nodeid; |
| 610 | u32 config; | 612 | int err = 0; |
| 613 | u32 config = 0; | ||
| 611 | 614 | ||
| 612 | while (1) { | 615 | while (1) { |
| 613 | /* find the UBOX device */ | 616 | /* find the UBOX device */ |
| @@ -618,10 +621,14 @@ static void snbep_pci2phy_map_init(void) | |||
| 618 | break; | 621 | break; |
| 619 | bus = ubox_dev->bus->number; | 622 | bus = ubox_dev->bus->number; |
| 620 | /* get the Node ID of the local register */ | 623 | /* get the Node ID of the local register */ |
| 621 | pci_read_config_dword(ubox_dev, 0x40, &config); | 624 | err = pci_read_config_dword(ubox_dev, 0x40, &config); |
| 625 | if (err) | ||
| 626 | break; | ||
| 622 | nodeid = config; | 627 | nodeid = config; |
| 623 | /* get the Node ID mapping */ | 628 | /* get the Node ID mapping */ |
| 624 | pci_read_config_dword(ubox_dev, 0x54, &config); | 629 | err = pci_read_config_dword(ubox_dev, 0x54, &config); |
| 630 | if (err) | ||
| 631 | break; | ||
| 625 | /* | 632 | /* |
| 626 | * every three bits in the Node ID mapping register maps | 633 | * every three bits in the Node ID mapping register maps |
| 627 | * to a particular node. | 634 | * to a particular node. |
| @@ -633,7 +640,11 @@ static void snbep_pci2phy_map_init(void) | |||
| 633 | } | 640 | } |
| 634 | } | 641 | } |
| 635 | }; | 642 | }; |
| 636 | return; | 643 | |
| 644 | if (ubox_dev) | ||
| 645 | pci_dev_put(ubox_dev); | ||
| 646 | |||
| 647 | return err ? pcibios_err_to_errno(err) : 0; | ||
| 637 | } | 648 | } |
| 638 | /* end of Sandy Bridge-EP uncore support */ | 649 | /* end of Sandy Bridge-EP uncore support */ |
| 639 | 650 | ||
| @@ -1547,7 +1558,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) | |||
| 1547 | { | 1558 | { |
| 1548 | struct hw_perf_event *hwc = &event->hw; | 1559 | struct hw_perf_event *hwc = &event->hw; |
| 1549 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | 1560 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; |
| 1550 | int port; | ||
| 1551 | 1561 | ||
| 1552 | /* adjust the main event selector and extra register index */ | 1562 | /* adjust the main event selector and extra register index */ |
| 1553 | if (reg1->idx % 2) { | 1563 | if (reg1->idx % 2) { |
| @@ -1559,7 +1569,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) | |||
| 1559 | } | 1569 | } |
| 1560 | 1570 | ||
| 1561 | /* adjust extra register config */ | 1571 | /* adjust extra register config */ |
| 1562 | port = reg1->idx / 6 + box->pmu->pmu_idx * 4; | ||
| 1563 | switch (reg1->idx % 6) { | 1572 | switch (reg1->idx % 6) { |
| 1564 | case 2: | 1573 | case 2: |
| 1565 | /* shift the 8~15 bits to the 0~7 bits */ | 1574 | /* shift the 8~15 bits to the 0~7 bits */ |
| @@ -2578,9 +2587,11 @@ static int __init uncore_pci_init(void) | |||
| 2578 | 2587 | ||
| 2579 | switch (boot_cpu_data.x86_model) { | 2588 | switch (boot_cpu_data.x86_model) { |
| 2580 | case 45: /* Sandy Bridge-EP */ | 2589 | case 45: /* Sandy Bridge-EP */ |
| 2590 | ret = snbep_pci2phy_map_init(); | ||
| 2591 | if (ret) | ||
| 2592 | return ret; | ||
| 2581 | pci_uncores = snbep_pci_uncores; | 2593 | pci_uncores = snbep_pci_uncores; |
| 2582 | uncore_pci_driver = &snbep_uncore_pci_driver; | 2594 | uncore_pci_driver = &snbep_uncore_pci_driver; |
| 2583 | snbep_pci2phy_map_init(); | ||
| 2584 | break; | 2595 | break; |
| 2585 | default: | 2596 | default: |
| 2586 | return 0; | 2597 | return 0; |
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c index 7c46bfdbc37..4b7731bf23a 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/kernel/cpu/perf_event_knc.c | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | #include <linux/perf_event.h> | 3 | #include <linux/perf_event.h> |
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | 5 | ||
| 6 | #include <asm/hardirq.h> | ||
| 7 | |||
| 6 | #include "perf_event.h" | 8 | #include "perf_event.h" |
| 7 | 9 | ||
| 8 | static const u64 knc_perfmon_event_map[] = | 10 | static const u64 knc_perfmon_event_map[] = |
| @@ -173,30 +175,100 @@ static void knc_pmu_enable_all(int added) | |||
| 173 | static inline void | 175 | static inline void |
| 174 | knc_pmu_disable_event(struct perf_event *event) | 176 | knc_pmu_disable_event(struct perf_event *event) |
| 175 | { | 177 | { |
| 176 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 177 | struct hw_perf_event *hwc = &event->hw; | 178 | struct hw_perf_event *hwc = &event->hw; |
| 178 | u64 val; | 179 | u64 val; |
| 179 | 180 | ||
| 180 | val = hwc->config; | 181 | val = hwc->config; |
| 181 | if (cpuc->enabled) | 182 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
| 182 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
| 183 | 183 | ||
| 184 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | 184 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); |
| 185 | } | 185 | } |
| 186 | 186 | ||
| 187 | static void knc_pmu_enable_event(struct perf_event *event) | 187 | static void knc_pmu_enable_event(struct perf_event *event) |
| 188 | { | 188 | { |
| 189 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 190 | struct hw_perf_event *hwc = &event->hw; | 189 | struct hw_perf_event *hwc = &event->hw; |
| 191 | u64 val; | 190 | u64 val; |
| 192 | 191 | ||
| 193 | val = hwc->config; | 192 | val = hwc->config; |
| 194 | if (cpuc->enabled) | 193 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
| 195 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
| 196 | 194 | ||
| 197 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); | 195 | (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); |
| 198 | } | 196 | } |
| 199 | 197 | ||
| 198 | static inline u64 knc_pmu_get_status(void) | ||
| 199 | { | ||
| 200 | u64 status; | ||
| 201 | |||
| 202 | rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); | ||
| 203 | |||
| 204 | return status; | ||
| 205 | } | ||
| 206 | |||
| 207 | static inline void knc_pmu_ack_status(u64 ack) | ||
| 208 | { | ||
| 209 | wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); | ||
| 210 | } | ||
| 211 | |||
| 212 | static int knc_pmu_handle_irq(struct pt_regs *regs) | ||
| 213 | { | ||
| 214 | struct perf_sample_data data; | ||
| 215 | struct cpu_hw_events *cpuc; | ||
| 216 | int handled = 0; | ||
| 217 | int bit, loops; | ||
| 218 | u64 status; | ||
| 219 | |||
| 220 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 221 | |||
| 222 | knc_pmu_disable_all(); | ||
| 223 | |||
| 224 | status = knc_pmu_get_status(); | ||
| 225 | if (!status) { | ||
| 226 | knc_pmu_enable_all(0); | ||
| 227 | return handled; | ||
| 228 | } | ||
| 229 | |||
| 230 | loops = 0; | ||
| 231 | again: | ||
| 232 | knc_pmu_ack_status(status); | ||
| 233 | if (++loops > 100) { | ||
| 234 | WARN_ONCE(1, "perf: irq loop stuck!\n"); | ||
| 235 | perf_event_print_debug(); | ||
| 236 | goto done; | ||
| 237 | } | ||
| 238 | |||
| 239 | inc_irq_stat(apic_perf_irqs); | ||
| 240 | |||
| 241 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
| 242 | struct perf_event *event = cpuc->events[bit]; | ||
| 243 | |||
| 244 | handled++; | ||
| 245 | |||
| 246 | if (!test_bit(bit, cpuc->active_mask)) | ||
| 247 | continue; | ||
| 248 | |||
| 249 | if (!intel_pmu_save_and_restart(event)) | ||
| 250 | continue; | ||
| 251 | |||
| 252 | perf_sample_data_init(&data, 0, event->hw.last_period); | ||
| 253 | |||
| 254 | if (perf_event_overflow(event, &data, regs)) | ||
| 255 | x86_pmu_stop(event, 0); | ||
| 256 | } | ||
| 257 | |||
| 258 | /* | ||
| 259 | * Repeat if there is more work to be done: | ||
| 260 | */ | ||
| 261 | status = knc_pmu_get_status(); | ||
| 262 | if (status) | ||
| 263 | goto again; | ||
| 264 | |||
| 265 | done: | ||
| 266 | knc_pmu_enable_all(0); | ||
| 267 | |||
| 268 | return handled; | ||
| 269 | } | ||
| 270 | |||
| 271 | |||
| 200 | PMU_FORMAT_ATTR(event, "config:0-7" ); | 272 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
| 201 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | 273 | PMU_FORMAT_ATTR(umask, "config:8-15" ); |
| 202 | PMU_FORMAT_ATTR(edge, "config:18" ); | 274 | PMU_FORMAT_ATTR(edge, "config:18" ); |
| @@ -214,7 +286,7 @@ static struct attribute *intel_knc_formats_attr[] = { | |||
| 214 | 286 | ||
| 215 | static __initconst struct x86_pmu knc_pmu = { | 287 | static __initconst struct x86_pmu knc_pmu = { |
| 216 | .name = "knc", | 288 | .name = "knc", |
| 217 | .handle_irq = x86_pmu_handle_irq, | 289 | .handle_irq = knc_pmu_handle_irq, |
| 218 | .disable_all = knc_pmu_disable_all, | 290 | .disable_all = knc_pmu_disable_all, |
| 219 | .enable_all = knc_pmu_enable_all, | 291 | .enable_all = knc_pmu_enable_all, |
| 220 | .enable = knc_pmu_enable_event, | 292 | .enable = knc_pmu_enable_event, |
| @@ -226,12 +298,11 @@ static __initconst struct x86_pmu knc_pmu = { | |||
| 226 | .event_map = knc_pmu_event_map, | 298 | .event_map = knc_pmu_event_map, |
| 227 | .max_events = ARRAY_SIZE(knc_perfmon_event_map), | 299 | .max_events = ARRAY_SIZE(knc_perfmon_event_map), |
| 228 | .apic = 1, | 300 | .apic = 1, |
| 229 | .max_period = (1ULL << 31) - 1, | 301 | .max_period = (1ULL << 39) - 1, |
| 230 | .version = 0, | 302 | .version = 0, |
| 231 | .num_counters = 2, | 303 | .num_counters = 2, |
| 232 | /* in theory 40 bits, early silicon is buggy though */ | 304 | .cntval_bits = 40, |
| 233 | .cntval_bits = 32, | 305 | .cntval_mask = (1ULL << 40) - 1, |
| 234 | .cntval_mask = (1ULL << 32) - 1, | ||
| 235 | .get_event_constraints = x86_get_event_constraints, | 306 | .get_event_constraints = x86_get_event_constraints, |
| 236 | .event_constraints = knc_event_constraints, | 307 | .event_constraints = knc_event_constraints, |
| 237 | .format_attrs = intel_knc_formats_attr, | 308 | .format_attrs = intel_knc_formats_attr, |
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 900b76b5d6e..f2af39f5dc3 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
| @@ -8,13 +8,106 @@ | |||
| 8 | */ | 8 | */ |
| 9 | static const u64 p6_perfmon_event_map[] = | 9 | static const u64 p6_perfmon_event_map[] = |
| 10 | { | 10 | { |
| 11 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | 11 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */ |
| 12 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 12 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */ |
| 13 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | 13 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */ |
| 14 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | 14 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */ |
| 15 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 15 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */ |
| 16 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 16 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */ |
| 17 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | 17 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */ |
| 18 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */ | ||
| 19 | |||
| 20 | }; | ||
| 21 | |||
| 22 | static __initconst u64 p6_hw_cache_event_ids | ||
| 23 | [PERF_COUNT_HW_CACHE_MAX] | ||
| 24 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 25 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
| 26 | { | ||
| 27 | [ C(L1D) ] = { | ||
| 28 | [ C(OP_READ) ] = { | ||
| 29 | [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ | ||
| 30 | [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */ | ||
| 31 | }, | ||
| 32 | [ C(OP_WRITE) ] = { | ||
| 33 | [ C(RESULT_ACCESS) ] = 0, | ||
| 34 | [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */ | ||
| 35 | }, | ||
| 36 | [ C(OP_PREFETCH) ] = { | ||
| 37 | [ C(RESULT_ACCESS) ] = 0, | ||
| 38 | [ C(RESULT_MISS) ] = 0, | ||
| 39 | }, | ||
| 40 | }, | ||
| 41 | [ C(L1I ) ] = { | ||
| 42 | [ C(OP_READ) ] = { | ||
| 43 | [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ | ||
| 44 | [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */ | ||
| 45 | }, | ||
| 46 | [ C(OP_WRITE) ] = { | ||
| 47 | [ C(RESULT_ACCESS) ] = -1, | ||
| 48 | [ C(RESULT_MISS) ] = -1, | ||
| 49 | }, | ||
| 50 | [ C(OP_PREFETCH) ] = { | ||
| 51 | [ C(RESULT_ACCESS) ] = 0, | ||
| 52 | [ C(RESULT_MISS) ] = 0, | ||
| 53 | }, | ||
| 54 | }, | ||
| 55 | [ C(LL ) ] = { | ||
| 56 | [ C(OP_READ) ] = { | ||
| 57 | [ C(RESULT_ACCESS) ] = 0, | ||
| 58 | [ C(RESULT_MISS) ] = 0, | ||
| 59 | }, | ||
| 60 | [ C(OP_WRITE) ] = { | ||
| 61 | [ C(RESULT_ACCESS) ] = 0, | ||
| 62 | [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */ | ||
| 63 | }, | ||
| 64 | [ C(OP_PREFETCH) ] = { | ||
| 65 | [ C(RESULT_ACCESS) ] = 0, | ||
| 66 | [ C(RESULT_MISS) ] = 0, | ||
| 67 | }, | ||
| 68 | }, | ||
| 69 | [ C(DTLB) ] = { | ||
| 70 | [ C(OP_READ) ] = { | ||
| 71 | [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ | ||
| 72 | [ C(RESULT_MISS) ] = 0, | ||
| 73 | }, | ||
| 74 | [ C(OP_WRITE) ] = { | ||
| 75 | [ C(RESULT_ACCESS) ] = 0, | ||
| 76 | [ C(RESULT_MISS) ] = 0, | ||
| 77 | }, | ||
| 78 | [ C(OP_PREFETCH) ] = { | ||
| 79 | [ C(RESULT_ACCESS) ] = 0, | ||
| 80 | [ C(RESULT_MISS) ] = 0, | ||
| 81 | }, | ||
| 82 | }, | ||
| 83 | [ C(ITLB) ] = { | ||
| 84 | [ C(OP_READ) ] = { | ||
| 85 | [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ | ||
| 86 | [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */ | ||
| 87 | }, | ||
| 88 | [ C(OP_WRITE) ] = { | ||
| 89 | [ C(RESULT_ACCESS) ] = -1, | ||
| 90 | [ C(RESULT_MISS) ] = -1, | ||
| 91 | }, | ||
| 92 | [ C(OP_PREFETCH) ] = { | ||
| 93 | [ C(RESULT_ACCESS) ] = -1, | ||
| 94 | [ C(RESULT_MISS) ] = -1, | ||
| 95 | }, | ||
| 96 | }, | ||
| 97 | [ C(BPU ) ] = { | ||
| 98 | [ C(OP_READ) ] = { | ||
| 99 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */ | ||
| 100 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */ | ||
| 101 | }, | ||
| 102 | [ C(OP_WRITE) ] = { | ||
| 103 | [ C(RESULT_ACCESS) ] = -1, | ||
| 104 | [ C(RESULT_MISS) ] = -1, | ||
| 105 | }, | ||
| 106 | [ C(OP_PREFETCH) ] = { | ||
| 107 | [ C(RESULT_ACCESS) ] = -1, | ||
| 108 | [ C(RESULT_MISS) ] = -1, | ||
| 109 | }, | ||
| 110 | }, | ||
| 18 | }; | 111 | }; |
| 19 | 112 | ||
| 20 | static u64 p6_pmu_event_map(int hw_event) | 113 | static u64 p6_pmu_event_map(int hw_event) |
| @@ -34,7 +127,7 @@ static struct event_constraint p6_event_constraints[] = | |||
| 34 | { | 127 | { |
| 35 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 128 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ |
| 36 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | 129 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ |
| 37 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | 130 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
| 38 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | 131 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
| 39 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | 132 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ |
| 40 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | 133 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ |
| @@ -64,25 +157,25 @@ static void p6_pmu_enable_all(int added) | |||
| 64 | static inline void | 157 | static inline void |
| 65 | p6_pmu_disable_event(struct perf_event *event) | 158 | p6_pmu_disable_event(struct perf_event *event) |
| 66 | { | 159 | { |
| 67 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 68 | struct hw_perf_event *hwc = &event->hw; | 160 | struct hw_perf_event *hwc = &event->hw; |
| 69 | u64 val = P6_NOP_EVENT; | 161 | u64 val = P6_NOP_EVENT; |
| 70 | 162 | ||
| 71 | if (cpuc->enabled) | ||
| 72 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
| 73 | |||
| 74 | (void)wrmsrl_safe(hwc->config_base, val); | 163 | (void)wrmsrl_safe(hwc->config_base, val); |
| 75 | } | 164 | } |
| 76 | 165 | ||
| 77 | static void p6_pmu_enable_event(struct perf_event *event) | 166 | static void p6_pmu_enable_event(struct perf_event *event) |
| 78 | { | 167 | { |
| 79 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 80 | struct hw_perf_event *hwc = &event->hw; | 168 | struct hw_perf_event *hwc = &event->hw; |
| 81 | u64 val; | 169 | u64 val; |
| 82 | 170 | ||
| 83 | val = hwc->config; | 171 | val = hwc->config; |
| 84 | if (cpuc->enabled) | 172 | |
| 85 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | 173 | /* |
| 174 | * p6 only has a global event enable, set on PerfEvtSel0 | ||
| 175 | * We "disable" events by programming P6_NOP_EVENT | ||
| 176 | * and we rely on p6_pmu_enable_all() being called | ||
| 177 | * to actually enable the events. | ||
| 178 | */ | ||
| 86 | 179 | ||
| 87 | (void)wrmsrl_safe(hwc->config_base, val); | 180 | (void)wrmsrl_safe(hwc->config_base, val); |
| 88 | } | 181 | } |
| @@ -160,5 +253,9 @@ __init int p6_pmu_init(void) | |||
| 160 | 253 | ||
| 161 | x86_pmu = p6_pmu; | 254 | x86_pmu = p6_pmu; |
| 162 | 255 | ||
| 256 | memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, | ||
| 257 | sizeof(hw_cache_event_ids)); | ||
| 258 | |||
| 259 | |||
| 163 | return 0; | 260 | return 0; |
| 164 | } | 261 | } |
